From 88002c337c40d19072b9004af298e9de71d01abc Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 30 Jun 2025 17:04:04 +0700 Subject: [PATCH 001/219] initial skeleton for debug draw ex --- 34_DebugDraw/CMakeLists.txt | 39 ++ .../app_resources/simple.fragment.hlsl | 11 + 34_DebugDraw/app_resources/simple.vertex.hlsl | 21 ++ 34_DebugDraw/app_resources/simple_common.hlsl | 20 + 34_DebugDraw/config.json.template | 28 ++ 34_DebugDraw/include/CDrawAABB.h | 5 + 34_DebugDraw/include/common.hpp | 26 ++ 34_DebugDraw/include/transform.hpp | 154 ++++++++ 34_DebugDraw/main.cpp | 348 ++++++++++++++++++ 34_DebugDraw/pipeline.groovy | 50 +++ 34_DebugDraw/src/CDrawAABB.cpp | 5 + 34_DebugDraw/src/transform.cpp | 0 CMakeLists.txt | 2 + 13 files changed, 709 insertions(+) create mode 100644 34_DebugDraw/CMakeLists.txt create mode 100644 34_DebugDraw/app_resources/simple.fragment.hlsl create mode 100644 34_DebugDraw/app_resources/simple.vertex.hlsl create mode 100644 34_DebugDraw/app_resources/simple_common.hlsl create mode 100644 34_DebugDraw/config.json.template create mode 100644 34_DebugDraw/include/CDrawAABB.h create mode 100644 34_DebugDraw/include/common.hpp create mode 100644 34_DebugDraw/include/transform.hpp create mode 100644 34_DebugDraw/main.cpp create mode 100644 34_DebugDraw/pipeline.groovy create mode 100644 34_DebugDraw/src/CDrawAABB.cpp create mode 100644 34_DebugDraw/src/transform.cpp diff --git a/34_DebugDraw/CMakeLists.txt b/34_DebugDraw/CMakeLists.txt new file mode 100644 index 000000000..4031b45c6 --- /dev/null +++ b/34_DebugDraw/CMakeLists.txt @@ -0,0 +1,39 @@ +if(NBL_BUILD_IMGUI) + set(NBL_EXTRA_SOURCES + "${CMAKE_CURRENT_SOURCE_DIR}/src/transform.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/CDrawAABB.cpp" # TODO remove when moved to nabla + ) + + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + # TODO remove + list(APPEND NBL_LIBRARIES + imtestengine + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" + ) + + nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} geometryCreatorSpirvBRD) # TODO probably can remove + + # TODO probably remove when moved to nabla + if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) + endif() +endif() \ No newline at end of file diff --git a/34_DebugDraw/app_resources/simple.fragment.hlsl b/34_DebugDraw/app_resources/simple.fragment.hlsl new file mode 100644 index 000000000..ccd8bdffd --- /dev/null +++ b/34_DebugDraw/app_resources/simple.fragment.hlsl @@ -0,0 +1,11 @@ +#pragma shader_stage(fragment) + +#include "simple_common.hlsl" + +[shader("pixel")] +float32_t4 main(PSInput input) : SV_TARGET +{ + float32_t4 outColor = input.color; + + return outColor; +} \ No newline at end of file diff --git a/34_DebugDraw/app_resources/simple.vertex.hlsl b/34_DebugDraw/app_resources/simple.vertex.hlsl new file mode 100644 index 000000000..3dfb8d621 --- /dev/null +++ b/34_DebugDraw/app_resources/simple.vertex.hlsl @@ -0,0 +1,21 @@ +#pragma shader_stage(vertex) + +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#include "simple_common.hlsl" + +using namespace nbl::hlsl; + +[[vk::push_constant]] SPushConstants pc; + +[shader("vertex")] +PSInput main(uint vertexID : SV_VertexID) +{ + PSInput output; + + float32_t4 vertex = (bda::__ptr::create(pc.pVertices) + vertexID).deref_restrict().load(); + + output.position = vertex; + output.color = float32_t4(1, 0, 0, 1); + + return output; +} \ No newline at end of file diff --git a/34_DebugDraw/app_resources/simple_common.hlsl b/34_DebugDraw/app_resources/simple_common.hlsl new file mode 100644 index 000000000..6567165fb --- /dev/null +++ b/34_DebugDraw/app_resources/simple_common.hlsl @@ -0,0 +1,20 @@ +#ifndef _DRAW_AABB_SIMPLE_COMMON_HLSL +#define _DRAW_AABB_SIMPLE_COMMON_HLSL + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +struct SPushConstants +{ + // mat4 transform + uint64_t pVertices; +}; + +#ifdef __HLSL_VERSION +struct PSInput +{ + float32_t4 position : SV_Position; + float32_t4 color : TEXCOORD0; +}; +#endif + +#endif diff --git a/34_DebugDraw/config.json.template b/34_DebugDraw/config.json.template new file mode 100644 index 000000000..f961745c1 --- /dev/null +++ b/34_DebugDraw/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/34_DebugDraw/include/CDrawAABB.h b/34_DebugDraw/include/CDrawAABB.h new file mode 100644 index 000000000..65a8142a1 --- /dev/null +++ b/34_DebugDraw/include/CDrawAABB.h @@ -0,0 +1,5 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +// TODO move this into nabla \ No newline at end of file diff --git a/34_DebugDraw/include/common.hpp b/34_DebugDraw/include/common.hpp new file mode 100644 index 000000000..014f9643e --- /dev/null +++ b/34_DebugDraw/include/common.hpp @@ -0,0 +1,26 @@ +#ifndef __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ +#define __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ + +#include + +// common api +#include "CCamera.hpp" +#include "SimpleWindowedApplication.hpp" +#include "CEventCallback.hpp" + +// the example's headers +#include "transform.hpp" +#include "CGeomtryCreatorScene.hpp" +#include "CDrawAABB.h" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace scene; +using namespace geometrycreator; + +#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file diff --git a/34_DebugDraw/include/transform.hpp b/34_DebugDraw/include/transform.hpp new file mode 100644 index 000000000..88a78f751 --- /dev/null +++ b/34_DebugDraw/include/transform.hpp @@ -0,0 +1,154 @@ +#ifndef __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ +#define __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ + +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" +#include "imguizmo/ImGuizmo.h" + +static constexpr inline auto OfflineSceneTextureIx = 1u; + +struct TransformRequestParams +{ + bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = false; + float camDistance = 8.f; +}; + +void EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) +{ + static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE); + static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL); + static bool useSnap = false; + static float snap[3] = { 1.f, 1.f, 1.f }; + static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f }; + static float boundsSnap[] = { 0.1f, 0.1f, 0.1f }; + static bool boundSizing = false; + static bool boundSizingSnap = false; + + if (params.editTransformDecomposition) + { + if (ImGui::IsKeyPressed(ImGuiKey_T)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + if (ImGui::IsKeyPressed(ImGuiKey_R)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + if (ImGui::IsKeyPressed(ImGuiKey_S)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) + mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; + float matrixTranslation[3], matrixRotation[3], matrixScale[3]; + ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); + ImGui::InputFloat3("Tr", matrixTranslation); + ImGui::InputFloat3("Rt", matrixRotation); + ImGui::InputFloat3("Sc", matrixScale); + ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); + + if (mCurrentGizmoOperation != ImGuizmo::SCALE) + { + if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL)) + mCurrentGizmoMode = ImGuizmo::LOCAL; + ImGui::SameLine(); + if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD)) + mCurrentGizmoMode = ImGuizmo::WORLD; + } + if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift)) + useSnap = !useSnap; + ImGui::Checkbox("##UseSnap", &useSnap); + ImGui::SameLine(); + + switch (mCurrentGizmoOperation) + { + case ImGuizmo::TRANSLATE: + ImGui::InputFloat3("Snap", &snap[0]); + break; + case ImGuizmo::ROTATE: + ImGui::InputFloat("Angle Snap", &snap[0]); + break; + case ImGuizmo::SCALE: + ImGui::InputFloat("Scale Snap", &snap[0]); + break; + } + ImGui::Checkbox("Bound Sizing", &boundSizing); + if (boundSizing) + { + ImGui::PushID(3); + ImGui::Checkbox("##BoundSizing", &boundSizingSnap); + ImGui::SameLine(); + ImGui::InputFloat3("Snap", boundsSnap); + ImGui::PopID(); + } + } + + ImGuiIO& io = ImGui::GetIO(); + float viewManipulateRight = io.DisplaySize.x; + float viewManipulateTop = 0; + static ImGuiWindowFlags gizmoWindowFlags = 0; + + /* + for the "useWindow" case we just render to a gui area, + otherwise to fake full screen transparent window + + note that for both cases we make sure gizmo being + rendered is aligned to our texture scene using + imgui "cursor" screen positions + */ + + SImResourceInfo info; + info.textureID = OfflineSceneTextureIx; + info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + + if (params.useWindow) + { + ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing); + ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f)); + ImGui::Begin("Gizmo", 0, gizmoWindowFlags); + ImGuizmo::SetDrawlist(); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 windowPos = ImGui::GetWindowPos(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + + ImGuiWindow* window = ImGui::GetCurrentWindow(); + gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0); + } + else + { + ImGui::SetNextWindowPos(ImVec2(0, 0)); + ImGui::SetNextWindowSize(io.DisplaySize); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window + ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + } + + ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL); + + if(params.enableViewManipulate) + ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + + ImGui::End(); + ImGui::PopStyleColor(); +} + +#endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ \ No newline at end of file diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp new file mode 100644 index 000000000..53d028891 --- /dev/null +++ b/34_DebugDraw/main.cpp @@ -0,0 +1,348 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "common.hpp" +#include "app_resources/simple_common.hlsl" + +class DebugDrawSampleApp final : public examples::SimpleWindowedApplication +{ + using device_base_t = examples::SimpleWindowedApplication; + + _NBL_STATIC_INLINE_CONSTEXPR uint32_t WIN_W = 1280, WIN_H = 720; + +public: + inline DebugDrawSampleApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline core::vector getSurfaces() const override + { + if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = WIN_W; + params.height = WIN_H; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "DebugDrawSampleApp"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); + } + + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + m_assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + auto* geometry = m_assetManager->getGeometryCreator(); + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + ISwapchain::SCreationParams swapchainParams = { .surface = m_surface->getSurface() }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + { + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + auto* renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + + m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + + for (auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) + return logFail("Couldn't create Command Buffer!"); + } + + m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); + m_surface->recreateSwapchain(); + + { + IGPUBuffer::SCreationParams params; + params.size = sizeof(float32_t4) * vertices.size(); + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + + m_utils->createFilledDeviceLocalBufferOnDedMem( + SIntendedSubmitInfo{ .queue = getTransferUpQueue() }, + std::move(params), + vertices.data() + ).move_into(verticesBuffer); + } + + auto compileShader = [&](const std::string& filePath) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = m_logger.get(); + lparams.workingDirectory = localInputCWD; + auto bundle = m_assetManager->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) + { + m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath.c_str()); + exit(-1); + } + + const auto assets = bundle.getContents(); + assert(assets.size() == 1); + smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); + if (!shaderSrc) + return nullptr; + + return m_device->compileShader({ shaderSrc.get() }); + }; + auto vertexShader = compileShader("app_resources/simple.vertex.hlsl"); + auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); + + const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, .offset = 0, .size = sizeof(SPushConstants) }; + const auto pipelineLayout = m_device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); + + IGPUGraphicsPipeline::SCreationParams params[1] = {}; + params[0].layout = pipelineLayout.get(); + params[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main", }; + params[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main", }; + params[0].cached = { + .primitiveAssembly = { + .primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, + } + }; + params[0].renderpass = renderpass; + + if (!m_device->createGraphicsPipelines(nullptr, params, &m_pipeline)) + return logFail("Graphics pipeline creation failed"); + + m_window->setCaption("[Nabla Engine] Debug Draw App Test Demo"); + m_winMgr->show(m_window.get()); + oracle.reportBeginFrameRecord(); + camera.mapKeysToArrows(); + + return true; + } + + inline void workLoopBody() override + { + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] + if (m_realFrameIx >= framesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight + } + }; + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + // render whole scene to offline frame buffer & submit + + auto* const cmdbuf = m_cmdBufs.data()[resourceIx].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->beginDebugMarker("DebugDrawSampleApp IMGUI Frame"); + + auto* queue = getGraphicsQueue(); + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = WIN_W; + viewport.height = WIN_H; + } + cmdbuf->setViewport(0u, 1u, &viewport); + + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; + const IGPUCommandBuffer::SRenderpassBeginInfo beginInfo = + { + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + + SPushConstants pc; + pc.pVertices = verticesBuffer->getDeviceAddress(); + + cmdbuf->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + cmdbuf->bindGraphicsPipeline(m_pipeline.get()); + cmdbuf->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + cmdbuf->draw(vertices.size(), 1, 0, 0); + + cmdbuf->endRenderPass(); + } + cmdbuf->end(); + + { + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } + }; + + { + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + + if (queue->submit(infos) == IQueue::RESULT::SUCCESS) + { + const nbl::video::ISemaphore::SWaitInfo waitInfos[] = + { { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + } }; + + m_device->blockForSemaphores(waitInfos); // this is not solution, quick wa to not throw validation errors + } + else + --m_realFrameIx; + } + } + + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + } + } + + inline bool keepRunning() override + { + if (m_surface->irrecoverable()) + return false; + + return true; + } + + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + +private: + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint32_t MaxFramesInFlight = 3u; + + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + smart_refctd_ptr m_pipeline; + smart_refctd_ptr m_semaphore; + smart_refctd_ptr m_cmdPool; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + + smart_refctd_ptr m_assetManager; + core::smart_refctd_ptr m_inputSystem; + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + + core::smart_refctd_ptr m_descriptorSetPool; + + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + video::CDumbPresentationOracle oracle; + + uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed + + TransformRequestParams transformParams; + bool isPerspective = true, isLH = true, flipGizmoY = true, move = false; + float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + + bool firstFrame = true; + + std::array vertices = { float32_t4(0,0,0,0), float32_t4(0,1,0,0) }; + smart_refctd_ptr verticesBuffer; +}; + +NBL_MAIN_FUNC(DebugDrawSampleApp) \ No newline at end of file diff --git a/34_DebugDraw/pipeline.groovy b/34_DebugDraw/pipeline.groovy new file mode 100644 index 000000000..4c0efec03 --- /dev/null +++ b/34_DebugDraw/pipeline.groovy @@ -0,0 +1,50 @@ +import org.DevshGraphicsProgramming.Agent +import org.DevshGraphicsProgramming.BuilderInfo +import org.DevshGraphicsProgramming.IBuilder + +class CDebugDrawBuilder extends IBuilder +{ + public CDebugDrawBuilder(Agent _agent, _info) + { + super(_agent, _info) + } + + @Override + public boolean prepare(Map axisMapping) + { + return true + } + + @Override + public boolean build(Map axisMapping) + { + IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") + IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") + + def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) + def nameOfConfig = getNameOfConfig(config) + + agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") + + return true + } + + @Override + public boolean test(Map axisMapping) + { + return true + } + + @Override + public boolean install(Map axisMapping) + { + return true + } +} + +def create(Agent _agent, _info) +{ + return new CDebugDrawBuilder(_agent, _info) +} + +return this \ No newline at end of file diff --git a/34_DebugDraw/src/CDrawAABB.cpp b/34_DebugDraw/src/CDrawAABB.cpp new file mode 100644 index 000000000..65a8142a1 --- /dev/null +++ b/34_DebugDraw/src/CDrawAABB.cpp @@ -0,0 +1,5 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +// TODO move this into nabla \ No newline at end of file diff --git a/34_DebugDraw/src/transform.cpp b/34_DebugDraw/src/transform.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/CMakeLists.txt b/CMakeLists.txt index 31ebaddf9..88f208241 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,8 @@ if(NBL_BUILD_EXAMPLES) # Showcase compute pathtracing add_subdirectory(30_ComputeShaderPathTracer EXCLUDE_FROM_ALL) + add_subdirectory(34_DebugDraw EXCLUDE_FROM_ALL) + add_subdirectory(38_EXRSplit EXCLUDE_FROM_ALL) # if (NBL_BUILD_MITSUBA_LOADER AND NBL_BUILD_OPTIX) # add_subdirectory(39_DenoiserTonemapper EXCLUDE_FROM_ALL) From 3df145b22a01b100fb536745e8e731b2ce84cb73 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 1 Jul 2025 10:49:35 +0700 Subject: [PATCH 002/219] use camera with lines --- 34_DebugDraw/app_resources/simple.vertex.hlsl | 2 +- 34_DebugDraw/app_resources/simple_common.hlsl | 6 +- 34_DebugDraw/main.cpp | 58 ++++++++++++++++++- 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/34_DebugDraw/app_resources/simple.vertex.hlsl b/34_DebugDraw/app_resources/simple.vertex.hlsl index 3dfb8d621..9651154bd 100644 --- a/34_DebugDraw/app_resources/simple.vertex.hlsl +++ b/34_DebugDraw/app_resources/simple.vertex.hlsl @@ -14,7 +14,7 @@ PSInput main(uint vertexID : SV_VertexID) float32_t4 vertex = (bda::__ptr::create(pc.pVertices) + vertexID).deref_restrict().load(); - output.position = vertex; + output.position = mul(pc.MVP, vertex); output.color = float32_t4(1, 0, 0, 1); return output; diff --git a/34_DebugDraw/app_resources/simple_common.hlsl b/34_DebugDraw/app_resources/simple_common.hlsl index 6567165fb..5fc0d0b63 100644 --- a/34_DebugDraw/app_resources/simple_common.hlsl +++ b/34_DebugDraw/app_resources/simple_common.hlsl @@ -5,7 +5,11 @@ struct SPushConstants { - // mat4 transform +#ifdef __HLSL_VERSION + float32_t4x4 MVP; +#else + float MVP[4*4]; +#endif uint64_t pVertices; }; diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 53d028891..d339c7505 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -53,6 +53,13 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication m_assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); auto* geometry = m_assetManager->getGeometryCreator(); + { + core::vectorSIMDf cameraPosition(14, 8, 12); + core::vectorSIMDf cameraTarget(0, 0, 0); + matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(WIN_W) / WIN_H, zNear, zFar); + camera = Camera(cameraPosition, cameraTarget, projectionMatrix, moveSpeed, rotateSpeed); + } + m_semaphore = m_device->createSemaphore(m_realFrameIx); if (!m_semaphore) return logFail("Failed to Create a Semaphore!"); @@ -164,7 +171,6 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication m_window->setCaption("[Nabla Engine] Debug Draw App Test Demo"); m_winMgr->show(m_window.get()); oracle.reportBeginFrameRecord(); - camera.mapKeysToArrows(); return true; } @@ -191,6 +197,25 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + oracle.reportEndFrameRecord(); + const auto timestamp = oracle.getNextPresentationTimeStamp(); + oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + if (!m_currentImageAcquire) + return; + // render whole scene to offline frame buffer & submit auto* const cmdbuf = m_cmdBufs.data()[resourceIx].get(); @@ -198,6 +223,25 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); cmdbuf->beginDebugMarker("DebugDrawSampleApp IMGUI Frame"); + { + camera.beginInputProcessing(nextPresentationTimestamp); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, m_logger.get()); + camera.endInputProcessing(nextPresentationTimestamp); + } + + core::matrix4SIMD modelViewProjectionMatrix; + { + const auto viewMatrix = camera.getViewMatrix(); + const auto projectionMatrix = camera.getProjectionMatrix(); + const auto viewProjectionMatrix = camera.getConcatenatedMatrix(); + + core::matrix3x4SIMD modelMatrix; + modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); + modelMatrix.setRotation(quaternion(0, 0, 0)); + modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); + } + auto* queue = getGraphicsQueue(); asset::SViewport viewport; @@ -211,6 +255,12 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication } cmdbuf->setViewport(0u, 1u, &viewport); + VkRect2D scissor{ + .offset = { 0, 0 }, + .extent = { m_window->getWidth(), m_window->getHeight() } + }; + cmdbuf->setScissor(0u, 1u, &scissor); + const VkRect2D currentRenderArea = { .offset = {0,0}, @@ -229,11 +279,13 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication }; SPushConstants pc; + memcpy(pc.MVP, modelViewProjectionMatrix.pointer(), sizeof(pc.MVP)); pc.pVertices = verticesBuffer->getDeviceAddress(); cmdbuf->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); cmdbuf->bindGraphicsPipeline(m_pipeline.get()); cmdbuf->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + cmdbuf->setLineWidth(1.f); cmdbuf->draw(vertices.size(), 1, 0, 0); cmdbuf->endRenderPass(); @@ -327,7 +379,7 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication core::smart_refctd_ptr m_descriptorSetPool; - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera camera; video::CDumbPresentationOracle oracle; uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed @@ -341,7 +393,7 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication bool firstFrame = true; - std::array vertices = { float32_t4(0,0,0,0), float32_t4(0,1,0,0) }; + std::array vertices = { float32_t4(0,0,0,1), float32_t4(10,10,-10,1) }; smart_refctd_ptr verticesBuffer; }; From 6d5a495f757066941976a57985aca46c088cb626 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 1 Jul 2025 11:27:20 +0700 Subject: [PATCH 003/219] fixes from mesh_loader merge --- 34_DebugDraw/CMakeLists.txt | 1 - 34_DebugDraw/include/common.hpp | 14 +++++--------- 34_DebugDraw/main.cpp | 20 +++++++++----------- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/34_DebugDraw/CMakeLists.txt b/34_DebugDraw/CMakeLists.txt index 4031b45c6..12f418600 100644 --- a/34_DebugDraw/CMakeLists.txt +++ b/34_DebugDraw/CMakeLists.txt @@ -16,7 +16,6 @@ if(NBL_BUILD_IMGUI) ) nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") - LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} geometryCreatorSpirvBRD) # TODO probably can remove # TODO probably remove when moved to nabla if(NBL_EMBED_BUILTIN_RESOURCES) diff --git a/34_DebugDraw/include/common.hpp b/34_DebugDraw/include/common.hpp index 014f9643e..599c9a2e9 100644 --- a/34_DebugDraw/include/common.hpp +++ b/34_DebugDraw/include/common.hpp @@ -3,14 +3,11 @@ #include -// common api -#include "CCamera.hpp" -#include "SimpleWindowedApplication.hpp" -#include "CEventCallback.hpp" +#include "nbl/examples/cameras/CCamera.hpp" +#include "nbl/examples/common/SimpleWindowedApplication.hpp" +#include "nbl/examples/common/CEventCallback.hpp" +#include "nbl/examples/examples.hpp" -// the example's headers -#include "transform.hpp" -#include "CGeomtryCreatorScene.hpp" #include "CDrawAABB.h" using namespace nbl; @@ -20,7 +17,6 @@ using namespace system; using namespace asset; using namespace ui; using namespace video; -using namespace scene; -using namespace geometrycreator; +using namespace nbl::examples; #endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index d339c7505..3c29b258b 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -5,9 +5,10 @@ #include "common.hpp" #include "app_resources/simple_common.hlsl" -class DebugDrawSampleApp final : public examples::SimpleWindowedApplication +class DebugDrawSampleApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { - using device_base_t = examples::SimpleWindowedApplication; + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; _NBL_STATIC_INLINE_CONSTEXPR uint32_t WIN_W = 1280, WIN_H = 720; @@ -20,7 +21,7 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication if (!m_surface) { { - auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); IWindow::SCreationParams params = {}; params.callback = core::make_smart_refctd_ptr(); params.width = WIN_W; @@ -49,9 +50,8 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; - - m_assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - auto* geometry = m_assetManager->getGeometryCreator(); + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; { core::vectorSIMDf cameraPosition(14, 8, 12); @@ -133,7 +133,7 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication IAssetLoader::SAssetLoadParams lparams = {}; lparams.logger = m_logger.get(); lparams.workingDirectory = localInputCWD; - auto bundle = m_assetManager->getAsset(filePath, lparams); + auto bundle = m_assetMgr->getAsset(filePath, lparams); if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) { m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath.c_str()); @@ -372,10 +372,9 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication std::array, MaxFramesInFlight> m_cmdBufs; ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; - smart_refctd_ptr m_assetManager; core::smart_refctd_ptr m_inputSystem; - InputSystem::ChannelReader mouse; - InputSystem::ChannelReader keyboard; + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; core::smart_refctd_ptr m_descriptorSetPool; @@ -384,7 +383,6 @@ class DebugDrawSampleApp final : public examples::SimpleWindowedApplication uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed - TransformRequestParams transformParams; bool isPerspective = true, isLH = true, flipGizmoY = true, move = false; float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; float viewWidth = 10.f; From c5dae9857a22ecb9388f4d2cb757ffe2140e2b47 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 1 Jul 2025 11:38:43 +0700 Subject: [PATCH 004/219] removed unused files --- 34_DebugDraw/CMakeLists.txt | 1 - 34_DebugDraw/include/transform.hpp | 154 ----------------------------- 34_DebugDraw/src/transform.cpp | 0 3 files changed, 155 deletions(-) delete mode 100644 34_DebugDraw/include/transform.hpp delete mode 100644 34_DebugDraw/src/transform.cpp diff --git a/34_DebugDraw/CMakeLists.txt b/34_DebugDraw/CMakeLists.txt index 12f418600..60c07b1b7 100644 --- a/34_DebugDraw/CMakeLists.txt +++ b/34_DebugDraw/CMakeLists.txt @@ -1,6 +1,5 @@ if(NBL_BUILD_IMGUI) set(NBL_EXTRA_SOURCES - "${CMAKE_CURRENT_SOURCE_DIR}/src/transform.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/CDrawAABB.cpp" # TODO remove when moved to nabla ) diff --git a/34_DebugDraw/include/transform.hpp b/34_DebugDraw/include/transform.hpp deleted file mode 100644 index 88a78f751..000000000 --- a/34_DebugDraw/include/transform.hpp +++ /dev/null @@ -1,154 +0,0 @@ -#ifndef __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ -#define __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ - -#include "nbl/ui/ICursorControl.h" -#include "nbl/ext/ImGui/ImGui.h" -#include "imgui/imgui_internal.h" -#include "imguizmo/ImGuizmo.h" - -static constexpr inline auto OfflineSceneTextureIx = 1u; - -struct TransformRequestParams -{ - bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = false; - float camDistance = 8.f; -}; - -void EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) -{ - static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE); - static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL); - static bool useSnap = false; - static float snap[3] = { 1.f, 1.f, 1.f }; - static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f }; - static float boundsSnap[] = { 0.1f, 0.1f, 0.1f }; - static bool boundSizing = false; - static bool boundSizingSnap = false; - - if (params.editTransformDecomposition) - { - if (ImGui::IsKeyPressed(ImGuiKey_T)) - mCurrentGizmoOperation = ImGuizmo::TRANSLATE; - if (ImGui::IsKeyPressed(ImGuiKey_R)) - mCurrentGizmoOperation = ImGuizmo::ROTATE; - if (ImGui::IsKeyPressed(ImGuiKey_S)) - mCurrentGizmoOperation = ImGuizmo::SCALE; - if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE)) - mCurrentGizmoOperation = ImGuizmo::TRANSLATE; - ImGui::SameLine(); - if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE)) - mCurrentGizmoOperation = ImGuizmo::ROTATE; - ImGui::SameLine(); - if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE)) - mCurrentGizmoOperation = ImGuizmo::SCALE; - if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) - mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; - float matrixTranslation[3], matrixRotation[3], matrixScale[3]; - ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); - ImGui::InputFloat3("Tr", matrixTranslation); - ImGui::InputFloat3("Rt", matrixRotation); - ImGui::InputFloat3("Sc", matrixScale); - ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); - - if (mCurrentGizmoOperation != ImGuizmo::SCALE) - { - if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL)) - mCurrentGizmoMode = ImGuizmo::LOCAL; - ImGui::SameLine(); - if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD)) - mCurrentGizmoMode = ImGuizmo::WORLD; - } - if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift)) - useSnap = !useSnap; - ImGui::Checkbox("##UseSnap", &useSnap); - ImGui::SameLine(); - - switch (mCurrentGizmoOperation) - { - case ImGuizmo::TRANSLATE: - ImGui::InputFloat3("Snap", &snap[0]); - break; - case ImGuizmo::ROTATE: - ImGui::InputFloat("Angle Snap", &snap[0]); - break; - case ImGuizmo::SCALE: - ImGui::InputFloat("Scale Snap", &snap[0]); - break; - } - ImGui::Checkbox("Bound Sizing", &boundSizing); - if (boundSizing) - { - ImGui::PushID(3); - ImGui::Checkbox("##BoundSizing", &boundSizingSnap); - ImGui::SameLine(); - ImGui::InputFloat3("Snap", boundsSnap); - ImGui::PopID(); - } - } - - ImGuiIO& io = ImGui::GetIO(); - float viewManipulateRight = io.DisplaySize.x; - float viewManipulateTop = 0; - static ImGuiWindowFlags gizmoWindowFlags = 0; - - /* - for the "useWindow" case we just render to a gui area, - otherwise to fake full screen transparent window - - note that for both cases we make sure gizmo being - rendered is aligned to our texture scene using - imgui "cursor" screen positions - */ - - SImResourceInfo info; - info.textureID = OfflineSceneTextureIx; - info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; - - if (params.useWindow) - { - ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing); - ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing); - ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f)); - ImGui::Begin("Gizmo", 0, gizmoWindowFlags); - ImGuizmo::SetDrawlist(); - - ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); - ImVec2 windowPos = ImGui::GetWindowPos(); - ImVec2 cursorPos = ImGui::GetCursorScreenPos(); - - ImGui::Image(info, contentRegionSize); - ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); - - viewManipulateRight = cursorPos.x + contentRegionSize.x; - viewManipulateTop = cursorPos.y; - - ImGuiWindow* window = ImGui::GetCurrentWindow(); - gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0); - } - else - { - ImGui::SetNextWindowPos(ImVec2(0, 0)); - ImGui::SetNextWindowSize(io.DisplaySize); - ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window - ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs); - - ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); - ImVec2 cursorPos = ImGui::GetCursorScreenPos(); - - ImGui::Image(info, contentRegionSize); - ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); - - viewManipulateRight = cursorPos.x + contentRegionSize.x; - viewManipulateTop = cursorPos.y; - } - - ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL); - - if(params.enableViewManipulate) - ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); - - ImGui::End(); - ImGui::PopStyleColor(); -} - -#endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ \ No newline at end of file diff --git a/34_DebugDraw/src/transform.cpp b/34_DebugDraw/src/transform.cpp deleted file mode 100644 index e69de29bb..000000000 From 17b53a858ce5dc0e47878ca9a9e9fb9fe927d86e Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 1 Jul 2025 14:46:04 +0700 Subject: [PATCH 005/219] draw aabb from push constant --- 34_DebugDraw/app_resources/simple.vertex.hlsl | 4 +- 34_DebugDraw/main.cpp | 52 +++++++++++++++---- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/34_DebugDraw/app_resources/simple.vertex.hlsl b/34_DebugDraw/app_resources/simple.vertex.hlsl index 9651154bd..5f7b9c429 100644 --- a/34_DebugDraw/app_resources/simple.vertex.hlsl +++ b/34_DebugDraw/app_resources/simple.vertex.hlsl @@ -12,9 +12,9 @@ PSInput main(uint vertexID : SV_VertexID) { PSInput output; - float32_t4 vertex = (bda::__ptr::create(pc.pVertices) + vertexID).deref_restrict().load(); + float32_t3 vertex = (bda::__ptr::create(pc.pVertices) + vertexID).deref_restrict().load(); - output.position = mul(pc.MVP, vertex); + output.position = mul(pc.MVP, float32_t4(vertex, 1)); output.color = float32_t4(1, 0, 0, 1); return output; diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 3c29b258b..68fa6593c 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -117,8 +117,9 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti m_surface->recreateSwapchain(); { + std::array vertices = getVerticesFromAABB(testAABB); IGPUBuffer::SCreationParams params; - params.size = sizeof(float32_t4) * vertices.size(); + params.size = sizeof(float32_t3) * vertices.size(); params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; m_utils->createFilledDeviceLocalBufferOnDedMem( @@ -286,7 +287,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti cmdbuf->bindGraphicsPipeline(m_pipeline.get()); cmdbuf->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); cmdbuf->setLineWidth(1.f); - cmdbuf->draw(vertices.size(), 1, 0, 0); + cmdbuf->draw(24, 1, 0, 0); cmdbuf->endRenderPass(); } @@ -360,6 +361,45 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti } private: + std::array getVerticesFromAABB(core::aabbox3d& aabb) + { + const auto& pMin = aabb.MinEdge; + const auto& pMax = aabb.MaxEdge; + + std::array vertices; + vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); + + vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[6] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); + + vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); + vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); + + vertices[12] = float32_t3(pMax.X, pMax.Y, pMax.Z); + vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); + vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); + vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); + + vertices[16] = float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); + + vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); + vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); + vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[23] = float32_t3(pMax.X, pMax.Y, pMax.Z); + + return vertices; + } + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers constexpr static inline uint32_t MaxFramesInFlight = 3u; @@ -383,15 +423,9 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed - bool isPerspective = true, isLH = true, flipGizmoY = true, move = false; float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; - float viewWidth = 10.f; - float camYAngle = 165.f / 180.f * 3.14159f; - float camXAngle = 32.f / 180.f * 3.14159f; - - bool firstFrame = true; - std::array vertices = { float32_t4(0,0,0,1), float32_t4(10,10,-10,1) }; + core::aabbox3d testAABB = core::aabbox3d({ 0, 0, 0 }, { 10, 10, -10 }); smart_refctd_ptr verticesBuffer; }; From a62cbed4b229e46a1ca59b4682c784d0ce467dd2 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 1 Jul 2025 16:59:46 +0700 Subject: [PATCH 006/219] move single aabb stuff into CDrawAABB --- 34_DebugDraw/include/CDrawAABB.h | 45 +++++++++++++++++- 34_DebugDraw/main.cpp | 18 ++++++-- 34_DebugDraw/src/CDrawAABB.cpp | 78 +++++++++++++++++++++++++++++++- 3 files changed, 134 insertions(+), 7 deletions(-) diff --git a/34_DebugDraw/include/CDrawAABB.h b/34_DebugDraw/include/CDrawAABB.h index 65a8142a1..998e31b7f 100644 --- a/34_DebugDraw/include/CDrawAABB.h +++ b/34_DebugDraw/include/CDrawAABB.h @@ -2,4 +2,47 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -// TODO move this into nabla \ No newline at end of file +// TODO move this into nabla + +#include "nbl/video/declarations.h" +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +#ifndef _NBL_EXT_DRAW_AABB_H_ +#define _NBL_EXT_DRAW_AABB_H_ + +namespace nbl::ext::drawdebug +{ +class DrawAABB final : public core::IReferenceCounted +{ +public: + struct SCreationParameters + { + asset::SPushConstantRange pushConstantRange; + }; + + // creates an instance that draws one AABB via push constant + static core::smart_refctd_ptr create(SCreationParameters&& params); + + // creates an instance that draws multiple AABBs using streaming buffer + // TODO + + // creates default pipeline layout for push constant version + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); + + inline const SCreationParameters& getCreationParameters() const { return m_creationParams; } + + // records draw command for single AABB, user has to set pipeline outside + bool renderSingle(video::IGPUCommandBuffer* commandBuffer); + + static std::array getVerticesFromAABB(const core::aabbox3d& aabb); + +protected: + DrawAABB(SCreationParameters&& _params); + ~DrawAABB() override; + +private: + SCreationParameters m_creationParams; +}; +} + +#endif diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 68fa6593c..5af301f35 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -116,8 +116,17 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); m_surface->recreateSwapchain(); + { + ext::drawdebug::DrawAABB::SCreationParameters params; + params.pushConstantRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, + .offset = 0, + .size = sizeof(SPushConstants) + }; + drawAABB = ext::drawdebug::DrawAABB::create(std::move(params)); + } { - std::array vertices = getVerticesFromAABB(testAABB); + auto vertices = ext::drawdebug::DrawAABB::getVerticesFromAABB(testAABB); IGPUBuffer::SCreationParams params; params.size = sizeof(float32_t3) * vertices.size(); params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; @@ -152,8 +161,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti auto vertexShader = compileShader("app_resources/simple.vertex.hlsl"); auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); - const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, .offset = 0, .size = sizeof(SPushConstants) }; - const auto pipelineLayout = m_device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); + const auto pipelineLayout = ext::drawdebug::DrawAABB::createDefaultPipelineLayout(m_device.get(), drawAABB->getCreationParameters().pushConstantRange); IGPUGraphicsPipeline::SCreationParams params[1] = {}; params[0].layout = pipelineLayout.get(); @@ -286,8 +294,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti cmdbuf->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); cmdbuf->bindGraphicsPipeline(m_pipeline.get()); cmdbuf->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); - cmdbuf->setLineWidth(1.f); - cmdbuf->draw(24, 1, 0, 0); + drawAABB->renderSingle(cmdbuf); cmdbuf->endRenderPass(); } @@ -425,6 +432,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + smart_refctd_ptr drawAABB; core::aabbox3d testAABB = core::aabbox3d({ 0, 0, 0 }, { 10, 10, -10 }); smart_refctd_ptr verticesBuffer; }; diff --git a/34_DebugDraw/src/CDrawAABB.cpp b/34_DebugDraw/src/CDrawAABB.cpp index 65a8142a1..8dc99f693 100644 --- a/34_DebugDraw/src/CDrawAABB.cpp +++ b/34_DebugDraw/src/CDrawAABB.cpp @@ -2,4 +2,80 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -// TODO move this into nabla \ No newline at end of file +// TODO move this into nabla + +#include "CDrawAABB.h" + +using namespace nbl; +using namespace hlsl; + +namespace nbl::ext::drawdebug +{ + +core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) +{ + return core::smart_refctd_ptr(new DrawAABB(std::move(params))); +} + +DrawAABB::DrawAABB(SCreationParameters&& _params) + : m_creationParams(_params) +{ +} + +DrawAABB::~DrawAABB() +{ +} + +core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) +{ + return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); +} + +bool DrawAABB::renderSingle(video::IGPUCommandBuffer* commandBuffer) +{ + commandBuffer->setLineWidth(1.f); + commandBuffer->draw(24, 1, 0, 0); + + return true; +} + +std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb) +{ + const auto& pMin = aabb.MinEdge; + const auto& pMax = aabb.MaxEdge; + + std::array vertices; + vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); + + vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[6] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); + + vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); + vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); + + vertices[12] = float32_t3(pMax.X, pMax.Y, pMax.Z); + vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); + vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); + vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); + + vertices[16] = float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); + + vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); + vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); + vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[23] = float32_t3(pMax.X, pMax.Y, pMax.Z); + + return vertices; +} + +} From 5fa8874cca7ed3e7162792d2f68a020b99309322 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 2 Jul 2025 12:06:05 +0700 Subject: [PATCH 007/219] default pipeline creation func --- 34_DebugDraw/include/CDrawAABB.h | 2 ++ 34_DebugDraw/main.cpp | 15 +++------------ 34_DebugDraw/src/CDrawAABB.cpp | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/34_DebugDraw/include/CDrawAABB.h b/34_DebugDraw/include/CDrawAABB.h index 998e31b7f..76fc35612 100644 --- a/34_DebugDraw/include/CDrawAABB.h +++ b/34_DebugDraw/include/CDrawAABB.h @@ -29,6 +29,8 @@ class DrawAABB final : public core::IReferenceCounted // creates default pipeline layout for push constant version static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); + static bool createDefaultPipeline(core::smart_refctd_ptr* pipeline, video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); + inline const SCreationParameters& getCreationParameters() const { return m_creationParams; } // records draw command for single AABB, user has to set pipeline outside diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 5af301f35..089aafa00 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -163,18 +163,9 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti const auto pipelineLayout = ext::drawdebug::DrawAABB::createDefaultPipelineLayout(m_device.get(), drawAABB->getCreationParameters().pushConstantRange); - IGPUGraphicsPipeline::SCreationParams params[1] = {}; - params[0].layout = pipelineLayout.get(); - params[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main", }; - params[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main", }; - params[0].cached = { - .primitiveAssembly = { - .primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, - } - }; - params[0].renderpass = renderpass; - - if (!m_device->createGraphicsPipelines(nullptr, params, &m_pipeline)) + IGPUGraphicsPipeline::SShaderSpecInfo vs = { .shader = vertexShader.get(), .entryPoint = "main" }; + IGPUGraphicsPipeline::SShaderSpecInfo fs = { .shader = fragmentShader.get(), .entryPoint = "main" }; + if (!ext::drawdebug::DrawAABB::createDefaultPipeline(&m_pipeline, m_device.get(), pipelineLayout.get(), renderpass, vs, fs)) return logFail("Graphics pipeline creation failed"); m_window->setCaption("[Nabla Engine] Debug Draw App Test Demo"); diff --git a/34_DebugDraw/src/CDrawAABB.cpp b/34_DebugDraw/src/CDrawAABB.cpp index 8dc99f693..e5c18f636 100644 --- a/34_DebugDraw/src/CDrawAABB.cpp +++ b/34_DebugDraw/src/CDrawAABB.cpp @@ -31,6 +31,22 @@ core::smart_refctd_ptr DrawAABB::createDefaultPipelin return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); } +bool DrawAABB::createDefaultPipeline(core::smart_refctd_ptr* pipeline, video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment) +{ + video::IGPUGraphicsPipeline::SCreationParams params[1] = {}; + params[0].layout = layout; + params[0].vertexShader = vertex; + params[0].fragmentShader = fragment; + params[0].cached = { + .primitiveAssembly = { + .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, + } + }; + params[0].renderpass = renderpass; + + return device->createGraphicsPipelines(nullptr, params, pipeline); +} + bool DrawAABB::renderSingle(video::IGPUCommandBuffer* commandBuffer) { commandBuffer->setLineWidth(1.f); From 704a0fba4d2f8e49cbab544f75fb9801df6568e4 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 3 Jul 2025 17:04:33 +0700 Subject: [PATCH 008/219] got streaming buffer working and drawing --- .../app_resources/multi_aabb.vertex.hlsl | 23 +++ 34_DebugDraw/main.cpp | 159 +++++++++++++++++- 2 files changed, 175 insertions(+), 7 deletions(-) create mode 100644 34_DebugDraw/app_resources/multi_aabb.vertex.hlsl diff --git a/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl b/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl new file mode 100644 index 000000000..1bd6d85e1 --- /dev/null +++ b/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl @@ -0,0 +1,23 @@ +#pragma shader_stage(vertex) + +#include "simple_common.hlsl" + +struct VSInput +{ + [[vk::location(0)]] float32_t3 position : POSITION; +}; + +using namespace nbl::hlsl; + +[[vk::push_constant]] SPushConstants pc; + +[shader("vertex")] +PSInput main(VSInput input) +{ + PSInput output; + + output.position = mul(pc.MVP, float32_t4(input.position, 1)); + output.color = float32_t4(0, 1, 0, 1); + + return output; +} \ No newline at end of file diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 089aafa00..527086469 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -138,6 +138,48 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti ).move_into(verticesBuffer); } + // create streaming buffer + // TODO move into CDrawAABB + { + auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_INDIRECT_BUFFER_BIT) | asset::IBuffer::EUF_VERTEX_BUFFER_BIT | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + const uint32_t minStreamingBufferAllocationSize = 128u, maxStreamingBufferAllocationAlignment = 4096u, mdiBufferDefaultSize = /* 2MB */ 1024u * 1024u * 2u; + + auto getRequiredAccessFlags = [&](const bitflag& properties) + { + bitflag flags(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS); + + if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_READABLE_BIT)) + flags |= IDeviceMemoryAllocation::EMCAF_READ; + if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT)) + flags |= IDeviceMemoryAllocation::EMCAF_WRITE; + + return flags; + }; + + IGPUBuffer::SCreationParams mdiCreationParams = {}; + mdiCreationParams.usage = RequiredUsageFlags; + mdiCreationParams.size = mdiBufferDefaultSize; + + auto buffer = m_device->createBuffer(std::move(mdiCreationParams)); + buffer->setObjectDebugName("MDI Upstream Buffer"); + + auto memoryReqs = buffer->getMemoryReqs(); + memoryReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); + + auto allocation = m_device->allocate(memoryReqs, buffer.get(), RequiredAllocateFlags); + { + const bool allocated = allocation.isValid(); + assert(allocated); + } + auto memory = allocation.memory; + + if (!memory->map({ 0ull, memoryReqs.size }, getRequiredAccessFlags(memory->getMemoryPropertyFlags()))) + m_logger->log("Could not map device memory!", ILogger::ELL_ERROR); + + streamingBuffer = make_smart_refctd_ptr(SBufferRange{0ull, mdiCreationParams.size, std::move(buffer)}, maxStreamingBufferAllocationAlignment, minStreamingBufferAllocationSize); + } + auto compileShader = [&](const std::string& filePath) -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lparams = {}; @@ -158,15 +200,51 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti return m_device->compileShader({ shaderSrc.get() }); }; - auto vertexShader = compileShader("app_resources/simple.vertex.hlsl"); - auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); + { + auto vertexShader = compileShader("app_resources/simple.vertex.hlsl"); + auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); + + const auto pipelineLayout = ext::drawdebug::DrawAABB::createDefaultPipelineLayout(m_device.get(), drawAABB->getCreationParameters().pushConstantRange); + + IGPUGraphicsPipeline::SShaderSpecInfo vs = { .shader = vertexShader.get(), .entryPoint = "main" }; + IGPUGraphicsPipeline::SShaderSpecInfo fs = { .shader = fragmentShader.get(), .entryPoint = "main" }; + if (!ext::drawdebug::DrawAABB::createDefaultPipeline(&m_pipeline, m_device.get(), pipelineLayout.get(), renderpass, vs, fs)) + return logFail("Graphics pipeline creation failed"); + } + { + auto vertexShader = compileShader("app_resources/multi_aabb.vertex.hlsl"); + auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); + + const auto pipelineLayout = m_device->createPipelineLayout({ &drawAABB->getCreationParameters().pushConstantRange , 1 }, nullptr, nullptr, nullptr, nullptr); - const auto pipelineLayout = ext::drawdebug::DrawAABB::createDefaultPipelineLayout(m_device.get(), drawAABB->getCreationParameters().pushConstantRange); + SVertexInputParams vertexInputParams{}; + { + vertexInputParams.enabledBindingFlags = 0b1u; + vertexInputParams.enabledAttribFlags = 0b1u; + + vertexInputParams.bindings[0].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; + vertexInputParams.bindings[0].stride = sizeof(float32_t3); + + auto& position = vertexInputParams.attributes[0]; + position.format = EF_R32G32B32_SFLOAT; + position.relativeOffset = 0u; + position.binding = 0u; + } - IGPUGraphicsPipeline::SShaderSpecInfo vs = { .shader = vertexShader.get(), .entryPoint = "main" }; - IGPUGraphicsPipeline::SShaderSpecInfo fs = { .shader = fragmentShader.get(), .entryPoint = "main" }; - if (!ext::drawdebug::DrawAABB::createDefaultPipeline(&m_pipeline, m_device.get(), pipelineLayout.get(), renderpass, vs, fs)) - return logFail("Graphics pipeline creation failed"); + video::IGPUGraphicsPipeline::SCreationParams params[1] = {}; + params[0].layout = pipelineLayout.get(); + params[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; + params[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main" }; + params[0].cached = { + .vertexInput = vertexInputParams, + .primitiveAssembly = { + .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, + } + }; + params[0].renderpass = renderpass; + + m_device->createGraphicsPipelines(nullptr, params, &m_streamingPipeline); + } m_window->setCaption("[Nabla Engine] Debug Draw App Test Demo"); m_winMgr->show(m_window.get()); @@ -287,8 +365,70 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti cmdbuf->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); drawAABB->renderSingle(cmdbuf); + cmdbuf->bindGraphicsPipeline(m_streamingPipeline.get()); + cmdbuf->pushConstants(m_streamingPipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + + // TODO bind vertex buffer (streaming buffer) + const SBufferBinding binding = + { + .offset = 0u, + .buffer = smart_refctd_ptr(streamingBuffer.get()->getBuffer()) + }; + cmdbuf->bindVertexBuffers(0u, 1u, &binding); + + // fill streaming buffer: indirect draw command, then vertex buffer + { + auto vertices = ext::drawdebug::DrawAABB::getVerticesFromAABB(testAABB2); + uint32_t indirectDrawCount = 1u; + + using offset_t = streaming_buffer_t::size_type; + constexpr auto MdiSizes = std::to_array({ sizeof(VkDrawIndirectCommand), sizeof(float32_t3) }); + // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all + constexpr offset_t MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return std::lcm(a, b); }); + // allocator initialization needs us to round up to PoT + const auto MaxPOTAlignment = roundUpToPoT(MaxAlignment); + + auto* streaming = streamingBuffer.get(); + + auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); + assert(streamingPtr); + + using suballocator_t = core::LinearAddressAllocatorST; + offset_t inputOffset = 0u; + offset_t ImaginarySizeUpperBound = 0x1 << 30; + suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); + uint32_t indirectDrawByteOffset = imaginaryChunk.alloc_addr(sizeof(VkDrawIndirectCommand) * indirectDrawCount, sizeof(VkDrawIndirectCommand)); + uint32_t vertexByteOffset = imaginaryChunk.alloc_addr(sizeof(float32_t3) * vertices.size(), sizeof(float32_t3)); + const uint32_t totalSize = imaginaryChunk.get_allocated_size(); + + std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); + streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); + + auto* drawIndirectIt = reinterpret_cast(streamingPtr + indirectDrawByteOffset); + for (auto i = 0u; i < indirectDrawCount; i++) + { + drawIndirectIt->firstVertex = 0; + drawIndirectIt->firstInstance = i; + drawIndirectIt->vertexCount = vertices.size(); + drawIndirectIt->instanceCount = 1; + drawIndirectIt++; + } + memcpy(streamingPtr + vertexByteOffset, vertices.data(), sizeof(vertices[0]) * vertices.size()); + + assert(!streaming->needsManualFlushOrInvalidate()); + + // TODO cmdbuf draw indirect + auto mdiBinding = binding; + mdiBinding.offset = indirectDrawByteOffset; + cmdbuf->drawIndirect(binding, 1, sizeof(VkDrawIndirectCommand)); + + const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + streaming->multi_deallocate(1, &inputOffset, &totalSize, drawFinished); + } + cmdbuf->endRenderPass(); } + cmdbuf->endDebugMarker(); cmdbuf->end(); { @@ -404,6 +544,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti smart_refctd_ptr m_window; smart_refctd_ptr> m_surface; smart_refctd_ptr m_pipeline; + smart_refctd_ptr m_streamingPipeline; smart_refctd_ptr m_semaphore; smart_refctd_ptr m_cmdPool; uint64_t m_realFrameIx = 0; @@ -425,7 +566,11 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti smart_refctd_ptr drawAABB; core::aabbox3d testAABB = core::aabbox3d({ 0, 0, 0 }, { 10, 10, -10 }); + core::aabbox3d testAABB2 = core::aabbox3d({ 2, 4, -1 }, { 7, 8, 5 }); smart_refctd_ptr verticesBuffer; + + using streaming_buffer_t = video::StreamingTransientDataBufferST>; + smart_refctd_ptr streamingBuffer; }; NBL_MAIN_FUNC(DebugDrawSampleApp) \ No newline at end of file From a81e62f56b2fbb04f84ae7fe3b2d2cad1ef941a7 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 4 Jul 2025 11:46:23 +0700 Subject: [PATCH 009/219] draw with instances --- 34_DebugDraw/app_resources/common.hlsl | 35 ++++++++ .../app_resources/multi_aabb.vertex.hlsl | 24 +++--- 34_DebugDraw/app_resources/simple.vertex.hlsl | 2 +- 34_DebugDraw/app_resources/simple_common.hlsl | 2 +- 34_DebugDraw/main.cpp | 80 ++++++++++--------- 5 files changed, 93 insertions(+), 50 deletions(-) create mode 100644 34_DebugDraw/app_resources/common.hlsl diff --git a/34_DebugDraw/app_resources/common.hlsl b/34_DebugDraw/app_resources/common.hlsl new file mode 100644 index 000000000..e1a42d5b6 --- /dev/null +++ b/34_DebugDraw/app_resources/common.hlsl @@ -0,0 +1,35 @@ +#ifndef _DRAW_AABB_COMMON_HLSL +#define _DRAW_AABB_COMMON_HLSL + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +struct InstanceData +{ +#ifdef __HLSL_VERSION + float32_t3x4 transform; +#else + float transform[3*4]; +#endif + float32_t3 color; +}; + +struct SPushConstants +{ +#ifdef __HLSL_VERSION + float32_t4x4 MVP; +#else + float MVP[4*4]; +#endif + uint64_t pVertexBuffer; + uint64_t pInstanceBuffer; +}; + +#ifdef __HLSL_VERSION +struct PSInput +{ + float32_t4 position : SV_Position; + float32_t4 color : TEXCOORD0; +}; +#endif + +#endif diff --git a/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl b/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl index 1bd6d85e1..37673deee 100644 --- a/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl +++ b/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl @@ -1,23 +1,29 @@ #pragma shader_stage(vertex) -#include "simple_common.hlsl" - -struct VSInput -{ - [[vk::location(0)]] float32_t3 position : POSITION; -}; +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#include "common.hlsl" using namespace nbl::hlsl; [[vk::push_constant]] SPushConstants pc; [shader("vertex")] -PSInput main(VSInput input) +PSInput main() { PSInput output; - output.position = mul(pc.MVP, float32_t4(input.position, 1)); - output.color = float32_t4(0, 1, 0, 1); + float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); + InstanceData instance = vk::RawBufferLoad(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()); + + float32_t4x4 transform; + transform[0] = instance.transform[0]; + transform[1] = instance.transform[1]; + transform[2] = instance.transform[2]; + transform[3] = float32_t4(0, 0, 0, 1); + float32_t4 position = mul(transform, float32_t4(vertex, 1)); + output.position = mul(pc.MVP, position); + output.color = float32_t4(instance.color, 1); return output; } \ No newline at end of file diff --git a/34_DebugDraw/app_resources/simple.vertex.hlsl b/34_DebugDraw/app_resources/simple.vertex.hlsl index 5f7b9c429..9e362ee75 100644 --- a/34_DebugDraw/app_resources/simple.vertex.hlsl +++ b/34_DebugDraw/app_resources/simple.vertex.hlsl @@ -5,7 +5,7 @@ using namespace nbl::hlsl; -[[vk::push_constant]] SPushConstants pc; +[[vk::push_constant]] SSimplePushConstants pc; [shader("vertex")] PSInput main(uint vertexID : SV_VertexID) diff --git a/34_DebugDraw/app_resources/simple_common.hlsl b/34_DebugDraw/app_resources/simple_common.hlsl index 5fc0d0b63..d74d64a8d 100644 --- a/34_DebugDraw/app_resources/simple_common.hlsl +++ b/34_DebugDraw/app_resources/simple_common.hlsl @@ -3,7 +3,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -struct SPushConstants +struct SSimplePushConstants { #ifdef __HLSL_VERSION float32_t4x4 MVP; diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 527086469..237b99fac 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -4,6 +4,7 @@ #include "common.hpp" #include "app_resources/simple_common.hlsl" +#include "app_resources/common.hlsl" class DebugDrawSampleApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { @@ -121,7 +122,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti params.pushConstantRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, .offset = 0, - .size = sizeof(SPushConstants) + .size = sizeof(SSimplePushConstants) }; drawAABB = ext::drawdebug::DrawAABB::create(std::move(params)); } @@ -214,8 +215,13 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti { auto vertexShader = compileShader("app_resources/multi_aabb.vertex.hlsl"); auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); + SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, + .offset = 0, + .size = sizeof(SPushConstants) + }; - const auto pipelineLayout = m_device->createPipelineLayout({ &drawAABB->getCreationParameters().pushConstantRange , 1 }, nullptr, nullptr, nullptr, nullptr); + const auto pipelineLayout = m_device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); SVertexInputParams vertexInputParams{}; { @@ -236,14 +242,14 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti params[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; params[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main" }; params[0].cached = { - .vertexInput = vertexInputParams, .primitiveAssembly = { .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, } }; params[0].renderpass = renderpass; - m_device->createGraphicsPipelines(nullptr, params, &m_streamingPipeline); + if (!m_device->createGraphicsPipelines(nullptr, params, &m_streamingPipeline)) + return logFail("Could not create streaming pipeline!"); } m_window->setCaption("[Nabla Engine] Debug Draw App Test Demo"); @@ -356,33 +362,24 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti .renderArea = currentRenderArea }; - SPushConstants pc; - memcpy(pc.MVP, modelViewProjectionMatrix.pointer(), sizeof(pc.MVP)); - pc.pVertices = verticesBuffer->getDeviceAddress(); - - cmdbuf->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - cmdbuf->bindGraphicsPipeline(m_pipeline.get()); - cmdbuf->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); - drawAABB->renderSingle(cmdbuf); - - cmdbuf->bindGraphicsPipeline(m_streamingPipeline.get()); - cmdbuf->pushConstants(m_streamingPipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); - - // TODO bind vertex buffer (streaming buffer) - const SBufferBinding binding = { - .offset = 0u, - .buffer = smart_refctd_ptr(streamingBuffer.get()->getBuffer()) - }; - cmdbuf->bindVertexBuffers(0u, 1u, &binding); + SSimplePushConstants pc; + memcpy(pc.MVP, modelViewProjectionMatrix.pointer(), sizeof(pc.MVP)); + pc.pVertices = verticesBuffer->getDeviceAddress(); + + cmdbuf->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + cmdbuf->bindGraphicsPipeline(m_pipeline.get()); + cmdbuf->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SSimplePushConstants), &pc); + drawAABB->renderSingle(cmdbuf); + } // fill streaming buffer: indirect draw command, then vertex buffer { auto vertices = ext::drawdebug::DrawAABB::getVerticesFromAABB(testAABB2); - uint32_t indirectDrawCount = 1u; + uint32_t instanceCount = 4u; using offset_t = streaming_buffer_t::size_type; - constexpr auto MdiSizes = std::to_array({ sizeof(VkDrawIndirectCommand), sizeof(float32_t3) }); + constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all constexpr offset_t MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return std::lcm(a, b); }); // allocator initialization needs us to round up to PoT @@ -397,30 +394,35 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti offset_t inputOffset = 0u; offset_t ImaginarySizeUpperBound = 0x1 << 30; suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); - uint32_t indirectDrawByteOffset = imaginaryChunk.alloc_addr(sizeof(VkDrawIndirectCommand) * indirectDrawCount, sizeof(VkDrawIndirectCommand)); uint32_t vertexByteOffset = imaginaryChunk.alloc_addr(sizeof(float32_t3) * vertices.size(), sizeof(float32_t3)); + uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); const uint32_t totalSize = imaginaryChunk.get_allocated_size(); std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); - auto* drawIndirectIt = reinterpret_cast(streamingPtr + indirectDrawByteOffset); - for (auto i = 0u; i < indirectDrawCount; i++) + memcpy(streamingPtr + vertexByteOffset, vertices.data(), sizeof(vertices[0]) * vertices.size()); + auto* instancesIt = reinterpret_cast(streamingPtr + instancesByteOffset); + for (auto i = 0u; i < instanceCount; i++) { - drawIndirectIt->firstVertex = 0; - drawIndirectIt->firstInstance = i; - drawIndirectIt->vertexCount = vertices.size(); - drawIndirectIt->instanceCount = 1; - drawIndirectIt++; + core::matrix3x4SIMD instanceTransform; + instanceTransform.setTranslation(core::vectorSIMDf(i, 0, i, 0)); + instanceTransform.setScale(core::vectorSIMDf(i, i, i)); + memcpy(instancesIt->transform, instanceTransform.pointer(), sizeof(core::matrix3x4SIMD)); + instancesIt->color = float32_t3(i * 0.2, 1, 0); + instancesIt++; } - memcpy(streamingPtr + vertexByteOffset, vertices.data(), sizeof(vertices[0]) * vertices.size()); assert(!streaming->needsManualFlushOrInvalidate()); - // TODO cmdbuf draw indirect - auto mdiBinding = binding; - mdiBinding.offset = indirectDrawByteOffset; - cmdbuf->drawIndirect(binding, 1, sizeof(VkDrawIndirectCommand)); + SPushConstants pc; + memcpy(pc.MVP, modelViewProjectionMatrix.pointer(), sizeof(pc.MVP)); + pc.pVertexBuffer = streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; + pc.pInstanceBuffer = streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; + + cmdbuf->bindGraphicsPipeline(m_streamingPipeline.get()); + cmdbuf->pushConstants(m_streamingPipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + cmdbuf->draw(vertices.size(), instanceCount, 0, 0); const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; streaming->multi_deallocate(1, &inputOffset, &totalSize, drawFinished); @@ -565,8 +567,8 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; smart_refctd_ptr drawAABB; - core::aabbox3d testAABB = core::aabbox3d({ 0, 0, 0 }, { 10, 10, -10 }); - core::aabbox3d testAABB2 = core::aabbox3d({ 2, 4, -1 }, { 7, 8, 5 }); + core::aabbox3d testAABB = core::aabbox3d({ -5, -5, -5 }, { 10, 10, -10 }); + core::aabbox3d testAABB2 = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); smart_refctd_ptr verticesBuffer; using streaming_buffer_t = video::StreamingTransientDataBufferST>; From da63edf598390448a2cb5835b61ecb38ec8393c4 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 4 Jul 2025 11:49:38 +0700 Subject: [PATCH 010/219] minor bug fix in creating instances --- 34_DebugDraw/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 237b99fac..40f265ae3 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -376,7 +376,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti // fill streaming buffer: indirect draw command, then vertex buffer { auto vertices = ext::drawdebug::DrawAABB::getVerticesFromAABB(testAABB2); - uint32_t instanceCount = 4u; + uint32_t instanceCount = 5u; using offset_t = streaming_buffer_t::size_type; constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); @@ -407,7 +407,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti { core::matrix3x4SIMD instanceTransform; instanceTransform.setTranslation(core::vectorSIMDf(i, 0, i, 0)); - instanceTransform.setScale(core::vectorSIMDf(i, i, i)); + instanceTransform.setScale(core::vectorSIMDf(i+1, i+1, i+1)); memcpy(instancesIt->transform, instanceTransform.pointer(), sizeof(core::matrix3x4SIMD)); instancesIt->color = float32_t3(i * 0.2, 1, 0); instancesIt++; From 9ae72f51e507e9e9a579dd463db6fac1ae4f866c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 4 Jul 2025 14:36:11 +0700 Subject: [PATCH 011/219] move handling instances to CDrawAABB --- 34_DebugDraw/include/CDrawAABB.h | 13 ++++++++++--- 34_DebugDraw/src/CDrawAABB.cpp | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/34_DebugDraw/include/CDrawAABB.h b/34_DebugDraw/include/CDrawAABB.h index 76fc35612..68704a81b 100644 --- a/34_DebugDraw/include/CDrawAABB.h +++ b/34_DebugDraw/include/CDrawAABB.h @@ -4,12 +4,13 @@ // TODO move this into nabla -#include "nbl/video/declarations.h" -#include "nbl/builtin/hlsl/cpp_compat.hlsl" - #ifndef _NBL_EXT_DRAW_AABB_H_ #define _NBL_EXT_DRAW_AABB_H_ +#include "nbl/video/declarations.h" +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "../app_resources/common.hlsl" + namespace nbl::ext::drawdebug { class DrawAABB final : public core::IReferenceCounted @@ -38,12 +39,18 @@ class DrawAABB final : public core::IReferenceCounted static std::array getVerticesFromAABB(const core::aabbox3d& aabb); + void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t3& color = { 1,0,0 }); + protected: DrawAABB(SCreationParameters&& _params); ~DrawAABB() override; private: SCreationParameters m_creationParams; + + std::vector m_instances; + std::array m_unitVertices; + constexpr static inline core::aabbox3d UnitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); }; } diff --git a/34_DebugDraw/src/CDrawAABB.cpp b/34_DebugDraw/src/CDrawAABB.cpp index e5c18f636..01c3cc550 100644 --- a/34_DebugDraw/src/CDrawAABB.cpp +++ b/34_DebugDraw/src/CDrawAABB.cpp @@ -94,4 +94,18 @@ std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb, const hlsl::float32_t3& color) +{ + InstanceData instance; + instance.color = color; + + core::matrix3x4SIMD instanceTransform; + instanceTransform.setTranslation(core::vectorSIMDf(aabb.MinEdge.X, aabb.MinEdge.Y, aabb.MinEdge.Z, 0)); + const auto diagonal = aabb.MaxEdge - aabb.MinEdge; + instanceTransform.setScale(core::vectorSIMDf(diagonal.X, diagonal.Y, diagonal.Z)); + memcpy(instance.transform, instanceTransform.pointer(), sizeof(core::matrix3x4SIMD)); + + m_instances.push_back(instance); +} + } From 7a22eef770a597526967c0ced0146e86e5d2bf07 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 4 Jul 2025 17:05:34 +0700 Subject: [PATCH 012/219] moved most important streaming stuff to CDrawAABB --- 34_DebugDraw/app_resources/common.hlsl | 2 +- 34_DebugDraw/include/CDrawAABB.h | 36 ++++-- 34_DebugDraw/main.cpp | 30 ++--- 34_DebugDraw/src/CDrawAABB.cpp | 168 ++++++++++++++++++++++++- 4 files changed, 201 insertions(+), 35 deletions(-) diff --git a/34_DebugDraw/app_resources/common.hlsl b/34_DebugDraw/app_resources/common.hlsl index e1a42d5b6..b7690b097 100644 --- a/34_DebugDraw/app_resources/common.hlsl +++ b/34_DebugDraw/app_resources/common.hlsl @@ -10,7 +10,7 @@ struct InstanceData #else float transform[3*4]; #endif - float32_t3 color; + nbl::hlsl::float32_t3 color; }; struct SPushConstants diff --git a/34_DebugDraw/include/CDrawAABB.h b/34_DebugDraw/include/CDrawAABB.h index 68704a81b..33ff1b1f6 100644 --- a/34_DebugDraw/include/CDrawAABB.h +++ b/34_DebugDraw/include/CDrawAABB.h @@ -16,9 +16,25 @@ namespace nbl::ext::drawdebug class DrawAABB final : public core::IReferenceCounted { public: - struct SCreationParameters + struct SCachedCreationParameters + { + using streaming_buffer_t = video::StreamingTransientDataBufferST>; + + static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + + core::smart_refctd_ptr utilities; + + //! optional, default MDI buffer allocated if not provided + core::smart_refctd_ptr streamingBuffer = nullptr; + }; + + struct SCreationParameters : SCachedCreationParameters { asset::SPushConstantRange pushConstantRange; + + core::smart_refctd_ptr pipelineLayout; + core::smart_refctd_ptr renderpass = nullptr; }; // creates an instance that draws one AABB via push constant @@ -30,27 +46,33 @@ class DrawAABB final : public core::IReferenceCounted // creates default pipeline layout for push constant version static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); - static bool createDefaultPipeline(core::smart_refctd_ptr* pipeline, video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); + static smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); - inline const SCreationParameters& getCreationParameters() const { return m_creationParams; } + inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } // records draw command for single AABB, user has to set pipeline outside bool renderSingle(video::IGPUCommandBuffer* commandBuffer); + bool render(video::IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); + static std::array getVerticesFromAABB(const core::aabbox3d& aabb); void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t3& color = { 1,0,0 }); protected: - DrawAABB(SCreationParameters&& _params); + DrawAABB(SCreationParameters&& _params, smart_refctd_ptr pipeline); ~DrawAABB() override; private: - SCreationParameters m_creationParams; + static smart_refctd_ptr createPipeline(SCreationParameters& params); + static bool createStreamingBuffer(SCreationParameters& params); std::vector m_instances; - std::array m_unitVertices; - constexpr static inline core::aabbox3d UnitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); + std::array m_unitAABBVertices; + + SCachedCreationParameters m_cachedCreationParams; + + core::smart_refctd_ptr m_pipeline; }; } diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 40f265ae3..edf849657 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -117,13 +117,14 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); m_surface->recreateSwapchain(); + SPushConstantRange simplePcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, + .offset = 0, + .size = sizeof(SSimplePushConstants) + }; { ext::drawdebug::DrawAABB::SCreationParameters params; - params.pushConstantRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, - .offset = 0, - .size = sizeof(SSimplePushConstants) - }; + params.pushConstantRange = simplePcRange; drawAABB = ext::drawdebug::DrawAABB::create(std::move(params)); } { @@ -205,11 +206,12 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti auto vertexShader = compileShader("app_resources/simple.vertex.hlsl"); auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); - const auto pipelineLayout = ext::drawdebug::DrawAABB::createDefaultPipelineLayout(m_device.get(), drawAABB->getCreationParameters().pushConstantRange); + const auto pipelineLayout = ext::drawdebug::DrawAABB::createDefaultPipelineLayout(m_device.get(), simplePcRange); IGPUGraphicsPipeline::SShaderSpecInfo vs = { .shader = vertexShader.get(), .entryPoint = "main" }; IGPUGraphicsPipeline::SShaderSpecInfo fs = { .shader = fragmentShader.get(), .entryPoint = "main" }; - if (!ext::drawdebug::DrawAABB::createDefaultPipeline(&m_pipeline, m_device.get(), pipelineLayout.get(), renderpass, vs, fs)) + m_pipeline = ext::drawdebug::DrawAABB::createDefaultPipeline(m_device.get(), pipelineLayout.get(), renderpass, vs, fs); + if (!m_pipeline) return logFail("Graphics pipeline creation failed"); } { @@ -223,20 +225,6 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti const auto pipelineLayout = m_device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); - SVertexInputParams vertexInputParams{}; - { - vertexInputParams.enabledBindingFlags = 0b1u; - vertexInputParams.enabledAttribFlags = 0b1u; - - vertexInputParams.bindings[0].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; - vertexInputParams.bindings[0].stride = sizeof(float32_t3); - - auto& position = vertexInputParams.attributes[0]; - position.format = EF_R32G32B32_SFLOAT; - position.relativeOffset = 0u; - position.binding = 0u; - } - video::IGPUGraphicsPipeline::SCreationParams params[1] = {}; params[0].layout = pipelineLayout.get(); params[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; diff --git a/34_DebugDraw/src/CDrawAABB.cpp b/34_DebugDraw/src/CDrawAABB.cpp index 01c3cc550..ae57952ce 100644 --- a/34_DebugDraw/src/CDrawAABB.cpp +++ b/34_DebugDraw/src/CDrawAABB.cpp @@ -7,6 +7,10 @@ #include "CDrawAABB.h" using namespace nbl; +using namespace core; +using namespace video; +using namespace system; +using namespace asset; using namespace hlsl; namespace nbl::ext::drawdebug @@ -14,25 +18,131 @@ namespace nbl::ext::drawdebug core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) { - return core::smart_refctd_ptr(new DrawAABB(std::move(params))); + auto* const logger = params.utilities->getLogger(); + + auto pipeline = createPipeline(params); + if (!pipeline) + { + logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + if (!createStreamingBuffer(params)) + { + logger->log("Failed to create streaming buffer!", ILogger::ELL_ERROR); + return nullptr; + } + + return core::smart_refctd_ptr(new DrawAABB(std::move(params), pipeline)); } -DrawAABB::DrawAABB(SCreationParameters&& _params) - : m_creationParams(_params) +DrawAABB::DrawAABB(SCreationParameters&& params, smart_refctd_ptr pipeline) + : m_cachedCreationParams(std::move(params)), m_pipeline(pipeline) { + const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); + m_unitAABBVertices = getVerticesFromAABB(unitAABB); } DrawAABB::~DrawAABB() { } +smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params) +{ + video::IGPUGraphicsPipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = params.pipelineLayout.get(); + pipelineParams[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; + pipelineParams[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main" }; + pipelineParams[0].cached = { + .primitiveAssembly = { + .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, + } + }; + pipelineParams[0].renderpass = params.renderpass.get(); + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createGraphicsPipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + params.utilities->getLogger()->log("Could not create streaming pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + +bool DrawAABB::createStreamingBuffer(SCreationParameters& params) +{ + const uint32_t minStreamingBufferAllocationSize = 128u, maxStreamingBufferAllocationAlignment = 4096u, mdiBufferDefaultSize = /* 2MB */ 1024u * 1024u * 2u; + + auto getRequiredAccessFlags = [&](const bitflag& properties) + { + bitflag flags(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS); + + if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_READABLE_BIT)) + flags |= IDeviceMemoryAllocation::EMCAF_READ; + if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT)) + flags |= IDeviceMemoryAllocation::EMCAF_WRITE; + + return flags; + }; + + if (!params.streamingBuffer) + { + IGPUBuffer::SCreationParams mdiCreationParams = {}; + mdiCreationParams.usage = SCachedCreationParameters::RequiredUsageFlags; + mdiCreationParams.size = mdiBufferDefaultSize; + + auto buffer = params.utilities->getLogicalDevice()->createBuffer(std::move(mdiCreationParams)); + buffer->setObjectDebugName("AABB Streaming Buffer"); + + auto memoryReqs = buffer->getMemoryReqs(); + memoryReqs.memoryTypeBits &= params.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); + + auto allocation = params.utilities->getLogicalDevice()->allocate(memoryReqs, buffer.get(), SCachedCreationParameters::RequiredAllocateFlags); + { + const bool allocated = allocation.isValid(); + assert(allocated); + } + auto memory = allocation.memory; + + if (!memory->map({ 0ull, memoryReqs.size }, getRequiredAccessFlags(memory->getMemoryPropertyFlags()))) + params.utilities->getLogger()->log("Could not map device memory!", ILogger::ELL_ERROR); + + params.streamingBuffer = make_smart_refctd_ptr(SBufferRange{0ull, mdiCreationParams.size, std::move(buffer)}, maxStreamingBufferAllocationAlignment, minStreamingBufferAllocationSize); + } + + auto buffer = params.streamingBuffer->getBuffer(); + auto binding = buffer->getBoundMemory(); + + const auto validation = std::to_array + ({ + std::make_pair(buffer->getCreationParams().usage.hasFlags(SCachedCreationParameters::RequiredUsageFlags), "Streaming buffer must be created with IBuffer::EUF_STORAGE_BUFFER_BIT | IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT enabled!"), + std::make_pair(bool(buffer->getMemoryReqs().memoryTypeBits & params.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits()), "Streaming buffer must have up-streaming memory type bits enabled!"), + std::make_pair(binding.memory->getAllocateFlags().hasFlags(SCachedCreationParameters::RequiredAllocateFlags), "Streaming buffer's memory must be allocated with IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT enabled!"), + std::make_pair(binding.memory->isCurrentlyMapped(), "Streaming buffer's memory must be mapped!"), // streaming buffer contructor already validates it, but cannot assume user won't unmap its own buffer for some reason (sorry if you have just hit it) + std::make_pair(binding.memory->getCurrentMappingAccess().hasFlags(getRequiredAccessFlags(binding.memory->getMemoryPropertyFlags())), "Streaming buffer's memory current mapping access flags don't meet requirements!") + }); + + for (const auto& [ok, error] : validation) + if (!ok) + { + params.utilities->getLogger()->log(error, ILogger::ELL_ERROR); + return false; + } + + return true; +} + core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) { return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); } -bool DrawAABB::createDefaultPipeline(core::smart_refctd_ptr* pipeline, video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment) +smart_refctd_ptr DrawAABB::createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment) { + smart_refctd_ptr pipeline; + video::IGPUGraphicsPipeline::SCreationParams params[1] = {}; params[0].layout = layout; params[0].vertexShader = vertex; @@ -44,10 +154,12 @@ bool DrawAABB::createDefaultPipeline(core::smart_refctd_ptrcreateGraphicsPipelines(nullptr, params, pipeline); + device->createGraphicsPipelines(nullptr, params, &pipeline); + + return pipeline; } -bool DrawAABB::renderSingle(video::IGPUCommandBuffer* commandBuffer) +bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer) { commandBuffer->setLineWidth(1.f); commandBuffer->draw(24, 1, 0, 0); @@ -55,6 +167,50 @@ bool DrawAABB::renderSingle(video::IGPUCommandBuffer* commandBuffer) return true; } +bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4) +{ + using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; + constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); + // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all + constexpr offset_t MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return std::lcm(a, b); }); + // allocator initialization needs us to round up to PoT + const auto MaxPOTAlignment = roundUpToPoT(MaxAlignment); + + auto* streaming = m_cachedCreationParams.streamingBuffer.get(); + + auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); + assert(streamingPtr); + + using suballocator_t = core::LinearAddressAllocatorST; + offset_t inputOffset = 0u; + offset_t ImaginarySizeUpperBound = 0x1 << 30; + suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); + uint32_t vertexByteOffset = imaginaryChunk.alloc_addr(sizeof(float32_t3) * m_unitAABBVertices.size(), sizeof(float32_t3)); + uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * m_instances.size(), sizeof(InstanceData)); + const uint32_t totalSize = imaginaryChunk.get_allocated_size(); + + std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); + streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); + + memcpy(streamingPtr + vertexByteOffset, m_unitAABBVertices.data(), sizeof(m_unitAABBVertices[0]) * m_unitAABBVertices.size()); + memcpy(streamingPtr + instancesByteOffset, m_instances.data(), sizeof(m_instances[0]) * m_instances.size()); + + assert(!streaming->needsManualFlushOrInvalidate()); + + SPushConstants pc; + memcpy(pc.MVP, cameraMat3x4, sizeof(pc.MVP)); + pc.pVertexBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; + pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; + + commandBuffer->bindGraphicsPipeline(m_pipeline.get()); + commandBuffer->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + commandBuffer->draw(m_unitAABBVertices.size(), m_instances.size(), 0, 0); + + streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); + + return true; +} + std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb) { const auto& pMin = aabb.MinEdge; From f18bf3872a5d1b35c1dd84ae2b514c814184bc98 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 7 Jul 2025 11:33:52 +0700 Subject: [PATCH 013/219] moved most core func to CDrawAABB --- 34_DebugDraw/include/CDrawAABB.h | 16 ++-- 34_DebugDraw/main.cpp | 130 ++++--------------------------- 34_DebugDraw/src/CDrawAABB.cpp | 39 ++++++++++ 3 files changed, 63 insertions(+), 122 deletions(-) diff --git a/34_DebugDraw/include/CDrawAABB.h b/34_DebugDraw/include/CDrawAABB.h index 33ff1b1f6..3c8925fbd 100644 --- a/34_DebugDraw/include/CDrawAABB.h +++ b/34_DebugDraw/include/CDrawAABB.h @@ -31,7 +31,8 @@ class DrawAABB final : public core::IReferenceCounted struct SCreationParameters : SCachedCreationParameters { - asset::SPushConstantRange pushConstantRange; + core::smart_refctd_ptr assetManager = nullptr; + system::path localInputCWD; // TODO replace when working from nbl/ext core::smart_refctd_ptr pipelineLayout; core::smart_refctd_ptr renderpass = nullptr; @@ -46,25 +47,30 @@ class DrawAABB final : public core::IReferenceCounted // creates default pipeline layout for push constant version static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); - static smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); + // creates default pipeline layout for streaming version + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); + + static core::smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } // records draw command for single AABB, user has to set pipeline outside bool renderSingle(video::IGPUCommandBuffer* commandBuffer); - bool render(video::IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); + bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); static std::array getVerticesFromAABB(const core::aabbox3d& aabb); void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t3& color = { 1,0,0 }); + void clearAABBs(); + protected: - DrawAABB(SCreationParameters&& _params, smart_refctd_ptr pipeline); + DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline); ~DrawAABB() override; private: - static smart_refctd_ptr createPipeline(SCreationParameters& params); + static core::smart_refctd_ptr createPipeline(SCreationParameters& params); static bool createStreamingBuffer(SCreationParameters& params); std::vector m_instances; diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index edf849657..40362b1f8 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -123,8 +123,12 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti .size = sizeof(SSimplePushConstants) }; { - ext::drawdebug::DrawAABB::SCreationParameters params; - params.pushConstantRange = simplePcRange; + ext::drawdebug::DrawAABB::SCreationParameters params = {}; + params.assetManager = m_assetMgr; + params.localInputCWD = localInputCWD; + params.pipelineLayout = ext::drawdebug::DrawAABB::createDefaultPipelineLayout(m_device.get()); + params.renderpass = smart_refctd_ptr(renderpass); + params.utilities = m_utils; drawAABB = ext::drawdebug::DrawAABB::create(std::move(params)); } { @@ -140,48 +144,6 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti ).move_into(verticesBuffer); } - // create streaming buffer - // TODO move into CDrawAABB - { - auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_INDIRECT_BUFFER_BIT) | asset::IBuffer::EUF_VERTEX_BUFFER_BIT | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - const uint32_t minStreamingBufferAllocationSize = 128u, maxStreamingBufferAllocationAlignment = 4096u, mdiBufferDefaultSize = /* 2MB */ 1024u * 1024u * 2u; - - auto getRequiredAccessFlags = [&](const bitflag& properties) - { - bitflag flags(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS); - - if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_READABLE_BIT)) - flags |= IDeviceMemoryAllocation::EMCAF_READ; - if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT)) - flags |= IDeviceMemoryAllocation::EMCAF_WRITE; - - return flags; - }; - - IGPUBuffer::SCreationParams mdiCreationParams = {}; - mdiCreationParams.usage = RequiredUsageFlags; - mdiCreationParams.size = mdiBufferDefaultSize; - - auto buffer = m_device->createBuffer(std::move(mdiCreationParams)); - buffer->setObjectDebugName("MDI Upstream Buffer"); - - auto memoryReqs = buffer->getMemoryReqs(); - memoryReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); - - auto allocation = m_device->allocate(memoryReqs, buffer.get(), RequiredAllocateFlags); - { - const bool allocated = allocation.isValid(); - assert(allocated); - } - auto memory = allocation.memory; - - if (!memory->map({ 0ull, memoryReqs.size }, getRequiredAccessFlags(memory->getMemoryPropertyFlags()))) - m_logger->log("Could not map device memory!", ILogger::ELL_ERROR); - - streamingBuffer = make_smart_refctd_ptr(SBufferRange{0ull, mdiCreationParams.size, std::move(buffer)}, maxStreamingBufferAllocationAlignment, minStreamingBufferAllocationSize); - } - auto compileShader = [&](const std::string& filePath) -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lparams = {}; @@ -214,31 +176,6 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti if (!m_pipeline) return logFail("Graphics pipeline creation failed"); } - { - auto vertexShader = compileShader("app_resources/multi_aabb.vertex.hlsl"); - auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); - SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, - .offset = 0, - .size = sizeof(SPushConstants) - }; - - const auto pipelineLayout = m_device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); - - video::IGPUGraphicsPipeline::SCreationParams params[1] = {}; - params[0].layout = pipelineLayout.get(); - params[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; - params[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main" }; - params[0].cached = { - .primitiveAssembly = { - .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, - } - }; - params[0].renderpass = renderpass; - - if (!m_device->createGraphicsPipelines(nullptr, params, &m_streamingPipeline)) - return logFail("Could not create streaming pipeline!"); - } m_window->setCaption("[Nabla Engine] Debug Draw App Test Demo"); m_winMgr->show(m_window.get()); @@ -361,59 +298,18 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti drawAABB->renderSingle(cmdbuf); } - // fill streaming buffer: indirect draw command, then vertex buffer { - auto vertices = ext::drawdebug::DrawAABB::getVerticesFromAABB(testAABB2); - uint32_t instanceCount = 5u; - - using offset_t = streaming_buffer_t::size_type; - constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); - // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all - constexpr offset_t MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return std::lcm(a, b); }); - // allocator initialization needs us to round up to PoT - const auto MaxPOTAlignment = roundUpToPoT(MaxAlignment); - - auto* streaming = streamingBuffer.get(); - - auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); - assert(streamingPtr); - - using suballocator_t = core::LinearAddressAllocatorST; - offset_t inputOffset = 0u; - offset_t ImaginarySizeUpperBound = 0x1 << 30; - suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); - uint32_t vertexByteOffset = imaginaryChunk.alloc_addr(sizeof(float32_t3) * vertices.size(), sizeof(float32_t3)); - uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); - const uint32_t totalSize = imaginaryChunk.get_allocated_size(); - - std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); - streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); - - memcpy(streamingPtr + vertexByteOffset, vertices.data(), sizeof(vertices[0]) * vertices.size()); - auto* instancesIt = reinterpret_cast(streamingPtr + instancesByteOffset); - for (auto i = 0u; i < instanceCount; i++) + const uint32_t aabbCount = 4u; + drawAABB->clearAABBs(); + for (auto i = 0u; i < aabbCount; i++) { - core::matrix3x4SIMD instanceTransform; - instanceTransform.setTranslation(core::vectorSIMDf(i, 0, i, 0)); - instanceTransform.setScale(core::vectorSIMDf(i+1, i+1, i+1)); - memcpy(instancesIt->transform, instanceTransform.pointer(), sizeof(core::matrix3x4SIMD)); - instancesIt->color = float32_t3(i * 0.2, 1, 0); - instancesIt++; + float i2 = (i+1) * 2; + core::aabbox3d aabb = { float(i), 0.f, float(i), i2+i, i2, i2+i}; + drawAABB->addAABB(aabb); } - assert(!streaming->needsManualFlushOrInvalidate()); - - SPushConstants pc; - memcpy(pc.MVP, modelViewProjectionMatrix.pointer(), sizeof(pc.MVP)); - pc.pVertexBuffer = streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; - pc.pInstanceBuffer = streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; - - cmdbuf->bindGraphicsPipeline(m_streamingPipeline.get()); - cmdbuf->pushConstants(m_streamingPipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); - cmdbuf->draw(vertices.size(), instanceCount, 0, 0); - const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - streaming->multi_deallocate(1, &inputOffset, &totalSize, drawFinished); + drawAABB->render(cmdbuf, drawFinished, modelViewProjectionMatrix.pointer()); } cmdbuf->endRenderPass(); diff --git a/34_DebugDraw/src/CDrawAABB.cpp b/34_DebugDraw/src/CDrawAABB.cpp index ae57952ce..defd67bbb 100644 --- a/34_DebugDraw/src/CDrawAABB.cpp +++ b/34_DebugDraw/src/CDrawAABB.cpp @@ -49,6 +49,30 @@ DrawAABB::~DrawAABB() smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params) { + auto compileShader = [&](const std::string& filePath) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = params.utilities->getLogger(); + lparams.workingDirectory = params.localInputCWD; + auto bundle = params.assetManager->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) + { + params.utilities->getLogger()->log("Shader %s not found!", ILogger::ELL_ERROR, filePath.c_str()); + exit(-1); + } + + const auto assets = bundle.getContents(); + assert(assets.size() == 1); + smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); + if (!shaderSrc) + return nullptr; + + return params.utilities->getLogicalDevice()->compileShader({ shaderSrc.get() }); + }; + + auto vertexShader = compileShader("app_resources/multi_aabb.vertex.hlsl"); + auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); + video::IGPUGraphicsPipeline::SCreationParams pipelineParams[1] = {}; pipelineParams[0].layout = params.pipelineLayout.get(); pipelineParams[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; @@ -139,6 +163,16 @@ core::smart_refctd_ptr DrawAABB::createDefaultPipelin return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); } +core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device) +{ + SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, + .offset = 0, + .size = sizeof(SPushConstants) + }; + return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); +} + smart_refctd_ptr DrawAABB::createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment) { smart_refctd_ptr pipeline; @@ -264,4 +298,9 @@ void DrawAABB::addAABB(const core::aabbox3d& aabb, const hlsl::float32_t3 m_instances.push_back(instance); } +void DrawAABB::clearAABBs() +{ + m_instances.clear(); +} + } From 09ef478a818b4b860be8a2dcfe8192323e3549b5 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 8 Jul 2025 11:55:46 +0700 Subject: [PATCH 014/219] handle streaming buffer overflow --- 34_DebugDraw/app_resources/common.hlsl | 2 +- .../app_resources/multi_aabb.vertex.hlsl | 2 +- 34_DebugDraw/include/CDrawAABB.h | 2 +- 34_DebugDraw/main.cpp | 2 +- 34_DebugDraw/src/CDrawAABB.cpp | 64 +++++++++++-------- 5 files changed, 42 insertions(+), 30 deletions(-) diff --git a/34_DebugDraw/app_resources/common.hlsl b/34_DebugDraw/app_resources/common.hlsl index b7690b097..6e42628ed 100644 --- a/34_DebugDraw/app_resources/common.hlsl +++ b/34_DebugDraw/app_resources/common.hlsl @@ -10,7 +10,7 @@ struct InstanceData #else float transform[3*4]; #endif - nbl::hlsl::float32_t3 color; + nbl::hlsl::float32_t4 color; }; struct SPushConstants diff --git a/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl b/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl index 37673deee..ab7a51833 100644 --- a/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl +++ b/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl @@ -23,7 +23,7 @@ PSInput main() transform[3] = float32_t4(0, 0, 0, 1); float32_t4 position = mul(transform, float32_t4(vertex, 1)); output.position = mul(pc.MVP, position); - output.color = float32_t4(instance.color, 1); + output.color = instance.color; return output; } \ No newline at end of file diff --git a/34_DebugDraw/include/CDrawAABB.h b/34_DebugDraw/include/CDrawAABB.h index 3c8925fbd..d474426f7 100644 --- a/34_DebugDraw/include/CDrawAABB.h +++ b/34_DebugDraw/include/CDrawAABB.h @@ -61,7 +61,7 @@ class DrawAABB final : public core::IReferenceCounted static std::array getVerticesFromAABB(const core::aabbox3d& aabb); - void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t3& color = { 1,0,0 }); + void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); void clearAABBs(); diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 40362b1f8..bf4decb07 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -305,7 +305,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti { float i2 = (i+1) * 2; core::aabbox3d aabb = { float(i), 0.f, float(i), i2+i, i2, i2+i}; - drawAABB->addAABB(aabb); + drawAABB->addAABB(aabb, {1,0,0,(i+1)*0.2}); } const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; diff --git a/34_DebugDraw/src/CDrawAABB.cpp b/34_DebugDraw/src/CDrawAABB.cpp index defd67bbb..085b0fb9c 100644 --- a/34_DebugDraw/src/CDrawAABB.cpp +++ b/34_DebugDraw/src/CDrawAABB.cpp @@ -215,32 +215,44 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); assert(streamingPtr); - using suballocator_t = core::LinearAddressAllocatorST; - offset_t inputOffset = 0u; - offset_t ImaginarySizeUpperBound = 0x1 << 30; - suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); - uint32_t vertexByteOffset = imaginaryChunk.alloc_addr(sizeof(float32_t3) * m_unitAABBVertices.size(), sizeof(float32_t3)); - uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * m_instances.size(), sizeof(InstanceData)); - const uint32_t totalSize = imaginaryChunk.get_allocated_size(); - - std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); - streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); - - memcpy(streamingPtr + vertexByteOffset, m_unitAABBVertices.data(), sizeof(m_unitAABBVertices[0]) * m_unitAABBVertices.size()); - memcpy(streamingPtr + instancesByteOffset, m_instances.data(), sizeof(m_instances[0]) * m_instances.size()); - - assert(!streaming->needsManualFlushOrInvalidate()); + commandBuffer->bindGraphicsPipeline(m_pipeline.get()); // move outside of loop, only bind once - SPushConstants pc; - memcpy(pc.MVP, cameraMat3x4, sizeof(pc.MVP)); - pc.pVertexBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; - pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; - - commandBuffer->bindGraphicsPipeline(m_pipeline.get()); - commandBuffer->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); - commandBuffer->draw(m_unitAABBVertices.size(), m_instances.size(), 0, 0); - - streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); + auto instancesIt = m_instances.begin(); + const uint32_t verticesByteSize = sizeof(float32_t3) * m_unitAABBVertices.size(); + const uint32_t availableInstancesByteSize = streaming->getBuffer()->getSize() - verticesByteSize; + const uint32_t instancesPerIter = availableInstancesByteSize / sizeof(InstanceData); + using suballocator_t = core::LinearAddressAllocatorST; + while (instancesIt != m_instances.end()) + { + const uint32_t instanceCount = min(instancesPerIter, m_instances.size()); + offset_t inputOffset = 0u; + offset_t ImaginarySizeUpperBound = 0x1 << 30; + suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); + uint32_t vertexByteOffset = imaginaryChunk.alloc_addr(verticesByteSize, sizeof(float32_t3)); + uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); + const uint32_t totalSize = imaginaryChunk.get_allocated_size(); + + inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; + std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); + streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); + + memcpy(streamingPtr + vertexByteOffset, m_unitAABBVertices.data(), sizeof(m_unitAABBVertices[0]) * m_unitAABBVertices.size()); + memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * instanceCount); + instancesIt += instanceCount; + + assert(!streaming->needsManualFlushOrInvalidate()); + + SPushConstants pc; + memcpy(pc.MVP, cameraMat3x4, sizeof(pc.MVP)); + pc.pVertexBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; + pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; + + commandBuffer->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + commandBuffer->draw(m_unitAABBVertices.size(), instanceCount, 0, 0); + + streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); + } + // end loop return true; } @@ -284,7 +296,7 @@ std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb, const hlsl::float32_t3& color) +void DrawAABB::addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color) { InstanceData instance; instance.color = color; From aee85b4a4fa51de22ccd640ba3ae338911782429 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 8 Jul 2025 14:00:55 +0700 Subject: [PATCH 015/219] update example scene --- 34_DebugDraw/main.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index bf4decb07..40ccfda9d 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -3,8 +3,8 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" #include "app_resources/simple_common.hlsl" -#include "app_resources/common.hlsl" class DebugDrawSampleApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { @@ -299,13 +299,18 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti } { - const uint32_t aabbCount = 4u; + std::mt19937 gen(42); + std::uniform_real_distribution translate_dis(-50.f, 50.f); + std::uniform_real_distribution scale_dis(1.f, 10.f); + std::uniform_real_distribution color_dis(0.f, 1.f); + const uint32_t aabbCount = 200u; drawAABB->clearAABBs(); for (auto i = 0u; i < aabbCount; i++) { - float i2 = (i+1) * 2; - core::aabbox3d aabb = { float(i), 0.f, float(i), i2+i, i2, i2+i}; - drawAABB->addAABB(aabb, {1,0,0,(i+1)*0.2}); + core::vector3d pmin = { translate_dis(gen), translate_dis(gen), translate_dis(gen) }; + core::vector3d pmax = pmin + core::vector3d{ scale_dis(gen), scale_dis(gen), scale_dis(gen) }; + core::aabbox3d aabb = { pmin, pmax }; + drawAABB->addAABB(aabb, { color_dis(gen),color_dis(gen),color_dis(gen),1}); } const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; From 738269ede1b9ee83cd5e44f86e290852ce6b0127 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 8 Jul 2025 15:43:36 +0700 Subject: [PATCH 016/219] use debug draw extension --- 34_DebugDraw/CMakeLists.txt | 20 +- 34_DebugDraw/app_resources/common.hlsl | 35 -- .../app_resources/multi_aabb.vertex.hlsl | 29 -- 34_DebugDraw/include/CDrawAABB.h | 85 ----- 34_DebugDraw/include/common.hpp | 3 +- 34_DebugDraw/main.cpp | 16 +- 34_DebugDraw/src/CDrawAABB.cpp | 318 ------------------ 7 files changed, 15 insertions(+), 491 deletions(-) delete mode 100644 34_DebugDraw/app_resources/common.hlsl delete mode 100644 34_DebugDraw/app_resources/multi_aabb.vertex.hlsl delete mode 100644 34_DebugDraw/include/CDrawAABB.h delete mode 100644 34_DebugDraw/src/CDrawAABB.cpp diff --git a/34_DebugDraw/CMakeLists.txt b/34_DebugDraw/CMakeLists.txt index 60c07b1b7..557280430 100644 --- a/34_DebugDraw/CMakeLists.txt +++ b/34_DebugDraw/CMakeLists.txt @@ -1,22 +1,14 @@ -if(NBL_BUILD_IMGUI) - set(NBL_EXTRA_SOURCES - "${CMAKE_CURRENT_SOURCE_DIR}/src/CDrawAABB.cpp" # TODO remove when moved to nabla - ) - +if(NBL_BUILD_DEBUG_DRAW) set(NBL_INCLUDE_SERACH_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include" ) - # TODO remove - list(APPEND NBL_LIBRARIES - imtestengine - imguizmo - "${NBL_EXT_IMGUI_UI_LIB}" - ) + nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") - nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + add_dependencies(${EXECUTABLE_NAME} ${NBL_EXT_DEBUG_DRAW_TARGET}) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_EXT_DEBUG_DRAW_TARGET}) + target_include_directories(${EXECUTABLE_NAME} PUBLIC $) - # TODO probably remove when moved to nabla if(NBL_EMBED_BUILTIN_RESOURCES) set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) set(RESOURCE_DIR "app_resources") @@ -34,4 +26,4 @@ if(NBL_BUILD_IMGUI) LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) endif() -endif() \ No newline at end of file +endif() diff --git a/34_DebugDraw/app_resources/common.hlsl b/34_DebugDraw/app_resources/common.hlsl deleted file mode 100644 index 6e42628ed..000000000 --- a/34_DebugDraw/app_resources/common.hlsl +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _DRAW_AABB_COMMON_HLSL -#define _DRAW_AABB_COMMON_HLSL - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" - -struct InstanceData -{ -#ifdef __HLSL_VERSION - float32_t3x4 transform; -#else - float transform[3*4]; -#endif - nbl::hlsl::float32_t4 color; -}; - -struct SPushConstants -{ -#ifdef __HLSL_VERSION - float32_t4x4 MVP; -#else - float MVP[4*4]; -#endif - uint64_t pVertexBuffer; - uint64_t pInstanceBuffer; -}; - -#ifdef __HLSL_VERSION -struct PSInput -{ - float32_t4 position : SV_Position; - float32_t4 color : TEXCOORD0; -}; -#endif - -#endif diff --git a/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl b/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl deleted file mode 100644 index ab7a51833..000000000 --- a/34_DebugDraw/app_resources/multi_aabb.vertex.hlsl +++ /dev/null @@ -1,29 +0,0 @@ -#pragma shader_stage(vertex) - -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" -#include "nbl/builtin/hlsl/bda/__ptr.hlsl" -#include "common.hlsl" - -using namespace nbl::hlsl; - -[[vk::push_constant]] SPushConstants pc; - -[shader("vertex")] -PSInput main() -{ - PSInput output; - - float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); - InstanceData instance = vk::RawBufferLoad(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()); - - float32_t4x4 transform; - transform[0] = instance.transform[0]; - transform[1] = instance.transform[1]; - transform[2] = instance.transform[2]; - transform[3] = float32_t4(0, 0, 0, 1); - float32_t4 position = mul(transform, float32_t4(vertex, 1)); - output.position = mul(pc.MVP, position); - output.color = instance.color; - - return output; -} \ No newline at end of file diff --git a/34_DebugDraw/include/CDrawAABB.h b/34_DebugDraw/include/CDrawAABB.h deleted file mode 100644 index d474426f7..000000000 --- a/34_DebugDraw/include/CDrawAABB.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -// TODO move this into nabla - -#ifndef _NBL_EXT_DRAW_AABB_H_ -#define _NBL_EXT_DRAW_AABB_H_ - -#include "nbl/video/declarations.h" -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "../app_resources/common.hlsl" - -namespace nbl::ext::drawdebug -{ -class DrawAABB final : public core::IReferenceCounted -{ -public: - struct SCachedCreationParameters - { - using streaming_buffer_t = video::StreamingTransientDataBufferST>; - - static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - - core::smart_refctd_ptr utilities; - - //! optional, default MDI buffer allocated if not provided - core::smart_refctd_ptr streamingBuffer = nullptr; - }; - - struct SCreationParameters : SCachedCreationParameters - { - core::smart_refctd_ptr assetManager = nullptr; - system::path localInputCWD; // TODO replace when working from nbl/ext - - core::smart_refctd_ptr pipelineLayout; - core::smart_refctd_ptr renderpass = nullptr; - }; - - // creates an instance that draws one AABB via push constant - static core::smart_refctd_ptr create(SCreationParameters&& params); - - // creates an instance that draws multiple AABBs using streaming buffer - // TODO - - // creates default pipeline layout for push constant version - static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); - - // creates default pipeline layout for streaming version - static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); - - static core::smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); - - inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } - - // records draw command for single AABB, user has to set pipeline outside - bool renderSingle(video::IGPUCommandBuffer* commandBuffer); - - bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); - - static std::array getVerticesFromAABB(const core::aabbox3d& aabb); - - void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); - - void clearAABBs(); - -protected: - DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline); - ~DrawAABB() override; - -private: - static core::smart_refctd_ptr createPipeline(SCreationParameters& params); - static bool createStreamingBuffer(SCreationParameters& params); - - std::vector m_instances; - std::array m_unitAABBVertices; - - SCachedCreationParameters m_cachedCreationParams; - - core::smart_refctd_ptr m_pipeline; -}; -} - -#endif diff --git a/34_DebugDraw/include/common.hpp b/34_DebugDraw/include/common.hpp index 599c9a2e9..e70eb47a8 100644 --- a/34_DebugDraw/include/common.hpp +++ b/34_DebugDraw/include/common.hpp @@ -8,7 +8,8 @@ #include "nbl/examples/common/CEventCallback.hpp" #include "nbl/examples/examples.hpp" -#include "CDrawAABB.h" +//#include "nbl/CDrawAABB.h" +#include "nbl/ext/DebugDraw/CDrawAABB.h" using namespace nbl; using namespace core; diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 40ccfda9d..937d699b8 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -3,7 +3,6 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" -#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" #include "app_resources/simple_common.hlsl" class DebugDrawSampleApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication @@ -123,16 +122,15 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti .size = sizeof(SSimplePushConstants) }; { - ext::drawdebug::DrawAABB::SCreationParameters params = {}; + ext::debugdraw::DrawAABB::SCreationParameters params = {}; params.assetManager = m_assetMgr; - params.localInputCWD = localInputCWD; - params.pipelineLayout = ext::drawdebug::DrawAABB::createDefaultPipelineLayout(m_device.get()); + params.pipelineLayout = ext::debugdraw::DrawAABB::createDefaultPipelineLayout(m_device.get()); params.renderpass = smart_refctd_ptr(renderpass); params.utilities = m_utils; - drawAABB = ext::drawdebug::DrawAABB::create(std::move(params)); + drawAABB = ext::debugdraw::DrawAABB::create(std::move(params)); } { - auto vertices = ext::drawdebug::DrawAABB::getVerticesFromAABB(testAABB); + auto vertices = ext::debugdraw::DrawAABB::getVerticesFromAABB(testAABB); IGPUBuffer::SCreationParams params; params.size = sizeof(float32_t3) * vertices.size(); params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; @@ -168,11 +166,11 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti auto vertexShader = compileShader("app_resources/simple.vertex.hlsl"); auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); - const auto pipelineLayout = ext::drawdebug::DrawAABB::createDefaultPipelineLayout(m_device.get(), simplePcRange); + const auto pipelineLayout = ext::debugdraw::DrawAABB::createDefaultPipelineLayout(m_device.get(), simplePcRange); IGPUGraphicsPipeline::SShaderSpecInfo vs = { .shader = vertexShader.get(), .entryPoint = "main" }; IGPUGraphicsPipeline::SShaderSpecInfo fs = { .shader = fragmentShader.get(), .entryPoint = "main" }; - m_pipeline = ext::drawdebug::DrawAABB::createDefaultPipeline(m_device.get(), pipelineLayout.get(), renderpass, vs, fs); + m_pipeline = ext::debugdraw::DrawAABB::createDefaultPipeline(m_device.get(), pipelineLayout.get(), renderpass, vs, fs); if (!m_pipeline) return logFail("Graphics pipeline creation failed"); } @@ -455,7 +453,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; - smart_refctd_ptr drawAABB; + smart_refctd_ptr drawAABB; core::aabbox3d testAABB = core::aabbox3d({ -5, -5, -5 }, { 10, 10, -10 }); core::aabbox3d testAABB2 = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); smart_refctd_ptr verticesBuffer; diff --git a/34_DebugDraw/src/CDrawAABB.cpp b/34_DebugDraw/src/CDrawAABB.cpp deleted file mode 100644 index 085b0fb9c..000000000 --- a/34_DebugDraw/src/CDrawAABB.cpp +++ /dev/null @@ -1,318 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -// TODO move this into nabla - -#include "CDrawAABB.h" - -using namespace nbl; -using namespace core; -using namespace video; -using namespace system; -using namespace asset; -using namespace hlsl; - -namespace nbl::ext::drawdebug -{ - -core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) -{ - auto* const logger = params.utilities->getLogger(); - - auto pipeline = createPipeline(params); - if (!pipeline) - { - logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); - return nullptr; - } - - if (!createStreamingBuffer(params)) - { - logger->log("Failed to create streaming buffer!", ILogger::ELL_ERROR); - return nullptr; - } - - return core::smart_refctd_ptr(new DrawAABB(std::move(params), pipeline)); -} - -DrawAABB::DrawAABB(SCreationParameters&& params, smart_refctd_ptr pipeline) - : m_cachedCreationParams(std::move(params)), m_pipeline(pipeline) -{ - const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); - m_unitAABBVertices = getVerticesFromAABB(unitAABB); -} - -DrawAABB::~DrawAABB() -{ -} - -smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params) -{ - auto compileShader = [&](const std::string& filePath) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lparams = {}; - lparams.logger = params.utilities->getLogger(); - lparams.workingDirectory = params.localInputCWD; - auto bundle = params.assetManager->getAsset(filePath, lparams); - if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) - { - params.utilities->getLogger()->log("Shader %s not found!", ILogger::ELL_ERROR, filePath.c_str()); - exit(-1); - } - - const auto assets = bundle.getContents(); - assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - if (!shaderSrc) - return nullptr; - - return params.utilities->getLogicalDevice()->compileShader({ shaderSrc.get() }); - }; - - auto vertexShader = compileShader("app_resources/multi_aabb.vertex.hlsl"); - auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); - - video::IGPUGraphicsPipeline::SCreationParams pipelineParams[1] = {}; - pipelineParams[0].layout = params.pipelineLayout.get(); - pipelineParams[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; - pipelineParams[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main" }; - pipelineParams[0].cached = { - .primitiveAssembly = { - .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, - } - }; - pipelineParams[0].renderpass = params.renderpass.get(); - - smart_refctd_ptr pipeline; - params.utilities->getLogicalDevice()->createGraphicsPipelines(nullptr, pipelineParams, &pipeline); - if (!pipeline) - { - params.utilities->getLogger()->log("Could not create streaming pipeline!", ILogger::ELL_ERROR); - return nullptr; - } - - return pipeline; -} - -bool DrawAABB::createStreamingBuffer(SCreationParameters& params) -{ - const uint32_t minStreamingBufferAllocationSize = 128u, maxStreamingBufferAllocationAlignment = 4096u, mdiBufferDefaultSize = /* 2MB */ 1024u * 1024u * 2u; - - auto getRequiredAccessFlags = [&](const bitflag& properties) - { - bitflag flags(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS); - - if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_READABLE_BIT)) - flags |= IDeviceMemoryAllocation::EMCAF_READ; - if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT)) - flags |= IDeviceMemoryAllocation::EMCAF_WRITE; - - return flags; - }; - - if (!params.streamingBuffer) - { - IGPUBuffer::SCreationParams mdiCreationParams = {}; - mdiCreationParams.usage = SCachedCreationParameters::RequiredUsageFlags; - mdiCreationParams.size = mdiBufferDefaultSize; - - auto buffer = params.utilities->getLogicalDevice()->createBuffer(std::move(mdiCreationParams)); - buffer->setObjectDebugName("AABB Streaming Buffer"); - - auto memoryReqs = buffer->getMemoryReqs(); - memoryReqs.memoryTypeBits &= params.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); - - auto allocation = params.utilities->getLogicalDevice()->allocate(memoryReqs, buffer.get(), SCachedCreationParameters::RequiredAllocateFlags); - { - const bool allocated = allocation.isValid(); - assert(allocated); - } - auto memory = allocation.memory; - - if (!memory->map({ 0ull, memoryReqs.size }, getRequiredAccessFlags(memory->getMemoryPropertyFlags()))) - params.utilities->getLogger()->log("Could not map device memory!", ILogger::ELL_ERROR); - - params.streamingBuffer = make_smart_refctd_ptr(SBufferRange{0ull, mdiCreationParams.size, std::move(buffer)}, maxStreamingBufferAllocationAlignment, minStreamingBufferAllocationSize); - } - - auto buffer = params.streamingBuffer->getBuffer(); - auto binding = buffer->getBoundMemory(); - - const auto validation = std::to_array - ({ - std::make_pair(buffer->getCreationParams().usage.hasFlags(SCachedCreationParameters::RequiredUsageFlags), "Streaming buffer must be created with IBuffer::EUF_STORAGE_BUFFER_BIT | IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT enabled!"), - std::make_pair(bool(buffer->getMemoryReqs().memoryTypeBits & params.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits()), "Streaming buffer must have up-streaming memory type bits enabled!"), - std::make_pair(binding.memory->getAllocateFlags().hasFlags(SCachedCreationParameters::RequiredAllocateFlags), "Streaming buffer's memory must be allocated with IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT enabled!"), - std::make_pair(binding.memory->isCurrentlyMapped(), "Streaming buffer's memory must be mapped!"), // streaming buffer contructor already validates it, but cannot assume user won't unmap its own buffer for some reason (sorry if you have just hit it) - std::make_pair(binding.memory->getCurrentMappingAccess().hasFlags(getRequiredAccessFlags(binding.memory->getMemoryPropertyFlags())), "Streaming buffer's memory current mapping access flags don't meet requirements!") - }); - - for (const auto& [ok, error] : validation) - if (!ok) - { - params.utilities->getLogger()->log(error, ILogger::ELL_ERROR); - return false; - } - - return true; -} - -core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) -{ - return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); -} - -core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device) -{ - SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, - .offset = 0, - .size = sizeof(SPushConstants) - }; - return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); -} - -smart_refctd_ptr DrawAABB::createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment) -{ - smart_refctd_ptr pipeline; - - video::IGPUGraphicsPipeline::SCreationParams params[1] = {}; - params[0].layout = layout; - params[0].vertexShader = vertex; - params[0].fragmentShader = fragment; - params[0].cached = { - .primitiveAssembly = { - .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, - } - }; - params[0].renderpass = renderpass; - - device->createGraphicsPipelines(nullptr, params, &pipeline); - - return pipeline; -} - -bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer) -{ - commandBuffer->setLineWidth(1.f); - commandBuffer->draw(24, 1, 0, 0); - - return true; -} - -bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4) -{ - using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; - constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); - // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all - constexpr offset_t MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return std::lcm(a, b); }); - // allocator initialization needs us to round up to PoT - const auto MaxPOTAlignment = roundUpToPoT(MaxAlignment); - - auto* streaming = m_cachedCreationParams.streamingBuffer.get(); - - auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); - assert(streamingPtr); - - commandBuffer->bindGraphicsPipeline(m_pipeline.get()); // move outside of loop, only bind once - - auto instancesIt = m_instances.begin(); - const uint32_t verticesByteSize = sizeof(float32_t3) * m_unitAABBVertices.size(); - const uint32_t availableInstancesByteSize = streaming->getBuffer()->getSize() - verticesByteSize; - const uint32_t instancesPerIter = availableInstancesByteSize / sizeof(InstanceData); - using suballocator_t = core::LinearAddressAllocatorST; - while (instancesIt != m_instances.end()) - { - const uint32_t instanceCount = min(instancesPerIter, m_instances.size()); - offset_t inputOffset = 0u; - offset_t ImaginarySizeUpperBound = 0x1 << 30; - suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); - uint32_t vertexByteOffset = imaginaryChunk.alloc_addr(verticesByteSize, sizeof(float32_t3)); - uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); - const uint32_t totalSize = imaginaryChunk.get_allocated_size(); - - inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; - std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); - streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); - - memcpy(streamingPtr + vertexByteOffset, m_unitAABBVertices.data(), sizeof(m_unitAABBVertices[0]) * m_unitAABBVertices.size()); - memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * instanceCount); - instancesIt += instanceCount; - - assert(!streaming->needsManualFlushOrInvalidate()); - - SPushConstants pc; - memcpy(pc.MVP, cameraMat3x4, sizeof(pc.MVP)); - pc.pVertexBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; - pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; - - commandBuffer->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); - commandBuffer->draw(m_unitAABBVertices.size(), instanceCount, 0, 0); - - streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); - } - // end loop - - return true; -} - -std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb) -{ - const auto& pMin = aabb.MinEdge; - const auto& pMax = aabb.MaxEdge; - - std::array vertices; - vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); - - vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[6] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); - - vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); - vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); - - vertices[12] = float32_t3(pMax.X, pMax.Y, pMax.Z); - vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); - vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); - vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); - - vertices[16] = float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); - - vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); - vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); - vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[23] = float32_t3(pMax.X, pMax.Y, pMax.Z); - - return vertices; -} - -void DrawAABB::addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color) -{ - InstanceData instance; - instance.color = color; - - core::matrix3x4SIMD instanceTransform; - instanceTransform.setTranslation(core::vectorSIMDf(aabb.MinEdge.X, aabb.MinEdge.Y, aabb.MinEdge.Z, 0)); - const auto diagonal = aabb.MaxEdge - aabb.MinEdge; - instanceTransform.setScale(core::vectorSIMDf(diagonal.X, diagonal.Y, diagonal.Z)); - memcpy(instance.transform, instanceTransform.pointer(), sizeof(core::matrix3x4SIMD)); - - m_instances.push_back(instance); -} - -void DrawAABB::clearAABBs() -{ - m_instances.clear(); -} - -} From 61b1c0085ecb5add2b06a71c705b6a91143fcbb3 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 8 Jul 2025 17:16:37 +0700 Subject: [PATCH 017/219] removed old mesh loaders --- 29_MeshLoaders/CMakeLists.txt | 37 - 29_MeshLoaders/config.json.template | 28 - 29_MeshLoaders/main.cpp | 1404 --------------------------- 29_MeshLoaders/pipeline.groovy | 50 - 4 files changed, 1519 deletions(-) delete mode 100644 29_MeshLoaders/CMakeLists.txt delete mode 100644 29_MeshLoaders/config.json.template delete mode 100644 29_MeshLoaders/main.cpp delete mode 100644 29_MeshLoaders/pipeline.groovy diff --git a/29_MeshLoaders/CMakeLists.txt b/29_MeshLoaders/CMakeLists.txt deleted file mode 100644 index 07b0fd396..000000000 --- a/29_MeshLoaders/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -if(NBL_BUILD_IMGUI) - set(NBL_INCLUDE_SERACH_DIRECTORIES - "${CMAKE_CURRENT_SOURCE_DIR}/include" - ) - - list(APPEND NBL_LIBRARIES - imtestengine - "${NBL_EXT_IMGUI_UI_LIB}" - ) - - nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") - - if(NBL_EMBED_BUILTIN_RESOURCES) - set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) - set(RESOURCE_DIR "app_resources") - - get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) - - file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") - foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") - endforeach() - - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") - - LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) - endif() -endif() - - diff --git a/29_MeshLoaders/config.json.template b/29_MeshLoaders/config.json.template deleted file mode 100644 index 2c42b001d..000000000 --- a/29_MeshLoaders/config.json.template +++ /dev/null @@ -1,28 +0,0 @@ -{ - "enableParallelBuild": true, - "threadsPerBuildProcess" : 2, - "isExecuted": false, - "scriptPath": "", - "cmake": { - "configurations": [ "Release", "Debug", "RelWithDebInfo" ], - "buildModes": [], - "requiredOptions": [ "NBL_BUILD_MITSUBA_LOADER" ] - }, - "profiles": [ - { - "backend": "vulkan", - "platform": "windows", - "buildModes": [], - "runConfiguration": "Release", - "gpuArchitectures": [] - } - ], - "dependencies": [], - "data": [ - { - "dependencies": [], - "command": [""], - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/29_MeshLoaders/main.cpp b/29_MeshLoaders/main.cpp deleted file mode 100644 index 6afb74a5c..000000000 --- a/29_MeshLoaders/main.cpp +++ /dev/null @@ -1,1404 +0,0 @@ -// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include -#include "nbl/asset/utils/CGeometryCreator.h" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" - -#include -#include - -using namespace nbl; -using namespace core; -using namespace hlsl; -using namespace system; -using namespace asset; -using namespace ui; -using namespace video; - - -class MeshLoadersApp final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication -{ - using device_base_t = examples::SimpleWindowedApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; - - constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; - constexpr static inline uint32_t MaxFramesInFlight = 3u; - constexpr static inline uint8_t MaxUITextureCount = 1u; - - - public: - inline MeshLoadersApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) - { - } - - inline SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override - { - auto retval = device_base_t::getPreferredDeviceFeatures(); - retval.accelerationStructure = true; - retval.rayQuery = true; - return retval; - } - - inline core::vector getSurfaces() const override - { - if (!m_surface) - { - { - auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); - IWindow::SCreationParams params = {}; - params.callback = core::make_smart_refctd_ptr(); - params.width = WIN_W; - params.height = WIN_H; - params.x = 32; - params.y = 32; - params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; - params.windowCaption = "MeshLoadersApp"; - params.callback = windowCallback; - const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); - } - - auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); - const_cast&>(m_surface) = CSimpleResizeSurface::create(std::move(surface)); - } - - if (m_surface) - return { {m_surface->getSurface()/*,EQF_NONE*/} }; - - return {}; - } - - // so that we can use the same queue for asset converter and rendering - inline core::vector getQueueRequirements() const override - { - auto reqs = device_base_t::getQueueRequirements(); - reqs.front().requiredFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT; - reqs.front().requiredFlags |= IQueue::FAMILY_FLAGS::COMPUTE_BIT; - return reqs; - } - - inline bool onAppInitialized(smart_refctd_ptr&& system) override - { - m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); - - if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - - if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - -#if 0 - // Load Custom Shader - auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto sourceRaw = IAsset::castDown(assets[0]); - if (!sourceRaw) - return nullptr; - - return m_device->createShader({ sourceRaw.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); - }; - - // load shaders - const auto raygenShader = loadCompileAndCreateShader("app_resources/raytrace.rgen.hlsl"); - const auto closestHitShader = loadCompileAndCreateShader("app_resources/raytrace.rchit.hlsl"); - const auto proceduralClosestHitShader = loadCompileAndCreateShader("app_resources/raytrace_procedural.rchit.hlsl"); - const auto intersectionHitShader = loadCompileAndCreateShader("app_resources/raytrace.rint.hlsl"); - const auto anyHitShaderColorPayload = loadCompileAndCreateShader("app_resources/raytrace.rahit.hlsl"); - const auto anyHitShaderShadowPayload = loadCompileAndCreateShader("app_resources/raytrace_shadow.rahit.hlsl"); - const auto missShader = loadCompileAndCreateShader("app_resources/raytrace.rmiss.hlsl"); - const auto missShadowShader = loadCompileAndCreateShader("app_resources/raytrace_shadow.rmiss.hlsl"); - const auto directionalLightCallShader = loadCompileAndCreateShader("app_resources/light_directional.rcall.hlsl"); - const auto pointLightCallShader = loadCompileAndCreateShader("app_resources/light_point.rcall.hlsl"); - const auto spotLightCallShader = loadCompileAndCreateShader("app_resources/light_spot.rcall.hlsl"); - const auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl"); -#endif - - m_semaphore = m_device->createSemaphore(m_realFrameIx); - if (!m_semaphore) - return logFail("Failed to Create a Semaphore!"); - - auto gQueue = getGraphicsQueue(); - - // Create renderpass and init surface - nbl::video::IGPURenderpass* renderpass; - { - ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; - if (!swapchainParams.deduceFormat(m_physicalDevice)) - return logFail("Could not choose a Surface Format for the Swapchain!"); - - const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = - { - { - .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = - { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }, - { - .srcSubpass = 0, - .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = - { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; - - auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); - renderpass = scResources->getRenderpass(); - - if (!renderpass) - return logFail("Failed to create Renderpass!"); - - if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) - return logFail("Could not create Window & Surface or initialize the Surface!"); - } -#if 0 - auto pool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - - m_converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); - - for (auto i = 0u; i < MaxFramesInFlight; i++) - { - if (!pool) - return logFail("Couldn't create Command Pool!"); - if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) - return logFail("Couldn't create Command Buffer!"); - } -#endif - m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); - m_surface->recreateSwapchain(); - -#if 0 - // create output images - m_hdrImage = m_device->createImage({ - { - .type = IGPUImage::ET_2D, - .samples = ICPUImage::ESCF_1_BIT, - .format = EF_R16G16B16A16_SFLOAT, - .extent = {WIN_W, WIN_H, 1}, - .mipLevels = 1, - .arrayLayers = 1, - .flags = IImage::ECF_NONE, - .usage = bitflag(IImage::EUF_STORAGE_BIT) | IImage::EUF_TRANSFER_SRC_BIT | IImage::EUF_SAMPLED_BIT - } - }); - - if (!m_hdrImage || !m_device->allocate(m_hdrImage->getMemoryReqs(), m_hdrImage.get()).isValid()) - return logFail("Could not create HDR Image"); - - m_hdrImageView = m_device->createImageView({ - .flags = IGPUImageView::ECF_NONE, - .subUsages = IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT | IGPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT, - .image = m_hdrImage, - .viewType = IGPUImageView::E_TYPE::ET_2D, - .format = asset::EF_R16G16B16A16_SFLOAT - }); - - - - // ray trace pipeline and descriptor set layout setup - { - const IGPUDescriptorSetLayout::SBinding bindings[] = { - { - .binding = 0, - .type = asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, - .count = 1, - }, - { - .binding = 1, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, - .count = 1, - } - }; - const auto descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); - - const std::array dsLayoutPtrs = { descriptorSetLayout.get() }; - m_rayTracingDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, std::span(dsLayoutPtrs.begin(), dsLayoutPtrs.end())); - m_rayTracingDs = m_rayTracingDsPool->createDescriptorSet(descriptorSetLayout); - - const SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, - .offset = 0u, - .size = sizeof(SPushConstants), - }; - const auto pipelineLayout = m_device->createPipelineLayout({ &pcRange, 1 }, smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); - - IGPURayTracingPipeline::SCreationParams params = {}; - - enum RtDemoShader - { - RTDS_RAYGEN, - RTDS_MISS, - RTDS_MISS_SHADOW, - RTDS_CLOSEST_HIT, - RTDS_SPHERE_CLOSEST_HIT, - RTDS_ANYHIT_PRIMARY, - RTDS_ANYHIT_SHADOW, - RTDS_INTERSECTION, - RTDS_DIRECTIONAL_CALL, - RTDS_POINT_CALL, - RTDS_SPOT_CALL, - RTDS_COUNT - }; - - IGPUShader::SSpecInfo shaders[RTDS_COUNT]; - shaders[RTDS_RAYGEN] = { .shader = raygenShader.get() }; - shaders[RTDS_MISS] = { .shader = missShader.get() }; - shaders[RTDS_MISS_SHADOW] = { .shader = missShadowShader.get() }; - shaders[RTDS_CLOSEST_HIT] = { .shader = closestHitShader.get() }; - shaders[RTDS_SPHERE_CLOSEST_HIT] = { .shader = proceduralClosestHitShader.get() }; - shaders[RTDS_ANYHIT_PRIMARY] = { .shader = anyHitShaderColorPayload.get() }; - shaders[RTDS_ANYHIT_SHADOW] = { .shader = anyHitShaderShadowPayload.get() }; - shaders[RTDS_INTERSECTION] = { .shader = intersectionHitShader.get() }; - shaders[RTDS_DIRECTIONAL_CALL] = { .shader = directionalLightCallShader.get() }; - shaders[RTDS_POINT_CALL] = { .shader = pointLightCallShader.get() }; - shaders[RTDS_SPOT_CALL] = { .shader = spotLightCallShader.get() }; - - params.layout = pipelineLayout.get(); - params.shaders = std::span(shaders); - using RayTracingFlags = IGPURayTracingPipeline::SCreationParams::FLAGS; - params.flags = core::bitflag(RayTracingFlags::NO_NULL_MISS_SHADERS) | - RayTracingFlags::NO_NULL_INTERSECTION_SHADERS | - RayTracingFlags::NO_NULL_ANY_HIT_SHADERS; - - auto& shaderGroups = params.shaderGroups; - - shaderGroups.raygen = { .index = RTDS_RAYGEN }; - - IRayTracingPipelineBase::SGeneralShaderGroup missGroups[EMT_COUNT]; - missGroups[EMT_PRIMARY] = { .index = RTDS_MISS }; - missGroups[EMT_OCCLUSION] = { .index = RTDS_MISS_SHADOW }; - shaderGroups.misses = missGroups; - - auto getHitGroupIndex = [](E_GEOM_TYPE geomType, E_RAY_TYPE rayType) - { - return geomType * ERT_COUNT + rayType; - }; - IRayTracingPipelineBase::SHitShaderGroup hitGroups[E_RAY_TYPE::ERT_COUNT * E_GEOM_TYPE::EGT_COUNT]; - hitGroups[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = { - .closestHit = RTDS_CLOSEST_HIT, - .anyHit = RTDS_ANYHIT_PRIMARY, - }; - hitGroups[getHitGroupIndex(EGT_TRIANGLES, ERT_OCCLUSION)] = { - .closestHit = IGPURayTracingPipeline::SGeneralShaderGroup::Unused, - .anyHit = RTDS_ANYHIT_SHADOW, - }; - hitGroups[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { - .closestHit = RTDS_SPHERE_CLOSEST_HIT, - .anyHit = RTDS_ANYHIT_PRIMARY, - .intersection = RTDS_INTERSECTION, - }; - hitGroups[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = { - .closestHit = IGPURayTracingPipeline::SGeneralShaderGroup::Unused, - .anyHit = RTDS_ANYHIT_SHADOW, - .intersection = RTDS_INTERSECTION, - }; - shaderGroups.hits = hitGroups; - - IRayTracingPipelineBase::SGeneralShaderGroup callableGroups[ELT_COUNT]; - callableGroups[ELT_DIRECTIONAL] = { .index = RTDS_DIRECTIONAL_CALL }; - callableGroups[ELT_POINT] = { .index = RTDS_POINT_CALL }; - callableGroups[ELT_SPOT] = { .index = RTDS_SPOT_CALL }; - shaderGroups.callables = callableGroups; - - params.cached.maxRecursionDepth = 1; - params.cached.dynamicStackSize = true; - - if (!m_device->createRayTracingPipelines(nullptr, { ¶ms, 1 }, &m_rayTracingPipeline)) - return logFail("Failed to create ray tracing pipeline"); - - calculateRayTracingStackSize(m_rayTracingPipeline); - - if (!createShaderBindingTable(m_rayTracingPipeline)) - return logFail("Could not create shader binding table"); - - } - - auto assetManager = make_smart_refctd_ptr(smart_refctd_ptr(system)); - auto* geometryCreator = assetManager->getGeometryCreator(); - - if (!createIndirectBuffer()) - return logFail("Could not create indirect buffer"); - - if (!createAccelerationStructuresFromGeometry(geometryCreator)) - return logFail("Could not create acceleration structures from geometry creator"); - - ISampler::SParams samplerParams = { - .AnisotropicFilter = 0 - }; - auto defaultSampler = m_device->createSampler(samplerParams); - - { - const IGPUDescriptorSetLayout::SBinding bindings[] = { - { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - .immutableSamplers = &defaultSampler - } - }; - auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(bindings); - const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; - const uint32_t setCounts[] = { 1u }; - m_presentDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - m_presentDs = m_presentDsPool->createDescriptorSet(gpuPresentDescriptorSetLayout); - - auto scRes = static_cast(m_surface->getSwapchainResources()); - ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); - if (!fsTriProtoPPln) - return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); - - const IGPUShader::SSpecInfo fragSpec = { - .entryPoint = "main", - .shader = fragmentShader.get() - }; - - auto presentLayout = m_device->createPipelineLayout( - {}, - core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), - nullptr, - nullptr, - nullptr - ); - m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); - if (!m_presentPipeline) - return logFail("Could not create Graphics Pipeline!"); - } - - // write descriptors - IGPUDescriptorSet::SDescriptorInfo infos[3]; - infos[0].desc = m_gpuTlas; - - infos[1].desc = m_hdrImageView; - if (!infos[1].desc) - return logFail("Failed to create image view"); - infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; - - infos[2].desc = m_hdrImageView; - infos[2].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - IGPUDescriptorSet::SWriteDescriptorSet writes[] = { - {.dstSet = m_rayTracingDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[0]}, - {.dstSet = m_rayTracingDs.get(), .binding = 1, .arrayElement = 0, .count = 1, .info = &infos[1]}, - {.dstSet = m_presentDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[2] }, - }; - m_device->updateDescriptorSets(std::span(writes), {}); - - // gui descriptor setup - { - using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; - { - IGPUSampler::SParams params; - params.AnisotropicFilter = 1u; - params.TextureWrapU = ETC_REPEAT; - params.TextureWrapV = ETC_REPEAT; - params.TextureWrapW = ETC_REPEAT; - - m_ui.samplers.gui = m_device->createSampler(params); - m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); - } - - std::array, 69u> immutableSamplers; - for (auto& it : immutableSamplers) - it = smart_refctd_ptr(m_ui.samplers.scene); - - immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); - - nbl::ext::imgui::UI::SCreationParameters params; - - params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; - params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; - params.assetManager = m_assetMgr; - params.pipelineCache = nullptr; - params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); - params.renderpass = smart_refctd_ptr(renderpass); - params.streamingBuffer = nullptr; - params.subpassIx = 0u; - params.transfer = getGraphicsQueue(); - params.utilities = m_utils; - { - m_ui.manager = ext::imgui::UI::create(std::move(params)); - - // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources - const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); - const auto& params = m_ui.manager->getCreationParameters(); - - IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; - descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; - descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; - descriptorPoolInfo.maxSets = 1u; - descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; - - m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); - assert(m_guiDescriptorSetPool); - - m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); - assert(m_ui.descriptorSet); - } - } - - m_ui.manager->registerListener( - [this]() -> void { - ImGuiIO& io = ImGui::GetIO(); - - m_camera.setProjectionMatrix([&]() - { - static matrix4SIMD projection; - - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( - core::radians(m_cameraSetting.fov), - io.DisplaySize.x / io.DisplaySize.y, - m_cameraSetting.zNear, - m_cameraSetting.zFar); - - return projection; - }()); - - ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); - - // create a window and insert the inspector - ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); - ImGui::Begin("Controls"); - - ImGui::SameLine(); - - ImGui::Text("Camera"); - - ImGui::SliderFloat("Move speed", &m_cameraSetting.moveSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Rotate speed", &m_cameraSetting.rotateSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Fov", &m_cameraSetting.fov, 20.f, 150.f); - ImGui::SliderFloat("zNear", &m_cameraSetting.zNear, 0.1f, 100.f); - ImGui::SliderFloat("zFar", &m_cameraSetting.zFar, 110.f, 10000.f); - Light m_oldLight = m_light; - int light_type = m_light.type; - ImGui::ListBox("LightType", &light_type, s_lightTypeNames, ELT_COUNT); - m_light.type = static_cast(light_type); - if (m_light.type == ELT_DIRECTIONAL) - { - ImGui::SliderFloat3("Light Direction", &m_light.direction.x, -1.f, 1.f); - } - else if (m_light.type == ELT_POINT) - { - ImGui::SliderFloat3("Light Position", &m_light.position.x, -20.f, 20.f); - } - else if (m_light.type == ELT_SPOT) - { - ImGui::SliderFloat3("Light Direction", &m_light.direction.x, -1.f, 1.f); - ImGui::SliderFloat3("Light Position", &m_light.position.x, -20.f, 20.f); - - float32_t dOuterCutoff = hlsl::degrees(acos(m_light.outerCutoff)); - if (ImGui::SliderFloat("Light Outer Cutoff", &dOuterCutoff, 0.0f, 45.0f)) - { - m_light.outerCutoff = cos(hlsl::radians(dOuterCutoff)); - } - } - ImGui::Checkbox("Use Indirect Command", &m_useIndirectCommand); - if (m_light != m_oldLight) - { - m_frameAccumulationCounter = 0; - } - - ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); - - ImGui::End(); - } - ); -#endif - // Set Camera - { - core::vectorSIMDf cameraPosition(0, 5, -10); - matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( - core::radians(60.0f), - WIN_W / WIN_H, - 0.01f, - 500.0f - ); - m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); - } - - m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); - m_surface->recreateSwapchain(); - m_winMgr->show(m_window.get()); - m_oracle.reportBeginFrameRecord(); - m_camera.mapKeysToWASD(); - - return true; - } - - bool updateGUIDescriptorSet() - { - // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout - static std::array descriptorInfo; - static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; - - descriptorInfo[ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfo[ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); - - for (uint32_t i = 0; i < descriptorInfo.size(); ++i) - { - writes[i].dstSet = m_ui.descriptorSet.get(); - writes[i].binding = 0u; - writes[i].arrayElement = i; - writes[i].count = 1u; - } - writes[ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + ext::imgui::UI::FontAtlasTexId; - - return m_device->updateDescriptorSets(writes, {}); - } - - inline void workLoopBody() override - { - // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. - const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); - // We block for semaphores for 2 reasons here: - // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] - // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] - if (m_realFrameIx >= framesInFlight) - { - const ISemaphore::SWaitInfo cbDonePending[] = - { - { - .semaphore = m_semaphore.get(), - .value = m_realFrameIx + 1 - framesInFlight - } - }; - if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) - return; - } - const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - - m_api->startCapture(); - -// update(); - - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[resourceIx].get(); - - if (!keepRunning()) - return; - - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - cmdbuf->beginDebugMarker("Frame"); -#if 0 - const auto viewMatrix = m_camera.getViewMatrix(); - const auto projectionMatrix = m_camera.getProjectionMatrix(); - const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - modelMatrix.setRotation(quaternion(0, 0, 0)); - - core::matrix4SIMD invModelViewProjectionMatrix; - modelViewProjectionMatrix.getInverseTransform(invModelViewProjectionMatrix); - - { - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; - imageBarriers[0].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, // previous frame read from framgent shader - .srcAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }; - imageBarriers[0].image = m_hdrImage.get(); - imageBarriers[0].subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }; - imageBarriers[0].oldLayout = m_frameAccumulationCounter == 0 ? IImage::LAYOUT::UNDEFINED : IImage::LAYOUT::READ_ONLY_OPTIMAL; - imageBarriers[0].newLayout = IImage::LAYOUT::GENERAL; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); - } - - // Trace Rays Pass - { - SPushConstants pc; - pc.light = m_light; - pc.proceduralGeomInfoBuffer = m_proceduralGeomInfoBuffer->getDeviceAddress(); - pc.triangleGeomInfoBuffer = m_triangleGeomInfoBuffer->getDeviceAddress(); - pc.frameCounter = m_frameAccumulationCounter; - const core::vector3df camPos = m_camera.getPosition().getAsVector3df(); - pc.camPos = { camPos.X, camPos.Y, camPos.Z }; - memcpy(&pc.invMVP, invModelViewProjectionMatrix.pointer(), sizeof(pc.invMVP)); - - cmdbuf->bindRayTracingPipeline(m_rayTracingPipeline.get()); - cmdbuf->setRayTracingPipelineStackSize(m_rayTracingStackSize); - cmdbuf->pushConstants(m_rayTracingPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, 0, sizeof(SPushConstants), &pc); - cmdbuf->bindDescriptorSets(EPBP_RAY_TRACING, m_rayTracingPipeline->getLayout(), 0, 1, &m_rayTracingDs.get()); - if (m_useIndirectCommand) - { - cmdbuf->traceRaysIndirect( - SBufferBinding{ - .offset = 0, - .buffer = m_indirectBuffer, - }); - } - else - { - cmdbuf->traceRays( - m_shaderBindingTable.raygenGroupRange, - m_shaderBindingTable.missGroupsRange, m_shaderBindingTable.missGroupsStride, - m_shaderBindingTable.hitGroupsRange, m_shaderBindingTable.hitGroupsStride, - m_shaderBindingTable.callableGroupsRange, m_shaderBindingTable.callableGroupsStride, - WIN_W, WIN_H, 1); - } - } - - // pipeline barrier - { - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; - imageBarriers[0].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }; - imageBarriers[0].image = m_hdrImage.get(); - imageBarriers[0].subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }; - imageBarriers[0].oldLayout = IImage::LAYOUT::GENERAL; - imageBarriers[0].newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); - } - - { - asset::SViewport viewport; - { - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = WIN_W; - viewport.height = WIN_H; - } - cmdbuf->setViewport(0u, 1u, &viewport); - - - VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; - cmdbuf->setScissor(defaultScisors); - - auto scRes = static_cast(m_surface->getSwapchainResources()); - const VkRect2D currentRenderArea = - { - .offset = {0,0}, - .extent = {m_window->getWidth(),m_window->getHeight()} - }; - const IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; - const IGPUCommandBuffer::SRenderpassBeginInfo info = - { - .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), - .colorClearValues = &clearColor, - .depthStencilClearValues = nullptr, - .renderArea = currentRenderArea - }; - nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; - - cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - - cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDs.get()); - ext::FullScreenTriangle::recordDrawCall(cmdbuf); - - const auto uiParams = m_ui.manager->getCreationParameters(); - auto* uiPipeline = m_ui.manager->getPipeline(); - cmdbuf->bindGraphicsPipeline(uiPipeline); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); - m_ui.manager->render(cmdbuf, waitInfo); - - cmdbuf->endRenderPass(); - - } -#endif - cmdbuf->endDebugMarker(); - cmdbuf->end(); - - { - const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = - { - { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS - } - }; - { - { - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = - { - {.cmdbuf = cmdbuf } - }; - - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = - { - { - .semaphore = m_currentImageAcquire.semaphore, - .value = m_currentImageAcquire.acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - const IQueue::SSubmitInfo infos[] = - { - { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = rendered - } - }; - -// updateGUIDescriptorSet(); - - if (queue->submit(infos) != IQueue::RESULT::SUCCESS) - m_realFrameIx--; - } - } - - m_window->setCaption("[Nabla Engine] Ray Tracing Pipeline"); - m_surface->present(m_currentImageAcquire.imageIndex, rendered); - } - m_api->endCapture(); - m_frameAccumulationCounter++; - } -#if 0 - inline void update() - { - m_camera.setMoveSpeed(m_cameraSetting.moveSpeed); - m_camera.setRotateSpeed(m_cameraSetting.rotateSpeed); - - static std::chrono::microseconds previousEventTimestamp{}; - - m_inputSystem->getDefaultMouse(&m_mouse); - m_inputSystem->getDefaultKeyboard(&m_keyboard); - - auto updatePresentationTimestamp = [&]() - { - m_currentImageAcquire = m_surface->acquireNextImage(); - - m_oracle.reportEndFrameRecord(); - const auto timestamp = m_oracle.getNextPresentationTimeStamp(); - m_oracle.reportBeginFrameRecord(); - - return timestamp; - }; - - const auto nextPresentationTimestamp = updatePresentationTimestamp(); - - struct - { - std::vector mouse{}; - std::vector keyboard{}; - } capturedEvents; - - m_camera.beginInputProcessing(nextPresentationTimestamp); - { - const auto& io = ImGui::GetIO(); - m_mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void - { - if (!io.WantCaptureMouse) - m_camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl - - for (const auto& e : events) // here capture - { - if (e.timeStamp < previousEventTimestamp) - continue; - - previousEventTimestamp = e.timeStamp; - capturedEvents.mouse.emplace_back(e); - - } - }, m_logger.get()); - - m_keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void - { - if (!io.WantCaptureKeyboard) - m_camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl - - for (const auto& e : events) // here capture - { - if (e.timeStamp < previousEventTimestamp) - continue; - - previousEventTimestamp = e.timeStamp; - capturedEvents.keyboard.emplace_back(e); - } - }, m_logger.get()); - - } - m_camera.endInputProcessing(nextPresentationTimestamp); - - const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); - const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); - const auto cursorPosition = m_window->getCursorControl()->getPosition(); - const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); - - const ext::imgui::UI::SUpdateParameters params = - { - .mousePosition = mousePosition, - .displaySize = { m_window->getWidth(), m_window->getHeight() }, - .mouseEvents = mouseEvents, - .keyboardEvents = keyboardEvents - }; - - m_ui.manager->update(params); - } -#endif - inline bool keepRunning() override - { - if (m_surface->irrecoverable()) - return false; - - return true; - } - - inline bool onAppTerminated() override - { - return device_base_t::onAppTerminated(); - } - - private: -#if 0 - bool createAccelerationStructuresFromGeometry(const IGeometryCreator* gc) - { - auto queue = getGraphicsQueue(); - // get geometries into ICPUBuffers - auto pool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!pool) - return logFail("Couldn't create Command Pool for geometry creation!"); - - const auto defaultMaterial = Material{ - .ambient = {0.2, 0.1, 0.1}, - .diffuse = {0.8, 0.3, 0.3}, - .specular = {0.8, 0.8, 0.8}, - .shininess = 1.0f, - .alpha = 1.0f, - }; - - auto getTranslationMatrix = [](float32_t x, float32_t y, float32_t z) - { - core::matrix3x4SIMD transform; - transform.setTranslation(nbl::core::vectorSIMDf(x, y, z, 0)); - return transform; - }; - - core::matrix3x4SIMD planeTransform; - planeTransform.setRotation(quaternion::fromAngleAxis(core::radians(-90.0f), vector3df_SIMD{ 1, 0, 0 })); - - // triangles geometries - const auto cpuObjects = std::array{ - ReferenceObjectCpu { - .meta = {.type = OT_RECTANGLE, .name = "Plane Mesh"}, - .data = gc->createRectangleMesh(nbl::core::vector2df_SIMD(10, 10)), - .material = defaultMaterial, - .transform = planeTransform, - }, - ReferenceObjectCpu { - .meta = {.type = OT_CUBE, .name = "Cube Mesh"}, - .data = gc->createCubeMesh(nbl::core::vector3df(1, 1, 1)), - .material = defaultMaterial, - .transform = getTranslationMatrix(0, 0.5f, 0), - }, - ReferenceObjectCpu { - .meta = {.type = OT_CUBE, .name = "Cube Mesh 2"}, - .data = gc->createCubeMesh(nbl::core::vector3df(1.5, 1.5, 1.5)), - .material = Material{ - .ambient = {0.1, 0.1, 0.2}, - .diffuse = {0.2, 0.2, 0.8}, - .specular = {0.8, 0.8, 0.8}, - .shininess = 1.0f, - }, - .transform = getTranslationMatrix(-5.0f, 1.0f, 0), - }, - ReferenceObjectCpu { - .meta = {.type = OT_CUBE, .name = "Transparent Cube Mesh"}, - .data = gc->createCubeMesh(nbl::core::vector3df(1.5, 1.5, 1.5)), - .material = Material{ - .ambient = {0.1, 0.2, 0.1}, - .diffuse = {0.2, 0.8, 0.2}, - .specular = {0.8, 0.8, 0.8}, - .shininess = 1.0f, - .alpha = 0.2, - }, - .transform = getTranslationMatrix(5.0f, 1.0f, 0), - }, - }; - - struct CPUTriBufferBindings - { - nbl::asset::SBufferBinding vertex, index; - }; - std::array cpuTriBuffers; - - for (uint32_t i = 0; i < cpuObjects.size(); i++) - { - const auto& cpuObject = cpuObjects[i]; - - auto vBuffer = smart_refctd_ptr(cpuObject.data.bindings[0].buffer); // no offset - auto vUsage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | - IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - vBuffer->addUsageFlags(vUsage); - vBuffer->setContentHash(vBuffer->computeContentHash()); - - auto iBuffer = smart_refctd_ptr(cpuObject.data.indexBuffer.buffer); // no offset - auto iUsage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | - IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - - if (cpuObject.data.indexType != EIT_UNKNOWN) - if (iBuffer) - { - iBuffer->addUsageFlags(iUsage); - iBuffer->setContentHash(iBuffer->computeContentHash()); - } - - cpuTriBuffers[i] = { - .vertex = {.offset = 0, .buffer = vBuffer}, - .index = {.offset = 0, .buffer = iBuffer}, - }; - - } - - // procedural geometries - using Aabb = IGPUBottomLevelAccelerationStructure::AABB_t; - - smart_refctd_ptr cpuProcBuffer; - { - ICPUBuffer::SCreationParams params; - params.size = NumberOfProceduralGeometries * sizeof(Aabb); - cpuProcBuffer = ICPUBuffer::create(std::move(params)); - } - - core::vector proceduralGeoms; - proceduralGeoms.reserve(NumberOfProceduralGeometries); - auto proceduralGeometries = reinterpret_cast(cpuProcBuffer->getPointer()); - for (int32_t i = 0; i < NumberOfProceduralGeometries; i++) - { - const auto middle_i = NumberOfProceduralGeometries / 2.0; - SProceduralGeomInfo sphere = { - .material = hlsl::_static_cast(Material{ - .ambient = {0.1, 0.05 * i, 0.1}, - .diffuse = {0.3, 0.2 * i, 0.3}, - .specular = {0.8, 0.8, 0.8}, - .shininess = 1.0f, - }), - .center = float32_t3((i - middle_i) * 4.0, 2, 5.0), - .radius = 1, - }; - - proceduralGeoms.push_back(sphere); - const auto sphereMin = sphere.center - sphere.radius; - const auto sphereMax = sphere.center + sphere.radius; - proceduralGeometries[i] = { - vector3d(sphereMin.x, sphereMin.y, sphereMin.z), - vector3d(sphereMax.x, sphereMax.y, sphereMax.z) - }; - } - - { - IGPUBuffer::SCreationParams params; - params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - params.size = proceduralGeoms.size() * sizeof(SProceduralGeomInfo); - m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = queue }, std::move(params), proceduralGeoms.data()).move_into(m_proceduralGeomInfoBuffer); - } - - // get ICPUBuffers into ICPUBLAS - // TODO use one BLAS and multiple triangles/aabbs in one - const auto blasCount = std::size(cpuObjects) + 1; - const auto proceduralBlasIdx = std::size(cpuObjects); - - std::array, std::size(cpuObjects)+1u> cpuBlas; - for (uint32_t i = 0; i < blasCount; i++) - { - auto& blas = cpuBlas[i]; - blas = make_smart_refctd_ptr(); - - if (i == proceduralBlasIdx) - { - auto aabbs = make_refctd_dynamic_array>>(1u); - auto primitiveCounts = make_refctd_dynamic_array>(1u); - - auto& aabb = aabbs->front(); - auto& primCount = primitiveCounts->front(); - - primCount = NumberOfProceduralGeometries; - aabb.data = { .offset = 0, .buffer = cpuProcBuffer }; - aabb.stride = sizeof(IGPUBottomLevelAccelerationStructure::AABB_t); - aabb.geometryFlags = IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::OPAQUE_BIT; // only allow opaque for now - - blas->setGeometries(std::move(aabbs), std::move(primitiveCounts)); - } - else - { - auto triangles = make_refctd_dynamic_array>>(1u); - auto primitiveCounts = make_refctd_dynamic_array>(1u); - - auto& tri = triangles->front(); - auto& primCount = primitiveCounts->front(); - const auto& geom = cpuObjects[i]; - const auto& cpuBuf = cpuTriBuffers[i]; - - const bool useIndex = geom.data.indexType != EIT_UNKNOWN; - const uint32_t vertexStride = geom.data.inputParams.bindings[0].stride; - const uint32_t numVertices = cpuBuf.vertex.buffer->getSize() / vertexStride; - - if (useIndex) - primCount = geom.data.indexCount / 3; - else - primCount = numVertices / 3; - - tri.vertexData[0] = cpuBuf.vertex; - tri.indexData = useIndex ? cpuBuf.index : cpuBuf.vertex; - tri.maxVertex = numVertices - 1; - tri.vertexStride = vertexStride; - tri.vertexFormat = EF_R32G32B32_SFLOAT; - tri.indexType = geom.data.indexType; - tri.geometryFlags = geom.material.isTransparent() ? - IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::NO_DUPLICATE_ANY_HIT_INVOCATION_BIT : - IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::OPAQUE_BIT; - - blas->setGeometries(std::move(triangles), std::move(primitiveCounts)); - } - - auto blasFlags = bitflag(IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT) | IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::ALLOW_COMPACTION_BIT; - if (i == proceduralBlasIdx) - blasFlags |= IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT; - - blas->setBuildFlags(blasFlags); - blas->setContentHash(blas->computeContentHash()); - } - - auto geomInfoBuffer = ICPUBuffer::create({ std::size(cpuObjects) * sizeof(STriangleGeomInfo) }); - STriangleGeomInfo* geomInfos = reinterpret_cast(geomInfoBuffer->getPointer()); - - // get ICPUBLAS into ICPUTLAS - auto geomInstances = make_refctd_dynamic_array>(blasCount); - { - uint32_t i = 0; - for (auto instance = geomInstances->begin(); instance != geomInstances->end(); instance++, i++) - { - const auto isProceduralInstance = i == proceduralBlasIdx; - ICPUTopLevelAccelerationStructure::StaticInstance inst; - inst.base.blas = cpuBlas[i]; - inst.base.flags = static_cast(IGPUTopLevelAccelerationStructure::INSTANCE_FLAGS::TRIANGLE_FACING_CULL_DISABLE_BIT); - inst.base.instanceCustomIndex = i; - inst.base.instanceShaderBindingTableRecordOffset = isProceduralInstance ? 2 : 0;; - inst.base.mask = 0xFF; - inst.transform = isProceduralInstance ? matrix3x4SIMD() : cpuObjects[i].transform; - - instance->instance = inst; - } - } - - auto cpuTlas = make_smart_refctd_ptr(); - cpuTlas->setInstances(std::move(geomInstances)); - cpuTlas->setBuildFlags(IGPUTopLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT); - - // convert with asset converter - smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); - struct MyInputs : CAssetConverter::SInputs - { - // For the GPU Buffers to be directly writeable and so that we don't need a Transfer Queue submit at all - inline uint32_t constrainMemoryTypeBits(const size_t groupCopyID, const IAsset* canonicalAsset, const blake3_hash_t& contentHash, const IDeviceMemoryBacked* memoryBacked) const override - { - assert(memoryBacked); - return memoryBacked->getObjectType() != IDeviceMemoryBacked::EOT_BUFFER ? (~0u) : rebarMemoryTypes; - } - - uint32_t rebarMemoryTypes; - } inputs = {}; - inputs.logger = m_logger.get(); - inputs.rebarMemoryTypes = m_physicalDevice->getDirectVRAMAccessMemoryTypeBits(); - // the allocator needs to be overriden to hand out memory ranges which have already been mapped so that the ReBAR fast-path can kick in - // (multiple buffers can be bound to same memory, but memory can only be mapped once at one place, so Asset Converter can't do it) - struct MyAllocator final : public IDeviceMemoryAllocator - { - ILogicalDevice* getDeviceForAllocations() const override { return device; } - - SAllocation allocate(const SAllocateInfo& info) override - { - auto retval = device->allocate(info); - // map what is mappable by default so ReBAR checks succeed - if (retval.isValid() && retval.memory->isMappable()) - retval.memory->map({ .offset = 0,.length = info.size }); - return retval; - } - - ILogicalDevice* device; - } myalloc; - myalloc.device = m_device.get(); - inputs.allocator = &myalloc; - - std::array tmpTlas; - std::array tmpBuffers; - { - tmpTlas[0] = cpuTlas.get(); - for (uint32_t i = 0; i < cpuObjects.size(); i++) - { - tmpBuffers[2 * i + 0] = cpuTriBuffers[i].vertex.buffer.get(); - tmpBuffers[2 * i + 1] = cpuTriBuffers[i].index.buffer.get(); - } - tmpBuffers[2 * proceduralBlasIdx] = cpuProcBuffer.get(); - - std::get>(inputs.assets) = tmpTlas; - std::get>(inputs.assets) = tmpBuffers; - } - - auto reservation = converter->reserve(inputs); - { - auto prepass = [&](const auto & references) -> bool - { - auto objects = reservation.getGPUObjects(); - uint32_t counter = {}; - for (auto& object : objects) - { - auto gpu = object.value; - auto* reference = references[counter]; - - if (reference) - { - if (!gpu) - { - m_logger->log("Failed to convert a CPU object to GPU!", ILogger::ELL_ERROR); - return false; - } - } - counter++; - } - return true; - }; - - prepass.template operator() < ICPUTopLevelAccelerationStructure > (tmpTlas); - prepass.template operator() < ICPUBuffer > (tmpBuffers); - } - - constexpr auto CompBufferCount = 2; - std::array, CompBufferCount> compBufs = {}; - std::array compBufInfos = {}; - { - auto pool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT | IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); - pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, compBufs); - compBufs.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - for (auto i = 0; i < CompBufferCount; i++) - compBufInfos[i].cmdbuf = compBufs[i].get(); - } - auto compSema = m_device->createSemaphore(0u); - SIntendedSubmitInfo compute = {}; - compute.queue = queue; - compute.scratchCommandBuffers = compBufInfos; - compute.scratchSemaphore = { - .semaphore = compSema.get(), - .value = 0u, - .stageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT | PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT - }; - // convert - { - smart_refctd_ptr scratchAlloc; - { - constexpr auto MaxAlignment = 256; - constexpr auto MinAllocationSize = 1024; - const auto scratchSize = core::alignUp(reservation.getMaxASBuildScratchSize(false), MaxAlignment); - - - IGPUBuffer::SCreationParams creationParams = {}; - creationParams.size = scratchSize; - creationParams.usage = IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - auto scratchBuffer = m_device->createBuffer(std::move(creationParams)); - - auto reqs = scratchBuffer->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getDirectVRAMAccessMemoryTypeBits(); - - auto allocation = m_device->allocate(reqs, scratchBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - allocation.memory->map({ .offset = 0,.length = reqs.size }); - - scratchAlloc = make_smart_refctd_ptr( - SBufferRange{0ull, scratchSize, std::move(scratchBuffer)}, - core::allocator(), MaxAlignment, MinAllocationSize - ); - } - - struct MyParams final : CAssetConverter::SConvertParams - { - inline uint32_t getFinalOwnerQueueFamily(const IGPUBuffer* buffer, const core::blake3_hash_t& createdFrom) override - { - return finalUser; - } - inline uint32_t getFinalOwnerQueueFamily(const IGPUAccelerationStructure* image, const core::blake3_hash_t& createdFrom) override - { - return finalUser; - } - - uint8_t finalUser; - } params = {}; - params.utilities = m_utils.get(); - params.compute = &compute; - params.scratchForDeviceASBuild = scratchAlloc.get(); - params.finalUser = queue->getFamilyIndex(); - - auto future = reservation.convert(params); - if (future.copy() != IQueue::RESULT::SUCCESS) - { - m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); - return false; - } - // 2 submits, BLAS build, TLAS build, DO NOT ADD COMPACTIONS IN THIS EXAMPLE! - if (compute.getFutureScratchSemaphore().value>3) - m_logger->log("Overflow submitted on Compute Queue despite using ReBAR (no transfer submits or usage of staging buffer) and providing a AS Build Scratch Buffer of correctly queried max size!",system::ILogger::ELL_ERROR); - - // assign gpu objects to output - auto&& tlases = reservation.getGPUObjects(); - m_gpuTlas = tlases[0].value; - auto&& buffers = reservation.getGPUObjects(); - for (uint32_t i = 0; i < cpuObjects.size(); i++) - { - auto& cpuObject = cpuObjects[i]; - - m_gpuTriangleGeometries.push_back(ReferenceObjectGpu{ - .meta = cpuObject.meta, - .bindings = { - .vertex = {.offset = 0, .buffer = buffers[2 * i + 0].value }, - .index = {.offset = 0, .buffer = buffers[2 * i + 1].value }, - }, - .vertexStride = cpuObject.data.inputParams.bindings[0].stride, - .indexType = cpuObject.data.indexType, - .indexCount = cpuObject.data.indexCount, - .material = hlsl::_static_cast(cpuObject.material), - .transform = cpuObject.transform, - }); - } - m_proceduralAabbBuffer = buffers[2 * proceduralBlasIdx].value; - - for (uint32_t i = 0; i < m_gpuTriangleGeometries.size(); i++) - { - const auto& gpuObject = m_gpuTriangleGeometries[i]; - const uint64_t vertexBufferAddress = gpuObject.bindings.vertex.buffer->getDeviceAddress(); - geomInfos[i] = { - .material = gpuObject.material, - .vertexBufferAddress = vertexBufferAddress, - .indexBufferAddress = gpuObject.useIndex() ? gpuObject.bindings.index.buffer->getDeviceAddress() : vertexBufferAddress, - .vertexStride = gpuObject.vertexStride, - .objType = gpuObject.meta.type, - .indexType = gpuObject.indexType, - .smoothNormals = s_smoothNormals[gpuObject.meta.type], - }; - } - } - - { - IGPUBuffer::SCreationParams params; - params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - params.size = geomInfoBuffer->getSize(); - m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = queue }, std::move(params), geomInfos).move_into(m_triangleGeomInfoBuffer); - } - - return true; - } -#endif - smart_refctd_ptr m_converter; - - smart_refctd_ptr m_window; - smart_refctd_ptr> m_surface; - smart_refctd_ptr m_semaphore; - uint64_t m_realFrameIx = 0; -uint32_t m_frameAccumulationCounter = 0; - std::array, MaxFramesInFlight> m_cmdBufs; - ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; - - core::smart_refctd_ptr m_inputSystem; - InputSystem::ChannelReader m_mouse; - InputSystem::ChannelReader m_keyboard; - - struct CameraSetting - { - float fov = 60.f; - float zNear = 0.1f; - float zFar = 10000.f; - float moveSpeed = 1.f; - float rotateSpeed = 1.f; - float viewWidth = 10.f; - float camYAngle = 165.f / 180.f * 3.14159f; - float camXAngle = 32.f / 180.f * 3.14159f; - - } m_cameraSetting; - Camera m_camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); - - video::CDumbPresentationOracle m_oracle; - -#if 0 - struct C_UI - { - nbl::core::smart_refctd_ptr manager; - - struct - { - core::smart_refctd_ptr gui, scene; - } samplers; - - core::smart_refctd_ptr descriptorSet; - } m_ui; - core::smart_refctd_ptr m_guiDescriptorSetPool; - - core::vector m_gpuTriangleGeometries; - core::vector m_gpuIntersectionSpheres; - uint32_t m_intersectionHitGroupIdx; - - smart_refctd_ptr m_gpuTlas; - smart_refctd_ptr m_instanceBuffer; - - smart_refctd_ptr m_triangleGeomInfoBuffer; - smart_refctd_ptr m_proceduralGeomInfoBuffer; - smart_refctd_ptr m_proceduralAabbBuffer; - smart_refctd_ptr m_indirectBuffer; - - smart_refctd_ptr m_hdrImage; - smart_refctd_ptr m_hdrImageView; - - smart_refctd_ptr m_rayTracingDsPool; - smart_refctd_ptr m_rayTracingDs; - smart_refctd_ptr m_rayTracingPipeline; - uint64_t m_rayTracingStackSize; - ShaderBindingTable m_shaderBindingTable; - - smart_refctd_ptr m_presentDs; - smart_refctd_ptr m_presentDsPool; - smart_refctd_ptr m_presentPipeline; - -#endif -}; -NBL_MAIN_FUNC(MeshLoadersApp) diff --git a/29_MeshLoaders/pipeline.groovy b/29_MeshLoaders/pipeline.groovy deleted file mode 100644 index 9a89cc786..000000000 --- a/29_MeshLoaders/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder - -class CPLYSTLDemoBuilder extends IBuilder -{ - public CPLYSTLDemoBuilder(Agent _agent, _info) - { - super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - return true - } -} - -def create(Agent _agent, _info) -{ - return new CPLYSTLDemoBuilder(_agent, _info) -} - -return this \ No newline at end of file From 4f1fabdb786b87e4609649a36059c33cd54ae843 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 9 Jul 2025 15:19:03 +0700 Subject: [PATCH 018/219] add debug aabb draws around mesh --- 12_MeshLoaders/CMakeLists.txt | 8 ++++- 12_MeshLoaders/main.cpp | 58 +++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/12_MeshLoaders/CMakeLists.txt b/12_MeshLoaders/CMakeLists.txt index dee195066..3743e57cb 100644 --- a/12_MeshLoaders/CMakeLists.txt +++ b/12_MeshLoaders/CMakeLists.txt @@ -15,4 +15,10 @@ endif() # TODO; Arek I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here, doesn't this macro have 4 arguments anyway !? nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") # TODO: Arek temporarily disabled cause I haven't figured out how to make this target yet -# LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) \ No newline at end of file +# LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) + +if (NBL_BUILD_DEBUG_DRAW) + add_dependencies(${EXECUTABLE_NAME} ${NBL_EXT_DEBUG_DRAW_TARGET}) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_EXT_DEBUG_DRAW_TARGET}) + target_include_directories(${EXECUTABLE_NAME} PUBLIC $) +endif() diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index 3a4d8b13b..4d57eb18e 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -9,6 +9,10 @@ #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" #endif +#ifdef NBL_BUILD_DEBUG_DRAW +#include "nbl/ext/DebugDraw/CDrawAABB.h" +#endif + class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourcesApplication { using device_base_t = MonoWindowApplication; @@ -48,6 +52,18 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc if (!m_renderer) return logFail("Failed to create renderer!"); +#ifdef NBL_BUILD_DEBUG_DRAW + { + auto* renderpass = scRes->getRenderpass(); + ext::debugdraw::DrawAABB::SCreationParameters params = {}; + params.assetManager = m_assetMgr; + params.pipelineLayout = ext::debugdraw::DrawAABB::createDefaultPipelineLayout(m_device.get()); + params.renderpass = smart_refctd_ptr(renderpass); + params.utilities = m_utils; + drawAABB = ext::debugdraw::DrawAABB::create(std::move(params)); + } +#endif + // if (!reloadModel()) return false; @@ -109,8 +125,12 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { for (const auto& event : events) - if (event.keyCode==E_KEY_CODE::EKC_R && event.action==SKeyboardEvent::ECA_RELEASED) - reload = true; + { + if (event.keyCode == E_KEY_CODE::EKC_R && event.action == SKeyboardEvent::ECA_RELEASED) + reload = true; + if (event.keyCode == E_KEY_CODE::EKC_B && event.action == SKeyboardEvent::ECA_RELEASED) + m_drawBBs = !m_drawBBs; + } camera.keyboardProcess(events); }, m_logger.get() @@ -130,6 +150,24 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc } m_renderer->render(cb,CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix)); } +#ifdef NBL_BUILD_DEBUG_DRAW + if (m_drawBBs) + { + core::matrix4SIMD modelViewProjectionMatrix; + { + const auto viewMatrix = camera.getViewMatrix(); + const auto projectionMatrix = camera.getProjectionMatrix(); + const auto viewProjectionMatrix = camera.getConcatenatedMatrix(); + + core::matrix3x4SIMD modelMatrix; + modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); + modelMatrix.setRotation(quaternion(0, 0, 0)); + modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); + } + const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + drawAABB->render(cb, drawFinished, modelViewProjectionMatrix.pointer()); + } +#endif cb->endRenderPass(); } cb->end(); @@ -349,7 +387,10 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc return false; } } - + +#ifdef NBL_BUILD_DEBUG_DRAW + drawAABB->clearAABBs(); +#endif auto tmp = hlsl::float32_t4x3( hlsl::float32_t3(1,0,0), hlsl::float32_t3(0,1,0), @@ -367,6 +408,12 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc const auto transformed = hlsl::shapes::util::transform(promotedWorld,promoted); printAABB(transformed,"Transformed"); bound = hlsl::shapes::util::union_(transformed,bound); + +#ifdef NBL_BUILD_DEBUG_DRAW + const auto tmpAabb = shapes::AABB<3,float>(promoted.minVx, promoted.maxVx); + const auto tmpWorld = hlsl::float32_t3x4(promotedWorld); + drawAABB->addOBB(tmpAabb, tmpWorld, hlsl::float32_t4{ 1,1,1,1 }); +#endif } printAABB(bound,"Total"); if (!m_renderer->addGeometries({ &converted.front().get(),converted.size() })) @@ -416,6 +463,11 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); // mutables std::string m_modelPath; + + bool m_drawBBs = true; +#ifdef NBL_BUILD_DEBUG_DRAW + smart_refctd_ptr drawAABB; +#endif }; NBL_MAIN_FUNC(MeshLoadersApp) \ No newline at end of file From bfd286e04ae2cc468c241e32605cd0ad0dd663ba Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 18 Aug 2025 16:52:07 +0700 Subject: [PATCH 019/219] refactor debug_draw namespace --- 12_MeshLoaders/main.cpp | 8 ++++---- 34_DebugDraw/main.cpp | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index 4d57eb18e..c10be19ec 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -55,12 +55,12 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc #ifdef NBL_BUILD_DEBUG_DRAW { auto* renderpass = scRes->getRenderpass(); - ext::debugdraw::DrawAABB::SCreationParameters params = {}; + ext::debug_draw::DrawAABB::SCreationParameters params = {}; params.assetManager = m_assetMgr; - params.pipelineLayout = ext::debugdraw::DrawAABB::createDefaultPipelineLayout(m_device.get()); + params.pipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); params.renderpass = smart_refctd_ptr(renderpass); params.utilities = m_utils; - drawAABB = ext::debugdraw::DrawAABB::create(std::move(params)); + drawAABB = ext::debug_draw::DrawAABB::create(std::move(params)); } #endif @@ -466,7 +466,7 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc bool m_drawBBs = true; #ifdef NBL_BUILD_DEBUG_DRAW - smart_refctd_ptr drawAABB; + smart_refctd_ptr drawAABB; #endif }; diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 937d699b8..739a6a7d8 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -122,15 +122,15 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti .size = sizeof(SSimplePushConstants) }; { - ext::debugdraw::DrawAABB::SCreationParameters params = {}; + ext::debug_draw::DrawAABB::SCreationParameters params = {}; params.assetManager = m_assetMgr; - params.pipelineLayout = ext::debugdraw::DrawAABB::createDefaultPipelineLayout(m_device.get()); + params.pipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); params.renderpass = smart_refctd_ptr(renderpass); params.utilities = m_utils; - drawAABB = ext::debugdraw::DrawAABB::create(std::move(params)); + drawAABB = ext::debug_draw::DrawAABB::create(std::move(params)); } { - auto vertices = ext::debugdraw::DrawAABB::getVerticesFromAABB(testAABB); + auto vertices = ext::debug_draw::DrawAABB::getVerticesFromAABB(testAABB); IGPUBuffer::SCreationParams params; params.size = sizeof(float32_t3) * vertices.size(); params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; @@ -166,11 +166,11 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti auto vertexShader = compileShader("app_resources/simple.vertex.hlsl"); auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); - const auto pipelineLayout = ext::debugdraw::DrawAABB::createDefaultPipelineLayout(m_device.get(), simplePcRange); + const auto pipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get(), simplePcRange); IGPUGraphicsPipeline::SShaderSpecInfo vs = { .shader = vertexShader.get(), .entryPoint = "main" }; IGPUGraphicsPipeline::SShaderSpecInfo fs = { .shader = fragmentShader.get(), .entryPoint = "main" }; - m_pipeline = ext::debugdraw::DrawAABB::createDefaultPipeline(m_device.get(), pipelineLayout.get(), renderpass, vs, fs); + m_pipeline = ext::debug_draw::DrawAABB::createDefaultPipeline(m_device.get(), pipelineLayout.get(), renderpass, vs, fs); if (!m_pipeline) return logFail("Graphics pipeline creation failed"); } @@ -453,7 +453,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; - smart_refctd_ptr drawAABB; + smart_refctd_ptr drawAABB; core::aabbox3d testAABB = core::aabbox3d({ -5, -5, -5 }, { 10, 10, -10 }); core::aabbox3d testAABB2 = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); smart_refctd_ptr verticesBuffer; From 8518c2b342217548d0c6797b26b5c5e20bc4df60 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 19 Aug 2025 11:30:51 +0700 Subject: [PATCH 020/219] refactor remove usage of legacy matrices --- 12_MeshLoaders/main.cpp | 24 +++++++---------- 34_DebugDraw/app_resources/simple_common.hlsl | 4 --- 34_DebugDraw/main.cpp | 26 +++++++------------ 3 files changed, 19 insertions(+), 35 deletions(-) diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index c10be19ec..e72f71cae 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -140,9 +140,9 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc reloadModel(); } // draw scene + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; { - float32_t3x4 viewMatrix; - float32_t4x4 viewProjMatrix; // TODO: get rid of legacy matrices { memcpy(&viewMatrix,camera.getViewMatrix().pointer(),sizeof(viewMatrix)); @@ -153,19 +153,8 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc #ifdef NBL_BUILD_DEBUG_DRAW if (m_drawBBs) { - core::matrix4SIMD modelViewProjectionMatrix; - { - const auto viewMatrix = camera.getViewMatrix(); - const auto projectionMatrix = camera.getProjectionMatrix(); - const auto viewProjectionMatrix = camera.getConcatenatedMatrix(); - - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - modelMatrix.setRotation(quaternion(0, 0, 0)); - modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); - } const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - drawAABB->render(cb, drawFinished, modelViewProjectionMatrix.pointer()); + drawAABB->render(cb, drawFinished, viewProjMatrix); } #endif cb->endRenderPass(); @@ -412,7 +401,12 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc #ifdef NBL_BUILD_DEBUG_DRAW const auto tmpAabb = shapes::AABB<3,float>(promoted.minVx, promoted.maxVx); const auto tmpWorld = hlsl::float32_t3x4(promotedWorld); - drawAABB->addOBB(tmpAabb, tmpWorld, hlsl::float32_t4{ 1,1,1,1 }); + float32_t4x4 tmpWorld4x4; + tmpWorld4x4[0] = tmpWorld[0]; + tmpWorld4x4[1] = tmpWorld[1]; + tmpWorld4x4[2] = tmpWorld[2]; + tmpWorld4x4[3] = float32_t4(0, 0, 0, 1); + drawAABB->addOBB(tmpAabb, tmpWorld4x4, hlsl::float32_t4{ 1,1,1,1 }); #endif } printAABB(bound,"Total"); diff --git a/34_DebugDraw/app_resources/simple_common.hlsl b/34_DebugDraw/app_resources/simple_common.hlsl index d74d64a8d..c5f658f4e 100644 --- a/34_DebugDraw/app_resources/simple_common.hlsl +++ b/34_DebugDraw/app_resources/simple_common.hlsl @@ -5,11 +5,7 @@ struct SSimplePushConstants { -#ifdef __HLSL_VERSION float32_t4x4 MVP; -#else - float MVP[4*4]; -#endif uint64_t pVertices; }; diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 739a6a7d8..970f579a8 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -237,17 +237,8 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti camera.endInputProcessing(nextPresentationTimestamp); } - core::matrix4SIMD modelViewProjectionMatrix; - { - const auto viewMatrix = camera.getViewMatrix(); - const auto projectionMatrix = camera.getProjectionMatrix(); - const auto viewProjectionMatrix = camera.getConcatenatedMatrix(); - - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - modelMatrix.setRotation(quaternion(0, 0, 0)); - modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); - } + float32_t4x4 viewProjectionMatrix; + memcpy(&viewProjectionMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjectionMatrix)); // TODO: get rid of legacy transform auto* queue = getGraphicsQueue(); @@ -287,7 +278,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti { SSimplePushConstants pc; - memcpy(pc.MVP, modelViewProjectionMatrix.pointer(), sizeof(pc.MVP)); + pc.MVP = viewProjectionMatrix; pc.pVertices = verticesBuffer->getDeviceAddress(); cmdbuf->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); @@ -297,6 +288,9 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti } { + using aabb_t = hlsl::shapes::AABB<3, float>; + using point_t = aabb_t::point_t; + std::mt19937 gen(42); std::uniform_real_distribution translate_dis(-50.f, 50.f); std::uniform_real_distribution scale_dis(1.f, 10.f); @@ -305,14 +299,14 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti drawAABB->clearAABBs(); for (auto i = 0u; i < aabbCount; i++) { - core::vector3d pmin = { translate_dis(gen), translate_dis(gen), translate_dis(gen) }; - core::vector3d pmax = pmin + core::vector3d{ scale_dis(gen), scale_dis(gen), scale_dis(gen) }; - core::aabbox3d aabb = { pmin, pmax }; + point_t pmin = { translate_dis(gen), translate_dis(gen), translate_dis(gen) }; + point_t pmax = pmin + point_t{ scale_dis(gen), scale_dis(gen), scale_dis(gen) }; + aabb_t aabb = { pmin, pmax }; drawAABB->addAABB(aabb, { color_dis(gen),color_dis(gen),color_dis(gen),1}); } const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - drawAABB->render(cmdbuf, drawFinished, modelViewProjectionMatrix.pointer()); + drawAABB->render(cmdbuf, drawFinished, viewProjectionMatrix); } cmdbuf->endRenderPass(); From 323c782226a402e0e4d21e902029a0602f616cff Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 20 Aug 2025 11:42:50 +0700 Subject: [PATCH 021/219] refactor examples with latest DrawAabb changes --- 12_MeshLoaders/main.cpp | 38 +++++---- 34_DebugDraw/CMakeLists.txt | 18 ----- .../app_resources/simple.fragment.hlsl | 11 --- 34_DebugDraw/app_resources/simple.vertex.hlsl | 21 ----- 34_DebugDraw/app_resources/simple_common.hlsl | 20 ----- 34_DebugDraw/main.cpp | 81 ++++--------------- 6 files changed, 41 insertions(+), 148 deletions(-) delete mode 100644 34_DebugDraw/app_resources/simple.fragment.hlsl delete mode 100644 34_DebugDraw/app_resources/simple.vertex.hlsl delete mode 100644 34_DebugDraw/app_resources/simple_common.hlsl diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index e72f71cae..65ae17123 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -54,13 +54,21 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc #ifdef NBL_BUILD_DEBUG_DRAW { + SPushConstantRange dummyPcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, + .offset = 0, + .size = sizeof(ext::debug_draw::SSinglePushConstants) + }; + auto* renderpass = scRes->getRenderpass(); ext::debug_draw::DrawAABB::SCreationParameters params = {}; params.assetManager = m_assetMgr; - params.pipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); + params.transfer = getTransferUpQueue(); + params.singlePipelineLayout = ext::debug_draw::DrawAABB::createPipelineLayoutFromPCRange(m_device.get(), dummyPcRange); // not used + params.batchPipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); params.renderpass = smart_refctd_ptr(renderpass); params.utilities = m_utils; - drawAABB = ext::debug_draw::DrawAABB::create(std::move(params)); + m_drawAABB = ext::debug_draw::DrawAABB::create(std::move(params)); } #endif @@ -154,7 +162,7 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc if (m_drawBBs) { const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - drawAABB->render(cb, drawFinished, viewProjMatrix); + m_drawAABB->render(cb, drawFinished, m_aabbInstances, viewProjMatrix); } #endif cb->endRenderPass(); @@ -377,9 +385,6 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc } } -#ifdef NBL_BUILD_DEBUG_DRAW - drawAABB->clearAABBs(); -#endif auto tmp = hlsl::float32_t4x3( hlsl::float32_t3(1,0,0), hlsl::float32_t3(0,1,0), @@ -388,8 +393,10 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc ); core::vector worldTforms; const auto& converted = reservation.getGPUObjects(); - for (const auto& geom : converted) + m_aabbInstances.resize(converted.size()); + for (uint32_t i = 0; i < converted.size(); i++) { + const auto& geom = converted[i]; const auto promoted = geom.value->getAABB(); printAABB(promoted,"Geometry"); tmp[3].x += promoted.getExtent().x; @@ -399,14 +406,16 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc bound = hlsl::shapes::util::union_(transformed,bound); #ifdef NBL_BUILD_DEBUG_DRAW + auto& inst = m_aabbInstances[i]; const auto tmpAabb = shapes::AABB<3,float>(promoted.minVx, promoted.maxVx); + hlsl::float32_t4x4 instanceTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(tmpAabb); const auto tmpWorld = hlsl::float32_t3x4(promotedWorld); - float32_t4x4 tmpWorld4x4; - tmpWorld4x4[0] = tmpWorld[0]; - tmpWorld4x4[1] = tmpWorld[1]; - tmpWorld4x4[2] = tmpWorld[2]; - tmpWorld4x4[3] = float32_t4(0, 0, 0, 1); - drawAABB->addOBB(tmpAabb, tmpWorld4x4, hlsl::float32_t4{ 1,1,1,1 }); + inst.color = { 1,1,1,1 }; + inst.transform[0] = tmpWorld[0]; + inst.transform[1] = tmpWorld[1]; + inst.transform[2] = tmpWorld[2]; + inst.transform[3] = float32_t4(0, 0, 0, 1); + inst.transform = hlsl::mul(inst.transform, instanceTransform); #endif } printAABB(bound,"Total"); @@ -460,7 +469,8 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc bool m_drawBBs = true; #ifdef NBL_BUILD_DEBUG_DRAW - smart_refctd_ptr drawAABB; + smart_refctd_ptr m_drawAABB; + std::vector m_aabbInstances; #endif }; diff --git a/34_DebugDraw/CMakeLists.txt b/34_DebugDraw/CMakeLists.txt index 557280430..89d0bc8e3 100644 --- a/34_DebugDraw/CMakeLists.txt +++ b/34_DebugDraw/CMakeLists.txt @@ -8,22 +8,4 @@ if(NBL_BUILD_DEBUG_DRAW) add_dependencies(${EXECUTABLE_NAME} ${NBL_EXT_DEBUG_DRAW_TARGET}) target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_EXT_DEBUG_DRAW_TARGET}) target_include_directories(${EXECUTABLE_NAME} PUBLIC $) - - if(NBL_EMBED_BUILTIN_RESOURCES) - set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) - set(RESOURCE_DIR "app_resources") - - get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) - - file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") - foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") - endforeach() - - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") - - LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) - endif() endif() diff --git a/34_DebugDraw/app_resources/simple.fragment.hlsl b/34_DebugDraw/app_resources/simple.fragment.hlsl deleted file mode 100644 index ccd8bdffd..000000000 --- a/34_DebugDraw/app_resources/simple.fragment.hlsl +++ /dev/null @@ -1,11 +0,0 @@ -#pragma shader_stage(fragment) - -#include "simple_common.hlsl" - -[shader("pixel")] -float32_t4 main(PSInput input) : SV_TARGET -{ - float32_t4 outColor = input.color; - - return outColor; -} \ No newline at end of file diff --git a/34_DebugDraw/app_resources/simple.vertex.hlsl b/34_DebugDraw/app_resources/simple.vertex.hlsl deleted file mode 100644 index 9e362ee75..000000000 --- a/34_DebugDraw/app_resources/simple.vertex.hlsl +++ /dev/null @@ -1,21 +0,0 @@ -#pragma shader_stage(vertex) - -#include "nbl/builtin/hlsl/bda/__ptr.hlsl" -#include "simple_common.hlsl" - -using namespace nbl::hlsl; - -[[vk::push_constant]] SSimplePushConstants pc; - -[shader("vertex")] -PSInput main(uint vertexID : SV_VertexID) -{ - PSInput output; - - float32_t3 vertex = (bda::__ptr::create(pc.pVertices) + vertexID).deref_restrict().load(); - - output.position = mul(pc.MVP, float32_t4(vertex, 1)); - output.color = float32_t4(1, 0, 0, 1); - - return output; -} \ No newline at end of file diff --git a/34_DebugDraw/app_resources/simple_common.hlsl b/34_DebugDraw/app_resources/simple_common.hlsl deleted file mode 100644 index c5f658f4e..000000000 --- a/34_DebugDraw/app_resources/simple_common.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _DRAW_AABB_SIMPLE_COMMON_HLSL -#define _DRAW_AABB_SIMPLE_COMMON_HLSL - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" - -struct SSimplePushConstants -{ - float32_t4x4 MVP; - uint64_t pVertices; -}; - -#ifdef __HLSL_VERSION -struct PSInput -{ - float32_t4 position : SV_Position; - float32_t4 color : TEXCOORD0; -}; -#endif - -#endif diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 970f579a8..57c3e3051 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -3,7 +3,6 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" -#include "app_resources/simple_common.hlsl" class DebugDrawSampleApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { @@ -119,60 +118,17 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti SPushConstantRange simplePcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, .offset = 0, - .size = sizeof(SSimplePushConstants) + .size = sizeof(ext::debug_draw::SSinglePushConstants) }; { ext::debug_draw::DrawAABB::SCreationParameters params = {}; + params.transfer = getTransferUpQueue(); params.assetManager = m_assetMgr; - params.pipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); + params.singlePipelineLayout = ext::debug_draw::DrawAABB::createPipelineLayoutFromPCRange(m_device.get(), simplePcRange); + params.batchPipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); params.renderpass = smart_refctd_ptr(renderpass); params.utilities = m_utils; drawAABB = ext::debug_draw::DrawAABB::create(std::move(params)); - } - { - auto vertices = ext::debug_draw::DrawAABB::getVerticesFromAABB(testAABB); - IGPUBuffer::SCreationParams params; - params.size = sizeof(float32_t3) * vertices.size(); - params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - - m_utils->createFilledDeviceLocalBufferOnDedMem( - SIntendedSubmitInfo{ .queue = getTransferUpQueue() }, - std::move(params), - vertices.data() - ).move_into(verticesBuffer); - } - - auto compileShader = [&](const std::string& filePath) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lparams = {}; - lparams.logger = m_logger.get(); - lparams.workingDirectory = localInputCWD; - auto bundle = m_assetMgr->getAsset(filePath, lparams); - if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) - { - m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath.c_str()); - exit(-1); - } - - const auto assets = bundle.getContents(); - assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - if (!shaderSrc) - return nullptr; - - return m_device->compileShader({ shaderSrc.get() }); - }; - { - auto vertexShader = compileShader("app_resources/simple.vertex.hlsl"); - auto fragmentShader = compileShader("app_resources/simple.fragment.hlsl"); - - const auto pipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get(), simplePcRange); - - IGPUGraphicsPipeline::SShaderSpecInfo vs = { .shader = vertexShader.get(), .entryPoint = "main" }; - IGPUGraphicsPipeline::SShaderSpecInfo fs = { .shader = fragmentShader.get(), .entryPoint = "main" }; - m_pipeline = ext::debug_draw::DrawAABB::createDefaultPipeline(m_device.get(), pipelineLayout.get(), renderpass, vs, fs); - if (!m_pipeline) - return logFail("Graphics pipeline creation failed"); } m_window->setCaption("[Nabla Engine] Debug Draw App Test Demo"); @@ -276,16 +232,9 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti .renderArea = currentRenderArea }; - { - SSimplePushConstants pc; - pc.MVP = viewProjectionMatrix; - pc.pVertices = verticesBuffer->getDeviceAddress(); - - cmdbuf->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - cmdbuf->bindGraphicsPipeline(m_pipeline.get()); - cmdbuf->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SSimplePushConstants), &pc); - drawAABB->renderSingle(cmdbuf); - } + cmdbuf->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + drawAABB->renderSingle(cmdbuf, testAABB, float32_t4{1, 0, 0, 1}, viewProjectionMatrix); { using aabb_t = hlsl::shapes::AABB<3, float>; @@ -296,17 +245,23 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti std::uniform_real_distribution scale_dis(1.f, 10.f); std::uniform_real_distribution color_dis(0.f, 1.f); const uint32_t aabbCount = 200u; - drawAABB->clearAABBs(); + + std::array aabbInstances; for (auto i = 0u; i < aabbCount; i++) { point_t pmin = { translate_dis(gen), translate_dis(gen), translate_dis(gen) }; point_t pmax = pmin + point_t{ scale_dis(gen), scale_dis(gen), scale_dis(gen) }; aabb_t aabb = { pmin, pmax }; - drawAABB->addAABB(aabb, { color_dis(gen),color_dis(gen),color_dis(gen),1}); + + auto& instance = aabbInstances[i]; + instance.color = { color_dis(gen),color_dis(gen),color_dis(gen),1 }; + + hlsl::float32_t4x4 instanceTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(aabb); + instance.transform = instanceTransform; } const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - drawAABB->render(cmdbuf, drawFinished, viewProjectionMatrix); + drawAABB->render(cmdbuf, drawFinished, aabbInstances, viewProjectionMatrix); } cmdbuf->endRenderPass(); @@ -448,9 +403,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; smart_refctd_ptr drawAABB; - core::aabbox3d testAABB = core::aabbox3d({ -5, -5, -5 }, { 10, 10, -10 }); - core::aabbox3d testAABB2 = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); - smart_refctd_ptr verticesBuffer; + hlsl::shapes::AABB<3, float> testAABB = hlsl::shapes::AABB<3, float>{ { -5, -5, -5 }, { 10, 10, -10 } }; using streaming_buffer_t = video::StreamingTransientDataBufferST>; smart_refctd_ptr streamingBuffer; From f75dc215f94f7a30c5083433f78e8937e154da44 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 20 Aug 2025 16:13:00 +0700 Subject: [PATCH 022/219] use draw modes --- 12_MeshLoaders/main.cpp | 8 +------- 34_DebugDraw/main.cpp | 1 + 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index 65ae17123..5e9a8114d 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -54,17 +54,11 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc #ifdef NBL_BUILD_DEBUG_DRAW { - SPushConstantRange dummyPcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, - .offset = 0, - .size = sizeof(ext::debug_draw::SSinglePushConstants) - }; - auto* renderpass = scRes->getRenderpass(); ext::debug_draw::DrawAABB::SCreationParameters params = {}; params.assetManager = m_assetMgr; params.transfer = getTransferUpQueue(); - params.singlePipelineLayout = ext::debug_draw::DrawAABB::createPipelineLayoutFromPCRange(m_device.get(), dummyPcRange); // not used + params.drawMode = ext::debug_draw::DrawAABB::ADM_DRAW_BATCH; params.batchPipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); params.renderpass = smart_refctd_ptr(renderpass); params.utilities = m_utils; diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 57c3e3051..3897bcc14 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -124,6 +124,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti ext::debug_draw::DrawAABB::SCreationParameters params = {}; params.transfer = getTransferUpQueue(); params.assetManager = m_assetMgr; + params.drawMode = ext::debug_draw::DrawAABB::ADM_DRAW_BOTH; params.singlePipelineLayout = ext::debug_draw::DrawAABB::createPipelineLayoutFromPCRange(m_device.get(), simplePcRange); params.batchPipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); params.renderpass = smart_refctd_ptr(renderpass); From fdd9b0c0ba39a599909b85ee92007336096f2fc4 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 6 Sep 2025 21:14:01 +0700 Subject: [PATCH 023/219] Add Draw OBB to example 12 --- 12_MeshLoaders/main.cpp | 58 ++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index 5e9a8114d..10e85251c 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -18,6 +18,14 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc using device_base_t = MonoWindowApplication; using asset_base_t = BuiltinResourcesApplication; + enum DrawBoundingBoxMode + { + DBBM_NONE, + DBBM_AABB, + DBBM_OBB, + DBBM_COUNT + }; + public: inline MeshLoadersApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), @@ -131,7 +139,9 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc if (event.keyCode == E_KEY_CODE::EKC_R && event.action == SKeyboardEvent::ECA_RELEASED) reload = true; if (event.keyCode == E_KEY_CODE::EKC_B && event.action == SKeyboardEvent::ECA_RELEASED) - m_drawBBs = !m_drawBBs; + { + m_drawBBMode = DrawBoundingBoxMode((m_drawBBMode + 1) % DBBM_COUNT); + } } camera.keyboardProcess(events); }, @@ -153,10 +163,10 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc m_renderer->render(cb,CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix)); } #ifdef NBL_BUILD_DEBUG_DRAW - if (m_drawBBs) + if (m_drawBBMode != DBBM_NONE) { const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - m_drawAABB->render(cb, drawFinished, m_aabbInstances, viewProjMatrix); + m_drawAABB->render(cb, drawFinished, m_drawBBMode == DBBM_OBB ? m_obbInstances : m_aabbInstances, viewProjMatrix); } #endif cb->endRenderPass(); @@ -388,6 +398,7 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc core::vector worldTforms; const auto& converted = reservation.getGPUObjects(); m_aabbInstances.resize(converted.size()); + m_obbInstances.resize(converted.size()); for (uint32_t i = 0; i < converted.size(); i++) { const auto& geom = converted[i]; @@ -398,20 +409,39 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc const auto transformed = hlsl::shapes::util::transform(promotedWorld,promoted); printAABB(transformed,"Transformed"); bound = hlsl::shapes::util::union_(transformed,bound); + const auto tmpWorld = hlsl::float32_t3x4(promotedWorld); + const auto world4x4 = float32_t4x4{ + tmpWorld[0], + tmpWorld[1], + tmpWorld[2], + float32_t4(0, 0, 0, 1) + }; #ifdef NBL_BUILD_DEBUG_DRAW - auto& inst = m_aabbInstances[i]; + + auto& aabbInst = m_aabbInstances[i]; const auto tmpAabb = shapes::AABB<3,float>(promoted.minVx, promoted.maxVx); - hlsl::float32_t4x4 instanceTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(tmpAabb); - const auto tmpWorld = hlsl::float32_t3x4(promotedWorld); - inst.color = { 1,1,1,1 }; - inst.transform[0] = tmpWorld[0]; - inst.transform[1] = tmpWorld[1]; - inst.transform[2] = tmpWorld[2]; - inst.transform[3] = float32_t4(0, 0, 0, 1); - inst.transform = hlsl::mul(inst.transform, instanceTransform); + hlsl::float32_t4x4 aabbTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(tmpAabb); + aabbInst.color = { 1,1,1,1 }; + aabbInst.transform = hlsl::mul(world4x4, aabbTransform); + + auto& obbInst = m_obbInstances[i]; + const auto& cpuGeom = geometries[i].get(); + const auto obb = CPolygonGeometryManipulator::calculateOBB({ + .fetch = [geo = cpuGeom, &world4x4](size_t vertex_i) { + hlsl::float32_t3 pt; + geo->getPositionView().decodeElement(vertex_i, pt); + return pt; + }, + .size = cpuGeom->getPositionView().getElementCount(), + }); + obbInst.color = { 0, 0, 1, 1 }; + const auto obbTransform = ext::debug_draw::DrawAABB::getTransformFromOBB(obb); + obbInst.transform = hlsl::mul(world4x4, obbTransform); + #endif } + printAABB(bound,"Total"); if (!m_renderer->addGeometries({ &converted.front().get(),converted.size() })) return false; @@ -461,10 +491,12 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc // mutables std::string m_modelPath; - bool m_drawBBs = true; + DrawBoundingBoxMode m_drawBBMode; #ifdef NBL_BUILD_DEBUG_DRAW smart_refctd_ptr m_drawAABB; std::vector m_aabbInstances; + std::vector m_obbInstances; + #endif }; From 3b1016e58ab5d9cb53bd9c37707dad356d30173d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 8 Sep 2025 10:50:24 +0700 Subject: [PATCH 024/219] don't EXCLUDE_ALL new example --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a29b34314..21b26934b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,7 +66,7 @@ if(NBL_BUILD_EXAMPLES) # Showcase compute pathtracing add_subdirectory(30_ComputeShaderPathTracer) - add_subdirectory(34_DebugDraw EXCLUDE_FROM_ALL) + add_subdirectory(34_DebugDraw) add_subdirectory(38_EXRSplit) # if (NBL_BUILD_MITSUBA_LOADER AND NBL_BUILD_OPTIX) From d35ab8724355f2fe230129094acaeeacb17b6471 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 16 Sep 2025 11:24:42 +0700 Subject: [PATCH 025/219] Add example 72 --- 72_GeometryInspector/CMakeLists.txt | 51 ++ 72_GeometryInspector/include/common.hpp | 22 + 72_GeometryInspector/include/transform.hpp | 162 ++++ 72_GeometryInspector/main.cpp | 777 ++++++++++++++++++ CMakeLists.txt | 1 + .../geometry/CSimpleDebugRenderer.hpp | 3 + 6 files changed, 1016 insertions(+) create mode 100644 72_GeometryInspector/CMakeLists.txt create mode 100644 72_GeometryInspector/include/common.hpp create mode 100644 72_GeometryInspector/include/transform.hpp create mode 100644 72_GeometryInspector/main.cpp diff --git a/72_GeometryInspector/CMakeLists.txt b/72_GeometryInspector/CMakeLists.txt new file mode 100644 index 000000000..697399e91 --- /dev/null +++ b/72_GeometryInspector/CMakeLists.txt @@ -0,0 +1,51 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +if(NBL_BUILD_IMGUI AND NBL_BUILD_DEBUG_DRAW) + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + list(APPEND NBL_LIBRARIES + imtestengine + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" + ) + + if (NBL_BUILD_MITSUBA_LOADER) + list(APPEND NBL_INCLUDE_SERACH_DIRECTORIES + "${NBL_EXT_MITSUBA_LOADER_INCLUDE_DIRS}" + ) + list(APPEND NBL_LIBRARIES + "${NBL_EXT_MITSUBA_LOADER_LIB}" + ) + endif() + + nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + + if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) + endif() + + add_dependencies(${EXECUTABLE_NAME} ${NBL_EXT_DEBUG_DRAW_TARGET}) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_EXT_DEBUG_DRAW_TARGET}) + target_include_directories(${EXECUTABLE_NAME} PUBLIC $) + +endif() + diff --git a/72_GeometryInspector/include/common.hpp b/72_GeometryInspector/include/common.hpp new file mode 100644 index 000000000..cc06db2c1 --- /dev/null +++ b/72_GeometryInspector/include/common.hpp @@ -0,0 +1,22 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ + + +#include "nbl/examples/examples.hpp" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace scene; +using namespace nbl::examples; + +#include "transform.hpp" +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" + +#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file diff --git a/72_GeometryInspector/include/transform.hpp b/72_GeometryInspector/include/transform.hpp new file mode 100644 index 000000000..6ac299c4b --- /dev/null +++ b/72_GeometryInspector/include/transform.hpp @@ -0,0 +1,162 @@ +#ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ + + +#include "nbl/ui/ICursorControl.h" + +#include "nbl/ext/ImGui/ImGui.h" + +#include "imgui/imgui_internal.h" +#include "imguizmo/ImGuizmo.h" + + +struct TransformRequestParams +{ + float camDistance = 8.f; + uint8_t sceneTexDescIx = ~0; + bool useWindow = false, editTransformDecomposition = false, enableViewManipulate = false; +}; + +nbl::hlsl::uint16_t2 EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) +{ + static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE); + static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL); + static bool useSnap = false; + static float snap[3] = { 1.f, 1.f, 1.f }; + static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f }; + static float boundsSnap[] = { 0.1f, 0.1f, 0.1f }; + static bool boundSizing = false; + static bool boundSizingSnap = false; + + if (params.editTransformDecomposition) + { + if (ImGui::IsKeyPressed(ImGuiKey_T)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + if (ImGui::IsKeyPressed(ImGuiKey_R)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + if (ImGui::IsKeyPressed(ImGuiKey_S)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) + mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; + float matrixTranslation[3], matrixRotation[3], matrixScale[3]; + ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); + ImGui::InputFloat3("Tr", matrixTranslation); + ImGui::InputFloat3("Rt", matrixRotation); + ImGui::InputFloat3("Sc", matrixScale); + ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); + + if (mCurrentGizmoOperation != ImGuizmo::SCALE) + { + if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL)) + mCurrentGizmoMode = ImGuizmo::LOCAL; + ImGui::SameLine(); + if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD)) + mCurrentGizmoMode = ImGuizmo::WORLD; + } + if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift)) + useSnap = !useSnap; + ImGui::Checkbox("##UseSnap", &useSnap); + ImGui::SameLine(); + + switch (mCurrentGizmoOperation) + { + case ImGuizmo::TRANSLATE: + ImGui::InputFloat3("Snap", &snap[0]); + break; + case ImGuizmo::ROTATE: + ImGui::InputFloat("Angle Snap", &snap[0]); + break; + case ImGuizmo::SCALE: + ImGui::InputFloat("Scale Snap", &snap[0]); + break; + } + ImGui::Checkbox("Bound Sizing", &boundSizing); + if (boundSizing) + { + ImGui::PushID(3); + ImGui::Checkbox("##BoundSizing", &boundSizingSnap); + ImGui::SameLine(); + ImGui::InputFloat3("Snap", boundsSnap); + ImGui::PopID(); + } + } + + ImGuiIO& io = ImGui::GetIO(); + float viewManipulateRight = io.DisplaySize.x; + float viewManipulateTop = 0; + static ImGuiWindowFlags gizmoWindowFlags = 0; + + /* + for the "useWindow" case we just render to a gui area, + otherwise to fake full screen transparent window + + note that for both cases we make sure gizmo being + rendered is aligned to our texture scene using + imgui "cursor" screen positions + */ +// TODO: this shouldn't be handled here I think + SImResourceInfo info; + info.textureID = params.sceneTexDescIx; + info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + + nbl::hlsl::uint16_t2 retval; + if (params.useWindow) + { + ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing); + ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f)); + ImGui::Begin("Gizmo", 0, gizmoWindowFlags); + ImGuizmo::SetDrawlist(); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 windowPos = ImGui::GetWindowPos(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval = {contentRegionSize.x,contentRegionSize.y}; + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + + ImGuiWindow* window = ImGui::GetCurrentWindow(); + gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0); + } + else + { + ImGui::SetNextWindowPos(ImVec2(0, 0)); + ImGui::SetNextWindowSize(io.DisplaySize); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window + ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval = {contentRegionSize.x,contentRegionSize.y}; + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + } + + ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL); + + if(params.enableViewManipulate) + ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + + ImGui::End(); + ImGui::PopStyleColor(); + + return retval; +} + +#endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ \ No newline at end of file diff --git a/72_GeometryInspector/main.cpp b/72_GeometryInspector/main.cpp new file mode 100644 index 000000000..5fe0421da --- /dev/null +++ b/72_GeometryInspector/main.cpp @@ -0,0 +1,777 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "common.hpp" + +#include "../3rdparty/portable-file-dialogs/portable-file-dialogs.h" + +#ifdef NBL_BUILD_MITSUBA_LOADER +#include "nbl/ext/MitsubaLoader/CSerializedLoader.h" +#endif + +#include "nbl/ext/DebugDraw/CDrawAABB.h" +#include "nbl/ext/ImGui/ImGui.h" + +class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; + + enum DrawBoundingBoxMode + { + DBBM_NONE, + DBBM_AABB, + DBBM_OBB, + DBBM_COUNT + }; + + public: + static float32_t4x4 intofloat32_t4x4(const matrix4SIMD& mat) + { + return float32_t4x4{ + mat.rows[0].x, mat.rows[0].y, mat.rows[0].z, mat.rows[0].w, + mat.rows[1].x, mat.rows[1].y, mat.rows[1].z, mat.rows[1].w, + mat.rows[2].x, mat.rows[2].y, mat.rows[2].z, mat.rows[2].w, + mat.rows[3].x, mat.rows[3].y, mat.rows[3].z, mat.rows[3].w, + }; + } + + static float32_t4x4 intofloat32_t4x4(const matrix3x4SIMD& mat) + { + return float32_t4x4{ + mat.rows[0].x, mat.rows[0].y, mat.rows[0].z, mat.rows[0].w, + mat.rows[1].x, mat.rows[1].y, mat.rows[1].z, mat.rows[1].w, + mat.rows[2].x, mat.rows[2].y, mat.rows[2].z, mat.rows[2].w, + 0.0f, 0.0f, 0.0f, 1.0f, + }; + } + + static float32_t4x4 intofloat32_t4x4(const float32_t3x4& mat) + { + return float32_t4x4{ + mat[0], + mat[1], + mat[2], + float32_t4(0.0f, 0.0f, 0.0f, 1.0f), + }; + } + inline GeometryInspectorApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({1280,720}, EF_D32_SFLOAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + #ifdef NBL_BUILD_MITSUBA_LOADER + m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); + #endif + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for (auto i=0u; icreateCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_cmdBufs.data()+i,1})) + return logFail("Couldn't create Command Buffer!"); + } + + + auto scRes = static_cast(m_surface->getSwapchainResources()); + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(),scRes->getRenderpass(),0,{}); + if (!m_renderer) + return logFail("Failed to create renderer!"); + + auto* renderpass = scRes->getRenderpass(); + + { + ext::debug_draw::DrawAABB::SCreationParameters params = {}; + params.assetManager = m_assetMgr; + params.transfer = getTransferUpQueue(); + params.drawMode = ext::debug_draw::DrawAABB::ADM_DRAW_BATCH; + params.batchPipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); + params.renderpass = smart_refctd_ptr(renderpass); + params.utilities = m_utils; + m_bbRenderer = ext::debug_draw::DrawAABB::create(std::move(params)); + } + + // gui descriptor setup + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ETC_REPEAT; + params.TextureWrapV = ETC_REPEAT; + params.TextureWrapW = ETC_REPEAT; + + m_ui.samplers.gui = m_device->createSampler(params); + m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); + } + + std::array, 69u> immutableSamplers; + for (auto& it : immutableSamplers) + it = smart_refctd_ptr(m_ui.samplers.scene); + + immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); + + nbl::ext::imgui::UI::SCreationParameters params; + + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; + params.assetManager = m_assetMgr; + params.pipelineCache = nullptr; + params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); + params.renderpass = smart_refctd_ptr(renderpass); + params.streamingBuffer = nullptr; + params.subpassIx = 0u; + params.transfer = getGraphicsQueue(); + params.utilities = m_utils; + { + m_ui.manager = ext::imgui::UI::create(std::move(params)); + + // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + + IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; + descriptorPoolInfo.maxSets = 1u; + descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; + + m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); + assert(m_guiDescriptorSetPool); + + m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); + assert(m_ui.descriptorSet); + } + } + + m_ui.manager->registerListener( + [this]() -> void { + ImGuiIO& io = ImGui::GetIO(); + + m_camera.setProjectionMatrix([&]() + { + static matrix4SIMD projection; + + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + core::radians(m_cameraSetting.fov), + io.DisplaySize.x / io.DisplaySize.y, + m_cameraSetting.zNear, + m_cameraSetting.zFar); + + return projection; + }()); + + ImGuizmo::SetOrthographic(false); + ImGuizmo::BeginFrame(); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Controls"); + + ImGui::SameLine(); + + ImGui::Text("Camera"); + + ImGui::SliderFloat("Move speed", &m_cameraSetting.moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &m_cameraSetting.rotateSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Fov", &m_cameraSetting.fov, 20.f, 150.f); + ImGui::SliderFloat("zNear", &m_cameraSetting.zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &m_cameraSetting.zFar, 110.f, 10000.f); + + + ImGui::Text("Inspector"); + ImGui::ListBox("Selected polygon", &m_selectedMesh, + [](void* userData, int index) -> const char* { + auto* meshInstances = reinterpret_cast(userData); + return meshInstances[index].name.data(); + }, + m_meshInstances.data(), + m_meshInstances.size()); + + ImGui::Checkbox("Draw AABB", &m_shouldDrawAABB); + ImGui::Checkbox("Draw OBB", &m_shouldDrawOBB); + if (ImGuizmo::IsUsing()) + { + ImGui::Text("Using gizmo"); + } + else + { + ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : ""); + } + ImGui::Separator(); + + static struct + { + hlsl::float32_t4x4 view, projection, model; + } imguizmoM16InOut; + + ImGuizmo::SetID(0u); + + auto& selectedInstance = m_renderer->getInstance(m_selectedMesh); + + imguizmoM16InOut.view = hlsl::transpose(intofloat32_t4x4(m_camera.getViewMatrix())); + imguizmoM16InOut.projection = hlsl::transpose(intofloat32_t4x4(m_camera.getProjectionMatrix())); + imguizmoM16InOut.projection[1][1] *= -1.f; // Flip y coordinates. https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ + imguizmoM16InOut.model = hlsl::transpose(intofloat32_t4x4(selectedInstance.world)); + { + m_transformParams.enableViewManipulate = true; + EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], m_transformParams); + } + selectedInstance.world = hlsl::float32_t3x4(hlsl::transpose(imguizmoM16InOut.model)); + + ImGui::End(); + }); + // + if (!reloadModel()) + return false; + + m_camera.mapKeysToArrows(); + + onAppInitializedFinish(); + return true; + } + + bool updateGUIDescriptorSet() + { + // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout + static std::array descriptorInfo; + static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; + + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); + + for (uint32_t i = 0; i < descriptorInfo.size(); ++i) + { + writes[i].dstSet = m_ui.descriptorSet.get(); + writes[i].binding = 0u; + writes[i].arrayElement = i; + writes[i].count = 1u; + } + writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; + + return m_device->updateDescriptorSets(writes, {}); + } + + inline void update(const std::chrono::microseconds nextPresentationTimestamp) + { + m_camera.setMoveSpeed(m_cameraSetting.moveSpeed); + m_camera.setRotateSpeed(m_cameraSetting.rotateSpeed); + + static std::chrono::microseconds previousEventTimestamp{}; + + m_inputSystem->getDefaultMouse(&m_mouse); + m_inputSystem->getDefaultKeyboard(&m_keyboard); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } capturedEvents; + + m_camera.beginInputProcessing(nextPresentationTimestamp); + { + const auto& io = ImGui::GetIO(); + m_mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (!io.WantCaptureMouse) + m_camera.mouseProcess(events); // don't capture the events, only let m_camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.mouse.emplace_back(e); + + } + }, m_logger.get()); + + bool reload = false; + m_keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + if (!io.WantCaptureKeyboard) + m_camera.keyboardProcess(events); // don't capture the events, only let m_camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + if (e.keyCode == E_KEY_CODE::EKC_R && e.action == SKeyboardEvent::ECA_RELEASED) + reload = true; + + previousEventTimestamp = e.timeStamp; + capturedEvents.keyboard.emplace_back(e); + } + }, m_logger.get()); + if (reload) reloadModel(); + + } + m_camera.endInputProcessing(nextPresentationTimestamp); + + const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); + + const ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), m_window->getHeight() }, + .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; + + m_ui.manager->update(params); + } + + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override + { + update(nextPresentationTimestamp); + + // + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + auto* const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // clear to black for both things + { + // begin renderpass + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + auto* framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; + const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {framebuffer->getCreationParameters().width,framebuffer->getCreationParameters().height} + }; + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = framebuffer, + .colorClearValues = &clearValue, + .depthStencilClearValues = &depthValue, + .renderArea = currentRenderArea + }; + cb->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + const SViewport viewport = { + .x = static_cast(currentRenderArea.offset.x), + .y = static_cast(currentRenderArea.offset.y), + .width = static_cast(currentRenderArea.extent.width), + .height = static_cast(currentRenderArea.extent.height) + }; + cb->setViewport(0u,1u,&viewport); + + cb->setScissor(0u,1u,¤tRenderArea); + } + + // draw scene + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + { + // TODO: get rid of legacy matrices + { + memcpy(&viewMatrix,m_camera.getViewMatrix().pointer(),sizeof(viewMatrix)); + memcpy(&viewProjMatrix,m_camera.getConcatenatedMatrix().pointer(),sizeof(viewProjMatrix)); + } + m_renderer->render(cb,CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix)); + } + + const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + const auto& renderInstance = m_renderer->getInstance(m_selectedMesh); + const auto& meshInstance = m_meshInstances[m_selectedMesh]; + core::vector debugDrawInstances; + debugDrawInstances.reserve(2); + const auto world4x4 = float32_t4x4{ + renderInstance.world[0], + renderInstance.world[1], + renderInstance.world[2], + float32_t4(0, 0, 0, 1) + }; + if (m_shouldDrawAABB) + { + const auto aabbTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(meshInstance.aabb); + debugDrawInstances.push_back(ext::debug_draw::InstanceData{ .transform = hlsl::mul(world4x4, aabbTransform), .color = float32_t4(1, 1, 1, 1)}); + } + if (m_shouldDrawOBB) + { + const auto obbTransform = ext::debug_draw::DrawAABB::getTransformFromOBB(meshInstance.obb); + debugDrawInstances.push_back(ext::debug_draw::InstanceData{ .transform = hlsl::mul(world4x4, obbTransform), .color = float32_t4(0, 0, 1, 1)}); + } + m_bbRenderer->render(cb, drawFinished, debugDrawInstances, viewProjMatrix); + + cb->beginDebugMarker("Render ImGui"); + const auto uiParams = m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cb->bindGraphicsPipeline(uiPipeline); + cb->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); + if (!m_ui.manager->render(cb, drawFinished)) + { + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.",ILogger::ELL_ERROR); + return {}; + } + cb->endDebugMarker(); + + cb->endRenderPass(); + } + cb->end(); + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + std::string caption = "[Nabla Engine] Geometry Inspector"; + { + caption += ", displaying ["; + caption += m_modelPath; + caption += "]"; + m_window->setCaption(caption); + } + + updateGUIDescriptorSet(); + return retval; + } + + protected: + const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override + { + // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, + // don't want any writes to be available, we'll clear + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // color from ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + return dependencies; + } + + private: + // TODO: standardise this across examples, and take from `argv` + bool m_nonInteractiveTest = false; + + bool reloadModel() + { + if (m_nonInteractiveTest) // TODO: maybe also take from argv and argc + m_modelPath = (sharedInputCWD/"ply/Spanner-ply.ply").string(); + else + { + pfd::open_file file("Choose a supported Model File", sharedInputCWD.string(), + { + "All Supported Formats", "*.ply *.stl *.serialized *.obj", + "TODO (.ply)", "*.ply", + "TODO (.stl)", "*.stl", + "Mitsuba 0.6 Serialized (.serialized)", "*.serialized", + "Wavefront Object (.obj)", "*.obj" + }, + false + ); + if (file.result().empty()) + return false; + m_modelPath = file.result()[0]; + } + + // free up + m_renderer->m_instances.clear(); + m_renderer->clearGeometries({.semaphore=m_semaphore.get(),.value=m_realFrameIx}); + m_assetMgr->clearAllAssetCache(); + + //! load the geometry + IAssetLoader::SAssetLoadParams params = {}; + params.logger = m_logger.get(); + auto bundle = m_assetMgr->getAsset(m_modelPath,params); + if (bundle.getContents().empty()) + return false; + + // + core::vector> geometries; + switch (bundle.getAssetType()) + { + case IAsset::E_TYPE::ET_GEOMETRY: + for (const auto& item : bundle.getContents()) + if (auto polyGeo=IAsset::castDown(item); polyGeo) + geometries.push_back(polyGeo); + break; + default: + m_logger->log("Asset loaded but not a supported type (ET_GEOMETRY,ET_GEOMETRY_COLLECTION)",ILogger::ELL_ERROR); + break; + } + if (geometries.empty()) + return false; + + using aabb_t = hlsl::shapes::AABB<3,float32_t>; + auto printAABB = [&](const aabb_t& aabb, const char* extraMsg="")->void + { + m_logger->log("%s AABB is (%f,%f,%f) -> (%f,%f,%f)",ILogger::ELL_INFO,extraMsg,aabb.minVx.x,aabb.minVx.y,aabb.minVx.z,aabb.maxVx.x,aabb.maxVx.y,aabb.maxVx.z); + }; + auto bound = aabb_t::create(); + // convert the geometries + { + smart_refctd_ptr converter = CAssetConverter::create({.device=m_device.get()}); + + const auto transferFamily = getTransferUpQueue()->getFamilyIndex(); + + struct SInputs : CAssetConverter::SInputs + { + virtual inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUBuffer* buffer, const CAssetConverter::patch_t& patch) const + { + return sharedBufferOwnership; + } + + core::vector sharedBufferOwnership; + } inputs = {}; + core::vector> patches(geometries.size(),CSimpleDebugRenderer::DefaultPolygonGeometryPatch); + { + inputs.logger = m_logger.get(); + std::get>(inputs.assets) = {&geometries.front().get(),geometries.size()}; + std::get>(inputs.patches) = patches; + // set up shared ownership so we don't have to + core::unordered_set families; + families.insert(transferFamily); + families.insert(getGraphicsQueue()->getFamilyIndex()); + if (families.size()>1) + for (const auto fam : families) + inputs.sharedBufferOwnership.push_back(fam); + } + + // reserve + auto reservation = converter->reserve(inputs); + if (!reservation) + { + m_logger->log("Failed to reserve GPU objects for CPU->GPU conversion!",ILogger::ELL_ERROR); + return false; + } + + // convert + { + auto semaphore = m_device->createSemaphore(0u); + + constexpr auto MultiBuffering = 2; + std::array,MultiBuffering> commandBuffers = {}; + { + auto pool = m_device->createCommandPool(transferFamily,IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT|IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,commandBuffers,smart_refctd_ptr(m_logger)); + } + commandBuffers.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + std::array commandBufferSubmits; + for (auto i=0; ilog("Failed to await submission feature!", ILogger::ELL_ERROR); + return false; + } + } + + auto tmp = hlsl::float32_t4x3( + hlsl::float32_t3(1,0,0), + hlsl::float32_t3(0,1,0), + hlsl::float32_t3(0,0,1), + hlsl::float32_t3(0,0,0) + ); + const auto& converted = reservation.getGPUObjects(); + core::vector meshWorlds; + for (uint32_t i = 0; i < converted.size(); i++) + { + const auto& geom = converted[i]; + const auto aabb = geom.value->getAABB(); + printAABB(aabb,"Geometry"); + tmp[3].x += aabb.getExtent().x; + meshWorlds.emplace_back(hlsl::transpose(tmp)); + const auto transformed = hlsl::shapes::util::transform(meshWorlds.back(), aabb); + bound = hlsl::shapes::util::union_(transformed,bound); + + const auto& cpuGeom = geometries[i].get(); + const auto obb = CPolygonGeometryManipulator::calculateOBB({ + .fetch = [geo = cpuGeom](size_t vertex_i) { + hlsl::float32_t3 pt; + geo->getPositionView().decodeElement(vertex_i, pt); + return pt; + }, + .size = cpuGeom->getPositionView().getElementCount(), + }); + + m_meshInstances.push_back({ .name = std::format("Mesh {}", i), .aabb = aabb, .obb = obb }); + } + + printAABB(bound,"Total"); + if (!m_renderer->addGeometries({ &converted.front().get(),converted.size() })) + return false; + + for (auto geom_i = 0u; geom_i < m_renderer->getGeometries().size(); geom_i++) + m_renderer->m_instances.push_back({ + .world = meshWorlds[geom_i], + .packedGeo = &m_renderer->getGeometry(geom_i) + }); + } + + // get scene bounds and reset m_camera + { + const float32_t distance = 0.05; + const auto diagonal = bound.getExtent(); + { + const auto measure = hlsl::length(diagonal); + const auto aspectRatio = float(m_window->getWidth())/float(m_window->getHeight()); + m_camera.setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(1.2f,aspectRatio,distance*measure*0.1,measure*4.0)); + m_camera.setMoveSpeed(measure*0.04); + } + const auto pos = bound.maxVx+diagonal*distance; + m_camera.setPosition(vectorSIMDf(pos.x,pos.y,pos.z)); + const auto center = (bound.minVx+bound.maxVx)*0.5f; + m_camera.setTarget(vectorSIMDf(center.x,center.y,center.z)); + } + + // TODO: write out the geometry + + return true; + } + + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint32_t MaxFramesInFlight = 3u; + constexpr static inline uint8_t MaxUITextureCount = 1u; + // + smart_refctd_ptr m_renderer; + + struct MeshInstance + { + std::string name; + hlsl::shapes::AABB<3, float32_t> aabb; + hlsl::shapes::OBB<3, float32_t> obb; + }; + core::vector m_meshInstances; + int m_selectedMesh = 0; + // + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array,MaxFramesInFlight> m_cmdBufs; + // + InputSystem::ChannelReader m_mouse; + InputSystem::ChannelReader m_keyboard; + // + struct CameraSetting + { + float fov = 60.f; + float zNear = 0.1f; + float zFar = 10000.f; + float moveSpeed = 1.f; + float rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + + } m_cameraSetting; + Camera m_camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + // mutables + std::string m_modelPath; + + smart_refctd_ptr m_bbRenderer; + bool m_shouldDrawAABB; + bool m_shouldDrawOBB; + + struct C_UI + { + nbl::core::smart_refctd_ptr manager; + + struct + { + core::smart_refctd_ptr gui, scene; + } samplers; + + core::smart_refctd_ptr descriptorSet; + } m_ui; + core::smart_refctd_ptr m_guiDescriptorSetPool; + + TransformRequestParams m_transformParams; + }; + +NBL_MAIN_FUNC(GeometryInspectorApp) diff --git a/CMakeLists.txt b/CMakeLists.txt index a29b34314..cacf77c98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) + add_subdirectory(72_GeometryInspector) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) diff --git a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp index 9a9e5c966..f38de81ca 100644 --- a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp +++ b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp @@ -365,6 +365,9 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted inline const auto& getGeometries() const {return m_geoms;} inline auto& getGeometry(const uint32_t ix) {return m_geoms[ix];} + inline const auto& getInstances() const {return m_instances;} + inline auto& getInstance(const uint32_t ix) {return m_instances[ix];} + // inline void render(video::IGPUCommandBuffer* cmdbuf, const SViewParams& viewParams) const { From 850ca5d41412d8182a72fb88dd4e9e61df55e19b Mon Sep 17 00:00:00 2001 From: devsh Date: Sat, 27 Sep 2025 15:05:10 +0200 Subject: [PATCH 026/219] start example 14 --- 14_MitsubaLoader/CMakeLists.txt | 9 + 14_MitsubaLoader/main.cpp | 505 ++++++++++++++++++++++++++++++++ CMakeLists.txt | 4 + 3 files changed, 518 insertions(+) create mode 100644 14_MitsubaLoader/CMakeLists.txt create mode 100644 14_MitsubaLoader/main.cpp diff --git a/14_MitsubaLoader/CMakeLists.txt b/14_MitsubaLoader/CMakeLists.txt new file mode 100644 index 000000000..3921c61d9 --- /dev/null +++ b/14_MitsubaLoader/CMakeLists.txt @@ -0,0 +1,9 @@ +list(APPEND NBL_INCLUDE_SERACH_DIRECTORIES + "${NBL_EXT_MITSUBA_LOADER_INCLUDE_DIRS}" +) +list(APPEND NBL_LIBRARIES + "${NBL_EXT_MITSUBA_LOADER_LIB}" +) + + +nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") diff --git a/14_MitsubaLoader/main.cpp b/14_MitsubaLoader/main.cpp new file mode 100644 index 000000000..d80fa8998 --- /dev/null +++ b/14_MitsubaLoader/main.cpp @@ -0,0 +1,505 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "argparse/argparse.hpp" +#include "common.hpp" + +#include "../3rdparty/portable-file-dialogs/portable-file-dialogs.h" + +#ifdef NBL_BUILD_MITSUBA_LOADER +#include "nbl/ext/MitsubaLoader/CSerializedLoader.h" +#endif + +class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; + +public: + inline MeshLoadersApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({ 1280,720 }, EF_D32_SFLOAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) + { + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; +#ifdef NBL_BUILD_MITSUBA_LOADER + m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); +#endif + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + m_saveGeomPrefixPath = localOutputCWD / "saved"; + + // parse args + argparse::ArgumentParser parser("12_meshloaders"); + parser.add_argument("--savegeometry") + .help("Save the mesh on exit or reload") + .flag(); + + parser.add_argument("--savepath") + .nargs(1) + .help("Specify the file to which the mesh will be saved"); + + try + { + parser.parse_args({ argv.data(), argv.data() + argv.size() }); + } + catch (const std::exception& e) + { + return logFail(e.what()); + } + + if (parser["--savegeometry"] == true) + m_saveGeom = true; + + if (parser.present("--savepath")) + { + auto tmp = path(parser.get("--savepath")); + + if (tmp.empty() || !tmp.has_filename()) + return logFail("Invalid path has been specified in --savepath argument"); + + if (!std::filesystem::exists(tmp.parent_path())) + return logFail("Path specified in --savepath argument doesn't exist"); + + m_specifiedGeomSavePath.emplace(std::move(tmp.generic_string())); + } + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for (auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!pool) + return logFail("Couldn't create Command Pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i,1 })) + return logFail("Couldn't create Command Buffer!"); + } + + + auto scRes = static_cast(m_surface->getSwapchainResources()); + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), scRes->getRenderpass(), 0, {}); + if (!m_renderer) + return logFail("Failed to create renderer!"); + + // + if (!reloadModel()) + return false; + + camera.mapKeysToArrows(); + + onAppInitializedFinish(); + return true; + } + + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override + { + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + // + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + auto* const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // clear to black for both things + { + // begin renderpass + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + auto* framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; + const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {framebuffer->getCreationParameters().width,framebuffer->getCreationParameters().height} + }; + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = framebuffer, + .colorClearValues = &clearValue, + .depthStencilClearValues = &depthValue, + .renderArea = currentRenderArea + }; + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + const SViewport viewport = { + .x = static_cast(currentRenderArea.offset.x), + .y = static_cast(currentRenderArea.offset.y), + .width = static_cast(currentRenderArea.extent.width), + .height = static_cast(currentRenderArea.extent.height) + }; + cb->setViewport(0u, 1u, &viewport); + + cb->setScissor(0u, 1u, ¤tRenderArea); + } + // late latch input + { + bool reload = false; + camera.beginInputProcessing(nextPresentationTimestamp); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + for (const auto& event : events) + if (event.keyCode == E_KEY_CODE::EKC_R && event.action == SKeyboardEvent::ECA_RELEASED) + reload = true; + camera.keyboardProcess(events); + }, + m_logger.get() + ); + camera.endInputProcessing(nextPresentationTimestamp); + if (reload) + reloadModel(); + } + // draw scene + { + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + // TODO: get rid of legacy matrices + { + memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); + memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); + } + m_renderer->render(cb, CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix)); + } + cb->endRenderPass(); + } + cb->end(); + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + std::string caption = "[Nabla Engine] Mesh Loaders"; + { + caption += ", displaying ["; + caption += m_modelPath; + caption += "]"; + m_window->setCaption(caption); + } + return retval; + } + + inline bool onAppTerminated() override + { + if (m_saveGeomTaskFuture.valid()) + { + m_logger->log("Waiting for geometry writer to finish writing...", ILogger::ELL_INFO); + m_saveGeomTaskFuture.wait(); + } + + return device_base_t::onAppTerminated(); + } + +protected: + const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override + { + // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, + // don't want any writes to be available, we'll clear + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // color from ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + return dependencies; + } + +private: + // TODO: standardise this across examples, and take from `argv` + bool m_nonInteractiveTest = false; + + bool reloadModel() + { + if (m_nonInteractiveTest) // TODO: maybe also take from argv and argc + m_modelPath = (sharedInputCWD / "ply/Spanner-ply.ply").string(); + else + { + pfd::open_file file("Choose a supported Model File", sharedInputCWD.string(), + { + "All Supported Formats", "*.ply *.stl *.serialized *.obj", + "TODO (.ply)", "*.ply", + "TODO (.stl)", "*.stl", + "Mitsuba 0.6 Serialized (.serialized)", "*.serialized", + "Wavefront Object (.obj)", "*.obj" + }, + false + ); + if (file.result().empty()) + return false; + m_modelPath = file.result()[0]; + } + + // free up + m_renderer->m_instances.clear(); + m_renderer->clearGeometries({ .semaphore = m_semaphore.get(),.value = m_realFrameIx }); + m_assetMgr->clearAllAssetCache(); + + //! load the geometry + IAssetLoader::SAssetLoadParams params = {}; + params.logger = m_logger.get(); + auto asset = m_assetMgr->getAsset(m_modelPath, params); + if (asset.getContents().empty()) + return false; + + // + core::vector> geometries; + switch (asset.getAssetType()) + { + case IAsset::E_TYPE::ET_GEOMETRY: + for (const auto& item : asset.getContents()) + if (auto polyGeo = IAsset::castDown(item); polyGeo) + geometries.push_back(polyGeo); + break; + default: + m_logger->log("Asset loaded but not a supported type (ET_GEOMETRY,ET_GEOMETRY_COLLECTION)", ILogger::ELL_ERROR); + break; + } + if (geometries.empty()) + return false; + + if (m_saveGeom) + { + if (m_saveGeomTaskFuture.valid()) + { + m_logger->log("Waiting for previous geometry saving task to complete...", ILogger::ELL_INFO); + m_saveGeomTaskFuture.wait(); + } + + std::string currentGeomSavePath = m_specifiedGeomSavePath.value_or((m_saveGeomPrefixPath / path(m_modelPath).filename()).generic_string()); + m_saveGeomTaskFuture = std::async( + std::launch::async, + [this, geometries, currentGeomSavePath] { writeGeometry( + geometries[0], + currentGeomSavePath + ); } + ); + } + + using aabb_t = hlsl::shapes::AABB<3, double>; + auto printAABB = [&](const aabb_t& aabb, const char* extraMsg = "")->void + { + m_logger->log("%s AABB is (%f,%f,%f) -> (%f,%f,%f)", ILogger::ELL_INFO, extraMsg, aabb.minVx.x, aabb.minVx.y, aabb.minVx.z, aabb.maxVx.x, aabb.maxVx.y, aabb.maxVx.z); + }; + auto bound = aabb_t::create(); + // convert the geometries + { + smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get() }); + + const auto transferFamily = getTransferUpQueue()->getFamilyIndex(); + + struct SInputs : CAssetConverter::SInputs + { + virtual inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUBuffer* buffer, const CAssetConverter::patch_t& patch) const + { + return sharedBufferOwnership; + } + + core::vector sharedBufferOwnership; + } inputs = {}; + core::vector> patches(geometries.size(), CSimpleDebugRenderer::DefaultPolygonGeometryPatch); + { + inputs.logger = m_logger.get(); + std::get>(inputs.assets) = { &geometries.front().get(),geometries.size() }; + std::get>(inputs.patches) = patches; + // set up shared ownership so we don't have to + core::unordered_set families; + families.insert(transferFamily); + families.insert(getGraphicsQueue()->getFamilyIndex()); + if (families.size() > 1) + for (const auto fam : families) + inputs.sharedBufferOwnership.push_back(fam); + } + + // reserve + auto reservation = converter->reserve(inputs); + if (!reservation) + { + m_logger->log("Failed to reserve GPU objects for CPU->GPU conversion!", ILogger::ELL_ERROR); + return false; + } + + // convert + { + auto semaphore = m_device->createSemaphore(0u); + + constexpr auto MultiBuffering = 2; + std::array, MultiBuffering> commandBuffers = {}; + { + auto pool = m_device->createCommandPool(transferFamily, IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT | IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, commandBuffers, smart_refctd_ptr(m_logger)); + } + commandBuffers.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + std::array commandBufferSubmits; + for (auto i = 0; i < MultiBuffering; i++) + commandBufferSubmits[i].cmdbuf = commandBuffers[i].get(); + + SIntendedSubmitInfo transfer = {}; + transfer.queue = getTransferUpQueue(); + transfer.scratchCommandBuffers = commandBufferSubmits; + transfer.scratchSemaphore = { + .semaphore = semaphore.get(), + .value = 0u, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + + CAssetConverter::SConvertParams cpar = {}; + cpar.utilities = m_utils.get(); + cpar.transfer = &transfer; + + // basically it records all data uploads and submits them right away + auto future = reservation.convert(cpar); + if (future.copy() != IQueue::RESULT::SUCCESS) + { + m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); + return false; + } + } + + auto tmp = hlsl::float32_t4x3( + hlsl::float32_t3(1, 0, 0), + hlsl::float32_t3(0, 1, 0), + hlsl::float32_t3(0, 0, 1), + hlsl::float32_t3(0, 0, 0) + ); + core::vector worldTforms; + const auto& converted = reservation.getGPUObjects(); + for (const auto& geom : converted) + { + const auto promoted = geom.value->getAABB(); + printAABB(promoted, "Geometry"); + tmp[3].x += promoted.getExtent().x; + const auto promotedWorld = hlsl::float64_t3x4(worldTforms.emplace_back(hlsl::transpose(tmp))); + const auto transformed = hlsl::shapes::util::transform(promotedWorld, promoted); + printAABB(transformed, "Transformed"); + bound = hlsl::shapes::util::union_(transformed, bound); + } + printAABB(bound, "Total"); + if (!m_renderer->addGeometries({ &converted.front().get(),converted.size() })) + return false; + + auto worlTformsIt = worldTforms.begin(); + for (const auto& geo : m_renderer->getGeometries()) + m_renderer->m_instances.push_back({ + .world = *(worlTformsIt++), + .packedGeo = &geo + }); + } + + // get scene bounds and reset camera + { + const double distance = 0.05; + const auto diagonal = bound.getExtent(); + { + const auto measure = hlsl::length(diagonal); + const auto aspectRatio = float(m_window->getWidth()) / float(m_window->getHeight()); + camera.setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(1.2f, aspectRatio, distance * measure * 0.1, measure * 4.0)); + camera.setMoveSpeed(measure * 0.04); + } + const auto pos = bound.maxVx + diagonal * distance; + camera.setPosition(vectorSIMDf(pos.x, pos.y, pos.z)); + const auto center = (bound.minVx + bound.maxVx) * 0.5; + camera.setTarget(vectorSIMDf(center.x, center.y, center.z)); + } + + // TODO: write out the geometry + + return true; + } + + void writeGeometry(smart_refctd_ptr geometry, const std::string& savePath) + { + IAsset* assetPtr = const_cast(static_cast(geometry.get())); + IAssetWriter::SAssetWriteParams params{ assetPtr }; + m_logger->log("Saving mesh to %s", ILogger::ELL_INFO, savePath.c_str()); + if (!m_assetMgr->writeAsset(savePath, params)) + m_logger->log("Failed to save %s", ILogger::ELL_ERROR, savePath.c_str()); + m_logger->log("Mesh successfully saved!", ILogger::ELL_INFO); + } + + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint32_t MaxFramesInFlight = 3u; + // + smart_refctd_ptr m_renderer; + // + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + // + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + // + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + // mutables + std::string m_modelPath; + + bool m_saveGeom = false; + std::future m_saveGeomTaskFuture; + std::optional m_specifiedGeomSavePath; + nbl::system::path m_saveGeomPrefixPath; +}; + +NBL_MAIN_FUNC(MeshLoadersApp) \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f8ce94f93..0a7c6be29 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,10 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(12_MeshLoaders) # add_subdirectory(13_MaterialCompilerTest) + # + if (NBL_BUILD_MITSUBA_LOADER) + add_subdirectory(14_MitsubaLoader) + endif() # Waiting for a refactor #add_subdirectory(27_PLYSTLDemo) From d68e5bd0687c21d5599dcb7119e1db39f6be4e52 Mon Sep 17 00:00:00 2001 From: devsh Date: Sat, 27 Sep 2025 15:43:36 +0200 Subject: [PATCH 027/219] draft the test --- 14_MitsubaLoader/main.cpp | 527 ++++---------------------------------- 1 file changed, 51 insertions(+), 476 deletions(-) diff --git a/14_MitsubaLoader/main.cpp b/14_MitsubaLoader/main.cpp index d80fa8998..b0d1ecd2b 100644 --- a/14_MitsubaLoader/main.cpp +++ b/14_MitsubaLoader/main.cpp @@ -1,505 +1,80 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "argparse/argparse.hpp" -#include "common.hpp" -#include "../3rdparty/portable-file-dialogs/portable-file-dialogs.h" - -#ifdef NBL_BUILD_MITSUBA_LOADER +#include "nbl/examples/examples.hpp" +//#include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" -#endif - -class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourcesApplication -{ - using device_base_t = MonoWindowApplication; - using asset_base_t = BuiltinResourcesApplication; - -public: - inline MeshLoadersApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), - device_base_t({ 1280,720 }, EF_D32_SFLOAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) - { - } - - inline bool onAppInitialized(smart_refctd_ptr&& system) override - { - if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; -#ifdef NBL_BUILD_MITSUBA_LOADER - m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); -#endif - if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - - m_saveGeomPrefixPath = localOutputCWD / "saved"; - - // parse args - argparse::ArgumentParser parser("12_meshloaders"); - parser.add_argument("--savegeometry") - .help("Save the mesh on exit or reload") - .flag(); - - parser.add_argument("--savepath") - .nargs(1) - .help("Specify the file to which the mesh will be saved"); - - try - { - parser.parse_args({ argv.data(), argv.data() + argv.size() }); - } - catch (const std::exception& e) - { - return logFail(e.what()); - } - - if (parser["--savegeometry"] == true) - m_saveGeom = true; - - if (parser.present("--savepath")) - { - auto tmp = path(parser.get("--savepath")); - - if (tmp.empty() || !tmp.has_filename()) - return logFail("Invalid path has been specified in --savepath argument"); - - if (!std::filesystem::exists(tmp.parent_path())) - return logFail("Path specified in --savepath argument doesn't exist"); - - m_specifiedGeomSavePath.emplace(std::move(tmp.generic_string())); - } - - m_semaphore = m_device->createSemaphore(m_realFrameIx); - if (!m_semaphore) - return logFail("Failed to Create a Semaphore!"); - - auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - for (auto i = 0u; i < MaxFramesInFlight; i++) - { - if (!pool) - return logFail("Couldn't create Command Pool!"); - if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i,1 })) - return logFail("Couldn't create Command Buffer!"); - } +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; - auto scRes = static_cast(m_surface->getSwapchainResources()); - m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), scRes->getRenderpass(), 0, {}); - if (!m_renderer) - return logFail("Failed to create renderer!"); - - // - if (!reloadModel()) - return false; - - camera.mapKeysToArrows(); - - onAppInitializedFinish(); - return true; - } - - inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override - { - m_inputSystem->getDefaultMouse(&mouse); - m_inputSystem->getDefaultKeyboard(&keyboard); - - // - const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - - auto* const cb = m_cmdBufs.data()[resourceIx].get(); - cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - // clear to black for both things - { - // begin renderpass - { - auto scRes = static_cast(m_surface->getSwapchainResources()); - auto* framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); - const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; - const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; - const VkRect2D currentRenderArea = - { - .offset = {0,0}, - .extent = {framebuffer->getCreationParameters().width,framebuffer->getCreationParameters().height} - }; - const IGPUCommandBuffer::SRenderpassBeginInfo info = - { - .framebuffer = framebuffer, - .colorClearValues = &clearValue, - .depthStencilClearValues = &depthValue, - .renderArea = currentRenderArea - }; - cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - - const SViewport viewport = { - .x = static_cast(currentRenderArea.offset.x), - .y = static_cast(currentRenderArea.offset.y), - .width = static_cast(currentRenderArea.extent.width), - .height = static_cast(currentRenderArea.extent.height) - }; - cb->setViewport(0u, 1u, &viewport); - - cb->setScissor(0u, 1u, ¤tRenderArea); - } - // late latch input - { - bool reload = false; - camera.beginInputProcessing(nextPresentationTimestamp); - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void - { - for (const auto& event : events) - if (event.keyCode == E_KEY_CODE::EKC_R && event.action == SKeyboardEvent::ECA_RELEASED) - reload = true; - camera.keyboardProcess(events); - }, - m_logger.get() - ); - camera.endInputProcessing(nextPresentationTimestamp); - if (reload) - reloadModel(); - } - // draw scene - { - float32_t3x4 viewMatrix; - float32_t4x4 viewProjMatrix; - // TODO: get rid of legacy matrices - { - memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); - memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); - } - m_renderer->render(cb, CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix)); - } - cb->endRenderPass(); - } - cb->end(); - - IQueue::SSubmitInfo::SSemaphoreInfo retval = - { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS - }; - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = - { - {.cmdbuf = cb } - }; - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { - { - .semaphore = device_base_t::getCurrentAcquire().semaphore, - .value = device_base_t::getCurrentAcquire().acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - const IQueue::SSubmitInfo infos[] = - { - { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = {&retval,1} - } - }; - - if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) - { - retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal - m_realFrameIx--; - } - std::string caption = "[Nabla Engine] Mesh Loaders"; - { - caption += ", displaying ["; - caption += m_modelPath; - caption += "]"; - m_window->setCaption(caption); - } - return retval; - } +// Testing our Mitsuba Loader +class MitsubaLoaderTest final : public BuiltinResourcesApplication +{ + using base_t = BuiltinResourcesApplication; - inline bool onAppTerminated() override - { - if (m_saveGeomTaskFuture.valid()) + bool failedTest(const core::string& relPath) { - m_logger->log("Waiting for geometry writer to finish writing...", ILogger::ELL_INFO); - m_saveGeomTaskFuture.wait(); - } - - return device_base_t::onAppTerminated(); - } - -protected: - const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override - { - // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. - const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { - // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth - { - .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = { - // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later - .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, - // don't want any writes to be available, we'll clear - .srcAccessMask = ACCESS_FLAGS::NONE, - // destination needs to wait as early as possible - // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` - .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // because depth and color get cleared first no read mask - .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - // leave view offsets and flags default - }, - // color from ATTACHMENT_OPTIMAL to PRESENT_SRC + IAssetLoader::SAssetLoadParams params = {}; + params.logger = m_logger.get(); + auto asset = m_assetMgr->getAsset(relPath,params); + if (asset.getContents().empty()) { - .srcSubpass = 0, - .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = { - // last place where the color can get modified, depth is implicitly earlier - .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // only write ops, reads can't be made available - .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - // spec says nothing is needed when presentation is the destination + m_logger->log("Failed To Load %s",ILogger::ELL_ERROR); + return true; } - // leave view offsets and flags default - }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; - return dependencies; - } - -private: - // TODO: standardise this across examples, and take from `argv` - bool m_nonInteractiveTest = false; - - bool reloadModel() - { - if (m_nonInteractiveTest) // TODO: maybe also take from argv and argc - m_modelPath = (sharedInputCWD / "ply/Spanner-ply.ply").string(); - else - { - pfd::open_file file("Choose a supported Model File", sharedInputCWD.string(), - { - "All Supported Formats", "*.ply *.stl *.serialized *.obj", - "TODO (.ply)", "*.ply", - "TODO (.stl)", "*.stl", - "Mitsuba 0.6 Serialized (.serialized)", "*.serialized", - "Wavefront Object (.obj)", "*.obj" - }, - false - ); - if (file.result().empty()) - return false; - m_modelPath = file.result()[0]; - } - - // free up - m_renderer->m_instances.clear(); - m_renderer->clearGeometries({ .semaphore = m_semaphore.get(),.value = m_realFrameIx }); - m_assetMgr->clearAllAssetCache(); - - //! load the geometry - IAssetLoader::SAssetLoadParams params = {}; - params.logger = m_logger.get(); - auto asset = m_assetMgr->getAsset(m_modelPath, params); - if (asset.getContents().empty()) + // so we don't run out of RAM during testing + m_assetMgr->clearAllAssetCache(); return false; - - // - core::vector> geometries; - switch (asset.getAssetType()) - { - case IAsset::E_TYPE::ET_GEOMETRY: - for (const auto& item : asset.getContents()) - if (auto polyGeo = IAsset::castDown(item); polyGeo) - geometries.push_back(polyGeo); - break; - default: - m_logger->log("Asset loaded but not a supported type (ET_GEOMETRY,ET_GEOMETRY_COLLECTION)", ILogger::ELL_ERROR); - break; } - if (geometries.empty()) - return false; - if (m_saveGeom) - { - if (m_saveGeomTaskFuture.valid()) - { - m_logger->log("Waiting for previous geometry saving task to complete...", ILogger::ELL_INFO); - m_saveGeomTaskFuture.wait(); - } + public: + // Yay thanks to multiple inheritance we cannot forward ctors anymore + MitsubaLoaderTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} - std::string currentGeomSavePath = m_specifiedGeomSavePath.value_or((m_saveGeomPrefixPath / path(m_modelPath).filename()).generic_string()); - m_saveGeomTaskFuture = std::async( - std::launch::async, - [this, geometries, currentGeomSavePath] { writeGeometry( - geometries[0], - currentGeomSavePath - ); } - ); - } - - using aabb_t = hlsl::shapes::AABB<3, double>; - auto printAABB = [&](const aabb_t& aabb, const char* extraMsg = "")->void - { - m_logger->log("%s AABB is (%f,%f,%f) -> (%f,%f,%f)", ILogger::ELL_INFO, extraMsg, aabb.minVx.x, aabb.minVx.y, aabb.minVx.z, aabb.maxVx.x, aabb.maxVx.y, aabb.maxVx.z); - }; - auto bound = aabb_t::create(); - // convert the geometries + // we stuff all our work here because its a "single shot" app + bool onAppInitialized(smart_refctd_ptr&& system) override { - smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get() }); - - const auto transferFamily = getTransferUpQueue()->getFamilyIndex(); - - struct SInputs : CAssetConverter::SInputs - { - virtual inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUBuffer* buffer, const CAssetConverter::patch_t& patch) const - { - return sharedBufferOwnership; - } - - core::vector sharedBufferOwnership; - } inputs = {}; - core::vector> patches(geometries.size(), CSimpleDebugRenderer::DefaultPolygonGeometryPatch); - { - inputs.logger = m_logger.get(); - std::get>(inputs.assets) = { &geometries.front().get(),geometries.size() }; - std::get>(inputs.patches) = patches; - // set up shared ownership so we don't have to - core::unordered_set families; - families.insert(transferFamily); - families.insert(getGraphicsQueue()->getFamilyIndex()); - if (families.size() > 1) - for (const auto fam : families) - inputs.sharedBufferOwnership.push_back(fam); - } - - // reserve - auto reservation = converter->reserve(inputs); - if (!reservation) - { - m_logger->log("Failed to reserve GPU objects for CPU->GPU conversion!", ILogger::ELL_ERROR); + if (!base_t::onAppInitialized(std::move(system))) return false; - } - - // convert - { - auto semaphore = m_device->createSemaphore(0u); - - constexpr auto MultiBuffering = 2; - std::array, MultiBuffering> commandBuffers = {}; - { - auto pool = m_device->createCommandPool(transferFamily, IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT | IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); - pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, commandBuffers, smart_refctd_ptr(m_logger)); - } - commandBuffers.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - std::array commandBufferSubmits; - for (auto i = 0; i < MultiBuffering; i++) - commandBufferSubmits[i].cmdbuf = commandBuffers[i].get(); +// m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); - SIntendedSubmitInfo transfer = {}; - transfer.queue = getTransferUpQueue(); - transfer.scratchCommandBuffers = commandBufferSubmits; - transfer.scratchSemaphore = { - .semaphore = semaphore.get(), - .value = 0u, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS - }; + // first batch + if (failedTest("shapetest.xml")) return false; + if (failedTest("daily_pt.xml")) return false; + if (failedTest("brdf_eval_test.xml")) return false; + if (failedTest("brdf_eval_test_as.xml")) return false; + if (failedTest("brdf_eval_test_diffuse.xml")) return false; + if (failedTest("brdf_eval_test_lambert.xml")) return false; - CAssetConverter::SConvertParams cpar = {}; - cpar.utilities = m_utils.get(); - cpar.transfer = &transfer; + // some of our test scenes won't load without the `.serialized` support + m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); - // basically it records all data uploads and submits them right away - auto future = reservation.convert(cpar); - if (future.copy() != IQueue::RESULT::SUCCESS) - { - m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); - return false; - } - } + return true; + } - auto tmp = hlsl::float32_t4x3( - hlsl::float32_t3(1, 0, 0), - hlsl::float32_t3(0, 1, 0), - hlsl::float32_t3(0, 0, 1), - hlsl::float32_t3(0, 0, 0) - ); - core::vector worldTforms; - const auto& converted = reservation.getGPUObjects(); - for (const auto& geom : converted) - { - const auto promoted = geom.value->getAABB(); - printAABB(promoted, "Geometry"); - tmp[3].x += promoted.getExtent().x; - const auto promotedWorld = hlsl::float64_t3x4(worldTforms.emplace_back(hlsl::transpose(tmp))); - const auto transformed = hlsl::shapes::util::transform(promotedWorld, promoted); - printAABB(transformed, "Transformed"); - bound = hlsl::shapes::util::union_(transformed, bound); - } - printAABB(bound, "Total"); - if (!m_renderer->addGeometries({ &converted.front().get(),converted.size() })) - return false; + // One-shot App + bool keepRunning() override { return false; } - auto worlTformsIt = worldTforms.begin(); - for (const auto& geo : m_renderer->getGeometries()) - m_renderer->m_instances.push_back({ - .world = *(worlTformsIt++), - .packedGeo = &geo - }); - } + // One-shot App + void workLoopBody() override {} - // get scene bounds and reset camera + // Cleanup + bool onAppTerminated() override { - const double distance = 0.05; - const auto diagonal = bound.getExtent(); - { - const auto measure = hlsl::length(diagonal); - const auto aspectRatio = float(m_window->getWidth()) / float(m_window->getHeight()); - camera.setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(1.2f, aspectRatio, distance * measure * 0.1, measure * 4.0)); - camera.setMoveSpeed(measure * 0.04); - } - const auto pos = bound.maxVx + diagonal * distance; - camera.setPosition(vectorSIMDf(pos.x, pos.y, pos.z)); - const auto center = (bound.minVx + bound.maxVx) * 0.5; - camera.setTarget(vectorSIMDf(center.x, center.y, center.z)); + return base_t::onAppTerminated(); } - - // TODO: write out the geometry - - return true; - } - - void writeGeometry(smart_refctd_ptr geometry, const std::string& savePath) - { - IAsset* assetPtr = const_cast(static_cast(geometry.get())); - IAssetWriter::SAssetWriteParams params{ assetPtr }; - m_logger->log("Saving mesh to %s", ILogger::ELL_INFO, savePath.c_str()); - if (!m_assetMgr->writeAsset(savePath, params)) - m_logger->log("Failed to save %s", ILogger::ELL_ERROR, savePath.c_str()); - m_logger->log("Mesh successfully saved!", ILogger::ELL_INFO); - } - - // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers - constexpr static inline uint32_t MaxFramesInFlight = 3u; - // - smart_refctd_ptr m_renderer; - // - smart_refctd_ptr m_semaphore; - uint64_t m_realFrameIx = 0; - std::array, MaxFramesInFlight> m_cmdBufs; - // - InputSystem::ChannelReader mouse; - InputSystem::ChannelReader keyboard; - // - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); - // mutables - std::string m_modelPath; - - bool m_saveGeom = false; - std::future m_saveGeomTaskFuture; - std::optional m_specifiedGeomSavePath; - nbl::system::path m_saveGeomPrefixPath; }; -NBL_MAIN_FUNC(MeshLoadersApp) \ No newline at end of file + +NBL_MAIN_FUNC(MitsubaLoaderTest) \ No newline at end of file From 5a57a6bfbeaaf7de61d9dab4d8143c9e50da7ec7 Mon Sep 17 00:00:00 2001 From: devsh Date: Sat, 27 Sep 2025 22:13:44 +0200 Subject: [PATCH 028/219] input scene txt parser --- 14_MitsubaLoader/main.cpp | 86 ++++++++++++++++++++++++-------- 14_MitsubaLoader/test_scenes.txt | 7 +++ 2 files changed, 73 insertions(+), 20 deletions(-) create mode 100644 14_MitsubaLoader/test_scenes.txt diff --git a/14_MitsubaLoader/main.cpp b/14_MitsubaLoader/main.cpp index b0d1ecd2b..74e05b839 100644 --- a/14_MitsubaLoader/main.cpp +++ b/14_MitsubaLoader/main.cpp @@ -6,6 +6,10 @@ //#include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" +#include +#include + + using namespace nbl; using namespace nbl::core; using namespace nbl::hlsl; @@ -15,25 +19,70 @@ using namespace nbl::ui; using namespace nbl::video; using namespace nbl::examples; - // Testing our Mitsuba Loader class MitsubaLoaderTest final : public BuiltinResourcesApplication { using base_t = BuiltinResourcesApplication; - bool failedTest(const core::string& relPath) + bool test(const system::path& listPath) { - IAssetLoader::SAssetLoadParams params = {}; - params.logger = m_logger.get(); - auto asset = m_assetMgr->getAsset(relPath,params); - if (asset.getContents().empty()) + smart_refctd_ptr file; + { + ISystem::future_t> future; + using create_flags_t = IFileBase::E_CREATE_FLAGS; + m_system->createFile(future,listPath,create_flags_t::ECF_READ|create_flags_t::ECF_MAPPABLE); + if (!future.wait()) + return logFail("Failed to list of scenes to test"); + smart_refctd_ptr tmp; + future.acquire().move_into(tmp); + file = std::move(tmp); + } + + const auto base = file->getFileName().parent_path(); + const void* const ptr = file->getMappedPointer(); + const auto end = reinterpret_cast(ptr)+file->getSize(); + for (auto cursor=reinterpret_cast(ptr); cursorlog("Failed To Load %s",ILogger::ELL_ERROR); - return true; + cursor = std::find_if(cursor,end,std::not_fn(std::isspace)); + if (cursor==end) + break; + auto nextLine = [&]()->const char* + { + constexpr std::array newlines = {'\r','\n'}; + auto retval = std::find_first_of(cursor,end,newlines.begin(),newlines.end()); + while (++retvalgetAsset(relPath,params); + if (asset.getContents().empty()) + return logFail("Failed To Load %s",relPath.c_str()); + m_logger->log("Loaded %s",ILogger::ELL_INFO,relPath.c_str()); + // TODO: print True Material IR + // so we don't run out of RAM during testing + m_assetMgr->clearAllAssetCache(); + } + else if (*cursor!=';') + { + const char chr[2] = {*cursor,0}; + return logFail("Parser Error, encountered unsupprted character %s near line start",chr); + } + cursor = nextLine(); } - // so we don't run out of RAM during testing - m_assetMgr->clearAllAssetCache(); - return false; + return true; } public: @@ -48,18 +97,15 @@ class MitsubaLoaderTest final : public BuiltinResourcesApplication return false; // m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); - - // first batch - if (failedTest("shapetest.xml")) return false; - if (failedTest("daily_pt.xml")) return false; - if (failedTest("brdf_eval_test.xml")) return false; - if (failedTest("brdf_eval_test_as.xml")) return false; - if (failedTest("brdf_eval_test_diffuse.xml")) return false; - if (failedTest("brdf_eval_test_lambert.xml")) return false; - // some of our test scenes won't load without the `.serialized` support m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); + // public batch + if (!test(localInputCWD/"test_scenes.txt")) + return false; +// if (!test(sharedInputCWD/"Ditt-Reference-Scenes/private_test_scenes.xml")) +// return false; + return true; } diff --git a/14_MitsubaLoader/test_scenes.txt b/14_MitsubaLoader/test_scenes.txt new file mode 100644 index 000000000..4211e3cb6 --- /dev/null +++ b/14_MitsubaLoader/test_scenes.txt @@ -0,0 +1,7 @@ +; Here is my Commented line that batch file will skip (started with semicolons) +"../media/mitsuba/shapetest.xml" +"../media/mitsuba/daily_pt.xml" +;"../media/mitsuba/brdf_eval_test.xml" +;"../media/mitsuba/brdf_eval_test_as.xml" +;"../media/mitsuba/brdf_eval_test_diffuse.xml" +;"../media/mitsuba/brdf_eval_test_lambert.xml" From 7ade89f9e5b34866bf785be8fae2148a5b35193f Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 9 Oct 2025 11:56:27 +0200 Subject: [PATCH 029/219] move the stub along --- 14_MitsubaLoader/main.cpp | 8 +++++--- media | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/14_MitsubaLoader/main.cpp b/14_MitsubaLoader/main.cpp index 74e05b839..16a634c32 100644 --- a/14_MitsubaLoader/main.cpp +++ b/14_MitsubaLoader/main.cpp @@ -3,7 +3,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/examples/examples.hpp" -//#include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" +#include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" #include @@ -32,11 +32,13 @@ class MitsubaLoaderTest final : public BuiltinResourcesApplication using create_flags_t = IFileBase::E_CREATE_FLAGS; m_system->createFile(future,listPath,create_flags_t::ECF_READ|create_flags_t::ECF_MAPPABLE); if (!future.wait()) - return logFail("Failed to list of scenes to test"); + return logFail("Failed to list of scenes to test with path %s",listPath.string().c_str()); smart_refctd_ptr tmp; future.acquire().move_into(tmp); file = std::move(tmp); } + if (!file) + return logFail("Failed to open list of scenes to test with path %s",listPath.string().c_str()); const auto base = file->getFileName().parent_path(); const void* const ptr = file->getMappedPointer(); @@ -103,7 +105,7 @@ class MitsubaLoaderTest final : public BuiltinResourcesApplication // public batch if (!test(localInputCWD/"test_scenes.txt")) return false; -// if (!test(sharedInputCWD/"Ditt-Reference-Scenes/private_test_scenes.xml")) +// if (!test(sharedInputCWD/"Ditt-Reference-Scenes/private_test_scenes.txt")) // return false; return true; diff --git a/media b/media index c24f4e139..f895f4e3d 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit c24f4e13901554abc9fdf87081108cc7dca1db57 +Subproject commit f895f4e3d1f72c772267143fe60f891bfe9e8e82 From 38be9e27119e574fd9424bbbe3b955dfdd4616a4 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 9 Oct 2025 12:34:24 +0200 Subject: [PATCH 030/219] refuse load-as-buffer --- 14_MitsubaLoader/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/14_MitsubaLoader/main.cpp b/14_MitsubaLoader/main.cpp index 16a634c32..24ec71bbb 100644 --- a/14_MitsubaLoader/main.cpp +++ b/14_MitsubaLoader/main.cpp @@ -70,7 +70,7 @@ class MitsubaLoaderTest final : public BuiltinResourcesApplication IAssetLoader::SAssetLoadParams params = {}; params.logger = m_logger.get(); auto asset = m_assetMgr->getAsset(relPath,params); - if (asset.getContents().empty()) + if (asset.getContents().empty() || asset.getAssetType()!=IAsset::E_TYPE::ET_SCENE) return logFail("Failed To Load %s",relPath.c_str()); m_logger->log("Loaded %s",ILogger::ELL_INFO,relPath.c_str()); // TODO: print True Material IR @@ -98,7 +98,7 @@ class MitsubaLoaderTest final : public BuiltinResourcesApplication if (!base_t::onAppInitialized(std::move(system))) return false; -// m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); + m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); // some of our test scenes won't load without the `.serialized` support m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); From bfcff8a686409dd7c0d55607bb8cb6bcc0e0b80a Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 9 Oct 2025 16:38:59 +0200 Subject: [PATCH 031/219] adjust to change in CMitsubaLoader ctor --- 14_MitsubaLoader/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/14_MitsubaLoader/main.cpp b/14_MitsubaLoader/main.cpp index 24ec71bbb..b698340f2 100644 --- a/14_MitsubaLoader/main.cpp +++ b/14_MitsubaLoader/main.cpp @@ -98,7 +98,7 @@ class MitsubaLoaderTest final : public BuiltinResourcesApplication if (!base_t::onAppInitialized(std::move(system))) return false; - m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); + m_assetMgr->addAssetLoader(make_smart_refctd_ptr(core::smart_refctd_ptr(m_system))); // some of our test scenes won't load without the `.serialized` support m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); From c3b463a33f864ddb9f9b4099b660b313cad6ab5c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 13:36:06 +0200 Subject: [PATCH 032/219] NSC integration for IES Viewer example's shaders, enable CI, port compute input to HLSL, use BDA for buffer data, starting updating main.cpp then leave nasty ifdef 0 - shaders first since I can make them compile at build time, leave TODOs --- 50.IESProfileTest/CMakeLists.txt | 66 +++- 50.IESProfileTest/app_resources/common.hlsl | 41 ++ 50.IESProfileTest/app_resources/compute.hlsl | 231 ++++++++++++ .../{shader.frag => app_resources/pixel.hlsl} | 0 .../vertex.hlsl} | 0 50.IESProfileTest/compute/cdc.comp | 260 ------------- 50.IESProfileTest/compute/common.h | 14 - 50.IESProfileTest/inputs.json | 8 +- 50.IESProfileTest/main.cpp | 355 ++++++++++++++++-- CMakeLists.txt | 1 + 10 files changed, 667 insertions(+), 309 deletions(-) create mode 100644 50.IESProfileTest/app_resources/common.hlsl create mode 100644 50.IESProfileTest/app_resources/compute.hlsl rename 50.IESProfileTest/{shader.frag => app_resources/pixel.hlsl} (100%) rename 50.IESProfileTest/{shader.vert => app_resources/vertex.hlsl} (100%) delete mode 100644 50.IESProfileTest/compute/cdc.comp delete mode 100644 50.IESProfileTest/compute/common.h diff --git a/50.IESProfileTest/CMakeLists.txt b/50.IESProfileTest/CMakeLists.txt index 52e8e83f2..bbab38f1c 100644 --- a/50.IESProfileTest/CMakeLists.txt +++ b/50.IESProfileTest/CMakeLists.txt @@ -1,10 +1,62 @@ +nbl_create_executable_project("" "" "" "" "") +target_link_libraries(${EXECUTABLE_NAME} PRIVATE nlohmann_json::nlohmann_json) -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/compute.hlsl + app_resources/pixel.hlsl + app_resources/vertex.hlsl +) -nbl_create_executable_project("" "" "" nlohmann_json::nlohmann_json) +set(TODO [=[ +{ + "INPUT": "app_resources/pixel.hlsl", + "KEY": "pixel", + "COMPILE_OPTIONS": ["-T", "lib_${V}"], + "DEPENDS": [], + "CAPS": [] +}, +{ + "INPUT": "app_resources/vertex.hlsl", + "KEY": "vertex", + "COMPILE_OPTIONS": ["-T", "lib_${V}"], + "DEPENDS": [], + "CAPS": [] +} +]=]) -add_dependencies(${EXECUTABLE_NAME} nlohmann_json::nlohmann_json) -target_include_directories(${EXECUTABLE_NAME} PUBLIC $) \ No newline at end of file +set(V 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/compute.hlsl", + "KEY": "compute", + "COMPILE_OPTIONS": ["-T", "lib_${V}"], + "DEPENDS": [], + "CAPS": [] + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/50.IESProfileTest/app_resources/common.hlsl b/50.IESProfileTest/app_resources/common.hlsl new file mode 100644 index 000000000..8b0d14783 --- /dev/null +++ b/50.IESProfileTest/app_resources/common.hlsl @@ -0,0 +1,41 @@ +#ifndef _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ +#define _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +// -> TODO: use NBL_CONTEXPR or something +#ifndef UINT16_MAX +#define UINT16_MAX 65535u // would be cool if we have this define somewhere or GLSL do +#endif +#define M_PI 3.1415926535897932384626433832795f // would be cool if we have this define somewhere or GLSL do +#define M_HALF_PI M_PI/2.0f // would be cool if we have this define somewhere or GLSL do +#define QUANT_ERROR_ADMISSIBLE 1/1024 + +#define WORKGROUP_SIZE 256u +#define WORKGROUP_DIMENSION 16u +// <- + wipe whatever we already have + +using namespace nbl::hlsl; + +struct PushConstants +{ + uint64_t hAnglesBDA; + uint64_t vAnglesBDA; + uint64_t dataBDA; + float64_t maxIValue; + + uint32_t hAnglesCount; + uint32_t vAnglesCount; + uint32_t dataCount; + float32_t zAngleDegreeRotation; + + uint32_t mode; + + #ifdef __HLSL_VERSION + float32_t getHorizontalAngle(uint32_t ix) { return vk::RawBufferLoad(hAnglesBDA + sizeof(float32_t) * ix); } + float32_t getVerticalAngle(uint32_t ix) { return vk::RawBufferLoad(vAnglesBDA + sizeof(float32_t) * ix); } + float32_t getData(uint32_t ix) { return vk::RawBufferLoad(dataBDA + sizeof(float32_t) * ix); } + #endif // __HLSL_VERSION +}; + +#endif // _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ diff --git a/50.IESProfileTest/app_resources/compute.hlsl b/50.IESProfileTest/app_resources/compute.hlsl new file mode 100644 index 000000000..228a600d4 --- /dev/null +++ b/50.IESProfileTest/app_resources/compute.hlsl @@ -0,0 +1,231 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "common.hlsl" + +[[vk::binding(0, 0)]] RWTexture2D outIESCandelaImage; +[[vk::binding(1, 0)]] RWTexture2D outSphericalCoordinatesImage; +[[vk::binding(2, 0)]] RWTexture2D outOUVProjectionDirectionImage; +[[vk::binding(3, 0)]] RWTexture2D outPassTMask; + +[[vk::push_constant]] struct PushConstants pc; + +float32_t3 octahedronUVToDir(float32_t2 uv) +{ + float32_t3 position = float32_t3((uv * 2.0 - 1.0).xy, 0.0); + float32_t2 absP = float32_t2(abs(position.x), abs(position.y)); + + position.z = 1.0 - absP.x - absP.y; + + if (position.z < 0.0) + { + position.x = sign(position.x) * (1.0 - absP.y); + position.y = sign(position.y) * (1.0 - absP.x); + } + + // rotate position vector around Z-axis with "pc.zAngleDegreeRotation" + if(pc.zAngleDegreeRotation != 0.0) + { + float32_t rDegree = pc.zAngleDegreeRotation; + + const float32_t zAngleRadians = float32_t(rDegree * M_PI / 180.0); + const float32_t cosineV = cos(zAngleRadians); + const float32_t sineV = sin(zAngleRadians); + + position = float32_t3(cosineV * position.x - cosineV * position.y, sineV * position.x + sineV * position.y, position.z); + } + + return normalize(position); +} + +//! Returns spherical coordinates with physics convention in radians +/* + https://en.wikipedia.org/wiki/Spherical_coordinate_system#/media/File:3D_Spherical.svg + Retval.x is "theta" polar angle in range [0, PI] & Retval.y "phi" is azimuthal angle + in [-PI, PI] range +*/ + +float32_t2 sphericalDirToRadians(float32_t3 direction) +{ + float32_t theta = acos(clamp(direction.z/length(direction), -1.0, 1.0)); + float32_t phi = atan2(direction.y, direction.x); // TODO: check it + + return float32_t2(theta, phi); +} + +uint32_t implGetVUB(const float32_t angle) +{ + for(uint32_t i = 0; i < pc.vAnglesCount; ++i) + if(pc.getVerticalAngle(i) > angle) + return i; + + return pc.vAnglesCount; +} + +uint32_t implGetHUB(const float32_t angle) +{ + for (uint32_t i = 0; i < pc.hAnglesCount; ++i) + if (pc.getHorizontalAngle(i) > angle) + return i; + + return pc.hAnglesCount; +} + +uint32_t getVLB(const float32_t angle) +{ + return uint32_t(max(int(implGetVUB(angle)) - 1, 0)); +} + +uint32_t getHLB(const float32_t angle) +{ + return uint32_t(max(int(implGetHUB(angle)) - 1, 0)); +} + +uint32_t getVUB(const float32_t angle) +{ + return uint32_t(min(int(implGetVUB(angle)), int(pc.vAnglesCount) - 1)); +} + +uint32_t getHUB(const float32_t angle) +{ + return uint32_t(min(int(implGetHUB(angle)), int(pc.hAnglesCount) - 1)); +} + +float32_t getValue(uint32_t i, uint32_t j) +{ + return pc.getData(pc.vAnglesCount * i + j); +} + +// symmetry +#define ISOTROPIC 0u +#define QUAD_SYMETRIC 1u +#define HALF_SYMETRIC 2u +#define NO_LATERAL_SYMMET 3u + +uint32_t getSymmetry() // TODO: to reduce check time we could pass it with PCs +{ + if(pc.hAnglesCount < 2) // careful here, somebody can break it by feeding us with too much data by mistake + return ISOTROPIC; + + const float32_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); + + if(hABack == 90) + return QUAD_SYMETRIC; + else if(hABack == 180) // note that OTHER_HALF_SYMMETRIC = HALF_SYMETRIC here + return HALF_SYMETRIC; + else + return NO_LATERAL_SYMMET; +} + +float32_t wrapPhi(const float32_t phi, const uint32_t symmetry) //! wrap phi spherical coordinate compoment to range defined by symmetry +{ + switch (symmetry) + { + case ISOTROPIC: + return 0.0; + case QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range + { + float32_t wrapPhi = abs(phi); //! first MIRROR + + if(wrapPhi > M_HALF_PI) //! then REPEAT + wrapPhi = clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0, M_HALF_PI); + + return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 + } + case HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range + return abs(phi); //! eg. maps (in degress) 181 -> 179 or 359 -> 1 + case NO_LATERAL_SYMMET: + { + if(phi < 0) + return phi + 2.0 * M_PI; + else + return phi; + } + } + + return 69; +} + +float32_t sampleI(const float32_t2 sphericalCoordinates, const uint32_t symmetry) +{ + const float32_t vAngle = degrees(sphericalCoordinates.x), hAngle = degrees(wrapPhi(sphericalCoordinates.y, symmetry)); + + float32_t vABack = pc.getVerticalAngle(pc.vAnglesCount - 1); + float32_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); + + if (vAngle > vABack) + return 0.0; + + // bilinear interpolation + uint32_t j0 = getVLB(vAngle); + uint32_t j1 = getVUB(vAngle); + uint32_t i0 = symmetry == ISOTROPIC ? 0 : getHLB(hAngle); + uint32_t i1 = symmetry == ISOTROPIC ? 0 : getHUB(hAngle); + + float32_t uReciprocal = i1 == i0 ? 1.0 : 1.0 / (pc.getHorizontalAngle(i1) - pc.getHorizontalAngle(i0)); + float32_t vReciprocal = j1 == j0 ? 1.0 : 1.0 / (pc.getVerticalAngle(j1) - pc.getVerticalAngle(j0)); + + float32_t u = (hAngle - pc.getHorizontalAngle(i0)) * uReciprocal; + float32_t v = (vAngle - pc.getVerticalAngle(j0)) * vReciprocal; + + float32_t s0 = getValue(i0, j0) * (1.0 - v) + getValue(i0, j1) * (v); + float32_t s1 = getValue(i1, j0) * (1.0 - v) + getValue(i1, j1) * (v); + + return s0 * (1.0 - u) + s1 * u; +} + +//! Checks if (x,y) /in [0,PI] x [-PI,PI] product +/* + IES vertical range is [0, 180] degrees + and horizontal range is [0, 360] degrees + but for easier computations (MIRROR & MIRROW_REPEAT operations) + we represent horizontal range as [-180, 180] given spherical coordinates +*/ + +bool isWithinSCDomain(const float32_t2 p) +{ + const float32_t2 lb = float32_t2(0, -M_PI); + const float32_t2 ub = float32_t2(M_PI, M_PI); + + return all(lb <= p) && all(p <= ub); +} + +[numthreads(WORKGROUP_DIMENSION, WORKGROUP_DIMENSION, 1)] +[shader("compute")] +void main(uint32_t3 ID : SV_DispatchThreadID) +{ + uint32_t2 destinationSize; + outIESCandelaImage.GetDimensions(destinationSize.x, destinationSize.y); + const uint32_t2 pixelCoordinates = uint32_t2(glsl::gl_GlobalInvocationID().x, glsl::gl_GlobalInvocationID().y); + + const float32_t VERTICAL_INVERSE = 1.0f / float32_t(destinationSize.x); + const float32_t HORIZONTAL_INVERSE = 1.0f / float32_t(destinationSize.y); + + if (all(pixelCoordinates < destinationSize)) + { + const float32_t2 uv = float32_t2((float32_t(pixelCoordinates.x) + 0.5) * VERTICAL_INVERSE, (float32_t(pixelCoordinates.y) + 0.5) * HORIZONTAL_INVERSE); + const float32_t3 direction = octahedronUVToDir(uv); + const float32_t2 sphericalCoordinates = sphericalDirToRadians(direction); // third radius spherical compoment is normalized and skipped + + const float32_t intensity = sampleI(sphericalCoordinates, getSymmetry()); + + const float32_t normD = length(direction); + float32_t2 mask; + + if(1.0 - QUANT_ERROR_ADMISSIBLE <= normD && normD <= 1.0 + QUANT_ERROR_ADMISSIBLE) + mask.x = 1.0; // pass + else + mask.x = 0; + + if(isWithinSCDomain(sphericalCoordinates)) + mask.y = 1.0; // pass + else + mask.y = 0; + + outIESCandelaImage[pixelCoordinates] = uint32_t(intensity / pc.maxIValue); + outSphericalCoordinatesImage[pixelCoordinates] = sphericalCoordinates; + outOUVProjectionDirectionImage[pixelCoordinates] = direction; + outPassTMask[pixelCoordinates] = mask; + } +} \ No newline at end of file diff --git a/50.IESProfileTest/shader.frag b/50.IESProfileTest/app_resources/pixel.hlsl similarity index 100% rename from 50.IESProfileTest/shader.frag rename to 50.IESProfileTest/app_resources/pixel.hlsl diff --git a/50.IESProfileTest/shader.vert b/50.IESProfileTest/app_resources/vertex.hlsl similarity index 100% rename from 50.IESProfileTest/shader.vert rename to 50.IESProfileTest/app_resources/vertex.hlsl diff --git a/50.IESProfileTest/compute/cdc.comp b/50.IESProfileTest/compute/cdc.comp deleted file mode 100644 index 390d63acb..000000000 --- a/50.IESProfileTest/compute/cdc.comp +++ /dev/null @@ -1,260 +0,0 @@ -#version 430 core - -// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "common.h" - -layout(local_size_x = WORKGROUP_DIMENSION, local_size_y = WORKGROUP_DIMENSION) in; - -layout(set = 0, binding = 0, r16) restrict uniform image2D outIESCandelaImage; -layout(set = 0, binding = 1, rg32f) restrict uniform image2D outSphericalCoordinatesImage; -layout(set = 0, binding = 2, rgba32f) restrict uniform image2D outOUVProjectionDirectionImage; -layout(set = 0, binding = 3, rg8) restrict uniform image2D outPassTMask; - -layout(std430, set = 0, binding = 4) readonly buffer HorizontalAngles -{ - double hAngles[]; -}; - -layout(std430, set = 0, binding = 5) readonly buffer VerticalAngles -{ - double vAngles[]; -}; - -layout(std430, set = 0, binding = 6) readonly buffer Data -{ - double data[]; -}; - -layout(push_constant) uniform PushConstants -{ - float maxIValue; - float zAngleDegreeRotation; - uint mode; - uint dummy; -} pc; - -vec3 octahedronUVToDir(vec2 uv) -{ - vec3 position = vec3((uv * 2.0 - 1.0).xy, 0.0); - vec2 absP = vec2(abs(position.x), abs(position.y)); - - position.z = 1.0 - absP.x - absP.y; - - if (position.z < 0.0) - { - position.x = sign(position.x) * (1.0 - absP.y); - position.y = sign(position.y) * (1.0 - absP.x); - } - - // rotate position vector around Z-axis with "pc.zAngleDegreeRotation" - if(pc.zAngleDegreeRotation != 0.0) - { - float rDegree = pc.zAngleDegreeRotation; - - const float zAngleRadians = float(rDegree * M_PI / 180.0); - const float cosineV = cos(zAngleRadians); - const float sineV = sin(zAngleRadians); - - position = vec3(cosineV * position.x - cosineV * position.y, sineV * position.x + sineV * position.y, position.z); - //position = vec3((cosineV * position.x) - (sineV * position.y), (cosineV * position.x) + (sineV * position.y), position.z); - } - - return normalize(position); -} - -//! Returns spherical coordinates with physics convention in radians -/* - https://en.wikipedia.org/wiki/Spherical_coordinate_system#/media/File:3D_Spherical.svg - Retval.x is "theta" polar angle in range [0, PI] & Retval.y "phi" is azimuthal angle - in range [-PI, PI] range -*/ - -vec2 sphericalDirToRadians(vec3 direction) -{ - double theta = acos(clamp(direction.z/length(direction), -1.0, 1.0)); - double phi = atan(direction.y, direction.x); - - return vec2(theta, phi); -} - -uint implGetVUB(const float angle) -{ - const uint len = vAngles.length(); - - for(uint i = 0; i < len; ++i) - if(vAngles[i] > angle) - return i; - - return len; -} - -uint implGetHUB(const float angle) -{ - const uint len = hAngles.length(); - - for(uint i = 0; i < len; ++i) - if(hAngles[i] > angle) - return i; - - return len; -} - -uint getVLB(const float angle) -{ - return uint(max(int(implGetVUB(angle)) - 1, 0)); -} - -uint getHLB(const float angle) -{ - return uint(max(int(implGetHUB(angle)) - 1, 0)); -} - -uint getVUB(const float angle) -{ - return uint(min(int(implGetVUB(angle)), int(vAngles.length()) - 1)); -} - -uint getHUB(const float angle) -{ - return uint(min(int(implGetHUB(angle)), int(hAngles.length()) - 1)); -} - -double getValue(uint i, uint j) -{ - return data[vAngles.length() * i + j]; -} - -// symmetry -#define ISOTROPIC 0u -#define QUAD_SYMETRIC 1u -#define HALF_SYMETRIC 2u -#define NO_LATERAL_SYMMET 3u - -uint getSymmetry() // TODO: to reduce check time we could pass it with PCs -{ - const uint hALength = hAngles.length(); - if(hALength < 2) // careful here, somebody can break it by feeding us with too much data by mistake - return ISOTROPIC; - - const double hABack = hAngles[hALength - 1]; - - if(hABack == 90) - return QUAD_SYMETRIC; - else if(hABack == 180) // note that OTHER_HALF_SYMMETRIC = HALF_SYMETRIC here - return HALF_SYMETRIC; - else - return NO_LATERAL_SYMMET; -} - -float wrapPhi(const float phi, const uint symmetry) //! wrap phi spherical coordinate compoment to range defined by symmetry -{ - switch (symmetry) - { - case ISOTROPIC: - return 0.0; - case QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range - { - float wrapPhi = abs(phi); //! first MIRROR - - if(wrapPhi > M_HALF_PI) //! then REPEAT - wrapPhi = clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0, M_HALF_PI); - - return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 - } - case HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range - return abs(phi); //! eg. maps (in degress) 181 -> 179 or 359 -> 1 - case NO_LATERAL_SYMMET: - { - if(phi < 0) - return phi + 2.0 * M_PI; - else - return phi; - } - } - - return 69; -} - -double sampleI(const vec2 sphericalCoordinates, const uint symmetry) -{ - const float vAngle = degrees(sphericalCoordinates.x), hAngle = degrees(wrapPhi(sphericalCoordinates.y, symmetry)); - - double vABack = vAngles[vAngles.length() - 1]; - double hABack = hAngles[hAngles.length() - 1]; - - if (vAngle > vABack) - return 0.0; - - // bilinear interpolation - uint j0 = getVLB(vAngle); - uint j1 = getVUB(vAngle); - uint i0 = symmetry == ISOTROPIC ? 0 : getHLB(hAngle); - uint i1 = symmetry == ISOTROPIC ? 0 : getHUB(hAngle); - - double uReciprocal = i1 == i0 ? 1.0 : 1.0 / (hAngles[i1] - hAngles[i0]); - double vReciprocal = j1 == j0 ? 1.0 : 1.0 / (vAngles[j1] - vAngles[j0]); - - double u = (hAngle - hAngles[i0]) * uReciprocal; - double v = (vAngle - vAngles[j0]) * vReciprocal; - - double s0 = getValue(i0, j0) * (1.0 - v) + getValue(i0, j1) * (v); - double s1 = getValue(i1, j0) * (1.0 - v) + getValue(i1, j1) * (v); - - return s0 * (1.0 - u) + s1 * u; -} - -//! Checks if (x,y) /in [0,PI] x [-PI,PI] product -/* - IES vertical range is [0, 180] degrees - and horizontal range is [0, 360] degrees - but for easier computations (MIRROR & MIRROW_REPEAT operations) - we represent horizontal range as [-180, 180] given spherical coordinates -*/ - -bool isWithinSCDomain(vec2 point) -{ - const vec2 lb = vec2(0, -M_PI); - const vec2 ub = vec2(M_PI, M_PI); - - return all(lessThanEqual(lb, point)) && all(lessThanEqual(point, ub)); -} - -void main() -{ - const ivec2 destinationSize = imageSize(outIESCandelaImage); - const ivec2 pixelCoordinates = ivec2(gl_GlobalInvocationID.xy); - - const float VERTICAL_INVERSE = 1.0f / float(destinationSize.x); - const float HORIZONTAL_INVERSE = 1.0f / float(destinationSize.y); - - if (all(lessThan(pixelCoordinates, destinationSize))) - { - const vec2 uv = vec2((float(pixelCoordinates.x) + 0.5) * VERTICAL_INVERSE, (float(pixelCoordinates.y) + 0.5) * HORIZONTAL_INVERSE); - const vec3 direction = octahedronUVToDir(uv); - const vec2 sphericalCoordinates = sphericalDirToRadians(direction); // third radius spherical compoment is normalized and skipped - - const double intensity = sampleI(sphericalCoordinates, getSymmetry()); - const vec4 value = vec4(intensity / pc.maxIValue, 0, 0, 0); - - const double normD = length(direction); - vec2 mask; - - if(1.0 - QUANT_ERROR_ADMISSIBLE <= normD && normD <= 1.0 + QUANT_ERROR_ADMISSIBLE) - mask.x = 1.0; // pass - else - mask.x = 0; - - if(isWithinSCDomain(sphericalCoordinates)) - mask.y = 1.0; // pass - else - mask.y = 0; - - imageStore(outIESCandelaImage, pixelCoordinates, value); - imageStore(outSphericalCoordinatesImage, pixelCoordinates, vec4(sphericalCoordinates, 0, 1)); - imageStore(outOUVProjectionDirectionImage, pixelCoordinates, vec4(direction.xyz, 1)); - imageStore(outPassTMask, pixelCoordinates, vec4(mask.xy, 1, 1)); - } -} \ No newline at end of file diff --git a/50.IESProfileTest/compute/common.h b/50.IESProfileTest/compute/common.h deleted file mode 100644 index edbc94104..000000000 --- a/50.IESProfileTest/compute/common.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _COMMON_INCLUDED_ -#define _COMMON_INCLUDED_ - -#ifndef UINT16_MAX -#define UINT16_MAX 65535u // would be cool if we have this define somewhere or GLSL do -#endif -#define M_PI 3.1415926535897932384626433832795f // would be cool if we have this define somewhere or GLSL do -#define M_HALF_PI M_PI/2.0f // would be cool if we have this define somewhere or GLSL do -#define QUANT_ERROR_ADMISSIBLE 1/1024 - -#define WORKGROUP_SIZE 256u -#define WORKGROUP_DIMENSION 16u - -#endif // _COMMON_INCLUDED_ diff --git a/50.IESProfileTest/inputs.json b/50.IESProfileTest/inputs.json index d6b4ce528..0c3fe42ea 100644 --- a/50.IESProfileTest/inputs.json +++ b/50.IESProfileTest/inputs.json @@ -4,10 +4,10 @@ ], "files": [ "../media/mitsuba/ies/ISOTROPIC/007cfb11e343e2f42e3b476be4ab684e.ies", - "../media/mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies", - "../media/mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies", - "../media/mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies", - "../media/mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies" + "../media/mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies", + "../media/mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies", + "../media/mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies", + "../media/mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies" ], "gui": true, "writeAssets": false diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 7aa640f67..4785a8fd9 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -1,39 +1,343 @@ -// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#define BENCHMARK_TILL_FIRST_FRAME - -#include -#include -#include -#include "nbl/ext/ScreenShot/ScreenShot.h" -#include "compute/common.h" -#include - -// small hack to compile with the json library -namespace std -{ - int sprintf_s(char* buffer, size_t size, const char* format, ...) { - va_list args; - va_start(args, format); - int result = ::sprintf_s(buffer, size, format, args); - va_end(args); - return result; - } -} +int main() {} +#if 0 // TODO, first shaders -#include "nlohmann/json.hpp" +#include "nbl/examples/examples.hpp" using namespace nbl; using namespace core; -using json = nlohmann::json; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace scene; +using namespace nbl::examples; + +#define BENCHMARK_TILL_FIRST_FRAME #ifdef BENCHMARK_TILL_FIRST_FRAME const std::chrono::steady_clock::time_point startBenchmark = std::chrono::high_resolution_clock::now(); bool stopBenchamrkFlag = false; #endif + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; + +public: + IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({ 1280,720 }, EF_D16_UNORM, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for (auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!pool) + return logFail("Couldn't create Command Pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) + return logFail("Couldn't create Command Buffer!"); + } + + const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; + + /* + m_scene = CGeometryCreatorScene::create( + { + .transferQueue = getTransferUpQueue(), + .utilities = m_utils.get(), + .logger = m_logger.get(), + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies + }, + CSimpleDebugRenderer::DefaultPolygonGeometryPatch // we want to use the vertex data through UTBs + ); + */ + + auto scRes = static_cast(m_surface->getSwapchainResources()); + //const auto& geometries = m_scene->getInitParams().geometries; + + /* + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), scRes->getRenderpass(), 0, { &geometries.front().get(),geometries.size() }); + if (!m_renderer || m_renderer->getGeometries().size() != geometries.size()) + return logFail("Could not create Renderer!"); + // special case + { + const auto& pipelines = m_renderer->getInitParams().pipelines; + auto ix = 0u; + for (const auto& name : m_scene->getInitParams().geometryNames) + { + if (name == "Cone") + m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone]; + ix++; + } + } + m_renderer->m_instances.resize(1); + m_renderer->m_instances[0].world = float32_t3x4( + float32_t4(1, 0, 0, 0), + float32_t4(0, 1, 0, 0), + float32_t4(0, 0, 1, 0) + ); + */ + + // camera + { + core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); + core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); + matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(m_initialResolution.x) / float(m_initialResolution.y), 0.1, 10000); + camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); + } + + onAppInitializedFinish(); + return true; + } + + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override + { + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; + + auto* const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cb->beginDebugMarker("IESViewer Frame"); + { + camera.beginInputProcessing(nextPresentationTimestamp); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); mouseProcess(events); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, m_logger.get()); + camera.endInputProcessing(nextPresentationTimestamp); + } + + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = m_window->getWidth(); + viewport.height = m_window->getHeight(); + } + cb->setViewport(0u, 1u, &viewport); + + VkRect2D scissor = + { + .offset = { 0, 0 }, + .extent = { m_window->getWidth(), m_window->getHeight() }, + }; + cb->setScissor(0u, 1u, &scissor); + + { + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; + const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = &depthValue, + .renderArea = currentRenderArea + }; + + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + } + + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + // TODO: get rid of legacy matrices + { + memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); + memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); + } + const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix); + + // tear down scene every frame + //m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data() + gcIndex; + //m_renderer->render(cb, viewParams); + + cb->endRenderPass(); + cb->endDebugMarker(); + cb->end(); + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + std::string caption = "[Nabla Engine] IES Viewer"; + { + caption += ", displaying ["; + //caption += m_scene->getInitParams().geometryNames[gcIndex]; + caption += "]"; + m_window->setCaption(caption); + } + return retval; + } + +protected: + const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override + { + // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, + // don't want any writes to be available, we'll clear + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // color from ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + return dependencies; + } + +private: + // + //smart_refctd_ptr m_scene; + //smart_refctd_ptr m_renderer; + // + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array, device_base_t::MaxFramesInFlight> m_cmdBufs; + // + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + + // + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + + uint16_t gcIndex = {}; + + void mouseProcess(const nbl::ui::IMouseEventChannel::range_t& events) + { + for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) + { + auto ev = *eventIt; + + /* + if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL && m_renderer) + { + gcIndex += int16_t(core::sign(ev.scrollEvent.verticalScroll)); + gcIndex = core::clamp(gcIndex, 0ull, m_renderer->getGeometries().size() - 1); + } + */ + } + } +}; + +NBL_MAIN_FUNC(IESViewer) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* class IESCompute { public: @@ -820,4 +1124,7 @@ int main() } return 0; -} \ No newline at end of file +} +*/ + +#endif \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f8ce94f93..4ad88dfa4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,6 +73,7 @@ if(NBL_BUILD_EXAMPLES) #add_subdirectory(43_SumAndCDFFilters) add_subdirectory(47_DerivMapTest EXCLUDE_FROM_ALL) + add_subdirectory(50.IESProfileTest) add_subdirectory(54_Transformations EXCLUDE_FROM_ALL) add_subdirectory(55_RGB18E7S3 EXCLUDE_FROM_ALL) add_subdirectory(61_UI) From 29f64a283b2ccaef76f169b997a47647631d84c9 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 15:31:16 +0200 Subject: [PATCH 033/219] port IES vertex & pixel shaders --- 50.IESProfileTest/app_resources/PSInput.hlsl | 10 ++ 50.IESProfileTest/app_resources/VSInput.hlsl | 11 ++ 50.IESProfileTest/app_resources/pixel.hlsl | 102 +++++++++---------- 50.IESProfileTest/app_resources/vertex.hlsl | 19 ++-- 4 files changed, 81 insertions(+), 61 deletions(-) create mode 100644 50.IESProfileTest/app_resources/PSInput.hlsl create mode 100644 50.IESProfileTest/app_resources/VSInput.hlsl diff --git a/50.IESProfileTest/app_resources/PSInput.hlsl b/50.IESProfileTest/app_resources/PSInput.hlsl new file mode 100644 index 000000000..a4ed7b727 --- /dev/null +++ b/50.IESProfileTest/app_resources/PSInput.hlsl @@ -0,0 +1,10 @@ +#ifndef _NBL_THIS_EXAMPLE_PSINPUT_HLSL_ +#define _NBL_THIS_EXAMPLE_PSINPUT_HLSL_ + +#ifdef __HLSL_VERSION +struct PSInput +{ + float32_t4 position : SV_Position; +}; +#endif // __HLSL_VERSION +#endif // _NBL_THIS_EXAMPLE_PSINPUT_HLSL_ diff --git a/50.IESProfileTest/app_resources/VSInput.hlsl b/50.IESProfileTest/app_resources/VSInput.hlsl new file mode 100644 index 000000000..56dcae831 --- /dev/null +++ b/50.IESProfileTest/app_resources/VSInput.hlsl @@ -0,0 +1,11 @@ +#ifndef _NBL_THIS_EXAMPLE_VSINPUT_HLSL_ +#define _NBL_THIS_EXAMPLE_VSINPUT_HLSL_ + +#ifdef __HLSL_VERSION +struct VSInput +{ + [[vk::location(0)]] float3 position : POSITION; + [[vk::location(3)]] float3 normal : NORMAL; +}; +#endif // __HLSL_VERSION +#endif // _NBL_THIS_EXAMPLE_VSINPUT_HLSL_ diff --git a/50.IESProfileTest/app_resources/pixel.hlsl b/50.IESProfileTest/app_resources/pixel.hlsl index ec7a00b8f..be19f0309 100644 --- a/50.IESProfileTest/app_resources/pixel.hlsl +++ b/50.IESProfileTest/app_resources/pixel.hlsl @@ -1,71 +1,71 @@ -#version 430 core -// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include +#include "common.hlsl" +#include "PSInput.hlsl" -layout (location = 0) in vec3 Pos; +[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D inIESCandelaImage; +[[vk::combinedImageSampler]] [[vk::binding(1, 3)]] Texture2D inSphericalCoordinatesImage; +[[vk::combinedImageSampler]] [[vk::binding(2, 3)]] Texture2D inOUVProjectionDirectionImage; +[[vk::combinedImageSampler]] [[vk::binding(3, 3)]] Texture2D inPassTMaskImage; -layout (location = 0) out vec4 outColor; +[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState inIESCandelaSampler; +[[vk::combinedImageSampler]] [[vk::binding(1, 3)]] SamplerState inSphericalCoordinatesSampler; +[[vk::combinedImageSampler]] [[vk::binding(2, 3)]] SamplerState inOUVProjectionDirectionSampler; +[[vk::combinedImageSampler]] [[vk::binding(3, 3)]] SamplerState inPassTMaskSampler; -layout(set = 3, binding = 0) uniform sampler2D inIESCandelaImage; -layout(set = 3, binding = 1) uniform sampler2D inSphericalCoordinatesImage; -layout(set = 3, binding = 2) uniform sampler2D inOUVProjectionDirectionImage; -layout(set = 3, binding = 3) uniform sampler2D inPassTMask; +[[vk::push_constant]] struct PushConstants pc; -layout(push_constant) uniform PushConstants +float32_t2 iesDirToUv(float32_t3 dir) { - float maxIValue; - float zAngleDegreeRotation; - uint mode; - uint dummy; -} pc; + float32_t sum = dot(float32_t3(1.0f, 1.0f, 1.0f), abs(dir)); + float32_t3 s = dir / sum; -#define M_PI 3.1415926536 + if (s.z < 0.0f) + s.xy = sign(s.xy) * (1.0f - abs(s.yx)); -float plot(float cand, float pct, float bold){ - return smoothstep( pct-0.005*bold, pct, cand) - - smoothstep( pct, pct+0.005*bold, cand); + return s.xy * 0.5f + 0.5f; +} + +float32_t plot(float32_t cand, float32_t pct, float32_t bold) +{ + return smoothstep(pct-0.005*bold, pct, cand) - smoothstep( pct, pct+0.005*bold, cand); } // vertical cut of IES (i.e. cut by plane x = 0) -float f(vec2 uv) { - return texture(inIESCandelaImage,nbl_glsl_IES_convert_dir_to_uv(normalize(vec3(uv.x, 0.001, uv.y)))).x; - // float vangle = (abs(atan(uv.x,uv.y)))/(M_PI); - // float hangle = uv.x <= 0.0 ? 0.0 : 1.0; - // return texture(inIESCandelaImage,vec2(hangle,vangle)).x; +float32_t f(float32_t2 uv) +{ + return inIESCandelaImage.Sample(inIESCandelaSampler, iesDirToUv(normalize(float32_t3(uv.x, 0.001, uv.y)))).x; } -void main() +[shader("pixel")] +float32_t4 main(PSInput input) : SV_Target0 { - vec2 ndc = Pos.xy; - vec2 uv = (ndc + 1) / 2; + float32_t2 ndc = input.position.xy; + float32_t2 uv = (ndc + 1) / 2; - if(pc.mode == 0) + switch (pc.mode) { - float dist = length(ndc)*1.015625; - vec3 col = vec3(plot(dist,1.0,0.75)); + case 0: + { + float32_t dist = length(ndc) * 1.015625f; + float32_t p = plot(dist, 1.0f, 0.75f); + float32_t3 col = float32_t3(p, p, p); - float normalizedStrength = f(ndc); - if (dist Date: Tue, 14 Oct 2025 16:16:26 +0200 Subject: [PATCH 034/219] save 50. example refactor work, add AppInputParser.cpp/.hpp, update inputs, load prebuilt shaders and parse args --- 50.IESProfileTest/AppInputParser.cpp | 109 ++++ 50.IESProfileTest/AppInputParser.hpp | 21 + 50.IESProfileTest/CMakeLists.txt | 43 +- 50.IESProfileTest/inputs.json | 12 +- 50.IESProfileTest/main.cpp | 922 ++------------------------- 5 files changed, 198 insertions(+), 909 deletions(-) create mode 100644 50.IESProfileTest/AppInputParser.cpp create mode 100644 50.IESProfileTest/AppInputParser.hpp diff --git a/50.IESProfileTest/AppInputParser.cpp b/50.IESProfileTest/AppInputParser.cpp new file mode 100644 index 000000000..6719bdf74 --- /dev/null +++ b/50.IESProfileTest/AppInputParser.cpp @@ -0,0 +1,109 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "AppInputParser.hpp" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace scene; +using namespace nbl::examples; +using namespace nlohmann; + +bool AppInputParser::parse(std::vector& out, const std::string input, const std::string cwd) +{ + const auto jInputFile = std::filesystem::absolute(input); + const auto sjInputFile = jInputFile.string(); + + std::ifstream file(sjInputFile.c_str()); + if (!file.is_open()) { + + logger.log("Could not open \"%s\" file.", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + std::stringstream buffer; + buffer << file.rdbuf(); + const auto jsonBuffer = buffer.str(); + + if (jsonBuffer.empty()) + { + logger.log("\"%s\" file is empty!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + const auto jsonMap = json::parse(jsonBuffer.c_str()); + + if (!jsonMap["directories"].is_array()) + { + logger.log("\"%s\" file is empty!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + if (!jsonMap["files"].is_array()) + { + logger.log("\"%s\" file's field \"files\" is not an array!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + if (!jsonMap["writeAssets"].is_boolean()) + { + logger.log("\"%s\" file's field \"writeAssets\" is not a boolean!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + auto addFile = [&](const std::string_view in) -> bool + { + auto path = std::filesystem::absolute(cwd / std::filesystem::path(in)); + + if (std::filesystem::exists(path) && std::filesystem::is_regular_file(path) && path.extension() == ".ies") + out.push_back(path.string()); + else + { + logger.log("Invalid \"%s\" input!", system::ILogger::ELL_ERROR, path.string().c_str()); + return false; + } + + return true; + }; + + auto addFiles = [&](const std::string_view directoryPath) -> bool + { + auto directory(std::filesystem::absolute(cwd / std::filesystem::path(directoryPath))); + if (!std::filesystem::exists(directory) || !std::filesystem::is_directory(directory)) + { + logger.log("Invalid \"%s\" directory!", system::ILogger::ELL_ERROR, directory.string().c_str()); + return false; + } + + for (const auto& entry : std::filesystem::directory_iterator(directory)) + if (!addFile(entry.path().string().c_str())) + return false; + + return true; + }; + + // parse json + { + std::vector jDirectories; + jsonMap["directories"].get_to(jDirectories); + + for (const auto& it : jDirectories) + if (!addFiles(it)) + return false; + + std::vector jFiles; + jsonMap["files"].get_to(jFiles); + + for (const auto& it : jFiles) + if (!addFile(it)) + return false; + } + + return true; +} \ No newline at end of file diff --git a/50.IESProfileTest/AppInputParser.hpp b/50.IESProfileTest/AppInputParser.hpp new file mode 100644 index 000000000..dc8f67128 --- /dev/null +++ b/50.IESProfileTest/AppInputParser.hpp @@ -0,0 +1,21 @@ +#ifndef _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ +#define _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ + +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" +#include "nlohmann/json.hpp" + +struct AppInputParser +{ +public: + AppInputParser(nbl::system::logger_opt_ptr _logger = nullptr) : logger(_logger) {} + bool parse(std::vector& out, const std::string input, const std::string cwd = "."); + +private: + nbl::system::logger_opt_ptr logger; +}; + +#endif // _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ \ No newline at end of file diff --git a/50.IESProfileTest/CMakeLists.txt b/50.IESProfileTest/CMakeLists.txt index bbab38f1c..311b981b5 100644 --- a/50.IESProfileTest/CMakeLists.txt +++ b/50.IESProfileTest/CMakeLists.txt @@ -1,4 +1,10 @@ -nbl_create_executable_project("" "" "" "" "") +set(SRCs + AppInputParser.cpp + AppInputParser.hpp + inputs.json +) + +nbl_create_executable_project("${SRCs}" "" "" "" "") target_link_libraries(${EXECUTABLE_NAME} PRIVATE nlohmann_json::nlohmann_json) set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") @@ -8,23 +14,8 @@ set(DEPENDS app_resources/pixel.hlsl app_resources/vertex.hlsl ) - -set(TODO [=[ -{ - "INPUT": "app_resources/pixel.hlsl", - "KEY": "pixel", - "COMPILE_OPTIONS": ["-T", "lib_${V}"], - "DEPENDS": [], - "CAPS": [] -}, -{ - "INPUT": "app_resources/vertex.hlsl", - "KEY": "vertex", - "COMPILE_OPTIONS": ["-T", "lib_${V}"], - "DEPENDS": [], - "CAPS": [] -} -]=]) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(V 6_8) set(JSON [=[ @@ -35,7 +26,21 @@ set(JSON [=[ "COMPILE_OPTIONS": ["-T", "lib_${V}"], "DEPENDS": [], "CAPS": [] - } + }, + { + "INPUT": "app_resources/pixel.hlsl", + "KEY": "pixel", + "COMPILE_OPTIONS": ["-T", "lib_${V}"], + "DEPENDS": [], + "CAPS": [] + }, + { + "INPUT": "app_resources/vertex.hlsl", + "KEY": "vertex", + "COMPILE_OPTIONS": ["-T", "lib_${V}"], + "DEPENDS": [], + "CAPS": [] + } ] ]=]) string(CONFIGURE "${JSON}" JSON) diff --git a/50.IESProfileTest/inputs.json b/50.IESProfileTest/inputs.json index 0c3fe42ea..fbb833112 100644 --- a/50.IESProfileTest/inputs.json +++ b/50.IESProfileTest/inputs.json @@ -1,13 +1,13 @@ { "directories": [ - "../media/mitsuba/ies/packages/leomoon-dot-com_ies-lights-pack/ies-lights-pack" + "mitsuba/ies/packages/leomoon-dot-com_ies-lights-pack/ies-lights-pack" ], "files": [ - "../media/mitsuba/ies/ISOTROPIC/007cfb11e343e2f42e3b476be4ab684e.ies", - "../media/mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies", - "../media/mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies", - "../media/mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies", - "../media/mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies" + "mitsuba/ies/ISOTROPIC/007cfb11e343e2f42e3b476be4ab684e.ies", + "mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies", + "mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies", + "mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies", + "mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies" ], "gui": true, "writeAssets": false diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 4785a8fd9..e4d20830b 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -2,10 +2,9 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -int main() {} -#if 0 // TODO, first shaders - #include "nbl/examples/examples.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" +#include "AppInputParser.hpp" using namespace nbl; using namespace core; @@ -24,11 +23,6 @@ const std::chrono::steady_clock::time_point startBenchmark = std::chrono::high_r bool stopBenchamrkFlag = false; #endif - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApplication { using device_base_t = MonoWindowApplication; @@ -47,68 +41,52 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; - m_semaphore = m_device->createSemaphore(m_realFrameIx); - if (!m_semaphore) - return logFail("Failed to Create a Semaphore!"); + std::vector inputList; + AppInputParser parser(system::logger_opt_ptr(m_logger.get())); + if (!parser.parse(inputList, "../inputs.json", "../../media")) + return false; - auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - for (auto i = 0u; i < MaxFramesInFlight; i++) + auto createShader = [&]() -> smart_refctd_ptr { - if (!pool) - return logFail("Couldn't create Command Pool!"); - if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) - return logFail("Couldn't create Command Buffer!"); - } + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = system::logger_opt_ptr(m_logger.get()); + lp.workingDirectory = "app_resources"; - const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key, lp); + const auto assets = assetBundle.getContents(); - /* - m_scene = CGeometryCreatorScene::create( + if (assets.empty()) { - .transferQueue = getTransferUpQueue(), - .utilities = m_utils.get(), - .logger = m_logger.get(), - .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies - }, - CSimpleDebugRenderer::DefaultPolygonGeometryPatch // we want to use the vertex data through UTBs - ); - */ + m_logger->log("Could not load \"%s\" shader!", system::ILogger::ELL_ERROR, key.data()); + return nullptr; + } - auto scRes = static_cast(m_surface->getSwapchainResources()); - //const auto& geometries = m_scene->getInitParams().geometries; + auto spirvShader = IAsset::castDown(assets[0]); - /* - m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), scRes->getRenderpass(), 0, { &geometries.front().get(),geometries.size() }); - if (!m_renderer || m_renderer->getGeometries().size() != geometries.size()) - return logFail("Could not create Renderer!"); - // special case - { - const auto& pipelines = m_renderer->getInitParams().pipelines; - auto ix = 0u; - for (const auto& name : m_scene->getInitParams().geometryNames) - { - if (name == "Cone") - m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone]; - ix++; - } - } - m_renderer->m_instances.resize(1); - m_renderer->m_instances[0].world = float32_t3x4( - float32_t4(1, 0, 0, 0), - float32_t4(0, 1, 0, 0), - float32_t4(0, 0, 1, 0) - ); - */ + if (spirvShader) + m_logger->log("Loaded \"%s\" shader!", system::ILogger::ELL_INFO, key.data()); + else + m_logger->log("Could not cast \"%s\" asset to IShader!", system::ILogger::ELL_ERROR, key.data()); + + return spirvShader; + }; - // camera + #define CREATE_SHADER(SHADER, PATH) \ + if (!(SHADER = createShader.template operator()() )) return false; + + smart_refctd_ptr compute, pixel, vertex; { - core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); - core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(m_initialResolution.x) / float(m_initialResolution.y), 0.1, 10000); - camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); + m_logger->log("Loading GPU shaders..", system::ILogger::ELL_INFO); + auto start = std::chrono::high_resolution_clock::now(); + CREATE_SHADER(compute, "compute") + CREATE_SHADER(pixel, "pixel") + CREATE_SHADER(vertex, "vertex") + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished loading GPU shaders, took %s seconds!", system::ILogger::ELL_PERFORMANCE, took.c_str()); } - onAppInitializedFinish(); return true; } @@ -303,828 +281,4 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } }; -NBL_MAIN_FUNC(IESViewer) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -/* -class IESCompute -{ - public: - IESCompute(video::IVideoDriver* _driver, asset::IAssetManager* _assetManager, const std::vector& _assets) - : assets(_assets), driver(_driver), generalPurposeOffset(0), pushConstant({(float)getProfile(0).getMaxCandelaValue(), 0.f}) - { - createGPUEnvironment(_assetManager); - - fbo = createFBO(driver->getScreenSize().Width, driver->getScreenSize().Height); - } - ~IESCompute() {} - - enum E_MODE : uint32_t - { - EM_CDC, //! Candlepower Distribution Curve - EM_IES_C, //! IES Candela - EM_SPERICAL_C, //! Sperical coordinates - EM_DIRECTION, //! Sample direction - EM_PASS_T_MASK, //! Test mask - EM_SIZE - }; - - enum E_BINDINGS - { - EB_IMAGE_IES_C, //! Image with IES Candela data - EB_IMAGE_S, //! Image with spehircal coordinates data - EB_IMAGE_D, //! Image with direction data - EB_IMAGE_T_MASK,//! Image with test mask data - EB_SSBO_HA, //! IES Profile SSBO Horizontal Angles - EB_SSBO_VA, //! IES Profile SSBO Vertical Angles - EB_SSBO_D, //! IES Profile SSBO Data - EB_SIZE - }; - - const asset::CIESProfile& getProfile(const size_t& assetIndex) - { - return assets[assetIndex].getMetadata()->selfCast()->profile; - } - - const asset::CIESProfile& getActiveProfile() - { - return getProfile(generalPurposeOffset); - } - - void begin() - { - driver->setRenderTarget(fbo); - const float clear[4]{ 0.f,0.f,0.f,1.f }; - driver->clearColorBuffer(video::EFAP_COLOR_ATTACHMENT0, clear); - driver->beginScene(true, false, video::SColor(255, 0, 0, 0)); - } - - void dispatch() - { - auto& gpue = m_gpue; - - driver->bindComputePipeline(gpue.cPipeline.get()); - driver->bindDescriptorSets(EPBP_COMPUTE, gpue.cPipeline->getLayout(), 0u, 1u, &gpue.cDescriptorSet.get(), nullptr); - driver->pushConstants(gpue.cPipeline->getLayout(), asset::ISpecializedShader::ESS_COMPUTE, 0u, sizeof(PushConstant), &pushConstant); - - const auto xGroups = (getActiveProfile().getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u; - driver->dispatch(xGroups, xGroups, 1u); - - COpenGLExtensionHandler::extGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); - } - - void renderpass() - { - auto& gpue = m_gpue; - - driver->bindGraphicsPipeline(gpue.gPipeline.get()); - driver->bindDescriptorSets(video::EPBP_GRAPHICS, gpue.gPipeline->getLayout(), 3u, 1u, &gpue.gDescriptorSet.get(), nullptr); - driver->pushConstants(gpue.gPipeline->getLayout(), asset::ISpecializedShader::ESS_FRAGMENT, 0u, sizeof(PushConstant), &pushConstant); - driver->drawMeshBuffer(gpue.mBuffer.get()); - } - - void end() - { - driver->blitRenderTargets(fbo, nullptr, false, false); - driver->endScene(); - - #ifdef BENCHMARK_TILL_FIRST_FRAME - if (!stopBenchamrkFlag) - { - const std::chrono::steady_clock::time_point stopBenchmark = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(stopBenchmark - startBenchmark); - std::cout << "Time taken till first render pass: " << duration.count() << " milliseconds" << std::endl; - stopBenchamrkFlag = true; - } - #endif - } - - void updateZDegree(const asset::CIESProfile::IES_STORAGE_FORMAT& degreeOffset) - { - const auto& profile = getProfile(generalPurposeOffset); - const auto newDegreeRotation = std::clamp(pushConstant.zAngleDegreeRotation + degreeOffset, profile.getHoriAngles().front(), profile.getHoriAngles().back()); - pushConstant.zAngleDegreeRotation = newDegreeRotation; - } - - void updateGeneralPurposeOffset(const int8_t& offset) - { - const auto newOffset = std::clamp(int64_t(generalPurposeOffset) + int64_t(core::sign(offset)), int64_t(0), int64_t(assets.size() - 1)); - - if (newOffset != generalPurposeOffset) - { - generalPurposeOffset = newOffset; - - // not elegant way to do it here but lets leave it as it is - updateCDescriptorSets(); // flush descriptor set - updateGDescriptorSets(); // flush descriptor set - - const auto& profile = getActiveProfile(); - pushConstant.maxIValue = (float)profile.getMaxCandelaValue(); - } - } - - const asset::CIESProfile::IES_STORAGE_FORMAT getZDegree() - { - const auto& profile = getProfile(generalPurposeOffset); - return pushConstant.zAngleDegreeRotation + (profile.getSymmetry() == asset::CIESProfile::OTHER_HALF_SYMMETRIC ? 90.0 : 0.0); // real IES horizontal angle has 90.0 degress offset if OTHER_HALF_SYMMETRY, we handle it because of legacy IES 1995 specification case - } - - void updateMode(const E_MODE& mode) - { - pushConstant.mode = static_cast(mode); - } - - const auto& getMode() - { - return pushConstant.mode; - } - - private: - - void createGPUEnvironment(asset::IAssetManager* _assetManager) - { - auto gpuSpecializedShaderFromFile = [&](const char* path) - { - auto bundle = _assetManager->getAsset(path, {}); - auto shader = core::smart_refctd_ptr_static_cast(*bundle.getContents().begin()); - - return driver->getGPUObjectsFromAssets(&shader, &shader + 1u)->operator[](0); // omg - }; - - auto& gpue = m_gpue; - createGPUDescriptors(); - const auto initIdx = generalPurposeOffset; - - // Compute - { - const std::vector bindings = getCBindings(); - { - auto descriptorSetLayout = driver->createGPUDescriptorSetLayout(bindings.data(), bindings.data() + bindings.size()); - asset::SPushConstantRange range = { asset::ISpecializedShader::ESS_COMPUTE, 0u, sizeof(PushConstant) }; - - gpue.cPipeline = driver->createGPUComputePipeline(nullptr, driver->createGPUPipelineLayout(&range, &range + 1u, core::smart_refctd_ptr(descriptorSetLayout)), gpuSpecializedShaderFromFile("../compute/cdc.comp")); - gpue.cDescriptorSet = driver->createGPUDescriptorSet(std::move(descriptorSetLayout)); - } - - { - for (auto i = 0; i < EB_SIZE; i++) - { - gpue.cwrites[i].dstSet = gpue.cDescriptorSet.get(); - gpue.cwrites[i].binding = i; - gpue.cwrites[i].arrayElement = 0u; - gpue.cwrites[i].count = 1u; - gpue.cwrites[i].info = &gpue.cinfos[i]; - } - - gpue.cwrites[EB_IMAGE_IES_C].descriptorType = asset::EDT_STORAGE_IMAGE; - gpue.cwrites[EB_IMAGE_S].descriptorType = asset::EDT_STORAGE_IMAGE; - gpue.cwrites[EB_IMAGE_D].descriptorType = asset::EDT_STORAGE_IMAGE; - gpue.cwrites[EB_IMAGE_T_MASK].descriptorType = asset::EDT_STORAGE_IMAGE; - gpue.cwrites[EB_SSBO_HA].descriptorType = asset::EDT_STORAGE_BUFFER; - gpue.cwrites[EB_SSBO_VA].descriptorType = asset::EDT_STORAGE_BUFFER; - gpue.cwrites[EB_SSBO_D].descriptorType = asset::EDT_STORAGE_BUFFER; - - updateCDescriptorSets(); - } - } - - // Graphics - { - const std::vector bindings = getGBindings(); - { - auto descriptorSetLayout = driver->createGPUDescriptorSetLayout(bindings.data(), bindings.data() + bindings.size()); - - auto mesh = _assetManager->getGeometryCreator()->createRectangleMesh(vector2df_SIMD(1.0, 1.0)); - auto cpusphere = core::make_smart_refctd_ptr(nullptr, nullptr, mesh.bindings, std::move(mesh.indexBuffer)); - - cpusphere->setBoundingBox(mesh.bbox); - cpusphere->setIndexType(mesh.indexType); - cpusphere->setIndexCount(mesh.indexCount); - - auto vShader = gpuSpecializedShaderFromFile("../shader.vert"); - auto fShader = gpuSpecializedShaderFromFile("../shader.frag"); - - video::IGPUSpecializedShader* shaders[] = { vShader.get(), fShader.get() }; - asset::SRasterizationParams raster; - - asset::SPushConstantRange range = { asset::ISpecializedShader::ESS_FRAGMENT, 0u, sizeof(PushConstant) }; - gpue.gPipeline = driver->createGPURenderpassIndependentPipeline(nullptr, driver->createGPUPipelineLayout(&range, &range + 1u, nullptr, nullptr, nullptr, core::smart_refctd_ptr(descriptorSetLayout)), shaders, shaders + 2, mesh.inputParams, asset::SBlendParams{}, mesh.assemblyParams, raster); - gpue.gDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr(descriptorSetLayout)); - gpue.mBuffer = driver->getGPUObjectsFromAssets(&cpusphere.get(), &cpusphere.get() + 1)->front(); - } - - auto createSampler = [&]() - { - return driver->createGPUSampler({ asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETBC_FLOAT_OPAQUE_BLACK,asset::ISampler::ETF_LINEAR,asset::ISampler::ETF_LINEAR,asset::ISampler::ESMM_LINEAR,0u,false,asset::ECO_ALWAYS }); - }; - - gpue.sampler = createSampler(); - - for (auto i = 0; i < gpue.NBL_D_IMAGES_AMOUNT; i++) - { - gpue.gwrites[i].dstSet = gpue.gDescriptorSet.get(); - gpue.gwrites[i].binding = i; - gpue.gwrites[i].count = 1u; - gpue.gwrites[i].arrayElement = 0u; - gpue.gwrites[i].descriptorType = asset::EDT_COMBINED_IMAGE_SAMPLER; - gpue.gwrites[i].info = gpue.ginfos + i; - } - - updateGDescriptorSets(); - } - } - - void createGPUDescriptors() - { - auto createCPUBuffer = [&](const auto& pInput) - { - core::smart_refctd_ptr buffer = core::make_smart_refctd_ptr(sizeof(asset::CIESProfile::IES_STORAGE_FORMAT) * pInput.size()); - memcpy(buffer->getPointer(), pInput.data(), buffer->getSize()); - - return buffer; - }; - - for(size_t i = 0; i < assets.size(); ++i) - { - const auto& profile = getProfile(i); - auto& cssbod = m_gpue.CSSBOD.emplace_back(); - - auto createGPUBuffer = [&](const auto& cpuBuffer) - { - return driver->createFilledDeviceLocalGPUBufferOnDedMem(cpuBuffer->getSize(), cpuBuffer->getPointer()); - }; - - cssbod.hAngles = createGPUBuffer(createCPUBuffer(profile.getHoriAngles())); - cssbod.vAngles = createGPUBuffer(createCPUBuffer(profile.getVertAngles())); - cssbod.data = createGPUBuffer(createCPUBuffer(profile.getData())); - - const auto optimalResolution = profile.getOptimalIESResolution(); - - cssbod.dImageIESC = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); - cssbod.dImageS = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); - cssbod.dImageD = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); - cssbod.dImageTMask = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); - } - } - - void updateCDescriptorSets() - { - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_IES_C]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_S]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_D]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_T_MASK]); - - fillSSBODescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_SSBO_HA]); - fillSSBODescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_SSBO_VA]); - fillSSBODescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_SSBO_D]); - - const core::smart_refctd_ptr proxy(m_gpue.cPipeline->getLayout()->getDescriptorSetLayout(0)); - m_gpue.cDescriptorSet = core::smart_refctd_ptr(driver->createGPUDescriptorSet(core::smart_refctd_ptr(proxy))); - - for (auto i = 0; i < EB_SIZE; i++) - m_gpue.cwrites[i].dstSet = m_gpue.cDescriptorSet.get(); - - driver->updateDescriptorSets(EB_SIZE, m_gpue.cwrites, 0u, nullptr); - } - - void updateGDescriptorSets() - { - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_IES_C]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_S]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_D]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_T_MASK]); - - const core::smart_refctd_ptr proxy(m_gpue.gPipeline->getLayout()->getDescriptorSetLayout(3)); - m_gpue.gDescriptorSet = core::smart_refctd_ptr(driver->createGPUDescriptorSet(core::smart_refctd_ptr(proxy))); - - for (auto i = 0; i < m_gpue.NBL_D_IMAGES_AMOUNT; i++) - m_gpue.gwrites[i].dstSet = m_gpue.gDescriptorSet.get(); - - driver->updateDescriptorSets(m_gpue.NBL_D_IMAGES_AMOUNT, m_gpue.gwrites, 0u, nullptr); - } - - template - void fillSSBODescriptorInfo(const size_t assetIndex, IGPUDescriptorSet::SDescriptorInfo& info) - { - static_assert(binding == EB_SSBO_HA || binding == EB_SSBO_VA || binding == EB_SSBO_D); - - const auto& profile = getProfile(assetIndex); - auto& cssbod = m_gpue.CSSBOD[assetIndex]; - - core::smart_refctd_ptr proxy; - - if constexpr (binding == EB_SSBO_HA) - proxy = core::smart_refctd_ptr(cssbod.hAngles); - else if (binding == EB_SSBO_VA) - proxy = core::smart_refctd_ptr(cssbod.vAngles); - else - proxy = core::smart_refctd_ptr(cssbod.data); - - info.desc = core::smart_refctd_ptr(proxy); - info.buffer = { 0, proxy->getSize() }; - } - - template - void fillImageDescriptorInfo(const size_t assetIndex, IGPUDescriptorSet::SDescriptorInfo& info) - { - static_assert(binding == EB_IMAGE_IES_C || binding == EB_IMAGE_S || binding == EB_IMAGE_D || binding == EB_IMAGE_T_MASK); - - const auto& profile = getProfile(assetIndex); - auto& cssbod = m_gpue.CSSBOD[assetIndex]; - - core::smart_refctd_ptr proxy; - - if constexpr (binding == EB_IMAGE_IES_C) - proxy = core::smart_refctd_ptr(cssbod.dImageIESC); - else if (binding == EB_IMAGE_S) - proxy = core::smart_refctd_ptr(cssbod.dImageS); - else if (binding == EB_IMAGE_D) - proxy = core::smart_refctd_ptr(cssbod.dImageD); - else - proxy = core::smart_refctd_ptr(cssbod.dImageTMask); - - info.desc = core::smart_refctd_ptr(proxy); - info.image = { core::smart_refctd_ptr(m_gpue.sampler), asset::EIL_SHADER_READ_ONLY_OPTIMAL }; - } - - template - auto createGPUImageView(const size_t& width, const size_t& height) - { - IGPUImage::SCreationParams imageInfo; - imageInfo.format = format; - imageInfo.type = IGPUImage::ET_2D; - imageInfo.extent.width = width; - imageInfo.extent.height = height; - imageInfo.extent.depth = 1u; - - imageInfo.mipLevels = 1u; - imageInfo.arrayLayers = 1u; - imageInfo.samples = asset::ICPUImage::ESCF_1_BIT; - imageInfo.flags = static_cast(0u); - - auto image = driver->createGPUImageOnDedMem(std::move(imageInfo), driver->getDeviceLocalGPUMemoryReqs()); - - IGPUImageView::SCreationParams imgViewInfo; - imgViewInfo.image = std::move(image); - imgViewInfo.format = format; - imgViewInfo.viewType = IGPUImageView::ET_2D; - imgViewInfo.flags = static_cast(0u); - imgViewInfo.subresourceRange.baseArrayLayer = 0u; - imgViewInfo.subresourceRange.baseMipLevel = 0u; - imgViewInfo.subresourceRange.layerCount = 1u; - imgViewInfo.subresourceRange.levelCount = 1u; - - return driver->createGPUImageView(std::move(imgViewInfo)); - } - - std::vector getCBindings() - { - std::vector bindings = - { - { EB_IMAGE_IES_C, asset::EDT_STORAGE_IMAGE, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_IMAGE_S, asset::EDT_STORAGE_IMAGE, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_IMAGE_D, asset::EDT_STORAGE_IMAGE, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_IMAGE_T_MASK, asset::EDT_STORAGE_IMAGE, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_SSBO_HA, asset::EDT_STORAGE_BUFFER, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_SSBO_VA, asset::EDT_STORAGE_BUFFER, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_SSBO_D, asset::EDT_STORAGE_BUFFER, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr } - }; - - return bindings; - } - - std::vector getGBindings() - { - const std::vector bindings = - { - { EB_IMAGE_IES_C, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, - { EB_IMAGE_S, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, - { EB_IMAGE_D, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, - { EB_IMAGE_T_MASK, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr } - }; - - return bindings; - } - - template - video::IFrameBuffer* createFBO(const size_t& width, const size_t& height) - { - auto* fbo = driver->addFrameBuffer(); - - bBuffer = createGPUImageView(width, height); - fbo->attach(video::EFAP_COLOR_ATTACHMENT0, core::smart_refctd_ptr(bBuffer)); - - return fbo; - } - - const std::vector assets; - video::IVideoDriver* const driver; - - struct GPUE - { - _NBL_STATIC_INLINE_CONSTEXPR uint8_t NBL_D_IMAGES_AMOUNT = 4u; - - // Compute - core::smart_refctd_ptr cPipeline; - core::smart_refctd_ptr cDescriptorSet; - - IGPUDescriptorSet::SDescriptorInfo cinfos[EB_SIZE]; - IGPUDescriptorSet::SWriteDescriptorSet cwrites[EB_SIZE]; - - struct CSSBODescriptor - { - core::smart_refctd_ptr vAngles, hAngles, data; - core::smart_refctd_ptr dImageIESC, dImageS, dImageD, dImageTMask; - }; - - std::vector CSSBOD; - - // Graphics - core::smart_refctd_ptr gPipeline; - core::smart_refctd_ptr gDescriptorSet; - core::smart_refctd_ptr mBuffer; - - IGPUDescriptorSet::SDescriptorInfo ginfos[NBL_D_IMAGES_AMOUNT]; - IGPUDescriptorSet::SWriteDescriptorSet gwrites[NBL_D_IMAGES_AMOUNT]; - - // Shared data - core::smart_refctd_ptr sampler; - } m_gpue; - - #include "nbl/nblpack.h" - struct PushConstant - { - float maxIValue; - float zAngleDegreeRotation; - IESCompute::E_MODE mode = IESCompute::EM_CDC; - } PACK_STRUCT; - #include "nbl/nblunpack.h" - - PushConstant pushConstant; - - video::IFrameBuffer* fbo = nullptr; - core::smart_refctd_ptr bBuffer; - - size_t generalPurposeOffset = 0; -}; - -class IESExampleEventReceiver : public nbl::IEventReceiver -{ -public: - IESExampleEventReceiver() {} - - bool OnEvent(const nbl::SEvent& event) - { - if (event.EventType == nbl::EET_MOUSE_INPUT_EVENT) - { - zDegreeOffset = event.MouseInput.Wheel; - - return true; - } - - if (event.EventType == nbl::EET_KEY_INPUT_EVENT && !event.KeyInput.PressedDown) - { - switch (event.KeyInput.Key) - { - case nbl::KEY_UP: - { - generalPurposeOffset = 1; - return true; - } - case nbl::KEY_DOWN: - { - generalPurposeOffset = -1; - return true; - } - case nbl::KEY_KEY_C: - { - mode = IESCompute::EM_CDC; - return true; - } - case nbl::KEY_KEY_V: - { - mode = IESCompute::EM_IES_C; - return true; - } - case nbl::KEY_KEY_S: - { - mode = IESCompute::EM_SPERICAL_C; - return true; - } - case nbl::KEY_KEY_D: - { - mode = IESCompute::EM_DIRECTION; - return true; - } - case nbl::KEY_KEY_M: - { - mode = IESCompute::EM_PASS_T_MASK; - return true; - } - case nbl::KEY_KEY_Q: - { - running = false; - return true; - } - } - } - - return false; - } - - void reset() { zDegreeOffset = 0; generalPurposeOffset = 0; } - inline const auto& isRunning() const { return running; } - inline const auto& getMode() const { return mode; } - template - inline const auto& getZDegreeOffset() const { return static_cast(zDegreeOffset); } - inline const auto& getGeneralPurposeOffset() { return generalPurposeOffset; } -private: - double zDegreeOffset = 0.0; - int8_t generalPurposeOffset = 0; - IESCompute::E_MODE mode = IESCompute::EM_CDC; - bool running = true; -}; - -int main() -{ - nbl::SIrrlichtCreationParameters params; - params.Bits = 24; - params.ZBufferBits = 24; - params.DriverType = video::EDT_OPENGL; - params.WindowSize = dimension2d(640, 640); - params.Fullscreen = false; - params.Vsync = true; - params.Doublebuffer = true; - params.Stencilbuffer = false; - - auto device = createDeviceEx(params); - - if (!device) - return 1; - - auto* driver = device->getVideoDriver(); - auto* am = device->getAssetManager(); - - asset::IAssetLoader::SAssetLoadParams lparams; - lparams.loaderFlags; - - auto readJSON = [](const std::string& filePath) - { - std::ifstream file(filePath.data()); - if (!file.is_open()) { - printf("Invalid input json \"%s\" file! Aborting..", filePath.data()); - exit(0x45); - } - - std::stringstream buffer; - buffer << file.rdbuf(); - - return buffer.str(); - }; - - const auto INPUT_JSON_FILE_PATH_FS = std::filesystem::absolute("../inputs.json"); - const auto INPUT_JSON_FILE_PATH = INPUT_JSON_FILE_PATH_FS.string(); - const auto jsonBuffer = readJSON(INPUT_JSON_FILE_PATH); - if (jsonBuffer.empty()) { - printf("Read input json \"%s\" file is empty! Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); - exit(0x45); - } - - const auto jsonMap = json::parse(jsonBuffer.c_str()); - - if (!jsonMap["directories"].is_array()) - { - printf("Input json \"%s\" file's field \"directories\" is not an array! Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); - exit(0x45); - } - - if (!jsonMap["files"].is_array()) - { - printf("Input json \"%s\" file's field \"files\" is not an array! Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); - exit(0x45); - } - - if (!jsonMap["writeAssets"].is_boolean()) - { - printf("Input json \"%s\" file's field \"writeAssets\" is not a boolean! Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); - exit(0x45); - } - - const auto&& IES_INPUTS = [&]() - { - std::vector inputFilePaths; - - auto addFile = [&inputFilePaths, &INPUT_JSON_FILE_PATH_FS](const std::string_view filePath) -> void - { - auto path = std::filesystem::path(filePath); - - if (!path.is_absolute()) - path = std::filesystem::absolute(INPUT_JSON_FILE_PATH_FS.parent_path() / path); - - if (std::filesystem::exists(path) && std::filesystem::is_regular_file(path) && path.extension() == ".ies") - inputFilePaths.push_back(path.string()); - else - { - printf("Invalid input path \"%s\"! Aborting..\n", path.string().c_str()); - exit(0x45); - } - }; - - auto addFiles = [&inputFilePaths, &INPUT_JSON_FILE_PATH_FS, &addFile](const std::string_view directoryPath) -> void - { - auto directory(std::filesystem::absolute(INPUT_JSON_FILE_PATH_FS.parent_path() / directoryPath)); - if (!std::filesystem::exists(directory) || !std::filesystem::is_directory(directory)) { - printf("Invalid input directory \"%s\"! Aborting..\n", directoryPath.data()); - exit(0x45); - } - - for (const auto& entry : std::filesystem::directory_iterator(directory)) - addFile(entry.path().string().c_str()); - }; - - // parse json - { - std::vector jDirectories; - jsonMap["directories"].get_to(jDirectories); - - for (const auto& it : jDirectories) - addFiles(it); - - std::vector jFiles; - jsonMap["files"].get_to(jFiles); - - for (const auto& it : jFiles) - addFile(it); - } - - return std::move(inputFilePaths); - }(); - - const bool GUI = [&]() - { - bool b = false; - jsonMap["gui"].get_to(b); - - return b; - }(); - - const bool WRITE_ASSETS = [&]() - { - bool b = false; - jsonMap["writeAssets"].get_to(b); - - return b; - }(); - - const auto ASSETS = [&]() - { - size_t loaded = {}, total = IES_INPUTS.size(); - std::vector assets; - std::vector outStems; - - for (size_t i = 0; i < total; ++i) - { - auto asset = device->getAssetManager()->getAsset(IES_INPUTS[i].c_str(), lparams); - const auto* path = IES_INPUTS[i].c_str(); - const auto stem = std::filesystem::path(IES_INPUTS[i].c_str()).stem().string(); - - if (asset.getMetadata()) - { - assets.emplace_back(std::move(asset)); - outStems.push_back(stem); - ++loaded; - } - else - printf("Could not load metadata from \"%s\" asset! Skipping..\n", path); - } - printf("Loaded [%s/%s] assets! Status: %s\n", std::to_string(loaded).c_str(), std::to_string(total).c_str(), loaded == total ? "PASSING" : "FAILING"); - - return std::make_pair(assets, outStems); - }(); - - if (GUI) - printf("GUI Mode: ON\n"); - else - { - printf("GUI Mode: OFF\nExiting..."); - exit(0); - } - - IESCompute iesComputeEnvironment(driver, am, ASSETS.first); - IESExampleEventReceiver receiver; - device->setEventReceiver(&receiver); - - auto getModeRS = [&]() - { - switch (iesComputeEnvironment.getMode()) - { - case IESCompute::EM_CDC: - return "CDC"; - case IESCompute::EM_IES_C: - return "IES Candela"; - case IESCompute::EM_SPERICAL_C: - return "Spherical Coordinates"; - case IESCompute::EM_DIRECTION: - return "Direction sample"; - case IESCompute::EM_PASS_T_MASK: - return "Pass Mask"; - default: - return "ERROR"; - } - }; - - auto getProfileRS = [&](const asset::CIESProfile& profile) - { - switch (profile.getSymmetry()) - { - case asset::CIESProfile::ISOTROPIC: - return "ISOTROPIC"; - case asset::CIESProfile::QUAD_SYMETRIC: - return "QUAD_SYMETRIC"; - case asset::CIESProfile::HALF_SYMETRIC: - return "HALF_SYMETRIC"; - case asset::CIESProfile::OTHER_HALF_SYMMETRIC: - return "OTHER_HALF_SYMMETRIC"; - case asset::CIESProfile::NO_LATERAL_SYMMET: - return "NO_LATERAL_SYMMET"; - default: - return "ERROR"; - } - }; - - while (device->run() && receiver.isRunning()) - { - iesComputeEnvironment.updateGeneralPurposeOffset(receiver.getGeneralPurposeOffset()); - iesComputeEnvironment.updateZDegree(receiver.getZDegreeOffset()); - iesComputeEnvironment.updateMode(receiver.getMode()); - - iesComputeEnvironment.begin(); - iesComputeEnvironment.dispatch(); - iesComputeEnvironment.renderpass(); - iesComputeEnvironment.end(); - - std::wostringstream windowCaption; - { - const auto* const mode = getModeRS(); - const auto* const profile = getProfileRS(iesComputeEnvironment.getActiveProfile()); - - windowCaption << "IES Demo - Nabla Engine - Profile: " << profile << " - Degrees: " << iesComputeEnvironment.getZDegree() << " - Mode: " << mode; - device->setWindowCaption(windowCaption.str()); - } - receiver.reset(); - } - - if(WRITE_ASSETS) - for (size_t i = 0; i < ASSETS.first.size(); ++i) - { - const auto& bundle = ASSETS.first[i]; - const auto& stem = ASSETS.second[i]; - - const auto& profile = bundle.getMetadata()->selfCast()->profile; - // const std::string out = std::filesystem::absolute("out/cpu/" + std::string(getProfileRS(profile)) + "/" + stem + ".png").string(); TODO (?): why its not working? ah touch required probably first - const std::string out = std::filesystem::absolute(std::string(getProfileRS(profile)) + "_" + stem + ".png").string(); - - asset::IAssetWriter::SAssetWriteParams wparams(bundle.getContents().begin()->get()); - - if (am->writeAsset(out.c_str(), wparams)) - printf("Saved \"%s\"\n", out.c_str()); - else - printf("Could not write \"%s\"\n", out.c_str()); - } - - return 0; -} -*/ - -#endif \ No newline at end of file +NBL_MAIN_FUNC(IESViewer) \ No newline at end of file From 31dfe2a5f455c8e1fb8ce9e4884a39c52345b16d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 14 Oct 2025 16:26:59 +0200 Subject: [PATCH 035/219] I had wrong pointer for media to some 2023 commit? --- media | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/media b/media index c24f4e139..f895f4e3d 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit c24f4e13901554abc9fdf87081108cc7dca1db57 +Subproject commit f895f4e3d1f72c772267143fe60f891bfe9e8e82 From 59a996222da2232348a1a9e31c4484161b340fb0 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 14 Oct 2025 17:08:39 +0200 Subject: [PATCH 036/219] need to inspect metadata, non asset did load correctly --- 50.IESProfileTest/AppInputParser.cpp | 10 ++++++++-- 50.IESProfileTest/AppInputParser.hpp | 9 ++++++++- 50.IESProfileTest/main.cpp | 26 ++++++++++++++++++++++++-- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/50.IESProfileTest/AppInputParser.cpp b/50.IESProfileTest/AppInputParser.cpp index 6719bdf74..30e4767f3 100644 --- a/50.IESProfileTest/AppInputParser.cpp +++ b/50.IESProfileTest/AppInputParser.cpp @@ -15,7 +15,7 @@ using namespace scene; using namespace nbl::examples; using namespace nlohmann; -bool AppInputParser::parse(std::vector& out, const std::string input, const std::string cwd) +bool AppInputParser::parse(Output& out, const std::string input, const std::string cwd) { const auto jInputFile = std::filesystem::absolute(input); const auto sjInputFile = jInputFile.string(); @@ -62,7 +62,7 @@ bool AppInputParser::parse(std::vector& out, const std::string inpu auto path = std::filesystem::absolute(cwd / std::filesystem::path(in)); if (std::filesystem::exists(path) && std::filesystem::is_regular_file(path) && path.extension() == ".ies") - out.push_back(path.string()); + out.inputList.push_back(path.string()); else { logger.log("Invalid \"%s\" input!", system::ILogger::ELL_ERROR, path.string().c_str()); @@ -105,5 +105,11 @@ bool AppInputParser::parse(std::vector& out, const std::string inpu return false; } + out.withGUI = false; + jsonMap["gui"].get_to(out.withGUI); + + out.writeAssets = false; + jsonMap["writeAssets"].get_to(out.writeAssets); + return true; } \ No newline at end of file diff --git a/50.IESProfileTest/AppInputParser.hpp b/50.IESProfileTest/AppInputParser.hpp index dc8f67128..c5df7cab5 100644 --- a/50.IESProfileTest/AppInputParser.hpp +++ b/50.IESProfileTest/AppInputParser.hpp @@ -11,8 +11,15 @@ struct AppInputParser { public: + struct Output + { + std::vector inputList; + bool withGUI; + bool writeAssets; + }; + AppInputParser(nbl::system::logger_opt_ptr _logger = nullptr) : logger(_logger) {} - bool parse(std::vector& out, const std::string input, const std::string cwd = "."); + bool parse(Output& out, const std::string jFilePath, const std::string cwd = "."); private: nbl::system::logger_opt_ptr logger; diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index e4d20830b..fe4a0ac0f 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -41,11 +41,33 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; - std::vector inputList; + AppInputParser::Output out; AppInputParser parser(system::logger_opt_ptr(m_logger.get())); - if (!parser.parse(inputList, "../inputs.json", "../../media")) + if (!parser.parse(out, "../inputs.json", "../../media")) return false; + std::vector assets; + { + size_t loaded = {}, total = out.inputList.size(); + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = system::logger_opt_ptr(m_logger.get()); + + for (const auto& in : out.inputList) + { + auto asset = m_assetMgr->getAsset(in.c_str(), lp); + + if (asset.getMetadata()) + { + assets.emplace_back(std::move(asset)); + ++loaded; + } + else + m_logger->log("Could not load metadata from \"%s\" asset! Skipping..", system::ILogger::ELL_WARNING, in.c_str()); + } + const auto sl = std::to_string(loaded), st = std::to_string(total); + m_logger->log("Loaded [%s/%s] assets! Status: %s", system::ILogger::ELL_INFO, sl.c_str(), st.c_str(), loaded == total ? "PASSING" : "FAILING"); + } + auto createShader = [&]() -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; From 02924fbac84775596dd553b210016f5fb46834fc Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 15 Oct 2025 13:37:55 +0200 Subject: [PATCH 037/219] update logging --- 50.IESProfileTest/main.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index fe4a0ac0f..498e06558 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -48,6 +48,8 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp std::vector assets; { + m_logger->log("Loading IES assets..", system::ILogger::ELL_INFO); + auto start = std::chrono::high_resolution_clock::now(); size_t loaded = {}, total = out.inputList.size(); IAssetLoader::SAssetLoadParams lp = {}; lp.logger = system::logger_opt_ptr(m_logger.get()); @@ -60,12 +62,23 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp { assets.emplace_back(std::move(asset)); ++loaded; + + m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, in.c_str()); } else - m_logger->log("Could not load metadata from \"%s\" asset! Skipping..", system::ILogger::ELL_WARNING, in.c_str()); + m_logger->log("Failed to load metadata for \"%s\"! Skipping..", system::ILogger::ELL_WARNING, in.c_str()); } const auto sl = std::to_string(loaded), st = std::to_string(total); - m_logger->log("Loaded [%s/%s] assets! Status: %s", system::ILogger::ELL_INFO, sl.c_str(), st.c_str(), loaded == total ? "PASSING" : "FAILING"); + const bool passed = loaded == total; + + if (not passed) + { + auto diff = std::to_string(total - loaded); + m_logger->log("Failed to load [%s/%s] IES assets!", system::ILogger::ELL_ERROR, diff.c_str(), st.c_str()); + } + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished loading IES assets, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); } auto createShader = [&]() -> smart_refctd_ptr @@ -80,16 +93,16 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp if (assets.empty()) { - m_logger->log("Could not load \"%s\" shader!", system::ILogger::ELL_ERROR, key.data()); + m_logger->log("Failed to load \"%s\" shader!", system::ILogger::ELL_ERROR, key.data()); return nullptr; } auto spirvShader = IAsset::castDown(assets[0]); if (spirvShader) - m_logger->log("Loaded \"%s\" shader!", system::ILogger::ELL_INFO, key.data()); + m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, key.data()); else - m_logger->log("Could not cast \"%s\" asset to IShader!", system::ILogger::ELL_ERROR, key.data()); + m_logger->log("Failed to cast \"%s\" asset to IShader!", system::ILogger::ELL_ERROR, key.data()); return spirvShader; }; From 7e4399a48b6f47892b8406ff7853489653a6acaf Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 15 Oct 2025 15:50:40 +0200 Subject: [PATCH 038/219] reorganize struct for holding IES asset resources, create GPU images --- 50.IESProfileTest/main.cpp | 89 +++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 498e06558..ce4986d1c 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -46,7 +46,6 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp if (!parser.parse(out, "../inputs.json", "../../media")) return false; - std::vector assets; { m_logger->log("Loading IES assets..", system::ILogger::ELL_INFO); auto start = std::chrono::high_resolution_clock::now(); @@ -60,7 +59,10 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp if (asset.getMetadata()) { - assets.emplace_back(std::move(asset)); + auto& ies = assets.emplace_back(); + ies.bundle = std::move(asset); + ies.key = in; + ++loaded; m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, in.c_str()); @@ -81,6 +83,26 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp m_logger->log("Finished loading IES assets, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); } + { + m_logger->log("Creating GPU IES images..", system::ILogger::ELL_INFO); + auto start = std::chrono::high_resolution_clock::now(); + for (auto& ies : assets) + { + const auto resolution = ies.getProfile()->getOptimalIESResolution(); + + #define CREATE_VIEW(VIEW, FORMAT) \ + if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT) )) { m_logger->log("Failed to create GPU Image for for \"%s\"! Terminating.", system::ILogger::ELL_ERROR, ies.key.c_str()); return false; } + + CREATE_VIEW(ies.views.candela, asset::EF_R16_UNORM) + CREATE_VIEW(ies.views.spherical, asset::EF_R32G32_SFLOAT) + CREATE_VIEW(ies.views.direction, asset::EF_R32G32B32A32_SFLOAT) + CREATE_VIEW(ies.views.mask, asset::EF_R8G8_UNORM) + } + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished creating GPU IES images, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); + } + auto createShader = [&]() -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; @@ -283,6 +305,27 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } private: + struct IES + { + struct + { + core::smart_refctd_ptr candela = nullptr, spherical = nullptr, direction = nullptr, mask = nullptr; + } views; + + asset::SAssetBundle bundle; + std::string key; + + inline const asset::CIESProfile* getProfile() + { + auto* meta = bundle.getMetadata(); + if (meta) + return &meta->selfCast()->profile; + + return nullptr; + } + }; + std::vector assets; + // //smart_refctd_ptr m_scene; //smart_refctd_ptr m_renderer; @@ -314,6 +357,48 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp */ } } + + core::smart_refctd_ptr createImageView(const size_t width, const size_t height, asset::E_FORMAT format) + { + IGPUImage::SCreationParams imageParams {}; + imageParams.type = IImage::E_TYPE::ET_2D; + imageParams.extent.height = height; + imageParams.extent.width = width; + imageParams.extent.depth = 1u; + imageParams.format = format; + imageParams.mipLevels = 1u; + imageParams.flags = IImage::ECF_NONE; + imageParams.arrayLayers = 1u; + imageParams.samples = IImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + + auto image = m_device->createImage(std::move(imageParams)); + + if (!image) + { + m_logger->log("Failed to create image!", system::ILogger::ELL_ERROR); + return nullptr; + } + + auto allocation = m_device->allocate(image->getMemoryReqs(), image.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); + if (!allocation.isValid()) + { + m_logger->log("Failed to allocate device memory!", system::ILogger::ELL_ERROR); + return nullptr; + } + + IGPUImageView::SCreationParams viewParams {}; + viewParams.image = std::move(image); + viewParams.format = format; + viewParams.viewType = IGPUImageView::ET_2D; + viewParams.flags = IImageViewBase::ECF_NONE; + viewParams.subresourceRange.baseArrayLayer = 0u; + viewParams.subresourceRange.baseMipLevel = 0u; + viewParams.subresourceRange.layerCount = 1u; + viewParams.subresourceRange.levelCount = 1u; + viewParams.subresourceRange.aspectMask = core::bitflag(asset::IImage::EAF_COLOR_BIT); + + return m_device->createImageView(std::move(viewParams)); + } }; NBL_MAIN_FUNC(IESViewer) \ No newline at end of file From 23174cc73f2d06eb349793a4b4077f5bd0edc021 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 15 Oct 2025 17:21:29 +0200 Subject: [PATCH 039/219] add buffer creation --- 50.IESProfileTest/main.cpp | 50 +++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index ce4986d1c..5e14307b4 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -84,11 +84,12 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } { - m_logger->log("Creating GPU IES images..", system::ILogger::ELL_INFO); + m_logger->log("Creating GPU IES resources..", system::ILogger::ELL_INFO); auto start = std::chrono::high_resolution_clock::now(); for (auto& ies : assets) { - const auto resolution = ies.getProfile()->getOptimalIESResolution(); + const auto* profile = ies.getProfile(); + const auto resolution = profile->getOptimalIESResolution(); #define CREATE_VIEW(VIEW, FORMAT) \ if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT) )) { m_logger->log("Failed to create GPU Image for for \"%s\"! Terminating.", system::ILogger::ELL_ERROR, ies.key.c_str()); return false; } @@ -100,7 +101,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); auto took = std::to_string(elapsed.count()); - m_logger->log("Finished creating GPU IES images, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); + m_logger->log("Finished creating GPU IES resources, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); } auto createShader = [&]() -> smart_refctd_ptr @@ -312,6 +313,11 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp core::smart_refctd_ptr candela = nullptr, spherical = nullptr, direction = nullptr, mask = nullptr; } views; + struct + { + core::smart_refctd_ptr vAngles = nullptr, hAngles = nullptr, data = nullptr; + } buffers; + asset::SAssetBundle bundle; std::string key; @@ -399,6 +405,44 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp return m_device->createImageView(std::move(viewParams)); } + + core::smart_refctd_ptr createBuffer(size_t size) + { + IGPUBuffer::SCreationParams bufferParams = {}; + bufferParams.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT /*TODO: <- double check*/;; + bufferParams.size = size; + + auto buffer = m_device->createBuffer(std::move(bufferParams)); + + if (not buffer) + { + m_logger->log("Failed to create buffer!", ILogger::ELL_ERROR); + return nullptr; + } + + auto memoryReqs = buffer->getMemoryReqs(); + + if(m_utils) + memoryReqs.memoryTypeBits &= m_utils->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); + + auto allocation = m_device->allocate(memoryReqs, buffer.get(), core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT)); + if (not allocation.isValid()) + { + m_logger->log("Failed to allocate buffer!", ILogger::ELL_ERROR); + return nullptr; + } + auto memory = allocation.memory; + + if (!memory->map({ 0ull, memoryReqs.size }, bitflag(IDeviceMemoryAllocation::EMCAF_READ) | IDeviceMemoryAllocation::EMCAF_WRITE)) + { + m_logger->log("Failed to map device memory!", ILogger::ELL_ERROR); + return nullptr; + } + + // TODO: maybe lets also fill buffer with IES data at one go + + return buffer; + } }; NBL_MAIN_FUNC(IESViewer) \ No newline at end of file From 03a97f1b77dbd44ffa9b4ce8be529ca4cf13452a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 16 Oct 2025 11:08:05 +0200 Subject: [PATCH 040/219] handle PCs, add debug info for gpu resources & events I think I will have events done through ImGUI ext and I will go bindless for my images --- 50.IESProfileTest/app_resources/common.hlsl | 2 +- 50.IESProfileTest/main.cpp | 177 ++++++++++++++------ 2 files changed, 131 insertions(+), 48 deletions(-) diff --git a/50.IESProfileTest/app_resources/common.hlsl b/50.IESProfileTest/app_resources/common.hlsl index 8b0d14783..01dbe850f 100644 --- a/50.IESProfileTest/app_resources/common.hlsl +++ b/50.IESProfileTest/app_resources/common.hlsl @@ -27,8 +27,8 @@ struct PushConstants uint32_t hAnglesCount; uint32_t vAnglesCount; uint32_t dataCount; - float32_t zAngleDegreeRotation; + float32_t zAngleDegreeRotation; uint32_t mode; #ifdef __HLSL_VERSION diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 5e14307b4..9025b417e 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -4,6 +4,7 @@ #include "nbl/examples/examples.hpp" #include "nbl/this_example/builtin/build/spirv/keys.hpp" +#include "app_resources/common.hlsl" #include "AppInputParser.hpp" using namespace nbl; @@ -23,6 +24,9 @@ const std::chrono::steady_clock::time_point startBenchmark = std::chrono::high_r bool stopBenchamrkFlag = false; #endif +constexpr static std::string_view InputsJson = "../inputs.json"; +constexpr static std::string_view MediaEntry = "../../media"; + class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApplication { using device_base_t = MonoWindowApplication; @@ -41,9 +45,11 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; + const auto media = absolute(path(MediaEntry.data())); + AppInputParser::Output out; AppInputParser parser(system::logger_opt_ptr(m_logger.get())); - if (!parser.parse(out, "../inputs.json", "../../media")) + if (!parser.parse(out, InputsJson.data(), media.string())) return false; { @@ -61,8 +67,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp { auto& ies = assets.emplace_back(); ies.bundle = std::move(asset); - ies.key = in; - + ies.key = path(in).lexically_relative(media).string(); ++loaded; m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, in.c_str()); @@ -91,13 +96,20 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp const auto* profile = ies.getProfile(); const auto resolution = profile->getOptimalIESResolution(); - #define CREATE_VIEW(VIEW, FORMAT) \ - if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT) )) { m_logger->log("Failed to create GPU Image for for \"%s\"! Terminating.", system::ILogger::ELL_ERROR, ies.key.c_str()); return false; } + #define CREATE_VIEW(VIEW, FORMAT, NAME) \ + if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT, NAME + ies.key) )) return false; + + CREATE_VIEW(ies.views.candela, asset::EF_R16_UNORM, "IES Candela Data Image: ") + CREATE_VIEW(ies.views.spherical, asset::EF_R32G32_SFLOAT, "IES Spherical Data Image: ") + CREATE_VIEW(ies.views.direction, asset::EF_R32G32B32A32_SFLOAT, "IES Direction Data Image: ") + CREATE_VIEW(ies.views.mask, asset::EF_R8G8_UNORM, "IES Mask Data Image: ") + + #define CREATE_BUFFER(BUFFER, DATA, NAME) \ + if (!(BUFFER = createBuffer(DATA, NAME + ies.key) )) return false; - CREATE_VIEW(ies.views.candela, asset::EF_R16_UNORM) - CREATE_VIEW(ies.views.spherical, asset::EF_R32G32_SFLOAT) - CREATE_VIEW(ies.views.direction, asset::EF_R32G32B32A32_SFLOAT) - CREATE_VIEW(ies.views.mask, asset::EF_R8G8_UNORM) + CREATE_BUFFER(ies.buffers.vAngles, profile->getVertAngles(), "IES Vertical Angles Buffer: ") + CREATE_BUFFER(ies.buffers.hAngles, profile->getHoriAngles(), "IES Horizontal Angles Buffer: ") + CREATE_BUFFER(ies.buffers.data, profile->getData(), "IES Data Buffer: ") } auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); auto took = std::to_string(elapsed.count()); @@ -161,11 +173,14 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp cb->beginDebugMarker("IESViewer Frame"); { camera.beginInputProcessing(nextPresentationTimestamp); - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); mouseProcess(events); }, m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, m_logger.get()); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { mouseProcess(events); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { keyboardProcess(events); }, m_logger.get()); camera.endInputProcessing(nextPresentationTimestamp); } + auto& ies = assets[activeAssetIx]; + PushConstants pc; + updatePushConstants(pc, ies); asset::SViewport viewport; { @@ -206,19 +221,6 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); } - float32_t3x4 viewMatrix; - float32_t4x4 viewProjMatrix; - // TODO: get rid of legacy matrices - { - memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); - memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); - } - const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix); - - // tear down scene every frame - //m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data() + gcIndex; - //m_renderer->render(cb, viewParams); - cb->endRenderPass(); cb->endDebugMarker(); cb->end(); @@ -257,9 +259,6 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp std::string caption = "[Nabla Engine] IES Viewer"; { - caption += ", displaying ["; - //caption += m_scene->getInitParams().geometryNames[gcIndex]; - caption += "]"; m_window->setCaption(caption); } return retval; @@ -306,6 +305,17 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } private: + enum E_MODE : uint32_t + { + EM_CDC, //! Candlepower Distribution Curve + EM_IES_C, //! IES Candela + EM_SPERICAL_C, //! Sperical coordinates + EM_DIRECTION, //! Sample direction + EM_PASS_T_MASK, //! Test mask + + EM_SIZE + }; + struct IES { struct @@ -321,7 +331,10 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp asset::SAssetBundle bundle; std::string key; - inline const asset::CIESProfile* getProfile() + float zDegree; + E_MODE mode; + + inline const asset::CIESProfile* getProfile() const { auto* meta = bundle.getMetadata(); if (meta) @@ -330,7 +343,10 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp return nullptr; } }; + + bool running = true; std::vector assets; + size_t activeAssetIx = 0; // //smart_refctd_ptr m_scene; @@ -348,23 +364,59 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp uint16_t gcIndex = {}; + // TODO: lets have this stuff in nice imgui void mouseProcess(const nbl::ui::IMouseEventChannel::range_t& events) { - for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) + for (auto it = events.begin(); it != events.end(); it++) { - auto ev = *eventIt; + auto ev = *it; - /* - if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL && m_renderer) + if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) { - gcIndex += int16_t(core::sign(ev.scrollEvent.verticalScroll)); - gcIndex = core::clamp(gcIndex, 0ull, m_renderer->getGeometries().size() - 1); + auto& ies = assets[activeAssetIx]; + auto* profile = ies.getProfile(); + + auto impulse = ev.scrollEvent.verticalScroll; + ies.zDegree = std::clamp(ies.zDegree + impulse, profile->getHoriAngles().front(), profile->getHoriAngles().back()); } - */ } } - core::smart_refctd_ptr createImageView(const size_t width, const size_t height, asset::E_FORMAT format) + void keyboardProcess(const nbl::ui::IKeyboardEventChannel::range_t& events) + { + for (auto it = events.begin(); it != events.end(); it++) + { + const auto ev = *it; + + if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) + { + auto& ies = assets[activeAssetIx]; + auto* profile = ies.getProfile(); + + if (ev.keyCode == nbl::ui::EKC_UP_ARROW) + activeAssetIx = std::clamp(activeAssetIx + 1, 0, assets.size()); + else if(ev.keyCode == nbl::ui::EKC_DOWN_ARROW) + activeAssetIx = std::clamp(activeAssetIx - 1, 0, assets.size()); + + if (ev.keyCode == nbl::ui::EKC_C) + ies.mode = EM_CDC; + else if (ev.keyCode == nbl::ui::EKC_V) + ies.mode = EM_IES_C; + else if (ev.keyCode == nbl::ui::EKC_S) + ies.mode = EM_SPERICAL_C; + else if (ev.keyCode == nbl::ui::EKC_D) + ies.mode = EM_DIRECTION; + else if (ev.keyCode == nbl::ui::EKC_M) + ies.mode = EM_PASS_T_MASK; + + if (ev.keyCode == nbl::ui::EKC_Q) + running = false; + } + } + } + // <- + + core::smart_refctd_ptr createImageView(const size_t width, const size_t height, asset::E_FORMAT format, std::string name) { IGPUImage::SCreationParams imageParams {}; imageParams.type = IImage::E_TYPE::ET_2D; @@ -378,17 +430,18 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp imageParams.samples = IImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; auto image = m_device->createImage(std::move(imageParams)); + image->setObjectDebugName(name.c_str()); if (!image) { - m_logger->log("Failed to create image!", system::ILogger::ELL_ERROR); + m_logger->log("Failed to create \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); return nullptr; } auto allocation = m_device->allocate(image->getMemoryReqs(), image.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); if (!allocation.isValid()) { - m_logger->log("Failed to allocate device memory!", system::ILogger::ELL_ERROR); + m_logger->log("Failed to allocate device memory for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); return nullptr; } @@ -403,20 +456,26 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp viewParams.subresourceRange.levelCount = 1u; viewParams.subresourceRange.aspectMask = core::bitflag(asset::IImage::EAF_COLOR_BIT); - return m_device->createImageView(std::move(viewParams)); + auto imageView = m_device->createImageView(std::move(viewParams)); + + if(not imageView) + m_logger->log("Failed to create image view for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); + + return imageView; } - core::smart_refctd_ptr createBuffer(size_t size) + core::smart_refctd_ptr createBuffer(const core::vector& in, std::string name) { IGPUBuffer::SCreationParams bufferParams = {}; bufferParams.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT /*TODO: <- double check*/;; - bufferParams.size = size; + bufferParams.size = sizeof(asset::CIESProfile::IES_STORAGE_FORMAT) * in.size(); auto buffer = m_device->createBuffer(std::move(bufferParams)); + buffer->setObjectDebugName(name.c_str()); if (not buffer) { - m_logger->log("Failed to create buffer!", ILogger::ELL_ERROR); + m_logger->log("Failed to create \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); return nullptr; } @@ -428,21 +487,45 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp auto allocation = m_device->allocate(memoryReqs, buffer.get(), core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT)); if (not allocation.isValid()) { - m_logger->log("Failed to allocate buffer!", ILogger::ELL_ERROR); + m_logger->log("Failed to allocate \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); return nullptr; } - auto memory = allocation.memory; - if (!memory->map({ 0ull, memoryReqs.size }, bitflag(IDeviceMemoryAllocation::EMCAF_READ) | IDeviceMemoryAllocation::EMCAF_WRITE)) + auto* mappedPointer = allocation.memory->map({ 0ull, memoryReqs.size }, IDeviceMemoryAllocation::EMCAF_READ_AND_WRITE); + + if (not mappedPointer) { - m_logger->log("Failed to map device memory!", ILogger::ELL_ERROR); + m_logger->log("Failed to map device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); return nullptr; } - // TODO: maybe lets also fill buffer with IES data at one go + memcpy(mappedPointer, in.data(), buffer->getSize()); + + if (not allocation.memory->unmap()) + { + m_logger->log("Failed to unmap device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } return buffer; } + + inline void updatePushConstants(PushConstants& out, const IES& in) + { + out.vAnglesBDA = in.buffers.vAngles->getDeviceAddress(); + out.hAnglesBDA = in.buffers.hAngles->getDeviceAddress(); + out.dataBDA = in.buffers.data->getDeviceAddress(); + + const auto* profile = in.getProfile(); + + out.maxIValue = profile->getMaxCandelaValue(); + out.vAnglesCount = profile->getVertAngles().size(); + out.hAnglesCount = profile->getHoriAngles().size(); + out.dataCount = profile->getData().size(); + + out.zAngleDegreeRotation = in.zDegree; + out.mode = in.mode; + } }; NBL_MAIN_FUNC(IESViewer) \ No newline at end of file From 514576352c711f56dbd3dfab0613d8a49865f0b6 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 16 Oct 2025 17:45:56 +0200 Subject: [PATCH 041/219] save work, graphics pipeline, flushing buffers, going bindless --- 50.IESProfileTest/CMakeLists.txt | 8 +- 50.IESProfileTest/app_resources/common.hlsl | 19 ++- 50.IESProfileTest/app_resources/compute.hlsl | 2 +- 50.IESProfileTest/app_resources/pixel.hlsl | 26 ++-- 50.IESProfileTest/main.cpp | 122 +++++++++++++++++-- 5 files changed, 140 insertions(+), 37 deletions(-) diff --git a/50.IESProfileTest/CMakeLists.txt b/50.IESProfileTest/CMakeLists.txt index 311b981b5..d5ef049d2 100644 --- a/50.IESProfileTest/CMakeLists.txt +++ b/50.IESProfileTest/CMakeLists.txt @@ -17,27 +17,27 @@ set(DEPENDS target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) -set(V 6_8) +set(SM 6_8) set(JSON [=[ [ { "INPUT": "app_resources/compute.hlsl", "KEY": "compute", - "COMPILE_OPTIONS": ["-T", "lib_${V}"], + "COMPILE_OPTIONS": ["-T", "lib_${SM}"], "DEPENDS": [], "CAPS": [] }, { "INPUT": "app_resources/pixel.hlsl", "KEY": "pixel", - "COMPILE_OPTIONS": ["-T", "lib_${V}"], + "COMPILE_OPTIONS": ["-T", "lib_${SM}"], "DEPENDS": [], "CAPS": [] }, { "INPUT": "app_resources/vertex.hlsl", "KEY": "vertex", - "COMPILE_OPTIONS": ["-T", "lib_${V}"], + "COMPILE_OPTIONS": ["-T", "lib_${SM}"], "DEPENDS": [], "CAPS": [] } diff --git a/50.IESProfileTest/app_resources/common.hlsl b/50.IESProfileTest/app_resources/common.hlsl index 01dbe850f..a68d74422 100644 --- a/50.IESProfileTest/app_resources/common.hlsl +++ b/50.IESProfileTest/app_resources/common.hlsl @@ -6,8 +6,11 @@ // -> TODO: use NBL_CONTEXPR or something #ifndef UINT16_MAX #define UINT16_MAX 65535u // would be cool if we have this define somewhere or GLSL do -#endif +#endif // UINT16_MAX +#ifndef M_PI #define M_PI 3.1415926535897932384626433832795f // would be cool if we have this define somewhere or GLSL do +#endif // M_PI + #define M_HALF_PI M_PI/2.0f // would be cool if we have this define somewhere or GLSL do #define QUANT_ERROR_ADMISSIBLE 1/1024 @@ -15,6 +18,9 @@ #define WORKGROUP_DIMENSION 16u // <- + wipe whatever we already have +// TODO: since NSC prebuilds into SPIRV - maybe could make it a CMake option with a default val +#define MAX_IES_IMAGES 4u * 6969 + using namespace nbl::hlsl; struct PushConstants @@ -28,13 +34,16 @@ struct PushConstants uint32_t vAnglesCount; uint32_t dataCount; - float32_t zAngleDegreeRotation; uint32_t mode; + uint32_t texIx; + float32_t zAngleDegreeRotation; + + uint32_t dummy; #ifdef __HLSL_VERSION - float32_t getHorizontalAngle(uint32_t ix) { return vk::RawBufferLoad(hAnglesBDA + sizeof(float32_t) * ix); } - float32_t getVerticalAngle(uint32_t ix) { return vk::RawBufferLoad(vAnglesBDA + sizeof(float32_t) * ix); } - float32_t getData(uint32_t ix) { return vk::RawBufferLoad(dataBDA + sizeof(float32_t) * ix); } + float64_t getHorizontalAngle(uint32_t i) { return vk::RawBufferLoad(hAnglesBDA + sizeof(float64_t) * i); } + float64_t getVerticalAngle(uint32_t i) { return vk::RawBufferLoad(vAnglesBDA + sizeof(float64_t) * i); } + float64_t getData(uint32_t i) { return vk::RawBufferLoad(dataBDA + sizeof(float64_t) * i); } #endif // __HLSL_VERSION }; diff --git a/50.IESProfileTest/app_resources/compute.hlsl b/50.IESProfileTest/app_resources/compute.hlsl index 228a600d4..afdfd7347 100644 --- a/50.IESProfileTest/app_resources/compute.hlsl +++ b/50.IESProfileTest/app_resources/compute.hlsl @@ -7,7 +7,7 @@ [[vk::binding(0, 0)]] RWTexture2D outIESCandelaImage; [[vk::binding(1, 0)]] RWTexture2D outSphericalCoordinatesImage; [[vk::binding(2, 0)]] RWTexture2D outOUVProjectionDirectionImage; -[[vk::binding(3, 0)]] RWTexture2D outPassTMask; +[[vk::binding(3, 0)]] RWTexture2D outPassTMask; [[vk::push_constant]] struct PushConstants pc; diff --git a/50.IESProfileTest/app_resources/pixel.hlsl b/50.IESProfileTest/app_resources/pixel.hlsl index be19f0309..22e232240 100644 --- a/50.IESProfileTest/app_resources/pixel.hlsl +++ b/50.IESProfileTest/app_resources/pixel.hlsl @@ -5,15 +5,11 @@ #include "common.hlsl" #include "PSInput.hlsl" -[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D inIESCandelaImage; -[[vk::combinedImageSampler]] [[vk::binding(1, 3)]] Texture2D inSphericalCoordinatesImage; -[[vk::combinedImageSampler]] [[vk::binding(2, 3)]] Texture2D inOUVProjectionDirectionImage; -[[vk::combinedImageSampler]] [[vk::binding(3, 3)]] Texture2D inPassTMaskImage; - -[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState inIESCandelaSampler; -[[vk::combinedImageSampler]] [[vk::binding(1, 3)]] SamplerState inSphericalCoordinatesSampler; -[[vk::combinedImageSampler]] [[vk::binding(2, 3)]] SamplerState inOUVProjectionDirectionSampler; -[[vk::combinedImageSampler]] [[vk::binding(3, 3)]] SamplerState inPassTMaskSampler; +[[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MAX_IES_IMAGES]; +[[vk::binding(1, 0)]] Texture2D inSphericalCoordinatesImage[MAX_IES_IMAGES]; +[[vk::binding(2, 0)]] Texture2D inOUVProjectionDirectionImage[MAX_IES_IMAGES]; +[[vk::binding(3, 0)]] Texture2D inPassTMaskImage[MAX_IES_IMAGES]; +[[vk::binding(10, 0)]] SamplerState generalSampler; [[vk::push_constant]] struct PushConstants pc; @@ -36,11 +32,11 @@ float32_t plot(float32_t cand, float32_t pct, float32_t bold) // vertical cut of IES (i.e. cut by plane x = 0) float32_t f(float32_t2 uv) { - return inIESCandelaImage.Sample(inIESCandelaSampler, iesDirToUv(normalize(float32_t3(uv.x, 0.001, uv.y)))).x; + return inIESCandelaImage[pc.texIx].Sample(generalSampler, iesDirToUv(normalize(float32_t3(uv.x, 0.001, uv.y)))).x; } [shader("pixel")] -float32_t4 main(PSInput input) : SV_Target0 +float32_t4 PSMain(PSInput input) : SV_Target0 { float32_t2 ndc = input.position.xy; float32_t2 uv = (ndc + 1) / 2; @@ -60,12 +56,12 @@ float32_t4 main(PSInput input) : SV_Target0 return float32_t4(col, 1.0f); } case 1: - return float32_t4(inIESCandelaImage.Sample(inIESCandelaSampler, uv).x, 0.f, 0.f, 1.f); + return float32_t4(inIESCandelaImage[pc.texIx].Sample(generalSampler, uv).x, 0.f, 0.f, 1.f); case 2: - return float32_t4(inSphericalCoordinatesImage.Sample(inSphericalCoordinatesSampler, uv).xy, 0.f, 1.f); + return float32_t4(inSphericalCoordinatesImage[pc.texIx].Sample(generalSampler, uv).xy, 0.f, 1.f); case 3: - return float32_t4(inOUVProjectionDirectionImage.Sample(inOUVProjectionDirectionSampler, uv).xyz, 1.f); + return float32_t4(inOUVProjectionDirectionImage[pc.texIx].Sample(generalSampler, uv).xyz, 1.f); default: - return float32_t4(inPassTMaskImage.Sample(inPassTMaskSampler, uv).xy, 0.f, 1.f); + return float32_t4(inPassTMaskImage[pc.texIx].Sample(generalSampler, uv).xy, 0.f, 1.f); } } diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 9025b417e..110defaca 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -157,6 +157,103 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp m_logger->log("Finished loading GPU shaders, took %s seconds!", system::ILogger::ELL_PERFORMANCE, took.c_str()); } + // Pipelines & Descriptor Sets + { + using binding_flags_t = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + using stage_flags_t = asset::IShader::E_SHADER_STAGE; + static constexpr auto TexturesCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT) | binding_flags_t::ECF_PARTIALLY_BOUND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT; + static constexpr auto SamplersCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT); + + const uint32_t texturesCount = assets.size(); + auto computeBindings = std::to_array + ({ + {.binding = 0, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = texturesCount, .immutableSamplers = nullptr}, + {.binding = 1, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = texturesCount, .immutableSamplers = nullptr}, + {.binding = 2, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = texturesCount, .immutableSamplers = nullptr}, + {.binding = 3, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = texturesCount, .immutableSamplers = nullptr} + }); + + auto pixelBindings = std::to_array + ({ + {.binding = 0, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = texturesCount, .immutableSamplers = nullptr}, + {.binding = 1, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = texturesCount, .immutableSamplers = nullptr}, + {.binding = 2, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = texturesCount, .immutableSamplers = nullptr}, + {.binding = 3, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = texturesCount, .immutableSamplers = nullptr}, + {.binding = 3, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = 1u, .immutableSamplers = nullptr} + }); + + smart_refctd_ptr generalSampler; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK; + params.MinFilter = ISampler::ETF_LINEAR; + params.MaxFilter = ISampler::ETF_LINEAR; + params.MipmapMode = ISampler::ESMM_LINEAR; + params.AnisotropicFilter = 0u; + params.CompareEnable = false; + params.CompareFunc = ISampler::ECO_ALWAYS; + + generalSampler = m_device->createSampler(params); + + if (not generalSampler) + { + m_logger->log("Failed to create sampler!", system::ILogger::ELL_ERROR); + return false; + } + + generalSampler->setObjectDebugName("Default IES sampler"); + } + + auto scRes = static_cast(m_surface->getSwapchainResources()); + scRes->getRenderpass(); + + // Graphics Pipeline + { + auto descriptorSetLayout = m_device->createDescriptorSetLayout(pixelBindings); + + if(not descriptorSetLayout) + return logFail("Failed to create descriptor set layout!"); + + auto range = std::to_array({ {stage_flags_t::ESS_FRAGMENT, 0u, sizeof(PushConstants)} }); + auto graphicsPipelineLayout = m_device->createPipelineLayout(range, nullptr, nullptr, nullptr, core::smart_refctd_ptr(descriptorSetLayout)); + + if(not graphicsPipelineLayout) + return logFail("Failed to create pipeline layout!"); + + video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = + { + { .shader = vertex.get(), .entryPoint = "VSMain" }, + { .shader = pixel.get(), .entryPoint = "PSMain" } + }; + + auto params = std::to_array({ {} }); + params[0].layout = graphicsPipelineLayout.get(); + params[0].cached = { + .vertexInput = {}, + .primitiveAssembly = { + .primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST, + }, + .rasterization = { + .polygonMode = EPM_FILL, + .faceCullingMode = EFCM_NONE, + .depthWriteEnable = false, + }, + .blend = {} + }; + params[0].renderpass = scRes->getRenderpass(); + params[0].vertexShader = specInfo[0]; + params[0].fragmentShader = specInfo[1]; + + if (!m_device->createGraphicsPipelines(nullptr, params, &graphicsPipeline)) + return logFail("Failed to create graphics pipeline!"); + } + + } + return true; } @@ -172,16 +269,24 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); cb->beginDebugMarker("IESViewer Frame"); { - camera.beginInputProcessing(nextPresentationTimestamp); mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { mouseProcess(events); }, m_logger.get()); keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { keyboardProcess(events); }, m_logger.get()); - camera.endInputProcessing(nextPresentationTimestamp); } auto& ies = assets[activeAssetIx]; PushConstants pc; updatePushConstants(pc, ies); + for (auto& buffer : { ies.buffers.data, ies.buffers.hAngles, ies.buffers.vAngles }) // flush request for sanity + { + auto bound = buffer->getBoundMemory(); + if (bound.memory->haveToMakeVisible()) + { + const ILogicalDevice::MappedMemoryRange range(bound.memory, bound.offset, buffer->getSize()); + m_device->flushMappedMemoryRanges(1, &range); + } + } + asset::SViewport viewport; { viewport.minDepth = 1.f; @@ -344,26 +449,18 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } }; + smart_refctd_ptr graphicsPipeline; + bool running = true; std::vector assets; size_t activeAssetIx = 0; - // - //smart_refctd_ptr m_scene; - //smart_refctd_ptr m_renderer; - // smart_refctd_ptr m_semaphore; uint64_t m_realFrameIx = 0; std::array, device_base_t::MaxFramesInFlight> m_cmdBufs; - // InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; - // - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); - - uint16_t gcIndex = {}; - // TODO: lets have this stuff in nice imgui void mouseProcess(const nbl::ui::IMouseEventChannel::range_t& events) { @@ -525,6 +622,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp out.zAngleDegreeRotation = in.zDegree; out.mode = in.mode; + out.texIx = activeAssetIx; } }; From 1500ce014cceb14df35cd5009d8b126b34ef706f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 16 Oct 2025 18:18:41 +0200 Subject: [PATCH 042/219] specify alignment explicitly for vk::RawBufferLoad call, although need to think if I can go from double -> float32 IES storage --- 50.IESProfileTest/app_resources/common.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/50.IESProfileTest/app_resources/common.hlsl b/50.IESProfileTest/app_resources/common.hlsl index a68d74422..75738cdc9 100644 --- a/50.IESProfileTest/app_resources/common.hlsl +++ b/50.IESProfileTest/app_resources/common.hlsl @@ -41,9 +41,9 @@ struct PushConstants uint32_t dummy; #ifdef __HLSL_VERSION - float64_t getHorizontalAngle(uint32_t i) { return vk::RawBufferLoad(hAnglesBDA + sizeof(float64_t) * i); } - float64_t getVerticalAngle(uint32_t i) { return vk::RawBufferLoad(vAnglesBDA + sizeof(float64_t) * i); } - float64_t getData(uint32_t i) { return vk::RawBufferLoad(dataBDA + sizeof(float64_t) * i); } + float64_t getHorizontalAngle(uint32_t i) { return vk::RawBufferLoad(hAnglesBDA + sizeof(float64_t) * i, sizeof(float64_t)); } + float64_t getVerticalAngle(uint32_t i) { return vk::RawBufferLoad(vAnglesBDA + sizeof(float64_t) * i, sizeof(float64_t)); } + float64_t getData(uint32_t i) { return vk::RawBufferLoad(dataBDA + sizeof(float64_t) * i, sizeof(float64_t)); } #endif // __HLSL_VERSION }; From a2268ded75741d56ed1a3ccb427ca89997ef3511 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 16 Oct 2025 20:33:53 +0200 Subject: [PATCH 043/219] nbl::hlsl::bda is way more sexy --- 50.IESProfileTest/app_resources/common.hlsl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/50.IESProfileTest/app_resources/common.hlsl b/50.IESProfileTest/app_resources/common.hlsl index 75738cdc9..00a9cfbfc 100644 --- a/50.IESProfileTest/app_resources/common.hlsl +++ b/50.IESProfileTest/app_resources/common.hlsl @@ -3,6 +3,10 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#ifdef __HLSL_VERSION +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#endif // __HLSL_VERSION + // -> TODO: use NBL_CONTEXPR or something #ifndef UINT16_MAX #define UINT16_MAX 65535u // would be cool if we have this define somewhere or GLSL do @@ -41,9 +45,9 @@ struct PushConstants uint32_t dummy; #ifdef __HLSL_VERSION - float64_t getHorizontalAngle(uint32_t i) { return vk::RawBufferLoad(hAnglesBDA + sizeof(float64_t) * i, sizeof(float64_t)); } - float64_t getVerticalAngle(uint32_t i) { return vk::RawBufferLoad(vAnglesBDA + sizeof(float64_t) * i, sizeof(float64_t)); } - float64_t getData(uint32_t i) { return vk::RawBufferLoad(dataBDA + sizeof(float64_t) * i, sizeof(float64_t)); } + float64_t getHorizontalAngle(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(hAnglesBDA) + i).deref().load(); } + float64_t getVerticalAngle(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(vAnglesBDA) + i).deref().load(); } + float64_t getData(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(dataBDA) + i).deref().load(); } #endif // __HLSL_VERSION }; From 27674bf7311c42270d7f4c9f1d713b8c9c638f92 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 16 Oct 2025 20:47:16 +0200 Subject: [PATCH 044/219] bindless for compute though, silent DXC warnings thou I need to think if I can change IES storage to float32 - probably yes --- 50.IESProfileTest/app_resources/compute.hlsl | 190 +++++++++---------- 1 file changed, 94 insertions(+), 96 deletions(-) diff --git a/50.IESProfileTest/app_resources/compute.hlsl b/50.IESProfileTest/app_resources/compute.hlsl index afdfd7347..03548ee5b 100644 --- a/50.IESProfileTest/app_resources/compute.hlsl +++ b/50.IESProfileTest/app_resources/compute.hlsl @@ -4,38 +4,38 @@ #include "common.hlsl" -[[vk::binding(0, 0)]] RWTexture2D outIESCandelaImage; -[[vk::binding(1, 0)]] RWTexture2D outSphericalCoordinatesImage; -[[vk::binding(2, 0)]] RWTexture2D outOUVProjectionDirectionImage; -[[vk::binding(3, 0)]] RWTexture2D outPassTMask; +[[vk::binding(0, 0)]] RWTexture2D outIESCandelaImage[MAX_IES_IMAGES]; +[[vk::binding(1, 0)]] RWTexture2D outSphericalCoordinatesImage[MAX_IES_IMAGES]; +[[vk::binding(2, 0)]] RWTexture2D outOUVProjectionDirectionImage[MAX_IES_IMAGES]; +[[vk::binding(3, 0)]] RWTexture2D outPassTMask[MAX_IES_IMAGES]; [[vk::push_constant]] struct PushConstants pc; -float32_t3 octahedronUVToDir(float32_t2 uv) +float32_t3 octahedronUVToDir(float64_t2 uv) { - float32_t3 position = float32_t3((uv * 2.0 - 1.0).xy, 0.0); + float32_t3 position = float32_t3((uv * 2.0 - 1.0).xy, 0.0); float32_t2 absP = float32_t2(abs(position.x), abs(position.y)); - - position.z = 1.0 - absP.x - absP.y; - - if (position.z < 0.0) + + position.z = 1.0 - absP.x - absP.y; + + if (position.z < 0.0) { position.x = sign(position.x) * (1.0 - absP.y); position.y = sign(position.y) * (1.0 - absP.x); } // rotate position vector around Z-axis with "pc.zAngleDegreeRotation" - if(pc.zAngleDegreeRotation != 0.0) + if (pc.zAngleDegreeRotation != 0.0) { - float32_t rDegree = pc.zAngleDegreeRotation; - + float64_t rDegree = pc.zAngleDegreeRotation; + const float32_t zAngleRadians = float32_t(rDegree * M_PI / 180.0); - const float32_t cosineV = cos(zAngleRadians); - const float32_t sineV = sin(zAngleRadians); + const float64_t cosineV = cos(zAngleRadians); + const float64_t sineV = sin(zAngleRadians); position = float32_t3(cosineV * position.x - cosineV * position.y, sineV * position.x + sineV * position.y, position.z); } - + return normalize(position); } @@ -48,51 +48,51 @@ float32_t3 octahedronUVToDir(float32_t2 uv) float32_t2 sphericalDirToRadians(float32_t3 direction) { - float32_t theta = acos(clamp(direction.z/length(direction), -1.0, 1.0)); + float32_t theta = acos(clamp(direction.z / length(direction), -1.0, 1.0)); float32_t phi = atan2(direction.y, direction.x); // TODO: check it - + return float32_t2(theta, phi); } -uint32_t implGetVUB(const float32_t angle) -{ - for(uint32_t i = 0; i < pc.vAnglesCount; ++i) - if(pc.getVerticalAngle(i) > angle) - return i; +uint32_t implGetVUB(const float64_t angle) +{ + for (uint32_t i = 0; i < pc.vAnglesCount; ++i) + if (pc.getVerticalAngle(i) > angle) + return i; - return pc.vAnglesCount; + return pc.vAnglesCount; } -uint32_t implGetHUB(const float32_t angle) -{ +uint32_t implGetHUB(const float64_t angle) +{ for (uint32_t i = 0; i < pc.hAnglesCount; ++i) if (pc.getHorizontalAngle(i) > angle) return i; - return pc.hAnglesCount; + return pc.hAnglesCount; } -uint32_t getVLB(const float32_t angle) +uint32_t getVLB(const float64_t angle) { return uint32_t(max(int(implGetVUB(angle)) - 1, 0)); } -uint32_t getHLB(const float32_t angle) +uint32_t getHLB(const float64_t angle) { return uint32_t(max(int(implGetHUB(angle)) - 1, 0)); } -uint32_t getVUB(const float32_t angle) +uint32_t getVUB(const float64_t angle) { return uint32_t(min(int(implGetVUB(angle)), int(pc.vAnglesCount) - 1)); } -uint32_t getHUB(const float32_t angle) +uint32_t getHUB(const float64_t angle) { return uint32_t(min(int(implGetHUB(angle)), int(pc.hAnglesCount) - 1)); } -float32_t getValue(uint32_t i, uint32_t j) +float64_t getValue(uint32_t i, uint32_t j) { return pc.getData(pc.vAnglesCount * i + j); } @@ -105,14 +105,14 @@ float32_t getValue(uint32_t i, uint32_t j) uint32_t getSymmetry() // TODO: to reduce check time we could pass it with PCs { - if(pc.hAnglesCount < 2) // careful here, somebody can break it by feeding us with too much data by mistake + if (pc.hAnglesCount < 2) // careful here, somebody can break it by feeding us with too much data by mistake return ISOTROPIC; - - const float32_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); - - if(hABack == 90) + + const float64_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); + + if (hABack == 90) return QUAD_SYMETRIC; - else if(hABack == 180) // note that OTHER_HALF_SYMMETRIC = HALF_SYMETRIC here + else if (hABack == 180) // note that OTHER_HALF_SYMMETRIC = HALF_SYMETRIC here return HALF_SYMETRIC; else return NO_LATERAL_SYMMET; @@ -122,73 +122,73 @@ float32_t wrapPhi(const float32_t phi, const uint32_t symmetry) //! wrap phi sph { switch (symmetry) { - case ISOTROPIC: - return 0.0; - case QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range - { - float32_t wrapPhi = abs(phi); //! first MIRROR - - if(wrapPhi > M_HALF_PI) //! then REPEAT - wrapPhi = clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0, M_HALF_PI); - - return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 - } - case HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range - return abs(phi); //! eg. maps (in degress) 181 -> 179 or 359 -> 1 - case NO_LATERAL_SYMMET: - { - if(phi < 0) - return phi + 2.0 * M_PI; - else - return phi; - } + case ISOTROPIC: + return 0.0; + case QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range + { + float32_t wrapPhi = abs(phi); //! first MIRROR + + if (wrapPhi > M_HALF_PI) //! then REPEAT + wrapPhi = clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0, M_HALF_PI); + + return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 + } + case HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range + return abs(phi); //! eg. maps (in degress) 181 -> 179 or 359 -> 1 + case NO_LATERAL_SYMMET: + { + if (phi < 0) + return phi + 2.0 * M_PI; + else + return phi; } - + } + return 69; } -float32_t sampleI(const float32_t2 sphericalCoordinates, const uint32_t symmetry) +float64_t sampleI(const float32_t2 sphericalCoordinates, const uint32_t symmetry) { - const float32_t vAngle = degrees(sphericalCoordinates.x), hAngle = degrees(wrapPhi(sphericalCoordinates.y, symmetry)); - - float32_t vABack = pc.getVerticalAngle(pc.vAnglesCount - 1); - float32_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); + const float64_t vAngle = degrees(sphericalCoordinates.x), hAngle = degrees(wrapPhi(sphericalCoordinates.y, symmetry)); + + float64_t vABack = pc.getVerticalAngle(pc.vAnglesCount - 1); + float64_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); if (vAngle > vABack) return 0.0; - + // bilinear interpolation uint32_t j0 = getVLB(vAngle); uint32_t j1 = getVUB(vAngle); - uint32_t i0 = symmetry == ISOTROPIC ? 0 : getHLB(hAngle); + uint32_t i0 = symmetry == ISOTROPIC ? 0 : getHLB(hAngle); uint32_t i1 = symmetry == ISOTROPIC ? 0 : getHUB(hAngle); - - float32_t uReciprocal = i1 == i0 ? 1.0 : 1.0 / (pc.getHorizontalAngle(i1) - pc.getHorizontalAngle(i0)); - float32_t vReciprocal = j1 == j0 ? 1.0 : 1.0 / (pc.getVerticalAngle(j1) - pc.getVerticalAngle(j0)); - - float32_t u = (hAngle - pc.getHorizontalAngle(i0)) * uReciprocal; - float32_t v = (vAngle - pc.getVerticalAngle(j0)) * vReciprocal; - - float32_t s0 = getValue(i0, j0) * (1.0 - v) + getValue(i0, j1) * (v); - float32_t s1 = getValue(i1, j0) * (1.0 - v) + getValue(i1, j1) * (v); - + + float64_t uReciprocal = i1 == i0 ? 1.0 : 1.0 / (pc.getHorizontalAngle(i1) - pc.getHorizontalAngle(i0)); + float64_t vReciprocal = j1 == j0 ? 1.0 : 1.0 / (pc.getVerticalAngle(j1) - pc.getVerticalAngle(j0)); + + float64_t u = (hAngle - pc.getHorizontalAngle(i0)) * uReciprocal; + float64_t v = (vAngle - pc.getVerticalAngle(j0)) * vReciprocal; + + float64_t s0 = getValue(i0, j0) * (1.0 - v) + getValue(i0, j1) * (v); + float64_t s1 = getValue(i1, j0) * (1.0 - v) + getValue(i1, j1) * (v); + return s0 * (1.0 - u) + s1 * u; } //! Checks if (x,y) /in [0,PI] x [-PI,PI] product /* IES vertical range is [0, 180] degrees - and horizontal range is [0, 360] degrees - but for easier computations (MIRROR & MIRROW_REPEAT operations) + and horizontal range is [0, 360] degrees + but for easier computations (MIRROR & MIRROW_REPEAT operations) we represent horizontal range as [-180, 180] given spherical coordinates */ -bool isWithinSCDomain(const float32_t2 p) +bool isWithinSCDomain(const float64_t2 p) { - const float32_t2 lb = float32_t2(0, -M_PI); - const float32_t2 ub = float32_t2(M_PI, M_PI); + const float64_t2 lb = float64_t2(0, -M_PI); + const float64_t2 ub = float64_t2(M_PI, M_PI); - return all(lb <= p) && all(p <= ub); + return all(lb <= p) && all(p <= ub); } [numthreads(WORKGROUP_DIMENSION, WORKGROUP_DIMENSION, 1)] @@ -196,36 +196,34 @@ bool isWithinSCDomain(const float32_t2 p) void main(uint32_t3 ID : SV_DispatchThreadID) { uint32_t2 destinationSize; - outIESCandelaImage.GetDimensions(destinationSize.x, destinationSize.y); + outIESCandelaImage[pc.texIx].GetDimensions(destinationSize.x, destinationSize.y); const uint32_t2 pixelCoordinates = uint32_t2(glsl::gl_GlobalInvocationID().x, glsl::gl_GlobalInvocationID().y); - + const float32_t VERTICAL_INVERSE = 1.0f / float32_t(destinationSize.x); const float32_t HORIZONTAL_INVERSE = 1.0f / float32_t(destinationSize.y); - + if (all(pixelCoordinates < destinationSize)) { const float32_t2 uv = float32_t2((float32_t(pixelCoordinates.x) + 0.5) * VERTICAL_INVERSE, (float32_t(pixelCoordinates.y) + 0.5) * HORIZONTAL_INVERSE); const float32_t3 direction = octahedronUVToDir(uv); const float32_t2 sphericalCoordinates = sphericalDirToRadians(direction); // third radius spherical compoment is normalized and skipped - - const float32_t intensity = sampleI(sphericalCoordinates, getSymmetry()); - + const float32_t normD = length(direction); float32_t2 mask; - - if(1.0 - QUANT_ERROR_ADMISSIBLE <= normD && normD <= 1.0 + QUANT_ERROR_ADMISSIBLE) + + if (1.0f - QUANT_ERROR_ADMISSIBLE <= normD && normD <= 1.0f + QUANT_ERROR_ADMISSIBLE) mask.x = 1.0; // pass else mask.x = 0; - - if(isWithinSCDomain(sphericalCoordinates)) + + if (isWithinSCDomain(sphericalCoordinates)) mask.y = 1.0; // pass else mask.y = 0; - outIESCandelaImage[pixelCoordinates] = uint32_t(intensity / pc.maxIValue); - outSphericalCoordinatesImage[pixelCoordinates] = sphericalCoordinates; - outOUVProjectionDirectionImage[pixelCoordinates] = direction; - outPassTMask[pixelCoordinates] = mask; + outIESCandelaImage[pc.texIx][pixelCoordinates] = float32_t(sampleI(sphericalCoordinates, getSymmetry()) / pc.maxIValue); + outSphericalCoordinatesImage[pc.texIx][pixelCoordinates] = sphericalCoordinates; + outOUVProjectionDirectionImage[pc.texIx][pixelCoordinates] = direction; + outPassTMask[pc.texIx][pixelCoordinates] = mask; } } \ No newline at end of file From 07f3fbdf274da65ddcad95e452407bab661c4f44 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 16 Oct 2025 21:51:26 +0200 Subject: [PATCH 045/219] write descriptors --- 50.IESProfileTest/main.cpp | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 110defaca..0df39d351 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -250,6 +250,43 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp if (!m_device->createGraphicsPipelines(nullptr, params, &graphicsPipeline)) return logFail("Failed to create graphics pipeline!"); + + const auto dscLayoutPtrs = graphicsPipeline->getLayout()->getDescriptorSetLayouts(); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, dscLayoutPtrs); + pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), graphicDS.data()); + { + std::array, 4u> infos; + for (uint32_t i = 0; i < assets.size(); ++i) + { + auto& ies = assets[i]; + + #define FILL_INFO(DESC, IX) \ + { \ + auto& info = infos[IX].emplace_back(); \ + info.desc = DESC; \ + info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; \ + } + + FILL_INFO(ies.views.candela, 0u) + FILL_INFO(ies.views.spherical, 1u) + FILL_INFO(ies.views.direction, 2u) + FILL_INFO(ies.views.mask, 3u) + } + + std::array writes; + for (uint32_t i = 0; i < infos.size(); ++i) + { + auto& write = writes[i]; + write.count = assets.size(); + write.info = infos[i].data(); + write.dstSet = graphicDS[0u].get(); + write.arrayElement = 0u; + write.binding = i; + } + + if (!m_device->updateDescriptorSets(writes, {})) + return logFail("Failed to write descriptor sets"); + } } } @@ -449,7 +486,11 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } }; + std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> graphicDS; + std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> computeDS; + smart_refctd_ptr graphicsPipeline; + smart_refctd_ptr computePipeline; bool running = true; std::vector assets; From b97424cc9040ba8e5ecdfc3bf683e3e2ed4a4023 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 17 Oct 2025 14:18:19 +0200 Subject: [PATCH 046/219] precompile FullScreenTriangle ext with NSC, use with IES shaders, get rid of validation errors for graphics pipeline creation & its descriptor writes --- 50.IESProfileTest/app_resources/common.hlsl | 2 +- 50.IESProfileTest/app_resources/pixel.hlsl | 17 ++++---- 50.IESProfileTest/app_resources/vertex.hlsl | 13 +----- 50.IESProfileTest/main.cpp | 46 +++++++++++---------- 4 files changed, 36 insertions(+), 42 deletions(-) diff --git a/50.IESProfileTest/app_resources/common.hlsl b/50.IESProfileTest/app_resources/common.hlsl index 00a9cfbfc..40ae07b40 100644 --- a/50.IESProfileTest/app_resources/common.hlsl +++ b/50.IESProfileTest/app_resources/common.hlsl @@ -23,7 +23,7 @@ // <- + wipe whatever we already have // TODO: since NSC prebuilds into SPIRV - maybe could make it a CMake option with a default val -#define MAX_IES_IMAGES 4u * 6969 +#define MAX_IES_IMAGES 6969 using namespace nbl::hlsl; diff --git a/50.IESProfileTest/app_resources/pixel.hlsl b/50.IESProfileTest/app_resources/pixel.hlsl index 22e232240..c985afffc 100644 --- a/50.IESProfileTest/app_resources/pixel.hlsl +++ b/50.IESProfileTest/app_resources/pixel.hlsl @@ -3,7 +3,8 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hlsl" -#include "PSInput.hlsl" +#include "nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl" +using namespace nbl::hlsl::ext::FullScreenTriangle; [[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MAX_IES_IMAGES]; [[vk::binding(1, 0)]] Texture2D inSphericalCoordinatesImage[MAX_IES_IMAGES]; @@ -36,15 +37,13 @@ float32_t f(float32_t2 uv) } [shader("pixel")] -float32_t4 PSMain(PSInput input) : SV_Target0 +float32_t4 PSMain(SVertexAttributes input) : SV_Target0 { - float32_t2 ndc = input.position.xy; - float32_t2 uv = (ndc + 1) / 2; - switch (pc.mode) { case 0: { + float32_t2 ndc = input.uv * 2.f - 1.f; float32_t dist = length(ndc) * 1.015625f; float32_t p = plot(dist, 1.0f, 0.75f); float32_t3 col = float32_t3(p, p, p); @@ -56,12 +55,12 @@ float32_t4 PSMain(PSInput input) : SV_Target0 return float32_t4(col, 1.0f); } case 1: - return float32_t4(inIESCandelaImage[pc.texIx].Sample(generalSampler, uv).x, 0.f, 0.f, 1.f); + return float32_t4(inIESCandelaImage[pc.texIx].Sample(generalSampler, input.uv).x, 0.f, 0.f, 1.f); case 2: - return float32_t4(inSphericalCoordinatesImage[pc.texIx].Sample(generalSampler, uv).xy, 0.f, 1.f); + return float32_t4(inSphericalCoordinatesImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); case 3: - return float32_t4(inOUVProjectionDirectionImage[pc.texIx].Sample(generalSampler, uv).xyz, 1.f); + return float32_t4(inOUVProjectionDirectionImage[pc.texIx].Sample(generalSampler, input.uv).xyz, 1.f); default: - return float32_t4(inPassTMaskImage[pc.texIx].Sample(generalSampler, uv).xy, 0.f, 1.f); + return float32_t4(inPassTMaskImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); } } diff --git a/50.IESProfileTest/app_resources/vertex.hlsl b/50.IESProfileTest/app_resources/vertex.hlsl index 4a86eb88d..a0f565455 100644 --- a/50.IESProfileTest/app_resources/vertex.hlsl +++ b/50.IESProfileTest/app_resources/vertex.hlsl @@ -2,14 +2,5 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "VSInput.hlsl" -#include "PSInput.hlsl" - -[shader("vertex")] -PSInput VSMain(VSInput input) -{ - PSInput output; - output.position = float4(input.position, 1.f); - - return output; -} \ No newline at end of file +// small trick, temporary, we will have a separate rule for compiling this ext and embed into Nabla DLL +#include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 0df39d351..34ba5b5af 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -167,19 +167,19 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp const uint32_t texturesCount = assets.size(); auto computeBindings = std::to_array ({ - {.binding = 0, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = texturesCount, .immutableSamplers = nullptr}, - {.binding = 1, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = texturesCount, .immutableSamplers = nullptr}, - {.binding = 2, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = texturesCount, .immutableSamplers = nullptr}, - {.binding = 3, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = texturesCount, .immutableSamplers = nullptr} + {.binding = 0, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, + {.binding = 1, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, + {.binding = 2, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, + {.binding = 3, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr} }); auto pixelBindings = std::to_array ({ - {.binding = 0, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = texturesCount, .immutableSamplers = nullptr}, - {.binding = 1, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = texturesCount, .immutableSamplers = nullptr}, - {.binding = 2, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = texturesCount, .immutableSamplers = nullptr}, - {.binding = 3, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = texturesCount, .immutableSamplers = nullptr}, - {.binding = 3, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = 1u, .immutableSamplers = nullptr} + {.binding = 0, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, + {.binding = 1, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, + {.binding = 2, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, + {.binding = 3, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, + {.binding = 10, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = 1u, .immutableSamplers = nullptr} }); smart_refctd_ptr generalSampler; @@ -219,40 +219,44 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp return logFail("Failed to create descriptor set layout!"); auto range = std::to_array({ {stage_flags_t::ESS_FRAGMENT, 0u, sizeof(PushConstants)} }); - auto graphicsPipelineLayout = m_device->createPipelineLayout(range, nullptr, nullptr, nullptr, core::smart_refctd_ptr(descriptorSetLayout)); + auto graphicsPipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); if(not graphicsPipelineLayout) return logFail("Failed to create pipeline layout!"); + IGPUPipelineBase::SShaderEntryMap specConstants; + const auto orientationAsUint32 = static_cast(hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT); + specConstants[0] = std::span{ reinterpret_cast(&orientationAsUint32), sizeof(orientationAsUint32) }; + video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = { - { .shader = vertex.get(), .entryPoint = "VSMain" }, + { .shader = vertex.get(), .entryPoint = "main", .entries = &specConstants }, { .shader = pixel.get(), .entryPoint = "PSMain" } }; auto params = std::to_array({ {} }); + params[0].renderpass = scRes->getRenderpass(); + params[0].vertexShader = specInfo[0]; + params[0].fragmentShader = specInfo[1]; params[0].layout = graphicsPipelineLayout.get(); - params[0].cached = { - .vertexInput = {}, - .primitiveAssembly = { - .primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST, - }, + params[0].cached = + { + .vertexInput = {}, // full screen tri ext, no inputs + .primitiveAssembly = {}, .rasterization = { .polygonMode = EPM_FILL, .faceCullingMode = EFCM_NONE, .depthWriteEnable = false, }, - .blend = {} + .blend = {}, + .subpassIx = 0u }; - params[0].renderpass = scRes->getRenderpass(); - params[0].vertexShader = specInfo[0]; - params[0].fragmentShader = specInfo[1]; if (!m_device->createGraphicsPipelines(nullptr, params, &graphicsPipeline)) return logFail("Failed to create graphics pipeline!"); const auto dscLayoutPtrs = graphicsPipeline->getLayout()->getDescriptorSetLayouts(); - auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, dscLayoutPtrs); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dscLayoutPtrs); pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), graphicDS.data()); { std::array, 4u> infos; From 7d50d65b36124ded561018ac8e86de41141c96a3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 19 Oct 2025 12:58:03 +0200 Subject: [PATCH 047/219] compute pipeline creation, single layout and descriptor to rule them, sampled & storage bits for my images --- 50.IESProfileTest/app_resources/PSInput.hlsl | 10 -- 50.IESProfileTest/app_resources/VSInput.hlsl | 11 -- 50.IESProfileTest/app_resources/common.hlsl | 13 ++ 50.IESProfileTest/app_resources/compute.hlsl | 7 - 50.IESProfileTest/app_resources/pixel.hlsl | 8 - 50.IESProfileTest/main.cpp | 156 +++++++++++-------- 6 files changed, 103 insertions(+), 102 deletions(-) delete mode 100644 50.IESProfileTest/app_resources/PSInput.hlsl delete mode 100644 50.IESProfileTest/app_resources/VSInput.hlsl diff --git a/50.IESProfileTest/app_resources/PSInput.hlsl b/50.IESProfileTest/app_resources/PSInput.hlsl deleted file mode 100644 index a4ed7b727..000000000 --- a/50.IESProfileTest/app_resources/PSInput.hlsl +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _NBL_THIS_EXAMPLE_PSINPUT_HLSL_ -#define _NBL_THIS_EXAMPLE_PSINPUT_HLSL_ - -#ifdef __HLSL_VERSION -struct PSInput -{ - float32_t4 position : SV_Position; -}; -#endif // __HLSL_VERSION -#endif // _NBL_THIS_EXAMPLE_PSINPUT_HLSL_ diff --git a/50.IESProfileTest/app_resources/VSInput.hlsl b/50.IESProfileTest/app_resources/VSInput.hlsl deleted file mode 100644 index 56dcae831..000000000 --- a/50.IESProfileTest/app_resources/VSInput.hlsl +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _NBL_THIS_EXAMPLE_VSINPUT_HLSL_ -#define _NBL_THIS_EXAMPLE_VSINPUT_HLSL_ - -#ifdef __HLSL_VERSION -struct VSInput -{ - [[vk::location(0)]] float3 position : POSITION; - [[vk::location(3)]] float3 normal : NORMAL; -}; -#endif // __HLSL_VERSION -#endif // _NBL_THIS_EXAMPLE_VSINPUT_HLSL_ diff --git a/50.IESProfileTest/app_resources/common.hlsl b/50.IESProfileTest/app_resources/common.hlsl index 40ae07b40..9705c2282 100644 --- a/50.IESProfileTest/app_resources/common.hlsl +++ b/50.IESProfileTest/app_resources/common.hlsl @@ -51,4 +51,17 @@ struct PushConstants #endif // __HLSL_VERSION }; +#ifdef __HLSL_VERSION +[[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MAX_IES_IMAGES]; +[[vk::binding(1, 0)]] Texture2D inSphericalCoordinatesImage[MAX_IES_IMAGES]; +[[vk::binding(2, 0)]] Texture2D inOUVProjectionDirectionImage[MAX_IES_IMAGES]; +[[vk::binding(3, 0)]] Texture2D inPassTMaskImage[MAX_IES_IMAGES]; +[[vk::binding(0 + 10, 0)]] RWTexture2D outIESCandelaImage[MAX_IES_IMAGES]; +[[vk::binding(1 + 10, 0)]] RWTexture2D outSphericalCoordinatesImage[MAX_IES_IMAGES]; +[[vk::binding(2 + 10, 0)]] RWTexture2D outOUVProjectionDirectionImage[MAX_IES_IMAGES]; +[[vk::binding(3 + 10, 0)]] RWTexture2D outPassTMask[MAX_IES_IMAGES]; +[[vk::binding(0 + 100, 0)]] SamplerState generalSampler; +[[vk::push_constant]] struct PushConstants pc; +#endif // __HLSL_VERSION + #endif // _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ diff --git a/50.IESProfileTest/app_resources/compute.hlsl b/50.IESProfileTest/app_resources/compute.hlsl index 03548ee5b..cf22466fc 100644 --- a/50.IESProfileTest/app_resources/compute.hlsl +++ b/50.IESProfileTest/app_resources/compute.hlsl @@ -4,13 +4,6 @@ #include "common.hlsl" -[[vk::binding(0, 0)]] RWTexture2D outIESCandelaImage[MAX_IES_IMAGES]; -[[vk::binding(1, 0)]] RWTexture2D outSphericalCoordinatesImage[MAX_IES_IMAGES]; -[[vk::binding(2, 0)]] RWTexture2D outOUVProjectionDirectionImage[MAX_IES_IMAGES]; -[[vk::binding(3, 0)]] RWTexture2D outPassTMask[MAX_IES_IMAGES]; - -[[vk::push_constant]] struct PushConstants pc; - float32_t3 octahedronUVToDir(float64_t2 uv) { float32_t3 position = float32_t3((uv * 2.0 - 1.0).xy, 0.0); diff --git a/50.IESProfileTest/app_resources/pixel.hlsl b/50.IESProfileTest/app_resources/pixel.hlsl index c985afffc..5fe452b2d 100644 --- a/50.IESProfileTest/app_resources/pixel.hlsl +++ b/50.IESProfileTest/app_resources/pixel.hlsl @@ -6,14 +6,6 @@ #include "nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl" using namespace nbl::hlsl::ext::FullScreenTriangle; -[[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MAX_IES_IMAGES]; -[[vk::binding(1, 0)]] Texture2D inSphericalCoordinatesImage[MAX_IES_IMAGES]; -[[vk::binding(2, 0)]] Texture2D inOUVProjectionDirectionImage[MAX_IES_IMAGES]; -[[vk::binding(3, 0)]] Texture2D inPassTMaskImage[MAX_IES_IMAGES]; -[[vk::binding(10, 0)]] SamplerState generalSampler; - -[[vk::push_constant]] struct PushConstants pc; - float32_t2 iesDirToUv(float32_t3 dir) { float32_t sum = dot(float32_t3(1.0f, 1.0f, 1.0f), abs(dir)); diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 34ba5b5af..02a066029 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -163,25 +163,23 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp using stage_flags_t = asset::IShader::E_SHADER_STAGE; static constexpr auto TexturesCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT) | binding_flags_t::ECF_PARTIALLY_BOUND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT; static constexpr auto SamplersCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT); + static constexpr auto StageFlags = core::bitflag(stage_flags_t::ESS_FRAGMENT) | stage_flags_t::ESS_COMPUTE; - const uint32_t texturesCount = assets.size(); - auto computeBindings = std::to_array - ({ - {.binding = 0, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, - {.binding = 1, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, - {.binding = 2, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, - {.binding = 3, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_COMPUTE, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr} - }); + //! singe descriptor for both compute & graphics, we will only need to trasition images' layout with a barrier + //! TODO: maybe could use subpass dependencies if this becomes part of renderpass, instead of a barrier - auto pixelBindings = std::to_array + #define BINDING_TEXTURE(IX, TYPE) { .binding = IX, .type = TYPE, .createFlags = TexturesCreateFlags, .stageFlags = StageFlags, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr } + #define BINDING_SAMPLER(IX) { .binding = IX, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = StageFlags, .count = 1u, .immutableSamplers = nullptr } + static constexpr auto bindings = std::to_array ({ - {.binding = 0, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, - {.binding = 1, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, - {.binding = 2, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, - {.binding = 3, .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = TexturesCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr}, - {.binding = 10, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = stage_flags_t::ESS_FRAGMENT, .count = 1u, .immutableSamplers = nullptr} + BINDING_TEXTURE(0u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(0u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // candela + BINDING_TEXTURE(1u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(1u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // spherical + BINDING_TEXTURE(2u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(2u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // direction + BINDING_TEXTURE(3u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(3u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // mask + BINDING_SAMPLER(0u + 100u) }); + const uint32_t texturesCount = assets.size(); smart_refctd_ptr generalSampler; { IGPUSampler::SParams params; @@ -205,94 +203,121 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp return false; } - generalSampler->setObjectDebugName("Default IES sampler"); + generalSampler->setObjectDebugName("General IES sampler"); } auto scRes = static_cast(m_surface->getSwapchainResources()); - scRes->getRenderpass(); - - // Graphics Pipeline + scRes->getRenderpass(); // note it also creates rp if nulled { - auto descriptorSetLayout = m_device->createDescriptorSetLayout(pixelBindings); + auto descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); if(not descriptorSetLayout) return logFail("Failed to create descriptor set layout!"); - auto range = std::to_array({ {stage_flags_t::ESS_FRAGMENT, 0u, sizeof(PushConstants)} }); - auto graphicsPipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); + auto range = std::to_array({ {stage_flags_t::ESS_ALL_OR_LIBRARY, 0u, sizeof(PushConstants)} }); + auto pipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); - if(not graphicsPipelineLayout) + if(not pipelineLayout) return logFail("Failed to create pipeline layout!"); - IGPUPipelineBase::SShaderEntryMap specConstants; - const auto orientationAsUint32 = static_cast(hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT); - specConstants[0] = std::span{ reinterpret_cast(&orientationAsUint32), sizeof(orientationAsUint32) }; - - video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = + // Graphics Pipeline { - { .shader = vertex.get(), .entryPoint = "main", .entries = &specConstants }, - { .shader = pixel.get(), .entryPoint = "PSMain" } - }; - - auto params = std::to_array({ {} }); - params[0].renderpass = scRes->getRenderpass(); - params[0].vertexShader = specInfo[0]; - params[0].fragmentShader = specInfo[1]; - params[0].layout = graphicsPipelineLayout.get(); - params[0].cached = + IGPUPipelineBase::SShaderEntryMap specConstants; + const auto orientationAsUint32 = static_cast(hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT); + specConstants[0] = std::span{ reinterpret_cast(&orientationAsUint32), sizeof(orientationAsUint32) }; + + video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = + { + {.shader = vertex.get(), .entryPoint = "main", .entries = &specConstants }, + {.shader = pixel.get(), .entryPoint = "PSMain" } + }; + + auto params = std::to_array({ {} }); + params[0].renderpass = scRes->getRenderpass(); + params[0].vertexShader = specInfo[0]; + params[0].fragmentShader = specInfo[1]; + params[0].layout = pipelineLayout.get(); + params[0].cached = + { + .vertexInput = {}, // full screen tri ext, no inputs + .primitiveAssembly = {}, + .rasterization = { + .polygonMode = EPM_FILL, + .faceCullingMode = EFCM_NONE, + .depthWriteEnable = false, + }, + .blend = {}, + .subpassIx = 0u + }; + + if (!m_device->createGraphicsPipelines(nullptr, params, &graphicsPipeline)) + return logFail("Failed to create graphics pipeline!"); + } + + // Compute Pipeline { - .vertexInput = {}, // full screen tri ext, no inputs - .primitiveAssembly = {}, - .rasterization = { - .polygonMode = EPM_FILL, - .faceCullingMode = EFCM_NONE, - .depthWriteEnable = false, - }, - .blend = {}, - .subpassIx = 0u - }; - - if (!m_device->createGraphicsPipelines(nullptr, params, &graphicsPipeline)) - return logFail("Failed to create graphics pipeline!"); + auto params = std::to_array({ {} });; + params[0].layout = pipelineLayout.get(); + params[0].shader.shader = compute.get(); + params[0].shader.entryPoint = "main"; + if (!m_device->createComputePipelines(nullptr, params, &computePipeline)) + return logFail("Failed to create compute pipeline!"); + } const auto dscLayoutPtrs = graphicsPipeline->getLayout()->getDescriptorSetLayouts(); auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dscLayoutPtrs); - pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), graphicDS.data()); + pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), descriptors.data()); { - std::array, 4u> infos; + std::array, 4u + 1u> infos; + #define FILL_INFO(DESC, IX) \ + { \ + auto& info = infos[IX].emplace_back(); \ + info.desc = DESC; \ + info.info.image.imageLayout = IImage::LAYOUT::GENERAL; \ + } + for (uint32_t i = 0; i < assets.size(); ++i) { auto& ies = assets[i]; - #define FILL_INFO(DESC, IX) \ - { \ - auto& info = infos[IX].emplace_back(); \ - info.desc = DESC; \ - info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; \ - } - FILL_INFO(ies.views.candela, 0u) FILL_INFO(ies.views.spherical, 1u) FILL_INFO(ies.views.direction, 2u) FILL_INFO(ies.views.mask, 3u) } + FILL_INFO(generalSampler, 4u); + auto* samplerInfo = infos.back().data(); + samplerInfo->info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - std::array writes; - for (uint32_t i = 0; i < infos.size(); ++i) + std::array writes; + for (uint32_t i = 0; i < 4u; ++i) { auto& write = writes[i]; write.count = assets.size(); write.info = infos[i].data(); - write.dstSet = graphicDS[0u].get(); + write.dstSet = descriptors[0u].get(); write.arrayElement = 0u; write.binding = i; } + for (uint32_t i = 4u; i < 8u; ++i) + { + auto ix = i - 4u; + auto& write = writes[i] = writes[ix]; + write.binding = ix + 10u; + } + + auto& write = writes.back(); + write.count = 1u; + write.info = samplerInfo; + write.dstSet = descriptors[0u].get(); + write.arrayElement = 0u; + write.binding = 0u + 100u; + if (!m_device->updateDescriptorSets(writes, {})) return logFail("Failed to write descriptor sets"); } } - } return true; @@ -490,11 +515,9 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } }; - std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> graphicDS; - std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> computeDS; - smart_refctd_ptr graphicsPipeline; smart_refctd_ptr computePipeline; + std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> descriptors; bool running = true; std::vector assets; @@ -570,6 +593,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp imageParams.flags = IImage::ECF_NONE; imageParams.arrayLayers = 1u; imageParams.samples = IImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + imageParams.usage = bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_STORAGE_BIT; auto image = m_device->createImage(std::move(imageParams)); image->setObjectDebugName(name.c_str()); From c46f8f8755a26d5e3d1ba9587769902e4025248a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 20 Oct 2025 10:37:48 +0200 Subject: [PATCH 048/219] get some IES on screen, update init & render code. TODO: I need barriers to transitions layouts and sema waits --- 24_ColorSpaceTest/main.cpp | 1 + 50.IESProfileTest/main.cpp | 110 +++++++++++++++++++++++++------------ 2 files changed, 76 insertions(+), 35 deletions(-) diff --git a/24_ColorSpaceTest/main.cpp b/24_ColorSpaceTest/main.cpp index 84c55ef3a..6bf466d64 100644 --- a/24_ColorSpaceTest/main.cpp +++ b/24_ColorSpaceTest/main.cpp @@ -794,6 +794,7 @@ class ColorSpaceTestSampleApp final : public SimpleWindowedApplication, public B }; cmdbuf->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); } + cmdbuf->bindGraphicsPipeline(m_pipeline.get()); cmdbuf->pushConstants(m_pipeline->getLayout(),hlsl::ShaderStage::ESS_FRAGMENT,0,sizeof(push_constants_t),&pc); cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS,m_pipeline->getLayout(),3,1,&ds); diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 02a066029..81679b91a 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -4,6 +4,7 @@ #include "nbl/examples/examples.hpp" #include "nbl/this_example/builtin/build/spirv/keys.hpp" +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "app_resources/common.hlsl" #include "AppInputParser.hpp" @@ -214,12 +215,23 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp if(not descriptorSetLayout) return logFail("Failed to create descriptor set layout!"); - auto range = std::to_array({ {stage_flags_t::ESS_ALL_OR_LIBRARY, 0u, sizeof(PushConstants)} }); + auto range = std::to_array({ {StageFlags.value, 0u, sizeof(PushConstants)} }); auto pipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); if(not pipelineLayout) return logFail("Failed to create pipeline layout!"); + // Compute Pipeline + { + auto params = std::to_array({ {} });; + params[0].layout = pipelineLayout.get(); + params[0].shader.shader = compute.get(); + params[0].shader.entryPoint = "main"; + + if (!m_device->createComputePipelines(nullptr, params, &computePipeline)) + return logFail("Failed to create compute pipeline!"); + } + // Graphics Pipeline { IGPUPipelineBase::SShaderEntryMap specConstants; @@ -254,16 +266,6 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp return logFail("Failed to create graphics pipeline!"); } - // Compute Pipeline - { - auto params = std::to_array({ {} });; - params[0].layout = pipelineLayout.get(); - params[0].shader.shader = compute.get(); - params[0].shader.entryPoint = "main"; - if (!m_device->createComputePipelines(nullptr, params, &computePipeline)) - return logFail("Failed to create compute pipeline!"); - } - const auto dscLayoutPtrs = graphicsPipeline->getLayout()->getDescriptorSetLayouts(); auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dscLayoutPtrs); pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), descriptors.data()); @@ -313,27 +315,39 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp write.dstSet = descriptors[0u].get(); write.arrayElement = 0u; write.binding = 0u + 100u; - + if (!m_device->updateDescriptorSets(writes, {})) return logFail("Failed to write descriptor sets"); } } } + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for (auto i = 0u; i < m_cmdBufs.size(); i++) + { + if (!pool) + return logFail("Couldn't create command pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i,1 })) + return logFail("Couldn't create command buffer!"); + } + onAppInitializedFinish(); + return true; } inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override { - m_inputSystem->getDefaultMouse(&mouse); - m_inputSystem->getDefaultKeyboard(&keyboard); - const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; - auto* const cb = m_cmdBufs.data()[resourceIx].get(); cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - cb->beginDebugMarker("IESViewer Frame"); + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); { mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { mouseProcess(events); }, m_logger.get()); keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { keyboardProcess(events); }, m_logger.get()); @@ -353,25 +367,45 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } } - asset::SViewport viewport; + auto* descriptor = descriptors[0].get(); + + // Compute { - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = m_window->getWidth(); - viewport.height = m_window->getHeight(); + cb->beginDebugMarker("IES::compute"); + auto* layout = computePipeline->getLayout(); + cb->bindComputePipeline(computePipeline.get()); + cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); + cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); + const auto xGroups = (ies.getProfile()->getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u; + cb->dispatch(xGroups, xGroups, 1); + + // TODO: barier + + cb->endDebugMarker(); } - cb->setViewport(0u, 1u, &viewport); - VkRect2D scissor = + // Graphics { - .offset = { 0, 0 }, - .extent = { m_window->getWidth(), m_window->getHeight() }, - }; - cb->setScissor(0u, 1u, &scissor); + cb->beginDebugMarker("IES::render"); + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = m_window->getWidth(); + viewport.height = m_window->getHeight(); + } + cb->setViewport(0u, 1u, &viewport); + + VkRect2D scissor = + { + .offset = { 0, 0 }, + .extent = { m_window->getWidth(), m_window->getHeight() }, + }; + cb->setScissor(0u, 1u, &scissor); - { const VkRect2D currentRenderArea = { .offset = {0,0}, @@ -390,12 +424,18 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp }; cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + { + auto* layout = graphicsPipeline->getLayout(); + cb->bindGraphicsPipeline(graphicsPipeline.get()); + cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS, layout, 0, 1, &descriptor); + cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); + ext::FullScreenTriangle::recordDrawCall(cb); + } + cb->endRenderPass(); + cb->endDebugMarker(); + cb->end(); } - cb->endRenderPass(); - cb->endDebugMarker(); - cb->end(); - IQueue::SSubmitInfo::SSemaphoreInfo retval = { .semaphore = m_semaphore.get(), From 3e02a71b672c7ea9d123251db52957a4ee4c0c9d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 20 Oct 2025 13:12:19 +0200 Subject: [PATCH 049/219] add pipeline barriers for an active image in render loop, I also need to handle init transition from UNDEFINED layout --- 50.IESProfileTest/main.cpp | 84 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 3 deletions(-) diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 81679b91a..5fdb82482 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -367,11 +367,14 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } } - auto* descriptor = descriptors[0].get(); + auto* const descriptor = descriptors[0].get(); + auto* const image = ies.getActiveImage(); // Compute { cb->beginDebugMarker("IES::compute"); + ies.barrier(cb, image); + auto* layout = computePipeline->getLayout(); cb->bindComputePipeline(computePipeline.get()); cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); @@ -387,6 +390,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp // Graphics { cb->beginDebugMarker("IES::render"); + ies.barrier(cb, image); asset::SViewport viewport; { @@ -542,8 +546,8 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp asset::SAssetBundle bundle; std::string key; - float zDegree; - E_MODE mode; + float zDegree = 0.f; + E_MODE mode = EM_CDC; inline const asset::CIESProfile* getProfile() const { @@ -553,6 +557,80 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp return nullptr; } + + inline video::IGPUImage* getActiveImage() const + { + switch (mode) + { + case EM_IES_C: + return views.candela->getCreationParameters().image.get(); + case EM_SPERICAL_C: + return views.spherical->getCreationParameters().image.get(); + case EM_DIRECTION: + return views.direction->getCreationParameters().image.get(); + case EM_PASS_T_MASK: + return views.mask->getCreationParameters().image.get(); + + case EM_CDC: + default: + return nullptr; + } + } + + template + requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + static inline bool barrier(IGPUCommandBuffer* const cb, video::IGPUImage* image) + { + if (not image) + return false; + + if (not cb) + return false; + + using image_memory_barrier_t = IGPUCommandBuffer::SImageMemoryBarrier; + const auto& params = image->getCreationParameters(); + const IGPUImage::SSubresourceRange range = + { + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = params.mipLevels, + .baseArrayLayer = 0u, + .layerCount = params.arrayLayers + }; + + image_memory_barrier_t imageBarrier = + { + .barrier = {.dep = {}}, + .image = image, + .subresourceRange = range, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = newLayout + }; + + if constexpr (newLayout == IImage::LAYOUT::GENERAL) + { + // READ_ONLY_OPTIMAL -> GENERAL, RW + imageBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + imageBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; + imageBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + imageBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; + imageBarrier.oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + } + else if (newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + { + // GENERAL -> READ_ONLY_OPTIMAL, RO + imageBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + imageBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; + imageBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + imageBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; + imageBarrier.oldLayout = IImage::LAYOUT::GENERAL; + } + + if constexpr (undefined) + imageBarrier.oldLayout = IImage::LAYOUT::UNDEFINED; // transition for init + + return cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {}, .bufBarriers = {}, .imgBarriers = { &imageBarrier, 1 } }); + } }; smart_refctd_ptr graphicsPipeline; From ceada5e891a70b1fcc8d456c4bd50ef3106ee181 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 20 Oct 2025 16:12:30 +0200 Subject: [PATCH 050/219] transition undefined images' layout on init, get rid of validations errors on first frame when switching between active images --- 50.IESProfileTest/main.cpp | 164 ++++++++++++++++++++++++++++--------- 1 file changed, 126 insertions(+), 38 deletions(-) diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 5fdb82482..4d551c4e9 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -326,14 +326,87 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp if (!m_semaphore) return logFail("Failed to Create a Semaphore!"); - auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - for (auto i = 0u; i < m_cmdBufs.size(); i++) + using pool_flags_t = IGPUCommandPool::CREATE_FLAGS; + + auto createCommandBuffers = [&](auto* queue, const std::span> out, pool_flags_t flags) -> bool { + auto pool = m_device->createCommandPool(queue->getFamilyIndex(), flags); if (!pool) return logFail("Couldn't create command pool!"); - if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i,1 })) + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, out)) return logFail("Couldn't create command buffer!"); + return true; + }; + + // render loop command buffers + if (not createCommandBuffers(getGraphicsQueue(), m_cmdBufs, pool_flags_t::RESET_COMMAND_BUFFER_BIT)) + return false; + + // transient command buffer + { + auto* queue = getGraphicsQueue(); + auto cbs = std::to_array({ smart_refctd_ptr() }); + if (not createCommandBuffers(queue, cbs, pool_flags_t::RESET_COMMAND_BUFFER_BIT | pool_flags_t::TRANSIENT_BIT)) + return false; + + std::vector images; + for (uint32_t i = 0; i < assets.size(); ++i) + { + auto& ies = assets[i]; + + images.emplace_back() = ies.views.candela->getCreationParameters().image.get(); + images.emplace_back() = ies.views.spherical->getCreationParameters().image.get(); + images.emplace_back() = ies.views.direction->getCreationParameters().image.get(); + images.emplace_back() = ies.views.mask->getCreationParameters().image.get(); + } + + auto* cb = cbs.front().get(); + cb->setObjectDebugName("Transient Command Buffer"); + + if(not cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + return logFail("Couldn't begin command buffer!"); + + if(not IES::barrier(cb, images)) + return logFail("Failed to record pipeline barriers!"); + + if(not cb->end()) + return logFail("Couldn't end command buffer!"); + + core::smart_refctd_ptr semaphore = m_device->createSemaphore(0); + semaphore->setObjectDebugName("Scratch Semaphore"); + { + IQueue::SSubmitInfo::SSemaphoreInfo signal = + { + .semaphore = semaphore.get(), + .value = 1u, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + + const IQueue::SSubmitInfo::SCommandBufferInfo cmds[] = { {.cmdbuf = cb } }; + + const IQueue::SSubmitInfo infos[] = + { { + .waitSemaphores = {}, + .commandBuffers = cmds, + .signalSemaphores = {&signal,1} + } }; + + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + return logFail("Failed to submit queue!"); + } + + { + const ISemaphore::SWaitInfo infos[] = + { { + .semaphore = semaphore.get(), + .value = 1u + } }; + + if (m_device->blockForSemaphores(infos) != ISemaphore::WAIT_RESULT::SUCCESS) + return logFail("Couldn't block for scratch semaphore!"); + } } + onAppInitializedFinish(); return true; @@ -368,12 +441,12 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } auto* const descriptor = descriptors[0].get(); - auto* const image = ies.getActiveImage(); + auto* image = ies.getActiveImage(); // Compute { cb->beginDebugMarker("IES::compute"); - ies.barrier(cb, image); + IES::barrier(cb, image); auto* layout = computePipeline->getLayout(); cb->bindComputePipeline(computePipeline.get()); @@ -390,7 +463,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp // Graphics { cb->beginDebugMarker("IES::render"); - ies.barrier(cb, image); + IES::barrier(cb, image); asset::SViewport viewport; { @@ -579,57 +652,72 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp template requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) - static inline bool barrier(IGPUCommandBuffer* const cb, video::IGPUImage* image) + static inline bool barrier(IGPUCommandBuffer* const cb, const std::span images) { - if (not image) + if (images.empty()) return false; if (not cb) return false; using image_memory_barrier_t = IGPUCommandBuffer::SImageMemoryBarrier; - const auto& params = image->getCreationParameters(); const IGPUImage::SSubresourceRange range = { .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, .baseMipLevel = 0u, - .levelCount = params.mipLevels, + .levelCount = 1u, .baseArrayLayer = 0u, - .layerCount = params.arrayLayers + .layerCount = 1u }; - image_memory_barrier_t imageBarrier = - { - .barrier = {.dep = {}}, - .image = image, - .subresourceRange = range, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = newLayout - }; + std::vector imageBarriers(images.size()); - if constexpr (newLayout == IImage::LAYOUT::GENERAL) + for (uint32_t i = 0; i < imageBarriers.size(); ++i) { - // READ_ONLY_OPTIMAL -> GENERAL, RW - imageBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - imageBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; - imageBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; - imageBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; - imageBarrier.oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - } - else if (newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) - { - // GENERAL -> READ_ONLY_OPTIMAL, RO - imageBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; - imageBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; - imageBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - imageBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; - imageBarrier.oldLayout = IImage::LAYOUT::GENERAL; + auto &it = imageBarriers[i] = + { + .barrier = {.dep = {}}, + .image = images[i], + .subresourceRange = range, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = newLayout + }; + + if constexpr (newLayout == IImage::LAYOUT::GENERAL) + { + // READ_ONLY_OPTIMAL -> GENERAL, RW + it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + it.barrier.dep.srcAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; + it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + it.barrier.dep.dstAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; + it.oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + } + else if (newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + { + // GENERAL -> READ_ONLY_OPTIMAL, RO + it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + it.barrier.dep.srcAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; + it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + it.barrier.dep.dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; + it.oldLayout = IImage::LAYOUT::GENERAL; + } + + if constexpr (undefined) + it.oldLayout = IImage::LAYOUT::UNDEFINED; // transition for init } - if constexpr (undefined) - imageBarrier.oldLayout = IImage::LAYOUT::UNDEFINED; // transition for init + return cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {}, .bufBarriers = {}, .imgBarriers = imageBarriers }); + } + + template + requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + static inline bool barrier(IGPUCommandBuffer* const cb, video::IGPUImage* image) + { + if (not image) + return false; - return cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {}, .bufBarriers = {}, .imgBarriers = { &imageBarrier, 1 } }); + auto in = std::to_array({ image }); + return barrier(cb, in); } }; From adeaca2504329ce514e3f1363c2fa9786ca25fd4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 21 Oct 2025 12:43:56 +0200 Subject: [PATCH 051/219] get test imgui window on screen with IES input, fix some runtime bugs with asset IXs --- 50.IESProfileTest/CMakeLists.txt | 15 +++- 50.IESProfileTest/main.cpp | 130 +++++++++++++++++++++++++------ 2 files changed, 116 insertions(+), 29 deletions(-) diff --git a/50.IESProfileTest/CMakeLists.txt b/50.IESProfileTest/CMakeLists.txt index d5ef049d2..78ff81aac 100644 --- a/50.IESProfileTest/CMakeLists.txt +++ b/50.IESProfileTest/CMakeLists.txt @@ -1,10 +1,16 @@ +if(NBL_BUILD_IMGUI) set(SRCs - AppInputParser.cpp - AppInputParser.hpp + AppInputParser.cpp AppInputParser.hpp inputs.json ) -nbl_create_executable_project("${SRCs}" "" "" "" "") +set(LIBs + imtestengine + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" +) + +nbl_create_executable_project("${SRCs}" "" "" "${LIBs}") target_link_libraries(${EXECUTABLE_NAME} PRIVATE nlohmann_json::nlohmann_json) set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") @@ -64,4 +70,5 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) +endif() \ No newline at end of file diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 4d551c4e9..f47344132 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -5,6 +5,9 @@ #include "nbl/examples/examples.hpp" #include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" #include "app_resources/common.hlsl" #include "AppInputParser.hpp" @@ -18,13 +21,6 @@ using namespace video; using namespace scene; using namespace nbl::examples; -#define BENCHMARK_TILL_FIRST_FRAME - -#ifdef BENCHMARK_TILL_FIRST_FRAME -const std::chrono::steady_clock::time_point startBenchmark = std::chrono::high_resolution_clock::now(); -bool stopBenchamrkFlag = false; -#endif - constexpr static std::string_view InputsJson = "../inputs.json"; constexpr static std::string_view MediaEntry = "../../media"; @@ -36,7 +32,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp public: IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), - device_base_t({ 1280,720 }, EF_D16_UNORM, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { + device_base_t({ 640,640 }, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { } inline bool onAppInitialized(smart_refctd_ptr&& system) override @@ -271,7 +267,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), descriptors.data()); { std::array, 4u + 1u> infos; - #define FILL_INFO(DESC, IX) \ +#define FILL_INFO(DESC, IX) \ { \ auto& info = infos[IX].emplace_back(); \ info.desc = DESC; \ @@ -283,9 +279,9 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp auto& ies = assets[i]; FILL_INFO(ies.views.candela, 0u) - FILL_INFO(ies.views.spherical, 1u) - FILL_INFO(ies.views.direction, 2u) - FILL_INFO(ies.views.mask, 3u) + FILL_INFO(ies.views.spherical, 1u) + FILL_INFO(ies.views.direction, 2u) + FILL_INFO(ies.views.mask, 3u) } FILL_INFO(generalSampler, 4u); auto* samplerInfo = infos.back().data(); @@ -322,6 +318,61 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } } + // imGUI + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::imgui::UI::SCreationParameters params = {}; + params.resources.texturesInfo = { .setIx = 0u,.bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u,.bindingIx = 1u }; + params.utilities = m_utils; + params.transfer = getTransferUpQueue(); + params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, 2u + MaxFramesInFlight); + params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); + params.subpassIx = 0u; + params.pipelineCache = nullptr; + + auto* imgui = (ui.it = ext::imgui::UI::create(std::move(params))).get(); + if (not imgui) + return logFail("Failed to create `nbl::ext::imgui::UI` class"); + + { + const auto* layout = imgui->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, { &layout,1 }); + auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); + ui.descriptor = make_smart_refctd_ptr(std::move(ds)); + if (!ui.descriptor) + return logFail("Failed to create the descriptor set"); + { + auto dummy = SubAllocatedDescriptorSet::invalid_value; + ui.descriptor->multi_allocate(0, 1, &dummy); + assert(dummy == ext::imgui::UI::FontAtlasTexId); + } + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = smart_refctd_ptr(imgui->getFontAtlasView()); + info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write = { + .dstSet = ui.descriptor->getDescriptorSet(), + .binding = 0u, + .arrayElement = ext::imgui::UI::FontAtlasTexId, + .count = 1, + .info = &info + }; + if (!m_device->updateDescriptorSets({ &write,1 }, {})) + return logFail("Failed to write the descriptor set"); + } + + imgui->registerListener([this]() + { + ImGui::SetNextWindowSize(ImVec2(200.0f, 200.0f), ImGuiCond_FirstUseEver); + if (ImGui::Begin("test", nullptr, ImGuiWindowFlags_None)) + { + ImGui::TextUnformatted("test text"); + } + ImGui::End(); + }); + } + m_semaphore = m_device->createSemaphore(m_realFrameIx); if (!m_semaphore) return logFail("Failed to Create a Semaphore!"); @@ -422,8 +473,24 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp m_inputSystem->getDefaultMouse(&mouse); m_inputSystem->getDefaultKeyboard(&keyboard); { - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { mouseProcess(events); }, m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { keyboardProcess(events); }, m_logger.get()); + struct + { + std::vector mouse {}; std::vector keyboard {}; + } captured; + + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { for (const auto& e : events) captured.mouse.emplace_back(e); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { for (const auto& e : events) captured.keyboard.emplace_back(e); }, m_logger.get()); + + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), + .displaySize = {m_window->getWidth(),m_window->getHeight()}, + .mouseEvents = captured.mouse, + .keyboardEvents = captured.keyboard + }; + + ui.it->update(params); } auto& ies = assets[activeAssetIx]; @@ -447,16 +514,12 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp { cb->beginDebugMarker("IES::compute"); IES::barrier(cb, image); - auto* layout = computePipeline->getLayout(); cb->bindComputePipeline(computePipeline.get()); cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); const auto xGroups = (ies.getProfile()->getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u; cb->dispatch(xGroups, xGroups, 1); - - // TODO: barier - cb->endDebugMarker(); } @@ -504,9 +567,22 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp { auto* layout = graphicsPipeline->getLayout(); cb->bindGraphicsPipeline(graphicsPipeline.get()); - cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS, layout, 0, 1, &descriptor); + cb->bindDescriptorSets(EPBP_GRAPHICS, layout, 0, 1, &descriptor); cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); ext::FullScreenTriangle::recordDrawCall(cb); + { + auto* imgui = ui.it.get(); + auto* pipeline = imgui->getPipeline(); + cb->bindGraphicsPipeline(pipeline); + const auto* ds = ui.descriptor->getDescriptorSet(); + cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); + const ISemaphore::SWaitInfo wait = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + if (!imgui->render(cb, wait)) + { + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); + return {}; + } + } } cb->endRenderPass(); cb->endDebugMarker(); @@ -735,6 +811,11 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; + struct { + smart_refctd_ptr it; + smart_refctd_ptr descriptor; + } ui; + // TODO: lets have this stuff in nice imgui void mouseProcess(const nbl::ui::IMouseEventChannel::range_t& events) { @@ -747,7 +828,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp auto& ies = assets[activeAssetIx]; auto* profile = ies.getProfile(); - auto impulse = ev.scrollEvent.verticalScroll; + auto impulse = ev.scrollEvent.verticalScroll * 0.01f; ies.zDegree = std::clamp(ies.zDegree + impulse, profile->getHoriAngles().front(), profile->getHoriAngles().back()); } } @@ -761,13 +842,12 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) { - auto& ies = assets[activeAssetIx]; - auto* profile = ies.getProfile(); - if (ev.keyCode == nbl::ui::EKC_UP_ARROW) - activeAssetIx = std::clamp(activeAssetIx + 1, 0, assets.size()); + activeAssetIx = std::clamp(activeAssetIx + 1, 0, assets.size() - 1u); else if(ev.keyCode == nbl::ui::EKC_DOWN_ARROW) - activeAssetIx = std::clamp(activeAssetIx - 1, 0, assets.size()); + activeAssetIx = std::clamp(activeAssetIx - 1, 0, assets.size() - 1u); + + auto& ies = assets[activeAssetIx]; if (ev.keyCode == nbl::ui::EKC_C) ies.mode = EM_CDC; From 349a850d4eb51619c33228c676eb1c84ac0f4a54 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 21 Oct 2025 14:11:43 +0200 Subject: [PATCH 052/219] precompile imGUI shaders & use in IES example, O3 optimization for HLSL inputs --- 50.IESProfileTest/CMakeLists.txt | 19 +++++++++++++++- .../app_resources/imgui.opts.hlsl | 16 ++++++++++++++ .../app_resources/imgui.pixel.hlsl | 6 +++++ .../app_resources/imgui.vertex.hlsl | 5 +++++ 50.IESProfileTest/main.cpp | 22 ++++++++++++------- 5 files changed, 59 insertions(+), 9 deletions(-) create mode 100644 50.IESProfileTest/app_resources/imgui.opts.hlsl create mode 100644 50.IESProfileTest/app_resources/imgui.pixel.hlsl create mode 100644 50.IESProfileTest/app_resources/imgui.vertex.hlsl diff --git a/50.IESProfileTest/CMakeLists.txt b/50.IESProfileTest/CMakeLists.txt index 78ff81aac..13ee22dc8 100644 --- a/50.IESProfileTest/CMakeLists.txt +++ b/50.IESProfileTest/CMakeLists.txt @@ -19,6 +19,9 @@ set(DEPENDS app_resources/compute.hlsl app_resources/pixel.hlsl app_resources/vertex.hlsl + app_resources/imgui.vertex.hlsl + app_resources/imgui.pixel.hlsl + app_resources/imgui.opts.hlsl ) target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) @@ -46,6 +49,20 @@ set(JSON [=[ "COMPILE_OPTIONS": ["-T", "lib_${SM}"], "DEPENDS": [], "CAPS": [] + }, + { + "INPUT": "app_resources/imgui.vertex.hlsl", + "KEY": "imgui.vertex", + "COMPILE_OPTIONS": ["-T", "lib_${SM}"], + "DEPENDS": [], + "CAPS": [] + }, + { + "INPUT": "app_resources/imgui.pixel.hlsl", + "KEY": "imgui.pixel", + "COMPILE_OPTIONS": ["-T", "lib_${SM}"], + "DEPENDS": [], + "CAPS": [] } ] ]=]) @@ -57,7 +74,7 @@ NBL_CREATE_NSC_COMPILE_RULES( DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT - COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} -O3 OUTPUT_VAR KEYS INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp NAMESPACE nbl::this_example::builtin::build diff --git a/50.IESProfileTest/app_resources/imgui.opts.hlsl b/50.IESProfileTest/app_resources/imgui.opts.hlsl new file mode 100644 index 000000000..54f502b0f --- /dev/null +++ b/50.IESProfileTest/app_resources/imgui.opts.hlsl @@ -0,0 +1,16 @@ +#ifndef _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ +#define _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ + +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#define NBL_TEXTURES_BINDING_IX 0u +#define NBL_SAMPLER_STATES_BINDING_IX 1u +#define NBL_TEXTURES_SET_IX 0u +#define NBL_SAMPLER_STATES_SET_IX 0u +#define NBL_TEXTURES_COUNT 5u +#define NBL_SAMPLERS_COUNT 2u + +#endif // _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ + diff --git a/50.IESProfileTest/app_resources/imgui.pixel.hlsl b/50.IESProfileTest/app_resources/imgui.pixel.hlsl new file mode 100644 index 000000000..fe93c3a70 --- /dev/null +++ b/50.IESProfileTest/app_resources/imgui.pixel.hlsl @@ -0,0 +1,6 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "imgui.opts.hlsl" +#include "nbl/ext/ImGui/builtin/hlsl/fragment.hlsl" diff --git a/50.IESProfileTest/app_resources/imgui.vertex.hlsl b/50.IESProfileTest/app_resources/imgui.vertex.hlsl new file mode 100644 index 000000000..2063db84b --- /dev/null +++ b/50.IESProfileTest/app_resources/imgui.vertex.hlsl @@ -0,0 +1,5 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/ext/ImGui/builtin/hlsl/vertex.hlsl" diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index f47344132..a4c4ef709 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -9,6 +9,7 @@ #include "nbl/ext/ImGui/ImGui.h" #include "imgui/imgui_internal.h" #include "app_resources/common.hlsl" +#include "app_resources/imgui.opts.hlsl" #include "AppInputParser.hpp" using namespace nbl; @@ -142,13 +143,15 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp #define CREATE_SHADER(SHADER, PATH) \ if (!(SHADER = createShader.template operator()() )) return false; - smart_refctd_ptr compute, pixel, vertex; + smart_refctd_ptr compute, pixel, vertex, imguiVertex, imguiPixel; { m_logger->log("Loading GPU shaders..", system::ILogger::ELL_INFO); auto start = std::chrono::high_resolution_clock::now(); CREATE_SHADER(compute, "compute") CREATE_SHADER(pixel, "pixel") CREATE_SHADER(vertex, "vertex") + CREATE_SHADER(imguiVertex, "imgui.vertex") + CREATE_SHADER(imguiPixel, "imgui.pixel") auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); auto took = std::to_string(elapsed.count()); m_logger->log("Finished loading GPU shaders, took %s seconds!", system::ILogger::ELL_PERFORMANCE, took.c_str()); @@ -267,7 +270,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), descriptors.data()); { std::array, 4u + 1u> infos; -#define FILL_INFO(DESC, IX) \ + #define FILL_INFO(DESC, IX) \ { \ auto& info = infos[IX].emplace_back(); \ info.desc = DESC; \ @@ -279,9 +282,9 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp auto& ies = assets[i]; FILL_INFO(ies.views.candela, 0u) - FILL_INFO(ies.views.spherical, 1u) - FILL_INFO(ies.views.direction, 2u) - FILL_INFO(ies.views.mask, 3u) + FILL_INFO(ies.views.spherical, 1u) + FILL_INFO(ies.views.direction, 2u) + FILL_INFO(ies.views.mask, 3u) } FILL_INFO(generalSampler, 4u); auto* samplerInfo = infos.back().data(); @@ -322,16 +325,19 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp { auto scRes = static_cast(m_surface->getSwapchainResources()); ext::imgui::UI::SCreationParameters params = {}; - params.resources.texturesInfo = { .setIx = 0u,.bindingIx = 0u }; - params.resources.samplersInfo = { .setIx = 0u,.bindingIx = 1u }; + params.resources.texturesInfo = { .setIx = NBL_TEXTURES_SET_IX, .bindingIx = NBL_TEXTURES_BINDING_IX }; + params.resources.samplersInfo = { .setIx = NBL_SAMPLER_STATES_SET_IX, .bindingIx = NBL_SAMPLER_STATES_BINDING_IX }; params.utilities = m_utils; params.transfer = getTransferUpQueue(); - params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, 2u + MaxFramesInFlight); + params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, NBL_TEXTURES_COUNT); params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); params.subpassIx = 0u; params.pipelineCache = nullptr; + using imgui_precompiled_spirv_t = ext::imgui::UI::SCreationParameters::PrecompiledShaders; + params.spirv = std::make_optional(imgui_precompiled_spirv_t{ .vertex = imguiVertex, .fragment = imguiPixel }); + auto* imgui = (ui.it = ext::imgui::UI::create(std::move(params))).get(); if (not imgui) return logFail("Failed to create `nbl::ext::imgui::UI` class"); From bdbae93a6389414dac1e47f59fb5bb74d99e68ec Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 22 Oct 2025 12:15:30 +0200 Subject: [PATCH 053/219] a little of UI, will add a few more controls I have some small artifacts which I need to inspect when I'm done with UI --- 50.IESProfileTest/main.cpp | 149 ++++++++++++++++++++++++++++++++++--- 1 file changed, 138 insertions(+), 11 deletions(-) diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index a4c4ef709..5a7e04c5d 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -370,12 +370,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp imgui->registerListener([this]() { - ImGui::SetNextWindowSize(ImVec2(200.0f, 200.0f), ImGuiCond_FirstUseEver); - if (ImGui::Begin("test", nullptr, ImGuiWindowFlags_None)) - { - ImGui::TextUnformatted("test text"); - } - ImGui::End(); + uiListener(); }); } @@ -484,8 +479,8 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp std::vector mouse {}; std::vector keyboard {}; } captured; - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { for (const auto& e : events) captured.mouse.emplace_back(e); }, m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { for (const auto& e : events) captured.keyboard.emplace_back(e); }, m_logger.get()); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { mouseProcess(events); for (const auto& e : events) captured.mouse.emplace_back(e); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { keyboardProcess(events); for (const auto& e : events) captured.keyboard.emplace_back(e); }, m_logger.get()); const auto cursorPosition = m_window->getCursorControl()->getPosition(); ext::imgui::UI::SUpdateParameters params = @@ -822,7 +817,6 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp smart_refctd_ptr descriptor; } ui; - // TODO: lets have this stuff in nice imgui void mouseProcess(const nbl::ui::IMouseEventChannel::range_t& events) { for (auto it = events.begin(); it != events.end(); it++) @@ -834,7 +828,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp auto& ies = assets[activeAssetIx]; auto* profile = ies.getProfile(); - auto impulse = ev.scrollEvent.verticalScroll * 0.01f; + auto impulse = ev.scrollEvent.verticalScroll * 0.02f; ies.zDegree = std::clamp(ies.zDegree + impulse, profile->getHoriAngles().front(), profile->getHoriAngles().back()); } } @@ -871,7 +865,6 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp } } } - // <- core::smart_refctd_ptr createImageView(const size_t width, const size_t height, asset::E_FORMAT format, std::string name) { @@ -985,6 +978,140 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp out.mode = in.mode; out.texIx = activeAssetIx; } + + inline void uiListener() + { + auto& ies = assets[activeAssetIx]; + const auto name = path(ies.key).filename().string(); + auto* profile = ies.getProfile(); + const float lowerBound = (float)profile->getHoriAngles().front(); + const float upperBound = (float)profile->getHoriAngles().back(); + const bool singleAngle = (upperBound == lowerBound); + + auto getModeRS = [&]() + { + switch (ies.mode) + { + case EM_CDC: + return "Candlepower Distribution Curve"; + case EM_IES_C: + return "Sample IES Candela"; + case EM_SPERICAL_C: + return "Sample Spherical Coordinates"; + case EM_DIRECTION: + return "Sample Direction"; + case EM_PASS_T_MASK: + return "Sample Pass Mask"; + default: + return "ERROR (view)"; + } + }; + + auto getSymmetryRS = [&]() + { + switch (profile->getSymmetry()) + { + case asset::CIESProfile::ISOTROPIC: + return "ISOTROPIC"; + case asset::CIESProfile::QUAD_SYMETRIC: + return "QUAD_SYMETRIC"; + case asset::CIESProfile::HALF_SYMETRIC: + return "HALF_SYMETRIC"; + case asset::CIESProfile::OTHER_HALF_SYMMETRIC: + return "OTHER_HALF_SYMMETRIC"; + case asset::CIESProfile::NO_LATERAL_SYMMET: + return "NO_LATERAL_SYMMET"; + default: + return "ERROR (symmetry)"; + } + }; + + auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); + const ImGuiViewport* vp = ImGui::GetMainViewport(); + { + ImDrawList* fg = ImGui::GetForegroundDrawList(); + float x = vp->Pos.x + 8.f; + float y = vp->Pos.y + 8.f; + + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), getModeRS()); + y += ImGui::GetTextLineHeightWithSpacing(); + + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), getSymmetryRS()); + y += ImGui::GetTextLineHeightWithSpacing(); + + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), name.c_str()); + y += ImGui::GetTextLineHeightWithSpacing(); + + char b1[64]; snprintf(b1, sizeof(b1), "%.3f\xC2\xB0", angle); + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), b1); + } + + { + const float pad = 8.f; + const float sliderW = 74.f; + const float sliderH = ImMin(vp->Size.y - pad * 2.f, 260.f); + ImGui::SetNextWindowPos(ImVec2(vp->Pos.x + vp->Size.x - sliderW - pad, vp->Pos.y + pad), ImGuiCond_Always); + ImGui::SetNextWindowSize(ImVec2(sliderW, sliderH), ImGuiCond_Always); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0, 0)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); + ImGuiWindowFlags flags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoBringToFrontOnFocus | + ImGuiWindowFlags_NoNav | ImGuiWindowFlags_NoBackground; + if (ImGui::Begin("AngleSliderOverlay", nullptr, flags)) + { + ImGui::InvisibleButton("##fader_area", ImGui::GetContentRegionAvail()); + ImVec2 rmin = ImGui::GetItemRectMin(); + ImVec2 rmax = ImGui::GetItemRectMax(); + ImDrawList* dl = ImGui::GetWindowDrawList(); + ImU32 col = IM_COL32(220, 60, 60, 255); + + float knobR = 7.f; + float trackX = rmax.x - 12.f; + float y0 = rmin.y + knobR + 1.f; + float y1 = rmax.y - knobR - 1.f; + + dl->AddLine(ImVec2(trackX, y0), ImVec2(trackX, y1), col, 3.f); + + if (singleAngle) + { + float y = (y0 + y1) * 0.5f; + dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); + char tb[32]; snprintf(tb, sizeof(tb), "%.0f", lowerBound); + ImVec2 ts = ImGui::CalcTextSize(tb); + dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); + } + else + { + for (int i = 0; i < 5; ++i) + { + float v = lowerBound + (upperBound - lowerBound) * (float(i) / 4.f); + float t = (v - lowerBound) / (upperBound - lowerBound); + float y = y1 - t * (y1 - y0); + dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); + char tb[32]; snprintf(tb, sizeof(tb), "%.0f", v); + ImVec2 ts = ImGui::CalcTextSize(tb); + dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); + } + } + + float t = singleAngle ? 0.5f : (angle - lowerBound) / (upperBound - lowerBound); + float knobY = y1 - t * (y1 - y0); + dl->AddCircleFilled(ImVec2(trackX, knobY), knobR, col); + dl->AddCircle(ImVec2(trackX, knobY), knobR, ImGui::GetColorU32(ImGuiCol_Border)); + + if (!singleAngle && (ImGui::IsItemHovered() || ImGui::IsItemActive()) && ImGui::IsMouseDown(0)) + { + float my = ImClamp(ImGui::GetIO().MousePos.y, y0, y1); + float nt = (y1 - my) / (y1 - y0); + angle = lowerBound + nt * (upperBound - lowerBound); + } + } + ImGui::End(); + ImGui::PopStyleVar(2); + } + + ies.zDegree = angle; + } }; NBL_MAIN_FUNC(IESViewer) \ No newline at end of file From ac85bdb781c77fdde694a3da81a8f5831d17e96d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 22 Oct 2025 14:57:36 +0200 Subject: [PATCH 054/219] shorten json payload --- 50.IESProfileTest/CMakeLists.txt | 34 ++++++++++++-------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/50.IESProfileTest/CMakeLists.txt b/50.IESProfileTest/CMakeLists.txt index 13ee22dc8..8a03714af 100644 --- a/50.IESProfileTest/CMakeLists.txt +++ b/50.IESProfileTest/CMakeLists.txt @@ -31,50 +31,42 @@ set(JSON [=[ [ { "INPUT": "app_resources/compute.hlsl", - "KEY": "compute", - "COMPILE_OPTIONS": ["-T", "lib_${SM}"], - "DEPENDS": [], - "CAPS": [] + "KEY": "compute" }, { "INPUT": "app_resources/pixel.hlsl", - "KEY": "pixel", - "COMPILE_OPTIONS": ["-T", "lib_${SM}"], - "DEPENDS": [], - "CAPS": [] + "KEY": "pixel" }, { "INPUT": "app_resources/vertex.hlsl", - "KEY": "vertex", - "COMPILE_OPTIONS": ["-T", "lib_${SM}"], - "DEPENDS": [], - "CAPS": [] + "KEY": "vertex" }, { "INPUT": "app_resources/imgui.vertex.hlsl", - "KEY": "imgui.vertex", - "COMPILE_OPTIONS": ["-T", "lib_${SM}"], - "DEPENDS": [], - "CAPS": [] + "KEY": "imgui.vertex" }, { "INPUT": "app_resources/imgui.pixel.hlsl", - "KEY": "imgui.pixel", - "COMPILE_OPTIONS": ["-T", "lib_${SM}"], - "DEPENDS": [], - "CAPS": [] + "KEY": "imgui.pixel" } ] ]=]) string(CONFIGURE "${JSON}" JSON) +set(COMPILE_OPTIONS + -I "${NBL_ROOT_PATH}/include" # a workaround due to imgui ext headers which are not part of Nabla builtin archive + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT - COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} -O3 + COMMON_OPTIONS ${COMPILE_OPTIONS} OUTPUT_VAR KEYS INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp NAMESPACE nbl::this_example::builtin::build From b5edbdab73bec6c4c2d4ef0fe473d8980057bf73 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 27 Oct 2025 13:34:49 +0100 Subject: [PATCH 055/219] split sources, I will need to add framebuffers + UI integration before I go to 3D IES viewer --- 50.IESProfileTest/App.cpp | 16 + 50.IESProfileTest/App.hpp | 59 ++ 50.IESProfileTest/AppEvent.cpp | 54 ++ 50.IESProfileTest/AppGPU.cpp | 100 +++ 50.IESProfileTest/AppInit.cpp | 438 ++++++++++ 50.IESProfileTest/AppInputParser.cpp | 11 +- 50.IESProfileTest/AppInputParser.hpp | 1 - 50.IESProfileTest/AppRender.cpp | 227 ++++++ 50.IESProfileTest/AppUI.cpp | 105 +++ 50.IESProfileTest/CMakeLists.txt | 4 +- 50.IESProfileTest/IES.cpp | 71 ++ 50.IESProfileTest/IES.hpp | 118 +++ 50.IESProfileTest/main.cpp | 1113 +------------------------- 13 files changed, 1194 insertions(+), 1123 deletions(-) create mode 100644 50.IESProfileTest/App.cpp create mode 100644 50.IESProfileTest/App.hpp create mode 100644 50.IESProfileTest/AppEvent.cpp create mode 100644 50.IESProfileTest/AppGPU.cpp create mode 100644 50.IESProfileTest/AppInit.cpp create mode 100644 50.IESProfileTest/AppRender.cpp create mode 100644 50.IESProfileTest/AppUI.cpp create mode 100644 50.IESProfileTest/IES.cpp create mode 100644 50.IESProfileTest/IES.hpp diff --git a/50.IESProfileTest/App.cpp b/50.IESProfileTest/App.cpp new file mode 100644 index 000000000..60195da32 --- /dev/null +++ b/50.IESProfileTest/App.cpp @@ -0,0 +1,16 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" + +#define APP_WINDOW_WIDTH 640 +#define APP_WINDOW_HEIGHT 640 +#define APP_DEPTH_BUFFER_FORMAT EF_UNKNOWN + +IESViewer::IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({ APP_WINDOW_WIDTH, APP_WINDOW_HEIGHT }, APP_DEPTH_BUFFER_FORMAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) +{ + // empty +} \ No newline at end of file diff --git a/50.IESProfileTest/App.hpp b/50.IESProfileTest/App.hpp new file mode 100644 index 000000000..f06e8fc14 --- /dev/null +++ b/50.IESProfileTest/App.hpp @@ -0,0 +1,59 @@ +#ifndef _THIS_EXAMPLE_APP_HPP_ +#define _THIS_EXAMPLE_APP_HPP_ + +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "IES.hpp" + +NBL_EXPOSE_NAMESPACES + +class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; + +public: + IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD); + + bool onAppInitialized(smart_refctd_ptr&& system) override; + IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override; + +protected: + const IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override; + +private: + smart_refctd_ptr graphicsPipeline; + smart_refctd_ptr computePipeline; + std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> descriptors; + + bool running = true; + std::vector assets; + size_t activeAssetIx = 0; + + size_t m_realFrameIx = 0; + smart_refctd_ptr m_semaphore; + std::array, device_base_t::MaxFramesInFlight> m_cmdBufs; + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + + struct { + smart_refctd_ptr it; + smart_refctd_ptr descriptor; + } ui; + + void processMouse(const IMouseEventChannel::range_t& events); + void processKeyboard(const IKeyboardEventChannel::range_t& events); + + smart_refctd_ptr createImageView(const size_t width, const size_t height, E_FORMAT format, std::string name); + smart_refctd_ptr createBuffer(const core::vector& in, std::string name); + + void uiListener(); +}; + +#endif // _THIS_EXAMPLE_APP_HPP_ \ No newline at end of file diff --git a/50.IESProfileTest/AppEvent.cpp b/50.IESProfileTest/AppEvent.cpp new file mode 100644 index 000000000..07a11f5e9 --- /dev/null +++ b/50.IESProfileTest/AppEvent.cpp @@ -0,0 +1,54 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" + +void IESViewer::processMouse(const nbl::ui::IMouseEventChannel::range_t& events) +{ + for (auto it = events.begin(); it != events.end(); it++) + { + auto ev = *it; + + if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) + { + auto& ies = assets[activeAssetIx]; + auto* profile = ies.getProfile(); + + auto impulse = ev.scrollEvent.verticalScroll * 0.02f; + ies.zDegree = std::clamp(ies.zDegree + impulse, profile->getHoriAngles().front(), profile->getHoriAngles().back()); + } + } +} + +void IESViewer::processKeyboard(const nbl::ui::IKeyboardEventChannel::range_t& events) +{ + for (auto it = events.begin(); it != events.end(); it++) + { + const auto ev = *it; + + if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) + { + if (ev.keyCode == nbl::ui::EKC_UP_ARROW) + activeAssetIx = std::clamp(activeAssetIx + 1, 0, assets.size() - 1u); + else if (ev.keyCode == nbl::ui::EKC_DOWN_ARROW) + activeAssetIx = std::clamp(activeAssetIx - 1, 0, assets.size() - 1u); + + auto& ies = assets[activeAssetIx]; + + if (ev.keyCode == nbl::ui::EKC_C) + ies.mode = IES::EM_CDC; + else if (ev.keyCode == nbl::ui::EKC_V) + ies.mode = IES::EM_IES_C; + else if (ev.keyCode == nbl::ui::EKC_S) + ies.mode = IES::EM_SPERICAL_C; + else if (ev.keyCode == nbl::ui::EKC_D) + ies.mode = IES::EM_DIRECTION; + else if (ev.keyCode == nbl::ui::EKC_M) + ies.mode = IES::EM_PASS_T_MASK; + + if (ev.keyCode == nbl::ui::EKC_Q) + running = false; + } + } +} \ No newline at end of file diff --git a/50.IESProfileTest/AppGPU.cpp b/50.IESProfileTest/AppGPU.cpp new file mode 100644 index 000000000..aa13994d6 --- /dev/null +++ b/50.IESProfileTest/AppGPU.cpp @@ -0,0 +1,100 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" + +core::smart_refctd_ptr IESViewer::createImageView(const size_t width, const size_t height, asset::E_FORMAT format, std::string name) +{ + IGPUImage::SCreationParams imageParams{}; + imageParams.type = IImage::E_TYPE::ET_2D; + imageParams.extent.height = height; + imageParams.extent.width = width; + imageParams.extent.depth = 1u; + imageParams.format = format; + imageParams.mipLevels = 1u; + imageParams.flags = IImage::ECF_NONE; + imageParams.arrayLayers = 1u; + imageParams.samples = IImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + imageParams.usage = bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_STORAGE_BIT; + + auto image = m_device->createImage(std::move(imageParams)); + image->setObjectDebugName(name.c_str()); + + if (!image) + { + m_logger->log("Failed to create \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + auto allocation = m_device->allocate(image->getMemoryReqs(), image.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); + if (!allocation.isValid()) + { + m_logger->log("Failed to allocate device memory for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + IGPUImageView::SCreationParams viewParams{}; + viewParams.image = std::move(image); + viewParams.format = format; + viewParams.viewType = IGPUImageView::ET_2D; + viewParams.flags = IImageViewBase::ECF_NONE; + viewParams.subresourceRange.baseArrayLayer = 0u; + viewParams.subresourceRange.baseMipLevel = 0u; + viewParams.subresourceRange.layerCount = 1u; + viewParams.subresourceRange.levelCount = 1u; + viewParams.subresourceRange.aspectMask = core::bitflag(asset::IImage::EAF_COLOR_BIT); + + auto imageView = m_device->createImageView(std::move(viewParams)); + + if (not imageView) + m_logger->log("Failed to create image view for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); + + return imageView; +} + +core::smart_refctd_ptr IESViewer::createBuffer(const core::vector& in, std::string name) +{ + IGPUBuffer::SCreationParams bufferParams = {}; + bufferParams.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT /*TODO: <- double check*/;; + bufferParams.size = sizeof(asset::CIESProfile::IES_STORAGE_FORMAT) * in.size(); + + auto buffer = m_device->createBuffer(std::move(bufferParams)); + buffer->setObjectDebugName(name.c_str()); + + if (not buffer) + { + m_logger->log("Failed to create \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + auto memoryReqs = buffer->getMemoryReqs(); + + if (m_utils) + memoryReqs.memoryTypeBits &= m_utils->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); + + auto allocation = m_device->allocate(memoryReqs, buffer.get(), core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT)); + if (not allocation.isValid()) + { + m_logger->log("Failed to allocate \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + auto* mappedPointer = allocation.memory->map({ 0ull, memoryReqs.size }, IDeviceMemoryAllocation::EMCAF_READ_AND_WRITE); + + if (not mappedPointer) + { + m_logger->log("Failed to map device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + memcpy(mappedPointer, in.data(), buffer->getSize()); + + if (not allocation.memory->unmap()) + { + m_logger->log("Failed to unmap device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + return buffer; +} \ No newline at end of file diff --git a/50.IESProfileTest/AppInit.cpp b/50.IESProfileTest/AppInit.cpp new file mode 100644 index 000000000..60fbb54b6 --- /dev/null +++ b/50.IESProfileTest/AppInit.cpp @@ -0,0 +1,438 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" +#include "AppInputParser.hpp" +#include "app_resources/common.hlsl" +#include "app_resources/imgui.opts.hlsl" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +#define MEDIA_ENTRY "../../media" +#define INPUT_JSON_FILE "../inputs.json" + +bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) +{ + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + const auto media = absolute(path(MEDIA_ENTRY)); + + AppInputParser::Output out; + AppInputParser parser(system::logger_opt_ptr(m_logger.get())); + if (!parser.parse(out, INPUT_JSON_FILE, media.string())) + return false; + + m_logger->log("Loading IES assets..", system::ILogger::ELL_INFO); + { + auto start = std::chrono::high_resolution_clock::now(); + size_t loaded = {}, total = out.inputList.size(); + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = system::logger_opt_ptr(m_logger.get()); + + for (const auto& in : out.inputList) + { + auto asset = m_assetMgr->getAsset(in.c_str(), lp); + + if (asset.getMetadata()) + { + auto& ies = assets.emplace_back(); + ies.bundle = std::move(asset); + ies.key = path(in).lexically_relative(media).string(); + ++loaded; + + m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, in.c_str()); + } + else + m_logger->log("Failed to load metadata for \"%s\"! Skipping..", system::ILogger::ELL_WARNING, in.c_str()); + } + const auto sl = std::to_string(loaded), st = std::to_string(total); + const bool passed = loaded == total; + + if (not passed) + { + auto diff = std::to_string(total - loaded); + m_logger->log("Failed to load [%s/%s] IES assets!", system::ILogger::ELL_ERROR, diff.c_str(), st.c_str()); + } + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished loading IES assets, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); + } + + m_logger->log("Creating GPU IES resources..", system::ILogger::ELL_INFO); + { + auto start = std::chrono::high_resolution_clock::now(); + for (auto& ies : assets) + { + const auto* profile = ies.getProfile(); + const auto resolution = profile->getOptimalIESResolution(); + + #define CREATE_VIEW(VIEW, FORMAT, NAME) \ + if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT, NAME + ies.key) )) return false; + + CREATE_VIEW(ies.views.candela, asset::EF_R16_UNORM, "IES Candela Data Image: ") + CREATE_VIEW(ies.views.spherical, asset::EF_R32G32_SFLOAT, "IES Spherical Data Image: ") + CREATE_VIEW(ies.views.direction, asset::EF_R32G32B32A32_SFLOAT, "IES Direction Data Image: ") + CREATE_VIEW(ies.views.mask, asset::EF_R8G8_UNORM, "IES Mask Data Image: ") + + #define CREATE_BUFFER(BUFFER, DATA, NAME) \ + if (!(BUFFER = createBuffer(DATA, NAME + ies.key) )) return false; + + CREATE_BUFFER(ies.buffers.vAngles, profile->getVertAngles(), "IES Vertical Angles Buffer: ") + CREATE_BUFFER(ies.buffers.hAngles, profile->getHoriAngles(), "IES Horizontal Angles Buffer: ") + CREATE_BUFFER(ies.buffers.data, profile->getData(), "IES Data Buffer: ") + } + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished creating GPU IES resources, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); + } + + auto createShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = system::logger_opt_ptr(m_logger.get()); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key, lp); + const auto assets = assetBundle.getContents(); + + if (assets.empty()) + { + m_logger->log("Failed to load \"%s\" shader!", system::ILogger::ELL_ERROR, key.data()); + return nullptr; + } + + auto spirvShader = IAsset::castDown(assets[0]); + + if (spirvShader) + m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, key.data()); + else + m_logger->log("Failed to cast \"%s\" asset to IShader!", system::ILogger::ELL_ERROR, key.data()); + + return spirvShader; + }; + + #define CREATE_SHADER(SHADER, PATH) \ + if (!(SHADER = createShader.template operator()() )) return false; + + m_logger->log("Loading GPU shaders..", system::ILogger::ELL_INFO); + smart_refctd_ptr compute, pixel, vertex, imguiVertex, imguiPixel; + { + auto start = std::chrono::high_resolution_clock::now(); + CREATE_SHADER(compute, "compute") + CREATE_SHADER(pixel, "pixel") + CREATE_SHADER(vertex, "vertex") + CREATE_SHADER(imguiVertex, "imgui.vertex") + CREATE_SHADER(imguiPixel, "imgui.pixel") + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished loading GPU shaders, took %s seconds!", system::ILogger::ELL_PERFORMANCE, took.c_str()); + } + + // Pipelines & Descriptor Sets + { + using binding_flags_t = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + using stage_flags_t = asset::IShader::E_SHADER_STAGE; + static constexpr auto TexturesCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT) | binding_flags_t::ECF_PARTIALLY_BOUND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT; + static constexpr auto SamplersCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT); + static constexpr auto StageFlags = core::bitflag(stage_flags_t::ESS_FRAGMENT) | stage_flags_t::ESS_COMPUTE; + + //! single descriptor for both compute & graphics, we will only need to trasition images' layout with a barrier + #define BINDING_TEXTURE(IX, TYPE) { .binding = IX, .type = TYPE, .createFlags = TexturesCreateFlags, .stageFlags = StageFlags, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr } + #define BINDING_SAMPLER(IX) { .binding = IX, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = StageFlags, .count = 1u, .immutableSamplers = nullptr } + static constexpr auto bindings = std::to_array + ({ + BINDING_TEXTURE(0u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(0u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // candela + BINDING_TEXTURE(1u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(1u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // spherical + BINDING_TEXTURE(2u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(2u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // direction + BINDING_TEXTURE(3u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(3u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // mask + BINDING_SAMPLER(0u + 100u) + }); + + const uint32_t texturesCount = assets.size(); + smart_refctd_ptr generalSampler; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK; + params.MinFilter = ISampler::ETF_LINEAR; + params.MaxFilter = ISampler::ETF_LINEAR; + params.MipmapMode = ISampler::ESMM_LINEAR; + params.AnisotropicFilter = 0u; + params.CompareEnable = false; + params.CompareFunc = ISampler::ECO_ALWAYS; + + generalSampler = m_device->createSampler(params); + + if (not generalSampler) + { + m_logger->log("Failed to create sampler!", system::ILogger::ELL_ERROR); + return false; + } + + generalSampler->setObjectDebugName("General IES sampler"); + } + + auto scRes = static_cast(m_surface->getSwapchainResources()); + scRes->getRenderpass(); // note it also creates rp if nulled + { + auto descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + + if (not descriptorSetLayout) + return logFail("Failed to create descriptor set layout!"); + + auto range = std::to_array({ {StageFlags.value, 0u, sizeof(PushConstants)} }); + auto pipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); + + if (not pipelineLayout) + return logFail("Failed to create pipeline layout!"); + + // Compute Pipeline + { + auto params = std::to_array({ {} });; + params[0].layout = pipelineLayout.get(); + params[0].shader.shader = compute.get(); + params[0].shader.entryPoint = "main"; + + if (!m_device->createComputePipelines(nullptr, params, &computePipeline)) + return logFail("Failed to create compute pipeline!"); + } + + // Graphics Pipeline + { + IGPUPipelineBase::SShaderEntryMap specConstants; + const auto orientationAsUint32 = static_cast(hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT); + specConstants[0] = std::span{ reinterpret_cast(&orientationAsUint32), sizeof(orientationAsUint32) }; + + video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = + { + {.shader = vertex.get(), .entryPoint = "main", .entries = &specConstants }, + {.shader = pixel.get(), .entryPoint = "PSMain" } + }; + + auto params = std::to_array({ {} }); + params[0].renderpass = scRes->getRenderpass(); + params[0].vertexShader = specInfo[0]; + params[0].fragmentShader = specInfo[1]; + params[0].layout = pipelineLayout.get(); + params[0].cached = + { + .vertexInput = {}, // full screen tri ext, no inputs + .primitiveAssembly = {}, + .rasterization = { + .polygonMode = EPM_FILL, + .faceCullingMode = EFCM_NONE, + .depthWriteEnable = false, + }, + .blend = {}, + .subpassIx = 0u + }; + + if (!m_device->createGraphicsPipelines(nullptr, params, &graphicsPipeline)) + return logFail("Failed to create graphics pipeline!"); + } + + const auto dscLayoutPtrs = graphicsPipeline->getLayout()->getDescriptorSetLayouts(); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dscLayoutPtrs); + pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), descriptors.data()); + { + std::array, 4u + 1u> infos; + #define FILL_INFO(DESC, IX) \ + { \ + auto& info = infos[IX].emplace_back(); \ + info.desc = DESC; \ + info.info.image.imageLayout = IImage::LAYOUT::GENERAL; \ + } + + for (uint32_t i = 0; i < assets.size(); ++i) + { + auto& ies = assets[i]; + + FILL_INFO(ies.views.candela, 0u) + FILL_INFO(ies.views.spherical, 1u) + FILL_INFO(ies.views.direction, 2u) + FILL_INFO(ies.views.mask, 3u) + } + FILL_INFO(generalSampler, 4u); + auto* samplerInfo = infos.back().data(); + samplerInfo->info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + std::array writes; + for (uint32_t i = 0; i < 4u; ++i) + { + auto& write = writes[i]; + write.count = assets.size(); + write.info = infos[i].data(); + write.dstSet = descriptors[0u].get(); + write.arrayElement = 0u; + write.binding = i; + } + + for (uint32_t i = 4u; i < 8u; ++i) + { + auto ix = i - 4u; + auto& write = writes[i] = writes[ix]; + write.binding = ix + 10u; + } + + auto& write = writes.back(); + write.count = 1u; + write.info = samplerInfo; + write.dstSet = descriptors[0u].get(); + write.arrayElement = 0u; + write.binding = 0u + 100u; + + if (!m_device->updateDescriptorSets(writes, {})) + return logFail("Failed to write descriptor sets"); + } + } + } + + // imGUI + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::imgui::UI::SCreationParameters params = {}; + params.resources.texturesInfo = { .setIx = NBL_TEXTURES_SET_IX, .bindingIx = NBL_TEXTURES_BINDING_IX }; + params.resources.samplersInfo = { .setIx = NBL_SAMPLER_STATES_SET_IX, .bindingIx = NBL_SAMPLER_STATES_BINDING_IX }; + params.utilities = m_utils; + params.transfer = getTransferUpQueue(); + params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, NBL_TEXTURES_COUNT); + params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); + params.subpassIx = 0u; + params.pipelineCache = nullptr; + + using imgui_precompiled_spirv_t = ext::imgui::UI::SCreationParameters::PrecompiledShaders; + params.spirv = std::make_optional(imgui_precompiled_spirv_t{ .vertex = imguiVertex, .fragment = imguiPixel }); + + auto* imgui = (ui.it = ext::imgui::UI::create(std::move(params))).get(); + if (not imgui) + return logFail("Failed to create `nbl::ext::imgui::UI` class"); + + { + const auto* layout = imgui->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, { &layout,1 }); + auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); + ui.descriptor = make_smart_refctd_ptr(std::move(ds)); + if (!ui.descriptor) + return logFail("Failed to create the descriptor set"); + { + auto dummy = SubAllocatedDescriptorSet::invalid_value; + ui.descriptor->multi_allocate(0, 1, &dummy); + assert(dummy == ext::imgui::UI::FontAtlasTexId); + } + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = smart_refctd_ptr(imgui->getFontAtlasView()); + info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write = { + .dstSet = ui.descriptor->getDescriptorSet(), + .binding = 0u, + .arrayElement = ext::imgui::UI::FontAtlasTexId, + .count = 1, + .info = &info + }; + if (!m_device->updateDescriptorSets({ &write,1 }, {})) + return logFail("Failed to write the descriptor set"); + } + + imgui->registerListener([this]() + { + uiListener(); + }); + } + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + using pool_flags_t = IGPUCommandPool::CREATE_FLAGS; + + auto createCommandBuffers = [&](auto* queue, const std::span> out, pool_flags_t flags) -> bool + { + auto pool = m_device->createCommandPool(queue->getFamilyIndex(), flags); + if (!pool) + return logFail("Couldn't create command pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, out)) + return logFail("Couldn't create command buffer!"); + return true; + }; + + // render loop command buffers + if (not createCommandBuffers(getGraphicsQueue(), m_cmdBufs, pool_flags_t::RESET_COMMAND_BUFFER_BIT)) + return false; + + // transient command buffer + { + auto* queue = getGraphicsQueue(); + auto cbs = std::to_array({ smart_refctd_ptr() }); + if (not createCommandBuffers(queue, cbs, pool_flags_t::RESET_COMMAND_BUFFER_BIT | pool_flags_t::TRANSIENT_BIT)) + return false; + + std::vector images; + for (uint32_t i = 0; i < assets.size(); ++i) + { + auto& ies = assets[i]; + + images.emplace_back() = ies.views.candela->getCreationParameters().image.get(); + images.emplace_back() = ies.views.spherical->getCreationParameters().image.get(); + images.emplace_back() = ies.views.direction->getCreationParameters().image.get(); + images.emplace_back() = ies.views.mask->getCreationParameters().image.get(); + } + + auto* cb = cbs.front().get(); + cb->setObjectDebugName("Transient Command Buffer"); + + if (not cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + return logFail("Couldn't begin command buffer!"); + + if (not IES::barrier(cb, images)) + return logFail("Failed to record pipeline barriers!"); + + if (not cb->end()) + return logFail("Couldn't end command buffer!"); + + core::smart_refctd_ptr semaphore = m_device->createSemaphore(0); + semaphore->setObjectDebugName("Scratch Semaphore"); + { + IQueue::SSubmitInfo::SSemaphoreInfo signal = + { + .semaphore = semaphore.get(), + .value = 1u, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + + const IQueue::SSubmitInfo::SCommandBufferInfo cmds[] = { {.cmdbuf = cb } }; + + const IQueue::SSubmitInfo infos[] = + { { + .waitSemaphores = {}, + .commandBuffers = cmds, + .signalSemaphores = {&signal,1} + } }; + + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + return logFail("Failed to submit queue!"); + } + + { + const ISemaphore::SWaitInfo infos[] = + { { + .semaphore = semaphore.get(), + .value = 1u + } }; + + if (m_device->blockForSemaphores(infos) != ISemaphore::WAIT_RESULT::SUCCESS) + return logFail("Couldn't block for scratch semaphore!"); + } + } + + onAppInitializedFinish(); + + return true; +} \ No newline at end of file diff --git a/50.IESProfileTest/AppInputParser.cpp b/50.IESProfileTest/AppInputParser.cpp index 30e4767f3..0f236969b 100644 --- a/50.IESProfileTest/AppInputParser.cpp +++ b/50.IESProfileTest/AppInputParser.cpp @@ -3,16 +3,9 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "AppInputParser.hpp" +#include "nlohmann/json.hpp" -using namespace nbl; -using namespace core; -using namespace hlsl; -using namespace system; -using namespace asset; -using namespace ui; -using namespace video; -using namespace scene; -using namespace nbl::examples; +NBL_EXPOSE_NAMESPACES using namespace nlohmann; bool AppInputParser::parse(Output& out, const std::string input, const std::string cwd) diff --git a/50.IESProfileTest/AppInputParser.hpp b/50.IESProfileTest/AppInputParser.hpp index c5df7cab5..18b5e4fe3 100644 --- a/50.IESProfileTest/AppInputParser.hpp +++ b/50.IESProfileTest/AppInputParser.hpp @@ -6,7 +6,6 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/examples/examples.hpp" -#include "nlohmann/json.hpp" struct AppInputParser { diff --git a/50.IESProfileTest/AppRender.cpp b/50.IESProfileTest/AppRender.cpp new file mode 100644 index 000000000..136d6d63b --- /dev/null +++ b/50.IESProfileTest/AppRender.cpp @@ -0,0 +1,227 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" +#include "app_resources/common.hlsl" + +IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::microseconds nextPresentationTimestamp) +{ + const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; + auto* const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + { + struct + { + std::vector mouse{}; std::vector keyboard{}; + } captured; + + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { processMouse(events); for (const auto& e : events) captured.mouse.emplace_back(e); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { processKeyboard(events); for (const auto& e : events) captured.keyboard.emplace_back(e); }, m_logger.get()); + + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), + .displaySize = {m_window->getWidth(),m_window->getHeight()}, + .mouseEvents = captured.mouse, + .keyboardEvents = captured.keyboard + }; + + ui.it->update(params); + } + + auto& ies = assets[activeAssetIx]; + PushConstants pc; + { + pc.vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(); + pc.hAnglesBDA = ies.buffers.hAngles->getDeviceAddress(); + pc.dataBDA = ies.buffers.data->getDeviceAddress(); + + const auto* profile = ies.getProfile(); + + pc.maxIValue = profile->getMaxCandelaValue(); + pc.vAnglesCount = profile->getVertAngles().size(); + pc.hAnglesCount = profile->getHoriAngles().size(); + pc.dataCount = profile->getData().size(); + + pc.zAngleDegreeRotation = ies.zDegree; + pc.mode = ies.mode; + pc.texIx = activeAssetIx; + } + + for (auto& buffer : { ies.buffers.data, ies.buffers.hAngles, ies.buffers.vAngles }) // flush request for sanity + { + auto bound = buffer->getBoundMemory(); + if (bound.memory->haveToMakeVisible()) + { + const ILogicalDevice::MappedMemoryRange range(bound.memory, bound.offset, buffer->getSize()); + m_device->flushMappedMemoryRanges(1, &range); + } + } + + auto* const descriptor = descriptors[0].get(); + auto* image = ies.getActiveImage(); + + // Compute + { + cb->beginDebugMarker("IES::compute"); + IES::barrier(cb, image); + auto* layout = computePipeline->getLayout(); + cb->bindComputePipeline(computePipeline.get()); + cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); + cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); + const auto xGroups = (ies.getProfile()->getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u; + cb->dispatch(xGroups, xGroups, 1); + cb->endDebugMarker(); + } + + // Graphics + { + cb->beginDebugMarker("IES::render"); + IES::barrier(cb, image); + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = m_window->getWidth(); + viewport.height = m_window->getHeight(); + } + cb->setViewport(0u, 1u, &viewport); + + VkRect2D scissor = + { + .offset = { 0, 0 }, + .extent = { m_window->getWidth(), m_window->getHeight() }, + }; + cb->setScissor(0u, 1u, &scissor); + + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; + const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = &depthValue, + .renderArea = currentRenderArea + }; + + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + { + auto* layout = graphicsPipeline->getLayout(); + cb->bindGraphicsPipeline(graphicsPipeline.get()); + cb->bindDescriptorSets(EPBP_GRAPHICS, layout, 0, 1, &descriptor); + cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); + ext::FullScreenTriangle::recordDrawCall(cb); + { + auto* imgui = ui.it.get(); + auto* pipeline = imgui->getPipeline(); + cb->bindGraphicsPipeline(pipeline); + const auto* ds = ui.descriptor->getDescriptorSet(); + cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); + const ISemaphore::SWaitInfo wait = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + if (!imgui->render(cb, wait)) + { + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); + return {}; + } + } + } + cb->endRenderPass(); + cb->endDebugMarker(); + cb->end(); + } + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + std::string caption = "[Nabla Engine] IES Viewer"; + { + m_window->setCaption(caption); + } + return retval; +} + +const video::IGPURenderpass::SCreationParams::SSubpassDependency* IESViewer::getDefaultSubpassDependencies() const +{ + // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + { + // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, + // don't want any writes to be available, we'll clear + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // color from ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + return dependencies; +} \ No newline at end of file diff --git a/50.IESProfileTest/AppUI.cpp b/50.IESProfileTest/AppUI.cpp new file mode 100644 index 000000000..c4efc9ccf --- /dev/null +++ b/50.IESProfileTest/AppUI.cpp @@ -0,0 +1,105 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" +#include "imgui/imgui_internal.h" +#include "app_resources/common.hlsl" +#include "app_resources/imgui.opts.hlsl" + +void IESViewer::uiListener() +{ + auto& ies = assets[activeAssetIx]; + const auto name = path(ies.key).filename().string(); + auto* profile = ies.getProfile(); + const float lowerBound = (float)profile->getHoriAngles().front(); + const float upperBound = (float)profile->getHoriAngles().back(); + const bool singleAngle = (upperBound == lowerBound); + + auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); + const ImGuiViewport* vp = ImGui::GetMainViewport(); + { + ImDrawList* fg = ImGui::GetForegroundDrawList(); + float x = vp->Pos.x + 8.f; + float y = vp->Pos.y + 8.f; + + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::modeToRS(ies.mode)); + y += ImGui::GetTextLineHeightWithSpacing(); + + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::symmetryToRS(profile->getSymmetry())); + y += ImGui::GetTextLineHeightWithSpacing(); + + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), name.c_str()); + y += ImGui::GetTextLineHeightWithSpacing(); + + char b1[64]; snprintf(b1, sizeof(b1), "%.3f\xC2\xB0", angle); + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), b1); + } + + { + const float pad = 8.f; + const float sliderW = 74.f; + const float sliderH = ImMin(vp->Size.y - pad * 2.f, 260.f); + ImGui::SetNextWindowPos(ImVec2(vp->Pos.x + vp->Size.x - sliderW - pad, vp->Pos.y + pad), ImGuiCond_Always); + ImGui::SetNextWindowSize(ImVec2(sliderW, sliderH), ImGuiCond_Always); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0, 0)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); + ImGuiWindowFlags flags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoBringToFrontOnFocus | + ImGuiWindowFlags_NoNav | ImGuiWindowFlags_NoBackground; + + if (ImGui::Begin("AngleSliderOverlay", nullptr, flags)) + { + ImGui::InvisibleButton("##fader_area", ImGui::GetContentRegionAvail()); + ImVec2 rmin = ImGui::GetItemRectMin(); + ImVec2 rmax = ImGui::GetItemRectMax(); + ImDrawList* dl = ImGui::GetWindowDrawList(); + ImU32 col = IM_COL32(220, 60, 60, 255); + + float knobR = 7.f; + float trackX = rmax.x - 12.f; + float y0 = rmin.y + knobR + 1.f; + float y1 = rmax.y - knobR - 1.f; + + dl->AddLine(ImVec2(trackX, y0), ImVec2(trackX, y1), col, 3.f); + + if (singleAngle) + { + float y = (y0 + y1) * 0.5f; + dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); + char tb[32]; snprintf(tb, sizeof(tb), "%.0f", lowerBound); + ImVec2 ts = ImGui::CalcTextSize(tb); + dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); + } + else + { + for (int i = 0; i < 5; ++i) + { + float v = lowerBound + (upperBound - lowerBound) * (float(i) / 4.f); + float t = (v - lowerBound) / (upperBound - lowerBound); + float y = y1 - t * (y1 - y0); + dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); + char tb[32]; snprintf(tb, sizeof(tb), "%.0f", v); + ImVec2 ts = ImGui::CalcTextSize(tb); + dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); + } + } + + float t = singleAngle ? 0.5f : (angle - lowerBound) / (upperBound - lowerBound); + float knobY = y1 - t * (y1 - y0); + dl->AddCircleFilled(ImVec2(trackX, knobY), knobR, col); + dl->AddCircle(ImVec2(trackX, knobY), knobR, ImGui::GetColorU32(ImGuiCol_Border)); + + if (!singleAngle && (ImGui::IsItemHovered() || ImGui::IsItemActive()) && ImGui::IsMouseDown(0)) + { + float my = ImClamp(ImGui::GetIO().MousePos.y, y0, y1); + float nt = (y1 - my) / (y1 - y0); + angle = lowerBound + nt * (upperBound - lowerBound); + } + } + ImGui::End(); + ImGui::PopStyleVar(2); + } + + ies.zDegree = angle; +} \ No newline at end of file diff --git a/50.IESProfileTest/CMakeLists.txt b/50.IESProfileTest/CMakeLists.txt index 8a03714af..ca1c66488 100644 --- a/50.IESProfileTest/CMakeLists.txt +++ b/50.IESProfileTest/CMakeLists.txt @@ -1,6 +1,8 @@ if(NBL_BUILD_IMGUI) set(SRCs - AppInputParser.cpp AppInputParser.hpp + App.cpp AppInit.cpp AppRender.cpp AppGPU.cpp AppUI.cpp AppEvent.cpp AppInputParser.cpp + App.hpp AppInputParser.hpp + IES.cpp IES.hpp inputs.json ) diff --git a/50.IESProfileTest/IES.cpp b/50.IESProfileTest/IES.cpp new file mode 100644 index 000000000..357d3d88b --- /dev/null +++ b/50.IESProfileTest/IES.cpp @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "IES.hpp" + +const asset::CIESProfile* IES::getProfile() const +{ + auto* meta = bundle.getMetadata(); + if (meta) + return &meta->selfCast()->profile; + + return nullptr; +} + +video::IGPUImage* IES::getActiveImage() const +{ + switch (mode) + { + case EM_IES_C: + return views.candela->getCreationParameters().image.get(); + case EM_SPERICAL_C: + return views.spherical->getCreationParameters().image.get(); + case EM_DIRECTION: + return views.direction->getCreationParameters().image.get(); + case EM_PASS_T_MASK: + return views.mask->getCreationParameters().image.get(); + + case EM_CDC: + default: + return nullptr; + } +} + +const char* IES::modeToRS(E_MODE mode) +{ + switch (mode) + { + case IES::EM_CDC: + return "Candlepower Distribution Curve"; + case IES::EM_IES_C: + return "Sample IES Candela"; + case IES::EM_SPERICAL_C: + return "Sample Spherical Coordinates"; + case IES::EM_DIRECTION: + return "Sample Direction"; + case IES::EM_PASS_T_MASK: + return "Sample Pass Mask"; + default: + return "ERROR (mode)"; + } +} + +const char* IES::symmetryToRS(CIESProfile::LuminairePlanesSymmetry symmetry) +{ + switch (symmetry) + { + case asset::CIESProfile::ISOTROPIC: + return "ISOTROPIC"; + case asset::CIESProfile::QUAD_SYMETRIC: + return "QUAD_SYMETRIC"; + case asset::CIESProfile::HALF_SYMETRIC: + return "HALF_SYMETRIC"; + case asset::CIESProfile::OTHER_HALF_SYMMETRIC: + return "OTHER_HALF_SYMMETRIC"; + case asset::CIESProfile::NO_LATERAL_SYMMET: + return "NO_LATERAL_SYMMET"; + default: + return "ERROR (symmetry)"; + } +} \ No newline at end of file diff --git a/50.IESProfileTest/IES.hpp b/50.IESProfileTest/IES.hpp new file mode 100644 index 000000000..04485366f --- /dev/null +++ b/50.IESProfileTest/IES.hpp @@ -0,0 +1,118 @@ +#ifndef _THIS_EXAMPLE_IES_HPP_ +#define _THIS_EXAMPLE_IES_HPP_ + +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" + +NBL_EXPOSE_NAMESPACES + +struct IES +{ + enum E_MODE : uint32_t + { + EM_CDC, //! Candlepower Distribution Curve + EM_IES_C, //! IES Candela + EM_SPERICAL_C, //! Sperical coordinates + EM_DIRECTION, //! Sample direction + EM_PASS_T_MASK, //! Test mask + + EM_SIZE + }; + + struct + { + smart_refctd_ptr candela = nullptr, spherical = nullptr, direction = nullptr, mask = nullptr; + } views; + + struct + { + smart_refctd_ptr vAngles = nullptr, hAngles = nullptr, data = nullptr; + } buffers; + + SAssetBundle bundle; + std::string key; + + float zDegree = 0.f; + E_MODE mode = EM_CDC; + + const asset::CIESProfile* getProfile() const; + video::IGPUImage* getActiveImage() const; + + static const char* modeToRS(E_MODE mode); + static const char* symmetryToRS(CIESProfile::LuminairePlanesSymmetry symmetry); + + template + requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + static inline bool barrier(IGPUCommandBuffer* const cb, const std::span images) + { + if (images.empty()) + return false; + + if (not cb) + return false; + + using image_memory_barrier_t = IGPUCommandBuffer::SImageMemoryBarrier; + const IGPUImage::SSubresourceRange range = + { + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + + std::vector imageBarriers(images.size()); + + for (uint32_t i = 0; i < imageBarriers.size(); ++i) + { + auto& it = imageBarriers[i] = + { + .barrier = {.dep = {}}, + .image = images[i], + .subresourceRange = range, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = newLayout + }; + + if constexpr (newLayout == IImage::LAYOUT::GENERAL) + { + // READ_ONLY_OPTIMAL -> GENERAL, RW + it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + it.barrier.dep.srcAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; + it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + it.barrier.dep.dstAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; + it.oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + } + else if (newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + { + // GENERAL -> READ_ONLY_OPTIMAL, RO + it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + it.barrier.dep.srcAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; + it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + it.barrier.dep.dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; + it.oldLayout = IImage::LAYOUT::GENERAL; + } + + if constexpr (undefined) + it.oldLayout = IImage::LAYOUT::UNDEFINED; // transition for init + } + + return cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {}, .bufBarriers = {}, .imgBarriers = imageBarriers }); + } + + template + requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + static inline bool barrier(IGPUCommandBuffer* const cb, video::IGPUImage* image) + { + if (not image) + return false; + + auto in = std::to_array({ image }); + return barrier(cb, in); + } +}; + +#endif // _THIS_EXAMPLE_IES_HPP_ \ No newline at end of file diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 5a7e04c5d..2d9b3e7c3 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -2,1116 +2,5 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "nbl/examples/examples.hpp" -#include "nbl/this_example/builtin/build/spirv/keys.hpp" -#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" -#include "nbl/ui/ICursorControl.h" -#include "nbl/ext/ImGui/ImGui.h" -#include "imgui/imgui_internal.h" -#include "app_resources/common.hlsl" -#include "app_resources/imgui.opts.hlsl" -#include "AppInputParser.hpp" - -using namespace nbl; -using namespace core; -using namespace hlsl; -using namespace system; -using namespace asset; -using namespace ui; -using namespace video; -using namespace scene; -using namespace nbl::examples; - -constexpr static std::string_view InputsJson = "../inputs.json"; -constexpr static std::string_view MediaEntry = "../../media"; - -class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApplication -{ - using device_base_t = MonoWindowApplication; - using asset_base_t = BuiltinResourcesApplication; - -public: - IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), - device_base_t({ 640,640 }, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { - } - - inline bool onAppInitialized(smart_refctd_ptr&& system) override - { - if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - - const auto media = absolute(path(MediaEntry.data())); - - AppInputParser::Output out; - AppInputParser parser(system::logger_opt_ptr(m_logger.get())); - if (!parser.parse(out, InputsJson.data(), media.string())) - return false; - - { - m_logger->log("Loading IES assets..", system::ILogger::ELL_INFO); - auto start = std::chrono::high_resolution_clock::now(); - size_t loaded = {}, total = out.inputList.size(); - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = system::logger_opt_ptr(m_logger.get()); - - for (const auto& in : out.inputList) - { - auto asset = m_assetMgr->getAsset(in.c_str(), lp); - - if (asset.getMetadata()) - { - auto& ies = assets.emplace_back(); - ies.bundle = std::move(asset); - ies.key = path(in).lexically_relative(media).string(); - ++loaded; - - m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, in.c_str()); - } - else - m_logger->log("Failed to load metadata for \"%s\"! Skipping..", system::ILogger::ELL_WARNING, in.c_str()); - } - const auto sl = std::to_string(loaded), st = std::to_string(total); - const bool passed = loaded == total; - - if (not passed) - { - auto diff = std::to_string(total - loaded); - m_logger->log("Failed to load [%s/%s] IES assets!", system::ILogger::ELL_ERROR, diff.c_str(), st.c_str()); - } - auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); - auto took = std::to_string(elapsed.count()); - m_logger->log("Finished loading IES assets, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); - } - - { - m_logger->log("Creating GPU IES resources..", system::ILogger::ELL_INFO); - auto start = std::chrono::high_resolution_clock::now(); - for (auto& ies : assets) - { - const auto* profile = ies.getProfile(); - const auto resolution = profile->getOptimalIESResolution(); - - #define CREATE_VIEW(VIEW, FORMAT, NAME) \ - if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT, NAME + ies.key) )) return false; - - CREATE_VIEW(ies.views.candela, asset::EF_R16_UNORM, "IES Candela Data Image: ") - CREATE_VIEW(ies.views.spherical, asset::EF_R32G32_SFLOAT, "IES Spherical Data Image: ") - CREATE_VIEW(ies.views.direction, asset::EF_R32G32B32A32_SFLOAT, "IES Direction Data Image: ") - CREATE_VIEW(ies.views.mask, asset::EF_R8G8_UNORM, "IES Mask Data Image: ") - - #define CREATE_BUFFER(BUFFER, DATA, NAME) \ - if (!(BUFFER = createBuffer(DATA, NAME + ies.key) )) return false; - - CREATE_BUFFER(ies.buffers.vAngles, profile->getVertAngles(), "IES Vertical Angles Buffer: ") - CREATE_BUFFER(ies.buffers.hAngles, profile->getHoriAngles(), "IES Horizontal Angles Buffer: ") - CREATE_BUFFER(ies.buffers.data, profile->getData(), "IES Data Buffer: ") - } - auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); - auto took = std::to_string(elapsed.count()); - m_logger->log("Finished creating GPU IES resources, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); - } - - auto createShader = [&]() -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = system::logger_opt_ptr(m_logger.get()); - lp.workingDirectory = "app_resources"; - - auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); - auto assetBundle = m_assetMgr->getAsset(key, lp); - const auto assets = assetBundle.getContents(); - - if (assets.empty()) - { - m_logger->log("Failed to load \"%s\" shader!", system::ILogger::ELL_ERROR, key.data()); - return nullptr; - } - - auto spirvShader = IAsset::castDown(assets[0]); - - if (spirvShader) - m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, key.data()); - else - m_logger->log("Failed to cast \"%s\" asset to IShader!", system::ILogger::ELL_ERROR, key.data()); - - return spirvShader; - }; - - #define CREATE_SHADER(SHADER, PATH) \ - if (!(SHADER = createShader.template operator()() )) return false; - - smart_refctd_ptr compute, pixel, vertex, imguiVertex, imguiPixel; - { - m_logger->log("Loading GPU shaders..", system::ILogger::ELL_INFO); - auto start = std::chrono::high_resolution_clock::now(); - CREATE_SHADER(compute, "compute") - CREATE_SHADER(pixel, "pixel") - CREATE_SHADER(vertex, "vertex") - CREATE_SHADER(imguiVertex, "imgui.vertex") - CREATE_SHADER(imguiPixel, "imgui.pixel") - auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); - auto took = std::to_string(elapsed.count()); - m_logger->log("Finished loading GPU shaders, took %s seconds!", system::ILogger::ELL_PERFORMANCE, took.c_str()); - } - - // Pipelines & Descriptor Sets - { - using binding_flags_t = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; - using stage_flags_t = asset::IShader::E_SHADER_STAGE; - static constexpr auto TexturesCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT) | binding_flags_t::ECF_PARTIALLY_BOUND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT; - static constexpr auto SamplersCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT); - static constexpr auto StageFlags = core::bitflag(stage_flags_t::ESS_FRAGMENT) | stage_flags_t::ESS_COMPUTE; - - //! singe descriptor for both compute & graphics, we will only need to trasition images' layout with a barrier - //! TODO: maybe could use subpass dependencies if this becomes part of renderpass, instead of a barrier - - #define BINDING_TEXTURE(IX, TYPE) { .binding = IX, .type = TYPE, .createFlags = TexturesCreateFlags, .stageFlags = StageFlags, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr } - #define BINDING_SAMPLER(IX) { .binding = IX, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = StageFlags, .count = 1u, .immutableSamplers = nullptr } - static constexpr auto bindings = std::to_array - ({ - BINDING_TEXTURE(0u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(0u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // candela - BINDING_TEXTURE(1u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(1u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // spherical - BINDING_TEXTURE(2u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(2u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // direction - BINDING_TEXTURE(3u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(3u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // mask - BINDING_SAMPLER(0u + 100u) - }); - - const uint32_t texturesCount = assets.size(); - smart_refctd_ptr generalSampler; - { - IGPUSampler::SParams params; - params.AnisotropicFilter = 1u; - params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; - params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; - params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; - params.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK; - params.MinFilter = ISampler::ETF_LINEAR; - params.MaxFilter = ISampler::ETF_LINEAR; - params.MipmapMode = ISampler::ESMM_LINEAR; - params.AnisotropicFilter = 0u; - params.CompareEnable = false; - params.CompareFunc = ISampler::ECO_ALWAYS; - - generalSampler = m_device->createSampler(params); - - if (not generalSampler) - { - m_logger->log("Failed to create sampler!", system::ILogger::ELL_ERROR); - return false; - } - - generalSampler->setObjectDebugName("General IES sampler"); - } - - auto scRes = static_cast(m_surface->getSwapchainResources()); - scRes->getRenderpass(); // note it also creates rp if nulled - { - auto descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); - - if(not descriptorSetLayout) - return logFail("Failed to create descriptor set layout!"); - - auto range = std::to_array({ {StageFlags.value, 0u, sizeof(PushConstants)} }); - auto pipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); - - if(not pipelineLayout) - return logFail("Failed to create pipeline layout!"); - - // Compute Pipeline - { - auto params = std::to_array({ {} });; - params[0].layout = pipelineLayout.get(); - params[0].shader.shader = compute.get(); - params[0].shader.entryPoint = "main"; - - if (!m_device->createComputePipelines(nullptr, params, &computePipeline)) - return logFail("Failed to create compute pipeline!"); - } - - // Graphics Pipeline - { - IGPUPipelineBase::SShaderEntryMap specConstants; - const auto orientationAsUint32 = static_cast(hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT); - specConstants[0] = std::span{ reinterpret_cast(&orientationAsUint32), sizeof(orientationAsUint32) }; - - video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = - { - {.shader = vertex.get(), .entryPoint = "main", .entries = &specConstants }, - {.shader = pixel.get(), .entryPoint = "PSMain" } - }; - - auto params = std::to_array({ {} }); - params[0].renderpass = scRes->getRenderpass(); - params[0].vertexShader = specInfo[0]; - params[0].fragmentShader = specInfo[1]; - params[0].layout = pipelineLayout.get(); - params[0].cached = - { - .vertexInput = {}, // full screen tri ext, no inputs - .primitiveAssembly = {}, - .rasterization = { - .polygonMode = EPM_FILL, - .faceCullingMode = EFCM_NONE, - .depthWriteEnable = false, - }, - .blend = {}, - .subpassIx = 0u - }; - - if (!m_device->createGraphicsPipelines(nullptr, params, &graphicsPipeline)) - return logFail("Failed to create graphics pipeline!"); - } - - const auto dscLayoutPtrs = graphicsPipeline->getLayout()->getDescriptorSetLayouts(); - auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dscLayoutPtrs); - pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), descriptors.data()); - { - std::array, 4u + 1u> infos; - #define FILL_INFO(DESC, IX) \ - { \ - auto& info = infos[IX].emplace_back(); \ - info.desc = DESC; \ - info.info.image.imageLayout = IImage::LAYOUT::GENERAL; \ - } - - for (uint32_t i = 0; i < assets.size(); ++i) - { - auto& ies = assets[i]; - - FILL_INFO(ies.views.candela, 0u) - FILL_INFO(ies.views.spherical, 1u) - FILL_INFO(ies.views.direction, 2u) - FILL_INFO(ies.views.mask, 3u) - } - FILL_INFO(generalSampler, 4u); - auto* samplerInfo = infos.back().data(); - samplerInfo->info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - std::array writes; - for (uint32_t i = 0; i < 4u; ++i) - { - auto& write = writes[i]; - write.count = assets.size(); - write.info = infos[i].data(); - write.dstSet = descriptors[0u].get(); - write.arrayElement = 0u; - write.binding = i; - } - - for (uint32_t i = 4u; i < 8u; ++i) - { - auto ix = i - 4u; - auto& write = writes[i] = writes[ix]; - write.binding = ix + 10u; - } - - auto& write = writes.back(); - write.count = 1u; - write.info = samplerInfo; - write.dstSet = descriptors[0u].get(); - write.arrayElement = 0u; - write.binding = 0u + 100u; - - if (!m_device->updateDescriptorSets(writes, {})) - return logFail("Failed to write descriptor sets"); - } - } - } - - // imGUI - { - auto scRes = static_cast(m_surface->getSwapchainResources()); - ext::imgui::UI::SCreationParameters params = {}; - params.resources.texturesInfo = { .setIx = NBL_TEXTURES_SET_IX, .bindingIx = NBL_TEXTURES_BINDING_IX }; - params.resources.samplersInfo = { .setIx = NBL_SAMPLER_STATES_SET_IX, .bindingIx = NBL_SAMPLER_STATES_BINDING_IX }; - params.utilities = m_utils; - params.transfer = getTransferUpQueue(); - params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, NBL_TEXTURES_COUNT); - params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); - params.subpassIx = 0u; - params.pipelineCache = nullptr; - - using imgui_precompiled_spirv_t = ext::imgui::UI::SCreationParameters::PrecompiledShaders; - params.spirv = std::make_optional(imgui_precompiled_spirv_t{ .vertex = imguiVertex, .fragment = imguiPixel }); - - auto* imgui = (ui.it = ext::imgui::UI::create(std::move(params))).get(); - if (not imgui) - return logFail("Failed to create `nbl::ext::imgui::UI` class"); - - { - const auto* layout = imgui->getPipeline()->getLayout()->getDescriptorSetLayout(0u); - auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, { &layout,1 }); - auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); - ui.descriptor = make_smart_refctd_ptr(std::move(ds)); - if (!ui.descriptor) - return logFail("Failed to create the descriptor set"); - { - auto dummy = SubAllocatedDescriptorSet::invalid_value; - ui.descriptor->multi_allocate(0, 1, &dummy); - assert(dummy == ext::imgui::UI::FontAtlasTexId); - } - IGPUDescriptorSet::SDescriptorInfo info = {}; - info.desc = smart_refctd_ptr(imgui->getFontAtlasView()); - info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - const IGPUDescriptorSet::SWriteDescriptorSet write = { - .dstSet = ui.descriptor->getDescriptorSet(), - .binding = 0u, - .arrayElement = ext::imgui::UI::FontAtlasTexId, - .count = 1, - .info = &info - }; - if (!m_device->updateDescriptorSets({ &write,1 }, {})) - return logFail("Failed to write the descriptor set"); - } - - imgui->registerListener([this]() - { - uiListener(); - }); - } - - m_semaphore = m_device->createSemaphore(m_realFrameIx); - if (!m_semaphore) - return logFail("Failed to Create a Semaphore!"); - - using pool_flags_t = IGPUCommandPool::CREATE_FLAGS; - - auto createCommandBuffers = [&](auto* queue, const std::span> out, pool_flags_t flags) -> bool - { - auto pool = m_device->createCommandPool(queue->getFamilyIndex(), flags); - if (!pool) - return logFail("Couldn't create command pool!"); - if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, out)) - return logFail("Couldn't create command buffer!"); - return true; - }; - - // render loop command buffers - if (not createCommandBuffers(getGraphicsQueue(), m_cmdBufs, pool_flags_t::RESET_COMMAND_BUFFER_BIT)) - return false; - - // transient command buffer - { - auto* queue = getGraphicsQueue(); - auto cbs = std::to_array({ smart_refctd_ptr() }); - if (not createCommandBuffers(queue, cbs, pool_flags_t::RESET_COMMAND_BUFFER_BIT | pool_flags_t::TRANSIENT_BIT)) - return false; - - std::vector images; - for (uint32_t i = 0; i < assets.size(); ++i) - { - auto& ies = assets[i]; - - images.emplace_back() = ies.views.candela->getCreationParameters().image.get(); - images.emplace_back() = ies.views.spherical->getCreationParameters().image.get(); - images.emplace_back() = ies.views.direction->getCreationParameters().image.get(); - images.emplace_back() = ies.views.mask->getCreationParameters().image.get(); - } - - auto* cb = cbs.front().get(); - cb->setObjectDebugName("Transient Command Buffer"); - - if(not cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) - return logFail("Couldn't begin command buffer!"); - - if(not IES::barrier(cb, images)) - return logFail("Failed to record pipeline barriers!"); - - if(not cb->end()) - return logFail("Couldn't end command buffer!"); - - core::smart_refctd_ptr semaphore = m_device->createSemaphore(0); - semaphore->setObjectDebugName("Scratch Semaphore"); - { - IQueue::SSubmitInfo::SSemaphoreInfo signal = - { - .semaphore = semaphore.get(), - .value = 1u, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS - }; - - const IQueue::SSubmitInfo::SCommandBufferInfo cmds[] = { {.cmdbuf = cb } }; - - const IQueue::SSubmitInfo infos[] = - { { - .waitSemaphores = {}, - .commandBuffers = cmds, - .signalSemaphores = {&signal,1} - } }; - - if (queue->submit(infos) != IQueue::RESULT::SUCCESS) - return logFail("Failed to submit queue!"); - } - - { - const ISemaphore::SWaitInfo infos[] = - { { - .semaphore = semaphore.get(), - .value = 1u - } }; - - if (m_device->blockForSemaphores(infos) != ISemaphore::WAIT_RESULT::SUCCESS) - return logFail("Couldn't block for scratch semaphore!"); - } - } - - onAppInitializedFinish(); - - return true; - } - - inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override - { - const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; - auto* const cb = m_cmdBufs.data()[resourceIx].get(); - cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - - m_inputSystem->getDefaultMouse(&mouse); - m_inputSystem->getDefaultKeyboard(&keyboard); - { - struct - { - std::vector mouse {}; std::vector keyboard {}; - } captured; - - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { mouseProcess(events); for (const auto& e : events) captured.mouse.emplace_back(e); }, m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { keyboardProcess(events); for (const auto& e : events) captured.keyboard.emplace_back(e); }, m_logger.get()); - - const auto cursorPosition = m_window->getCursorControl()->getPosition(); - ext::imgui::UI::SUpdateParameters params = - { - .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), - .displaySize = {m_window->getWidth(),m_window->getHeight()}, - .mouseEvents = captured.mouse, - .keyboardEvents = captured.keyboard - }; - - ui.it->update(params); - } - - auto& ies = assets[activeAssetIx]; - PushConstants pc; - updatePushConstants(pc, ies); - - for (auto& buffer : { ies.buffers.data, ies.buffers.hAngles, ies.buffers.vAngles }) // flush request for sanity - { - auto bound = buffer->getBoundMemory(); - if (bound.memory->haveToMakeVisible()) - { - const ILogicalDevice::MappedMemoryRange range(bound.memory, bound.offset, buffer->getSize()); - m_device->flushMappedMemoryRanges(1, &range); - } - } - - auto* const descriptor = descriptors[0].get(); - auto* image = ies.getActiveImage(); - - // Compute - { - cb->beginDebugMarker("IES::compute"); - IES::barrier(cb, image); - auto* layout = computePipeline->getLayout(); - cb->bindComputePipeline(computePipeline.get()); - cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); - cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); - const auto xGroups = (ies.getProfile()->getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u; - cb->dispatch(xGroups, xGroups, 1); - cb->endDebugMarker(); - } - - // Graphics - { - cb->beginDebugMarker("IES::render"); - IES::barrier(cb, image); - - asset::SViewport viewport; - { - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = m_window->getWidth(); - viewport.height = m_window->getHeight(); - } - cb->setViewport(0u, 1u, &viewport); - - VkRect2D scissor = - { - .offset = { 0, 0 }, - .extent = { m_window->getWidth(), m_window->getHeight() }, - }; - cb->setScissor(0u, 1u, &scissor); - - const VkRect2D currentRenderArea = - { - .offset = {0,0}, - .extent = {m_window->getWidth(),m_window->getHeight()} - }; - - const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; - const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; - auto scRes = static_cast(m_surface->getSwapchainResources()); - const IGPUCommandBuffer::SRenderpassBeginInfo info = - { - .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), - .colorClearValues = &clearValue, - .depthStencilClearValues = &depthValue, - .renderArea = currentRenderArea - }; - - cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - { - auto* layout = graphicsPipeline->getLayout(); - cb->bindGraphicsPipeline(graphicsPipeline.get()); - cb->bindDescriptorSets(EPBP_GRAPHICS, layout, 0, 1, &descriptor); - cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); - ext::FullScreenTriangle::recordDrawCall(cb); - { - auto* imgui = ui.it.get(); - auto* pipeline = imgui->getPipeline(); - cb->bindGraphicsPipeline(pipeline); - const auto* ds = ui.descriptor->getDescriptorSet(); - cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); - const ISemaphore::SWaitInfo wait = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - if (!imgui->render(cb, wait)) - { - m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); - return {}; - } - } - } - cb->endRenderPass(); - cb->endDebugMarker(); - cb->end(); - } - - IQueue::SSubmitInfo::SSemaphoreInfo retval = - { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS - }; - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = - { - {.cmdbuf = cb } - }; - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { - { - .semaphore = device_base_t::getCurrentAcquire().semaphore, - .value = device_base_t::getCurrentAcquire().acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - const IQueue::SSubmitInfo infos[] = - { - { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = {&retval,1} - } - }; - - if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) - { - retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal - m_realFrameIx--; - } - - std::string caption = "[Nabla Engine] IES Viewer"; - { - m_window->setCaption(caption); - } - return retval; - } - -protected: - const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override - { - // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. - const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { - // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth - { - .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = { - // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later - .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, - // don't want any writes to be available, we'll clear - .srcAccessMask = ACCESS_FLAGS::NONE, - // destination needs to wait as early as possible - // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` - .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // because depth and color get cleared first no read mask - .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - // leave view offsets and flags default - }, - // color from ATTACHMENT_OPTIMAL to PRESENT_SRC - { - .srcSubpass = 0, - .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = { - // last place where the color can get modified, depth is implicitly earlier - .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // only write ops, reads can't be made available - .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - // spec says nothing is needed when presentation is the destination - } - // leave view offsets and flags default - }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; - return dependencies; - } - -private: - enum E_MODE : uint32_t - { - EM_CDC, //! Candlepower Distribution Curve - EM_IES_C, //! IES Candela - EM_SPERICAL_C, //! Sperical coordinates - EM_DIRECTION, //! Sample direction - EM_PASS_T_MASK, //! Test mask - - EM_SIZE - }; - - struct IES - { - struct - { - core::smart_refctd_ptr candela = nullptr, spherical = nullptr, direction = nullptr, mask = nullptr; - } views; - - struct - { - core::smart_refctd_ptr vAngles = nullptr, hAngles = nullptr, data = nullptr; - } buffers; - - asset::SAssetBundle bundle; - std::string key; - - float zDegree = 0.f; - E_MODE mode = EM_CDC; - - inline const asset::CIESProfile* getProfile() const - { - auto* meta = bundle.getMetadata(); - if (meta) - return &meta->selfCast()->profile; - - return nullptr; - } - - inline video::IGPUImage* getActiveImage() const - { - switch (mode) - { - case EM_IES_C: - return views.candela->getCreationParameters().image.get(); - case EM_SPERICAL_C: - return views.spherical->getCreationParameters().image.get(); - case EM_DIRECTION: - return views.direction->getCreationParameters().image.get(); - case EM_PASS_T_MASK: - return views.mask->getCreationParameters().image.get(); - - case EM_CDC: - default: - return nullptr; - } - } - - template - requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) - static inline bool barrier(IGPUCommandBuffer* const cb, const std::span images) - { - if (images.empty()) - return false; - - if (not cb) - return false; - - using image_memory_barrier_t = IGPUCommandBuffer::SImageMemoryBarrier; - const IGPUImage::SSubresourceRange range = - { - .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }; - - std::vector imageBarriers(images.size()); - - for (uint32_t i = 0; i < imageBarriers.size(); ++i) - { - auto &it = imageBarriers[i] = - { - .barrier = {.dep = {}}, - .image = images[i], - .subresourceRange = range, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = newLayout - }; - - if constexpr (newLayout == IImage::LAYOUT::GENERAL) - { - // READ_ONLY_OPTIMAL -> GENERAL, RW - it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - it.barrier.dep.srcAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; - it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; - it.barrier.dep.dstAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; - it.oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - } - else if (newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) - { - // GENERAL -> READ_ONLY_OPTIMAL, RO - it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; - it.barrier.dep.srcAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; - it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - it.barrier.dep.dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; - it.oldLayout = IImage::LAYOUT::GENERAL; - } - - if constexpr (undefined) - it.oldLayout = IImage::LAYOUT::UNDEFINED; // transition for init - } - - return cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {}, .bufBarriers = {}, .imgBarriers = imageBarriers }); - } - - template - requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) - static inline bool barrier(IGPUCommandBuffer* const cb, video::IGPUImage* image) - { - if (not image) - return false; - - auto in = std::to_array({ image }); - return barrier(cb, in); - } - }; - - smart_refctd_ptr graphicsPipeline; - smart_refctd_ptr computePipeline; - std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> descriptors; - - bool running = true; - std::vector assets; - size_t activeAssetIx = 0; - - smart_refctd_ptr m_semaphore; - uint64_t m_realFrameIx = 0; - std::array, device_base_t::MaxFramesInFlight> m_cmdBufs; - InputSystem::ChannelReader mouse; - InputSystem::ChannelReader keyboard; - - struct { - smart_refctd_ptr it; - smart_refctd_ptr descriptor; - } ui; - - void mouseProcess(const nbl::ui::IMouseEventChannel::range_t& events) - { - for (auto it = events.begin(); it != events.end(); it++) - { - auto ev = *it; - - if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) - { - auto& ies = assets[activeAssetIx]; - auto* profile = ies.getProfile(); - - auto impulse = ev.scrollEvent.verticalScroll * 0.02f; - ies.zDegree = std::clamp(ies.zDegree + impulse, profile->getHoriAngles().front(), profile->getHoriAngles().back()); - } - } - } - - void keyboardProcess(const nbl::ui::IKeyboardEventChannel::range_t& events) - { - for (auto it = events.begin(); it != events.end(); it++) - { - const auto ev = *it; - - if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) - { - if (ev.keyCode == nbl::ui::EKC_UP_ARROW) - activeAssetIx = std::clamp(activeAssetIx + 1, 0, assets.size() - 1u); - else if(ev.keyCode == nbl::ui::EKC_DOWN_ARROW) - activeAssetIx = std::clamp(activeAssetIx - 1, 0, assets.size() - 1u); - - auto& ies = assets[activeAssetIx]; - - if (ev.keyCode == nbl::ui::EKC_C) - ies.mode = EM_CDC; - else if (ev.keyCode == nbl::ui::EKC_V) - ies.mode = EM_IES_C; - else if (ev.keyCode == nbl::ui::EKC_S) - ies.mode = EM_SPERICAL_C; - else if (ev.keyCode == nbl::ui::EKC_D) - ies.mode = EM_DIRECTION; - else if (ev.keyCode == nbl::ui::EKC_M) - ies.mode = EM_PASS_T_MASK; - - if (ev.keyCode == nbl::ui::EKC_Q) - running = false; - } - } - } - - core::smart_refctd_ptr createImageView(const size_t width, const size_t height, asset::E_FORMAT format, std::string name) - { - IGPUImage::SCreationParams imageParams {}; - imageParams.type = IImage::E_TYPE::ET_2D; - imageParams.extent.height = height; - imageParams.extent.width = width; - imageParams.extent.depth = 1u; - imageParams.format = format; - imageParams.mipLevels = 1u; - imageParams.flags = IImage::ECF_NONE; - imageParams.arrayLayers = 1u; - imageParams.samples = IImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; - imageParams.usage = bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_STORAGE_BIT; - - auto image = m_device->createImage(std::move(imageParams)); - image->setObjectDebugName(name.c_str()); - - if (!image) - { - m_logger->log("Failed to create \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - auto allocation = m_device->allocate(image->getMemoryReqs(), image.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); - if (!allocation.isValid()) - { - m_logger->log("Failed to allocate device memory for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - IGPUImageView::SCreationParams viewParams {}; - viewParams.image = std::move(image); - viewParams.format = format; - viewParams.viewType = IGPUImageView::ET_2D; - viewParams.flags = IImageViewBase::ECF_NONE; - viewParams.subresourceRange.baseArrayLayer = 0u; - viewParams.subresourceRange.baseMipLevel = 0u; - viewParams.subresourceRange.layerCount = 1u; - viewParams.subresourceRange.levelCount = 1u; - viewParams.subresourceRange.aspectMask = core::bitflag(asset::IImage::EAF_COLOR_BIT); - - auto imageView = m_device->createImageView(std::move(viewParams)); - - if(not imageView) - m_logger->log("Failed to create image view for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); - - return imageView; - } - - core::smart_refctd_ptr createBuffer(const core::vector& in, std::string name) - { - IGPUBuffer::SCreationParams bufferParams = {}; - bufferParams.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT /*TODO: <- double check*/;; - bufferParams.size = sizeof(asset::CIESProfile::IES_STORAGE_FORMAT) * in.size(); - - auto buffer = m_device->createBuffer(std::move(bufferParams)); - buffer->setObjectDebugName(name.c_str()); - - if (not buffer) - { - m_logger->log("Failed to create \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - auto memoryReqs = buffer->getMemoryReqs(); - - if(m_utils) - memoryReqs.memoryTypeBits &= m_utils->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); - - auto allocation = m_device->allocate(memoryReqs, buffer.get(), core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT)); - if (not allocation.isValid()) - { - m_logger->log("Failed to allocate \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - auto* mappedPointer = allocation.memory->map({ 0ull, memoryReqs.size }, IDeviceMemoryAllocation::EMCAF_READ_AND_WRITE); - - if (not mappedPointer) - { - m_logger->log("Failed to map device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - memcpy(mappedPointer, in.data(), buffer->getSize()); - - if (not allocation.memory->unmap()) - { - m_logger->log("Failed to unmap device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - return buffer; - } - - inline void updatePushConstants(PushConstants& out, const IES& in) - { - out.vAnglesBDA = in.buffers.vAngles->getDeviceAddress(); - out.hAnglesBDA = in.buffers.hAngles->getDeviceAddress(); - out.dataBDA = in.buffers.data->getDeviceAddress(); - - const auto* profile = in.getProfile(); - - out.maxIValue = profile->getMaxCandelaValue(); - out.vAnglesCount = profile->getVertAngles().size(); - out.hAnglesCount = profile->getHoriAngles().size(); - out.dataCount = profile->getData().size(); - - out.zAngleDegreeRotation = in.zDegree; - out.mode = in.mode; - out.texIx = activeAssetIx; - } - - inline void uiListener() - { - auto& ies = assets[activeAssetIx]; - const auto name = path(ies.key).filename().string(); - auto* profile = ies.getProfile(); - const float lowerBound = (float)profile->getHoriAngles().front(); - const float upperBound = (float)profile->getHoriAngles().back(); - const bool singleAngle = (upperBound == lowerBound); - - auto getModeRS = [&]() - { - switch (ies.mode) - { - case EM_CDC: - return "Candlepower Distribution Curve"; - case EM_IES_C: - return "Sample IES Candela"; - case EM_SPERICAL_C: - return "Sample Spherical Coordinates"; - case EM_DIRECTION: - return "Sample Direction"; - case EM_PASS_T_MASK: - return "Sample Pass Mask"; - default: - return "ERROR (view)"; - } - }; - - auto getSymmetryRS = [&]() - { - switch (profile->getSymmetry()) - { - case asset::CIESProfile::ISOTROPIC: - return "ISOTROPIC"; - case asset::CIESProfile::QUAD_SYMETRIC: - return "QUAD_SYMETRIC"; - case asset::CIESProfile::HALF_SYMETRIC: - return "HALF_SYMETRIC"; - case asset::CIESProfile::OTHER_HALF_SYMMETRIC: - return "OTHER_HALF_SYMMETRIC"; - case asset::CIESProfile::NO_LATERAL_SYMMET: - return "NO_LATERAL_SYMMET"; - default: - return "ERROR (symmetry)"; - } - }; - - auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); - const ImGuiViewport* vp = ImGui::GetMainViewport(); - { - ImDrawList* fg = ImGui::GetForegroundDrawList(); - float x = vp->Pos.x + 8.f; - float y = vp->Pos.y + 8.f; - - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), getModeRS()); - y += ImGui::GetTextLineHeightWithSpacing(); - - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), getSymmetryRS()); - y += ImGui::GetTextLineHeightWithSpacing(); - - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), name.c_str()); - y += ImGui::GetTextLineHeightWithSpacing(); - - char b1[64]; snprintf(b1, sizeof(b1), "%.3f\xC2\xB0", angle); - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), b1); - } - - { - const float pad = 8.f; - const float sliderW = 74.f; - const float sliderH = ImMin(vp->Size.y - pad * 2.f, 260.f); - ImGui::SetNextWindowPos(ImVec2(vp->Pos.x + vp->Size.x - sliderW - pad, vp->Pos.y + pad), ImGuiCond_Always); - ImGui::SetNextWindowSize(ImVec2(sliderW, sliderH), ImGuiCond_Always); - ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0, 0)); - ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); - ImGuiWindowFlags flags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoMove | - ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoBringToFrontOnFocus | - ImGuiWindowFlags_NoNav | ImGuiWindowFlags_NoBackground; - if (ImGui::Begin("AngleSliderOverlay", nullptr, flags)) - { - ImGui::InvisibleButton("##fader_area", ImGui::GetContentRegionAvail()); - ImVec2 rmin = ImGui::GetItemRectMin(); - ImVec2 rmax = ImGui::GetItemRectMax(); - ImDrawList* dl = ImGui::GetWindowDrawList(); - ImU32 col = IM_COL32(220, 60, 60, 255); - - float knobR = 7.f; - float trackX = rmax.x - 12.f; - float y0 = rmin.y + knobR + 1.f; - float y1 = rmax.y - knobR - 1.f; - - dl->AddLine(ImVec2(trackX, y0), ImVec2(trackX, y1), col, 3.f); - - if (singleAngle) - { - float y = (y0 + y1) * 0.5f; - dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); - char tb[32]; snprintf(tb, sizeof(tb), "%.0f", lowerBound); - ImVec2 ts = ImGui::CalcTextSize(tb); - dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); - } - else - { - for (int i = 0; i < 5; ++i) - { - float v = lowerBound + (upperBound - lowerBound) * (float(i) / 4.f); - float t = (v - lowerBound) / (upperBound - lowerBound); - float y = y1 - t * (y1 - y0); - dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); - char tb[32]; snprintf(tb, sizeof(tb), "%.0f", v); - ImVec2 ts = ImGui::CalcTextSize(tb); - dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); - } - } - - float t = singleAngle ? 0.5f : (angle - lowerBound) / (upperBound - lowerBound); - float knobY = y1 - t * (y1 - y0); - dl->AddCircleFilled(ImVec2(trackX, knobY), knobR, col); - dl->AddCircle(ImVec2(trackX, knobY), knobR, ImGui::GetColorU32(ImGuiCol_Border)); - - if (!singleAngle && (ImGui::IsItemHovered() || ImGui::IsItemActive()) && ImGui::IsMouseDown(0)) - { - float my = ImClamp(ImGui::GetIO().MousePos.y, y0, y1); - float nt = (y1 - my) / (y1 - y0); - angle = lowerBound + nt * (upperBound - lowerBound); - } - } - ImGui::End(); - ImGui::PopStyleVar(2); - } - - ies.zDegree = angle; - } -}; - +#include "App.hpp" NBL_MAIN_FUNC(IESViewer) \ No newline at end of file From 776f925b8fa72348aa41a687089af280a5a8f57c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 27 Oct 2025 13:48:17 +0100 Subject: [PATCH 056/219] rename 50. ex project directory, move ctor to main.cpp --- 50.IESViewer/App.hpp | 59 +++ 50.IESViewer/AppEvent.cpp | 54 +++ 50.IESViewer/AppGPU.cpp | 100 +++++ 50.IESViewer/AppInit.cpp | 438 +++++++++++++++++++ 50.IESViewer/AppInputParser.cpp | 108 +++++ 50.IESViewer/AppInputParser.hpp | 27 ++ 50.IESViewer/AppRender.cpp | 227 ++++++++++ 50.IESViewer/AppUI.cpp | 105 +++++ 50.IESViewer/CMakeLists.txt | 85 ++++ 50.IESViewer/IES.cpp | 71 +++ 50.IESViewer/IES.hpp | 118 +++++ 50.IESViewer/app_resources/common.hlsl | 67 +++ 50.IESViewer/app_resources/compute.hlsl | 222 ++++++++++ 50.IESViewer/app_resources/imgui.opts.hlsl | 16 + 50.IESViewer/app_resources/imgui.pixel.hlsl | 6 + 50.IESViewer/app_resources/imgui.vertex.hlsl | 5 + 50.IESViewer/app_resources/pixel.hlsl | 58 +++ 50.IESViewer/app_resources/vertex.hlsl | 6 + 50.IESViewer/inputs.json | 14 + 50.IESViewer/main.cpp | 18 + 20 files changed, 1804 insertions(+) create mode 100644 50.IESViewer/App.hpp create mode 100644 50.IESViewer/AppEvent.cpp create mode 100644 50.IESViewer/AppGPU.cpp create mode 100644 50.IESViewer/AppInit.cpp create mode 100644 50.IESViewer/AppInputParser.cpp create mode 100644 50.IESViewer/AppInputParser.hpp create mode 100644 50.IESViewer/AppRender.cpp create mode 100644 50.IESViewer/AppUI.cpp create mode 100644 50.IESViewer/CMakeLists.txt create mode 100644 50.IESViewer/IES.cpp create mode 100644 50.IESViewer/IES.hpp create mode 100644 50.IESViewer/app_resources/common.hlsl create mode 100644 50.IESViewer/app_resources/compute.hlsl create mode 100644 50.IESViewer/app_resources/imgui.opts.hlsl create mode 100644 50.IESViewer/app_resources/imgui.pixel.hlsl create mode 100644 50.IESViewer/app_resources/imgui.vertex.hlsl create mode 100644 50.IESViewer/app_resources/pixel.hlsl create mode 100644 50.IESViewer/app_resources/vertex.hlsl create mode 100644 50.IESViewer/inputs.json create mode 100644 50.IESViewer/main.cpp diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp new file mode 100644 index 000000000..f06e8fc14 --- /dev/null +++ b/50.IESViewer/App.hpp @@ -0,0 +1,59 @@ +#ifndef _THIS_EXAMPLE_APP_HPP_ +#define _THIS_EXAMPLE_APP_HPP_ + +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "IES.hpp" + +NBL_EXPOSE_NAMESPACES + +class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; + +public: + IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD); + + bool onAppInitialized(smart_refctd_ptr&& system) override; + IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override; + +protected: + const IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override; + +private: + smart_refctd_ptr graphicsPipeline; + smart_refctd_ptr computePipeline; + std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> descriptors; + + bool running = true; + std::vector assets; + size_t activeAssetIx = 0; + + size_t m_realFrameIx = 0; + smart_refctd_ptr m_semaphore; + std::array, device_base_t::MaxFramesInFlight> m_cmdBufs; + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + + struct { + smart_refctd_ptr it; + smart_refctd_ptr descriptor; + } ui; + + void processMouse(const IMouseEventChannel::range_t& events); + void processKeyboard(const IKeyboardEventChannel::range_t& events); + + smart_refctd_ptr createImageView(const size_t width, const size_t height, E_FORMAT format, std::string name); + smart_refctd_ptr createBuffer(const core::vector& in, std::string name); + + void uiListener(); +}; + +#endif // _THIS_EXAMPLE_APP_HPP_ \ No newline at end of file diff --git a/50.IESViewer/AppEvent.cpp b/50.IESViewer/AppEvent.cpp new file mode 100644 index 000000000..07a11f5e9 --- /dev/null +++ b/50.IESViewer/AppEvent.cpp @@ -0,0 +1,54 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" + +void IESViewer::processMouse(const nbl::ui::IMouseEventChannel::range_t& events) +{ + for (auto it = events.begin(); it != events.end(); it++) + { + auto ev = *it; + + if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) + { + auto& ies = assets[activeAssetIx]; + auto* profile = ies.getProfile(); + + auto impulse = ev.scrollEvent.verticalScroll * 0.02f; + ies.zDegree = std::clamp(ies.zDegree + impulse, profile->getHoriAngles().front(), profile->getHoriAngles().back()); + } + } +} + +void IESViewer::processKeyboard(const nbl::ui::IKeyboardEventChannel::range_t& events) +{ + for (auto it = events.begin(); it != events.end(); it++) + { + const auto ev = *it; + + if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) + { + if (ev.keyCode == nbl::ui::EKC_UP_ARROW) + activeAssetIx = std::clamp(activeAssetIx + 1, 0, assets.size() - 1u); + else if (ev.keyCode == nbl::ui::EKC_DOWN_ARROW) + activeAssetIx = std::clamp(activeAssetIx - 1, 0, assets.size() - 1u); + + auto& ies = assets[activeAssetIx]; + + if (ev.keyCode == nbl::ui::EKC_C) + ies.mode = IES::EM_CDC; + else if (ev.keyCode == nbl::ui::EKC_V) + ies.mode = IES::EM_IES_C; + else if (ev.keyCode == nbl::ui::EKC_S) + ies.mode = IES::EM_SPERICAL_C; + else if (ev.keyCode == nbl::ui::EKC_D) + ies.mode = IES::EM_DIRECTION; + else if (ev.keyCode == nbl::ui::EKC_M) + ies.mode = IES::EM_PASS_T_MASK; + + if (ev.keyCode == nbl::ui::EKC_Q) + running = false; + } + } +} \ No newline at end of file diff --git a/50.IESViewer/AppGPU.cpp b/50.IESViewer/AppGPU.cpp new file mode 100644 index 000000000..aa13994d6 --- /dev/null +++ b/50.IESViewer/AppGPU.cpp @@ -0,0 +1,100 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" + +core::smart_refctd_ptr IESViewer::createImageView(const size_t width, const size_t height, asset::E_FORMAT format, std::string name) +{ + IGPUImage::SCreationParams imageParams{}; + imageParams.type = IImage::E_TYPE::ET_2D; + imageParams.extent.height = height; + imageParams.extent.width = width; + imageParams.extent.depth = 1u; + imageParams.format = format; + imageParams.mipLevels = 1u; + imageParams.flags = IImage::ECF_NONE; + imageParams.arrayLayers = 1u; + imageParams.samples = IImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + imageParams.usage = bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_STORAGE_BIT; + + auto image = m_device->createImage(std::move(imageParams)); + image->setObjectDebugName(name.c_str()); + + if (!image) + { + m_logger->log("Failed to create \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + auto allocation = m_device->allocate(image->getMemoryReqs(), image.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); + if (!allocation.isValid()) + { + m_logger->log("Failed to allocate device memory for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + IGPUImageView::SCreationParams viewParams{}; + viewParams.image = std::move(image); + viewParams.format = format; + viewParams.viewType = IGPUImageView::ET_2D; + viewParams.flags = IImageViewBase::ECF_NONE; + viewParams.subresourceRange.baseArrayLayer = 0u; + viewParams.subresourceRange.baseMipLevel = 0u; + viewParams.subresourceRange.layerCount = 1u; + viewParams.subresourceRange.levelCount = 1u; + viewParams.subresourceRange.aspectMask = core::bitflag(asset::IImage::EAF_COLOR_BIT); + + auto imageView = m_device->createImageView(std::move(viewParams)); + + if (not imageView) + m_logger->log("Failed to create image view for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); + + return imageView; +} + +core::smart_refctd_ptr IESViewer::createBuffer(const core::vector& in, std::string name) +{ + IGPUBuffer::SCreationParams bufferParams = {}; + bufferParams.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT /*TODO: <- double check*/;; + bufferParams.size = sizeof(asset::CIESProfile::IES_STORAGE_FORMAT) * in.size(); + + auto buffer = m_device->createBuffer(std::move(bufferParams)); + buffer->setObjectDebugName(name.c_str()); + + if (not buffer) + { + m_logger->log("Failed to create \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + auto memoryReqs = buffer->getMemoryReqs(); + + if (m_utils) + memoryReqs.memoryTypeBits &= m_utils->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); + + auto allocation = m_device->allocate(memoryReqs, buffer.get(), core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT)); + if (not allocation.isValid()) + { + m_logger->log("Failed to allocate \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + auto* mappedPointer = allocation.memory->map({ 0ull, memoryReqs.size }, IDeviceMemoryAllocation::EMCAF_READ_AND_WRITE); + + if (not mappedPointer) + { + m_logger->log("Failed to map device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + memcpy(mappedPointer, in.data(), buffer->getSize()); + + if (not allocation.memory->unmap()) + { + m_logger->log("Failed to unmap device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + return buffer; +} \ No newline at end of file diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp new file mode 100644 index 000000000..60fbb54b6 --- /dev/null +++ b/50.IESViewer/AppInit.cpp @@ -0,0 +1,438 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" +#include "AppInputParser.hpp" +#include "app_resources/common.hlsl" +#include "app_resources/imgui.opts.hlsl" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +#define MEDIA_ENTRY "../../media" +#define INPUT_JSON_FILE "../inputs.json" + +bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) +{ + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + const auto media = absolute(path(MEDIA_ENTRY)); + + AppInputParser::Output out; + AppInputParser parser(system::logger_opt_ptr(m_logger.get())); + if (!parser.parse(out, INPUT_JSON_FILE, media.string())) + return false; + + m_logger->log("Loading IES assets..", system::ILogger::ELL_INFO); + { + auto start = std::chrono::high_resolution_clock::now(); + size_t loaded = {}, total = out.inputList.size(); + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = system::logger_opt_ptr(m_logger.get()); + + for (const auto& in : out.inputList) + { + auto asset = m_assetMgr->getAsset(in.c_str(), lp); + + if (asset.getMetadata()) + { + auto& ies = assets.emplace_back(); + ies.bundle = std::move(asset); + ies.key = path(in).lexically_relative(media).string(); + ++loaded; + + m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, in.c_str()); + } + else + m_logger->log("Failed to load metadata for \"%s\"! Skipping..", system::ILogger::ELL_WARNING, in.c_str()); + } + const auto sl = std::to_string(loaded), st = std::to_string(total); + const bool passed = loaded == total; + + if (not passed) + { + auto diff = std::to_string(total - loaded); + m_logger->log("Failed to load [%s/%s] IES assets!", system::ILogger::ELL_ERROR, diff.c_str(), st.c_str()); + } + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished loading IES assets, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); + } + + m_logger->log("Creating GPU IES resources..", system::ILogger::ELL_INFO); + { + auto start = std::chrono::high_resolution_clock::now(); + for (auto& ies : assets) + { + const auto* profile = ies.getProfile(); + const auto resolution = profile->getOptimalIESResolution(); + + #define CREATE_VIEW(VIEW, FORMAT, NAME) \ + if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT, NAME + ies.key) )) return false; + + CREATE_VIEW(ies.views.candela, asset::EF_R16_UNORM, "IES Candela Data Image: ") + CREATE_VIEW(ies.views.spherical, asset::EF_R32G32_SFLOAT, "IES Spherical Data Image: ") + CREATE_VIEW(ies.views.direction, asset::EF_R32G32B32A32_SFLOAT, "IES Direction Data Image: ") + CREATE_VIEW(ies.views.mask, asset::EF_R8G8_UNORM, "IES Mask Data Image: ") + + #define CREATE_BUFFER(BUFFER, DATA, NAME) \ + if (!(BUFFER = createBuffer(DATA, NAME + ies.key) )) return false; + + CREATE_BUFFER(ies.buffers.vAngles, profile->getVertAngles(), "IES Vertical Angles Buffer: ") + CREATE_BUFFER(ies.buffers.hAngles, profile->getHoriAngles(), "IES Horizontal Angles Buffer: ") + CREATE_BUFFER(ies.buffers.data, profile->getData(), "IES Data Buffer: ") + } + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished creating GPU IES resources, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); + } + + auto createShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = system::logger_opt_ptr(m_logger.get()); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key, lp); + const auto assets = assetBundle.getContents(); + + if (assets.empty()) + { + m_logger->log("Failed to load \"%s\" shader!", system::ILogger::ELL_ERROR, key.data()); + return nullptr; + } + + auto spirvShader = IAsset::castDown(assets[0]); + + if (spirvShader) + m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, key.data()); + else + m_logger->log("Failed to cast \"%s\" asset to IShader!", system::ILogger::ELL_ERROR, key.data()); + + return spirvShader; + }; + + #define CREATE_SHADER(SHADER, PATH) \ + if (!(SHADER = createShader.template operator()() )) return false; + + m_logger->log("Loading GPU shaders..", system::ILogger::ELL_INFO); + smart_refctd_ptr compute, pixel, vertex, imguiVertex, imguiPixel; + { + auto start = std::chrono::high_resolution_clock::now(); + CREATE_SHADER(compute, "compute") + CREATE_SHADER(pixel, "pixel") + CREATE_SHADER(vertex, "vertex") + CREATE_SHADER(imguiVertex, "imgui.vertex") + CREATE_SHADER(imguiPixel, "imgui.pixel") + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished loading GPU shaders, took %s seconds!", system::ILogger::ELL_PERFORMANCE, took.c_str()); + } + + // Pipelines & Descriptor Sets + { + using binding_flags_t = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + using stage_flags_t = asset::IShader::E_SHADER_STAGE; + static constexpr auto TexturesCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT) | binding_flags_t::ECF_PARTIALLY_BOUND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT; + static constexpr auto SamplersCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT); + static constexpr auto StageFlags = core::bitflag(stage_flags_t::ESS_FRAGMENT) | stage_flags_t::ESS_COMPUTE; + + //! single descriptor for both compute & graphics, we will only need to trasition images' layout with a barrier + #define BINDING_TEXTURE(IX, TYPE) { .binding = IX, .type = TYPE, .createFlags = TexturesCreateFlags, .stageFlags = StageFlags, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr } + #define BINDING_SAMPLER(IX) { .binding = IX, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = StageFlags, .count = 1u, .immutableSamplers = nullptr } + static constexpr auto bindings = std::to_array + ({ + BINDING_TEXTURE(0u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(0u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // candela + BINDING_TEXTURE(1u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(1u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // spherical + BINDING_TEXTURE(2u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(2u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // direction + BINDING_TEXTURE(3u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(3u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // mask + BINDING_SAMPLER(0u + 100u) + }); + + const uint32_t texturesCount = assets.size(); + smart_refctd_ptr generalSampler; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK; + params.MinFilter = ISampler::ETF_LINEAR; + params.MaxFilter = ISampler::ETF_LINEAR; + params.MipmapMode = ISampler::ESMM_LINEAR; + params.AnisotropicFilter = 0u; + params.CompareEnable = false; + params.CompareFunc = ISampler::ECO_ALWAYS; + + generalSampler = m_device->createSampler(params); + + if (not generalSampler) + { + m_logger->log("Failed to create sampler!", system::ILogger::ELL_ERROR); + return false; + } + + generalSampler->setObjectDebugName("General IES sampler"); + } + + auto scRes = static_cast(m_surface->getSwapchainResources()); + scRes->getRenderpass(); // note it also creates rp if nulled + { + auto descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + + if (not descriptorSetLayout) + return logFail("Failed to create descriptor set layout!"); + + auto range = std::to_array({ {StageFlags.value, 0u, sizeof(PushConstants)} }); + auto pipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); + + if (not pipelineLayout) + return logFail("Failed to create pipeline layout!"); + + // Compute Pipeline + { + auto params = std::to_array({ {} });; + params[0].layout = pipelineLayout.get(); + params[0].shader.shader = compute.get(); + params[0].shader.entryPoint = "main"; + + if (!m_device->createComputePipelines(nullptr, params, &computePipeline)) + return logFail("Failed to create compute pipeline!"); + } + + // Graphics Pipeline + { + IGPUPipelineBase::SShaderEntryMap specConstants; + const auto orientationAsUint32 = static_cast(hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT); + specConstants[0] = std::span{ reinterpret_cast(&orientationAsUint32), sizeof(orientationAsUint32) }; + + video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = + { + {.shader = vertex.get(), .entryPoint = "main", .entries = &specConstants }, + {.shader = pixel.get(), .entryPoint = "PSMain" } + }; + + auto params = std::to_array({ {} }); + params[0].renderpass = scRes->getRenderpass(); + params[0].vertexShader = specInfo[0]; + params[0].fragmentShader = specInfo[1]; + params[0].layout = pipelineLayout.get(); + params[0].cached = + { + .vertexInput = {}, // full screen tri ext, no inputs + .primitiveAssembly = {}, + .rasterization = { + .polygonMode = EPM_FILL, + .faceCullingMode = EFCM_NONE, + .depthWriteEnable = false, + }, + .blend = {}, + .subpassIx = 0u + }; + + if (!m_device->createGraphicsPipelines(nullptr, params, &graphicsPipeline)) + return logFail("Failed to create graphics pipeline!"); + } + + const auto dscLayoutPtrs = graphicsPipeline->getLayout()->getDescriptorSetLayouts(); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dscLayoutPtrs); + pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), descriptors.data()); + { + std::array, 4u + 1u> infos; + #define FILL_INFO(DESC, IX) \ + { \ + auto& info = infos[IX].emplace_back(); \ + info.desc = DESC; \ + info.info.image.imageLayout = IImage::LAYOUT::GENERAL; \ + } + + for (uint32_t i = 0; i < assets.size(); ++i) + { + auto& ies = assets[i]; + + FILL_INFO(ies.views.candela, 0u) + FILL_INFO(ies.views.spherical, 1u) + FILL_INFO(ies.views.direction, 2u) + FILL_INFO(ies.views.mask, 3u) + } + FILL_INFO(generalSampler, 4u); + auto* samplerInfo = infos.back().data(); + samplerInfo->info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + std::array writes; + for (uint32_t i = 0; i < 4u; ++i) + { + auto& write = writes[i]; + write.count = assets.size(); + write.info = infos[i].data(); + write.dstSet = descriptors[0u].get(); + write.arrayElement = 0u; + write.binding = i; + } + + for (uint32_t i = 4u; i < 8u; ++i) + { + auto ix = i - 4u; + auto& write = writes[i] = writes[ix]; + write.binding = ix + 10u; + } + + auto& write = writes.back(); + write.count = 1u; + write.info = samplerInfo; + write.dstSet = descriptors[0u].get(); + write.arrayElement = 0u; + write.binding = 0u + 100u; + + if (!m_device->updateDescriptorSets(writes, {})) + return logFail("Failed to write descriptor sets"); + } + } + } + + // imGUI + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::imgui::UI::SCreationParameters params = {}; + params.resources.texturesInfo = { .setIx = NBL_TEXTURES_SET_IX, .bindingIx = NBL_TEXTURES_BINDING_IX }; + params.resources.samplersInfo = { .setIx = NBL_SAMPLER_STATES_SET_IX, .bindingIx = NBL_SAMPLER_STATES_BINDING_IX }; + params.utilities = m_utils; + params.transfer = getTransferUpQueue(); + params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, NBL_TEXTURES_COUNT); + params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); + params.subpassIx = 0u; + params.pipelineCache = nullptr; + + using imgui_precompiled_spirv_t = ext::imgui::UI::SCreationParameters::PrecompiledShaders; + params.spirv = std::make_optional(imgui_precompiled_spirv_t{ .vertex = imguiVertex, .fragment = imguiPixel }); + + auto* imgui = (ui.it = ext::imgui::UI::create(std::move(params))).get(); + if (not imgui) + return logFail("Failed to create `nbl::ext::imgui::UI` class"); + + { + const auto* layout = imgui->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, { &layout,1 }); + auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); + ui.descriptor = make_smart_refctd_ptr(std::move(ds)); + if (!ui.descriptor) + return logFail("Failed to create the descriptor set"); + { + auto dummy = SubAllocatedDescriptorSet::invalid_value; + ui.descriptor->multi_allocate(0, 1, &dummy); + assert(dummy == ext::imgui::UI::FontAtlasTexId); + } + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = smart_refctd_ptr(imgui->getFontAtlasView()); + info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write = { + .dstSet = ui.descriptor->getDescriptorSet(), + .binding = 0u, + .arrayElement = ext::imgui::UI::FontAtlasTexId, + .count = 1, + .info = &info + }; + if (!m_device->updateDescriptorSets({ &write,1 }, {})) + return logFail("Failed to write the descriptor set"); + } + + imgui->registerListener([this]() + { + uiListener(); + }); + } + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + using pool_flags_t = IGPUCommandPool::CREATE_FLAGS; + + auto createCommandBuffers = [&](auto* queue, const std::span> out, pool_flags_t flags) -> bool + { + auto pool = m_device->createCommandPool(queue->getFamilyIndex(), flags); + if (!pool) + return logFail("Couldn't create command pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, out)) + return logFail("Couldn't create command buffer!"); + return true; + }; + + // render loop command buffers + if (not createCommandBuffers(getGraphicsQueue(), m_cmdBufs, pool_flags_t::RESET_COMMAND_BUFFER_BIT)) + return false; + + // transient command buffer + { + auto* queue = getGraphicsQueue(); + auto cbs = std::to_array({ smart_refctd_ptr() }); + if (not createCommandBuffers(queue, cbs, pool_flags_t::RESET_COMMAND_BUFFER_BIT | pool_flags_t::TRANSIENT_BIT)) + return false; + + std::vector images; + for (uint32_t i = 0; i < assets.size(); ++i) + { + auto& ies = assets[i]; + + images.emplace_back() = ies.views.candela->getCreationParameters().image.get(); + images.emplace_back() = ies.views.spherical->getCreationParameters().image.get(); + images.emplace_back() = ies.views.direction->getCreationParameters().image.get(); + images.emplace_back() = ies.views.mask->getCreationParameters().image.get(); + } + + auto* cb = cbs.front().get(); + cb->setObjectDebugName("Transient Command Buffer"); + + if (not cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + return logFail("Couldn't begin command buffer!"); + + if (not IES::barrier(cb, images)) + return logFail("Failed to record pipeline barriers!"); + + if (not cb->end()) + return logFail("Couldn't end command buffer!"); + + core::smart_refctd_ptr semaphore = m_device->createSemaphore(0); + semaphore->setObjectDebugName("Scratch Semaphore"); + { + IQueue::SSubmitInfo::SSemaphoreInfo signal = + { + .semaphore = semaphore.get(), + .value = 1u, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + + const IQueue::SSubmitInfo::SCommandBufferInfo cmds[] = { {.cmdbuf = cb } }; + + const IQueue::SSubmitInfo infos[] = + { { + .waitSemaphores = {}, + .commandBuffers = cmds, + .signalSemaphores = {&signal,1} + } }; + + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + return logFail("Failed to submit queue!"); + } + + { + const ISemaphore::SWaitInfo infos[] = + { { + .semaphore = semaphore.get(), + .value = 1u + } }; + + if (m_device->blockForSemaphores(infos) != ISemaphore::WAIT_RESULT::SUCCESS) + return logFail("Couldn't block for scratch semaphore!"); + } + } + + onAppInitializedFinish(); + + return true; +} \ No newline at end of file diff --git a/50.IESViewer/AppInputParser.cpp b/50.IESViewer/AppInputParser.cpp new file mode 100644 index 000000000..0f236969b --- /dev/null +++ b/50.IESViewer/AppInputParser.cpp @@ -0,0 +1,108 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "AppInputParser.hpp" +#include "nlohmann/json.hpp" + +NBL_EXPOSE_NAMESPACES +using namespace nlohmann; + +bool AppInputParser::parse(Output& out, const std::string input, const std::string cwd) +{ + const auto jInputFile = std::filesystem::absolute(input); + const auto sjInputFile = jInputFile.string(); + + std::ifstream file(sjInputFile.c_str()); + if (!file.is_open()) { + + logger.log("Could not open \"%s\" file.", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + std::stringstream buffer; + buffer << file.rdbuf(); + const auto jsonBuffer = buffer.str(); + + if (jsonBuffer.empty()) + { + logger.log("\"%s\" file is empty!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + const auto jsonMap = json::parse(jsonBuffer.c_str()); + + if (!jsonMap["directories"].is_array()) + { + logger.log("\"%s\" file is empty!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + if (!jsonMap["files"].is_array()) + { + logger.log("\"%s\" file's field \"files\" is not an array!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + if (!jsonMap["writeAssets"].is_boolean()) + { + logger.log("\"%s\" file's field \"writeAssets\" is not a boolean!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + auto addFile = [&](const std::string_view in) -> bool + { + auto path = std::filesystem::absolute(cwd / std::filesystem::path(in)); + + if (std::filesystem::exists(path) && std::filesystem::is_regular_file(path) && path.extension() == ".ies") + out.inputList.push_back(path.string()); + else + { + logger.log("Invalid \"%s\" input!", system::ILogger::ELL_ERROR, path.string().c_str()); + return false; + } + + return true; + }; + + auto addFiles = [&](const std::string_view directoryPath) -> bool + { + auto directory(std::filesystem::absolute(cwd / std::filesystem::path(directoryPath))); + if (!std::filesystem::exists(directory) || !std::filesystem::is_directory(directory)) + { + logger.log("Invalid \"%s\" directory!", system::ILogger::ELL_ERROR, directory.string().c_str()); + return false; + } + + for (const auto& entry : std::filesystem::directory_iterator(directory)) + if (!addFile(entry.path().string().c_str())) + return false; + + return true; + }; + + // parse json + { + std::vector jDirectories; + jsonMap["directories"].get_to(jDirectories); + + for (const auto& it : jDirectories) + if (!addFiles(it)) + return false; + + std::vector jFiles; + jsonMap["files"].get_to(jFiles); + + for (const auto& it : jFiles) + if (!addFile(it)) + return false; + } + + out.withGUI = false; + jsonMap["gui"].get_to(out.withGUI); + + out.writeAssets = false; + jsonMap["writeAssets"].get_to(out.writeAssets); + + return true; +} \ No newline at end of file diff --git a/50.IESViewer/AppInputParser.hpp b/50.IESViewer/AppInputParser.hpp new file mode 100644 index 000000000..18b5e4fe3 --- /dev/null +++ b/50.IESViewer/AppInputParser.hpp @@ -0,0 +1,27 @@ +#ifndef _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ +#define _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ + +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" + +struct AppInputParser +{ +public: + struct Output + { + std::vector inputList; + bool withGUI; + bool writeAssets; + }; + + AppInputParser(nbl::system::logger_opt_ptr _logger = nullptr) : logger(_logger) {} + bool parse(Output& out, const std::string jFilePath, const std::string cwd = "."); + +private: + nbl::system::logger_opt_ptr logger; +}; + +#endif // _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ \ No newline at end of file diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp new file mode 100644 index 000000000..136d6d63b --- /dev/null +++ b/50.IESViewer/AppRender.cpp @@ -0,0 +1,227 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" +#include "app_resources/common.hlsl" + +IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::microseconds nextPresentationTimestamp) +{ + const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; + auto* const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + { + struct + { + std::vector mouse{}; std::vector keyboard{}; + } captured; + + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { processMouse(events); for (const auto& e : events) captured.mouse.emplace_back(e); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { processKeyboard(events); for (const auto& e : events) captured.keyboard.emplace_back(e); }, m_logger.get()); + + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), + .displaySize = {m_window->getWidth(),m_window->getHeight()}, + .mouseEvents = captured.mouse, + .keyboardEvents = captured.keyboard + }; + + ui.it->update(params); + } + + auto& ies = assets[activeAssetIx]; + PushConstants pc; + { + pc.vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(); + pc.hAnglesBDA = ies.buffers.hAngles->getDeviceAddress(); + pc.dataBDA = ies.buffers.data->getDeviceAddress(); + + const auto* profile = ies.getProfile(); + + pc.maxIValue = profile->getMaxCandelaValue(); + pc.vAnglesCount = profile->getVertAngles().size(); + pc.hAnglesCount = profile->getHoriAngles().size(); + pc.dataCount = profile->getData().size(); + + pc.zAngleDegreeRotation = ies.zDegree; + pc.mode = ies.mode; + pc.texIx = activeAssetIx; + } + + for (auto& buffer : { ies.buffers.data, ies.buffers.hAngles, ies.buffers.vAngles }) // flush request for sanity + { + auto bound = buffer->getBoundMemory(); + if (bound.memory->haveToMakeVisible()) + { + const ILogicalDevice::MappedMemoryRange range(bound.memory, bound.offset, buffer->getSize()); + m_device->flushMappedMemoryRanges(1, &range); + } + } + + auto* const descriptor = descriptors[0].get(); + auto* image = ies.getActiveImage(); + + // Compute + { + cb->beginDebugMarker("IES::compute"); + IES::barrier(cb, image); + auto* layout = computePipeline->getLayout(); + cb->bindComputePipeline(computePipeline.get()); + cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); + cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); + const auto xGroups = (ies.getProfile()->getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u; + cb->dispatch(xGroups, xGroups, 1); + cb->endDebugMarker(); + } + + // Graphics + { + cb->beginDebugMarker("IES::render"); + IES::barrier(cb, image); + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = m_window->getWidth(); + viewport.height = m_window->getHeight(); + } + cb->setViewport(0u, 1u, &viewport); + + VkRect2D scissor = + { + .offset = { 0, 0 }, + .extent = { m_window->getWidth(), m_window->getHeight() }, + }; + cb->setScissor(0u, 1u, &scissor); + + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; + const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = &depthValue, + .renderArea = currentRenderArea + }; + + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + { + auto* layout = graphicsPipeline->getLayout(); + cb->bindGraphicsPipeline(graphicsPipeline.get()); + cb->bindDescriptorSets(EPBP_GRAPHICS, layout, 0, 1, &descriptor); + cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); + ext::FullScreenTriangle::recordDrawCall(cb); + { + auto* imgui = ui.it.get(); + auto* pipeline = imgui->getPipeline(); + cb->bindGraphicsPipeline(pipeline); + const auto* ds = ui.descriptor->getDescriptorSet(); + cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); + const ISemaphore::SWaitInfo wait = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + if (!imgui->render(cb, wait)) + { + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); + return {}; + } + } + } + cb->endRenderPass(); + cb->endDebugMarker(); + cb->end(); + } + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + std::string caption = "[Nabla Engine] IES Viewer"; + { + m_window->setCaption(caption); + } + return retval; +} + +const video::IGPURenderpass::SCreationParams::SSubpassDependency* IESViewer::getDefaultSubpassDependencies() const +{ + // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + { + // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, + // don't want any writes to be available, we'll clear + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // color from ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + return dependencies; +} \ No newline at end of file diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp new file mode 100644 index 000000000..c4efc9ccf --- /dev/null +++ b/50.IESViewer/AppUI.cpp @@ -0,0 +1,105 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" +#include "imgui/imgui_internal.h" +#include "app_resources/common.hlsl" +#include "app_resources/imgui.opts.hlsl" + +void IESViewer::uiListener() +{ + auto& ies = assets[activeAssetIx]; + const auto name = path(ies.key).filename().string(); + auto* profile = ies.getProfile(); + const float lowerBound = (float)profile->getHoriAngles().front(); + const float upperBound = (float)profile->getHoriAngles().back(); + const bool singleAngle = (upperBound == lowerBound); + + auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); + const ImGuiViewport* vp = ImGui::GetMainViewport(); + { + ImDrawList* fg = ImGui::GetForegroundDrawList(); + float x = vp->Pos.x + 8.f; + float y = vp->Pos.y + 8.f; + + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::modeToRS(ies.mode)); + y += ImGui::GetTextLineHeightWithSpacing(); + + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::symmetryToRS(profile->getSymmetry())); + y += ImGui::GetTextLineHeightWithSpacing(); + + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), name.c_str()); + y += ImGui::GetTextLineHeightWithSpacing(); + + char b1[64]; snprintf(b1, sizeof(b1), "%.3f\xC2\xB0", angle); + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), b1); + } + + { + const float pad = 8.f; + const float sliderW = 74.f; + const float sliderH = ImMin(vp->Size.y - pad * 2.f, 260.f); + ImGui::SetNextWindowPos(ImVec2(vp->Pos.x + vp->Size.x - sliderW - pad, vp->Pos.y + pad), ImGuiCond_Always); + ImGui::SetNextWindowSize(ImVec2(sliderW, sliderH), ImGuiCond_Always); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0, 0)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); + ImGuiWindowFlags flags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoBringToFrontOnFocus | + ImGuiWindowFlags_NoNav | ImGuiWindowFlags_NoBackground; + + if (ImGui::Begin("AngleSliderOverlay", nullptr, flags)) + { + ImGui::InvisibleButton("##fader_area", ImGui::GetContentRegionAvail()); + ImVec2 rmin = ImGui::GetItemRectMin(); + ImVec2 rmax = ImGui::GetItemRectMax(); + ImDrawList* dl = ImGui::GetWindowDrawList(); + ImU32 col = IM_COL32(220, 60, 60, 255); + + float knobR = 7.f; + float trackX = rmax.x - 12.f; + float y0 = rmin.y + knobR + 1.f; + float y1 = rmax.y - knobR - 1.f; + + dl->AddLine(ImVec2(trackX, y0), ImVec2(trackX, y1), col, 3.f); + + if (singleAngle) + { + float y = (y0 + y1) * 0.5f; + dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); + char tb[32]; snprintf(tb, sizeof(tb), "%.0f", lowerBound); + ImVec2 ts = ImGui::CalcTextSize(tb); + dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); + } + else + { + for (int i = 0; i < 5; ++i) + { + float v = lowerBound + (upperBound - lowerBound) * (float(i) / 4.f); + float t = (v - lowerBound) / (upperBound - lowerBound); + float y = y1 - t * (y1 - y0); + dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); + char tb[32]; snprintf(tb, sizeof(tb), "%.0f", v); + ImVec2 ts = ImGui::CalcTextSize(tb); + dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); + } + } + + float t = singleAngle ? 0.5f : (angle - lowerBound) / (upperBound - lowerBound); + float knobY = y1 - t * (y1 - y0); + dl->AddCircleFilled(ImVec2(trackX, knobY), knobR, col); + dl->AddCircle(ImVec2(trackX, knobY), knobR, ImGui::GetColorU32(ImGuiCol_Border)); + + if (!singleAngle && (ImGui::IsItemHovered() || ImGui::IsItemActive()) && ImGui::IsMouseDown(0)) + { + float my = ImClamp(ImGui::GetIO().MousePos.y, y0, y1); + float nt = (y1 - my) / (y1 - y0); + angle = lowerBound + nt * (upperBound - lowerBound); + } + } + ImGui::End(); + ImGui::PopStyleVar(2); + } + + ies.zDegree = angle; +} \ No newline at end of file diff --git a/50.IESViewer/CMakeLists.txt b/50.IESViewer/CMakeLists.txt new file mode 100644 index 000000000..7de094510 --- /dev/null +++ b/50.IESViewer/CMakeLists.txt @@ -0,0 +1,85 @@ +if(NBL_BUILD_IMGUI) +set(SRCs + AppInit.cpp AppRender.cpp AppGPU.cpp AppUI.cpp AppEvent.cpp AppInputParser.cpp + App.hpp AppInputParser.hpp + IES.cpp IES.hpp + inputs.json +) + +set(LIBs + imtestengine + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" +) + +nbl_create_executable_project("${SRCs}" "" "" "${LIBs}") +target_link_libraries(${EXECUTABLE_NAME} PRIVATE nlohmann_json::nlohmann_json) + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/compute.hlsl + app_resources/pixel.hlsl + app_resources/vertex.hlsl + app_resources/imgui.vertex.hlsl + app_resources/imgui.pixel.hlsl + app_resources/imgui.opts.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/compute.hlsl", + "KEY": "compute" + }, + { + "INPUT": "app_resources/pixel.hlsl", + "KEY": "pixel" + }, + { + "INPUT": "app_resources/vertex.hlsl", + "KEY": "vertex" + }, + { + "INPUT": "app_resources/imgui.vertex.hlsl", + "KEY": "imgui.vertex" + }, + { + "INPUT": "app_resources/imgui.pixel.hlsl", + "KEY": "imgui.pixel" + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${NBL_ROOT_PATH}/include" # a workaround due to imgui ext headers which are not part of Nabla builtin archive + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) +endif() \ No newline at end of file diff --git a/50.IESViewer/IES.cpp b/50.IESViewer/IES.cpp new file mode 100644 index 000000000..357d3d88b --- /dev/null +++ b/50.IESViewer/IES.cpp @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "IES.hpp" + +const asset::CIESProfile* IES::getProfile() const +{ + auto* meta = bundle.getMetadata(); + if (meta) + return &meta->selfCast()->profile; + + return nullptr; +} + +video::IGPUImage* IES::getActiveImage() const +{ + switch (mode) + { + case EM_IES_C: + return views.candela->getCreationParameters().image.get(); + case EM_SPERICAL_C: + return views.spherical->getCreationParameters().image.get(); + case EM_DIRECTION: + return views.direction->getCreationParameters().image.get(); + case EM_PASS_T_MASK: + return views.mask->getCreationParameters().image.get(); + + case EM_CDC: + default: + return nullptr; + } +} + +const char* IES::modeToRS(E_MODE mode) +{ + switch (mode) + { + case IES::EM_CDC: + return "Candlepower Distribution Curve"; + case IES::EM_IES_C: + return "Sample IES Candela"; + case IES::EM_SPERICAL_C: + return "Sample Spherical Coordinates"; + case IES::EM_DIRECTION: + return "Sample Direction"; + case IES::EM_PASS_T_MASK: + return "Sample Pass Mask"; + default: + return "ERROR (mode)"; + } +} + +const char* IES::symmetryToRS(CIESProfile::LuminairePlanesSymmetry symmetry) +{ + switch (symmetry) + { + case asset::CIESProfile::ISOTROPIC: + return "ISOTROPIC"; + case asset::CIESProfile::QUAD_SYMETRIC: + return "QUAD_SYMETRIC"; + case asset::CIESProfile::HALF_SYMETRIC: + return "HALF_SYMETRIC"; + case asset::CIESProfile::OTHER_HALF_SYMMETRIC: + return "OTHER_HALF_SYMMETRIC"; + case asset::CIESProfile::NO_LATERAL_SYMMET: + return "NO_LATERAL_SYMMET"; + default: + return "ERROR (symmetry)"; + } +} \ No newline at end of file diff --git a/50.IESViewer/IES.hpp b/50.IESViewer/IES.hpp new file mode 100644 index 000000000..04485366f --- /dev/null +++ b/50.IESViewer/IES.hpp @@ -0,0 +1,118 @@ +#ifndef _THIS_EXAMPLE_IES_HPP_ +#define _THIS_EXAMPLE_IES_HPP_ + +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" + +NBL_EXPOSE_NAMESPACES + +struct IES +{ + enum E_MODE : uint32_t + { + EM_CDC, //! Candlepower Distribution Curve + EM_IES_C, //! IES Candela + EM_SPERICAL_C, //! Sperical coordinates + EM_DIRECTION, //! Sample direction + EM_PASS_T_MASK, //! Test mask + + EM_SIZE + }; + + struct + { + smart_refctd_ptr candela = nullptr, spherical = nullptr, direction = nullptr, mask = nullptr; + } views; + + struct + { + smart_refctd_ptr vAngles = nullptr, hAngles = nullptr, data = nullptr; + } buffers; + + SAssetBundle bundle; + std::string key; + + float zDegree = 0.f; + E_MODE mode = EM_CDC; + + const asset::CIESProfile* getProfile() const; + video::IGPUImage* getActiveImage() const; + + static const char* modeToRS(E_MODE mode); + static const char* symmetryToRS(CIESProfile::LuminairePlanesSymmetry symmetry); + + template + requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + static inline bool barrier(IGPUCommandBuffer* const cb, const std::span images) + { + if (images.empty()) + return false; + + if (not cb) + return false; + + using image_memory_barrier_t = IGPUCommandBuffer::SImageMemoryBarrier; + const IGPUImage::SSubresourceRange range = + { + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + + std::vector imageBarriers(images.size()); + + for (uint32_t i = 0; i < imageBarriers.size(); ++i) + { + auto& it = imageBarriers[i] = + { + .barrier = {.dep = {}}, + .image = images[i], + .subresourceRange = range, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = newLayout + }; + + if constexpr (newLayout == IImage::LAYOUT::GENERAL) + { + // READ_ONLY_OPTIMAL -> GENERAL, RW + it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + it.barrier.dep.srcAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; + it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + it.barrier.dep.dstAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; + it.oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + } + else if (newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + { + // GENERAL -> READ_ONLY_OPTIMAL, RO + it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + it.barrier.dep.srcAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; + it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + it.barrier.dep.dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; + it.oldLayout = IImage::LAYOUT::GENERAL; + } + + if constexpr (undefined) + it.oldLayout = IImage::LAYOUT::UNDEFINED; // transition for init + } + + return cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {}, .bufBarriers = {}, .imgBarriers = imageBarriers }); + } + + template + requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + static inline bool barrier(IGPUCommandBuffer* const cb, video::IGPUImage* image) + { + if (not image) + return false; + + auto in = std::to_array({ image }); + return barrier(cb, in); + } +}; + +#endif // _THIS_EXAMPLE_IES_HPP_ \ No newline at end of file diff --git a/50.IESViewer/app_resources/common.hlsl b/50.IESViewer/app_resources/common.hlsl new file mode 100644 index 000000000..9705c2282 --- /dev/null +++ b/50.IESViewer/app_resources/common.hlsl @@ -0,0 +1,67 @@ +#ifndef _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ +#define _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +#ifdef __HLSL_VERSION +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#endif // __HLSL_VERSION + +// -> TODO: use NBL_CONTEXPR or something +#ifndef UINT16_MAX +#define UINT16_MAX 65535u // would be cool if we have this define somewhere or GLSL do +#endif // UINT16_MAX +#ifndef M_PI +#define M_PI 3.1415926535897932384626433832795f // would be cool if we have this define somewhere or GLSL do +#endif // M_PI + +#define M_HALF_PI M_PI/2.0f // would be cool if we have this define somewhere or GLSL do +#define QUANT_ERROR_ADMISSIBLE 1/1024 + +#define WORKGROUP_SIZE 256u +#define WORKGROUP_DIMENSION 16u +// <- + wipe whatever we already have + +// TODO: since NSC prebuilds into SPIRV - maybe could make it a CMake option with a default val +#define MAX_IES_IMAGES 6969 + +using namespace nbl::hlsl; + +struct PushConstants +{ + uint64_t hAnglesBDA; + uint64_t vAnglesBDA; + uint64_t dataBDA; + float64_t maxIValue; + + uint32_t hAnglesCount; + uint32_t vAnglesCount; + uint32_t dataCount; + + uint32_t mode; + uint32_t texIx; + float32_t zAngleDegreeRotation; + + uint32_t dummy; + + #ifdef __HLSL_VERSION + float64_t getHorizontalAngle(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(hAnglesBDA) + i).deref().load(); } + float64_t getVerticalAngle(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(vAnglesBDA) + i).deref().load(); } + float64_t getData(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(dataBDA) + i).deref().load(); } + #endif // __HLSL_VERSION +}; + +#ifdef __HLSL_VERSION +[[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MAX_IES_IMAGES]; +[[vk::binding(1, 0)]] Texture2D inSphericalCoordinatesImage[MAX_IES_IMAGES]; +[[vk::binding(2, 0)]] Texture2D inOUVProjectionDirectionImage[MAX_IES_IMAGES]; +[[vk::binding(3, 0)]] Texture2D inPassTMaskImage[MAX_IES_IMAGES]; +[[vk::binding(0 + 10, 0)]] RWTexture2D outIESCandelaImage[MAX_IES_IMAGES]; +[[vk::binding(1 + 10, 0)]] RWTexture2D outSphericalCoordinatesImage[MAX_IES_IMAGES]; +[[vk::binding(2 + 10, 0)]] RWTexture2D outOUVProjectionDirectionImage[MAX_IES_IMAGES]; +[[vk::binding(3 + 10, 0)]] RWTexture2D outPassTMask[MAX_IES_IMAGES]; +[[vk::binding(0 + 100, 0)]] SamplerState generalSampler; +[[vk::push_constant]] struct PushConstants pc; +#endif // __HLSL_VERSION + +#endif // _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ diff --git a/50.IESViewer/app_resources/compute.hlsl b/50.IESViewer/app_resources/compute.hlsl new file mode 100644 index 000000000..cf22466fc --- /dev/null +++ b/50.IESViewer/app_resources/compute.hlsl @@ -0,0 +1,222 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "common.hlsl" + +float32_t3 octahedronUVToDir(float64_t2 uv) +{ + float32_t3 position = float32_t3((uv * 2.0 - 1.0).xy, 0.0); + float32_t2 absP = float32_t2(abs(position.x), abs(position.y)); + + position.z = 1.0 - absP.x - absP.y; + + if (position.z < 0.0) + { + position.x = sign(position.x) * (1.0 - absP.y); + position.y = sign(position.y) * (1.0 - absP.x); + } + + // rotate position vector around Z-axis with "pc.zAngleDegreeRotation" + if (pc.zAngleDegreeRotation != 0.0) + { + float64_t rDegree = pc.zAngleDegreeRotation; + + const float32_t zAngleRadians = float32_t(rDegree * M_PI / 180.0); + const float64_t cosineV = cos(zAngleRadians); + const float64_t sineV = sin(zAngleRadians); + + position = float32_t3(cosineV * position.x - cosineV * position.y, sineV * position.x + sineV * position.y, position.z); + } + + return normalize(position); +} + +//! Returns spherical coordinates with physics convention in radians +/* + https://en.wikipedia.org/wiki/Spherical_coordinate_system#/media/File:3D_Spherical.svg + Retval.x is "theta" polar angle in range [0, PI] & Retval.y "phi" is azimuthal angle + in [-PI, PI] range +*/ + +float32_t2 sphericalDirToRadians(float32_t3 direction) +{ + float32_t theta = acos(clamp(direction.z / length(direction), -1.0, 1.0)); + float32_t phi = atan2(direction.y, direction.x); // TODO: check it + + return float32_t2(theta, phi); +} + +uint32_t implGetVUB(const float64_t angle) +{ + for (uint32_t i = 0; i < pc.vAnglesCount; ++i) + if (pc.getVerticalAngle(i) > angle) + return i; + + return pc.vAnglesCount; +} + +uint32_t implGetHUB(const float64_t angle) +{ + for (uint32_t i = 0; i < pc.hAnglesCount; ++i) + if (pc.getHorizontalAngle(i) > angle) + return i; + + return pc.hAnglesCount; +} + +uint32_t getVLB(const float64_t angle) +{ + return uint32_t(max(int(implGetVUB(angle)) - 1, 0)); +} + +uint32_t getHLB(const float64_t angle) +{ + return uint32_t(max(int(implGetHUB(angle)) - 1, 0)); +} + +uint32_t getVUB(const float64_t angle) +{ + return uint32_t(min(int(implGetVUB(angle)), int(pc.vAnglesCount) - 1)); +} + +uint32_t getHUB(const float64_t angle) +{ + return uint32_t(min(int(implGetHUB(angle)), int(pc.hAnglesCount) - 1)); +} + +float64_t getValue(uint32_t i, uint32_t j) +{ + return pc.getData(pc.vAnglesCount * i + j); +} + +// symmetry +#define ISOTROPIC 0u +#define QUAD_SYMETRIC 1u +#define HALF_SYMETRIC 2u +#define NO_LATERAL_SYMMET 3u + +uint32_t getSymmetry() // TODO: to reduce check time we could pass it with PCs +{ + if (pc.hAnglesCount < 2) // careful here, somebody can break it by feeding us with too much data by mistake + return ISOTROPIC; + + const float64_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); + + if (hABack == 90) + return QUAD_SYMETRIC; + else if (hABack == 180) // note that OTHER_HALF_SYMMETRIC = HALF_SYMETRIC here + return HALF_SYMETRIC; + else + return NO_LATERAL_SYMMET; +} + +float32_t wrapPhi(const float32_t phi, const uint32_t symmetry) //! wrap phi spherical coordinate compoment to range defined by symmetry +{ + switch (symmetry) + { + case ISOTROPIC: + return 0.0; + case QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range + { + float32_t wrapPhi = abs(phi); //! first MIRROR + + if (wrapPhi > M_HALF_PI) //! then REPEAT + wrapPhi = clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0, M_HALF_PI); + + return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 + } + case HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range + return abs(phi); //! eg. maps (in degress) 181 -> 179 or 359 -> 1 + case NO_LATERAL_SYMMET: + { + if (phi < 0) + return phi + 2.0 * M_PI; + else + return phi; + } + } + + return 69; +} + +float64_t sampleI(const float32_t2 sphericalCoordinates, const uint32_t symmetry) +{ + const float64_t vAngle = degrees(sphericalCoordinates.x), hAngle = degrees(wrapPhi(sphericalCoordinates.y, symmetry)); + + float64_t vABack = pc.getVerticalAngle(pc.vAnglesCount - 1); + float64_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); + + if (vAngle > vABack) + return 0.0; + + // bilinear interpolation + uint32_t j0 = getVLB(vAngle); + uint32_t j1 = getVUB(vAngle); + uint32_t i0 = symmetry == ISOTROPIC ? 0 : getHLB(hAngle); + uint32_t i1 = symmetry == ISOTROPIC ? 0 : getHUB(hAngle); + + float64_t uReciprocal = i1 == i0 ? 1.0 : 1.0 / (pc.getHorizontalAngle(i1) - pc.getHorizontalAngle(i0)); + float64_t vReciprocal = j1 == j0 ? 1.0 : 1.0 / (pc.getVerticalAngle(j1) - pc.getVerticalAngle(j0)); + + float64_t u = (hAngle - pc.getHorizontalAngle(i0)) * uReciprocal; + float64_t v = (vAngle - pc.getVerticalAngle(j0)) * vReciprocal; + + float64_t s0 = getValue(i0, j0) * (1.0 - v) + getValue(i0, j1) * (v); + float64_t s1 = getValue(i1, j0) * (1.0 - v) + getValue(i1, j1) * (v); + + return s0 * (1.0 - u) + s1 * u; +} + +//! Checks if (x,y) /in [0,PI] x [-PI,PI] product +/* + IES vertical range is [0, 180] degrees + and horizontal range is [0, 360] degrees + but for easier computations (MIRROR & MIRROW_REPEAT operations) + we represent horizontal range as [-180, 180] given spherical coordinates +*/ + +bool isWithinSCDomain(const float64_t2 p) +{ + const float64_t2 lb = float64_t2(0, -M_PI); + const float64_t2 ub = float64_t2(M_PI, M_PI); + + return all(lb <= p) && all(p <= ub); +} + +[numthreads(WORKGROUP_DIMENSION, WORKGROUP_DIMENSION, 1)] +[shader("compute")] +void main(uint32_t3 ID : SV_DispatchThreadID) +{ + uint32_t2 destinationSize; + outIESCandelaImage[pc.texIx].GetDimensions(destinationSize.x, destinationSize.y); + const uint32_t2 pixelCoordinates = uint32_t2(glsl::gl_GlobalInvocationID().x, glsl::gl_GlobalInvocationID().y); + + const float32_t VERTICAL_INVERSE = 1.0f / float32_t(destinationSize.x); + const float32_t HORIZONTAL_INVERSE = 1.0f / float32_t(destinationSize.y); + + if (all(pixelCoordinates < destinationSize)) + { + const float32_t2 uv = float32_t2((float32_t(pixelCoordinates.x) + 0.5) * VERTICAL_INVERSE, (float32_t(pixelCoordinates.y) + 0.5) * HORIZONTAL_INVERSE); + const float32_t3 direction = octahedronUVToDir(uv); + const float32_t2 sphericalCoordinates = sphericalDirToRadians(direction); // third radius spherical compoment is normalized and skipped + + const float32_t normD = length(direction); + float32_t2 mask; + + if (1.0f - QUANT_ERROR_ADMISSIBLE <= normD && normD <= 1.0f + QUANT_ERROR_ADMISSIBLE) + mask.x = 1.0; // pass + else + mask.x = 0; + + if (isWithinSCDomain(sphericalCoordinates)) + mask.y = 1.0; // pass + else + mask.y = 0; + + outIESCandelaImage[pc.texIx][pixelCoordinates] = float32_t(sampleI(sphericalCoordinates, getSymmetry()) / pc.maxIValue); + outSphericalCoordinatesImage[pc.texIx][pixelCoordinates] = sphericalCoordinates; + outOUVProjectionDirectionImage[pc.texIx][pixelCoordinates] = direction; + outPassTMask[pc.texIx][pixelCoordinates] = mask; + } +} \ No newline at end of file diff --git a/50.IESViewer/app_resources/imgui.opts.hlsl b/50.IESViewer/app_resources/imgui.opts.hlsl new file mode 100644 index 000000000..54f502b0f --- /dev/null +++ b/50.IESViewer/app_resources/imgui.opts.hlsl @@ -0,0 +1,16 @@ +#ifndef _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ +#define _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ + +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#define NBL_TEXTURES_BINDING_IX 0u +#define NBL_SAMPLER_STATES_BINDING_IX 1u +#define NBL_TEXTURES_SET_IX 0u +#define NBL_SAMPLER_STATES_SET_IX 0u +#define NBL_TEXTURES_COUNT 5u +#define NBL_SAMPLERS_COUNT 2u + +#endif // _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ + diff --git a/50.IESViewer/app_resources/imgui.pixel.hlsl b/50.IESViewer/app_resources/imgui.pixel.hlsl new file mode 100644 index 000000000..fe93c3a70 --- /dev/null +++ b/50.IESViewer/app_resources/imgui.pixel.hlsl @@ -0,0 +1,6 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "imgui.opts.hlsl" +#include "nbl/ext/ImGui/builtin/hlsl/fragment.hlsl" diff --git a/50.IESViewer/app_resources/imgui.vertex.hlsl b/50.IESViewer/app_resources/imgui.vertex.hlsl new file mode 100644 index 000000000..2063db84b --- /dev/null +++ b/50.IESViewer/app_resources/imgui.vertex.hlsl @@ -0,0 +1,5 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/ext/ImGui/builtin/hlsl/vertex.hlsl" diff --git a/50.IESViewer/app_resources/pixel.hlsl b/50.IESViewer/app_resources/pixel.hlsl new file mode 100644 index 000000000..5fe452b2d --- /dev/null +++ b/50.IESViewer/app_resources/pixel.hlsl @@ -0,0 +1,58 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "common.hlsl" +#include "nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl" +using namespace nbl::hlsl::ext::FullScreenTriangle; + +float32_t2 iesDirToUv(float32_t3 dir) +{ + float32_t sum = dot(float32_t3(1.0f, 1.0f, 1.0f), abs(dir)); + float32_t3 s = dir / sum; + + if (s.z < 0.0f) + s.xy = sign(s.xy) * (1.0f - abs(s.yx)); + + return s.xy * 0.5f + 0.5f; +} + +float32_t plot(float32_t cand, float32_t pct, float32_t bold) +{ + return smoothstep(pct-0.005*bold, pct, cand) - smoothstep( pct, pct+0.005*bold, cand); +} + +// vertical cut of IES (i.e. cut by plane x = 0) +float32_t f(float32_t2 uv) +{ + return inIESCandelaImage[pc.texIx].Sample(generalSampler, iesDirToUv(normalize(float32_t3(uv.x, 0.001, uv.y)))).x; +} + +[shader("pixel")] +float32_t4 PSMain(SVertexAttributes input) : SV_Target0 +{ + switch (pc.mode) + { + case 0: + { + float32_t2 ndc = input.uv * 2.f - 1.f; + float32_t dist = length(ndc) * 1.015625f; + float32_t p = plot(dist, 1.0f, 0.75f); + float32_t3 col = float32_t3(p, p, p); + + float32_t normalizedStrength = f(ndc); + if (dist < normalizedStrength) + col += float32_t3(1.0f, 0.0f, 0.0f); + + return float32_t4(col, 1.0f); + } + case 1: + return float32_t4(inIESCandelaImage[pc.texIx].Sample(generalSampler, input.uv).x, 0.f, 0.f, 1.f); + case 2: + return float32_t4(inSphericalCoordinatesImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); + case 3: + return float32_t4(inOUVProjectionDirectionImage[pc.texIx].Sample(generalSampler, input.uv).xyz, 1.f); + default: + return float32_t4(inPassTMaskImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); + } +} diff --git a/50.IESViewer/app_resources/vertex.hlsl b/50.IESViewer/app_resources/vertex.hlsl new file mode 100644 index 000000000..a0f565455 --- /dev/null +++ b/50.IESViewer/app_resources/vertex.hlsl @@ -0,0 +1,6 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +// small trick, temporary, we will have a separate rule for compiling this ext and embed into Nabla DLL +#include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" diff --git a/50.IESViewer/inputs.json b/50.IESViewer/inputs.json new file mode 100644 index 000000000..fbb833112 --- /dev/null +++ b/50.IESViewer/inputs.json @@ -0,0 +1,14 @@ +{ + "directories": [ + "mitsuba/ies/packages/leomoon-dot-com_ies-lights-pack/ies-lights-pack" + ], + "files": [ + "mitsuba/ies/ISOTROPIC/007cfb11e343e2f42e3b476be4ab684e.ies", + "mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies", + "mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies", + "mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies", + "mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies" + ], + "gui": true, + "writeAssets": false +} \ No newline at end of file diff --git a/50.IESViewer/main.cpp b/50.IESViewer/main.cpp new file mode 100644 index 000000000..579ac030b --- /dev/null +++ b/50.IESViewer/main.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" + +#define APP_WINDOW_WIDTH 640 +#define APP_WINDOW_HEIGHT 640 +#define APP_DEPTH_BUFFER_FORMAT EF_UNKNOWN + +IESViewer::IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({ APP_WINDOW_WIDTH, APP_WINDOW_HEIGHT }, APP_DEPTH_BUFFER_FORMAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) +{ + // empty +} + +NBL_MAIN_FUNC(IESViewer) \ No newline at end of file From 06b2d09bbc338ee3f57d27587d099bbf1ad22949 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 28 Oct 2025 16:27:07 +0100 Subject: [PATCH 057/219] add framebuffers, display 2D plot in separate imgui window --- 50.IESProfileTest/App.cpp | 16 - 50.IESProfileTest/App.hpp | 59 --- 50.IESProfileTest/AppEvent.cpp | 54 --- 50.IESProfileTest/AppGPU.cpp | 100 ---- 50.IESProfileTest/AppInit.cpp | 438 ------------------ 50.IESProfileTest/AppInputParser.cpp | 108 ----- 50.IESProfileTest/AppInputParser.hpp | 27 -- 50.IESProfileTest/AppRender.cpp | 227 --------- 50.IESProfileTest/AppUI.cpp | 105 ----- 50.IESProfileTest/CMakeLists.txt | 85 ---- 50.IESProfileTest/IES.cpp | 71 --- 50.IESProfileTest/IES.hpp | 118 ----- 50.IESProfileTest/app_resources/common.hlsl | 67 --- 50.IESProfileTest/app_resources/compute.hlsl | 222 --------- .../app_resources/imgui.opts.hlsl | 16 - .../app_resources/imgui.pixel.hlsl | 6 - .../app_resources/imgui.vertex.hlsl | 5 - 50.IESProfileTest/app_resources/pixel.hlsl | 58 --- 50.IESProfileTest/app_resources/vertex.hlsl | 6 - 50.IESProfileTest/inputs.json | 14 - 50.IESProfileTest/main.cpp | 6 - 50.IESViewer/App.hpp | 20 +- 50.IESViewer/AppEvent.cpp | 10 +- 50.IESViewer/AppGPU.cpp | 6 +- 50.IESViewer/AppInit.cpp | 141 ++++-- 50.IESViewer/AppRender.cpp | 74 +-- 50.IESViewer/AppUI.cpp | 38 +- 50.IESViewer/app_resources/imgui.opts.hlsl | 2 +- 50.IESViewer/main.cpp | 7 +- CMakeLists.txt | 2 +- common/include/nbl/examples/examples.hpp | 11 + 31 files changed, 221 insertions(+), 1898 deletions(-) delete mode 100644 50.IESProfileTest/App.cpp delete mode 100644 50.IESProfileTest/App.hpp delete mode 100644 50.IESProfileTest/AppEvent.cpp delete mode 100644 50.IESProfileTest/AppGPU.cpp delete mode 100644 50.IESProfileTest/AppInit.cpp delete mode 100644 50.IESProfileTest/AppInputParser.cpp delete mode 100644 50.IESProfileTest/AppInputParser.hpp delete mode 100644 50.IESProfileTest/AppRender.cpp delete mode 100644 50.IESProfileTest/AppUI.cpp delete mode 100644 50.IESProfileTest/CMakeLists.txt delete mode 100644 50.IESProfileTest/IES.cpp delete mode 100644 50.IESProfileTest/IES.hpp delete mode 100644 50.IESProfileTest/app_resources/common.hlsl delete mode 100644 50.IESProfileTest/app_resources/compute.hlsl delete mode 100644 50.IESProfileTest/app_resources/imgui.opts.hlsl delete mode 100644 50.IESProfileTest/app_resources/imgui.pixel.hlsl delete mode 100644 50.IESProfileTest/app_resources/imgui.vertex.hlsl delete mode 100644 50.IESProfileTest/app_resources/pixel.hlsl delete mode 100644 50.IESProfileTest/app_resources/vertex.hlsl delete mode 100644 50.IESProfileTest/inputs.json delete mode 100644 50.IESProfileTest/main.cpp diff --git a/50.IESProfileTest/App.cpp b/50.IESProfileTest/App.cpp deleted file mode 100644 index 60195da32..000000000 --- a/50.IESProfileTest/App.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "App.hpp" - -#define APP_WINDOW_WIDTH 640 -#define APP_WINDOW_HEIGHT 640 -#define APP_DEPTH_BUFFER_FORMAT EF_UNKNOWN - -IESViewer::IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), - device_base_t({ APP_WINDOW_WIDTH, APP_WINDOW_HEIGHT }, APP_DEPTH_BUFFER_FORMAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) -{ - // empty -} \ No newline at end of file diff --git a/50.IESProfileTest/App.hpp b/50.IESProfileTest/App.hpp deleted file mode 100644 index f06e8fc14..000000000 --- a/50.IESProfileTest/App.hpp +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef _THIS_EXAMPLE_APP_HPP_ -#define _THIS_EXAMPLE_APP_HPP_ - -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/examples/examples.hpp" -#include "nbl/ui/ICursorControl.h" -#include "nbl/ext/ImGui/ImGui.h" -#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" -#include "IES.hpp" - -NBL_EXPOSE_NAMESPACES - -class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApplication -{ - using device_base_t = MonoWindowApplication; - using asset_base_t = BuiltinResourcesApplication; - -public: - IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD); - - bool onAppInitialized(smart_refctd_ptr&& system) override; - IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override; - -protected: - const IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override; - -private: - smart_refctd_ptr graphicsPipeline; - smart_refctd_ptr computePipeline; - std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> descriptors; - - bool running = true; - std::vector assets; - size_t activeAssetIx = 0; - - size_t m_realFrameIx = 0; - smart_refctd_ptr m_semaphore; - std::array, device_base_t::MaxFramesInFlight> m_cmdBufs; - InputSystem::ChannelReader mouse; - InputSystem::ChannelReader keyboard; - - struct { - smart_refctd_ptr it; - smart_refctd_ptr descriptor; - } ui; - - void processMouse(const IMouseEventChannel::range_t& events); - void processKeyboard(const IKeyboardEventChannel::range_t& events); - - smart_refctd_ptr createImageView(const size_t width, const size_t height, E_FORMAT format, std::string name); - smart_refctd_ptr createBuffer(const core::vector& in, std::string name); - - void uiListener(); -}; - -#endif // _THIS_EXAMPLE_APP_HPP_ \ No newline at end of file diff --git a/50.IESProfileTest/AppEvent.cpp b/50.IESProfileTest/AppEvent.cpp deleted file mode 100644 index 07a11f5e9..000000000 --- a/50.IESProfileTest/AppEvent.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "App.hpp" - -void IESViewer::processMouse(const nbl::ui::IMouseEventChannel::range_t& events) -{ - for (auto it = events.begin(); it != events.end(); it++) - { - auto ev = *it; - - if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) - { - auto& ies = assets[activeAssetIx]; - auto* profile = ies.getProfile(); - - auto impulse = ev.scrollEvent.verticalScroll * 0.02f; - ies.zDegree = std::clamp(ies.zDegree + impulse, profile->getHoriAngles().front(), profile->getHoriAngles().back()); - } - } -} - -void IESViewer::processKeyboard(const nbl::ui::IKeyboardEventChannel::range_t& events) -{ - for (auto it = events.begin(); it != events.end(); it++) - { - const auto ev = *it; - - if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) - { - if (ev.keyCode == nbl::ui::EKC_UP_ARROW) - activeAssetIx = std::clamp(activeAssetIx + 1, 0, assets.size() - 1u); - else if (ev.keyCode == nbl::ui::EKC_DOWN_ARROW) - activeAssetIx = std::clamp(activeAssetIx - 1, 0, assets.size() - 1u); - - auto& ies = assets[activeAssetIx]; - - if (ev.keyCode == nbl::ui::EKC_C) - ies.mode = IES::EM_CDC; - else if (ev.keyCode == nbl::ui::EKC_V) - ies.mode = IES::EM_IES_C; - else if (ev.keyCode == nbl::ui::EKC_S) - ies.mode = IES::EM_SPERICAL_C; - else if (ev.keyCode == nbl::ui::EKC_D) - ies.mode = IES::EM_DIRECTION; - else if (ev.keyCode == nbl::ui::EKC_M) - ies.mode = IES::EM_PASS_T_MASK; - - if (ev.keyCode == nbl::ui::EKC_Q) - running = false; - } - } -} \ No newline at end of file diff --git a/50.IESProfileTest/AppGPU.cpp b/50.IESProfileTest/AppGPU.cpp deleted file mode 100644 index aa13994d6..000000000 --- a/50.IESProfileTest/AppGPU.cpp +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "App.hpp" - -core::smart_refctd_ptr IESViewer::createImageView(const size_t width, const size_t height, asset::E_FORMAT format, std::string name) -{ - IGPUImage::SCreationParams imageParams{}; - imageParams.type = IImage::E_TYPE::ET_2D; - imageParams.extent.height = height; - imageParams.extent.width = width; - imageParams.extent.depth = 1u; - imageParams.format = format; - imageParams.mipLevels = 1u; - imageParams.flags = IImage::ECF_NONE; - imageParams.arrayLayers = 1u; - imageParams.samples = IImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; - imageParams.usage = bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_STORAGE_BIT; - - auto image = m_device->createImage(std::move(imageParams)); - image->setObjectDebugName(name.c_str()); - - if (!image) - { - m_logger->log("Failed to create \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - auto allocation = m_device->allocate(image->getMemoryReqs(), image.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); - if (!allocation.isValid()) - { - m_logger->log("Failed to allocate device memory for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - IGPUImageView::SCreationParams viewParams{}; - viewParams.image = std::move(image); - viewParams.format = format; - viewParams.viewType = IGPUImageView::ET_2D; - viewParams.flags = IImageViewBase::ECF_NONE; - viewParams.subresourceRange.baseArrayLayer = 0u; - viewParams.subresourceRange.baseMipLevel = 0u; - viewParams.subresourceRange.layerCount = 1u; - viewParams.subresourceRange.levelCount = 1u; - viewParams.subresourceRange.aspectMask = core::bitflag(asset::IImage::EAF_COLOR_BIT); - - auto imageView = m_device->createImageView(std::move(viewParams)); - - if (not imageView) - m_logger->log("Failed to create image view for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); - - return imageView; -} - -core::smart_refctd_ptr IESViewer::createBuffer(const core::vector& in, std::string name) -{ - IGPUBuffer::SCreationParams bufferParams = {}; - bufferParams.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT /*TODO: <- double check*/;; - bufferParams.size = sizeof(asset::CIESProfile::IES_STORAGE_FORMAT) * in.size(); - - auto buffer = m_device->createBuffer(std::move(bufferParams)); - buffer->setObjectDebugName(name.c_str()); - - if (not buffer) - { - m_logger->log("Failed to create \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - auto memoryReqs = buffer->getMemoryReqs(); - - if (m_utils) - memoryReqs.memoryTypeBits &= m_utils->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); - - auto allocation = m_device->allocate(memoryReqs, buffer.get(), core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT)); - if (not allocation.isValid()) - { - m_logger->log("Failed to allocate \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - auto* mappedPointer = allocation.memory->map({ 0ull, memoryReqs.size }, IDeviceMemoryAllocation::EMCAF_READ_AND_WRITE); - - if (not mappedPointer) - { - m_logger->log("Failed to map device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - memcpy(mappedPointer, in.data(), buffer->getSize()); - - if (not allocation.memory->unmap()) - { - m_logger->log("Failed to unmap device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } - - return buffer; -} \ No newline at end of file diff --git a/50.IESProfileTest/AppInit.cpp b/50.IESProfileTest/AppInit.cpp deleted file mode 100644 index 60fbb54b6..000000000 --- a/50.IESProfileTest/AppInit.cpp +++ /dev/null @@ -1,438 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "App.hpp" -#include "AppInputParser.hpp" -#include "app_resources/common.hlsl" -#include "app_resources/imgui.opts.hlsl" -#include "nbl/this_example/builtin/build/spirv/keys.hpp" - -#define MEDIA_ENTRY "../../media" -#define INPUT_JSON_FILE "../inputs.json" - -bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) -{ - if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - - const auto media = absolute(path(MEDIA_ENTRY)); - - AppInputParser::Output out; - AppInputParser parser(system::logger_opt_ptr(m_logger.get())); - if (!parser.parse(out, INPUT_JSON_FILE, media.string())) - return false; - - m_logger->log("Loading IES assets..", system::ILogger::ELL_INFO); - { - auto start = std::chrono::high_resolution_clock::now(); - size_t loaded = {}, total = out.inputList.size(); - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = system::logger_opt_ptr(m_logger.get()); - - for (const auto& in : out.inputList) - { - auto asset = m_assetMgr->getAsset(in.c_str(), lp); - - if (asset.getMetadata()) - { - auto& ies = assets.emplace_back(); - ies.bundle = std::move(asset); - ies.key = path(in).lexically_relative(media).string(); - ++loaded; - - m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, in.c_str()); - } - else - m_logger->log("Failed to load metadata for \"%s\"! Skipping..", system::ILogger::ELL_WARNING, in.c_str()); - } - const auto sl = std::to_string(loaded), st = std::to_string(total); - const bool passed = loaded == total; - - if (not passed) - { - auto diff = std::to_string(total - loaded); - m_logger->log("Failed to load [%s/%s] IES assets!", system::ILogger::ELL_ERROR, diff.c_str(), st.c_str()); - } - auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); - auto took = std::to_string(elapsed.count()); - m_logger->log("Finished loading IES assets, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); - } - - m_logger->log("Creating GPU IES resources..", system::ILogger::ELL_INFO); - { - auto start = std::chrono::high_resolution_clock::now(); - for (auto& ies : assets) - { - const auto* profile = ies.getProfile(); - const auto resolution = profile->getOptimalIESResolution(); - - #define CREATE_VIEW(VIEW, FORMAT, NAME) \ - if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT, NAME + ies.key) )) return false; - - CREATE_VIEW(ies.views.candela, asset::EF_R16_UNORM, "IES Candela Data Image: ") - CREATE_VIEW(ies.views.spherical, asset::EF_R32G32_SFLOAT, "IES Spherical Data Image: ") - CREATE_VIEW(ies.views.direction, asset::EF_R32G32B32A32_SFLOAT, "IES Direction Data Image: ") - CREATE_VIEW(ies.views.mask, asset::EF_R8G8_UNORM, "IES Mask Data Image: ") - - #define CREATE_BUFFER(BUFFER, DATA, NAME) \ - if (!(BUFFER = createBuffer(DATA, NAME + ies.key) )) return false; - - CREATE_BUFFER(ies.buffers.vAngles, profile->getVertAngles(), "IES Vertical Angles Buffer: ") - CREATE_BUFFER(ies.buffers.hAngles, profile->getHoriAngles(), "IES Horizontal Angles Buffer: ") - CREATE_BUFFER(ies.buffers.data, profile->getData(), "IES Data Buffer: ") - } - auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); - auto took = std::to_string(elapsed.count()); - m_logger->log("Finished creating GPU IES resources, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); - } - - auto createShader = [&]() -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = system::logger_opt_ptr(m_logger.get()); - lp.workingDirectory = "app_resources"; - - auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); - auto assetBundle = m_assetMgr->getAsset(key, lp); - const auto assets = assetBundle.getContents(); - - if (assets.empty()) - { - m_logger->log("Failed to load \"%s\" shader!", system::ILogger::ELL_ERROR, key.data()); - return nullptr; - } - - auto spirvShader = IAsset::castDown(assets[0]); - - if (spirvShader) - m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, key.data()); - else - m_logger->log("Failed to cast \"%s\" asset to IShader!", system::ILogger::ELL_ERROR, key.data()); - - return spirvShader; - }; - - #define CREATE_SHADER(SHADER, PATH) \ - if (!(SHADER = createShader.template operator()() )) return false; - - m_logger->log("Loading GPU shaders..", system::ILogger::ELL_INFO); - smart_refctd_ptr compute, pixel, vertex, imguiVertex, imguiPixel; - { - auto start = std::chrono::high_resolution_clock::now(); - CREATE_SHADER(compute, "compute") - CREATE_SHADER(pixel, "pixel") - CREATE_SHADER(vertex, "vertex") - CREATE_SHADER(imguiVertex, "imgui.vertex") - CREATE_SHADER(imguiPixel, "imgui.pixel") - auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); - auto took = std::to_string(elapsed.count()); - m_logger->log("Finished loading GPU shaders, took %s seconds!", system::ILogger::ELL_PERFORMANCE, took.c_str()); - } - - // Pipelines & Descriptor Sets - { - using binding_flags_t = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; - using stage_flags_t = asset::IShader::E_SHADER_STAGE; - static constexpr auto TexturesCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT) | binding_flags_t::ECF_PARTIALLY_BOUND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT; - static constexpr auto SamplersCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT); - static constexpr auto StageFlags = core::bitflag(stage_flags_t::ESS_FRAGMENT) | stage_flags_t::ESS_COMPUTE; - - //! single descriptor for both compute & graphics, we will only need to trasition images' layout with a barrier - #define BINDING_TEXTURE(IX, TYPE) { .binding = IX, .type = TYPE, .createFlags = TexturesCreateFlags, .stageFlags = StageFlags, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr } - #define BINDING_SAMPLER(IX) { .binding = IX, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = StageFlags, .count = 1u, .immutableSamplers = nullptr } - static constexpr auto bindings = std::to_array - ({ - BINDING_TEXTURE(0u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(0u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // candela - BINDING_TEXTURE(1u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(1u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // spherical - BINDING_TEXTURE(2u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(2u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // direction - BINDING_TEXTURE(3u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(3u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // mask - BINDING_SAMPLER(0u + 100u) - }); - - const uint32_t texturesCount = assets.size(); - smart_refctd_ptr generalSampler; - { - IGPUSampler::SParams params; - params.AnisotropicFilter = 1u; - params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; - params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; - params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; - params.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK; - params.MinFilter = ISampler::ETF_LINEAR; - params.MaxFilter = ISampler::ETF_LINEAR; - params.MipmapMode = ISampler::ESMM_LINEAR; - params.AnisotropicFilter = 0u; - params.CompareEnable = false; - params.CompareFunc = ISampler::ECO_ALWAYS; - - generalSampler = m_device->createSampler(params); - - if (not generalSampler) - { - m_logger->log("Failed to create sampler!", system::ILogger::ELL_ERROR); - return false; - } - - generalSampler->setObjectDebugName("General IES sampler"); - } - - auto scRes = static_cast(m_surface->getSwapchainResources()); - scRes->getRenderpass(); // note it also creates rp if nulled - { - auto descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); - - if (not descriptorSetLayout) - return logFail("Failed to create descriptor set layout!"); - - auto range = std::to_array({ {StageFlags.value, 0u, sizeof(PushConstants)} }); - auto pipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); - - if (not pipelineLayout) - return logFail("Failed to create pipeline layout!"); - - // Compute Pipeline - { - auto params = std::to_array({ {} });; - params[0].layout = pipelineLayout.get(); - params[0].shader.shader = compute.get(); - params[0].shader.entryPoint = "main"; - - if (!m_device->createComputePipelines(nullptr, params, &computePipeline)) - return logFail("Failed to create compute pipeline!"); - } - - // Graphics Pipeline - { - IGPUPipelineBase::SShaderEntryMap specConstants; - const auto orientationAsUint32 = static_cast(hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT); - specConstants[0] = std::span{ reinterpret_cast(&orientationAsUint32), sizeof(orientationAsUint32) }; - - video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = - { - {.shader = vertex.get(), .entryPoint = "main", .entries = &specConstants }, - {.shader = pixel.get(), .entryPoint = "PSMain" } - }; - - auto params = std::to_array({ {} }); - params[0].renderpass = scRes->getRenderpass(); - params[0].vertexShader = specInfo[0]; - params[0].fragmentShader = specInfo[1]; - params[0].layout = pipelineLayout.get(); - params[0].cached = - { - .vertexInput = {}, // full screen tri ext, no inputs - .primitiveAssembly = {}, - .rasterization = { - .polygonMode = EPM_FILL, - .faceCullingMode = EFCM_NONE, - .depthWriteEnable = false, - }, - .blend = {}, - .subpassIx = 0u - }; - - if (!m_device->createGraphicsPipelines(nullptr, params, &graphicsPipeline)) - return logFail("Failed to create graphics pipeline!"); - } - - const auto dscLayoutPtrs = graphicsPipeline->getLayout()->getDescriptorSetLayouts(); - auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dscLayoutPtrs); - pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), descriptors.data()); - { - std::array, 4u + 1u> infos; - #define FILL_INFO(DESC, IX) \ - { \ - auto& info = infos[IX].emplace_back(); \ - info.desc = DESC; \ - info.info.image.imageLayout = IImage::LAYOUT::GENERAL; \ - } - - for (uint32_t i = 0; i < assets.size(); ++i) - { - auto& ies = assets[i]; - - FILL_INFO(ies.views.candela, 0u) - FILL_INFO(ies.views.spherical, 1u) - FILL_INFO(ies.views.direction, 2u) - FILL_INFO(ies.views.mask, 3u) - } - FILL_INFO(generalSampler, 4u); - auto* samplerInfo = infos.back().data(); - samplerInfo->info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - std::array writes; - for (uint32_t i = 0; i < 4u; ++i) - { - auto& write = writes[i]; - write.count = assets.size(); - write.info = infos[i].data(); - write.dstSet = descriptors[0u].get(); - write.arrayElement = 0u; - write.binding = i; - } - - for (uint32_t i = 4u; i < 8u; ++i) - { - auto ix = i - 4u; - auto& write = writes[i] = writes[ix]; - write.binding = ix + 10u; - } - - auto& write = writes.back(); - write.count = 1u; - write.info = samplerInfo; - write.dstSet = descriptors[0u].get(); - write.arrayElement = 0u; - write.binding = 0u + 100u; - - if (!m_device->updateDescriptorSets(writes, {})) - return logFail("Failed to write descriptor sets"); - } - } - } - - // imGUI - { - auto scRes = static_cast(m_surface->getSwapchainResources()); - ext::imgui::UI::SCreationParameters params = {}; - params.resources.texturesInfo = { .setIx = NBL_TEXTURES_SET_IX, .bindingIx = NBL_TEXTURES_BINDING_IX }; - params.resources.samplersInfo = { .setIx = NBL_SAMPLER_STATES_SET_IX, .bindingIx = NBL_SAMPLER_STATES_BINDING_IX }; - params.utilities = m_utils; - params.transfer = getTransferUpQueue(); - params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, NBL_TEXTURES_COUNT); - params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); - params.subpassIx = 0u; - params.pipelineCache = nullptr; - - using imgui_precompiled_spirv_t = ext::imgui::UI::SCreationParameters::PrecompiledShaders; - params.spirv = std::make_optional(imgui_precompiled_spirv_t{ .vertex = imguiVertex, .fragment = imguiPixel }); - - auto* imgui = (ui.it = ext::imgui::UI::create(std::move(params))).get(); - if (not imgui) - return logFail("Failed to create `nbl::ext::imgui::UI` class"); - - { - const auto* layout = imgui->getPipeline()->getLayout()->getDescriptorSetLayout(0u); - auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, { &layout,1 }); - auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); - ui.descriptor = make_smart_refctd_ptr(std::move(ds)); - if (!ui.descriptor) - return logFail("Failed to create the descriptor set"); - { - auto dummy = SubAllocatedDescriptorSet::invalid_value; - ui.descriptor->multi_allocate(0, 1, &dummy); - assert(dummy == ext::imgui::UI::FontAtlasTexId); - } - IGPUDescriptorSet::SDescriptorInfo info = {}; - info.desc = smart_refctd_ptr(imgui->getFontAtlasView()); - info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - const IGPUDescriptorSet::SWriteDescriptorSet write = { - .dstSet = ui.descriptor->getDescriptorSet(), - .binding = 0u, - .arrayElement = ext::imgui::UI::FontAtlasTexId, - .count = 1, - .info = &info - }; - if (!m_device->updateDescriptorSets({ &write,1 }, {})) - return logFail("Failed to write the descriptor set"); - } - - imgui->registerListener([this]() - { - uiListener(); - }); - } - - m_semaphore = m_device->createSemaphore(m_realFrameIx); - if (!m_semaphore) - return logFail("Failed to Create a Semaphore!"); - - using pool_flags_t = IGPUCommandPool::CREATE_FLAGS; - - auto createCommandBuffers = [&](auto* queue, const std::span> out, pool_flags_t flags) -> bool - { - auto pool = m_device->createCommandPool(queue->getFamilyIndex(), flags); - if (!pool) - return logFail("Couldn't create command pool!"); - if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, out)) - return logFail("Couldn't create command buffer!"); - return true; - }; - - // render loop command buffers - if (not createCommandBuffers(getGraphicsQueue(), m_cmdBufs, pool_flags_t::RESET_COMMAND_BUFFER_BIT)) - return false; - - // transient command buffer - { - auto* queue = getGraphicsQueue(); - auto cbs = std::to_array({ smart_refctd_ptr() }); - if (not createCommandBuffers(queue, cbs, pool_flags_t::RESET_COMMAND_BUFFER_BIT | pool_flags_t::TRANSIENT_BIT)) - return false; - - std::vector images; - for (uint32_t i = 0; i < assets.size(); ++i) - { - auto& ies = assets[i]; - - images.emplace_back() = ies.views.candela->getCreationParameters().image.get(); - images.emplace_back() = ies.views.spherical->getCreationParameters().image.get(); - images.emplace_back() = ies.views.direction->getCreationParameters().image.get(); - images.emplace_back() = ies.views.mask->getCreationParameters().image.get(); - } - - auto* cb = cbs.front().get(); - cb->setObjectDebugName("Transient Command Buffer"); - - if (not cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) - return logFail("Couldn't begin command buffer!"); - - if (not IES::barrier(cb, images)) - return logFail("Failed to record pipeline barriers!"); - - if (not cb->end()) - return logFail("Couldn't end command buffer!"); - - core::smart_refctd_ptr semaphore = m_device->createSemaphore(0); - semaphore->setObjectDebugName("Scratch Semaphore"); - { - IQueue::SSubmitInfo::SSemaphoreInfo signal = - { - .semaphore = semaphore.get(), - .value = 1u, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS - }; - - const IQueue::SSubmitInfo::SCommandBufferInfo cmds[] = { {.cmdbuf = cb } }; - - const IQueue::SSubmitInfo infos[] = - { { - .waitSemaphores = {}, - .commandBuffers = cmds, - .signalSemaphores = {&signal,1} - } }; - - if (queue->submit(infos) != IQueue::RESULT::SUCCESS) - return logFail("Failed to submit queue!"); - } - - { - const ISemaphore::SWaitInfo infos[] = - { { - .semaphore = semaphore.get(), - .value = 1u - } }; - - if (m_device->blockForSemaphores(infos) != ISemaphore::WAIT_RESULT::SUCCESS) - return logFail("Couldn't block for scratch semaphore!"); - } - } - - onAppInitializedFinish(); - - return true; -} \ No newline at end of file diff --git a/50.IESProfileTest/AppInputParser.cpp b/50.IESProfileTest/AppInputParser.cpp deleted file mode 100644 index 0f236969b..000000000 --- a/50.IESProfileTest/AppInputParser.cpp +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "AppInputParser.hpp" -#include "nlohmann/json.hpp" - -NBL_EXPOSE_NAMESPACES -using namespace nlohmann; - -bool AppInputParser::parse(Output& out, const std::string input, const std::string cwd) -{ - const auto jInputFile = std::filesystem::absolute(input); - const auto sjInputFile = jInputFile.string(); - - std::ifstream file(sjInputFile.c_str()); - if (!file.is_open()) { - - logger.log("Could not open \"%s\" file.", system::ILogger::ELL_ERROR, sjInputFile.c_str()); - return false; - } - - std::stringstream buffer; - buffer << file.rdbuf(); - const auto jsonBuffer = buffer.str(); - - if (jsonBuffer.empty()) - { - logger.log("\"%s\" file is empty!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); - return false; - } - - const auto jsonMap = json::parse(jsonBuffer.c_str()); - - if (!jsonMap["directories"].is_array()) - { - logger.log("\"%s\" file is empty!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); - return false; - } - - if (!jsonMap["files"].is_array()) - { - logger.log("\"%s\" file's field \"files\" is not an array!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); - return false; - } - - if (!jsonMap["writeAssets"].is_boolean()) - { - logger.log("\"%s\" file's field \"writeAssets\" is not a boolean!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); - return false; - } - - auto addFile = [&](const std::string_view in) -> bool - { - auto path = std::filesystem::absolute(cwd / std::filesystem::path(in)); - - if (std::filesystem::exists(path) && std::filesystem::is_regular_file(path) && path.extension() == ".ies") - out.inputList.push_back(path.string()); - else - { - logger.log("Invalid \"%s\" input!", system::ILogger::ELL_ERROR, path.string().c_str()); - return false; - } - - return true; - }; - - auto addFiles = [&](const std::string_view directoryPath) -> bool - { - auto directory(std::filesystem::absolute(cwd / std::filesystem::path(directoryPath))); - if (!std::filesystem::exists(directory) || !std::filesystem::is_directory(directory)) - { - logger.log("Invalid \"%s\" directory!", system::ILogger::ELL_ERROR, directory.string().c_str()); - return false; - } - - for (const auto& entry : std::filesystem::directory_iterator(directory)) - if (!addFile(entry.path().string().c_str())) - return false; - - return true; - }; - - // parse json - { - std::vector jDirectories; - jsonMap["directories"].get_to(jDirectories); - - for (const auto& it : jDirectories) - if (!addFiles(it)) - return false; - - std::vector jFiles; - jsonMap["files"].get_to(jFiles); - - for (const auto& it : jFiles) - if (!addFile(it)) - return false; - } - - out.withGUI = false; - jsonMap["gui"].get_to(out.withGUI); - - out.writeAssets = false; - jsonMap["writeAssets"].get_to(out.writeAssets); - - return true; -} \ No newline at end of file diff --git a/50.IESProfileTest/AppInputParser.hpp b/50.IESProfileTest/AppInputParser.hpp deleted file mode 100644 index 18b5e4fe3..000000000 --- a/50.IESProfileTest/AppInputParser.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ -#define _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ - -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/examples/examples.hpp" - -struct AppInputParser -{ -public: - struct Output - { - std::vector inputList; - bool withGUI; - bool writeAssets; - }; - - AppInputParser(nbl::system::logger_opt_ptr _logger = nullptr) : logger(_logger) {} - bool parse(Output& out, const std::string jFilePath, const std::string cwd = "."); - -private: - nbl::system::logger_opt_ptr logger; -}; - -#endif // _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ \ No newline at end of file diff --git a/50.IESProfileTest/AppRender.cpp b/50.IESProfileTest/AppRender.cpp deleted file mode 100644 index 136d6d63b..000000000 --- a/50.IESProfileTest/AppRender.cpp +++ /dev/null @@ -1,227 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "App.hpp" -#include "app_resources/common.hlsl" - -IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::microseconds nextPresentationTimestamp) -{ - const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; - auto* const cb = m_cmdBufs.data()[resourceIx].get(); - cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - - m_inputSystem->getDefaultMouse(&mouse); - m_inputSystem->getDefaultKeyboard(&keyboard); - { - struct - { - std::vector mouse{}; std::vector keyboard{}; - } captured; - - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { processMouse(events); for (const auto& e : events) captured.mouse.emplace_back(e); }, m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { processKeyboard(events); for (const auto& e : events) captured.keyboard.emplace_back(e); }, m_logger.get()); - - const auto cursorPosition = m_window->getCursorControl()->getPosition(); - ext::imgui::UI::SUpdateParameters params = - { - .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), - .displaySize = {m_window->getWidth(),m_window->getHeight()}, - .mouseEvents = captured.mouse, - .keyboardEvents = captured.keyboard - }; - - ui.it->update(params); - } - - auto& ies = assets[activeAssetIx]; - PushConstants pc; - { - pc.vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(); - pc.hAnglesBDA = ies.buffers.hAngles->getDeviceAddress(); - pc.dataBDA = ies.buffers.data->getDeviceAddress(); - - const auto* profile = ies.getProfile(); - - pc.maxIValue = profile->getMaxCandelaValue(); - pc.vAnglesCount = profile->getVertAngles().size(); - pc.hAnglesCount = profile->getHoriAngles().size(); - pc.dataCount = profile->getData().size(); - - pc.zAngleDegreeRotation = ies.zDegree; - pc.mode = ies.mode; - pc.texIx = activeAssetIx; - } - - for (auto& buffer : { ies.buffers.data, ies.buffers.hAngles, ies.buffers.vAngles }) // flush request for sanity - { - auto bound = buffer->getBoundMemory(); - if (bound.memory->haveToMakeVisible()) - { - const ILogicalDevice::MappedMemoryRange range(bound.memory, bound.offset, buffer->getSize()); - m_device->flushMappedMemoryRanges(1, &range); - } - } - - auto* const descriptor = descriptors[0].get(); - auto* image = ies.getActiveImage(); - - // Compute - { - cb->beginDebugMarker("IES::compute"); - IES::barrier(cb, image); - auto* layout = computePipeline->getLayout(); - cb->bindComputePipeline(computePipeline.get()); - cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); - cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); - const auto xGroups = (ies.getProfile()->getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u; - cb->dispatch(xGroups, xGroups, 1); - cb->endDebugMarker(); - } - - // Graphics - { - cb->beginDebugMarker("IES::render"); - IES::barrier(cb, image); - - asset::SViewport viewport; - { - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = m_window->getWidth(); - viewport.height = m_window->getHeight(); - } - cb->setViewport(0u, 1u, &viewport); - - VkRect2D scissor = - { - .offset = { 0, 0 }, - .extent = { m_window->getWidth(), m_window->getHeight() }, - }; - cb->setScissor(0u, 1u, &scissor); - - const VkRect2D currentRenderArea = - { - .offset = {0,0}, - .extent = {m_window->getWidth(),m_window->getHeight()} - }; - - const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; - const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; - auto scRes = static_cast(m_surface->getSwapchainResources()); - const IGPUCommandBuffer::SRenderpassBeginInfo info = - { - .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), - .colorClearValues = &clearValue, - .depthStencilClearValues = &depthValue, - .renderArea = currentRenderArea - }; - - cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - { - auto* layout = graphicsPipeline->getLayout(); - cb->bindGraphicsPipeline(graphicsPipeline.get()); - cb->bindDescriptorSets(EPBP_GRAPHICS, layout, 0, 1, &descriptor); - cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); - ext::FullScreenTriangle::recordDrawCall(cb); - { - auto* imgui = ui.it.get(); - auto* pipeline = imgui->getPipeline(); - cb->bindGraphicsPipeline(pipeline); - const auto* ds = ui.descriptor->getDescriptorSet(); - cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); - const ISemaphore::SWaitInfo wait = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - if (!imgui->render(cb, wait)) - { - m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); - return {}; - } - } - } - cb->endRenderPass(); - cb->endDebugMarker(); - cb->end(); - } - - IQueue::SSubmitInfo::SSemaphoreInfo retval = - { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS - }; - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = - { - {.cmdbuf = cb } - }; - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = - { - { - .semaphore = device_base_t::getCurrentAcquire().semaphore, - .value = device_base_t::getCurrentAcquire().acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - const IQueue::SSubmitInfo infos[] = - { - { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = {&retval,1} - } - }; - - if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) - { - retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal - m_realFrameIx--; - } - - std::string caption = "[Nabla Engine] IES Viewer"; - { - m_window->setCaption(caption); - } - return retval; -} - -const video::IGPURenderpass::SCreationParams::SSubpassDependency* IESViewer::getDefaultSubpassDependencies() const -{ - // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. - const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = - { - // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth - { - .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = { - // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later - .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, - // don't want any writes to be available, we'll clear - .srcAccessMask = ACCESS_FLAGS::NONE, - // destination needs to wait as early as possible - // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` - .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // because depth and color get cleared first no read mask - .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - // leave view offsets and flags default - }, - // color from ATTACHMENT_OPTIMAL to PRESENT_SRC - { - .srcSubpass = 0, - .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = { - // last place where the color can get modified, depth is implicitly earlier - .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // only write ops, reads can't be made available - .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - // spec says nothing is needed when presentation is the destination - } - // leave view offsets and flags default - }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; - return dependencies; -} \ No newline at end of file diff --git a/50.IESProfileTest/AppUI.cpp b/50.IESProfileTest/AppUI.cpp deleted file mode 100644 index c4efc9ccf..000000000 --- a/50.IESProfileTest/AppUI.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "App.hpp" -#include "imgui/imgui_internal.h" -#include "app_resources/common.hlsl" -#include "app_resources/imgui.opts.hlsl" - -void IESViewer::uiListener() -{ - auto& ies = assets[activeAssetIx]; - const auto name = path(ies.key).filename().string(); - auto* profile = ies.getProfile(); - const float lowerBound = (float)profile->getHoriAngles().front(); - const float upperBound = (float)profile->getHoriAngles().back(); - const bool singleAngle = (upperBound == lowerBound); - - auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); - const ImGuiViewport* vp = ImGui::GetMainViewport(); - { - ImDrawList* fg = ImGui::GetForegroundDrawList(); - float x = vp->Pos.x + 8.f; - float y = vp->Pos.y + 8.f; - - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::modeToRS(ies.mode)); - y += ImGui::GetTextLineHeightWithSpacing(); - - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::symmetryToRS(profile->getSymmetry())); - y += ImGui::GetTextLineHeightWithSpacing(); - - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), name.c_str()); - y += ImGui::GetTextLineHeightWithSpacing(); - - char b1[64]; snprintf(b1, sizeof(b1), "%.3f\xC2\xB0", angle); - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), b1); - } - - { - const float pad = 8.f; - const float sliderW = 74.f; - const float sliderH = ImMin(vp->Size.y - pad * 2.f, 260.f); - ImGui::SetNextWindowPos(ImVec2(vp->Pos.x + vp->Size.x - sliderW - pad, vp->Pos.y + pad), ImGuiCond_Always); - ImGui::SetNextWindowSize(ImVec2(sliderW, sliderH), ImGuiCond_Always); - ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0, 0)); - ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); - ImGuiWindowFlags flags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoMove | - ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoBringToFrontOnFocus | - ImGuiWindowFlags_NoNav | ImGuiWindowFlags_NoBackground; - - if (ImGui::Begin("AngleSliderOverlay", nullptr, flags)) - { - ImGui::InvisibleButton("##fader_area", ImGui::GetContentRegionAvail()); - ImVec2 rmin = ImGui::GetItemRectMin(); - ImVec2 rmax = ImGui::GetItemRectMax(); - ImDrawList* dl = ImGui::GetWindowDrawList(); - ImU32 col = IM_COL32(220, 60, 60, 255); - - float knobR = 7.f; - float trackX = rmax.x - 12.f; - float y0 = rmin.y + knobR + 1.f; - float y1 = rmax.y - knobR - 1.f; - - dl->AddLine(ImVec2(trackX, y0), ImVec2(trackX, y1), col, 3.f); - - if (singleAngle) - { - float y = (y0 + y1) * 0.5f; - dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); - char tb[32]; snprintf(tb, sizeof(tb), "%.0f", lowerBound); - ImVec2 ts = ImGui::CalcTextSize(tb); - dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); - } - else - { - for (int i = 0; i < 5; ++i) - { - float v = lowerBound + (upperBound - lowerBound) * (float(i) / 4.f); - float t = (v - lowerBound) / (upperBound - lowerBound); - float y = y1 - t * (y1 - y0); - dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); - char tb[32]; snprintf(tb, sizeof(tb), "%.0f", v); - ImVec2 ts = ImGui::CalcTextSize(tb); - dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); - } - } - - float t = singleAngle ? 0.5f : (angle - lowerBound) / (upperBound - lowerBound); - float knobY = y1 - t * (y1 - y0); - dl->AddCircleFilled(ImVec2(trackX, knobY), knobR, col); - dl->AddCircle(ImVec2(trackX, knobY), knobR, ImGui::GetColorU32(ImGuiCol_Border)); - - if (!singleAngle && (ImGui::IsItemHovered() || ImGui::IsItemActive()) && ImGui::IsMouseDown(0)) - { - float my = ImClamp(ImGui::GetIO().MousePos.y, y0, y1); - float nt = (y1 - my) / (y1 - y0); - angle = lowerBound + nt * (upperBound - lowerBound); - } - } - ImGui::End(); - ImGui::PopStyleVar(2); - } - - ies.zDegree = angle; -} \ No newline at end of file diff --git a/50.IESProfileTest/CMakeLists.txt b/50.IESProfileTest/CMakeLists.txt deleted file mode 100644 index ca1c66488..000000000 --- a/50.IESProfileTest/CMakeLists.txt +++ /dev/null @@ -1,85 +0,0 @@ -if(NBL_BUILD_IMGUI) -set(SRCs - App.cpp AppInit.cpp AppRender.cpp AppGPU.cpp AppUI.cpp AppEvent.cpp AppInputParser.cpp - App.hpp AppInputParser.hpp - IES.cpp IES.hpp - inputs.json -) - -set(LIBs - imtestengine - imguizmo - "${NBL_EXT_IMGUI_UI_LIB}" -) - -nbl_create_executable_project("${SRCs}" "" "" "${LIBs}") -target_link_libraries(${EXECUTABLE_NAME} PRIVATE nlohmann_json::nlohmann_json) - -set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/compute.hlsl - app_resources/pixel.hlsl - app_resources/vertex.hlsl - app_resources/imgui.vertex.hlsl - app_resources/imgui.pixel.hlsl - app_resources/imgui.opts.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) - -set(SM 6_8) -set(JSON [=[ -[ - { - "INPUT": "app_resources/compute.hlsl", - "KEY": "compute" - }, - { - "INPUT": "app_resources/pixel.hlsl", - "KEY": "pixel" - }, - { - "INPUT": "app_resources/vertex.hlsl", - "KEY": "vertex" - }, - { - "INPUT": "app_resources/imgui.vertex.hlsl", - "KEY": "imgui.vertex" - }, - { - "INPUT": "app_resources/imgui.pixel.hlsl", - "KEY": "imgui.pixel" - } -] -]=]) -string(CONFIGURE "${JSON}" JSON) - -set(COMPILE_OPTIONS - -I "${NBL_ROOT_PATH}/include" # a workaround due to imgui ext headers which are not part of Nabla builtin archive - -I "${CMAKE_CURRENT_SOURCE_DIR}" - -O3 - -T lib_${SM} -) - -NBL_CREATE_NSC_COMPILE_RULES( - TARGET ${EXECUTABLE_NAME}SPIRV - LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} - BINARY_DIR ${OUTPUT_DIRECTORY} - MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT - COMMON_OPTIONS ${COMPILE_OPTIONS} - OUTPUT_VAR KEYS - INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp - NAMESPACE nbl::this_example::builtin::build - INPUTS ${JSON} -) - -NBL_CREATE_RESOURCE_ARCHIVE( - NAMESPACE nbl::this_example::builtin::build - TARGET ${EXECUTABLE_NAME}_builtinsBuild - LINK_TO ${EXECUTABLE_NAME} - BIND ${OUTPUT_DIRECTORY} - BUILTINS ${KEYS} -) -endif() \ No newline at end of file diff --git a/50.IESProfileTest/IES.cpp b/50.IESProfileTest/IES.cpp deleted file mode 100644 index 357d3d88b..000000000 --- a/50.IESProfileTest/IES.cpp +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "IES.hpp" - -const asset::CIESProfile* IES::getProfile() const -{ - auto* meta = bundle.getMetadata(); - if (meta) - return &meta->selfCast()->profile; - - return nullptr; -} - -video::IGPUImage* IES::getActiveImage() const -{ - switch (mode) - { - case EM_IES_C: - return views.candela->getCreationParameters().image.get(); - case EM_SPERICAL_C: - return views.spherical->getCreationParameters().image.get(); - case EM_DIRECTION: - return views.direction->getCreationParameters().image.get(); - case EM_PASS_T_MASK: - return views.mask->getCreationParameters().image.get(); - - case EM_CDC: - default: - return nullptr; - } -} - -const char* IES::modeToRS(E_MODE mode) -{ - switch (mode) - { - case IES::EM_CDC: - return "Candlepower Distribution Curve"; - case IES::EM_IES_C: - return "Sample IES Candela"; - case IES::EM_SPERICAL_C: - return "Sample Spherical Coordinates"; - case IES::EM_DIRECTION: - return "Sample Direction"; - case IES::EM_PASS_T_MASK: - return "Sample Pass Mask"; - default: - return "ERROR (mode)"; - } -} - -const char* IES::symmetryToRS(CIESProfile::LuminairePlanesSymmetry symmetry) -{ - switch (symmetry) - { - case asset::CIESProfile::ISOTROPIC: - return "ISOTROPIC"; - case asset::CIESProfile::QUAD_SYMETRIC: - return "QUAD_SYMETRIC"; - case asset::CIESProfile::HALF_SYMETRIC: - return "HALF_SYMETRIC"; - case asset::CIESProfile::OTHER_HALF_SYMMETRIC: - return "OTHER_HALF_SYMMETRIC"; - case asset::CIESProfile::NO_LATERAL_SYMMET: - return "NO_LATERAL_SYMMET"; - default: - return "ERROR (symmetry)"; - } -} \ No newline at end of file diff --git a/50.IESProfileTest/IES.hpp b/50.IESProfileTest/IES.hpp deleted file mode 100644 index 04485366f..000000000 --- a/50.IESProfileTest/IES.hpp +++ /dev/null @@ -1,118 +0,0 @@ -#ifndef _THIS_EXAMPLE_IES_HPP_ -#define _THIS_EXAMPLE_IES_HPP_ - -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/examples/examples.hpp" - -NBL_EXPOSE_NAMESPACES - -struct IES -{ - enum E_MODE : uint32_t - { - EM_CDC, //! Candlepower Distribution Curve - EM_IES_C, //! IES Candela - EM_SPERICAL_C, //! Sperical coordinates - EM_DIRECTION, //! Sample direction - EM_PASS_T_MASK, //! Test mask - - EM_SIZE - }; - - struct - { - smart_refctd_ptr candela = nullptr, spherical = nullptr, direction = nullptr, mask = nullptr; - } views; - - struct - { - smart_refctd_ptr vAngles = nullptr, hAngles = nullptr, data = nullptr; - } buffers; - - SAssetBundle bundle; - std::string key; - - float zDegree = 0.f; - E_MODE mode = EM_CDC; - - const asset::CIESProfile* getProfile() const; - video::IGPUImage* getActiveImage() const; - - static const char* modeToRS(E_MODE mode); - static const char* symmetryToRS(CIESProfile::LuminairePlanesSymmetry symmetry); - - template - requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) - static inline bool barrier(IGPUCommandBuffer* const cb, const std::span images) - { - if (images.empty()) - return false; - - if (not cb) - return false; - - using image_memory_barrier_t = IGPUCommandBuffer::SImageMemoryBarrier; - const IGPUImage::SSubresourceRange range = - { - .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }; - - std::vector imageBarriers(images.size()); - - for (uint32_t i = 0; i < imageBarriers.size(); ++i) - { - auto& it = imageBarriers[i] = - { - .barrier = {.dep = {}}, - .image = images[i], - .subresourceRange = range, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = newLayout - }; - - if constexpr (newLayout == IImage::LAYOUT::GENERAL) - { - // READ_ONLY_OPTIMAL -> GENERAL, RW - it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - it.barrier.dep.srcAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; - it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; - it.barrier.dep.dstAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; - it.oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - } - else if (newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) - { - // GENERAL -> READ_ONLY_OPTIMAL, RO - it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; - it.barrier.dep.srcAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; - it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - it.barrier.dep.dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; - it.oldLayout = IImage::LAYOUT::GENERAL; - } - - if constexpr (undefined) - it.oldLayout = IImage::LAYOUT::UNDEFINED; // transition for init - } - - return cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {}, .bufBarriers = {}, .imgBarriers = imageBarriers }); - } - - template - requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) - static inline bool barrier(IGPUCommandBuffer* const cb, video::IGPUImage* image) - { - if (not image) - return false; - - auto in = std::to_array({ image }); - return barrier(cb, in); - } -}; - -#endif // _THIS_EXAMPLE_IES_HPP_ \ No newline at end of file diff --git a/50.IESProfileTest/app_resources/common.hlsl b/50.IESProfileTest/app_resources/common.hlsl deleted file mode 100644 index 9705c2282..000000000 --- a/50.IESProfileTest/app_resources/common.hlsl +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ -#define _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" - -#ifdef __HLSL_VERSION -#include "nbl/builtin/hlsl/bda/__ptr.hlsl" -#endif // __HLSL_VERSION - -// -> TODO: use NBL_CONTEXPR or something -#ifndef UINT16_MAX -#define UINT16_MAX 65535u // would be cool if we have this define somewhere or GLSL do -#endif // UINT16_MAX -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795f // would be cool if we have this define somewhere or GLSL do -#endif // M_PI - -#define M_HALF_PI M_PI/2.0f // would be cool if we have this define somewhere or GLSL do -#define QUANT_ERROR_ADMISSIBLE 1/1024 - -#define WORKGROUP_SIZE 256u -#define WORKGROUP_DIMENSION 16u -// <- + wipe whatever we already have - -// TODO: since NSC prebuilds into SPIRV - maybe could make it a CMake option with a default val -#define MAX_IES_IMAGES 6969 - -using namespace nbl::hlsl; - -struct PushConstants -{ - uint64_t hAnglesBDA; - uint64_t vAnglesBDA; - uint64_t dataBDA; - float64_t maxIValue; - - uint32_t hAnglesCount; - uint32_t vAnglesCount; - uint32_t dataCount; - - uint32_t mode; - uint32_t texIx; - float32_t zAngleDegreeRotation; - - uint32_t dummy; - - #ifdef __HLSL_VERSION - float64_t getHorizontalAngle(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(hAnglesBDA) + i).deref().load(); } - float64_t getVerticalAngle(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(vAnglesBDA) + i).deref().load(); } - float64_t getData(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(dataBDA) + i).deref().load(); } - #endif // __HLSL_VERSION -}; - -#ifdef __HLSL_VERSION -[[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MAX_IES_IMAGES]; -[[vk::binding(1, 0)]] Texture2D inSphericalCoordinatesImage[MAX_IES_IMAGES]; -[[vk::binding(2, 0)]] Texture2D inOUVProjectionDirectionImage[MAX_IES_IMAGES]; -[[vk::binding(3, 0)]] Texture2D inPassTMaskImage[MAX_IES_IMAGES]; -[[vk::binding(0 + 10, 0)]] RWTexture2D outIESCandelaImage[MAX_IES_IMAGES]; -[[vk::binding(1 + 10, 0)]] RWTexture2D outSphericalCoordinatesImage[MAX_IES_IMAGES]; -[[vk::binding(2 + 10, 0)]] RWTexture2D outOUVProjectionDirectionImage[MAX_IES_IMAGES]; -[[vk::binding(3 + 10, 0)]] RWTexture2D outPassTMask[MAX_IES_IMAGES]; -[[vk::binding(0 + 100, 0)]] SamplerState generalSampler; -[[vk::push_constant]] struct PushConstants pc; -#endif // __HLSL_VERSION - -#endif // _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ diff --git a/50.IESProfileTest/app_resources/compute.hlsl b/50.IESProfileTest/app_resources/compute.hlsl deleted file mode 100644 index cf22466fc..000000000 --- a/50.IESProfileTest/app_resources/compute.hlsl +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "common.hlsl" - -float32_t3 octahedronUVToDir(float64_t2 uv) -{ - float32_t3 position = float32_t3((uv * 2.0 - 1.0).xy, 0.0); - float32_t2 absP = float32_t2(abs(position.x), abs(position.y)); - - position.z = 1.0 - absP.x - absP.y; - - if (position.z < 0.0) - { - position.x = sign(position.x) * (1.0 - absP.y); - position.y = sign(position.y) * (1.0 - absP.x); - } - - // rotate position vector around Z-axis with "pc.zAngleDegreeRotation" - if (pc.zAngleDegreeRotation != 0.0) - { - float64_t rDegree = pc.zAngleDegreeRotation; - - const float32_t zAngleRadians = float32_t(rDegree * M_PI / 180.0); - const float64_t cosineV = cos(zAngleRadians); - const float64_t sineV = sin(zAngleRadians); - - position = float32_t3(cosineV * position.x - cosineV * position.y, sineV * position.x + sineV * position.y, position.z); - } - - return normalize(position); -} - -//! Returns spherical coordinates with physics convention in radians -/* - https://en.wikipedia.org/wiki/Spherical_coordinate_system#/media/File:3D_Spherical.svg - Retval.x is "theta" polar angle in range [0, PI] & Retval.y "phi" is azimuthal angle - in [-PI, PI] range -*/ - -float32_t2 sphericalDirToRadians(float32_t3 direction) -{ - float32_t theta = acos(clamp(direction.z / length(direction), -1.0, 1.0)); - float32_t phi = atan2(direction.y, direction.x); // TODO: check it - - return float32_t2(theta, phi); -} - -uint32_t implGetVUB(const float64_t angle) -{ - for (uint32_t i = 0; i < pc.vAnglesCount; ++i) - if (pc.getVerticalAngle(i) > angle) - return i; - - return pc.vAnglesCount; -} - -uint32_t implGetHUB(const float64_t angle) -{ - for (uint32_t i = 0; i < pc.hAnglesCount; ++i) - if (pc.getHorizontalAngle(i) > angle) - return i; - - return pc.hAnglesCount; -} - -uint32_t getVLB(const float64_t angle) -{ - return uint32_t(max(int(implGetVUB(angle)) - 1, 0)); -} - -uint32_t getHLB(const float64_t angle) -{ - return uint32_t(max(int(implGetHUB(angle)) - 1, 0)); -} - -uint32_t getVUB(const float64_t angle) -{ - return uint32_t(min(int(implGetVUB(angle)), int(pc.vAnglesCount) - 1)); -} - -uint32_t getHUB(const float64_t angle) -{ - return uint32_t(min(int(implGetHUB(angle)), int(pc.hAnglesCount) - 1)); -} - -float64_t getValue(uint32_t i, uint32_t j) -{ - return pc.getData(pc.vAnglesCount * i + j); -} - -// symmetry -#define ISOTROPIC 0u -#define QUAD_SYMETRIC 1u -#define HALF_SYMETRIC 2u -#define NO_LATERAL_SYMMET 3u - -uint32_t getSymmetry() // TODO: to reduce check time we could pass it with PCs -{ - if (pc.hAnglesCount < 2) // careful here, somebody can break it by feeding us with too much data by mistake - return ISOTROPIC; - - const float64_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); - - if (hABack == 90) - return QUAD_SYMETRIC; - else if (hABack == 180) // note that OTHER_HALF_SYMMETRIC = HALF_SYMETRIC here - return HALF_SYMETRIC; - else - return NO_LATERAL_SYMMET; -} - -float32_t wrapPhi(const float32_t phi, const uint32_t symmetry) //! wrap phi spherical coordinate compoment to range defined by symmetry -{ - switch (symmetry) - { - case ISOTROPIC: - return 0.0; - case QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range - { - float32_t wrapPhi = abs(phi); //! first MIRROR - - if (wrapPhi > M_HALF_PI) //! then REPEAT - wrapPhi = clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0, M_HALF_PI); - - return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 - } - case HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range - return abs(phi); //! eg. maps (in degress) 181 -> 179 or 359 -> 1 - case NO_LATERAL_SYMMET: - { - if (phi < 0) - return phi + 2.0 * M_PI; - else - return phi; - } - } - - return 69; -} - -float64_t sampleI(const float32_t2 sphericalCoordinates, const uint32_t symmetry) -{ - const float64_t vAngle = degrees(sphericalCoordinates.x), hAngle = degrees(wrapPhi(sphericalCoordinates.y, symmetry)); - - float64_t vABack = pc.getVerticalAngle(pc.vAnglesCount - 1); - float64_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); - - if (vAngle > vABack) - return 0.0; - - // bilinear interpolation - uint32_t j0 = getVLB(vAngle); - uint32_t j1 = getVUB(vAngle); - uint32_t i0 = symmetry == ISOTROPIC ? 0 : getHLB(hAngle); - uint32_t i1 = symmetry == ISOTROPIC ? 0 : getHUB(hAngle); - - float64_t uReciprocal = i1 == i0 ? 1.0 : 1.0 / (pc.getHorizontalAngle(i1) - pc.getHorizontalAngle(i0)); - float64_t vReciprocal = j1 == j0 ? 1.0 : 1.0 / (pc.getVerticalAngle(j1) - pc.getVerticalAngle(j0)); - - float64_t u = (hAngle - pc.getHorizontalAngle(i0)) * uReciprocal; - float64_t v = (vAngle - pc.getVerticalAngle(j0)) * vReciprocal; - - float64_t s0 = getValue(i0, j0) * (1.0 - v) + getValue(i0, j1) * (v); - float64_t s1 = getValue(i1, j0) * (1.0 - v) + getValue(i1, j1) * (v); - - return s0 * (1.0 - u) + s1 * u; -} - -//! Checks if (x,y) /in [0,PI] x [-PI,PI] product -/* - IES vertical range is [0, 180] degrees - and horizontal range is [0, 360] degrees - but for easier computations (MIRROR & MIRROW_REPEAT operations) - we represent horizontal range as [-180, 180] given spherical coordinates -*/ - -bool isWithinSCDomain(const float64_t2 p) -{ - const float64_t2 lb = float64_t2(0, -M_PI); - const float64_t2 ub = float64_t2(M_PI, M_PI); - - return all(lb <= p) && all(p <= ub); -} - -[numthreads(WORKGROUP_DIMENSION, WORKGROUP_DIMENSION, 1)] -[shader("compute")] -void main(uint32_t3 ID : SV_DispatchThreadID) -{ - uint32_t2 destinationSize; - outIESCandelaImage[pc.texIx].GetDimensions(destinationSize.x, destinationSize.y); - const uint32_t2 pixelCoordinates = uint32_t2(glsl::gl_GlobalInvocationID().x, glsl::gl_GlobalInvocationID().y); - - const float32_t VERTICAL_INVERSE = 1.0f / float32_t(destinationSize.x); - const float32_t HORIZONTAL_INVERSE = 1.0f / float32_t(destinationSize.y); - - if (all(pixelCoordinates < destinationSize)) - { - const float32_t2 uv = float32_t2((float32_t(pixelCoordinates.x) + 0.5) * VERTICAL_INVERSE, (float32_t(pixelCoordinates.y) + 0.5) * HORIZONTAL_INVERSE); - const float32_t3 direction = octahedronUVToDir(uv); - const float32_t2 sphericalCoordinates = sphericalDirToRadians(direction); // third radius spherical compoment is normalized and skipped - - const float32_t normD = length(direction); - float32_t2 mask; - - if (1.0f - QUANT_ERROR_ADMISSIBLE <= normD && normD <= 1.0f + QUANT_ERROR_ADMISSIBLE) - mask.x = 1.0; // pass - else - mask.x = 0; - - if (isWithinSCDomain(sphericalCoordinates)) - mask.y = 1.0; // pass - else - mask.y = 0; - - outIESCandelaImage[pc.texIx][pixelCoordinates] = float32_t(sampleI(sphericalCoordinates, getSymmetry()) / pc.maxIValue); - outSphericalCoordinatesImage[pc.texIx][pixelCoordinates] = sphericalCoordinates; - outOUVProjectionDirectionImage[pc.texIx][pixelCoordinates] = direction; - outPassTMask[pc.texIx][pixelCoordinates] = mask; - } -} \ No newline at end of file diff --git a/50.IESProfileTest/app_resources/imgui.opts.hlsl b/50.IESProfileTest/app_resources/imgui.opts.hlsl deleted file mode 100644 index 54f502b0f..000000000 --- a/50.IESProfileTest/app_resources/imgui.opts.hlsl +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ -#define _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ - -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#define NBL_TEXTURES_BINDING_IX 0u -#define NBL_SAMPLER_STATES_BINDING_IX 1u -#define NBL_TEXTURES_SET_IX 0u -#define NBL_SAMPLER_STATES_SET_IX 0u -#define NBL_TEXTURES_COUNT 5u -#define NBL_SAMPLERS_COUNT 2u - -#endif // _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ - diff --git a/50.IESProfileTest/app_resources/imgui.pixel.hlsl b/50.IESProfileTest/app_resources/imgui.pixel.hlsl deleted file mode 100644 index fe93c3a70..000000000 --- a/50.IESProfileTest/app_resources/imgui.pixel.hlsl +++ /dev/null @@ -1,6 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "imgui.opts.hlsl" -#include "nbl/ext/ImGui/builtin/hlsl/fragment.hlsl" diff --git a/50.IESProfileTest/app_resources/imgui.vertex.hlsl b/50.IESProfileTest/app_resources/imgui.vertex.hlsl deleted file mode 100644 index 2063db84b..000000000 --- a/50.IESProfileTest/app_resources/imgui.vertex.hlsl +++ /dev/null @@ -1,5 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/ext/ImGui/builtin/hlsl/vertex.hlsl" diff --git a/50.IESProfileTest/app_resources/pixel.hlsl b/50.IESProfileTest/app_resources/pixel.hlsl deleted file mode 100644 index 5fe452b2d..000000000 --- a/50.IESProfileTest/app_resources/pixel.hlsl +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "common.hlsl" -#include "nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl" -using namespace nbl::hlsl::ext::FullScreenTriangle; - -float32_t2 iesDirToUv(float32_t3 dir) -{ - float32_t sum = dot(float32_t3(1.0f, 1.0f, 1.0f), abs(dir)); - float32_t3 s = dir / sum; - - if (s.z < 0.0f) - s.xy = sign(s.xy) * (1.0f - abs(s.yx)); - - return s.xy * 0.5f + 0.5f; -} - -float32_t plot(float32_t cand, float32_t pct, float32_t bold) -{ - return smoothstep(pct-0.005*bold, pct, cand) - smoothstep( pct, pct+0.005*bold, cand); -} - -// vertical cut of IES (i.e. cut by plane x = 0) -float32_t f(float32_t2 uv) -{ - return inIESCandelaImage[pc.texIx].Sample(generalSampler, iesDirToUv(normalize(float32_t3(uv.x, 0.001, uv.y)))).x; -} - -[shader("pixel")] -float32_t4 PSMain(SVertexAttributes input) : SV_Target0 -{ - switch (pc.mode) - { - case 0: - { - float32_t2 ndc = input.uv * 2.f - 1.f; - float32_t dist = length(ndc) * 1.015625f; - float32_t p = plot(dist, 1.0f, 0.75f); - float32_t3 col = float32_t3(p, p, p); - - float32_t normalizedStrength = f(ndc); - if (dist < normalizedStrength) - col += float32_t3(1.0f, 0.0f, 0.0f); - - return float32_t4(col, 1.0f); - } - case 1: - return float32_t4(inIESCandelaImage[pc.texIx].Sample(generalSampler, input.uv).x, 0.f, 0.f, 1.f); - case 2: - return float32_t4(inSphericalCoordinatesImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); - case 3: - return float32_t4(inOUVProjectionDirectionImage[pc.texIx].Sample(generalSampler, input.uv).xyz, 1.f); - default: - return float32_t4(inPassTMaskImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); - } -} diff --git a/50.IESProfileTest/app_resources/vertex.hlsl b/50.IESProfileTest/app_resources/vertex.hlsl deleted file mode 100644 index a0f565455..000000000 --- a/50.IESProfileTest/app_resources/vertex.hlsl +++ /dev/null @@ -1,6 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -// small trick, temporary, we will have a separate rule for compiling this ext and embed into Nabla DLL -#include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" diff --git a/50.IESProfileTest/inputs.json b/50.IESProfileTest/inputs.json deleted file mode 100644 index fbb833112..000000000 --- a/50.IESProfileTest/inputs.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "directories": [ - "mitsuba/ies/packages/leomoon-dot-com_ies-lights-pack/ies-lights-pack" - ], - "files": [ - "mitsuba/ies/ISOTROPIC/007cfb11e343e2f42e3b476be4ab684e.ies", - "mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies", - "mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies", - "mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies", - "mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies" - ], - "gui": true, - "writeAssets": false -} \ No newline at end of file diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp deleted file mode 100644 index 2d9b3e7c3..000000000 --- a/50.IESProfileTest/main.cpp +++ /dev/null @@ -1,6 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "App.hpp" -NBL_MAIN_FUNC(IESViewer) \ No newline at end of file diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp index f06e8fc14..b890ff501 100644 --- a/50.IESViewer/App.hpp +++ b/50.IESViewer/App.hpp @@ -28,17 +28,19 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp const IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override; private: - smart_refctd_ptr graphicsPipeline; - smart_refctd_ptr computePipeline; - std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> descriptors; + smart_refctd_ptr m_graphicsPipeline; + smart_refctd_ptr m_computePipeline; + std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> m_descriptors; - bool running = true; - std::vector assets; - size_t activeAssetIx = 0; + bool m_running = true; + std::vector m_assets; + size_t m_activeAssetIx = 0; size_t m_realFrameIx = 0; smart_refctd_ptr m_semaphore; - std::array, device_base_t::MaxFramesInFlight> m_cmdBufs; + std::array, device_base_t::MaxFramesInFlight> m_cmdBuffers; + std::array, device_base_t::MaxFramesInFlight> m_frameBuffers2D, m_frameBuffers3D; + InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; @@ -50,7 +52,9 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp void processMouse(const IMouseEventChannel::range_t& events); void processKeyboard(const IKeyboardEventChannel::range_t& events); - smart_refctd_ptr createImageView(const size_t width, const size_t height, E_FORMAT format, std::string name); + smart_refctd_ptr createImageView(const size_t width, const size_t height, E_FORMAT format, std::string name, + bitflag usage = bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_STORAGE_BIT, + bitflag aspectFlags = bitflag(IImage::EAF_COLOR_BIT)); smart_refctd_ptr createBuffer(const core::vector& in, std::string name); void uiListener(); diff --git a/50.IESViewer/AppEvent.cpp b/50.IESViewer/AppEvent.cpp index 07a11f5e9..60458f841 100644 --- a/50.IESViewer/AppEvent.cpp +++ b/50.IESViewer/AppEvent.cpp @@ -12,7 +12,7 @@ void IESViewer::processMouse(const nbl::ui::IMouseEventChannel::range_t& events) if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) { - auto& ies = assets[activeAssetIx]; + auto& ies = m_assets[m_activeAssetIx]; auto* profile = ies.getProfile(); auto impulse = ev.scrollEvent.verticalScroll * 0.02f; @@ -30,11 +30,11 @@ void IESViewer::processKeyboard(const nbl::ui::IKeyboardEventChannel::range_t& e if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) { if (ev.keyCode == nbl::ui::EKC_UP_ARROW) - activeAssetIx = std::clamp(activeAssetIx + 1, 0, assets.size() - 1u); + m_activeAssetIx = std::clamp(m_activeAssetIx + 1, 0, m_assets.size() - 1u); else if (ev.keyCode == nbl::ui::EKC_DOWN_ARROW) - activeAssetIx = std::clamp(activeAssetIx - 1, 0, assets.size() - 1u); + m_activeAssetIx = std::clamp(m_activeAssetIx - 1, 0, m_assets.size() - 1u); - auto& ies = assets[activeAssetIx]; + auto& ies = m_assets[m_activeAssetIx]; if (ev.keyCode == nbl::ui::EKC_C) ies.mode = IES::EM_CDC; @@ -48,7 +48,7 @@ void IESViewer::processKeyboard(const nbl::ui::IKeyboardEventChannel::range_t& e ies.mode = IES::EM_PASS_T_MASK; if (ev.keyCode == nbl::ui::EKC_Q) - running = false; + m_running = false; } } } \ No newline at end of file diff --git a/50.IESViewer/AppGPU.cpp b/50.IESViewer/AppGPU.cpp index aa13994d6..033c5d63b 100644 --- a/50.IESViewer/AppGPU.cpp +++ b/50.IESViewer/AppGPU.cpp @@ -4,7 +4,7 @@ #include "App.hpp" -core::smart_refctd_ptr IESViewer::createImageView(const size_t width, const size_t height, asset::E_FORMAT format, std::string name) +core::smart_refctd_ptr IESViewer::createImageView(const size_t width, const size_t height, E_FORMAT format, std::string name, bitflag usage, bitflag aspectFlags) { IGPUImage::SCreationParams imageParams{}; imageParams.type = IImage::E_TYPE::ET_2D; @@ -16,7 +16,7 @@ core::smart_refctd_ptr IESViewer::createImageView(const size_t wi imageParams.flags = IImage::ECF_NONE; imageParams.arrayLayers = 1u; imageParams.samples = IImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; - imageParams.usage = bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_STORAGE_BIT; + imageParams.usage = usage; auto image = m_device->createImage(std::move(imageParams)); image->setObjectDebugName(name.c_str()); @@ -43,7 +43,7 @@ core::smart_refctd_ptr IESViewer::createImageView(const size_t wi viewParams.subresourceRange.baseMipLevel = 0u; viewParams.subresourceRange.layerCount = 1u; viewParams.subresourceRange.levelCount = 1u; - viewParams.subresourceRange.aspectMask = core::bitflag(asset::IImage::EAF_COLOR_BIT); + viewParams.subresourceRange.aspectMask = aspectFlags; auto imageView = m_device->createImageView(std::move(viewParams)); diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index 60fbb54b6..0d506d1fc 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -25,7 +25,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) if (!parser.parse(out, INPUT_JSON_FILE, media.string())) return false; - m_logger->log("Loading IES assets..", system::ILogger::ELL_INFO); + m_logger->log("Loading IES m_assets..", system::ILogger::ELL_INFO); { auto start = std::chrono::high_resolution_clock::now(); size_t loaded = {}, total = out.inputList.size(); @@ -38,7 +38,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) if (asset.getMetadata()) { - auto& ies = assets.emplace_back(); + auto& ies = m_assets.emplace_back(); ies.bundle = std::move(asset); ies.key = path(in).lexically_relative(media).string(); ++loaded; @@ -54,17 +54,17 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) if (not passed) { auto diff = std::to_string(total - loaded); - m_logger->log("Failed to load [%s/%s] IES assets!", system::ILogger::ELL_ERROR, diff.c_str(), st.c_str()); + m_logger->log("Failed to load [%s/%s] IES m_assets!", system::ILogger::ELL_ERROR, diff.c_str(), st.c_str()); } auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); auto took = std::to_string(elapsed.count()); - m_logger->log("Finished loading IES assets, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); + m_logger->log("Finished loading IES m_assets, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); } m_logger->log("Creating GPU IES resources..", system::ILogger::ELL_INFO); { auto start = std::chrono::high_resolution_clock::now(); - for (auto& ies : assets) + for (auto& ies : m_assets) { const auto* profile = ies.getProfile(); const auto resolution = profile->getOptimalIESResolution(); @@ -97,15 +97,15 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); auto assetBundle = m_assetMgr->getAsset(key, lp); - const auto assets = assetBundle.getContents(); + const auto m_assets = assetBundle.getContents(); - if (assets.empty()) + if (m_assets.empty()) { m_logger->log("Failed to load \"%s\" shader!", system::ILogger::ELL_ERROR, key.data()); return nullptr; } - auto spirvShader = IAsset::castDown(assets[0]); + auto spirvShader = IAsset::castDown(m_assets[0]); if (spirvShader) m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, key.data()); @@ -152,7 +152,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) BINDING_SAMPLER(0u + 100u) }); - const uint32_t texturesCount = assets.size(); + const uint32_t texturesCount = m_assets.size(); smart_refctd_ptr generalSampler; { IGPUSampler::SParams params; @@ -200,7 +200,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) params[0].shader.shader = compute.get(); params[0].shader.entryPoint = "main"; - if (!m_device->createComputePipelines(nullptr, params, &computePipeline)) + if (!m_device->createComputePipelines(nullptr, params, &m_computePipeline)) return logFail("Failed to create compute pipeline!"); } @@ -234,30 +234,30 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) .subpassIx = 0u }; - if (!m_device->createGraphicsPipelines(nullptr, params, &graphicsPipeline)) + if (!m_device->createGraphicsPipelines(nullptr, params, &m_graphicsPipeline)) return logFail("Failed to create graphics pipeline!"); } - const auto dscLayoutPtrs = graphicsPipeline->getLayout()->getDescriptorSetLayouts(); + const auto dscLayoutPtrs = m_graphicsPipeline->getLayout()->getDescriptorSetLayouts(); auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dscLayoutPtrs); - pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), descriptors.data()); + pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), m_descriptors.data()); { std::array, 4u + 1u> infos; - #define FILL_INFO(DESC, IX) \ +#define FILL_INFO(DESC, IX) \ { \ auto& info = infos[IX].emplace_back(); \ info.desc = DESC; \ info.info.image.imageLayout = IImage::LAYOUT::GENERAL; \ } - for (uint32_t i = 0; i < assets.size(); ++i) + for (uint32_t i = 0; i < m_assets.size(); ++i) { - auto& ies = assets[i]; + auto& ies = m_assets[i]; FILL_INFO(ies.views.candela, 0u) - FILL_INFO(ies.views.spherical, 1u) - FILL_INFO(ies.views.direction, 2u) - FILL_INFO(ies.views.mask, 3u) + FILL_INFO(ies.views.spherical, 1u) + FILL_INFO(ies.views.direction, 2u) + FILL_INFO(ies.views.mask, 3u) } FILL_INFO(generalSampler, 4u); auto* samplerInfo = infos.back().data(); @@ -267,9 +267,9 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) for (uint32_t i = 0; i < 4u; ++i) { auto& write = writes[i]; - write.count = assets.size(); + write.count = m_assets.size(); write.info = infos[i].data(); - write.dstSet = descriptors[0u].get(); + write.dstSet = m_descriptors[0u].get(); write.arrayElement = 0u; write.binding = i; } @@ -284,7 +284,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) auto& write = writes.back(); write.count = 1u; write.info = samplerInfo; - write.dstSet = descriptors[0u].get(); + write.dstSet = m_descriptors[0u].get(); write.arrayElement = 0u; write.binding = 0u + 100u; @@ -294,6 +294,52 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) } } + // frame buffers + { + // TODO: I will create my own + auto renderpass = smart_refctd_ptr(static_cast(m_surface->getSwapchainResources())->getRenderpass()); + + for (uint32_t i = 0u; i < m_frameBuffers2D.size(); ++i) + { + auto& fb2D = m_frameBuffers2D[i]; + auto& fb3D = m_frameBuffers3D[i]; + auto ixs = std::to_string(i); + + // TODO: may actually change it, temporary hardcoding + constexpr auto WIDTH = 640, HEIGHT = 640; + + { + auto color = createImageView(WIDTH, HEIGHT, EF_R8G8B8A8_SRGB, "[2D Plot]: framebuffer[" + ixs + "].color attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IImage::EAF_COLOR_BIT); + fb2D = m_device->createFramebuffer + ( + { { + .renderpass = renderpass, + .depthStencilAttachments = nullptr, + .colorAttachments = &color.get(), + .width = WIDTH, + .height = HEIGHT + } } + ); + } + + { + auto color = createImageView(WIDTH, HEIGHT, EF_R8G8B8A8_SRGB, "[3D Plot]: framebuffer[" + ixs + "].color attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IImage::EAF_COLOR_BIT); + auto depth = createImageView(WIDTH, HEIGHT, EF_D32_SFLOAT, "[3D Plot]: framebuffer[" + ixs + "].depth attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IGPUImage::EAF_DEPTH_BIT); + + fb3D = m_device->createFramebuffer + ( + { { + .renderpass = renderpass, + .depthStencilAttachments = nullptr, + .colorAttachments = &color.get(), + .width = WIDTH, + .height = HEIGHT + } } + ); + } + } + } + // imGUI { auto scRes = static_cast(m_surface->getSwapchainResources()); @@ -322,23 +368,38 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) ui.descriptor = make_smart_refctd_ptr(std::move(ds)); if (!ui.descriptor) return logFail("Failed to create the descriptor set"); + { - auto dummy = SubAllocatedDescriptorSet::invalid_value; - ui.descriptor->multi_allocate(0, 1, &dummy); - assert(dummy == ext::imgui::UI::FontAtlasTexId); + std::array addresses; + addresses.fill(SubAllocatedDescriptorSet::invalid_value); + ui.descriptor->multi_allocate(0, addresses.size(), addresses.data()); + + bool ok = true; + ok &= addresses.front() == ext::imgui::UI::FontAtlasTexId; + for (auto i = ext::imgui::UI::FontAtlasTexId; i < addresses.size(); ++i) + ok &= addresses[i] == i; + + assert(ok); + + std::array infos; + for (auto& it : infos) it.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + auto* ix = addresses.data(); + infos[*ix].desc = smart_refctd_ptr(imgui->getFontAtlasView()); ++ix; + for (uint8_t i = 0u; i < MaxFramesInFlight; ++i, ++ix) infos[*ix].desc = m_frameBuffers2D[i]->getCreationParameters().colorAttachments[0u]; + for (uint8_t i = 0u; i < MaxFramesInFlight; ++i, ++ix) infos[*ix].desc = m_frameBuffers3D[i]->getCreationParameters().colorAttachments[0u]; + + auto writes = std::to_array({ IGPUDescriptorSet::SWriteDescriptorSet{ + .dstSet = ui.descriptor->getDescriptorSet(), + .binding = NBL_TEXTURES_BINDING_IX, + .arrayElement = 0u, + .count = infos.size(), + .info = infos.data() + }}); + + if (!m_device->updateDescriptorSets(writes, {})) + return logFail("Failed to write the descriptor set"); } - IGPUDescriptorSet::SDescriptorInfo info = {}; - info.desc = smart_refctd_ptr(imgui->getFontAtlasView()); - info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - const IGPUDescriptorSet::SWriteDescriptorSet write = { - .dstSet = ui.descriptor->getDescriptorSet(), - .binding = 0u, - .arrayElement = ext::imgui::UI::FontAtlasTexId, - .count = 1, - .info = &info - }; - if (!m_device->updateDescriptorSets({ &write,1 }, {})) - return logFail("Failed to write the descriptor set"); } imgui->registerListener([this]() @@ -364,7 +425,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) }; // render loop command buffers - if (not createCommandBuffers(getGraphicsQueue(), m_cmdBufs, pool_flags_t::RESET_COMMAND_BUFFER_BIT)) + if (not createCommandBuffers(getGraphicsQueue(), m_cmdBuffers, pool_flags_t::RESET_COMMAND_BUFFER_BIT)) return false; // transient command buffer @@ -375,9 +436,9 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) return false; std::vector images; - for (uint32_t i = 0; i < assets.size(); ++i) + for (uint32_t i = 0; i < m_assets.size(); ++i) { - auto& ies = assets[i]; + auto& ies = m_assets[i]; images.emplace_back() = ies.views.candela->getCreationParameters().image.get(); images.emplace_back() = ies.views.spherical->getCreationParameters().image.get(); diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index 136d6d63b..4c730a2af 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -8,7 +8,10 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::microseconds nextPresentationTimestamp) { const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; - auto* const cb = m_cmdBufs.data()[resourceIx].get(); + auto* const cb = m_cmdBuffers.data()[resourceIx].get(); + auto* const fb2D = m_frameBuffers2D[resourceIx].get(); + auto* const fb3D = m_frameBuffers3D[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); @@ -35,7 +38,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi ui.it->update(params); } - auto& ies = assets[activeAssetIx]; + auto& ies = m_assets[m_activeAssetIx]; PushConstants pc; { pc.vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(); @@ -51,7 +54,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi pc.zAngleDegreeRotation = ies.zDegree; pc.mode = ies.mode; - pc.texIx = activeAssetIx; + pc.texIx = m_activeAssetIx; } for (auto& buffer : { ies.buffers.data, ies.buffers.hAngles, ies.buffers.vAngles }) // flush request for sanity @@ -64,15 +67,15 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi } } - auto* const descriptor = descriptors[0].get(); + auto* const descriptor = m_descriptors[0].get(); auto* image = ies.getActiveImage(); // Compute { cb->beginDebugMarker("IES::compute"); IES::barrier(cb, image); - auto* layout = computePipeline->getLayout(); - cb->bindComputePipeline(computePipeline.get()); + auto* layout = m_computePipeline->getLayout(); + cb->bindComputePipeline(m_computePipeline.get()); cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); const auto xGroups = (ies.getProfile()->getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u; @@ -82,39 +85,41 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi // Graphics { - cb->beginDebugMarker("IES::render"); + cb->beginDebugMarker("IES::graphics 2D plot"); IES::barrier(cb, image); + auto extent = fb2D->getCreationParameters().colorAttachments[0u]->getCreationParameters().image->getCreationParameters().extent; + asset::SViewport viewport; { viewport.minDepth = 1.f; viewport.maxDepth = 0.f; viewport.x = 0u; viewport.y = 0u; - viewport.width = m_window->getWidth(); - viewport.height = m_window->getHeight(); + viewport.width = extent.width; + viewport.height = extent.height; } cb->setViewport(0u, 1u, &viewport); VkRect2D scissor = { .offset = { 0, 0 }, - .extent = { m_window->getWidth(), m_window->getHeight() }, + .extent = { extent.width, extent.height }, }; cb->setScissor(0u, 1u, &scissor); - const VkRect2D currentRenderArea = + VkRect2D currentRenderArea = { .offset = {0,0}, - .extent = {m_window->getWidth(),m_window->getHeight()} + .extent = {extent.width,extent.height} }; - const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; auto scRes = static_cast(m_surface->getSwapchainResources()); - const IGPUCommandBuffer::SRenderpassBeginInfo info = + IGPUCommandBuffer::SRenderpassBeginInfo info = { - .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), + .framebuffer = fb2D, .colorClearValues = &clearValue, .depthStencilClearValues = &depthValue, .renderArea = currentRenderArea @@ -122,23 +127,36 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); { - auto* layout = graphicsPipeline->getLayout(); - cb->bindGraphicsPipeline(graphicsPipeline.get()); + auto* layout = m_graphicsPipeline->getLayout(); + cb->bindGraphicsPipeline(m_graphicsPipeline.get()); cb->bindDescriptorSets(EPBP_GRAPHICS, layout, 0, 1, &descriptor); cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); ext::FullScreenTriangle::recordDrawCall(cb); + } + cb->endRenderPass(); + cb->endDebugMarker(); + + cb->beginDebugMarker("IES::graphics ImGUI"); + + viewport.width = m_window->getWidth(); viewport.height = m_window->getHeight(); + scissor.extent = { m_window->getWidth(), m_window->getHeight() }; + cb->setScissor(0u, 1u, &scissor); + currentRenderArea.extent = { m_window->getWidth(),m_window->getHeight() }; + info.framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); + info.renderArea = currentRenderArea; + + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + { + auto* imgui = ui.it.get(); + auto* pipeline = imgui->getPipeline(); + cb->bindGraphicsPipeline(pipeline); + const auto* ds = ui.descriptor->getDescriptorSet(); + cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); + const ISemaphore::SWaitInfo wait = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + if (!imgui->render(cb, wait)) { - auto* imgui = ui.it.get(); - auto* pipeline = imgui->getPipeline(); - cb->bindGraphicsPipeline(pipeline); - const auto* ds = ui.descriptor->getDescriptorSet(); - cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); - const ISemaphore::SWaitInfo wait = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - if (!imgui->render(cb, wait)) - { - m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); - return {}; - } + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); + return {}; } } cb->endRenderPass(); diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp index c4efc9ccf..b300af245 100644 --- a/50.IESViewer/AppUI.cpp +++ b/50.IESViewer/AppUI.cpp @@ -9,7 +9,14 @@ void IESViewer::uiListener() { - auto& ies = assets[activeAssetIx]; + const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; + + SImResourceInfo info; + // note to self: for 2D, for 3d ext::imgui::UI::FontAtlasTexId + device_base_t::MaxFramesInFlight + resourceIx + 1u + info.textureID = ext::imgui::UI::FontAtlasTexId + resourceIx + 1u; + info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + + auto& ies = m_assets[m_activeAssetIx]; const auto name = path(ies.key).filename().string(); auto* profile = ies.getProfile(); const float lowerBound = (float)profile->getHoriAngles().front(); @@ -18,6 +25,7 @@ void IESViewer::uiListener() auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); const ImGuiViewport* vp = ImGui::GetMainViewport(); + { ImDrawList* fg = ImGui::GetForegroundDrawList(); float x = vp->Pos.x + 8.f; @@ -36,6 +44,34 @@ void IESViewer::uiListener() fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), b1); } + { + const ImVec2 imageSize(640.f, 640.f); + const ImVec2 imageCenter( + vp->Pos.x + vp->Size.x * 0.5f, + vp->Pos.y + vp->Size.y * 0.5f + ); + + ImGui::SetNextWindowPos(imageCenter, ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f)); + + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0.f, 0.f)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); + + ImGuiWindowFlags imgFlags = + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoBringToFrontOnFocus | + ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_NoScrollbar | + ImGuiWindowFlags_NoScrollWithMouse; + + if (ImGui::Begin("2D Plot", nullptr, imgFlags)) + { + ImGui::Image(info, imageSize); + } + ImGui::End(); + + ImGui::PopStyleVar(2); + } + { const float pad = 8.f; const float sliderW = 74.f; diff --git a/50.IESViewer/app_resources/imgui.opts.hlsl b/50.IESViewer/app_resources/imgui.opts.hlsl index 54f502b0f..fc5cf0fb0 100644 --- a/50.IESViewer/app_resources/imgui.opts.hlsl +++ b/50.IESViewer/app_resources/imgui.opts.hlsl @@ -9,7 +9,7 @@ #define NBL_SAMPLER_STATES_BINDING_IX 1u #define NBL_TEXTURES_SET_IX 0u #define NBL_SAMPLER_STATES_SET_IX 0u -#define NBL_TEXTURES_COUNT 5u +#define NBL_TEXTURES_COUNT 10u #define NBL_SAMPLERS_COUNT 2u #endif // _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ diff --git a/50.IESViewer/main.cpp b/50.IESViewer/main.cpp index 579ac030b..a082bca6d 100644 --- a/50.IESViewer/main.cpp +++ b/50.IESViewer/main.cpp @@ -4,15 +4,16 @@ #include "App.hpp" -#define APP_WINDOW_WIDTH 640 -#define APP_WINDOW_HEIGHT 640 +// TODO +#define APP_WINDOW_WIDTH 640*2u +#define APP_WINDOW_HEIGHT 640*2u #define APP_DEPTH_BUFFER_FORMAT EF_UNKNOWN IESViewer::IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), device_base_t({ APP_WINDOW_WIDTH, APP_WINDOW_HEIGHT }, APP_DEPTH_BUFFER_FORMAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { - // empty + } NBL_MAIN_FUNC(IESViewer) \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ad88dfa4..5ce756d7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,7 +73,7 @@ if(NBL_BUILD_EXAMPLES) #add_subdirectory(43_SumAndCDFFilters) add_subdirectory(47_DerivMapTest EXCLUDE_FROM_ALL) - add_subdirectory(50.IESProfileTest) + add_subdirectory(50.IESViewer) add_subdirectory(54_Transformations EXCLUDE_FROM_ALL) add_subdirectory(55_RGB18E7S3 EXCLUDE_FROM_ALL) add_subdirectory(61_UI) diff --git a/common/include/nbl/examples/examples.hpp b/common/include/nbl/examples/examples.hpp index 1450abc2a..134fe9b33 100644 --- a/common/include/nbl/examples/examples.hpp +++ b/common/include/nbl/examples/examples.hpp @@ -20,4 +20,15 @@ // cannot be in PCH because depens on definition of `this_example` for Example's builtins #include "nbl/examples/common/BuiltinResourcesApplication.hpp" +#define NBL_EXPOSE_NAMESPACES \ +using namespace nbl; \ +using namespace core; \ +using namespace hlsl; \ +using namespace system; \ +using namespace asset; \ +using namespace ui; \ +using namespace video; \ +using namespace scene; \ +using namespace nbl::examples; + #endif // _NBL_EXAMPLES_HPP_ \ No newline at end of file From 7d0b0db6159ee194c8d9a2f094a03f85fc0c7f8c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 29 Oct 2025 11:58:19 +0100 Subject: [PATCH 058/219] some room for 3d render, now need mesh sphere input and a shader --- 50.IESViewer/AppRender.cpp | 16 ++++++++++++++-- 50.IESViewer/AppUI.cpp | 38 +++++++++++++++++++++++++++++++++++--- 50.IESViewer/main.cpp | 4 ++-- 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index 4c730a2af..e4a133d66 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -85,8 +85,8 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi // Graphics { - cb->beginDebugMarker("IES::graphics 2D plot"); IES::barrier(cb, image); + cb->beginDebugMarker("IES::graphics 2D plot"); auto extent = fb2D->getCreationParameters().colorAttachments[0u]->getCreationParameters().image->getCreationParameters().extent; @@ -116,7 +116,6 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; - auto scRes = static_cast(m_surface->getSwapchainResources()); IGPUCommandBuffer::SRenderpassBeginInfo info = { .framebuffer = fb2D, @@ -136,12 +135,25 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi cb->endRenderPass(); cb->endDebugMarker(); + const IGPUCommandBuffer::SClearColorValue d3clearValue = { .float32 = {0.f,1.f,0.f,1.f} }; + auto info3D = info; + info3D.colorClearValues = &d3clearValue; // tmp + info3D.framebuffer = fb3D; + cb->beginDebugMarker("IES::graphics 3D plot"); + cb->beginRenderPass(info3D, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + { + // dummy, tmp + } + cb->endRenderPass(); + cb->endDebugMarker(); + cb->beginDebugMarker("IES::graphics ImGUI"); viewport.width = m_window->getWidth(); viewport.height = m_window->getHeight(); scissor.extent = { m_window->getWidth(), m_window->getHeight() }; cb->setScissor(0u, 1u, &scissor); currentRenderArea.extent = { m_window->getWidth(),m_window->getHeight() }; + auto scRes = static_cast(m_surface->getSwapchainResources()); info.framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); info.renderArea = currentRenderArea; diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp index b300af245..40e9a364a 100644 --- a/50.IESViewer/AppUI.cpp +++ b/50.IESViewer/AppUI.cpp @@ -12,7 +12,6 @@ void IESViewer::uiListener() const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; SImResourceInfo info; - // note to self: for 2D, for 3d ext::imgui::UI::FontAtlasTexId + device_base_t::MaxFramesInFlight + resourceIx + 1u info.textureID = ext::imgui::UI::FontAtlasTexId + resourceIx + 1u; info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; @@ -25,7 +24,9 @@ void IESViewer::uiListener() auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); const ImGuiViewport* vp = ImGui::GetMainViewport(); + const ImVec2 imageSize(640.f, 640.f); + // 2D Plot { ImDrawList* fg = ImGui::GetForegroundDrawList(); float x = vp->Pos.x + 8.f; @@ -45,10 +46,9 @@ void IESViewer::uiListener() } { - const ImVec2 imageSize(640.f, 640.f); const ImVec2 imageCenter( vp->Pos.x + vp->Size.x * 0.5f, - vp->Pos.y + vp->Size.y * 0.5f + vp->Pos.y + vp->Size.y * 0.25f ); ImGui::SetNextWindowPos(imageCenter, ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f)); @@ -138,4 +138,36 @@ void IESViewer::uiListener() } ies.zDegree = angle; + + // 3D plot + { + info.textureID += device_base_t::MaxFramesInFlight; + + { + const ImVec2 imageCenter( + vp->Pos.x + vp->Size.x * 0.5f, + vp->Pos.y + vp->Size.y * 0.75f + ); + + ImGui::SetNextWindowPos(imageCenter, ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f)); + + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0.f, 0.f)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); + + ImGuiWindowFlags imgFlags = + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoBringToFrontOnFocus | + ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_NoScrollbar | + ImGuiWindowFlags_NoScrollWithMouse; + + if (ImGui::Begin("3D Plot", nullptr, imgFlags)) + { + ImGui::Image(info, imageSize); + } + ImGui::End(); + + ImGui::PopStyleVar(2); + } + } } \ No newline at end of file diff --git a/50.IESViewer/main.cpp b/50.IESViewer/main.cpp index a082bca6d..d8c15de00 100644 --- a/50.IESViewer/main.cpp +++ b/50.IESViewer/main.cpp @@ -5,8 +5,8 @@ #include "App.hpp" // TODO -#define APP_WINDOW_WIDTH 640*2u -#define APP_WINDOW_HEIGHT 640*2u +#define APP_WINDOW_WIDTH 669*2u +#define APP_WINDOW_HEIGHT APP_WINDOW_WIDTH #define APP_DEPTH_BUFFER_FORMAT EF_UNKNOWN IESViewer::IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) From 6d2c3d4f32383eaeb706bef30b47e68292e7f24f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 29 Oct 2025 15:11:36 +0100 Subject: [PATCH 059/219] new inject lambda for CGeometryCreatorScene, temporary use CSimpleDebugRenderer for debugging and render into frame buffer I will derive from CSimpleDebugRenderer and override shaders --- 50.IESViewer/App.hpp | 4 ++ 50.IESViewer/AppInit.cpp | 42 ++++++++++++++++++- 50.IESViewer/AppRender.cpp | 21 ++++++++-- .../geometry/CGeometryCreatorScene.hpp | 34 +++++++++------ 4 files changed, 84 insertions(+), 17 deletions(-) diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp index b890ff501..21d902583 100644 --- a/50.IESViewer/App.hpp +++ b/50.IESViewer/App.hpp @@ -41,6 +41,10 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp std::array, device_base_t::MaxFramesInFlight> m_cmdBuffers; std::array, device_base_t::MaxFramesInFlight> m_frameBuffers2D, m_frameBuffers3D; + smart_refctd_ptr m_scene; + smart_refctd_ptr m_renderer; // TODO: will need to derive from it + have my own pixel shader + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); // TODO: orbit would be better + InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index 0d506d1fc..3fb850391 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -340,9 +340,49 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) } } + auto scRes = static_cast(m_surface->getSwapchainResources()); + + // geometries for 3D scene + { + CGeometryCreatorScene::f_geometry_override_t injector = [](auto* creator, auto addGeometry) + { + addGeometry("Sphere", creator->createSphere(1.f, 32, 32)); + }; + + const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; + m_scene = CGeometryCreatorScene::create( + { + .transferQueue = getTransferUpQueue(), + .utilities = m_utils.get(), + .logger = m_logger.get(), + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies, + .geometryOverride = injector + }, + // we want to use the vertex data through UTBs + CSimpleDebugRenderer::DefaultPolygonGeometryPatch + ); + + const auto& geometries = m_scene->getInitParams().geometries; + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), scRes->getRenderpass(), 0, { &geometries.front().get(),geometries.size() }); + if (!m_renderer || m_renderer->getGeometries().size() != geometries.size()) + return logFail("Could not create 3D Plot Renderer!"); + + m_renderer->m_instances.resize(1); + m_renderer->m_instances[0].world = float32_t3x4( + float32_t4(1, 0, 0, 0), + float32_t4(0, 1, 0, 0), + float32_t4(0, 0, 1, 0) + ); + + core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); + core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); + const auto& params = m_frameBuffers3D.front()->getCreationParameters(); + matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(params.width) / float(params.height), 0.1, 10000); + camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); + } + // imGUI { - auto scRes = static_cast(m_surface->getSwapchainResources()); ext::imgui::UI::SCreationParameters params = {}; params.resources.texturesInfo = { .setIx = NBL_TEXTURES_SET_IX, .bindingIx = NBL_TEXTURES_BINDING_IX }; params.resources.samplersInfo = { .setIx = NBL_SAMPLER_STATES_SET_IX, .bindingIx = NBL_SAMPLER_STATES_BINDING_IX }; diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index e4a133d66..c3b44bb26 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -23,8 +23,10 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi std::vector mouse{}; std::vector keyboard{}; } captured; - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { processMouse(events); for (const auto& e : events) captured.mouse.emplace_back(e); }, m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { processKeyboard(events); for (const auto& e : events) captured.keyboard.emplace_back(e); }, m_logger.get()); + camera.beginInputProcessing(nextPresentationTimestamp); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); processMouse(events); for (const auto& e : events) captured.mouse.emplace_back(e); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); processKeyboard(events); for (const auto& e : events) captured.keyboard.emplace_back(e); }, m_logger.get()); + camera.endInputProcessing(nextPresentationTimestamp); const auto cursorPosition = m_window->getCursorControl()->getPosition(); ext::imgui::UI::SUpdateParameters params = @@ -135,14 +137,25 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi cb->endRenderPass(); cb->endDebugMarker(); - const IGPUCommandBuffer::SClearColorValue d3clearValue = { .float32 = {0.f,1.f,0.f,1.f} }; + const IGPUCommandBuffer::SClearColorValue d3clearValue = { .float32 = {0.1f,0.1f,0.1f,1.f} }; auto info3D = info; info3D.colorClearValues = &d3clearValue; // tmp info3D.framebuffer = fb3D; cb->beginDebugMarker("IES::graphics 3D plot"); cb->beginRenderPass(info3D, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); { - // dummy, tmp + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + // TODO: get rid of legacy matrices + { + memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); + memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); + } + const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix); + + // tear down scene every frame + m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data(); + m_renderer->render(cb, viewParams); } cb->endRenderPass(); cb->endDebugMarker(); diff --git a/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp index 2993725a0..5212589c9 100644 --- a/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp +++ b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp @@ -5,7 +5,6 @@ #include #include "nbl/asset/utils/CGeometryCreator.h" - namespace nbl::examples { @@ -17,13 +16,17 @@ class CGeometryCreatorScene : public core::IReferenceCounted using namespace nbl::asset; \ using namespace nbl::video public: - // + + using f_add_geometry_t = std::function&&)>; + using f_geometry_override_t = std::function; + struct SCreateParams { video::IQueue* transferQueue; video::IUtilities* utilities; system::ILogger* logger; std::span addtionalBufferOwnershipFamilies = {}; + f_geometry_override_t geometryOverride = nullptr; }; static inline core::smart_refctd_ptr create(SCreateParams&& params, const video::CAssetConverter::patch_t& geometryPatch) { @@ -41,12 +44,11 @@ class CGeometryCreatorScene : public core::IReferenceCounted return nullptr; } - SInitParams init = {}; core::vector> geometries; // create out geometries { - auto addGeometry = [&init,&geometries](const std::string_view name, smart_refctd_ptr&& geom)->void + f_add_geometry_t addGeometry = [&init,&geometries](const auto name, auto&& geom)->void { init.geometryNames.emplace_back(name); geometries.push_back(std::move(geom)); @@ -63,13 +65,22 @@ class CGeometryCreatorScene : public core::IReferenceCounted ReferenceObjectCpu {.meta = {.type = OT_CONE, .name = "Cone Mesh" }, .shadersType = GP_CONE, .data = gc->createConeMesh(2, 3, 10) }, ReferenceObjectCpu {.meta = {.type = OT_ICOSPHERE, .name = "Icoshpere Mesh" }, .shadersType = GP_ICO, .data = gc->createIcoSphere(1, 3, true) } */ - addGeometry("Cube",creator->createCube({1.f,1.f,1.f})); - addGeometry("Rectangle",creator->createRectangle({1.5f,3.f})); - addGeometry("Disk",creator->createDisk(2.f,30)); - addGeometry("Sphere", creator->createSphere(2, 16, 16)); - addGeometry("Cylinder", creator->createCylinder(2, 2, 20)); - addGeometry("Cone", creator->createCone(2, 3, 10)); - addGeometry("Icosphere", creator->createIcoSphere(1, 4, true)); + + if (params.geometryOverride) + params.geometryOverride(creator.get(), addGeometry); + else + { + addGeometry("Cube", creator->createCube({ 1.f,1.f,1.f })); + addGeometry("Rectangle", creator->createRectangle({ 1.5f,3.f })); + addGeometry("Disk", creator->createDisk(2.f, 30)); + addGeometry("Sphere", creator->createSphere(2, 16, 16)); + addGeometry("Cylinder", creator->createCylinder(2, 2, 20)); + addGeometry("Cone", creator->createCone(2, 3, 10)); + addGeometry("Icosphere", creator->createIcoSphere(1, 4, true)); + } + + if (geometries.empty()) + return nullptr; } init.geometries.reserve(init.geometryNames.size()); @@ -78,7 +89,6 @@ class CGeometryCreatorScene : public core::IReferenceCounted auto device = params.utilities->getLogicalDevice(); smart_refctd_ptr converter = CAssetConverter::create({.device=device}); - const auto transferFamily = params.transferQueue->getFamilyIndex(); struct SInputs : CAssetConverter::SInputs From 86a50c5a2b9b07aee676cc92d70b1910b4da46d4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 31 Oct 2025 12:38:37 +0100 Subject: [PATCH 060/219] inject new Grid, pass validation and convert to polygon to GPU (testing creation yet only) --- 50.IESViewer/AppInit.cpp | 6 +++++- 50.IESViewer/AppRender.cpp | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index 3fb850391..275a56e2d 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -347,6 +347,9 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) CGeometryCreatorScene::f_geometry_override_t injector = [](auto* creator, auto addGeometry) { addGeometry("Sphere", creator->createSphere(1.f, 32, 32)); + + // testing, will use it soon + addGeometry("Grid", creator->createGrid({128u, 128u})); }; const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; @@ -358,11 +361,12 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies, .geometryOverride = injector }, - // we want to use the vertex data through UTBs + // we want to use the vertex data through UTBs CSimpleDebugRenderer::DefaultPolygonGeometryPatch ); const auto& geometries = m_scene->getInitParams().geometries; + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), scRes->getRenderpass(), 0, { &geometries.front().get(),geometries.size() }); if (!m_renderer || m_renderer->getGeometries().size() != geometries.size()) return logFail("Could not create 3D Plot Renderer!"); diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index c3b44bb26..2dfb1e2e1 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -140,6 +140,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi const IGPUCommandBuffer::SClearColorValue d3clearValue = { .float32 = {0.1f,0.1f,0.1f,1.f} }; auto info3D = info; info3D.colorClearValues = &d3clearValue; // tmp + info3D.depthStencilClearValues = &depthValue; info3D.framebuffer = fb3D; cb->beginDebugMarker("IES::graphics 3D plot"); cb->beginRenderPass(info3D, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); From d0156858bedf2b985316184a73ac0a889350fdc2 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 3 Nov 2025 10:28:39 +0100 Subject: [PATCH 061/219] add Grid geometry to CGeometryCreatorScene, update CSimpleDebugRenderer to support BasicTriangleStrip, use in 09 example now ready to use with IES, need to pair optimal resolution with grid density --- .../examples/geometry/CGeometryCreatorScene.hpp | 1 + .../nbl/examples/geometry/CSimpleDebugRenderer.hpp | 9 +++++++++ .../src/nbl/examples/shaders/geometry/unified.hlsl | 14 ++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp index 5212589c9..fe846f76e 100644 --- a/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp +++ b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp @@ -77,6 +77,7 @@ class CGeometryCreatorScene : public core::IReferenceCounted addGeometry("Cylinder", creator->createCylinder(2, 2, 20)); addGeometry("Cone", creator->createCone(2, 3, 10)); addGeometry("Icosphere", creator->createIcoSphere(1, 4, true)); + addGeometry("Grid", creator->createGrid({ 32u, 32u })); } if (geometries.empty()) diff --git a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp index 9a9e5c966..77eba1d30 100644 --- a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp +++ b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp @@ -168,6 +168,8 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted params[pipeline_e::BasicTriangleList].fragmentShader = {.shader=shader.get(),.entryPoint="BasicFS"}; params[pipeline_e::BasicTriangleFan].vertexShader = {.shader=shader.get(),.entryPoint="BasicVS"}; params[pipeline_e::BasicTriangleFan].fragmentShader = {.shader=shader.get(),.entryPoint="BasicFS"}; + params[pipeline_e::BasicTriangleStrip].vertexShader = { .shader = shader.get(),.entryPoint = "BasicVS" }; + params[pipeline_e::BasicTriangleStrip].fragmentShader = { .shader = shader.get(),.entryPoint = "BasicFSSnake" }; params[pipeline_e::Cone].vertexShader = {.shader=shader.get(),.entryPoint="ConeVS"}; params[pipeline_e::Cone].fragmentShader = {.shader=shader.get(),.entryPoint="ConeFS"}; for (auto i=0; i(0.5f),1.f); } +[shader("pixel")] +float32_t4 BasicFSSnake(SInterpolants input, uint primID : SV_PrimitiveID) : SV_Target0 +{ + float3 N = normalize(pc.normalView < SPushConstants::DescriptorCount ? input.meta : reconstructGeometricNormal(input.meta)); + float3 base = (primID & 1u) ? float3(0.68,0.68,0.68) : float3(0.88,0.88,0.88); + + float nview = saturate(0.5 + 0.5 * N.z); + float grad = pow(nview, 0.5); + float rim = pow(1.0 - nview, 2.0) * 0.25; + + float3 col = base * (0.2 + 0.8 * grad) + rim; + return float4(col, 1.0); +} + // TODO: do smooth normals on the cone [shader("vertex")] SInterpolants ConeVS(uint32_t VertexIndex : SV_VertexID) From 462bb549f642fe0c02da384416766e8cc049adab Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 3 Nov 2025 16:29:43 +0100 Subject: [PATCH 062/219] sphere from grid geometry, add CSimpleIESRenderer.hpp and IES 3D (debug yet) shaders --- 50.IESViewer/App.hpp | 3 +- 50.IESViewer/AppInit.cpp | 11 +- 50.IESViewer/AppRender.cpp | 7 +- 50.IESViewer/CMakeLists.txt | 9 + 50.IESViewer/CSimpleIESRenderer.hpp | 413 ++++++++++++++++++++ 50.IESViewer/app_resources/ies.pcs.hlsl | 39 ++ 50.IESViewer/app_resources/ies.unified.hlsl | 75 ++++ 7 files changed, 548 insertions(+), 9 deletions(-) create mode 100644 50.IESViewer/CSimpleIESRenderer.hpp create mode 100644 50.IESViewer/app_resources/ies.pcs.hlsl create mode 100644 50.IESViewer/app_resources/ies.unified.hlsl diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp index 21d902583..ec6350b25 100644 --- a/50.IESViewer/App.hpp +++ b/50.IESViewer/App.hpp @@ -10,6 +10,7 @@ #include "nbl/ext/ImGui/ImGui.h" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "IES.hpp" +#include "CSimpleIESRenderer.hpp" NBL_EXPOSE_NAMESPACES @@ -42,7 +43,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp std::array, device_base_t::MaxFramesInFlight> m_frameBuffers2D, m_frameBuffers3D; smart_refctd_ptr m_scene; - smart_refctd_ptr m_renderer; // TODO: will need to derive from it + have my own pixel shader + smart_refctd_ptr m_renderer; Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); // TODO: orbit would be better InputSystem::ChannelReader mouse; diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index 275a56e2d..b94eba86f 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -119,12 +119,13 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) if (!(SHADER = createShader.template operator()() )) return false; m_logger->log("Loading GPU shaders..", system::ILogger::ELL_INFO); - smart_refctd_ptr compute, pixel, vertex, imguiVertex, imguiPixel; + smart_refctd_ptr compute, pixel, vertex, ies, imguiVertex, imguiPixel; { auto start = std::chrono::high_resolution_clock::now(); CREATE_SHADER(compute, "compute") CREATE_SHADER(pixel, "pixel") CREATE_SHADER(vertex, "vertex") + CREATE_SHADER(ies, "ies.unified") CREATE_SHADER(imguiVertex, "imgui.vertex") CREATE_SHADER(imguiPixel, "imgui.pixel") auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); @@ -346,9 +347,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) { CGeometryCreatorScene::f_geometry_override_t injector = [](auto* creator, auto addGeometry) { - addGeometry("Sphere", creator->createSphere(1.f, 32, 32)); - - // testing, will use it soon + // TODO: un-hardcode and per IES, pair optimal resolution addGeometry("Grid", creator->createGrid({128u, 128u})); }; @@ -362,12 +361,12 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) .geometryOverride = injector }, // we want to use the vertex data through UTBs - CSimpleDebugRenderer::DefaultPolygonGeometryPatch + CSimpleIESRenderer::DefaultPolygonGeometryPatch ); const auto& geometries = m_scene->getInitParams().geometries; - m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), scRes->getRenderpass(), 0, { &geometries.front().get(),geometries.size() }); + m_renderer = CSimpleIESRenderer::create(ies, scRes->getRenderpass(), 0, { &geometries.front().get(),geometries.size() }); if (!m_renderer || m_renderer->getGeometries().size() != geometries.size()) return logFail("Could not create 3D Plot Renderer!"); diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index 2dfb1e2e1..6f0f14956 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -152,11 +152,14 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); } - const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix); + const auto viewParams = CSimpleIESRenderer::SViewParams(viewMatrix, viewProjMatrix); + + // TODO: un-hardcode + const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = 1.f, .resX = 128u, .resY = 128u }); // tear down scene every frame m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data(); - m_renderer->render(cb, viewParams); + m_renderer->render(cb, viewParams, iesParams); } cb->endRenderPass(); cb->endDebugMarker(); diff --git a/50.IESViewer/CMakeLists.txt b/50.IESViewer/CMakeLists.txt index 7de094510..fc6226b91 100644 --- a/50.IESViewer/CMakeLists.txt +++ b/50.IESViewer/CMakeLists.txt @@ -3,6 +3,7 @@ set(SRCs AppInit.cpp AppRender.cpp AppGPU.cpp AppUI.cpp AppEvent.cpp AppInputParser.cpp App.hpp AppInputParser.hpp IES.cpp IES.hpp + CSimpleIESRenderer.hpp inputs.json ) @@ -24,10 +25,14 @@ set(DEPENDS app_resources/imgui.vertex.hlsl app_resources/imgui.pixel.hlsl app_resources/imgui.opts.hlsl + app_resources/ies.unified.hlsl + app_resources/ies.pcs.hlsl ) target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) +# TODO: have only 2 inputs "ies.unified" & "imgui.unified" + one header for PCs + set(SM 6_8) set(JSON [=[ [ @@ -43,6 +48,10 @@ set(JSON [=[ "INPUT": "app_resources/vertex.hlsl", "KEY": "vertex" }, + { + "INPUT": "app_resources/ies.unified.hlsl", + "KEY": "ies.unified" + }, { "INPUT": "app_resources/imgui.vertex.hlsl", "KEY": "imgui.vertex" diff --git a/50.IESViewer/CSimpleIESRenderer.hpp b/50.IESViewer/CSimpleIESRenderer.hpp new file mode 100644 index 000000000..ed82f640d --- /dev/null +++ b/50.IESViewer/CSimpleIESRenderer.hpp @@ -0,0 +1,413 @@ +#ifndef _NBL_EXAMPLES_C_SIMPLE_IES_RENDERER_H_INCLUDED_ +#define _NBL_EXAMPLES_C_SIMPLE_IES_RENDERER_H_INCLUDED_ + +// NOTE: this is CSimpleDebugRenderer with dirty updates, not meant to be used outside the example + +#include "nbl/examples/examples.hpp" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include "app_resources/ies.pcs.hlsl" + +namespace nbl::examples +{ + +class CSimpleIESRenderer final : public core::IReferenceCounted +{ +#define EXPOSE_NABLA_NAMESPACES \ + using namespace nbl::core; \ + using namespace nbl::system; \ + using namespace nbl::asset; \ + using namespace nbl::video + + public: + // + constexpr static inline uint16_t VertexAttrubUTBDescBinding = 0; + // + struct SViewParams + { + inline SViewParams(const hlsl::float32_t3x4& _view, const hlsl::float32_t4x4& _viewProj) + { + view = _view; + viewProj = _viewProj; + using namespace nbl::hlsl; + normal = transpose(inverse(float32_t3x3(view))); + } + + inline auto computeForInstance(hlsl::float32_t3x4 world) const + { + using namespace nbl::hlsl; + hlsl::examples::ies::SInstanceMatrices retval = { + .worldViewProj = float32_t4x4(math::linalg::promoted_mul(float64_t4x4(viewProj),float64_t3x4(world))) + }; + const auto sub3x3 = mul(float64_t3x3(viewProj),float64_t3x3(world)); + retval.normal = float32_t3x3(transpose(inverse(sub3x3))); + return retval; + } + + hlsl::float32_t3x4 view; + hlsl::float32_t4x4 viewProj; + hlsl::float32_t3x3 normal; + }; + + struct SIESParams + { + hlsl::float32_t radius; + uint32_t resX : 16; + uint32_t resY : 16; + }; + // + struct SPackedGeometry + { + core::smart_refctd_ptr pipeline = {}; + asset::SBufferBinding indexBuffer = {}; + uint32_t elementCount = 0; + // indices into the descriptor set + constexpr static inline auto MissingView = hlsl::examples::ies::SPushConstants::DescriptorCount; + uint16_t positionView = MissingView; + uint16_t normalView = MissingView; + asset::E_INDEX_TYPE indexType = asset::EIT_UNKNOWN; + }; + // + struct SInstance + { + using SPushConstants = hlsl::examples::ies::SPushConstants; + inline SPushConstants computePushConstants(const SViewParams& viewParams, const SIESParams& iesParams) const + { + using namespace hlsl; + return { + .matrices = viewParams.computeForInstance(world), + .positionView = packedGeo->positionView, + .normalView = packedGeo->normalView, + .resX = iesParams.resX, + .resY = iesParams.resY, + .radius = iesParams.radius + }; + } + + hlsl::float32_t3x4 world; + const SPackedGeometry* packedGeo; + }; + + // + constexpr static inline auto DefaultPolygonGeometryPatch = []()->video::CAssetConverter::patch_t + { + // we want to use the vertex data through UTBs + using usage_f = video::IGPUBuffer::E_USAGE_FLAGS; + video::CAssetConverter::patch_t patch = {}; + patch.positionBufferUsages = usage_f::EUF_UNIFORM_TEXEL_BUFFER_BIT; + patch.indexBufferUsages = usage_f::EUF_INDEX_BUFFER_BIT; + patch.otherBufferUsages = usage_f::EUF_UNIFORM_TEXEL_BUFFER_BIT; + return patch; + }(); + + // + static inline core::smart_refctd_ptr create(core::smart_refctd_ptr precompiled, video::IGPURenderpass* renderpass, const uint32_t subpassIX) + { + EXPOSE_NABLA_NAMESPACES; + + if (!renderpass) + return nullptr; + auto device = const_cast(renderpass->getOriginDevice()); + auto logger = device->getLogger(); + + if (not precompiled) + return nullptr; + smart_refctd_ptr shader = precompiled; + + SInitParams init; + + // create descriptor set + { + // create Descriptor Set Layout + smart_refctd_ptr dsLayout; + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + const IGPUDescriptorSetLayout::SBinding bindings[] = + { + { + .binding = VertexAttrubUTBDescBinding, + .type = IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, + // need this trifecta of flags for `SubAllocatedDescriptorSet` to accept the binding as suballocatable + .createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT|binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT |binding_flags_t::ECF_PARTIALLY_BOUND_BIT, + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX|IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = SPackedGeometry::MissingView + } + }; + dsLayout = device->createDescriptorSetLayout(bindings); + if (!dsLayout) + { + logger->log("Could not create descriptor set layout!",ILogger::ELL_ERROR); + return nullptr; + } + } + + // create Descriptor Set + auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT,{&dsLayout.get(),1}); + auto ds = pool->createDescriptorSet(std::move(dsLayout)); + if (!ds) + { + logger->log("Could not descriptor set!",ILogger::ELL_ERROR); + return nullptr; + } + init.subAllocDS = make_smart_refctd_ptr(std::move(ds)); + } + + // create pipeline layout + const SPushConstantRange ranges[] = {{ + .stageFlags = hlsl::ShaderStage::ESS_VERTEX|hlsl::ShaderStage::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(SInstance::SPushConstants), + }}; + init.layout = device->createPipelineLayout(ranges,smart_refctd_ptr(init.subAllocDS->getDescriptorSet()->getLayout())); + + // create pipelines + using pipeline_e = SInitParams::PipelineType; + { + IGPUGraphicsPipeline::SCreationParams params[pipeline_e::Count] = {}; + params[pipeline_e::SphereTriangleStrip].vertexShader = { .shader = shader.get(),.entryPoint = "SphereVS" }; + params[pipeline_e::SphereTriangleStrip].fragmentShader = { .shader = shader.get(),.entryPoint = "SphereFS" }; + for (auto i=0; i(i); + switch (type) + { + case pipeline_e::SphereTriangleStrip: + primitiveAssembly.primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_STRIP; + break; + default: + assert(false); + break; + } + primitiveAssembly.primitiveRestartEnable = false; + primitiveAssembly.tessPatchVertCount = 3; + rasterization.faceCullingMode = EFCM_NONE; + params[i].cached.subpassIx = subpassIX; + params[i].renderpass = renderpass; + } + if (!device->createGraphicsPipelines(nullptr,params,init.pipelines)) + { + logger->log("Could not create Graphics Pipelines!",ILogger::ELL_ERROR); + return nullptr; + } + } + + return smart_refctd_ptr(new CSimpleIESRenderer(std::move(init)),dont_grab); + } + + // + static inline core::smart_refctd_ptr create(core::smart_refctd_ptr precompiled, video::IGPURenderpass* renderpass, const uint32_t subpassIX, const std::span geometries) + { + auto retval = create(precompiled,renderpass,subpassIX); + if (retval) + retval->addGeometries(geometries); + return retval; + } + + // + struct SInitParams + { + enum PipelineType : uint8_t + { + SphereTriangleStrip, + // TODO: I would also like to project onto cube in which a sphere is put + Count + }; + + core::smart_refctd_ptr subAllocDS; + core::smart_refctd_ptr layout; + core::smart_refctd_ptr pipelines[PipelineType::Count]; + }; + inline const SInitParams& getInitParams() const {return m_params;} + + // + inline bool addGeometries(const std::span geometries) + { + EXPOSE_NABLA_NAMESPACES; + if (geometries.empty()) + return false; + auto device = const_cast(m_params.layout->getOriginDevice()); + + core::vector writes; + core::vector infos; + bool anyFailed = false; + auto allocateUTB = [&](const IGeometry::SDataView& view)->decltype(SubAllocatedDescriptorSet::invalid_value) + { + if (!view) + return SPackedGeometry::MissingView; + auto index = SubAllocatedDescriptorSet::invalid_value; + if (m_params.subAllocDS->multi_allocate(VertexAttrubUTBDescBinding,1,&index)!=0) + { + anyFailed = true; + return SPackedGeometry::MissingView; + } + const auto infosOffset = infos.size(); + infos.emplace_back().desc = device->createBufferView(view.src,view.composed.format); + writes.emplace_back() = { + .dstSet = m_params.subAllocDS->getDescriptorSet(), + .binding = VertexAttrubUTBDescBinding, + .arrayElement = index, + .count = 1, + .info = reinterpret_cast(infosOffset) + }; + return index; + }; + if (anyFailed) + device->getLogger()->log("Failed to allocate a UTB for some geometries, probably ran out of space in Descriptor Set!",system::ILogger::ELL_ERROR); + + auto sizeToSet = m_geoms.size(); + auto resetGeoms = core::makeRAIIExiter([&]()->void + { + for (auto& write : writes) + immediateDealloc(write.arrayElement); + m_geoms.resize(sizeToSet); + } + ); + for (const auto geom : geometries) + { + // could also check device origin on all buffers + if (!geom->valid()) + return false; + auto& out = m_geoms.emplace_back(); + using pipeline_e = SInitParams::PipelineType; + switch (geom->getIndexingCallback()->knownTopology()) + { + case E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_STRIP: + out.pipeline = m_params.pipelines[pipeline_e::SphereTriangleStrip]; + break; + default: + assert(false); + break; + } + if (const auto& view=geom->getIndexView(); view) + { + out.indexBuffer.offset = view.src.offset; + out.indexBuffer.buffer = view.src.buffer; + switch (view.composed.format) + { + case E_FORMAT::EF_R16_UINT: + out.indexType = EIT_16BIT; + break; + case E_FORMAT::EF_R32_UINT: + out.indexType = EIT_32BIT; + break; + default: + return false; + } + } + out.elementCount = geom->getVertexReferenceCount(); + out.positionView = allocateUTB(geom->getPositionView()); + out.normalView = allocateUTB(geom->getNormalView()); + } + + // no geometry + if (infos.empty()) + return false; + + // unbase our pointers + for (auto& write : writes) + write.info = infos.data()+reinterpret_cast(write.info); + if (!device->updateDescriptorSets(writes,{})) + return false; + + // retain + writes.clear(); + sizeToSet = m_geoms.size(); + return true; + } + + // + inline void removeGeometry(const uint32_t ix, const video::ISemaphore::SWaitInfo& info) + { + EXPOSE_NABLA_NAMESPACES; + if (ix>=m_geoms.size()) + return; + + core::vector deferredFree; + deferredFree.reserve(3); + auto deallocate = [&](SubAllocatedDescriptorSet::value_type index)->void + { + if (index>=SPackedGeometry::MissingView) + return; + if (info.semaphore) + deferredFree.push_back(index); + else + immediateDealloc(index); + }; + auto geo = m_geoms.begin() + ix; + deallocate(geo->positionView); + deallocate(geo->normalView); + m_geoms.erase(geo); + + if (deferredFree.empty()) + return; + m_params.subAllocDS->multi_deallocate(VertexAttrubUTBDescBinding,deferredFree.size(),deferredFree.data(),info); + } + + // + inline void clearGeometries(const video::ISemaphore::SWaitInfo& info) + { + // back to front to avoid O(n^2) resize + while (!m_geoms.empty()) + removeGeometry(m_geoms.size()-1,info); + } + + // + inline const auto& getGeometries() const {return m_geoms;} + inline auto& getGeometry(const uint32_t ix) {return m_geoms[ix];} + + // + inline void render(video::IGPUCommandBuffer* cmdbuf, const SViewParams& viewParams, const SIESParams& iesParams) const + { + EXPOSE_NABLA_NAMESPACES; + + cmdbuf->beginDebugMarker("CSimpleIESRenderer::render"); + + const auto* layout = m_params.layout.get(); + const auto ds = m_params.subAllocDS->getDescriptorSet(); + cmdbuf->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS,layout,0,1,&ds); + + for (const auto& instance : m_instances) + { + const auto* geo = instance.packedGeo; + cmdbuf->bindGraphicsPipeline(geo->pipeline.get()); + const auto pc = instance.computePushConstants(viewParams, iesParams); + cmdbuf->pushConstants(layout,hlsl::ShaderStage::ESS_VERTEX|hlsl::ShaderStage::ESS_FRAGMENT,0,sizeof(pc),&pc); + if (geo->indexBuffer) + { + cmdbuf->bindIndexBuffer(geo->indexBuffer,geo->indexType); + cmdbuf->drawIndexed(geo->elementCount,1,0,0,0); + } + else + cmdbuf->draw(geo->elementCount,1,0,0); + } + cmdbuf->endDebugMarker(); + } + + core::vector m_instances; + + protected: + inline CSimpleIESRenderer(SInitParams&& _params) : m_params(std::move(_params)) {} + inline ~CSimpleIESRenderer() + { + // clean shutdown, can also make SubAllocatedDescriptorSet resillient against that, and issue `device->waitIdle` if not everything is freed + const_cast(m_params.layout->getOriginDevice())->waitIdle(); + clearGeometries({}); + } + + inline void immediateDealloc(video::SubAllocatedDescriptorSet::value_type index) + { + video::IGPUDescriptorSet::SDropDescriptorSet dummy[1]; + m_params.subAllocDS->multi_deallocate(dummy,VertexAttrubUTBDescBinding,1,&index); + } + + SInitParams m_params; + core::vector m_geoms; +#undef EXPOSE_NABLA_NAMESPACES +}; + +} +#endif // _NBL_EXAMPLES_C_SIMPLE_IES_RENDERER_H_INCLUDED_ \ No newline at end of file diff --git a/50.IESViewer/app_resources/ies.pcs.hlsl b/50.IESViewer/app_resources/ies.pcs.hlsl new file mode 100644 index 000000000..76356ad24 --- /dev/null +++ b/50.IESViewer/app_resources/ies.pcs.hlsl @@ -0,0 +1,39 @@ +#ifndef _NBL_THIS_EXAMPLE_S_PUSH_CONSTANTS_HLSL_ +#define _NBL_THIS_EXAMPLE_S_PUSH_CONSTANTS_HLSL_ + + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + + +namespace nbl +{ +namespace hlsl +{ +namespace examples +{ +namespace ies +{ + +struct SInstanceMatrices +{ + float32_t4x4 worldViewProj; + float32_t3x3 normal; +}; + +struct SPushConstants +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1<<16)-1; + + SInstanceMatrices matrices; + uint32_t positionView : 16; + uint32_t normalView : 16; + uint32_t resX : 16; + uint32_t resY : 16; + float32_t radius; +}; + +} +} +} +} +#endif // _NBL_THIS_EXAMPLE_S_PUSH_CONSTANTS_HLSL_ diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl new file mode 100644 index 000000000..d4a824779 --- /dev/null +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -0,0 +1,75 @@ +#include "ies.pcs.hlsl" +using namespace nbl::hlsl; +using namespace nbl::hlsl::examples::ies; + +[[vk::binding(0)]] Buffer utbs[SPushConstants::DescriptorCount]; +[[vk::push_constant]] SPushConstants pc; + +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" + +struct SInterpolants +{ + float32_t4 ndc : SV_Position; + float32_t3 meta : COLOR1; + float32_t2 uv : TEXCOORD0; + nointerpolation uint triParity : TEXCOORD15; +}; + +// TODO: all of that for debugging currently, now use IES and project emission onto sphere +// later onto cube I will close my sphere into + +static float32_t3 latLongDir(float32_t2 uv) +{ + const float32_t phi = 6.28318530718f * uv.x; + const float32_t th = 3.14159265359f * uv.y; + const float32_t s = sin(th), c = cos(th); + return float32_t3(s * cos(phi), c, s * sin(phi)); +} + +[shader("vertex")] +SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) +{ + const uint32_t W = pc.resX, H = pc.resY; + const uint32_t i = VertexIndex % W, j = VertexIndex / W; + + // for sphere geometry created from our grid we need to make sure the surface is closed, aligned at U/V edges + const float32_t2 uv = float32_t2( + (float32_t(i)) / float32_t(W), + (float32_t(j)) / float32_t(H) + ); + const float32_t vPos = (j == 0u) ? 0.0f : (j == H - 1u) ? 1.0f : uv.y; + const float32_t uPos = (i == W - 1u) ? 1.0f : uv.x; + const float32_t2 uvPos = float32_t2(uPos, vPos); + + const float32_t3 dir = latLongDir(uvPos); + const float32_t3 pos = pc.radius * dir; + + SInterpolants o; + o.ndc = math::linalg::promoted_mul(pc.matrices.worldViewProj, pos); + o.meta = mul(pc.matrices.normal, dir); + o.triParity = (VertexIndex & 1u); + + // but we want to sample centers + o.uv = float32_t2( + (float32_t(i) + 0.5f) / float32_t(W), + (float32_t(j) + 0.5f) / float32_t(H) + ); + return o; +} + +[shader("pixel")] +float32_t4 SphereFS(SInterpolants input) : SV_Target0 +{ + const float32_t2 uv = input.uv; + const int32_t2 cell = int32_t2(floor(uv * float32_t2(pc.resX, pc.resY))); + const int parity = (cell.x + cell.y) & 1; + const float32_t3 base = parity == 0 ? float32_t3(0.88f,0.88f,0.88f) : float32_t3(0.68f,0.68f,0.68f); + + float32_t3 N = normalize(input.meta); + float32_t nview = saturate(0.5f + 0.5f * N.z); + float32_t grad = pow(nview, 0.5f); + float32_t rim = pow(1.0f - nview, 2.0f) * 0.25f; + + float32_t3 col = base * (0.2f + 0.8f * grad) + rim; + return float32_t4(col, 1.0f); +} \ No newline at end of file From 4cd5f027eabdf88f84e16d47f8fdc6acdd1d36b4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 5 Nov 2025 15:06:39 +0100 Subject: [PATCH 063/219] IES Lat Plot --- 50.IESViewer/App.hpp | 6 +++ 50.IESViewer/AppInit.cpp | 29 +++++++++--- 50.IESViewer/AppRender.cpp | 33 +++++++++---- 50.IESViewer/CSimpleIESRenderer.hpp | 21 +++++---- 50.IESViewer/app_resources/ies.pcs.hlsl | 1 + 50.IESViewer/app_resources/ies.unified.hlsl | 52 ++++++++++----------- 50.IESViewer/main.cpp | 5 ++ 7 files changed, 97 insertions(+), 50 deletions(-) diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp index ec6350b25..9f750f02e 100644 --- a/50.IESViewer/App.hpp +++ b/50.IESViewer/App.hpp @@ -12,6 +12,9 @@ #include "IES.hpp" #include "CSimpleIESRenderer.hpp" +// 3D plot only, full window render and no imgui +// #define DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY + NBL_EXPOSE_NAMESPACES class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApplication @@ -40,7 +43,10 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp size_t m_realFrameIx = 0; smart_refctd_ptr m_semaphore; std::array, device_base_t::MaxFramesInFlight> m_cmdBuffers; + +#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY std::array, device_base_t::MaxFramesInFlight> m_frameBuffers2D, m_frameBuffers3D; +#endif smart_refctd_ptr m_scene; smart_refctd_ptr m_renderer; diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index b94eba86f..07694cb27 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -295,6 +295,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) } } +#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY // frame buffers { // TODO: I will create my own @@ -325,13 +326,13 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) { auto color = createImageView(WIDTH, HEIGHT, EF_R8G8B8A8_SRGB, "[3D Plot]: framebuffer[" + ixs + "].color attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IImage::EAF_COLOR_BIT); - auto depth = createImageView(WIDTH, HEIGHT, EF_D32_SFLOAT, "[3D Plot]: framebuffer[" + ixs + "].depth attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IGPUImage::EAF_DEPTH_BIT); + auto depth = createImageView(WIDTH, HEIGHT, EF_D16_UNORM, "[3D Plot]: framebuffer[" + ixs + "].depth attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IGPUImage::EAF_DEPTH_BIT); fb3D = m_device->createFramebuffer ( { { .renderpass = renderpass, - .depthStencilAttachments = nullptr, + .depthStencilAttachments = &depth.get(), .colorAttachments = &color.get(), .width = WIDTH, .height = HEIGHT @@ -340,15 +341,21 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) } } } +#endif auto scRes = static_cast(m_surface->getSwapchainResources()); // geometries for 3D scene { - CGeometryCreatorScene::f_geometry_override_t injector = [](auto* creator, auto addGeometry) + CGeometryCreatorScene::f_geometry_override_t injector = [&](auto* creator, auto addGeometry) { - // TODO: un-hardcode and per IES, pair optimal resolution - addGeometry("Grid", creator->createGrid({128u, 128u})); + for (auto i = 0u; i < m_assets.size(); ++i) + { + auto& ies = m_assets[i]; + auto resolution = ies.getProfile()->getOptimalIESResolution(); + auto name = "Grid " + std::to_string(i); + addGeometry(name.c_str(), creator->createGrid({ resolution.x, resolution.y })); + } }; const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; @@ -360,13 +367,12 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies, .geometryOverride = injector }, - // we want to use the vertex data through UTBs CSimpleIESRenderer::DefaultPolygonGeometryPatch ); const auto& geometries = m_scene->getInitParams().geometries; - m_renderer = CSimpleIESRenderer::create(ies, scRes->getRenderpass(), 0, { &geometries.front().get(),geometries.size() }); + m_renderer = CSimpleIESRenderer::create(ies, core::smart_refctd_ptr(m_descriptors[0u]->getLayout()), scRes->getRenderpass(), 0, { &geometries.front().get(),geometries.size() }); if (!m_renderer || m_renderer->getGeometries().size() != geometries.size()) return logFail("Could not create 3D Plot Renderer!"); @@ -379,11 +385,17 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); + +#ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY + matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(m_window->getWidth()) / float(m_window->getHeight()), 0.1, 10000); +#else const auto& params = m_frameBuffers3D.front()->getCreationParameters(); matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(params.width) / float(params.height), 0.1, 10000); +#endif camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); } +#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY // imGUI { ext::imgui::UI::SCreationParameters params = {}; @@ -429,6 +441,8 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) auto* ix = addresses.data(); infos[*ix].desc = smart_refctd_ptr(imgui->getFontAtlasView()); ++ix; + + for (uint8_t i = 0u; i < MaxFramesInFlight; ++i, ++ix) infos[*ix].desc = m_frameBuffers2D[i]->getCreationParameters().colorAttachments[0u]; for (uint8_t i = 0u; i < MaxFramesInFlight; ++i, ++ix) infos[*ix].desc = m_frameBuffers3D[i]->getCreationParameters().colorAttachments[0u]; @@ -450,6 +464,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) uiListener(); }); } +#endif m_semaphore = m_device->createSemaphore(m_realFrameIx); if (!m_semaphore) diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index 6f0f14956..731d07b83 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -9,8 +9,16 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi { const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; auto* const cb = m_cmdBuffers.data()[resourceIx].get(); + + auto scRes = static_cast(m_surface->getSwapchainResources()); + +#ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY + IGPUFramebuffer* const fb2D = nullptr; + auto* const fb3D = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); +#else auto* const fb2D = m_frameBuffers2D[resourceIx].get(); auto* const fb3D = m_frameBuffers3D[resourceIx].get(); +#endif cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); @@ -37,18 +45,19 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi .keyboardEvents = captured.keyboard }; +#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY ui.it->update(params); +#endif } auto& ies = m_assets[m_activeAssetIx]; + const auto* profile = ies.getProfile(); PushConstants pc; { pc.vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(); pc.hAnglesBDA = ies.buffers.hAngles->getDeviceAddress(); pc.dataBDA = ies.buffers.data->getDeviceAddress(); - const auto* profile = ies.getProfile(); - pc.maxIValue = profile->getMaxCandelaValue(); pc.vAnglesCount = profile->getVertAngles().size(); pc.hAnglesCount = profile->getHoriAngles().size(); @@ -88,9 +97,12 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi // Graphics { IES::barrier(cb, image); - cb->beginDebugMarker("IES::graphics 2D plot"); +#ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY + asset::VkExtent3D extent = { m_window->getWidth(), m_window->getHeight() }; +#else auto extent = fb2D->getCreationParameters().colorAttachments[0u]->getCreationParameters().image->getCreationParameters().extent; +#endif asset::SViewport viewport; { @@ -126,6 +138,8 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi .renderArea = currentRenderArea }; +#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY + cb->beginDebugMarker("IES::graphics 2D plot"); cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); { auto* layout = m_graphicsPipeline->getLayout(); @@ -136,8 +150,9 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi } cb->endRenderPass(); cb->endDebugMarker(); +#endif - const IGPUCommandBuffer::SClearColorValue d3clearValue = { .float32 = {0.1f,0.1f,0.1f,1.f} }; + const IGPUCommandBuffer::SClearColorValue d3clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; auto info3D = info; info3D.colorClearValues = &d3clearValue; // tmp info3D.depthStencilClearValues = &depthValue; @@ -154,23 +169,24 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi } const auto viewParams = CSimpleIESRenderer::SViewParams(viewMatrix, viewProjMatrix); - // TODO: un-hardcode - const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = 1.f, .resX = 128u, .resY = 128u }); + auto resolution = profile->getOptimalIESResolution(); + const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = 100.f, .resX = resolution.x, .resY = resolution.y, .ds = m_descriptors[0u].get(), .texID = (uint32_t)m_activeAssetIx }); // tear down scene every frame - m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data(); + m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data() + m_activeAssetIx; m_renderer->render(cb, viewParams, iesParams); } cb->endRenderPass(); cb->endDebugMarker(); +#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY cb->beginDebugMarker("IES::graphics ImGUI"); viewport.width = m_window->getWidth(); viewport.height = m_window->getHeight(); scissor.extent = { m_window->getWidth(), m_window->getHeight() }; cb->setScissor(0u, 1u, &scissor); currentRenderArea.extent = { m_window->getWidth(),m_window->getHeight() }; - auto scRes = static_cast(m_surface->getSwapchainResources()); + info.framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); info.renderArea = currentRenderArea; @@ -190,6 +206,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi } cb->endRenderPass(); cb->endDebugMarker(); +#endif cb->end(); } diff --git a/50.IESViewer/CSimpleIESRenderer.hpp b/50.IESViewer/CSimpleIESRenderer.hpp index ed82f640d..f93218e91 100644 --- a/50.IESViewer/CSimpleIESRenderer.hpp +++ b/50.IESViewer/CSimpleIESRenderer.hpp @@ -50,9 +50,11 @@ class CSimpleIESRenderer final : public core::IReferenceCounted struct SIESParams { - hlsl::float32_t radius; + hlsl::float32_t radius = 1.f; uint32_t resX : 16; uint32_t resY : 16; + IGPUDescriptorSet* ds = nullptr; + uint32_t texID; }; // struct SPackedGeometry @@ -79,6 +81,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted .normalView = packedGeo->normalView, .resX = iesParams.resX, .resY = iesParams.resY, + .texID = iesParams.texID, .radius = iesParams.radius }; } @@ -100,7 +103,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted }(); // - static inline core::smart_refctd_ptr create(core::smart_refctd_ptr precompiled, video::IGPURenderpass* renderpass, const uint32_t subpassIX) + static inline core::smart_refctd_ptr create(core::smart_refctd_ptr precompiled, core::smart_refctd_ptr iesDSLayout, video::IGPURenderpass* renderpass, const uint32_t subpassIX) { EXPOSE_NABLA_NAMESPACES; @@ -157,7 +160,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted .offset = 0, .size = sizeof(SInstance::SPushConstants), }}; - init.layout = device->createPipelineLayout(ranges,smart_refctd_ptr(init.subAllocDS->getDescriptorSet()->getLayout())); + init.layout = device->createPipelineLayout(ranges, smart_refctd_ptr(iesDSLayout), smart_refctd_ptr(init.subAllocDS->getDescriptorSet()->getLayout())); // create pipelines using pipeline_e = SInitParams::PipelineType; @@ -183,8 +186,9 @@ class CSimpleIESRenderer final : public core::IReferenceCounted break; } primitiveAssembly.primitiveRestartEnable = false; - primitiveAssembly.tessPatchVertCount = 3; rasterization.faceCullingMode = EFCM_NONE; + rasterization.depthWriteEnable = true; + rasterization.depthCompareOp = ECO_GREATER; params[i].cached.subpassIx = subpassIX; params[i].renderpass = renderpass; } @@ -199,9 +203,9 @@ class CSimpleIESRenderer final : public core::IReferenceCounted } // - static inline core::smart_refctd_ptr create(core::smart_refctd_ptr precompiled, video::IGPURenderpass* renderpass, const uint32_t subpassIX, const std::span geometries) + static inline core::smart_refctd_ptr create(core::smart_refctd_ptr precompiled, core::smart_refctd_ptr iesDSLayout, video::IGPURenderpass* renderpass, const uint32_t subpassIX, const std::span geometries) { - auto retval = create(precompiled,renderpass,subpassIX); + auto retval = create(precompiled, iesDSLayout, renderpass, subpassIX); if (retval) retval->addGeometries(geometries); return retval; @@ -367,8 +371,9 @@ class CSimpleIESRenderer final : public core::IReferenceCounted cmdbuf->beginDebugMarker("CSimpleIESRenderer::render"); const auto* layout = m_params.layout.get(); - const auto ds = m_params.subAllocDS->getDescriptorSet(); - cmdbuf->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS,layout,0,1,&ds); + + IGPUDescriptorSet* descriptors[] = { iesParams.ds, m_params.subAllocDS->getDescriptorSet() }; + cmdbuf->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS,layout,0,2, descriptors); for (const auto& instance : m_instances) { diff --git a/50.IESViewer/app_resources/ies.pcs.hlsl b/50.IESViewer/app_resources/ies.pcs.hlsl index 76356ad24..40b563731 100644 --- a/50.IESViewer/app_resources/ies.pcs.hlsl +++ b/50.IESViewer/app_resources/ies.pcs.hlsl @@ -29,6 +29,7 @@ struct SPushConstants uint32_t normalView : 16; uint32_t resX : 16; uint32_t resY : 16; + uint32_t texID; float32_t radius; }; diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index d4a824779..70410e15a 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -2,7 +2,12 @@ using namespace nbl::hlsl; using namespace nbl::hlsl::examples::ies; -[[vk::binding(0)]] Buffer utbs[SPushConstants::DescriptorCount]; +// TODO: need better common headers +#define MAX_IES_IMAGES 6969 +[[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MAX_IES_IMAGES]; +[[vk::binding(0 + 100, 0)]] SamplerState generalSampler; + +[[vk::binding(0, 1)]] Buffer utbs[SPushConstants::DescriptorCount]; [[vk::push_constant]] SPushConstants pc; #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" @@ -10,15 +15,10 @@ using namespace nbl::hlsl::examples::ies; struct SInterpolants { float32_t4 ndc : SV_Position; - float32_t3 meta : COLOR1; - float32_t2 uv : TEXCOORD0; - nointerpolation uint triParity : TEXCOORD15; + float32_t3 latDir : COLOR1; }; -// TODO: all of that for debugging currently, now use IES and project emission onto sphere -// later onto cube I will close my sphere into - -static float32_t3 latLongDir(float32_t2 uv) +float32_t3 latLongDir(float32_t2 uv) { const float32_t phi = 6.28318530718f * uv.x; const float32_t th = 3.14159265359f * uv.y; @@ -26,6 +26,18 @@ static float32_t3 latLongDir(float32_t2 uv) return float32_t3(s * cos(phi), c, s * sin(phi)); } +// TODO: should be shared, duplicated code +float32_t2 iesDirToUv(float32_t3 dir) +{ + float32_t sum = dot(float32_t3(1.0f, 1.0f, 1.0f), abs(dir)); + float32_t3 s = dir / sum; + + if (s.z < 0.0f) + s.xy = sign(s.xy) * (1.0f - abs(s.yx)); + + return s.xy * 0.5f + 0.5f; +} + [shader("vertex")] SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) { @@ -46,30 +58,16 @@ SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) SInterpolants o; o.ndc = math::linalg::promoted_mul(pc.matrices.worldViewProj, pos); - o.meta = mul(pc.matrices.normal, dir); - o.triParity = (VertexIndex & 1u); + o.latDir = dir; - // but we want to sample centers - o.uv = float32_t2( - (float32_t(i) + 0.5f) / float32_t(W), - (float32_t(j) + 0.5f) / float32_t(H) - ); return o; } [shader("pixel")] float32_t4 SphereFS(SInterpolants input) : SV_Target0 { - const float32_t2 uv = input.uv; - const int32_t2 cell = int32_t2(floor(uv * float32_t2(pc.resX, pc.resY))); - const int parity = (cell.x + cell.y) & 1; - const float32_t3 base = parity == 0 ? float32_t3(0.88f,0.88f,0.88f) : float32_t3(0.68f,0.68f,0.68f); - - float32_t3 N = normalize(input.meta); - float32_t nview = saturate(0.5f + 0.5f * N.z); - float32_t grad = pow(nview, 0.5f); - float32_t rim = pow(1.0f - nview, 2.0f) * 0.25f; - - float32_t3 col = base * (0.2f + 0.8f * grad) + rim; - return float32_t4(col, 1.0f); + float32_t2 uv = iesDirToUv(input.latDir); + float32_t candela = inIESCandelaImage[pc.texID].Sample(generalSampler, uv).r; + float32_t v = 1.0f - exp(-candela); + return float32_t4(v,v,v,1); } \ No newline at end of file diff --git a/50.IESViewer/main.cpp b/50.IESViewer/main.cpp index d8c15de00..cbb3ee535 100644 --- a/50.IESViewer/main.cpp +++ b/50.IESViewer/main.cpp @@ -7,7 +7,12 @@ // TODO #define APP_WINDOW_WIDTH 669*2u #define APP_WINDOW_HEIGHT APP_WINDOW_WIDTH + +#ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY +#define APP_DEPTH_BUFFER_FORMAT EF_D16_UNORM +#else #define APP_DEPTH_BUFFER_FORMAT EF_UNKNOWN +#endif IESViewer::IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), From 16f06ed43909092d43dbe2ea30f10aa1b8e7e5a1 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 21 Nov 2025 15:27:19 +0100 Subject: [PATCH 064/219] Removed core::matrixSIMD from example 09 --- 09_GeometryCreator/main.cpp | 21 +++--- .../include/nbl/examples/cameras/CCamera.hpp | 64 +++++++++++-------- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/09_GeometryCreator/main.cpp b/09_GeometryCreator/main.cpp index cb3c21f4d..c54355e17 100644 --- a/09_GeometryCreator/main.cpp +++ b/09_GeometryCreator/main.cpp @@ -2,6 +2,8 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#include +#include #include "common.hpp" @@ -71,10 +73,10 @@ class GeometryCreatorApp final : public MonoWindowApplication, public BuiltinRes // camera { - core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); - core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(m_initialResolution.x)/float(m_initialResolution.y), 0.1, 10000); - camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); + hlsl::float32_t3 cameraPosition(-5.81655884, 2.58630896, -4.23974705); + hlsl::float32_t3 cameraTarget(-0.349590302, -0.213266611, 0.317821503); + float32_t4x4 projectionMatrix = hlsl::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(m_initialResolution.x) / m_initialResolution.y, 0.1f, 10000.0f); + camera = Camera(core::constructVecorSIMDFromHLSLVector(cameraPosition), core::constructVecorSIMDFromHLSLVector(cameraTarget), projectionMatrix, 1.069f, 0.4f); } onAppInitializedFinish(); @@ -139,13 +141,8 @@ class GeometryCreatorApp final : public MonoWindowApplication, public BuiltinRes cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); } - float32_t3x4 viewMatrix; - float32_t4x4 viewProjMatrix; - // TODO: get rid of legacy matrices - { - memcpy(&viewMatrix,camera.getViewMatrix().pointer(),sizeof(viewMatrix)); - memcpy(&viewProjMatrix,camera.getConcatenatedMatrix().pointer(),sizeof(viewProjMatrix)); - } + float32_t3x4 viewMatrix = camera.getViewMatrix(); + float32_t4x4 viewProjMatrix = camera.getConcatenatedMatrix(); const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix); // tear down scene every frame @@ -251,7 +248,7 @@ class GeometryCreatorApp final : public MonoWindowApplication, public BuiltinRes InputSystem::ChannelReader keyboard; // - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); uint16_t gcIndex = {}; diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index 3b3cd38d8..7d527e4b0 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -12,12 +12,14 @@ #include #include +#include +#include class Camera { public: Camera() = default; - Camera(const nbl::core::vectorSIMDf& position, const nbl::core::vectorSIMDf& lookat, const nbl::core::matrix4SIMD& projection, float moveSpeed = 1.0f, float rotateSpeed = 1.0f, const nbl::core::vectorSIMDf& upVec = nbl::core::vectorSIMDf(0.0f, 1.0f, 0.0f), const nbl::core::vectorSIMDf& backupUpVec = nbl::core::vectorSIMDf(0.5f, 1.0f, 0.0f)) + Camera(const nbl::core::vectorSIMDf& position, const nbl::core::vectorSIMDf& lookat, const nbl::hlsl::float32_t4x4& projection, float moveSpeed = 1.0f, float rotateSpeed = 1.0f, const nbl::core::vectorSIMDf& upVec = nbl::core::vectorSIMDf(0.0f, 1.0f, 0.0f), const nbl::core::vectorSIMDf& backupUpVec = nbl::core::vectorSIMDf(0.5f, 1.0f, 0.0f)) : position(position) , initialPosition(position) , target(lookat) @@ -63,11 +65,11 @@ class Camera inline void mapKeysCustom(std::array& map) { keysMap = map; } - inline const nbl::core::matrix4SIMD& getProjectionMatrix() const { return projMatrix; } - inline const nbl::core::matrix3x4SIMD& getViewMatrix() const { return viewMatrix; } - inline const nbl::core::matrix4SIMD& getConcatenatedMatrix() const { return concatMatrix; } + inline const nbl::hlsl::float32_t4x4& getProjectionMatrix() const { return projMatrix; } + inline const nbl::hlsl::float32_t3x4& getViewMatrix() const { return viewMatrix; } + inline const nbl::hlsl::float32_t4x4& getConcatenatedMatrix() const { return concatMatrix; } - inline void setProjectionMatrix(const nbl::core::matrix4SIMD& projection) + inline void setProjectionMatrix(const nbl::hlsl::float32_t4x4& projection) { projMatrix = projection; @@ -75,7 +77,7 @@ class Camera { leftHanded = nbl::hlsl::determinant(hlslMatMap) < 0.f; } - concatMatrix = nbl::core::matrix4SIMD::concatenateBFollowedByAPrecisely(projMatrix, nbl::core::matrix4SIMD(viewMatrix)); + concatMatrix = nbl::hlsl::mul(projMatrix, nbl::hlsl::getMatrix3x4As4x4(viewMatrix)); } inline void setPosition(const nbl::core::vectorSIMDf& pos) @@ -112,22 +114,25 @@ class Camera inline void recomputeViewMatrix() { - nbl::core::vectorSIMDf pos = position; - nbl::core::vectorSIMDf localTarget = nbl::core::normalize(target - pos); + nbl::hlsl::float32_t3 pos = nbl::core::convertToHLSLVector(position).xyz; + nbl::hlsl::float32_t3 localTarget = (nbl::core::convertToHLSLVector(nbl::core::normalize(target)).xyz - pos).xyz; + // TODO: remove completely when removing vectorSIMD + nbl::hlsl::float32_t3 _target = nbl::core::convertToHLSLVector(target).xyz; // if upvector and vector to the target are the same, we have a // problem. so solve this problem: - nbl::core::vectorSIMDf up = nbl::core::normalize(upVector); - nbl::core::vectorSIMDf cross = nbl::core::cross(localTarget, up); - bool upVectorNeedsChange = nbl::core::lengthsquared(cross)[0] == 0; + nbl::hlsl::float32_t3 up = nbl::core::convertToHLSLVector(nbl::core::normalize(upVector)).xyz; + nbl::hlsl::float32_t3 cross = nbl::hlsl::cross(localTarget, up); + const bool upVectorNeedsChange = nbl::hlsl::lengthsquared(cross) == 0; if (upVectorNeedsChange) - up = nbl::core::normalize(backupUpVector); + up = nbl::core::convertToHLSLVector(nbl::core::normalize(backupUpVector)); if (leftHanded) - viewMatrix = nbl::core::matrix3x4SIMD::buildCameraLookAtMatrixLH(pos, target, up); + viewMatrix = nbl::hlsl::buildCameraLookAtMatrixLH(pos, _target, up); else - viewMatrix = nbl::core::matrix3x4SIMD::buildCameraLookAtMatrixRH(pos, target, up); - concatMatrix = nbl::core::matrix4SIMD::concatenateBFollowedByAPrecisely(projMatrix, nbl::core::matrix4SIMD(viewMatrix)); + viewMatrix = nbl::hlsl::buildCameraLookAtMatrixRH(pos, _target, up); + + concatMatrix = nbl::hlsl::mul(projMatrix, nbl::hlsl::getMatrix3x4As4x4(viewMatrix)); } inline bool getLeftHanded() const { return leftHanded; } @@ -148,14 +153,14 @@ class Camera if(ev.type == nbl::ui::SMouseEvent::EET_MOVEMENT && mouseDown) { - nbl::core::vectorSIMDf pos = getPosition(); - nbl::core::vectorSIMDf localTarget = getTarget() - pos; + nbl::hlsl::float32_t4 pos = nbl::core::convertToHLSLVector(getPosition()); + nbl::hlsl::float32_t4 localTarget = nbl::core::convertToHLSLVector(getTarget()) - pos; // Get Relative Rotation for localTarget in Radians float relativeRotationX, relativeRotationY; - relativeRotationY = atan2(localTarget.X, localTarget.Z); - const double z1 = nbl::core::sqrt(localTarget.X*localTarget.X + localTarget.Z*localTarget.Z); - relativeRotationX = atan2(z1, localTarget.Y) - nbl::core::PI()/2; + relativeRotationY = atan2(localTarget.x, localTarget.z); + const double z1 = nbl::core::sqrt(localTarget.x*localTarget.x + localTarget.z*localTarget.z); + relativeRotationX = atan2(z1, localTarget.y) - nbl::core::PI()/2; constexpr float RotateSpeedScale = 0.003f; relativeRotationX -= ev.movementEvent.relativeMovementY * rotateSpeed * RotateSpeedScale * -1.0f; @@ -174,13 +179,16 @@ class Camera if (relativeRotationX > MaxVerticalAngle && relativeRotationX < 2 * nbl::core::PI()-MaxVerticalAngle) relativeRotationX = MaxVerticalAngle; - localTarget.set(0,0, nbl::core::max(1.f, nbl::core::length(pos)[0]), 1.f); + localTarget = nbl::hlsl::float32_t4(0, 0, nbl::core::max(1.f, nbl::hlsl::length(pos)), 1.0f); - nbl::core::matrix3x4SIMD mat; - mat.setRotation(nbl::core::quaternion(relativeRotationX, relativeRotationY, 0)); - mat.transformVect(localTarget); - - setTarget(localTarget + pos); + nbl::hlsl::float32_t3x4 mat; + nbl::hlsl::setRotation(mat, nbl::hlsl::quaternion::create(relativeRotationX, relativeRotationY, 0)); + + localTarget = nbl::hlsl::float32_t4(nbl::hlsl::mul(mat, localTarget), 0.0f); + + nbl::core::vectorSIMDf finalTarget = nbl::core::constructVecorSIMDFromHLSLVector(localTarget + pos); + finalTarget.w = 0.0f; + setTarget(finalTarget); } } } @@ -311,8 +319,8 @@ class Camera private: nbl::core::vectorSIMDf initialPosition, initialTarget, position, target, upVector, backupUpVector; // TODO: make first 2 const + add default copy constructor - nbl::core::matrix3x4SIMD viewMatrix; - nbl::core::matrix4SIMD concatMatrix, projMatrix; + nbl::hlsl::float32_t3x4 viewMatrix; + nbl::hlsl::float32_t4x4 concatMatrix, projMatrix; float moveSpeed, rotateSpeed; bool leftHanded, firstUpdate = true, mouseDown = false; From 65e0126cac320c10efe34ebbc144da45d587a54e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 24 Nov 2025 16:47:58 +0100 Subject: [PATCH 065/219] refactor 50. ex shaders, reduce from 5 -> 2 unified inputs, clear duplicated code (doesn't compile cpp yet) TODO: fix an ambiguity issue, update example's CPP files --- 50.IESViewer/CMakeLists.txt | 29 +-- 50.IESViewer/app_resources/common.hlsl | 91 +++---- 50.IESViewer/app_resources/compute.hlsl | 222 ------------------ 50.IESViewer/app_resources/ies.pcs.hlsl | 40 ---- 50.IESViewer/app_resources/ies.unified.hlsl | 176 ++++++++++++-- 50.IESViewer/app_resources/imgui.pixel.hlsl | 6 - 50.IESViewer/app_resources/imgui.unified.hlsl | 7 + 50.IESViewer/app_resources/imgui.vertex.hlsl | 5 - 50.IESViewer/app_resources/pixel.hlsl | 58 ----- 50.IESViewer/app_resources/vertex.hlsl | 6 - 10 files changed, 201 insertions(+), 439 deletions(-) delete mode 100644 50.IESViewer/app_resources/compute.hlsl delete mode 100644 50.IESViewer/app_resources/ies.pcs.hlsl delete mode 100644 50.IESViewer/app_resources/imgui.pixel.hlsl create mode 100644 50.IESViewer/app_resources/imgui.unified.hlsl delete mode 100644 50.IESViewer/app_resources/imgui.vertex.hlsl delete mode 100644 50.IESViewer/app_resources/pixel.hlsl delete mode 100644 50.IESViewer/app_resources/vertex.hlsl diff --git a/50.IESViewer/CMakeLists.txt b/50.IESViewer/CMakeLists.txt index fc6226b91..70ec73cf7 100644 --- a/50.IESViewer/CMakeLists.txt +++ b/50.IESViewer/CMakeLists.txt @@ -19,46 +19,23 @@ target_link_libraries(${EXECUTABLE_NAME} PRIVATE nlohmann_json::nlohmann_json) set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") set(DEPENDS app_resources/common.hlsl - app_resources/compute.hlsl - app_resources/pixel.hlsl - app_resources/vertex.hlsl - app_resources/imgui.vertex.hlsl - app_resources/imgui.pixel.hlsl app_resources/imgui.opts.hlsl app_resources/ies.unified.hlsl - app_resources/ies.pcs.hlsl + app_resources/imgui.unified.hlsl ) target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) -# TODO: have only 2 inputs "ies.unified" & "imgui.unified" + one header for PCs - set(SM 6_8) set(JSON [=[ [ - { - "INPUT": "app_resources/compute.hlsl", - "KEY": "compute" - }, - { - "INPUT": "app_resources/pixel.hlsl", - "KEY": "pixel" - }, - { - "INPUT": "app_resources/vertex.hlsl", - "KEY": "vertex" - }, { "INPUT": "app_resources/ies.unified.hlsl", "KEY": "ies.unified" }, { - "INPUT": "app_resources/imgui.vertex.hlsl", - "KEY": "imgui.vertex" - }, - { - "INPUT": "app_resources/imgui.pixel.hlsl", - "KEY": "imgui.pixel" + "INPUT": "app_resources/imgui.unified.hlsl", + "KEY": "imgui.unified" } ] ]=]) diff --git a/50.IESViewer/app_resources/common.hlsl b/50.IESViewer/app_resources/common.hlsl index 9705c2282..fb133a226 100644 --- a/50.IESViewer/app_resources/common.hlsl +++ b/50.IESViewer/app_resources/common.hlsl @@ -3,65 +3,52 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -#ifdef __HLSL_VERSION -#include "nbl/builtin/hlsl/bda/__ptr.hlsl" -#endif // __HLSL_VERSION - -// -> TODO: use NBL_CONTEXPR or something -#ifndef UINT16_MAX -#define UINT16_MAX 65535u // would be cool if we have this define somewhere or GLSL do -#endif // UINT16_MAX -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795f // would be cool if we have this define somewhere or GLSL do -#endif // M_PI - -#define M_HALF_PI M_PI/2.0f // would be cool if we have this define somewhere or GLSL do #define QUANT_ERROR_ADMISSIBLE 1/1024 - #define WORKGROUP_SIZE 256u #define WORKGROUP_DIMENSION 16u -// <- + wipe whatever we already have - -// TODO: since NSC prebuilds into SPIRV - maybe could make it a CMake option with a default val #define MAX_IES_IMAGES 6969 -using namespace nbl::hlsl; - -struct PushConstants +namespace nbl +{ +namespace hlsl +{ +namespace this_example +{ +namespace ies { - uint64_t hAnglesBDA; - uint64_t vAnglesBDA; - uint64_t dataBDA; - float64_t maxIValue; - - uint32_t hAnglesCount; - uint32_t vAnglesCount; - uint32_t dataCount; - - uint32_t mode; - uint32_t texIx; - float32_t zAngleDegreeRotation; - - uint32_t dummy; - #ifdef __HLSL_VERSION - float64_t getHorizontalAngle(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(hAnglesBDA) + i).deref().load(); } - float64_t getVerticalAngle(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(vAnglesBDA) + i).deref().load(); } - float64_t getData(uint32_t i) { return (nbl::hlsl::bda::__ptr::create(dataBDA) + i).deref().load(); } - #endif // __HLSL_VERSION +struct SInstanceMatrices +{ + float32_t4x4 worldViewProj; + float32_t3x3 normal; }; -#ifdef __HLSL_VERSION -[[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MAX_IES_IMAGES]; -[[vk::binding(1, 0)]] Texture2D inSphericalCoordinatesImage[MAX_IES_IMAGES]; -[[vk::binding(2, 0)]] Texture2D inOUVProjectionDirectionImage[MAX_IES_IMAGES]; -[[vk::binding(3, 0)]] Texture2D inPassTMaskImage[MAX_IES_IMAGES]; -[[vk::binding(0 + 10, 0)]] RWTexture2D outIESCandelaImage[MAX_IES_IMAGES]; -[[vk::binding(1 + 10, 0)]] RWTexture2D outSphericalCoordinatesImage[MAX_IES_IMAGES]; -[[vk::binding(2 + 10, 0)]] RWTexture2D outOUVProjectionDirectionImage[MAX_IES_IMAGES]; -[[vk::binding(3 + 10, 0)]] RWTexture2D outPassTMask[MAX_IES_IMAGES]; -[[vk::binding(0 + 100, 0)]] SamplerState generalSampler; -[[vk::push_constant]] struct PushConstants pc; -#endif // __HLSL_VERSION - +struct PushConstants +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1<<16)-1; + + SInstanceMatrices matrices; + uint32_t positionView : 16; + uint32_t normalView : 16; + uint32_t mode : 8; + uint32_t symmetry : 8; + uint32_t texIx : 16; + + uint64_t hAnglesBDA; + uint64_t vAnglesBDA; + uint64_t dataBDA; + + uint32_t hAnglesCount; + uint32_t vAnglesCount; + uint32_t dataCount; + + float32_t maxIValue; + float32_t zAngleDegreeRotation; + float32_t sphereRadius; +}; + +} +} +} +} #endif // _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ diff --git a/50.IESViewer/app_resources/compute.hlsl b/50.IESViewer/app_resources/compute.hlsl deleted file mode 100644 index cf22466fc..000000000 --- a/50.IESViewer/app_resources/compute.hlsl +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "common.hlsl" - -float32_t3 octahedronUVToDir(float64_t2 uv) -{ - float32_t3 position = float32_t3((uv * 2.0 - 1.0).xy, 0.0); - float32_t2 absP = float32_t2(abs(position.x), abs(position.y)); - - position.z = 1.0 - absP.x - absP.y; - - if (position.z < 0.0) - { - position.x = sign(position.x) * (1.0 - absP.y); - position.y = sign(position.y) * (1.0 - absP.x); - } - - // rotate position vector around Z-axis with "pc.zAngleDegreeRotation" - if (pc.zAngleDegreeRotation != 0.0) - { - float64_t rDegree = pc.zAngleDegreeRotation; - - const float32_t zAngleRadians = float32_t(rDegree * M_PI / 180.0); - const float64_t cosineV = cos(zAngleRadians); - const float64_t sineV = sin(zAngleRadians); - - position = float32_t3(cosineV * position.x - cosineV * position.y, sineV * position.x + sineV * position.y, position.z); - } - - return normalize(position); -} - -//! Returns spherical coordinates with physics convention in radians -/* - https://en.wikipedia.org/wiki/Spherical_coordinate_system#/media/File:3D_Spherical.svg - Retval.x is "theta" polar angle in range [0, PI] & Retval.y "phi" is azimuthal angle - in [-PI, PI] range -*/ - -float32_t2 sphericalDirToRadians(float32_t3 direction) -{ - float32_t theta = acos(clamp(direction.z / length(direction), -1.0, 1.0)); - float32_t phi = atan2(direction.y, direction.x); // TODO: check it - - return float32_t2(theta, phi); -} - -uint32_t implGetVUB(const float64_t angle) -{ - for (uint32_t i = 0; i < pc.vAnglesCount; ++i) - if (pc.getVerticalAngle(i) > angle) - return i; - - return pc.vAnglesCount; -} - -uint32_t implGetHUB(const float64_t angle) -{ - for (uint32_t i = 0; i < pc.hAnglesCount; ++i) - if (pc.getHorizontalAngle(i) > angle) - return i; - - return pc.hAnglesCount; -} - -uint32_t getVLB(const float64_t angle) -{ - return uint32_t(max(int(implGetVUB(angle)) - 1, 0)); -} - -uint32_t getHLB(const float64_t angle) -{ - return uint32_t(max(int(implGetHUB(angle)) - 1, 0)); -} - -uint32_t getVUB(const float64_t angle) -{ - return uint32_t(min(int(implGetVUB(angle)), int(pc.vAnglesCount) - 1)); -} - -uint32_t getHUB(const float64_t angle) -{ - return uint32_t(min(int(implGetHUB(angle)), int(pc.hAnglesCount) - 1)); -} - -float64_t getValue(uint32_t i, uint32_t j) -{ - return pc.getData(pc.vAnglesCount * i + j); -} - -// symmetry -#define ISOTROPIC 0u -#define QUAD_SYMETRIC 1u -#define HALF_SYMETRIC 2u -#define NO_LATERAL_SYMMET 3u - -uint32_t getSymmetry() // TODO: to reduce check time we could pass it with PCs -{ - if (pc.hAnglesCount < 2) // careful here, somebody can break it by feeding us with too much data by mistake - return ISOTROPIC; - - const float64_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); - - if (hABack == 90) - return QUAD_SYMETRIC; - else if (hABack == 180) // note that OTHER_HALF_SYMMETRIC = HALF_SYMETRIC here - return HALF_SYMETRIC; - else - return NO_LATERAL_SYMMET; -} - -float32_t wrapPhi(const float32_t phi, const uint32_t symmetry) //! wrap phi spherical coordinate compoment to range defined by symmetry -{ - switch (symmetry) - { - case ISOTROPIC: - return 0.0; - case QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range - { - float32_t wrapPhi = abs(phi); //! first MIRROR - - if (wrapPhi > M_HALF_PI) //! then REPEAT - wrapPhi = clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0, M_HALF_PI); - - return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 - } - case HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range - return abs(phi); //! eg. maps (in degress) 181 -> 179 or 359 -> 1 - case NO_LATERAL_SYMMET: - { - if (phi < 0) - return phi + 2.0 * M_PI; - else - return phi; - } - } - - return 69; -} - -float64_t sampleI(const float32_t2 sphericalCoordinates, const uint32_t symmetry) -{ - const float64_t vAngle = degrees(sphericalCoordinates.x), hAngle = degrees(wrapPhi(sphericalCoordinates.y, symmetry)); - - float64_t vABack = pc.getVerticalAngle(pc.vAnglesCount - 1); - float64_t hABack = pc.getHorizontalAngle(pc.hAnglesCount - 1); - - if (vAngle > vABack) - return 0.0; - - // bilinear interpolation - uint32_t j0 = getVLB(vAngle); - uint32_t j1 = getVUB(vAngle); - uint32_t i0 = symmetry == ISOTROPIC ? 0 : getHLB(hAngle); - uint32_t i1 = symmetry == ISOTROPIC ? 0 : getHUB(hAngle); - - float64_t uReciprocal = i1 == i0 ? 1.0 : 1.0 / (pc.getHorizontalAngle(i1) - pc.getHorizontalAngle(i0)); - float64_t vReciprocal = j1 == j0 ? 1.0 : 1.0 / (pc.getVerticalAngle(j1) - pc.getVerticalAngle(j0)); - - float64_t u = (hAngle - pc.getHorizontalAngle(i0)) * uReciprocal; - float64_t v = (vAngle - pc.getVerticalAngle(j0)) * vReciprocal; - - float64_t s0 = getValue(i0, j0) * (1.0 - v) + getValue(i0, j1) * (v); - float64_t s1 = getValue(i1, j0) * (1.0 - v) + getValue(i1, j1) * (v); - - return s0 * (1.0 - u) + s1 * u; -} - -//! Checks if (x,y) /in [0,PI] x [-PI,PI] product -/* - IES vertical range is [0, 180] degrees - and horizontal range is [0, 360] degrees - but for easier computations (MIRROR & MIRROW_REPEAT operations) - we represent horizontal range as [-180, 180] given spherical coordinates -*/ - -bool isWithinSCDomain(const float64_t2 p) -{ - const float64_t2 lb = float64_t2(0, -M_PI); - const float64_t2 ub = float64_t2(M_PI, M_PI); - - return all(lb <= p) && all(p <= ub); -} - -[numthreads(WORKGROUP_DIMENSION, WORKGROUP_DIMENSION, 1)] -[shader("compute")] -void main(uint32_t3 ID : SV_DispatchThreadID) -{ - uint32_t2 destinationSize; - outIESCandelaImage[pc.texIx].GetDimensions(destinationSize.x, destinationSize.y); - const uint32_t2 pixelCoordinates = uint32_t2(glsl::gl_GlobalInvocationID().x, glsl::gl_GlobalInvocationID().y); - - const float32_t VERTICAL_INVERSE = 1.0f / float32_t(destinationSize.x); - const float32_t HORIZONTAL_INVERSE = 1.0f / float32_t(destinationSize.y); - - if (all(pixelCoordinates < destinationSize)) - { - const float32_t2 uv = float32_t2((float32_t(pixelCoordinates.x) + 0.5) * VERTICAL_INVERSE, (float32_t(pixelCoordinates.y) + 0.5) * HORIZONTAL_INVERSE); - const float32_t3 direction = octahedronUVToDir(uv); - const float32_t2 sphericalCoordinates = sphericalDirToRadians(direction); // third radius spherical compoment is normalized and skipped - - const float32_t normD = length(direction); - float32_t2 mask; - - if (1.0f - QUANT_ERROR_ADMISSIBLE <= normD && normD <= 1.0f + QUANT_ERROR_ADMISSIBLE) - mask.x = 1.0; // pass - else - mask.x = 0; - - if (isWithinSCDomain(sphericalCoordinates)) - mask.y = 1.0; // pass - else - mask.y = 0; - - outIESCandelaImage[pc.texIx][pixelCoordinates] = float32_t(sampleI(sphericalCoordinates, getSymmetry()) / pc.maxIValue); - outSphericalCoordinatesImage[pc.texIx][pixelCoordinates] = sphericalCoordinates; - outOUVProjectionDirectionImage[pc.texIx][pixelCoordinates] = direction; - outPassTMask[pc.texIx][pixelCoordinates] = mask; - } -} \ No newline at end of file diff --git a/50.IESViewer/app_resources/ies.pcs.hlsl b/50.IESViewer/app_resources/ies.pcs.hlsl deleted file mode 100644 index 40b563731..000000000 --- a/50.IESViewer/app_resources/ies.pcs.hlsl +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef _NBL_THIS_EXAMPLE_S_PUSH_CONSTANTS_HLSL_ -#define _NBL_THIS_EXAMPLE_S_PUSH_CONSTANTS_HLSL_ - - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" - - -namespace nbl -{ -namespace hlsl -{ -namespace examples -{ -namespace ies -{ - -struct SInstanceMatrices -{ - float32_t4x4 worldViewProj; - float32_t3x3 normal; -}; - -struct SPushConstants -{ - NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1<<16)-1; - - SInstanceMatrices matrices; - uint32_t positionView : 16; - uint32_t normalView : 16; - uint32_t resX : 16; - uint32_t resY : 16; - uint32_t texID; - float32_t radius; -}; - -} -} -} -} -#endif // _NBL_THIS_EXAMPLE_S_PUSH_CONSTANTS_HLSL_ diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index 70410e15a..260dc4a2b 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -1,16 +1,48 @@ -#include "ies.pcs.hlsl" +#include "common.hlsl" +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include "nbl/builtin/hlsl/math/octahedral.hlsl" +#include "nbl/builtin/hlsl/math/polar.hlsl" +#include "nbl/builtin/hlsl/ies/sampler.hlsl" +#include "nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl" + using namespace nbl::hlsl; -using namespace nbl::hlsl::examples::ies; +using namespace nbl::hlsl::this_example::ies; +using namespace nbl::hlsl::ext::FullScreenTriangle; -// TODO: need better common headers -#define MAX_IES_IMAGES 6969 [[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MAX_IES_IMAGES]; +[[vk::binding(1, 0)]] Texture2D inSphericalCoordinatesImage[MAX_IES_IMAGES]; +[[vk::binding(2, 0)]] Texture2D inOUVProjectionDirectionImage[MAX_IES_IMAGES]; +[[vk::binding(3, 0)]] Texture2D inPassTMaskImage[MAX_IES_IMAGES]; +[[vk::binding(0 + 10, 0)]] RWTexture2D outIESCandelaImage[MAX_IES_IMAGES]; +[[vk::binding(1 + 10, 0)]] RWTexture2D outSphericalCoordinatesImage[MAX_IES_IMAGES]; +[[vk::binding(2 + 10, 0)]] RWTexture2D outOUVProjectionDirectionImage[MAX_IES_IMAGES]; +[[vk::binding(3 + 10, 0)]] RWTexture2D outPassTMask[MAX_IES_IMAGES]; [[vk::binding(0 + 100, 0)]] SamplerState generalSampler; +[[vk::binding(0, 1)]] Buffer utbs[PushConstants::DescriptorCount]; +[[vk::push_constant]] PushConstants pc; + +struct Accessor +{ + using key_t = uint32_t; + using key_t2 = vector; + using value_t = float32_t; + using symmetry_t = nbl::hlsl::ies::ProfileProperties::LuminairePlanesSymmetry; -[[vk::binding(0, 1)]] Buffer utbs[SPushConstants::DescriptorCount]; -[[vk::push_constant]] SPushConstants pc; + static key_t vAnglesCount() { return pc.vAnglesCount; } + static key_t hAnglesCount() { return pc.hAnglesCount; } -#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" + template) + static inline value_t vAngle(T j) { return (nbl::hlsl::bda::__ptr::create(pc.vAnglesBDA) + j).deref().load(); } + + template) + static inline value_t hAngle(T i) { return (nbl::hlsl::bda::__ptr::create(pc.hAnglesBDA) + i).deref().load(); } + + template) + static inline value_t value(T ij) { return (nbl::hlsl::bda::__ptr::create(pc.dataBDA) + vAnglesCount() * ij.x + ij.y).deref().load(); } + + static inline symmetry_t symmetry() { return (symmetry_t)pc.symmetry; } +}; struct SInterpolants { @@ -18,6 +50,27 @@ struct SInterpolants float32_t3 latDir : COLOR1; }; +using Octahedral = math::OctahedralTransform; +using Polar = math::Polar; +using CSampler = nbl::hlsl::ies::CandelaSampler; + +//! Checks if (x,y) /in [0,PI] x [-PI,PI] product +/* + IES vertical range is [0, 180] degrees + and horizontal range is [0, 360] degrees + but for easier computations (MIRROR & MIRROW_REPEAT operations) + we represent horizontal range as [-180, 180] given spherical coordinates +*/ + +bool domainPass(const float32_t2 p) +{ + NBL_CONSTEXPR float32_t M_PI = numbers::pi; + const float32_t2 lb = float32_t2(0, -M_PI); + const float32_t2 ub = float32_t2(M_PI, M_PI); + + return all(lb <= p) && all(p <= ub); +} + float32_t3 latLongDir(float32_t2 uv) { const float32_t phi = 6.28318530718f * uv.x; @@ -26,22 +79,13 @@ float32_t3 latLongDir(float32_t2 uv) return float32_t3(s * cos(phi), c, s * sin(phi)); } -// TODO: should be shared, duplicated code -float32_t2 iesDirToUv(float32_t3 dir) -{ - float32_t sum = dot(float32_t3(1.0f, 1.0f, 1.0f), abs(dir)); - float32_t3 s = dir / sum; - - if (s.z < 0.0f) - s.xy = sign(s.xy) * (1.0f - abs(s.yx)); - - return s.xy * 0.5f + 0.5f; -} - [shader("vertex")] SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) { - const uint32_t W = pc.resX, H = pc.resY; + uint32_t2 resolution; + outIESCandelaImage[pc.texIx].GetDimensions(resolution.x, resolution.y); // optimal IES texture size + + const uint32_t W = resolution.x, H = resolution.y; const uint32_t i = VertexIndex % W, j = VertexIndex / W; // for sphere geometry created from our grid we need to make sure the surface is closed, aligned at U/V edges @@ -54,7 +98,7 @@ SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) const float32_t2 uvPos = float32_t2(uPos, vPos); const float32_t3 dir = latLongDir(uvPos); - const float32_t3 pos = pc.radius * dir; + const float32_t3 pos = pc.sphereRadius * dir; SInterpolants o; o.ndc = math::linalg::promoted_mul(pc.matrices.worldViewProj, pos); @@ -66,8 +110,92 @@ SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) [shader("pixel")] float32_t4 SphereFS(SInterpolants input) : SV_Target0 { - float32_t2 uv = iesDirToUv(input.latDir); - float32_t candela = inIESCandelaImage[pc.texID].Sample(generalSampler, uv).r; + float32_t2 uv = 0.5f * Octahedral::dirToNDC(input.latDir) + 0.5f; + float32_t candela = inIESCandelaImage[pc.texIx].Sample(generalSampler, uv).r; float32_t v = 1.0f - exp(-candela); return float32_t4(v,v,v,1); -} \ No newline at end of file +} + +[numthreads(WORKGROUP_DIMENSION, WORKGROUP_DIMENSION, 1)] +[shader("compute")] +void CdcCS(uint32_t3 ID : SV_DispatchThreadID) +{ + uint32_t2 destinationSize; + outIESCandelaImage[pc.texIx].GetDimensions(destinationSize.x, destinationSize.y); + const uint32_t2 pixelCoordinates = uint32_t2(glsl::gl_GlobalInvocationID().x, glsl::gl_GlobalInvocationID().y); + + const float32_t VERTICAL_INVERSE = 1.0f / float32_t(destinationSize.x); + const float32_t HORIZONTAL_INVERSE = 1.0f / float32_t(destinationSize.y); + + if (all(pixelCoordinates < destinationSize)) + { + const float32_t2 uv = float32_t2((float32_t(pixelCoordinates.x) + 0.5) * VERTICAL_INVERSE, (float32_t(pixelCoordinates.y) + 0.5) * HORIZONTAL_INVERSE); + const float32_t3 dir = Octahedral::uvToDir(uv); + Polar polar = Polar::createFromCartesian(dir); + + const float32_t normD = length(dir); + float32_t2 mask; + + if (1.0f - QUANT_ERROR_ADMISSIBLE <= normD && normD <= 1.0f + QUANT_ERROR_ADMISSIBLE) + mask.x = 1.f; // pass + else + mask.x = 0.f; + + const float32_t2 sCoords = float32_t2(polar.phi, polar.theta); + if (domainPass(sCoords)) + mask.y = 1.f; // pass + else + mask.y = 0.f; + + Accessor accessor; + CSampler candelaSampler; + outIESCandelaImage[pc.texIx][pixelCoordinates] = candelaSampler.sample(accessor, polar) / pc.maxIValue; + outSphericalCoordinatesImage[pc.texIx][pixelCoordinates] = sCoords; + outOUVProjectionDirectionImage[pc.texIx][pixelCoordinates] = dir; + outPassTMask[pc.texIx][pixelCoordinates] = mask; + } +} + + +float32_t plot(float32_t cand, float32_t pct, float32_t bold) +{ + return smoothstep(pct-0.005*bold, pct, cand) - smoothstep(pct, pct+0.005*bold, cand); +} + +// vertical cut of IES (i.e. cut by plane x = 0) +float32_t f(float32_t2 uv) +{ + return inIESCandelaImage[pc.texIx].Sample(generalSampler, (0.5f * Octahedral::dirToNDC(normalize(float32_t3(uv.x, 0.001, uv.y))) + 0.5f)).x; +} + +// TODO: fix ambiguity for "inverse" call +// #include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" + +[shader("pixel")] +float32_t4 CdcPS(SVertexAttributes input) : SV_Target0 +{ + switch (pc.mode) + { + case 0: + { + float32_t2 ndc = input.uv * 2.f - 1.f; + float32_t dist = length(ndc) * 1.015625f; + float32_t p = plot(dist, 1.0f, 0.75f); + float32_t3 col = float32_t3(p, p, p); + + float32_t normalizedStrength = f(ndc); + if (dist < normalizedStrength) + col += float32_t3(1.0f, 0.0f, 0.0f); + + return float32_t4(col, 1.0f); + } + case 1: + return float32_t4(inIESCandelaImage[pc.texIx].Sample(generalSampler, input.uv).x, 0.f, 0.f, 1.f); + case 2: + return float32_t4(inSphericalCoordinatesImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); + case 3: + return float32_t4(inOUVProjectionDirectionImage[pc.texIx].Sample(generalSampler, input.uv).xyz, 1.f); + default: + return float32_t4(inPassTMaskImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); + } +} diff --git a/50.IESViewer/app_resources/imgui.pixel.hlsl b/50.IESViewer/app_resources/imgui.pixel.hlsl deleted file mode 100644 index fe93c3a70..000000000 --- a/50.IESViewer/app_resources/imgui.pixel.hlsl +++ /dev/null @@ -1,6 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "imgui.opts.hlsl" -#include "nbl/ext/ImGui/builtin/hlsl/fragment.hlsl" diff --git a/50.IESViewer/app_resources/imgui.unified.hlsl b/50.IESViewer/app_resources/imgui.unified.hlsl new file mode 100644 index 000000000..03e5624b0 --- /dev/null +++ b/50.IESViewer/app_resources/imgui.unified.hlsl @@ -0,0 +1,7 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "imgui.opts.hlsl" // bindings info +#include "nbl/ext/ImGui/builtin/hlsl/fragment.hlsl" // pixel entry point +#include "nbl/ext/ImGui/builtin/hlsl/vertex.hlsl" // vertex entry point diff --git a/50.IESViewer/app_resources/imgui.vertex.hlsl b/50.IESViewer/app_resources/imgui.vertex.hlsl deleted file mode 100644 index 2063db84b..000000000 --- a/50.IESViewer/app_resources/imgui.vertex.hlsl +++ /dev/null @@ -1,5 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/ext/ImGui/builtin/hlsl/vertex.hlsl" diff --git a/50.IESViewer/app_resources/pixel.hlsl b/50.IESViewer/app_resources/pixel.hlsl deleted file mode 100644 index 5fe452b2d..000000000 --- a/50.IESViewer/app_resources/pixel.hlsl +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "common.hlsl" -#include "nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl" -using namespace nbl::hlsl::ext::FullScreenTriangle; - -float32_t2 iesDirToUv(float32_t3 dir) -{ - float32_t sum = dot(float32_t3(1.0f, 1.0f, 1.0f), abs(dir)); - float32_t3 s = dir / sum; - - if (s.z < 0.0f) - s.xy = sign(s.xy) * (1.0f - abs(s.yx)); - - return s.xy * 0.5f + 0.5f; -} - -float32_t plot(float32_t cand, float32_t pct, float32_t bold) -{ - return smoothstep(pct-0.005*bold, pct, cand) - smoothstep( pct, pct+0.005*bold, cand); -} - -// vertical cut of IES (i.e. cut by plane x = 0) -float32_t f(float32_t2 uv) -{ - return inIESCandelaImage[pc.texIx].Sample(generalSampler, iesDirToUv(normalize(float32_t3(uv.x, 0.001, uv.y)))).x; -} - -[shader("pixel")] -float32_t4 PSMain(SVertexAttributes input) : SV_Target0 -{ - switch (pc.mode) - { - case 0: - { - float32_t2 ndc = input.uv * 2.f - 1.f; - float32_t dist = length(ndc) * 1.015625f; - float32_t p = plot(dist, 1.0f, 0.75f); - float32_t3 col = float32_t3(p, p, p); - - float32_t normalizedStrength = f(ndc); - if (dist < normalizedStrength) - col += float32_t3(1.0f, 0.0f, 0.0f); - - return float32_t4(col, 1.0f); - } - case 1: - return float32_t4(inIESCandelaImage[pc.texIx].Sample(generalSampler, input.uv).x, 0.f, 0.f, 1.f); - case 2: - return float32_t4(inSphericalCoordinatesImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); - case 3: - return float32_t4(inOUVProjectionDirectionImage[pc.texIx].Sample(generalSampler, input.uv).xyz, 1.f); - default: - return float32_t4(inPassTMaskImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); - } -} diff --git a/50.IESViewer/app_resources/vertex.hlsl b/50.IESViewer/app_resources/vertex.hlsl deleted file mode 100644 index a0f565455..000000000 --- a/50.IESViewer/app_resources/vertex.hlsl +++ /dev/null @@ -1,6 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -// small trick, temporary, we will have a separate rule for compiling this ext and embed into Nabla DLL -#include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" From 1a9b50718aafe8a53229e1f5aa231b64441ac8f4 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 24 Nov 2025 17:19:29 +0100 Subject: [PATCH 066/219] Fixed camera bug --- 09_GeometryCreator/main.cpp | 6 +++--- common/include/nbl/examples/cameras/CCamera.hpp | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/09_GeometryCreator/main.cpp b/09_GeometryCreator/main.cpp index c54355e17..eaa1f78f4 100644 --- a/09_GeometryCreator/main.cpp +++ b/09_GeometryCreator/main.cpp @@ -73,10 +73,10 @@ class GeometryCreatorApp final : public MonoWindowApplication, public BuiltinRes // camera { - hlsl::float32_t3 cameraPosition(-5.81655884, 2.58630896, -4.23974705); - hlsl::float32_t3 cameraTarget(-0.349590302, -0.213266611, 0.317821503); + core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); + core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); float32_t4x4 projectionMatrix = hlsl::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(m_initialResolution.x) / m_initialResolution.y, 0.1f, 10000.0f); - camera = Camera(core::constructVecorSIMDFromHLSLVector(cameraPosition), core::constructVecorSIMDFromHLSLVector(cameraTarget), projectionMatrix, 1.069f, 0.4f); + camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); } onAppInitializedFinish(); diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index 7d527e4b0..c0965b1e9 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -115,7 +115,7 @@ class Camera inline void recomputeViewMatrix() { nbl::hlsl::float32_t3 pos = nbl::core::convertToHLSLVector(position).xyz; - nbl::hlsl::float32_t3 localTarget = (nbl::core::convertToHLSLVector(nbl::core::normalize(target)).xyz - pos).xyz; + nbl::hlsl::float32_t3 localTarget = nbl::hlsl::normalize(nbl::core::convertToHLSLVector(target).xyz - pos); // TODO: remove completely when removing vectorSIMD nbl::hlsl::float32_t3 _target = nbl::core::convertToHLSLVector(target).xyz; @@ -179,15 +179,16 @@ class Camera if (relativeRotationX > MaxVerticalAngle && relativeRotationX < 2 * nbl::core::PI()-MaxVerticalAngle) relativeRotationX = MaxVerticalAngle; + pos.w = 0; localTarget = nbl::hlsl::float32_t4(0, 0, nbl::core::max(1.f, nbl::hlsl::length(pos)), 1.0f); nbl::hlsl::float32_t3x4 mat; nbl::hlsl::setRotation(mat, nbl::hlsl::quaternion::create(relativeRotationX, relativeRotationY, 0)); - localTarget = nbl::hlsl::float32_t4(nbl::hlsl::mul(mat, localTarget), 0.0f); + localTarget = nbl::hlsl::float32_t4(nbl::hlsl::mul(mat, localTarget), 1.0f); nbl::core::vectorSIMDf finalTarget = nbl::core::constructVecorSIMDFromHLSLVector(localTarget + pos); - finalTarget.w = 0.0f; + finalTarget.w = 1.0f; setTarget(finalTarget); } } From e00a669c1f9914a850f13f8a0659edccb6350831 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 25 Nov 2025 12:33:09 +0100 Subject: [PATCH 067/219] make 50. ex unified shaders compile --- 50.IESViewer/app_resources/ies.unified.hlsl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index 260dc4a2b..c95b161f1 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -168,8 +168,7 @@ float32_t f(float32_t2 uv) return inIESCandelaImage[pc.texIx].Sample(generalSampler, (0.5f * Octahedral::dirToNDC(normalize(float32_t3(uv.x, 0.001, uv.y))) + 0.5f)).x; } -// TODO: fix ambiguity for "inverse" call -// #include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" +#include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" [shader("pixel")] float32_t4 CdcPS(SVertexAttributes input) : SV_Target0 From 2c24970a9d8cf3e88d886ee05e1224ccd8c263c4 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 25 Nov 2025 13:52:21 +0100 Subject: [PATCH 068/219] Removed core::matrix from examples 12 and 61 --- 12_MeshLoaders/main.cpp | 14 +++++------- 61_UI/main.cpp | 49 +++++++++++++++++++---------------------- 2 files changed, 28 insertions(+), 35 deletions(-) diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index d80fa8998..a012d0675 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -5,6 +5,7 @@ #include "common.hpp" #include "../3rdparty/portable-file-dialogs/portable-file-dialogs.h" +#include #ifdef NBL_BUILD_MITSUBA_LOADER #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" @@ -161,13 +162,8 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc } // draw scene { - float32_t3x4 viewMatrix; - float32_t4x4 viewProjMatrix; - // TODO: get rid of legacy matrices - { - memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); - memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); - } + float32_t3x4 viewMatrix = camera.getViewMatrix(); + float32_t4x4 viewProjMatrix = camera.getConcatenatedMatrix(); m_renderer->render(cb, CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix)); } cb->endRenderPass(); @@ -456,7 +452,7 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc { const auto measure = hlsl::length(diagonal); const auto aspectRatio = float(m_window->getWidth()) / float(m_window->getHeight()); - camera.setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(1.2f, aspectRatio, distance * measure * 0.1, measure * 4.0)); + camera.setProjectionMatrix(hlsl::buildProjectionMatrixPerspectiveFovRH(1.2f, aspectRatio, distance * measure * 0.1, measure * 4.0)); camera.setMoveSpeed(measure * 0.04); } const auto pos = bound.maxVx + diagonal * distance; @@ -492,7 +488,7 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; // - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); // mutables std::string m_modelPath; diff --git a/61_UI/main.cpp b/61_UI/main.cpp index 643cab079..6b1643565 100644 --- a/61_UI/main.cpp +++ b/61_UI/main.cpp @@ -3,6 +3,8 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" +#include +#include /* Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window. @@ -252,14 +254,9 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA } // draw scene { - float32_t3x4 viewMatrix; - float32_t4x4 viewProjMatrix; - // TODO: get rid of legacy matrices - { - const auto& camera = interface.camera; - memcpy(&viewMatrix,camera.getViewMatrix().pointer(),sizeof(viewMatrix)); - memcpy(&viewProjMatrix,camera.getConcatenatedMatrix().pointer(),sizeof(viewProjMatrix)); - } + const auto& camera = interface.camera; + float32_t3x4 viewMatrix = camera.getViewMatrix(); + float32_t4x4 viewProjMatrix = camera.getConcatenatedMatrix(); const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix); // tear down scene every frame @@ -570,21 +567,21 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA // TODO: why is this a lambda and not just an assignment in a scope ? camera.setProjectionMatrix([&]() { - matrix4SIMD projection; + hlsl::float32_t4x4 projection; if (isPerspective) if(isLH) - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = hlsl::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); else - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = hlsl::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); else { float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; if(isLH) - projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar); + projection = hlsl::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar); else - projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar); + projection = hlsl::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar); } return projection; @@ -723,30 +720,30 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA // TODO: do all computation using `hlsl::matrix` and its `hlsl::float32_tNxM` aliases static struct { - core::matrix4SIMD view, projection, model; + hlsl::float32_t4x4 view, projection, model; } imguizmoM16InOut; ImGuizmo::SetID(0u); - imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix())); - imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix()); - imguizmoM16InOut.model = core::transpose(matrix4SIMD(model)); + imguizmoM16InOut.view = hlsl::transpose(hlsl::getMatrix3x4As4x4(camera.getViewMatrix())); + imguizmoM16InOut.projection = hlsl::transpose(camera.getProjectionMatrix()); + imguizmoM16InOut.model = hlsl::transpose(hlsl::getMatrix3x4As4x4(model)); { if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ - transformParams.editTransformDecomposition = true; - sceneResolution = EditTransform(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams); + transformParams.editTransformDecomposition = true; + sceneResolution = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); } - model = core::transpose(imguizmoM16InOut.model).extractSub3x4(); + model = hlsl::extractSub3x4From4x4Matrix(hlsl::transpose(imguizmoM16InOut.model)); // to Nabla + update camera & model matrices // TODO: make it more nicely, extract: // - Position by computing inverse of the view matrix and grabbing its translation // - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position // But then set the view matrix this way anyway, because up-vector may not be compatible const auto& view = camera.getViewMatrix(); - const_cast(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) + const_cast(view) = hlsl::extractSub3x4From4x4Matrix(hlsl::transpose(imguizmoM16InOut.view)); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) // update concatanated matrix const auto& projection = camera.getProjectionMatrix(); camera.setProjectionMatrix(projection); @@ -783,9 +780,9 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA ImGui::Separator(); }; - addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, model.pointer()); - addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, view.pointer()); - addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false); + addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, &model[0][0]); + addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, &view[0][0]); + addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, &projection[0][0], false); ImGui::End(); } @@ -867,9 +864,9 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA smart_refctd_ptr subAllocDS; SubAllocatedDescriptorSet::value_type renderColorViewDescIndex = SubAllocatedDescriptorSet::invalid_value; // - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); // mutables - core::matrix3x4SIMD model; + hlsl::float32_t3x4 model; std::string_view objectName; TransformRequestParams transformParams; uint16_t2 sceneResolution = {1280,720}; From 8e1e6a9a981ddcc93c2c1b3ad6b8492e578147cd Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 25 Nov 2025 14:53:12 +0100 Subject: [PATCH 069/219] Fixed example 61_UI --- 61_UI/main.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/61_UI/main.cpp b/61_UI/main.cpp index 6b1643565..88f41fe6b 100644 --- a/61_UI/main.cpp +++ b/61_UI/main.cpp @@ -4,7 +4,6 @@ #include "common.hpp" #include -#include /* Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window. @@ -717,7 +716,6 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA * note it also modifies input view matrix but projection matrix is immutable */ -// TODO: do all computation using `hlsl::matrix` and its `hlsl::float32_tNxM` aliases static struct { hlsl::float32_t4x4 view, projection, model; @@ -866,7 +864,7 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA // Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); // mutables - hlsl::float32_t3x4 model; + hlsl::float32_t3x4 model = identity(); std::string_view objectName; TransformRequestParams transformParams; uint16_t2 sceneResolution = {1280,720}; From c256c8dd5984036d35af7a615eb27d9454eda431 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 25 Nov 2025 16:33:12 +0100 Subject: [PATCH 070/219] Removed core::matrixSIMD form example 70_FlipFludis --- 70_FLIPFluids/main.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/70_FLIPFluids/main.cpp b/70_FLIPFluids/main.cpp index 899d00ba4..a867bf353 100644 --- a/70_FLIPFluids/main.cpp +++ b/70_FLIPFluids/main.cpp @@ -6,6 +6,7 @@ #include "nbl/examples/examples.hpp" // TODO: why is it not in nabla.h ? #include "nbl/asset/metadata/CHLSLMetadata.h" +#include "nbl/builtin/hlsl/projection/projection.hlsl" using namespace nbl; using namespace nbl::core; @@ -231,7 +232,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso float zNear = 0.1f, zFar = 10000.f; core::vectorSIMDf cameraPosition(14, 8, 12); core::vectorSIMDf cameraTarget(0, 0, 0); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(WIN_WIDTH) / WIN_HEIGHT, zNear, zFar); + hlsl::float32_t4x4 projectionMatrix = hlsl::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(WIN_WIDTH) / WIN_HEIGHT, zNear, zFar); camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); m_pRenderParams.zNear = zNear; @@ -884,22 +885,20 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso const auto projectionMatrix = camera.getProjectionMatrix(); const auto viewProjectionMatrix = camera.getConcatenatedMatrix(); - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - modelMatrix.setRotation(quaternion(0, 0, 0)); + hlsl::float32_t3x4 modelMatrix = hlsl::identity(); - core::matrix3x4SIMD modelViewMatrix = core::concatenateBFollowedByA(viewMatrix, modelMatrix); - core::matrix4SIMD modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); + hlsl::float32_t3x4 modelViewMatrix = viewMatrix; + hlsl::float32_t4x4 modelViewProjectionMatrix = viewProjectionMatrix; - auto modelMat = core::concatenateBFollowedByA(core::matrix4SIMD(), modelMatrix); + auto modelMat = hlsl::getMatrix3x4As4x4(modelMatrix); const core::vector3df camPos = camera.getPosition().getAsVector3df(); camPos.getAs4Values(camData.cameraPosition); - memcpy(camData.MVP, modelViewProjectionMatrix.pointer(), sizeof(camData.MVP)); - memcpy(camData.M, modelMat.pointer(), sizeof(camData.M)); - memcpy(camData.V, viewMatrix.pointer(), sizeof(camData.V)); - memcpy(camData.P, projectionMatrix.pointer(), sizeof(camData.P)); + memcpy(camData.MVP, &modelViewProjectionMatrix[0][0], sizeof(camData.MVP)); + memcpy(camData.M, &modelMat[0][0], sizeof(camData.M)); + memcpy(camData.V, &viewMatrix[0][0], sizeof(camData.V)); + memcpy(camData.P, &projectionMatrix[0][0], sizeof(camData.P)); { camDataRange.buffer = cameraBuffer; camDataRange.size = cameraBuffer->getSize(); @@ -1817,7 +1816,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; - Camera camera = Camera(core::vectorSIMDf(0,0,0), core::vectorSIMDf(0,0,0), core::matrix4SIMD()); + Camera camera = Camera(core::vectorSIMDf(0,0,0), core::vectorSIMDf(0,0,0), hlsl::float32_t4x4()); video::CDumbPresentationOracle oracle; bool m_shouldInitParticles = true; From 06bad177bdb18772c8b4c6c4289a22159e7c97c0 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 26 Nov 2025 12:37:52 +0100 Subject: [PATCH 071/219] make 50.ex compile after updates, need to fix some runtime bugs & have octahedral map immutable --- 50.IESViewer/App.hpp | 2 +- 50.IESViewer/AppEvent.cpp | 4 +-- 50.IESViewer/AppGPU.cpp | 4 +-- 50.IESViewer/AppInit.cpp | 39 +++++++++++---------- 50.IESViewer/AppRender.cpp | 17 +++++---- 50.IESViewer/AppUI.cpp | 9 ++--- 50.IESViewer/CSimpleIESRenderer.hpp | 18 ++++------ 50.IESViewer/IES.cpp | 12 +++---- 50.IESViewer/IES.hpp | 2 +- 50.IESViewer/app_resources/common.hlsl | 12 +++---- 50.IESViewer/app_resources/ies.unified.hlsl | 2 +- 11 files changed, 57 insertions(+), 64 deletions(-) diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp index 9f750f02e..9a2cc3717 100644 --- a/50.IESViewer/App.hpp +++ b/50.IESViewer/App.hpp @@ -66,7 +66,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp smart_refctd_ptr createImageView(const size_t width, const size_t height, E_FORMAT format, std::string name, bitflag usage = bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_STORAGE_BIT, bitflag aspectFlags = bitflag(IImage::EAF_COLOR_BIT)); - smart_refctd_ptr createBuffer(const core::vector& in, std::string name); + smart_refctd_ptr createBuffer(const core::vector& in, std::string name); void uiListener(); }; diff --git a/50.IESViewer/AppEvent.cpp b/50.IESViewer/AppEvent.cpp index 60458f841..894a11d47 100644 --- a/50.IESViewer/AppEvent.cpp +++ b/50.IESViewer/AppEvent.cpp @@ -13,10 +13,10 @@ void IESViewer::processMouse(const nbl::ui::IMouseEventChannel::range_t& events) if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) { auto& ies = m_assets[m_activeAssetIx]; - auto* profile = ies.getProfile(); + const auto& accessor = ies.getProfile()->getAccessor(); auto impulse = ev.scrollEvent.verticalScroll * 0.02f; - ies.zDegree = std::clamp(ies.zDegree + impulse, profile->getHoriAngles().front(), profile->getHoriAngles().back()); + ies.zDegree = std::clamp(ies.zDegree + impulse, accessor.hAngles.front(), accessor.hAngles.back()); } } } diff --git a/50.IESViewer/AppGPU.cpp b/50.IESViewer/AppGPU.cpp index 033c5d63b..1dfff8648 100644 --- a/50.IESViewer/AppGPU.cpp +++ b/50.IESViewer/AppGPU.cpp @@ -53,11 +53,11 @@ core::smart_refctd_ptr IESViewer::createImageView(const size_t wi return imageView; } -core::smart_refctd_ptr IESViewer::createBuffer(const core::vector& in, std::string name) +core::smart_refctd_ptr IESViewer::createBuffer(const core::vector& in, std::string name) { IGPUBuffer::SCreationParams bufferParams = {}; bufferParams.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT /*TODO: <- double check*/;; - bufferParams.size = sizeof(asset::CIESProfile::IES_STORAGE_FORMAT) * in.size(); + bufferParams.size = sizeof(float) * in.size(); auto buffer = m_device->createBuffer(std::move(bufferParams)); buffer->setObjectDebugName(name.c_str()); diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index 07694cb27..68605ab51 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -67,7 +67,8 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) for (auto& ies : m_assets) { const auto* profile = ies.getProfile(); - const auto resolution = profile->getOptimalIESResolution(); + const auto& accessor = profile->getAccessor(); + const auto& resolution = accessor.properties.optimalIESResolution; #define CREATE_VIEW(VIEW, FORMAT, NAME) \ if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT, NAME + ies.key) )) return false; @@ -80,9 +81,9 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) #define CREATE_BUFFER(BUFFER, DATA, NAME) \ if (!(BUFFER = createBuffer(DATA, NAME + ies.key) )) return false; - CREATE_BUFFER(ies.buffers.vAngles, profile->getVertAngles(), "IES Vertical Angles Buffer: ") - CREATE_BUFFER(ies.buffers.hAngles, profile->getHoriAngles(), "IES Horizontal Angles Buffer: ") - CREATE_BUFFER(ies.buffers.data, profile->getData(), "IES Data Buffer: ") + CREATE_BUFFER(ies.buffers.vAngles, accessor.vAngles, "IES Vertical Angles Buffer: ") + CREATE_BUFFER(ies.buffers.hAngles, accessor.hAngles, "IES Horizontal Angles Buffer: ") + CREATE_BUFFER(ies.buffers.data, accessor.data, "IES Data Buffer: ") } auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); auto took = std::to_string(elapsed.count()); @@ -119,15 +120,15 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) if (!(SHADER = createShader.template operator()() )) return false; m_logger->log("Loading GPU shaders..", system::ILogger::ELL_INFO); - smart_refctd_ptr compute, pixel, vertex, ies, imguiVertex, imguiPixel; + + struct + { + smart_refctd_ptr ies, imgui; + } shaders; { auto start = std::chrono::high_resolution_clock::now(); - CREATE_SHADER(compute, "compute") - CREATE_SHADER(pixel, "pixel") - CREATE_SHADER(vertex, "vertex") - CREATE_SHADER(ies, "ies.unified") - CREATE_SHADER(imguiVertex, "imgui.vertex") - CREATE_SHADER(imguiPixel, "imgui.pixel") + CREATE_SHADER(shaders.ies, "ies.unified") + CREATE_SHADER(shaders.imgui, "imgui.unified") auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); auto took = std::to_string(elapsed.count()); m_logger->log("Finished loading GPU shaders, took %s seconds!", system::ILogger::ELL_PERFORMANCE, took.c_str()); @@ -188,7 +189,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) if (not descriptorSetLayout) return logFail("Failed to create descriptor set layout!"); - auto range = std::to_array({ {StageFlags.value, 0u, sizeof(PushConstants)} }); + auto range = std::to_array({ {StageFlags.value, 0u, sizeof(nbl::hlsl::this_example::ies::PushConstants)} }); auto pipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); if (not pipelineLayout) @@ -198,8 +199,8 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) { auto params = std::to_array({ {} });; params[0].layout = pipelineLayout.get(); - params[0].shader.shader = compute.get(); - params[0].shader.entryPoint = "main"; + params[0].shader.shader = shaders.ies.get(); + params[0].shader.entryPoint = "CdcCS"; if (!m_device->createComputePipelines(nullptr, params, &m_computePipeline)) return logFail("Failed to create compute pipeline!"); @@ -213,8 +214,8 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = { - {.shader = vertex.get(), .entryPoint = "main", .entries = &specConstants }, - {.shader = pixel.get(), .entryPoint = "PSMain" } + {.shader = shaders.ies.get(), .entryPoint = "main", .entries = &specConstants }, + {.shader = shaders.ies.get(), .entryPoint = "CdcPS" } }; auto params = std::to_array({ {} }); @@ -352,7 +353,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) for (auto i = 0u; i < m_assets.size(); ++i) { auto& ies = m_assets[i]; - auto resolution = ies.getProfile()->getOptimalIESResolution(); + const auto& resolution = ies.getProfile()->getAccessor().properties.optimalIESResolution; auto name = "Grid " + std::to_string(i); addGeometry(name.c_str(), creator->createGrid({ resolution.x, resolution.y })); } @@ -372,7 +373,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) const auto& geometries = m_scene->getInitParams().geometries; - m_renderer = CSimpleIESRenderer::create(ies, core::smart_refctd_ptr(m_descriptors[0u]->getLayout()), scRes->getRenderpass(), 0, { &geometries.front().get(),geometries.size() }); + m_renderer = CSimpleIESRenderer::create(shaders.ies, core::smart_refctd_ptr(m_descriptors[0u]->getLayout()), scRes->getRenderpass(), 0, { &geometries.front().get(),geometries.size() }); if (!m_renderer || m_renderer->getGeometries().size() != geometries.size()) return logFail("Could not create 3D Plot Renderer!"); @@ -410,7 +411,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) params.pipelineCache = nullptr; using imgui_precompiled_spirv_t = ext::imgui::UI::SCreationParameters::PrecompiledShaders; - params.spirv = std::make_optional(imgui_precompiled_spirv_t{ .vertex = imguiVertex, .fragment = imguiPixel }); + params.spirv = std::make_optional(imgui_precompiled_spirv_t{ .vertex = shaders.imgui, .fragment = shaders.imgui }); auto* imgui = (ui.it = ext::imgui::UI::create(std::move(params))).get(); if (not imgui) diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index 731d07b83..10bfc92c4 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -52,19 +52,20 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi auto& ies = m_assets[m_activeAssetIx]; const auto* profile = ies.getProfile(); - PushConstants pc; + nbl::hlsl::this_example::ies::PushConstants pc; { pc.vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(); pc.hAnglesBDA = ies.buffers.hAngles->getDeviceAddress(); pc.dataBDA = ies.buffers.data->getDeviceAddress(); - pc.maxIValue = profile->getMaxCandelaValue(); - pc.vAnglesCount = profile->getVertAngles().size(); - pc.hAnglesCount = profile->getHoriAngles().size(); - pc.dataCount = profile->getData().size(); + const auto& accessor = profile->getAccessor(); + pc.maxIValue = accessor.properties.maxCandelaValue; + pc.vAnglesCount = accessor.vAnglesCount(); + pc.hAnglesCount = accessor.hAnglesCount(); pc.zAngleDegreeRotation = ies.zDegree; pc.mode = ies.mode; + pc.symmetry = static_cast(accessor.symmetry()); pc.texIx = m_activeAssetIx; } @@ -89,7 +90,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi cb->bindComputePipeline(m_computePipeline.get()); cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); - const auto xGroups = (ies.getProfile()->getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u; + const auto xGroups = (ies.getProfile()->getAccessor().properties.optimalIESResolution.x - 1u) / WORKGROUP_DIMENSION + 1u; cb->dispatch(xGroups, xGroups, 1); cb->endDebugMarker(); } @@ -168,9 +169,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); } const auto viewParams = CSimpleIESRenderer::SViewParams(viewMatrix, viewProjMatrix); - - auto resolution = profile->getOptimalIESResolution(); - const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = 100.f, .resX = resolution.x, .resY = resolution.y, .ds = m_descriptors[0u].get(), .texID = (uint32_t)m_activeAssetIx }); + const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = 100.f, .ds = m_descriptors[0u].get(), .texID = (uint32_t)m_activeAssetIx }); // tear down scene every frame m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data() + m_activeAssetIx; diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp index 40e9a364a..0115da42b 100644 --- a/50.IESViewer/AppUI.cpp +++ b/50.IESViewer/AppUI.cpp @@ -18,14 +18,15 @@ void IESViewer::uiListener() auto& ies = m_assets[m_activeAssetIx]; const auto name = path(ies.key).filename().string(); auto* profile = ies.getProfile(); - const float lowerBound = (float)profile->getHoriAngles().front(); - const float upperBound = (float)profile->getHoriAngles().back(); + const auto& accessor = profile->getAccessor(); + + const float lowerBound = accessor.hAngles.front(); + const float upperBound = accessor.hAngles.back(); const bool singleAngle = (upperBound == lowerBound); auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); const ImGuiViewport* vp = ImGui::GetMainViewport(); const ImVec2 imageSize(640.f, 640.f); - // 2D Plot { ImDrawList* fg = ImGui::GetForegroundDrawList(); @@ -35,7 +36,7 @@ void IESViewer::uiListener() fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::modeToRS(ies.mode)); y += ImGui::GetTextLineHeightWithSpacing(); - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::symmetryToRS(profile->getSymmetry())); + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::symmetryToRS(accessor.symmetry())); y += ImGui::GetTextLineHeightWithSpacing(); fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), name.c_str()); diff --git a/50.IESViewer/CSimpleIESRenderer.hpp b/50.IESViewer/CSimpleIESRenderer.hpp index f93218e91..58ea5e9d4 100644 --- a/50.IESViewer/CSimpleIESRenderer.hpp +++ b/50.IESViewer/CSimpleIESRenderer.hpp @@ -5,7 +5,7 @@ #include "nbl/examples/examples.hpp" #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" -#include "app_resources/ies.pcs.hlsl" +#include "app_resources/common.hlsl" namespace nbl::examples { @@ -35,7 +35,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted inline auto computeForInstance(hlsl::float32_t3x4 world) const { using namespace nbl::hlsl; - hlsl::examples::ies::SInstanceMatrices retval = { + hlsl::this_example::ies::SInstanceMatrices retval = { .worldViewProj = float32_t4x4(math::linalg::promoted_mul(float64_t4x4(viewProj),float64_t3x4(world))) }; const auto sub3x3 = mul(float64_t3x3(viewProj),float64_t3x3(world)); @@ -51,8 +51,6 @@ class CSimpleIESRenderer final : public core::IReferenceCounted struct SIESParams { hlsl::float32_t radius = 1.f; - uint32_t resX : 16; - uint32_t resY : 16; IGPUDescriptorSet* ds = nullptr; uint32_t texID; }; @@ -63,7 +61,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted asset::SBufferBinding indexBuffer = {}; uint32_t elementCount = 0; // indices into the descriptor set - constexpr static inline auto MissingView = hlsl::examples::ies::SPushConstants::DescriptorCount; + constexpr static inline auto MissingView = hlsl::this_example::ies::PushConstants::DescriptorCount; uint16_t positionView = MissingView; uint16_t normalView = MissingView; asset::E_INDEX_TYPE indexType = asset::EIT_UNKNOWN; @@ -71,7 +69,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted // struct SInstance { - using SPushConstants = hlsl::examples::ies::SPushConstants; + using SPushConstants = hlsl::this_example::ies::PushConstants; inline SPushConstants computePushConstants(const SViewParams& viewParams, const SIESParams& iesParams) const { using namespace hlsl; @@ -79,10 +77,8 @@ class CSimpleIESRenderer final : public core::IReferenceCounted .matrices = viewParams.computeForInstance(world), .positionView = packedGeo->positionView, .normalView = packedGeo->normalView, - .resX = iesParams.resX, - .resY = iesParams.resY, - .texID = iesParams.texID, - .radius = iesParams.radius + .texIx = iesParams.texID, + .sphereRadius = iesParams.radius }; } @@ -167,7 +163,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted { IGPUGraphicsPipeline::SCreationParams params[pipeline_e::Count] = {}; params[pipeline_e::SphereTriangleStrip].vertexShader = { .shader = shader.get(),.entryPoint = "SphereVS" }; - params[pipeline_e::SphereTriangleStrip].fragmentShader = { .shader = shader.get(),.entryPoint = "SphereFS" }; + params[pipeline_e::SphereTriangleStrip].fragmentShader = { .shader = shader.get(),.entryPoint = "SpherePS" }; for (auto i=0; i requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) diff --git a/50.IESViewer/app_resources/common.hlsl b/50.IESViewer/app_resources/common.hlsl index fb133a226..12dda0281 100644 --- a/50.IESViewer/app_resources/common.hlsl +++ b/50.IESViewer/app_resources/common.hlsl @@ -28,20 +28,16 @@ struct PushConstants NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1<<16)-1; SInstanceMatrices matrices; + uint64_t hAnglesBDA; + uint64_t vAnglesBDA; + uint64_t dataBDA; uint32_t positionView : 16; uint32_t normalView : 16; uint32_t mode : 8; uint32_t symmetry : 8; uint32_t texIx : 16; - - uint64_t hAnglesBDA; - uint64_t vAnglesBDA; - uint64_t dataBDA; - - uint32_t hAnglesCount; + uint32_t hAnglesCount; uint32_t vAnglesCount; - uint32_t dataCount; - float32_t maxIValue; float32_t zAngleDegreeRotation; float32_t sphereRadius; diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index c95b161f1..aeca9c3fb 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -108,7 +108,7 @@ SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) } [shader("pixel")] -float32_t4 SphereFS(SInterpolants input) : SV_Target0 +float32_t4 SpherePS(SInterpolants input) : SV_Target0 { float32_t2 uv = 0.5f * Octahedral::dirToNDC(input.latDir) + 0.5f; float32_t candela = inIESCandelaImage[pc.texIx].Sample(generalSampler, uv).r; From 292cbbfb7f9007f00b92b8a6ae0c23e5a3a07687 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Nov 2025 16:08:12 +0700 Subject: [PATCH 072/219] refactor link debug_draw lib --- 34_DebugDraw/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/34_DebugDraw/CMakeLists.txt b/34_DebugDraw/CMakeLists.txt index 89d0bc8e3..8d78f3de9 100644 --- a/34_DebugDraw/CMakeLists.txt +++ b/34_DebugDraw/CMakeLists.txt @@ -5,7 +5,5 @@ if(NBL_BUILD_DEBUG_DRAW) nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") - add_dependencies(${EXECUTABLE_NAME} ${NBL_EXT_DEBUG_DRAW_TARGET}) - target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_EXT_DEBUG_DRAW_TARGET}) - target_include_directories(${EXECUTABLE_NAME} PUBLIC $) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::DebugDraw) endif() From 8af66823a545c281582a053b095f8a305769f784 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Nov 2025 16:57:43 +0700 Subject: [PATCH 073/219] refactor link debug_draw lib in ex 12 --- 12_MeshLoaders/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/12_MeshLoaders/CMakeLists.txt b/12_MeshLoaders/CMakeLists.txt index da032293b..709b7d40b 100644 --- a/12_MeshLoaders/CMakeLists.txt +++ b/12_MeshLoaders/CMakeLists.txt @@ -18,9 +18,7 @@ nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_L # LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) if (NBL_BUILD_DEBUG_DRAW) - add_dependencies(${EXECUTABLE_NAME} ${NBL_EXT_DEBUG_DRAW_TARGET}) - target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_EXT_DEBUG_DRAW_TARGET}) - target_include_directories(${EXECUTABLE_NAME} PUBLIC $) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::DebugDraw) endif() From cc341e74bcc7fdf70fa6dbc312c4a6042a8eafff Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 28 Nov 2025 16:47:38 +0700 Subject: [PATCH 074/219] refactor draw aabb usage --- 12_MeshLoaders/main.cpp | 9 ++++++--- 34_DebugDraw/main.cpp | 13 ++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index 072fbff04..b69f261ed 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -196,7 +196,10 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc if (m_drawBBs) { const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - m_drawAABB->render(cb, drawFinished, m_aabbInstances, viewProjMatrix); + ext::debug_draw::DrawAABB::DrawParameters drawParams; + drawParams.commandBuffer = cb; + drawParams.cameraMat = viewProjMatrix; + m_drawAABB->render(drawParams, drawFinished, m_aabbInstances); } #endif cb->endRenderPass(); @@ -471,14 +474,14 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc #ifdef NBL_BUILD_DEBUG_DRAW auto& inst = m_aabbInstances[i]; const auto tmpAabb = shapes::AABB<3,float>(promoted.minVx, promoted.maxVx); - hlsl::float32_t4x4 instanceTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(tmpAabb); + hlsl::float32_t3x4 instanceTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(tmpAabb); const auto tmpWorld = hlsl::float32_t3x4(promotedWorld); inst.color = { 1,1,1,1 }; inst.transform[0] = tmpWorld[0]; inst.transform[1] = tmpWorld[1]; inst.transform[2] = tmpWorld[2]; inst.transform[3] = float32_t4(0, 0, 0, 1); - inst.transform = hlsl::mul(inst.transform, instanceTransform); + inst.transform = math::linalg::promoted_mul(inst.transform, instanceTransform); #endif } printAABB(bound,"Total"); diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 3897bcc14..153a387c5 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -235,8 +235,11 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti cmdbuf->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - drawAABB->renderSingle(cmdbuf, testAABB, float32_t4{1, 0, 0, 1}, viewProjectionMatrix); - + ext::debug_draw::DrawAABB::DrawParameters drawParams; + drawParams.commandBuffer = cmdbuf; + drawParams.cameraMat = viewProjectionMatrix; + + drawAABB->renderSingle(drawParams, testAABB, float32_t4{ 1, 0, 0, 1 }); { using aabb_t = hlsl::shapes::AABB<3, float>; using point_t = aabb_t::point_t; @@ -257,12 +260,12 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti auto& instance = aabbInstances[i]; instance.color = { color_dis(gen),color_dis(gen),color_dis(gen),1 }; - hlsl::float32_t4x4 instanceTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(aabb); - instance.transform = instanceTransform; + hlsl::float32_t3x4 instanceTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(aabb); + instance.transform = math::linalg::promoted_mul(float32_t4x4(1), instanceTransform); } const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - drawAABB->render(cmdbuf, drawFinished, aabbInstances, viewProjectionMatrix); + drawAABB->render(drawParams, drawFinished, aabbInstances); } cmdbuf->endRenderPass(); From 7a3af0a3efb1dce923bb7f3b63b61c90c0b09543 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 29 Nov 2025 18:57:11 +0100 Subject: [PATCH 075/219] Refactored ITester.h --- 73_Mortons/CTester.h | 457 ++++++++--------- 73_Mortons/app_resources/testCommon.hlsl | 481 +++++++++--------- 73_Mortons/main.cpp | 6 +- .../include/nbl/examples/Tester}/ITester.h | 152 ++++-- .../nbl/examples/Tester}/test.comp.hlsl | 13 +- 5 files changed, 593 insertions(+), 516 deletions(-) rename {73_Mortons => common/include/nbl/examples/Tester}/ITester.h (71%) rename {73_Mortons/app_resources => common/include/nbl/examples/Tester}/test.comp.hlsl (50%) diff --git a/73_Mortons/CTester.h b/73_Mortons/CTester.h index c47e94376..763307b53 100644 --- a/73_Mortons/CTester.h +++ b/73_Mortons/CTester.h @@ -3,14 +3,20 @@ #include #include "app_resources/testCommon.hlsl" -#include "ITester.h" +#include "../common/include/nbl/examples/Tester/ITester.h" using namespace nbl; -class CTester final : public ITester +class CTester final : public ITester { + using base_t = ITester; + public: - void performTests() + CTester(const uint32_t testIterationCount) + : base_t(testIterationCount) {}; + +private: + InputTestValues generateInputTestValues() override { std::random_device rd; std::mt19937 mt(rd()); @@ -19,246 +25,229 @@ class CTester final : public ITester std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); - m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - InputTestValues testInput; - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - TestValues expected; - - uint32_t generatedShift = intDistribution(mt) & uint32_t(63); - testInput.shift = generatedShift; - { - uint64_t generatedA = longDistribution(mt); - uint64_t generatedB = longDistribution(mt); - - testInput.generatedA = generatedA; - testInput.generatedB = generatedB; - - expected.emulatedAnd = _static_cast(generatedA & generatedB); - expected.emulatedOr = _static_cast(generatedA | generatedB); - expected.emulatedXor = _static_cast(generatedA ^ generatedB); - expected.emulatedNot = _static_cast(~generatedA); - expected.emulatedPlus = _static_cast(generatedA + generatedB); - expected.emulatedMinus = _static_cast(generatedA - generatedB); - expected.emulatedLess = uint32_t(generatedA < generatedB); - expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); - expected.emulatedGreater = uint32_t(generatedA > generatedB); - expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); - - expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); - expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); - expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); - } - { - testInput.coordX = longDistribution(mt); - testInput.coordY = longDistribution(mt); - testInput.coordZ = longDistribution(mt); - testInput.coordW = longDistribution(mt); - - uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; - uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; - - uint16_t2 Vec2ASmall = uint16_t2(Vec2A & smallBitsMask_2 ); - uint16_t2 Vec2BSmall = uint16_t2(Vec2B & smallBitsMask_2 ); - uint16_t2 Vec2AMedium = uint16_t2(Vec2A & mediumBitsMask_2); - uint16_t2 Vec2BMedium = uint16_t2(Vec2B & mediumBitsMask_2); - uint32_t2 Vec2AFull = uint32_t2(Vec2A & fullBitsMask_2); - uint32_t2 Vec2BFull = uint32_t2(Vec2B & fullBitsMask_2); - - uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; - uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; - - uint16_t3 Vec3ASmall = uint16_t3(Vec3A & smallBitsMask_3); - uint16_t3 Vec3BSmall = uint16_t3(Vec3B & smallBitsMask_3); - uint16_t3 Vec3AMedium = uint16_t3(Vec3A & mediumBitsMask_3); - uint16_t3 Vec3BMedium = uint16_t3(Vec3B & mediumBitsMask_3); - uint32_t3 Vec3AFull = uint32_t3(Vec3A & fullBitsMask_3); - uint32_t3 Vec3BFull = uint32_t3(Vec3B & fullBitsMask_3); - - uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; - uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; - - uint16_t4 Vec4ASmall = uint16_t4(Vec4A & smallBitsMask_4); - uint16_t4 Vec4BSmall = uint16_t4(Vec4B & smallBitsMask_4); - uint16_t4 Vec4AMedium = uint16_t4(Vec4A & mediumBitsMask_4); - uint16_t4 Vec4BMedium = uint16_t4(Vec4B & mediumBitsMask_4); - uint16_t4 Vec4AFull = uint16_t4(Vec4A & fullBitsMask_4); - uint16_t4 Vec4BFull = uint16_t4(Vec4B & fullBitsMask_4); - - // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them - // so their highest bits are all 0s or 1s depending on the sign of the number they encode - - int16_t2 Vec2ASignedSmall = int16_t2(Vec2ASmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); - int16_t2 Vec2BSignedSmall = int16_t2(Vec2BSmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); - int16_t2 Vec2ASignedMedium = int16_t2(Vec2AMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); - int16_t2 Vec2BSignedMedium = int16_t2(Vec2BMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); - int32_t2 Vec2ASignedFull = int32_t2(Vec2AFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); - int32_t2 Vec2BSignedFull = int32_t2(Vec2BFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); - - int16_t3 Vec3ASignedSmall = int16_t3(Vec3ASmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); - int16_t3 Vec3BSignedSmall = int16_t3(Vec3BSmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); - int16_t3 Vec3ASignedMedium = int16_t3(Vec3AMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); - int16_t3 Vec3BSignedMedium = int16_t3(Vec3BMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); - int32_t3 Vec3ASignedFull = int32_t3(Vec3AFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); - int32_t3 Vec3BSignedFull = int32_t3(Vec3BFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); - - int16_t4 Vec4ASignedSmall = int16_t4(Vec4ASmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); - int16_t4 Vec4BSignedSmall = int16_t4(Vec4BSmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); - int16_t4 Vec4ASignedMedium = int16_t4(Vec4AMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); - int16_t4 Vec4BSignedMedium = int16_t4(Vec4BMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); - int16_t4 Vec4ASignedFull = int16_t4(Vec4AFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); - int16_t4 Vec4BSignedFull = int16_t4(Vec4BFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); - - // Plus - expected.mortonPlus_small_2 = morton::code::create(Vec2ASmall + Vec2BSmall); - expected.mortonPlus_medium_2 = morton::code::create(Vec2AMedium + Vec2BMedium); - expected.mortonPlus_full_2 = morton::code::create(Vec2AFull + Vec2BFull); - expected.mortonPlus_emulated_2 = morton::code::create(Vec2AFull + Vec2BFull); - - expected.mortonPlus_small_3 = morton::code::create(Vec3ASmall + Vec3BSmall); - expected.mortonPlus_medium_3 = morton::code::create(Vec3AMedium + Vec3BMedium); - expected.mortonPlus_full_3 = morton::code::create(Vec3AFull + Vec3BFull); - expected.mortonPlus_emulated_3 = morton::code::create(Vec3AFull + Vec3BFull); - - expected.mortonPlus_small_4 = morton::code::create(Vec4ASmall + Vec4BSmall); - expected.mortonPlus_medium_4 = morton::code::create(Vec4AMedium + Vec4BMedium); - expected.mortonPlus_full_4 = morton::code::create(Vec4AFull + Vec4BFull); - expected.mortonPlus_emulated_4 = morton::code::create(Vec4AFull + Vec4BFull); - - // Minus - expected.mortonMinus_small_2 = morton::code::create(Vec2ASmall - Vec2BSmall); - expected.mortonMinus_medium_2 = morton::code::create(Vec2AMedium - Vec2BMedium); - expected.mortonMinus_full_2 = morton::code::create(Vec2AFull - Vec2BFull); - expected.mortonMinus_emulated_2 = morton::code::create(Vec2AFull - Vec2BFull); - - expected.mortonMinus_small_3 = morton::code::create(Vec3ASmall - Vec3BSmall); - expected.mortonMinus_medium_3 = morton::code::create(Vec3AMedium - Vec3BMedium); - expected.mortonMinus_full_3 = morton::code::create(Vec3AFull - Vec3BFull); - expected.mortonMinus_emulated_3 = morton::code::create(Vec3AFull - Vec3BFull); - - expected.mortonMinus_small_4 = morton::code::create(Vec4ASmall - Vec4BSmall); - expected.mortonMinus_medium_4 = morton::code::create(Vec4AMedium - Vec4BMedium); - expected.mortonMinus_full_4 = morton::code::create(Vec4AFull - Vec4BFull); - expected.mortonMinus_emulated_4 = morton::code::create(Vec4AFull - Vec4BFull); - - // Coordinate-wise equality - expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); - expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); - expected.mortonEqual_full_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); - expected.mortonEqual_emulated_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); - - expected.mortonEqual_small_3 = uint32_t3(glm::equal(Vec3ASmall, Vec3BSmall)); - expected.mortonEqual_medium_3 = uint32_t3(glm::equal(Vec3AMedium, Vec3BMedium)); - expected.mortonEqual_full_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); - expected.mortonEqual_emulated_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); - - expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); - expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); - expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); - - // Coordinate-wise unsigned inequality (just testing with less) - expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); - expected.mortonUnsignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2AMedium, Vec2BMedium)); - expected.mortonUnsignedLess_full_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); - expected.mortonUnsignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); - - expected.mortonUnsignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASmall, Vec3BSmall)); - expected.mortonUnsignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3AMedium, Vec3BMedium)); - expected.mortonUnsignedLess_full_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); - expected.mortonUnsignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); - - expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); - expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); - expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); - - // Coordinate-wise signed inequality - expected.mortonSignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); - expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); - expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); - - expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); - expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); - expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); - - expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); - expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); - expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); - - uint16_t castedShift = uint16_t(generatedShift); - // Left-shift - expected.mortonLeftShift_small_2 = morton::code::create((Vec2ASmall << uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); - expected.mortonLeftShift_medium_2 = morton::code::create((Vec2AMedium << uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); - expected.mortonLeftShift_full_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); - expected.mortonLeftShift_emulated_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); - - expected.mortonLeftShift_small_3 = morton::code::create((Vec3ASmall << uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); - expected.mortonLeftShift_medium_3 = morton::code::create((Vec3AMedium << uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); - expected.mortonLeftShift_full_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); - expected.mortonLeftShift_emulated_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); - - expected.mortonLeftShift_small_4 = morton::code::create((Vec4ASmall << uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); - expected.mortonLeftShift_medium_4 = morton::code::create((Vec4AMedium << uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); - expected.mortonLeftShift_full_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); - expected.mortonLeftShift_emulated_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); - - // Unsigned right-shift - expected.mortonUnsignedRightShift_small_2 = morton::code::create((Vec2ASmall >> uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); - expected.mortonUnsignedRightShift_medium_2 = morton::code::create((Vec2AMedium >> uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); - expected.mortonUnsignedRightShift_full_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); - expected.mortonUnsignedRightShift_emulated_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2))& uint32_t(fullBitsMask_2)); - - expected.mortonUnsignedRightShift_small_3 = morton::code::create((Vec3ASmall >> uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); - expected.mortonUnsignedRightShift_medium_3 = morton::code::create((Vec3AMedium >> uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); - expected.mortonUnsignedRightShift_full_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); - expected.mortonUnsignedRightShift_emulated_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3))& uint32_t(fullBitsMask_3)); - - expected.mortonUnsignedRightShift_small_4 = morton::code::create((Vec4ASmall >> uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); - expected.mortonUnsignedRightShift_medium_4 = morton::code::create((Vec4AMedium >> uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); - expected.mortonUnsignedRightShift_full_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); - expected.mortonUnsignedRightShift_emulated_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4))& uint16_t(fullBitsMask_4)); - - // Signed right-shift - expected.mortonSignedRightShift_small_2 = morton::code::create((Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)) & int16_t(smallBitsMask_2)); - expected.mortonSignedRightShift_medium_2 = morton::code::create((Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)) & int16_t(mediumBitsMask_2)); - expected.mortonSignedRightShift_full_2 = morton::code::create((Vec2ASignedFull >> int32_t(castedShift % fullBits_2)) & int32_t(fullBitsMask_2)); - - expected.mortonSignedRightShift_small_3 = morton::code::create((Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)) & int16_t(smallBitsMask_3)); - expected.mortonSignedRightShift_medium_3 = morton::code::create((Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)) & int16_t(mediumBitsMask_3)); - expected.mortonSignedRightShift_full_3 = morton::code::create((Vec3ASignedFull >> int32_t(castedShift % fullBits_3)) & int32_t(fullBitsMask_3)); - - expected.mortonSignedRightShift_small_4 = morton::code::create((Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)) & int16_t(smallBitsMask_4)); - expected.mortonSignedRightShift_medium_4 = morton::code::create((Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)) & int16_t(mediumBitsMask_4)); - expected.mortonSignedRightShift_full_4 = morton::code::create((Vec4ASignedFull >> int16_t(castedShift % fullBits_4)) & int16_t(fullBitsMask_4)); - } - - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); - } - m_logger->log("FIRST TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - } + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput; -private: - inline static constexpr int Iterations = 100u; + testInput.generatedA = longDistribution(mt); + testInput.generatedB = longDistribution(mt); - void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) - { - TestValues cpuTestValues; + uint32_t generatedShift = intDistribution(mt) & uint32_t(63); + testInput.shift = generatedShift; - fillTestValues(commonTestInputValues, cpuTestValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + testInput.coordX = longDistribution(mt); + testInput.coordY = longDistribution(mt); + testInput.coordZ = longDistribution(mt); + testInput.coordW = longDistribution(mt); + return testInput; } - void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + TestValues determineExpectedResults(const InputTestValues& testInput) override { - TestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + { + const uint64_t generatedA = testInput.generatedA; + const uint64_t generatedB = testInput.generatedB; + const uint32_t generatedShift = testInput.shift; + + expected.emulatedAnd = _static_cast(generatedA & generatedB); + expected.emulatedOr = _static_cast(generatedA | generatedB); + expected.emulatedXor = _static_cast(generatedA ^ generatedB); + expected.emulatedNot = _static_cast(~generatedA); + expected.emulatedPlus = _static_cast(generatedA + generatedB); + expected.emulatedMinus = _static_cast(generatedA - generatedB); + expected.emulatedLess = uint32_t(generatedA < generatedB); + expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); + expected.emulatedGreater = uint32_t(generatedA > generatedB); + expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); + + expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); + expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); + expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); + } + { + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint16_t2 Vec2ASmall = uint16_t2(Vec2A & smallBitsMask_2); + uint16_t2 Vec2BSmall = uint16_t2(Vec2B & smallBitsMask_2); + uint16_t2 Vec2AMedium = uint16_t2(Vec2A & mediumBitsMask_2); + uint16_t2 Vec2BMedium = uint16_t2(Vec2B & mediumBitsMask_2); + uint32_t2 Vec2AFull = uint32_t2(Vec2A & fullBitsMask_2); + uint32_t2 Vec2BFull = uint32_t2(Vec2B & fullBitsMask_2); + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + + uint16_t3 Vec3ASmall = uint16_t3(Vec3A & smallBitsMask_3); + uint16_t3 Vec3BSmall = uint16_t3(Vec3B & smallBitsMask_3); + uint16_t3 Vec3AMedium = uint16_t3(Vec3A & mediumBitsMask_3); + uint16_t3 Vec3BMedium = uint16_t3(Vec3B & mediumBitsMask_3); + uint32_t3 Vec3AFull = uint32_t3(Vec3A & fullBitsMask_3); + uint32_t3 Vec3BFull = uint32_t3(Vec3B & fullBitsMask_3); + + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + + uint16_t4 Vec4ASmall = uint16_t4(Vec4A & smallBitsMask_4); + uint16_t4 Vec4BSmall = uint16_t4(Vec4B & smallBitsMask_4); + uint16_t4 Vec4AMedium = uint16_t4(Vec4A & mediumBitsMask_4); + uint16_t4 Vec4BMedium = uint16_t4(Vec4B & mediumBitsMask_4); + uint16_t4 Vec4AFull = uint16_t4(Vec4A & fullBitsMask_4); + uint16_t4 Vec4BFull = uint16_t4(Vec4B & fullBitsMask_4); + + // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them + // so their highest bits are all 0s or 1s depending on the sign of the number they encode + + int16_t2 Vec2ASignedSmall = int16_t2(Vec2ASmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); + int16_t2 Vec2BSignedSmall = int16_t2(Vec2BSmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); + int16_t2 Vec2ASignedMedium = int16_t2(Vec2AMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); + int16_t2 Vec2BSignedMedium = int16_t2(Vec2BMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); + int32_t2 Vec2ASignedFull = int32_t2(Vec2AFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); + int32_t2 Vec2BSignedFull = int32_t2(Vec2BFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); + + int16_t3 Vec3ASignedSmall = int16_t3(Vec3ASmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); + int16_t3 Vec3BSignedSmall = int16_t3(Vec3BSmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); + int16_t3 Vec3ASignedMedium = int16_t3(Vec3AMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); + int16_t3 Vec3BSignedMedium = int16_t3(Vec3BMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); + int32_t3 Vec3ASignedFull = int32_t3(Vec3AFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); + int32_t3 Vec3BSignedFull = int32_t3(Vec3BFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); + + int16_t4 Vec4ASignedSmall = int16_t4(Vec4ASmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); + int16_t4 Vec4BSignedSmall = int16_t4(Vec4BSmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); + int16_t4 Vec4ASignedMedium = int16_t4(Vec4AMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); + int16_t4 Vec4BSignedMedium = int16_t4(Vec4BMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); + int16_t4 Vec4ASignedFull = int16_t4(Vec4AFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); + int16_t4 Vec4BSignedFull = int16_t4(Vec4BFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); + + // Plus + expected.mortonPlus_small_2 = morton::code::create(Vec2ASmall + Vec2BSmall); + expected.mortonPlus_medium_2 = morton::code::create(Vec2AMedium + Vec2BMedium); + expected.mortonPlus_full_2 = morton::code::create(Vec2AFull + Vec2BFull); + expected.mortonPlus_emulated_2 = morton::code::create(Vec2AFull + Vec2BFull); + + expected.mortonPlus_small_3 = morton::code::create(Vec3ASmall + Vec3BSmall); + expected.mortonPlus_medium_3 = morton::code::create(Vec3AMedium + Vec3BMedium); + expected.mortonPlus_full_3 = morton::code::create(Vec3AFull + Vec3BFull); + expected.mortonPlus_emulated_3 = morton::code::create(Vec3AFull + Vec3BFull); + + expected.mortonPlus_small_4 = morton::code::create(Vec4ASmall + Vec4BSmall); + expected.mortonPlus_medium_4 = morton::code::create(Vec4AMedium + Vec4BMedium); + expected.mortonPlus_full_4 = morton::code::create(Vec4AFull + Vec4BFull); + expected.mortonPlus_emulated_4 = morton::code::create(Vec4AFull + Vec4BFull); + + // Minus + expected.mortonMinus_small_2 = morton::code::create(Vec2ASmall - Vec2BSmall); + expected.mortonMinus_medium_2 = morton::code::create(Vec2AMedium - Vec2BMedium); + expected.mortonMinus_full_2 = morton::code::create(Vec2AFull - Vec2BFull); + expected.mortonMinus_emulated_2 = morton::code::create(Vec2AFull - Vec2BFull); + + expected.mortonMinus_small_3 = morton::code::create(Vec3ASmall - Vec3BSmall); + expected.mortonMinus_medium_3 = morton::code::create(Vec3AMedium - Vec3BMedium); + expected.mortonMinus_full_3 = morton::code::create(Vec3AFull - Vec3BFull); + expected.mortonMinus_emulated_3 = morton::code::create(Vec3AFull - Vec3BFull); + + expected.mortonMinus_small_4 = morton::code::create(Vec4ASmall - Vec4BSmall); + expected.mortonMinus_medium_4 = morton::code::create(Vec4AMedium - Vec4BMedium); + expected.mortonMinus_full_4 = morton::code::create(Vec4AFull - Vec4BFull); + expected.mortonMinus_emulated_4 = morton::code::create(Vec4AFull - Vec4BFull); + + // Coordinate-wise equality + expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); + expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); + expected.mortonEqual_full_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + expected.mortonEqual_emulated_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + + expected.mortonEqual_small_3 = uint32_t3(glm::equal(Vec3ASmall, Vec3BSmall)); + expected.mortonEqual_medium_3 = uint32_t3(glm::equal(Vec3AMedium, Vec3BMedium)); + expected.mortonEqual_full_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + expected.mortonEqual_emulated_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + + expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); + expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); + expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + + // Coordinate-wise unsigned inequality (just testing with less) + expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); + expected.mortonUnsignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2AMedium, Vec2BMedium)); + expected.mortonUnsignedLess_full_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + expected.mortonUnsignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + + expected.mortonUnsignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASmall, Vec3BSmall)); + expected.mortonUnsignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3AMedium, Vec3BMedium)); + expected.mortonUnsignedLess_full_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + expected.mortonUnsignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + + expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); + expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); + expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + // Coordinate-wise signed inequality + expected.mortonSignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); + expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); + expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + + expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); + expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); + expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + + expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); + expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); + expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(testInput.shift); + // Left-shift + expected.mortonLeftShift_small_2 = morton::code::create((Vec2ASmall << uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); + expected.mortonLeftShift_medium_2 = morton::code::create((Vec2AMedium << uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); + expected.mortonLeftShift_full_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + expected.mortonLeftShift_emulated_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + + expected.mortonLeftShift_small_3 = morton::code::create((Vec3ASmall << uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); + expected.mortonLeftShift_medium_3 = morton::code::create((Vec3AMedium << uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); + expected.mortonLeftShift_full_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + expected.mortonLeftShift_emulated_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + + expected.mortonLeftShift_small_4 = morton::code::create((Vec4ASmall << uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); + expected.mortonLeftShift_medium_4 = morton::code::create((Vec4AMedium << uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); + expected.mortonLeftShift_full_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + expected.mortonLeftShift_emulated_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + + // Unsigned right-shift + expected.mortonUnsignedRightShift_small_2 = morton::code::create((Vec2ASmall >> uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); + expected.mortonUnsignedRightShift_medium_2 = morton::code::create((Vec2AMedium >> uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); + expected.mortonUnsignedRightShift_full_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + expected.mortonUnsignedRightShift_emulated_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + + expected.mortonUnsignedRightShift_small_3 = morton::code::create((Vec3ASmall >> uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); + expected.mortonUnsignedRightShift_medium_3 = morton::code::create((Vec3AMedium >> uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); + expected.mortonUnsignedRightShift_full_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + expected.mortonUnsignedRightShift_emulated_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + + expected.mortonUnsignedRightShift_small_4 = morton::code::create((Vec4ASmall >> uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); + expected.mortonUnsignedRightShift_medium_4 = morton::code::create((Vec4AMedium >> uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); + expected.mortonUnsignedRightShift_full_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + expected.mortonUnsignedRightShift_emulated_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + + // Signed right-shift + expected.mortonSignedRightShift_small_2 = morton::code::create((Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)) & int16_t(smallBitsMask_2)); + expected.mortonSignedRightShift_medium_2 = morton::code::create((Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)) & int16_t(mediumBitsMask_2)); + expected.mortonSignedRightShift_full_2 = morton::code::create((Vec2ASignedFull >> int32_t(castedShift % fullBits_2)) & int32_t(fullBitsMask_2)); + + expected.mortonSignedRightShift_small_3 = morton::code::create((Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)) & int16_t(smallBitsMask_3)); + expected.mortonSignedRightShift_medium_3 = morton::code::create((Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)) & int16_t(mediumBitsMask_3)); + expected.mortonSignedRightShift_full_3 = morton::code::create((Vec3ASignedFull >> int32_t(castedShift % fullBits_3)) & int32_t(fullBitsMask_3)); + + expected.mortonSignedRightShift_small_4 = morton::code::create((Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)) & int16_t(smallBitsMask_4)); + expected.mortonSignedRightShift_medium_4 = morton::code::create((Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)) & int16_t(mediumBitsMask_4)); + expected.mortonSignedRightShift_full_4 = morton::code::create((Vec4ASignedFull >> int16_t(castedShift % fullBits_4)) & int16_t(fullBitsMask_4)); + } + + return expected; } - void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) override { verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); diff --git a/73_Mortons/app_resources/testCommon.hlsl b/73_Mortons/app_resources/testCommon.hlsl index 9ff9a4fa8..43c43f7c9 100644 --- a/73_Mortons/app_resources/testCommon.hlsl +++ b/73_Mortons/app_resources/testCommon.hlsl @@ -1,242 +1,245 @@ #include "common.hlsl" -void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +struct TestExecutor { - emulated_uint64_t emulatedA = _static_cast(input.generatedA); - emulated_uint64_t emulatedB = _static_cast(input.generatedB); - - // Emulated int tests - output.emulatedAnd = emulatedA & emulatedB; - output.emulatedOr = emulatedA | emulatedB; - output.emulatedXor = emulatedA ^ emulatedB; - output.emulatedNot = emulatedA.operator~(); - output.emulatedPlus = emulatedA + emulatedB; - output.emulatedMinus = emulatedA - emulatedB; - output.emulatedLess = uint32_t(emulatedA < emulatedB); - output.emulatedLessEqual = uint32_t(emulatedA <= emulatedB); - output.emulatedGreater = uint32_t(emulatedA > emulatedB); - output.emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); - - left_shift_operator leftShift; - output.emulatedLeftShifted = leftShift(emulatedA, input.shift); - - arithmetic_right_shift_operator unsignedRightShift; - output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); - - arithmetic_right_shift_operator signedRightShift; - output.emulatedSignedRightShifted = signedRightShift(_static_cast(emulatedA), input.shift); - - // Morton tests - uint64_t2 Vec2A = { input.coordX, input.coordY }; - uint64_t2 Vec2B = { input.coordZ, input.coordW }; - - uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; - uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; - - uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; - uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; - - int64_t2 Vec2ASigned = int64_t2(Vec2A); - int64_t2 Vec2BSigned = int64_t2(Vec2B); - - int64_t3 Vec3ASigned = int64_t3(Vec3A); - int64_t3 Vec3BSigned = int64_t3(Vec3B); - - int64_t4 Vec4ASigned = int64_t4(Vec4A); - int64_t4 Vec4BSigned = int64_t4(Vec4B); - - morton::code morton_small_2A = morton::code::create(Vec2A); - morton::code morton_medium_2A = morton::code::create(Vec2A); - morton::code morton_full_2A = morton::code::create(Vec2A); - morton::code morton_emulated_2A = morton::code::create(Vec2A); - morton::code morton_small_2B = morton::code::create(Vec2B); - morton::code morton_medium_2B = morton::code::create(Vec2B); - morton::code morton_full_2B = morton::code::create(Vec2B); - morton::code morton_emulated_2B = morton::code::create(Vec2B); - - morton::code morton_small_3A = morton::code::create(Vec3A); - morton::code morton_medium_3A = morton::code::create(Vec3A); - morton::code morton_full_3A = morton::code::create(Vec3A); - morton::code morton_emulated_3A = morton::code::create(Vec3A); - morton::code morton_small_3B = morton::code::create(Vec3B); - morton::code morton_medium_3B = morton::code::create(Vec3B); - morton::code morton_full_3B = morton::code::create(Vec3B); - morton::code morton_emulated_3B = morton::code::create(Vec3B); - - morton::code morton_small_4A = morton::code::create(Vec4A); - morton::code morton_medium_4A = morton::code::create(Vec4A); - morton::code morton_full_4A = morton::code::create(Vec4A); - morton::code morton_emulated_4A = morton::code::create(Vec4A); - morton::code morton_small_4B = morton::code::create(Vec4B); - morton::code morton_medium_4B = morton::code::create(Vec4B); - morton::code morton_full_4B = morton::code::create(Vec4B); - morton::code morton_emulated_4B = morton::code::create(Vec4B); - - morton::code morton_small_2_signed = morton::code::create(Vec2ASigned); - morton::code morton_medium_2_signed = morton::code::create(Vec2ASigned); - morton::code morton_full_2_signed = morton::code::create(Vec2ASigned); - - morton::code morton_small_3_signed = morton::code::create(Vec3ASigned); - morton::code morton_medium_3_signed = morton::code::create(Vec3ASigned); - morton::code morton_full_3_signed = morton::code::create(Vec3ASigned); - - morton::code morton_small_4_signed = morton::code::create(Vec4ASigned); - morton::code morton_medium_4_signed = morton::code::create(Vec4ASigned); - morton::code morton_full_4_signed = morton::code::create(Vec4ASigned); - - // Plus - output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; - output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; - output.mortonPlus_full_2 = morton_full_2A + morton_full_2B; - output.mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; - - output.mortonPlus_small_3 = morton_small_3A + morton_small_3B; - output.mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; - output.mortonPlus_full_3 = morton_full_3A + morton_full_3B; - output.mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; - - output.mortonPlus_small_4 = morton_small_4A + morton_small_4B; - output.mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; - output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; - output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; - - // Minus - output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; - output.mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; - output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; - output.mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; - - output.mortonMinus_small_3 = morton_small_3A - morton_small_3B; - output.mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; - output.mortonMinus_full_3 = morton_full_3A - morton_full_3B; - output.mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; - - output.mortonMinus_small_4 = morton_small_4A - morton_small_4B; - output.mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; - output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; - output.mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; - - // Coordinate-wise equality - output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(uint16_t2(Vec2B))); - output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(uint16_t2(Vec2B))); - output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(uint32_t2(Vec2B))); - output.mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(uint32_t2(Vec2B))); - - output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(uint16_t3(Vec3B))); - output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(uint16_t3(Vec3B))); - output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(uint32_t3(Vec3B))); - output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(uint32_t3(Vec3B))); - - output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(uint16_t4(Vec4B))); - output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(uint16_t4(Vec4B))); - output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(uint16_t4(Vec4B))); - - // Coordinate-wise unsigned inequality (just testing with less) - output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(uint16_t2(Vec2B))); - output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(uint16_t2(Vec2B))); - output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(uint32_t2(Vec2B))); - output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(uint32_t2(Vec2B))); - - output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(uint16_t3(Vec3B))); - output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(uint16_t3(Vec3B))); - output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(uint32_t3(Vec3B))); - output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(uint32_t3(Vec3B))); - - output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(uint16_t4(Vec4B))); - output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(uint16_t4(Vec4B))); - output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(uint16_t4(Vec4B))); - - // Coordinate-wise signed inequality - output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(int16_t2(Vec2BSigned))); - output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(int16_t2(Vec2BSigned))); - output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(int32_t2(Vec2BSigned))); - - output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(int16_t3(Vec3BSigned))); - output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(int16_t3(Vec3BSigned))); - output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(int32_t3(Vec3BSigned))); - - output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(int16_t4(Vec4BSigned))); - output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(int16_t4(Vec4BSigned))); - output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(int16_t4(Vec4BSigned))); - - // Cast to uint16_t which is what left shift for Mortons expect - uint16_t castedShift = uint16_t(input.shift); - // Each left shift clamps to correct bits so the result kinda makes sense - // Left-shift - left_shift_operator > leftShiftSmall2; - output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); - left_shift_operator > leftShiftMedium2; - output.mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); - left_shift_operator > leftShiftFull2; - output.mortonLeftShift_full_2 = leftShiftFull2(morton_full_2A, castedShift % fullBits_2); - left_shift_operator > leftShiftEmulated2; - output.mortonLeftShift_emulated_2 = leftShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); - - left_shift_operator > leftShiftSmall3; - output.mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift % smallBits_3); - left_shift_operator > leftShiftMedium3; - output.mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); - left_shift_operator > leftShiftFull3; - output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); - left_shift_operator > leftShiftEmulated3; - output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); - - left_shift_operator > leftShiftSmall4; - output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); - left_shift_operator > leftShiftMedium4; - output.mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); - left_shift_operator > leftShiftFull4; - output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % fullBits_4); - left_shift_operator > leftShiftEmulated4; - output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - - // Unsigned right-shift - arithmetic_right_shift_operator > rightShiftSmall2; - output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); - arithmetic_right_shift_operator > rightShiftMedium2; - output.mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); - arithmetic_right_shift_operator > rightShiftFull2; - output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); - arithmetic_right_shift_operator > rightShiftEmulated2; - output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); - - arithmetic_right_shift_operator > rightShiftSmall3; - output.mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); - arithmetic_right_shift_operator > rightShiftMedium3; - output.mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); - arithmetic_right_shift_operator > rightShiftFull3; - output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); - arithmetic_right_shift_operator > rightShiftEmulated3; - output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); - - arithmetic_right_shift_operator > rightShiftSmall4; - output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); - arithmetic_right_shift_operator > rightShiftMedium4; - output.mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); - arithmetic_right_shift_operator > rightShiftFull4; - output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); - arithmetic_right_shift_operator > rightShiftEmulated4; - output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - - // Signed right-shift - arithmetic_right_shift_operator > rightShiftSignedSmall2; - output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); - arithmetic_right_shift_operator > rightShiftSignedMedium2; - output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); - arithmetic_right_shift_operator > rightShiftSignedFull2; - output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); - - arithmetic_right_shift_operator > rightShiftSignedSmall3; - output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); - arithmetic_right_shift_operator > rightShiftSignedMedium3; - output.mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); - arithmetic_right_shift_operator > rightShiftSignedFull3; - output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); - - arithmetic_right_shift_operator > rightShiftSignedSmall4; - output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); - arithmetic_right_shift_operator > rightShiftSignedMedium4; - output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); - arithmetic_right_shift_operator > rightShiftSignedFull4; - output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); -} \ No newline at end of file + void operator()(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) + { + emulated_uint64_t emulatedA = _static_cast(input.generatedA); + emulated_uint64_t emulatedB = _static_cast(input.generatedB); + + // Emulated int tests + output.emulatedAnd = emulatedA & emulatedB; + output.emulatedOr = emulatedA | emulatedB; + output.emulatedXor = emulatedA ^ emulatedB; + output.emulatedNot = emulatedA.operator~(); + output.emulatedPlus = emulatedA + emulatedB; + output.emulatedMinus = emulatedA - emulatedB; + output.emulatedLess = uint32_t(emulatedA < emulatedB); + output.emulatedLessEqual = uint32_t(emulatedA <= emulatedB); + output.emulatedGreater = uint32_t(emulatedA > emulatedB); + output.emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); + + left_shift_operator leftShift; + output.emulatedLeftShifted = leftShift(emulatedA, input.shift); + + arithmetic_right_shift_operator unsignedRightShift; + output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); + + arithmetic_right_shift_operator signedRightShift; + output.emulatedSignedRightShifted = signedRightShift(_static_cast(emulatedA), input.shift); + + // Morton tests + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + int64_t2 Vec2ASigned = int64_t2(Vec2A); + int64_t2 Vec2BSigned = int64_t2(Vec2B); + + int64_t3 Vec3ASigned = int64_t3(Vec3A); + int64_t3 Vec3BSigned = int64_t3(Vec3B); + + int64_t4 Vec4ASigned = int64_t4(Vec4A); + int64_t4 Vec4BSigned = int64_t4(Vec4B); + + morton::code morton_small_2A = morton::code::create(Vec2A); + morton::code morton_medium_2A = morton::code::create(Vec2A); + morton::code morton_full_2A = morton::code::create(Vec2A); + morton::code morton_emulated_2A = morton::code::create(Vec2A); + morton::code morton_small_2B = morton::code::create(Vec2B); + morton::code morton_medium_2B = morton::code::create(Vec2B); + morton::code morton_full_2B = morton::code::create(Vec2B); + morton::code morton_emulated_2B = morton::code::create(Vec2B); + + morton::code morton_small_3A = morton::code::create(Vec3A); + morton::code morton_medium_3A = morton::code::create(Vec3A); + morton::code morton_full_3A = morton::code::create(Vec3A); + morton::code morton_emulated_3A = morton::code::create(Vec3A); + morton::code morton_small_3B = morton::code::create(Vec3B); + morton::code morton_medium_3B = morton::code::create(Vec3B); + morton::code morton_full_3B = morton::code::create(Vec3B); + morton::code morton_emulated_3B = morton::code::create(Vec3B); + + morton::code morton_small_4A = morton::code::create(Vec4A); + morton::code morton_medium_4A = morton::code::create(Vec4A); + morton::code morton_full_4A = morton::code::create(Vec4A); + morton::code morton_emulated_4A = morton::code::create(Vec4A); + morton::code morton_small_4B = morton::code::create(Vec4B); + morton::code morton_medium_4B = morton::code::create(Vec4B); + morton::code morton_full_4B = morton::code::create(Vec4B); + morton::code morton_emulated_4B = morton::code::create(Vec4B); + + morton::code morton_small_2_signed = morton::code::create(Vec2ASigned); + morton::code morton_medium_2_signed = morton::code::create(Vec2ASigned); + morton::code morton_full_2_signed = morton::code::create(Vec2ASigned); + + morton::code morton_small_3_signed = morton::code::create(Vec3ASigned); + morton::code morton_medium_3_signed = morton::code::create(Vec3ASigned); + morton::code morton_full_3_signed = morton::code::create(Vec3ASigned); + + morton::code morton_small_4_signed = morton::code::create(Vec4ASigned); + morton::code morton_medium_4_signed = morton::code::create(Vec4ASigned); + morton::code morton_full_4_signed = morton::code::create(Vec4ASigned); + + // Plus + output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; + output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; + output.mortonPlus_full_2 = morton_full_2A + morton_full_2B; + output.mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; + + output.mortonPlus_small_3 = morton_small_3A + morton_small_3B; + output.mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; + output.mortonPlus_full_3 = morton_full_3A + morton_full_3B; + output.mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; + + output.mortonPlus_small_4 = morton_small_4A + morton_small_4B; + output.mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; + output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; + output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; + + // Minus + output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; + output.mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; + output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; + output.mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; + + output.mortonMinus_small_3 = morton_small_3A - morton_small_3B; + output.mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; + output.mortonMinus_full_3 = morton_full_3A - morton_full_3B; + output.mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; + + output.mortonMinus_small_4 = morton_small_4A - morton_small_4B; + output.mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; + output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; + output.mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; + + // Coordinate-wise equality + output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(uint16_t2(Vec2B))); + output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(uint16_t2(Vec2B))); + output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(uint32_t2(Vec2B))); + output.mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(uint32_t2(Vec2B))); + + output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(uint16_t3(Vec3B))); + output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(uint16_t3(Vec3B))); + output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(uint32_t3(Vec3B))); + output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(uint32_t3(Vec3B))); + + output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(uint16_t4(Vec4B))); + output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(uint16_t4(Vec4B))); + output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(uint16_t4(Vec4B))); + + // Coordinate-wise unsigned inequality (just testing with less) + output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(uint16_t2(Vec2B))); + output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(uint16_t2(Vec2B))); + output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(uint32_t2(Vec2B))); + output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(uint32_t2(Vec2B))); + + output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(uint16_t3(Vec3B))); + output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(uint16_t3(Vec3B))); + output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(uint32_t3(Vec3B))); + output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(uint32_t3(Vec3B))); + + output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(uint16_t4(Vec4B))); + output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(uint16_t4(Vec4B))); + output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(uint16_t4(Vec4B))); + + // Coordinate-wise signed inequality + output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(int16_t2(Vec2BSigned))); + output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(int16_t2(Vec2BSigned))); + output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(int32_t2(Vec2BSigned))); + + output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(int16_t3(Vec3BSigned))); + output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(int16_t3(Vec3BSigned))); + output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(int32_t3(Vec3BSigned))); + + output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(int16_t4(Vec4BSigned))); + output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(int16_t4(Vec4BSigned))); + output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(int16_t4(Vec4BSigned))); + + // Cast to uint16_t which is what left shift for Mortons expect + uint16_t castedShift = uint16_t(input.shift); + // Each left shift clamps to correct bits so the result kinda makes sense + // Left-shift + left_shift_operator > leftShiftSmall2; + output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); + left_shift_operator > leftShiftMedium2; + output.mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + left_shift_operator > leftShiftFull2; + output.mortonLeftShift_full_2 = leftShiftFull2(morton_full_2A, castedShift % fullBits_2); + left_shift_operator > leftShiftEmulated2; + output.mortonLeftShift_emulated_2 = leftShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + left_shift_operator > leftShiftSmall3; + output.mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift % smallBits_3); + left_shift_operator > leftShiftMedium3; + output.mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + left_shift_operator > leftShiftFull3; + output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); + left_shift_operator > leftShiftEmulated3; + output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + left_shift_operator > leftShiftSmall4; + output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); + left_shift_operator > leftShiftMedium4; + output.mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + left_shift_operator > leftShiftFull4; + output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % fullBits_4); + left_shift_operator > leftShiftEmulated4; + output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Unsigned right-shift + arithmetic_right_shift_operator > rightShiftSmall2; + output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftMedium2; + output.mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftFull2; + output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftEmulated2; + output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + arithmetic_right_shift_operator > rightShiftSmall3; + output.mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftMedium3; + output.mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftFull3; + output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftEmulated3; + output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSmall4; + output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftMedium4; + output.mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftFull4; + output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); + arithmetic_right_shift_operator > rightShiftEmulated4; + output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Signed right-shift + arithmetic_right_shift_operator > rightShiftSignedSmall2; + output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftSignedMedium2; + output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftSignedFull2; + output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); + + arithmetic_right_shift_operator > rightShiftSignedSmall3; + output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftSignedMedium3; + output.mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftSignedFull3; + output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSignedSmall4; + output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftSignedMedium4; + output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftSignedFull4; + output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); + } +}; \ No newline at end of file diff --git a/73_Mortons/main.cpp b/73_Mortons/main.cpp index 6034e3469..d5e9ebb55 100644 --- a/73_Mortons/main.cpp +++ b/73_Mortons/main.cpp @@ -47,10 +47,10 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator // Blocked by https://github.com/KhronosGroup/SPIRV-Tools/issues/6104 { - CTester mortonTester; - pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; + CTester mortonTester(100); + pplnSetupData.testCommonDataPath = "testCommon.hlsl"; mortonTester.setupPipeline(pplnSetupData); - mortonTester.performTests(); + mortonTester.performTestsAndVerifyResults(); } return true; diff --git a/73_Mortons/ITester.h b/common/include/nbl/examples/Tester/ITester.h similarity index 71% rename from 73_Mortons/ITester.h rename to common/include/nbl/examples/Tester/ITester.h index a0c76ac75..907a7889d 100644 --- a/73_Mortons/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -1,13 +1,12 @@ -#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ -#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ +#ifndef _NBL_COMMON_I_TESTER_INCLUDED_ +#define _NBL_COMMON_I_TESTER_INCLUDED_ #include -#include "app_resources/common.hlsl" -#include "nbl/application_templates/MonoDeviceApplication.hpp" using namespace nbl; -class ITester +template +class ITester { public: virtual ~ITester() @@ -17,7 +16,7 @@ class ITester struct PipelineSetupData { - std::string testShaderPath; + std::string testCommonDataPath; core::smart_refctd_ptr device; core::smart_refctd_ptr api; @@ -48,8 +47,8 @@ class ITester { asset::IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); + lp.workingDirectory = "nbl/examples"; // virtual root + auto assetBundle = m_assetMgr->getAsset("Tester/test.comp.hlsl", lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return logFail("Could not load shader!"); @@ -58,7 +57,15 @@ class ITester assert(assets.size() == 1); core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - shader = m_device->compileShader({source.get()}); + // TODO: `pipleineSetupData.testCommonDataPath` is a path to a custom user provided file containing implementation of structures needed for the shader to work, this file need to be included somehow + // to the test shader + + auto overridenSource = asset::CHLSLCompiler::createOverridenCopy( + source.get(), "#define WORKGROUP_SIZE %d\n#define TEST_COUNT %d\n", + m_WorkgroupSize, m_testIterationCount + ); + + shader = m_device->compileShader({overridenSource.get()}); } if (!shader) @@ -100,7 +107,7 @@ class ITester // Allocate memory of the input buffer { - constexpr size_t BufferSize = sizeof(InputStruct); + const size_t BufferSize = sizeof(InputStruct) * m_testIterationCount; video::IGPUBuffer::SCreationParams params = {}; params.size = BufferSize; @@ -135,7 +142,7 @@ class ITester // Allocate memory of the output buffer { - constexpr size_t BufferSize = sizeof(OutputStruct); + const size_t BufferSize = sizeof(OutputStruct) * m_testIterationCount; video::IGPUBuffer::SCreationParams params = {}; params.size = BufferSize; @@ -180,33 +187,47 @@ class ITester m_queue = m_device->getQueue(m_queueFamily, 0); } - enum class TestType + void performTestsAndVerifyResults() { - CPU, - GPU - }; + core::vector inputTestValues; + core::vector exceptedTestResults; - template - void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) - { - if (expectedVal == testVal) - return; + inputTestValues.reserve(m_testIterationCount); + exceptedTestResults.reserve(m_testIterationCount); - std::stringstream ss; - switch (testType) + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); + for (int i = 0; i < m_testIterationCount; ++i) { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; - break; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput = generateInputTestValues(); + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected = determineExpectedResults(testInput); + + inputTestValues.push_back(testInput); + exceptedTestResults.push_back(expected); } - ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n'; + core::vector cpuTestResults = performCpuTests(inputTestValues); + core::vector gpuTestResults = performGpuTests(inputTestValues); - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); + verifyAllTestResults(cpuTestResults, gpuTestResults, exceptedTestResults); + + m_logger->log("TESTS DONE.", system::ILogger::ELL_PERFORMANCE); } +protected: + enum class TestType + { + CPU, + GPU + }; + + virtual void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, TestType testType) = 0; + + virtual InputTestValues generateInputTestValues() = 0; + + virtual TestResults determineExpectedResults(const InputTestValues& testInput) = 0; + protected: uint32_t m_queueFamily; core::smart_refctd_ptr m_device; @@ -225,8 +246,10 @@ class ITester video::IQueue* m_queue; uint64_t m_semaphoreCounter; - template - OutputStruct dispatch(const InputStruct& input) + ITester(const uint32_t testIterationCount) + : m_testIterationCount(testIterationCount) {}; + + void dispatchGpuTests(const core::vector& input, core::vector& output) { // Update input buffer if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) @@ -236,17 +259,20 @@ class ITester if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) m_device->invalidateMappedMemoryRanges(1, &memoryRange); - std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), &input, sizeof(InputStruct)); + assert(m_testIterationCount == input.size()); + const size_t inputDataSize = sizeof(InputTestValues) * m_testIterationCount; + std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), input.data(), inputDataSize); m_inputBufferAllocation.memory->unmap(); // record command buffer + const uint32_t dispatchSizeX = (m_testIterationCount + (m_WorkgroupSize - 1)) / m_WorkgroupSize; m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); m_cmdbuf->bindComputePipeline(m_pipeline.get()); m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); - m_cmdbuf->dispatch(1, 1, 1); + m_cmdbuf->dispatch(dispatchSizeX, 1, 1); m_cmdbuf->endDebugMarker(); m_cmdbuf->end(); @@ -261,11 +287,34 @@ class ITester m_api->endCapture(); m_device->waitIdle(); - OutputStruct output; - std::memcpy(&output, static_cast(m_outputBufferAllocation.memory->getMappedPointer()), sizeof(OutputStruct)); + + // save test results + assert(m_testIterationCount == output.size()); + const size_t outputDataSize = sizeof(InputTestValues) * m_testIterationCount; + std::memcpy(output.data(), static_cast(m_outputBufferAllocation.memory->getMappedPointer()), outputDataSize); + m_device->waitIdle(); + } - return output; + template + void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) + { + if (expectedVal == testVal) + return; + + std::stringstream ss; + switch (testType) + { + case TestType::CPU: + ss << "CPU TEST ERROR:\n"; + break; + case TestType::GPU: + ss << "GPU TEST ERROR:\n"; + } + + ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n'; + + m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); } private: @@ -275,6 +324,37 @@ class ITester m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); exit(-1); } + + core::vector performCpuTests(const core::vector& inputTestValues) + { + core::vector output(m_testIterationCount); + TestExecutor testExecutor; + + for (int i = 0; i < m_testIterationCount; ++i) + testExecutor(inputTestValues[i], output[i]); + + return output; + } + + core::vector performGpuTests(const core::vector& inputTestValues) + { + core::vector output(m_testIterationCount); + dispatchGpuTests(inputTestValues, output); + + return output; + } + + void verifyAllTestResults(const core::vector& cpuTestReults, const core::vector& gpuTestReults, const core::vector& exceptedTestReults) + { + for (int i = 0; i < m_testIterationCount; ++i) + { + verifyTestResults(exceptedTestReults[i], cpuTestReults[i], ITester::TestType::CPU); + verifyTestResults(exceptedTestReults[i], cpuTestReults[i], ITester::TestType::GPU); + } + } + + const size_t m_testIterationCount; + static constexpr size_t m_WorkgroupSize = 32u; }; #endif \ No newline at end of file diff --git a/73_Mortons/app_resources/test.comp.hlsl b/common/include/nbl/examples/Tester/test.comp.hlsl similarity index 50% rename from 73_Mortons/app_resources/test.comp.hlsl rename to common/include/nbl/examples/Tester/test.comp.hlsl index d1010aeb0..e3591936a 100644 --- a/73_Mortons/app_resources/test.comp.hlsl +++ b/common/include/nbl/examples/Tester/test.comp.hlsl @@ -2,15 +2,20 @@ //// This file is part of the "Nabla Engine". //// For conditions of distribution and use, see copyright notice in nabla.h -#include "testCommon.hlsl" +// TODO: this is obviously not right, contents of this file should be appended to this shader runtime via source modification +#include "D:/repos/Nabla/examples_tests/73_Mortons/app_resources/testCommon.hlsl" [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(256, 1, 1)] +[numthreads(WORKGROUP_SIZE, 1, 1)] [shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { - if (invocationID.x == 0) - fillTestValues(inputTestValues[0], outputTestValues[0]); + const uint invID = invocationID.x; + if (invID >= TEST_COUNT) + return; + + TestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); } From b659f1a3e624bd4d6b87629f2740d048c2db8b17 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 30 Nov 2025 15:41:26 +0100 Subject: [PATCH 076/219] fix push constants, split into 2 structs, update layouts with offsets, use in one input with multiple SPIRV entry points --- 50.IESViewer/AppInit.cpp | 4 +- 50.IESViewer/AppRender.cpp | 34 ++++++++-------- 50.IESViewer/CSimpleIESRenderer.hpp | 14 +++---- 50.IESViewer/app_resources/common.hlsl | 26 ++++++++---- 50.IESViewer/app_resources/ies.unified.hlsl | 44 ++++++++++----------- 5 files changed, 66 insertions(+), 56 deletions(-) diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index 68605ab51..087f0c17f 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -140,7 +140,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) using stage_flags_t = asset::IShader::E_SHADER_STAGE; static constexpr auto TexturesCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT) | binding_flags_t::ECF_PARTIALLY_BOUND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT; static constexpr auto SamplersCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT); - static constexpr auto StageFlags = core::bitflag(stage_flags_t::ESS_FRAGMENT) | stage_flags_t::ESS_COMPUTE; + static constexpr auto StageFlags = core::bitflag(stage_flags_t::ESS_FRAGMENT) | stage_flags_t::ESS_VERTEX | stage_flags_t::ESS_COMPUTE; //! single descriptor for both compute & graphics, we will only need to trasition images' layout with a barrier #define BINDING_TEXTURE(IX, TYPE) { .binding = IX, .type = TYPE, .createFlags = TexturesCreateFlags, .stageFlags = StageFlags, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr } @@ -189,7 +189,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) if (not descriptorSetLayout) return logFail("Failed to create descriptor set layout!"); - auto range = std::to_array({ {StageFlags.value, 0u, sizeof(nbl::hlsl::this_example::ies::PushConstants)} }); + auto range = std::to_array({ {StageFlags.value, offsetof(hlsl::this_example::ies::PushConstants, cdc), sizeof(nbl::hlsl::this_example::ies::CdcPC)} }); auto pipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); if (not pipelineLayout) diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index 10bfc92c4..301a0ed53 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -52,22 +52,20 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi auto& ies = m_assets[m_activeAssetIx]; const auto* profile = ies.getProfile(); - nbl::hlsl::this_example::ies::PushConstants pc; - { - pc.vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(); - pc.hAnglesBDA = ies.buffers.hAngles->getDeviceAddress(); - pc.dataBDA = ies.buffers.data->getDeviceAddress(); - - const auto& accessor = profile->getAccessor(); - pc.maxIValue = accessor.properties.maxCandelaValue; - pc.vAnglesCount = accessor.vAnglesCount(); - pc.hAnglesCount = accessor.hAnglesCount(); - - pc.zAngleDegreeRotation = ies.zDegree; - pc.mode = ies.mode; - pc.symmetry = static_cast(accessor.symmetry()); - pc.texIx = m_activeAssetIx; - } + const auto& accessor = profile->getAccessor(); + const auto pc = nbl::hlsl::this_example::ies::CdcPC + { + .hAnglesBDA = ies.buffers.hAngles->getDeviceAddress(), + .vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(), + .dataBDA = ies.buffers.data->getDeviceAddress(), + .mode = ies.mode, + .symmetry = (uint32_t)accessor.symmetry(), + .texIx = (uint32_t)m_activeAssetIx, + .hAnglesCount = accessor.hAnglesCount(), + .vAnglesCount = accessor.vAnglesCount(), + .maxIValue = accessor.properties.maxCandelaValue, + .zAngleDegreeRotation = ies.zDegree + }; for (auto& buffer : { ies.buffers.data, ies.buffers.hAngles, ies.buffers.vAngles }) // flush request for sanity { @@ -89,7 +87,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi auto* layout = m_computePipeline->getLayout(); cb->bindComputePipeline(m_computePipeline.get()); cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); - cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); + cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, offsetof(hlsl::this_example::ies::PushConstants, cdc), sizeof(pc), &pc); const auto xGroups = (ies.getProfile()->getAccessor().properties.optimalIESResolution.x - 1u) / WORKGROUP_DIMENSION + 1u; cb->dispatch(xGroups, xGroups, 1); cb->endDebugMarker(); @@ -169,7 +167,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); } const auto viewParams = CSimpleIESRenderer::SViewParams(viewMatrix, viewProjMatrix); - const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = 100.f, .ds = m_descriptors[0u].get(), .texID = (uint32_t)m_activeAssetIx }); + const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = 100.f, .ds = m_descriptors[0u].get(), .texID = (uint16_t)m_activeAssetIx }); // tear down scene every frame m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data() + m_activeAssetIx; diff --git a/50.IESViewer/CSimpleIESRenderer.hpp b/50.IESViewer/CSimpleIESRenderer.hpp index 58ea5e9d4..7e75240b8 100644 --- a/50.IESViewer/CSimpleIESRenderer.hpp +++ b/50.IESViewer/CSimpleIESRenderer.hpp @@ -52,7 +52,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted { hlsl::float32_t radius = 1.f; IGPUDescriptorSet* ds = nullptr; - uint32_t texID; + uint16_t texID; }; // struct SPackedGeometry @@ -61,7 +61,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted asset::SBufferBinding indexBuffer = {}; uint32_t elementCount = 0; // indices into the descriptor set - constexpr static inline auto MissingView = hlsl::this_example::ies::PushConstants::DescriptorCount; + constexpr static inline auto MissingView = hlsl::this_example::ies::SpherePC::DescriptorCount; uint16_t positionView = MissingView; uint16_t normalView = MissingView; asset::E_INDEX_TYPE indexType = asset::EIT_UNKNOWN; @@ -69,7 +69,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted // struct SInstance { - using SPushConstants = hlsl::this_example::ies::PushConstants; + using SPushConstants = hlsl::this_example::ies::SpherePC; inline SPushConstants computePushConstants(const SViewParams& viewParams, const SIESParams& iesParams) const { using namespace hlsl; @@ -77,8 +77,8 @@ class CSimpleIESRenderer final : public core::IReferenceCounted .matrices = viewParams.computeForInstance(world), .positionView = packedGeo->positionView, .normalView = packedGeo->normalView, - .texIx = iesParams.texID, - .sphereRadius = iesParams.radius + .radius = iesParams.radius, + .texIx = iesParams.texID }; } @@ -153,7 +153,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted // create pipeline layout const SPushConstantRange ranges[] = {{ .stageFlags = hlsl::ShaderStage::ESS_VERTEX|hlsl::ShaderStage::ESS_FRAGMENT, - .offset = 0, + .offset = offsetof(hlsl::this_example::ies::PushConstants, sphere), .size = sizeof(SInstance::SPushConstants), }}; init.layout = device->createPipelineLayout(ranges, smart_refctd_ptr(iesDSLayout), smart_refctd_ptr(init.subAllocDS->getDescriptorSet()->getLayout())); @@ -376,7 +376,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted const auto* geo = instance.packedGeo; cmdbuf->bindGraphicsPipeline(geo->pipeline.get()); const auto pc = instance.computePushConstants(viewParams, iesParams); - cmdbuf->pushConstants(layout,hlsl::ShaderStage::ESS_VERTEX|hlsl::ShaderStage::ESS_FRAGMENT,0,sizeof(pc),&pc); + cmdbuf->pushConstants(layout,hlsl::ShaderStage::ESS_VERTEX|hlsl::ShaderStage::ESS_FRAGMENT,offsetof(hlsl::this_example::ies::PushConstants, sphere),sizeof(pc),&pc); if (geo->indexBuffer) { cmdbuf->bindIndexBuffer(geo->indexBuffer,geo->indexType); diff --git a/50.IESViewer/app_resources/common.hlsl b/50.IESViewer/app_resources/common.hlsl index 12dda0281..5bd00ca4a 100644 --- a/50.IESViewer/app_resources/common.hlsl +++ b/50.IESViewer/app_resources/common.hlsl @@ -23,16 +23,11 @@ struct SInstanceMatrices float32_t3x3 normal; }; -struct PushConstants +struct CdcPC { - NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1<<16)-1; - - SInstanceMatrices matrices; uint64_t hAnglesBDA; uint64_t vAnglesBDA; uint64_t dataBDA; - uint32_t positionView : 16; - uint32_t normalView : 16; uint32_t mode : 8; uint32_t symmetry : 8; uint32_t texIx : 16; @@ -40,7 +35,24 @@ struct PushConstants uint32_t vAnglesCount; float32_t maxIValue; float32_t zAngleDegreeRotation; - float32_t sphereRadius; + + uint32_t pad; +}; + +struct SpherePC +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1<<16)-1; + SInstanceMatrices matrices; + uint32_t positionView : 16; + uint32_t normalView : 16; + float32_t radius; + uint16_t texIx; +}; + +struct PushConstants +{ + CdcPC cdc; + SpherePC sphere; }; } diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index aeca9c3fb..c9b64a9ec 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -19,7 +19,7 @@ using namespace nbl::hlsl::ext::FullScreenTriangle; [[vk::binding(2 + 10, 0)]] RWTexture2D outOUVProjectionDirectionImage[MAX_IES_IMAGES]; [[vk::binding(3 + 10, 0)]] RWTexture2D outPassTMask[MAX_IES_IMAGES]; [[vk::binding(0 + 100, 0)]] SamplerState generalSampler; -[[vk::binding(0, 1)]] Buffer utbs[PushConstants::DescriptorCount]; +[[vk::binding(0, 1)]] Buffer utbs[SpherePC::DescriptorCount]; [[vk::push_constant]] PushConstants pc; struct Accessor @@ -29,19 +29,19 @@ struct Accessor using value_t = float32_t; using symmetry_t = nbl::hlsl::ies::ProfileProperties::LuminairePlanesSymmetry; - static key_t vAnglesCount() { return pc.vAnglesCount; } - static key_t hAnglesCount() { return pc.hAnglesCount; } + static key_t vAnglesCount() { return pc.cdc.vAnglesCount; } + static key_t hAnglesCount() { return pc.cdc.hAnglesCount; } template) - static inline value_t vAngle(T j) { return (nbl::hlsl::bda::__ptr::create(pc.vAnglesBDA) + j).deref().load(); } + static inline value_t vAngle(T j) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.vAnglesBDA) + j).deref().load(); } template) - static inline value_t hAngle(T i) { return (nbl::hlsl::bda::__ptr::create(pc.hAnglesBDA) + i).deref().load(); } + static inline value_t hAngle(T i) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.hAnglesBDA) + i).deref().load(); } template) - static inline value_t value(T ij) { return (nbl::hlsl::bda::__ptr::create(pc.dataBDA) + vAnglesCount() * ij.x + ij.y).deref().load(); } + static inline value_t value(T ij) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.dataBDA) + vAnglesCount() * ij.x + ij.y).deref().load(); } - static inline symmetry_t symmetry() { return (symmetry_t)pc.symmetry; } + static inline symmetry_t symmetry() { return (symmetry_t)pc.cdc.symmetry; } }; struct SInterpolants @@ -83,7 +83,7 @@ float32_t3 latLongDir(float32_t2 uv) SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) { uint32_t2 resolution; - outIESCandelaImage[pc.texIx].GetDimensions(resolution.x, resolution.y); // optimal IES texture size + inIESCandelaImage[pc.sphere.texIx].GetDimensions(resolution.x, resolution.y); const uint32_t W = resolution.x, H = resolution.y; const uint32_t i = VertexIndex % W, j = VertexIndex / W; @@ -98,10 +98,10 @@ SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) const float32_t2 uvPos = float32_t2(uPos, vPos); const float32_t3 dir = latLongDir(uvPos); - const float32_t3 pos = pc.sphereRadius * dir; + const float32_t3 pos = pc.sphere.radius * dir; SInterpolants o; - o.ndc = math::linalg::promoted_mul(pc.matrices.worldViewProj, pos); + o.ndc = math::linalg::promoted_mul(pc.sphere.matrices.worldViewProj, pos); o.latDir = dir; return o; @@ -111,7 +111,7 @@ SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) float32_t4 SpherePS(SInterpolants input) : SV_Target0 { float32_t2 uv = 0.5f * Octahedral::dirToNDC(input.latDir) + 0.5f; - float32_t candela = inIESCandelaImage[pc.texIx].Sample(generalSampler, uv).r; + float32_t candela = inIESCandelaImage[pc.sphere.texIx].Sample(generalSampler, uv).r; float32_t v = 1.0f - exp(-candela); return float32_t4(v,v,v,1); } @@ -121,7 +121,7 @@ float32_t4 SpherePS(SInterpolants input) : SV_Target0 void CdcCS(uint32_t3 ID : SV_DispatchThreadID) { uint32_t2 destinationSize; - outIESCandelaImage[pc.texIx].GetDimensions(destinationSize.x, destinationSize.y); + outIESCandelaImage[pc.cdc.texIx].GetDimensions(destinationSize.x, destinationSize.y); const uint32_t2 pixelCoordinates = uint32_t2(glsl::gl_GlobalInvocationID().x, glsl::gl_GlobalInvocationID().y); const float32_t VERTICAL_INVERSE = 1.0f / float32_t(destinationSize.x); @@ -149,10 +149,10 @@ void CdcCS(uint32_t3 ID : SV_DispatchThreadID) Accessor accessor; CSampler candelaSampler; - outIESCandelaImage[pc.texIx][pixelCoordinates] = candelaSampler.sample(accessor, polar) / pc.maxIValue; - outSphericalCoordinatesImage[pc.texIx][pixelCoordinates] = sCoords; - outOUVProjectionDirectionImage[pc.texIx][pixelCoordinates] = dir; - outPassTMask[pc.texIx][pixelCoordinates] = mask; + outIESCandelaImage[pc.cdc.texIx][pixelCoordinates] = candelaSampler.sample(accessor, polar) / pc.cdc.maxIValue; + outSphericalCoordinatesImage[pc.cdc.texIx][pixelCoordinates] = sCoords; + outOUVProjectionDirectionImage[pc.cdc.texIx][pixelCoordinates] = dir; + outPassTMask[pc.cdc.texIx][pixelCoordinates] = mask; } } @@ -165,7 +165,7 @@ float32_t plot(float32_t cand, float32_t pct, float32_t bold) // vertical cut of IES (i.e. cut by plane x = 0) float32_t f(float32_t2 uv) { - return inIESCandelaImage[pc.texIx].Sample(generalSampler, (0.5f * Octahedral::dirToNDC(normalize(float32_t3(uv.x, 0.001, uv.y))) + 0.5f)).x; + return inIESCandelaImage[pc.cdc.texIx].Sample(generalSampler, (0.5f * Octahedral::dirToNDC(normalize(float32_t3(uv.x, 0.001, uv.y))) + 0.5f)).x; } #include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" @@ -173,7 +173,7 @@ float32_t f(float32_t2 uv) [shader("pixel")] float32_t4 CdcPS(SVertexAttributes input) : SV_Target0 { - switch (pc.mode) + switch (pc.cdc.mode) { case 0: { @@ -189,12 +189,12 @@ float32_t4 CdcPS(SVertexAttributes input) : SV_Target0 return float32_t4(col, 1.0f); } case 1: - return float32_t4(inIESCandelaImage[pc.texIx].Sample(generalSampler, input.uv).x, 0.f, 0.f, 1.f); + return float32_t4(inIESCandelaImage[pc.cdc.texIx].Sample(generalSampler, input.uv).x, 0.f, 0.f, 1.f); case 2: - return float32_t4(inSphericalCoordinatesImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); + return float32_t4(inSphericalCoordinatesImage[pc.cdc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); case 3: - return float32_t4(inOUVProjectionDirectionImage[pc.texIx].Sample(generalSampler, input.uv).xyz, 1.f); + return float32_t4(inOUVProjectionDirectionImage[pc.cdc.texIx].Sample(generalSampler, input.uv).xyz, 1.f); default: - return float32_t4(inPassTMaskImage[pc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); + return float32_t4(inPassTMaskImage[pc.cdc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); } } From d8f82f0d593d04b437ca64ddece9c32112ed5b12 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 30 Nov 2025 16:21:01 +0100 Subject: [PATCH 077/219] make use of rotation degree PC field in pixel shader, I can now make octahedral maps immutable --- 50.IESViewer/app_resources/ies.unified.hlsl | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index c9b64a9ec..3e35d268c 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -156,7 +156,6 @@ void CdcCS(uint32_t3 ID : SV_DispatchThreadID) } } - float32_t plot(float32_t cand, float32_t pct, float32_t bold) { return smoothstep(pct-0.005*bold, pct, cand) - smoothstep(pct, pct+0.005*bold, cand); @@ -165,7 +164,21 @@ float32_t plot(float32_t cand, float32_t pct, float32_t bold) // vertical cut of IES (i.e. cut by plane x = 0) float32_t f(float32_t2 uv) { - return inIESCandelaImage[pc.cdc.texIx].Sample(generalSampler, (0.5f * Octahedral::dirToNDC(normalize(float32_t3(uv.x, 0.001, uv.y))) + 0.5f)).x; + float32_t3 dir = normalize(float32_t3(uv.x, 0.001, uv.y)); + if (pc.cdc.zAngleDegreeRotation != 0.f) + { + float32_t rad = radians(pc.cdc.zAngleDegreeRotation); + float32_t s = sin(rad); + float32_t c = cos(rad); + + // rotate around Z axis + dir = float32_t3( + c * dir.x - s * dir.y, + s * dir.x + c * dir.y, + dir.z + ); + } + return inIESCandelaImage[pc.cdc.texIx].Sample(generalSampler, (0.5f * Octahedral::dirToNDC(dir) + 0.5f)).x; } #include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" From 9c83531c63490bf743dee9bddcfbd5d729e1c916 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 30 Nov 2025 18:05:29 +0100 Subject: [PATCH 078/219] fix CAssetConverter assert by making unique grid buffers, deduplicate objects --- 50.IESViewer/AppInit.cpp | 45 ++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index 087f0c17f..a4ef73e2b 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -350,13 +350,18 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) { CGeometryCreatorScene::f_geometry_override_t injector = [&](auto* creator, auto addGeometry) { - for (auto i = 0u; i < m_assets.size(); ++i) - { - auto& ies = m_assets[i]; - const auto& resolution = ies.getProfile()->getAccessor().properties.optimalIESResolution; - auto name = "Grid " + std::to_string(i); - addGeometry(name.c_str(), creator->createGrid({ resolution.x, resolution.y })); - } + std::set> seen; + for (auto i = 0u; i < m_assets.size(); ++i) + { + auto& ies = m_assets[i]; + const auto& resolution = ies.getProfile()->getAccessor().properties.optimalIESResolution; + std::pair key{resolution.x, resolution.y}; + if (!seen.insert(key).second) + continue; + + auto name = "Grid (" + std::to_string(resolution.x) + " x " + std::to_string(resolution.y) + ")"; // (**) used to assing polygons! + addGeometry(name.c_str(), creator->createGrid({ resolution.x, resolution.y })); + } }; const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; @@ -371,10 +376,28 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) CSimpleIESRenderer::DefaultPolygonGeometryPatch ); - const auto& geometries = m_scene->getInitParams().geometries; - - m_renderer = CSimpleIESRenderer::create(shaders.ies, core::smart_refctd_ptr(m_descriptors[0u]->getLayout()), scRes->getRenderpass(), 0, { &geometries.front().get(),geometries.size() }); - if (!m_renderer || m_renderer->getGeometries().size() != geometries.size()) + const auto& geoParams = m_scene->getInitParams(); + core::vector> polygons(m_assets.size()); + for (uint32_t i = 0u; i < m_assets.size(); ++i) + { + const auto& resolution = m_assets[i].getProfile()->getAccessor().properties.optimalIESResolution; + + for (uint32_t g = 0u; g < geoParams.geometryNames.size(); ++g) + { + uint32_t w = 0u, h = 0u; + std::sscanf(geoParams.geometryNames[g].c_str(), "Grid (%u x %u)", &w, &h); // (**) + + if (w == resolution.x && h == resolution.y) + { + polygons[i] = geoParams.geometries[g]; + break; + } + } + assert(polygons[i]); + } + + m_renderer = CSimpleIESRenderer::create(shaders.ies, core::smart_refctd_ptr(m_descriptors[0u]->getLayout()), scRes->getRenderpass(), 0, { &polygons.front().get(),polygons.size() }); + if (!m_renderer || m_renderer->getGeometries().size() != polygons.size()) return logFail("Could not create 3D Plot Renderer!"); m_renderer->m_instances.resize(1); From 8fdd1c1138828318a2b2f554747fb3a49227781c Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 10:21:32 +0100 Subject: [PATCH 079/219] Ditched idea of unifying the test shader, it was too much of a headache --- .../app_resources}/test.comp.hlsl | 3 +-- 73_Mortons/main.cpp | 6 +++--- common/include/nbl/examples/Tester/ITester.h | 13 ++++++------- 3 files changed, 10 insertions(+), 12 deletions(-) rename {common/include/nbl/examples/Tester => 73_Mortons/app_resources}/test.comp.hlsl (75%) diff --git a/common/include/nbl/examples/Tester/test.comp.hlsl b/73_Mortons/app_resources/test.comp.hlsl similarity index 75% rename from common/include/nbl/examples/Tester/test.comp.hlsl rename to 73_Mortons/app_resources/test.comp.hlsl index e3591936a..14fe785c1 100644 --- a/common/include/nbl/examples/Tester/test.comp.hlsl +++ b/73_Mortons/app_resources/test.comp.hlsl @@ -2,8 +2,7 @@ //// This file is part of the "Nabla Engine". //// For conditions of distribution and use, see copyright notice in nabla.h -// TODO: this is obviously not right, contents of this file should be appended to this shader runtime via source modification -#include "D:/repos/Nabla/examples_tests/73_Mortons/app_resources/testCommon.hlsl" +#include "testCommon.hlsl" [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; diff --git a/73_Mortons/main.cpp b/73_Mortons/main.cpp index d5e9ebb55..4eb28231e 100644 --- a/73_Mortons/main.cpp +++ b/73_Mortons/main.cpp @@ -47,9 +47,9 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator // Blocked by https://github.com/KhronosGroup/SPIRV-Tools/issues/6104 { - CTester mortonTester(100); - pplnSetupData.testCommonDataPath = "testCommon.hlsl"; - mortonTester.setupPipeline(pplnSetupData); + CTester mortonTester(1000); + pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; + mortonTester.setupPipeline(pplnSetupData); mortonTester.performTestsAndVerifyResults(); } diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 907a7889d..ad7833cbe 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -16,7 +16,7 @@ class ITester struct PipelineSetupData { - std::string testCommonDataPath; + std::string testShaderPath; core::smart_refctd_ptr device; core::smart_refctd_ptr api; @@ -26,7 +26,6 @@ class ITester uint32_t computeFamilyIndex; }; - template void setupPipeline(const PipelineSetupData& pipleineSetupData) { // setting up pipeline in the constructor @@ -47,8 +46,8 @@ class ITester { asset::IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = "nbl/examples"; // virtual root - auto assetBundle = m_assetMgr->getAsset("Tester/test.comp.hlsl", lp); + lp.workingDirectory = ""; // virtual root + auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return logFail("Could not load shader!"); @@ -107,7 +106,7 @@ class ITester // Allocate memory of the input buffer { - const size_t BufferSize = sizeof(InputStruct) * m_testIterationCount; + const size_t BufferSize = sizeof(InputTestValues) * m_testIterationCount; video::IGPUBuffer::SCreationParams params = {}; params.size = BufferSize; @@ -142,7 +141,7 @@ class ITester // Allocate memory of the output buffer { - const size_t BufferSize = sizeof(OutputStruct) * m_testIterationCount; + const size_t BufferSize = sizeof(TestResults) * m_testIterationCount; video::IGPUBuffer::SCreationParams params = {}; params.size = BufferSize; @@ -182,7 +181,7 @@ class ITester if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) m_device->invalidateMappedMemoryRanges(1, &memoryRange); - assert(memoryRange.valid() && memoryRange.length >= sizeof(OutputStruct)); + assert(memoryRange.valid() && memoryRange.length >= sizeof(TestResults)); m_queue = m_device->getQueue(m_queueFamily, 0); } From 158e58891d6395df2566013b3590fdfe475aae8d Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 13:09:38 +0100 Subject: [PATCH 080/219] Created utility class for converting test values to text --- 73_Mortons/CTester.h | 282 ++++++++++--------- 73_Mortons/app_resources/test.comp.hlsl | 8 +- 73_Mortons/main.cpp | 4 +- common/include/nbl/examples/Tester/ITester.h | 74 ++++- 4 files changed, 218 insertions(+), 150 deletions(-) diff --git a/73_Mortons/CTester.h b/73_Mortons/CTester.h index 763307b53..eee0444dc 100644 --- a/73_Mortons/CTester.h +++ b/73_Mortons/CTester.h @@ -3,17 +3,32 @@ #include #include "app_resources/testCommon.hlsl" -#include "../common/include/nbl/examples/Tester/ITester.h" +#include "nbl/examples/Tester/ITester.h" using namespace nbl; +template +class TestValueToTextConverter> +{ + using value_t = hlsl::morton::code; +public: + std::string operator()(const value_t& value) + { + TestValueToTextConverter mortonCodeDataToTextConverter; + return mortonCodeDataToTextConverter(value.value); + } +}; + class CTester final : public ITester { using base_t = ITester; public: - CTester(const uint32_t testIterationCount) - : base_t(testIterationCount) {}; + /** + * @param testBatchCount one test batch is 128 tests + */ + CTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; private: InputTestValues generateInputTestValues() override @@ -244,144 +259,145 @@ class CTester final : public ITester expected.mortonSignedRightShift_full_4 = morton::code::create((Vec4ASignedFull >> int16_t(castedShift % fullBits_4)) & int16_t(fullBitsMask_4)); } + return {}; return expected; } - void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) override + void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, ITester::TestType testType) override { - verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); - verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); - verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testType); - verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testType); - verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testType); - verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testType); - verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testType); - verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testType); - verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testType); - verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testType); - verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testType); - verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testType); - verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testType); - - // Morton Plus - verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testType); - verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testType); - verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testType); - verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testType); - - verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testType); - verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testType); - verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testType); - verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testType); - - verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testType); - verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testType); - verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testType); - verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testType); - - // Morton Minus - verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testType); - verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testType); - verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testType); - verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testType); - - verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testType); - verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testType); - verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testType); - verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testType); - - verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testType); - verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testType); - verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testType); - verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testType); - - // Morton coordinate-wise equality - verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testType); - verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testType); - verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testType); - verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testType); - - verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testType); - verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testType); - verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testType); - verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testType); - - verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testType); - verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testType); - verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testType); - - // Morton coordinate-wise unsigned inequality - verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testType); - verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testType); - verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testType); - verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testType); - - verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testType); - verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testType); - verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testType); - verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testType); - - verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testType); - verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testType); - verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testType); - - // Morton coordinate-wise signed inequality - verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testType); - verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testType); - verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testType); - - verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testType); - verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testType); - verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testType); - - verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testType); - verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testType); - verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testType); - - // Morton left-shift - verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testType); - verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testType); - verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testType); - verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testType); - - verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testType); - verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testType); - verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testType); - verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testType); - - verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testType); - verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testType); - verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testType); - verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testType); - - // Morton unsigned right-shift - verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testType); - verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testType); - verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testType); - - verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testType); - verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testType); - verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testType); - - verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testType); - verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testType); - verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testType); + verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testIteration, seed, testType); + verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testIteration, seed, testType); + verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testIteration, seed, testType); + verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testIteration, seed, testType); + verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testIteration, seed, testType); + verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testIteration, seed, testType); + verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testIteration, seed, testType); + verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testIteration, seed, testType); + verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testIteration, seed, testType); + verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testIteration, seed, testType); + verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testIteration, seed, testType); + verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testIteration, seed, testType); + verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testIteration, seed, testType); + + //// Morton Plus + verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testIteration, seed, testType); + verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testIteration, seed, testType); + verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testIteration, seed, testType); + verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testIteration, seed, testType); + verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testIteration, seed, testType); + verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testIteration, seed, testType); + verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testIteration, seed, testType); + verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testIteration, seed, testType); + verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testIteration, seed, testType); + verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testIteration, seed, testType); + + //// Morton Minus + verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testIteration, seed, testType); + verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testIteration, seed, testType); + verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testIteration, seed, testType); + verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testIteration, seed, testType); + verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testIteration, seed, testType); + verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testIteration, seed, testType); + verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testIteration, seed, testType); + verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testIteration, seed, testType); + verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testIteration, seed, testType); + verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testIteration, seed, testType); + + //// Morton coordinate-wise equality + verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testIteration, seed, testType); + verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testIteration, seed, testType); + verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testIteration, seed, testType); + verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testIteration, seed, testType); + verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testIteration, seed, testType); + verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testIteration, seed, testType); + verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testIteration, seed, testType); + verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testIteration, seed, testType); + verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testIteration, seed, testType); + + //// Morton coordinate-wise unsigned inequality + verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testIteration, seed, testType); + + //// Morton coordinate-wise signed inequality + verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testIteration, seed, testType); + + verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testIteration, seed, testType); + + verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testIteration, seed, testType); + + //// Morton left-shift + verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testIteration, seed, testType); + + //// Morton unsigned right-shift + verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testIteration, seed, testType); // Morton signed right-shift - verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testType); - verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testType); - verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testType); + verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testType); - verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testType); - verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testType); + verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testType); - verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testType); - verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testType); + verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testIteration, seed, testType); } }; diff --git a/73_Mortons/app_resources/test.comp.hlsl b/73_Mortons/app_resources/test.comp.hlsl index 14fe785c1..13b5a32f0 100644 --- a/73_Mortons/app_resources/test.comp.hlsl +++ b/73_Mortons/app_resources/test.comp.hlsl @@ -3,18 +3,16 @@ //// For conditions of distribution and use, see copyright notice in nabla.h #include "testCommon.hlsl" +#include [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; [numthreads(WORKGROUP_SIZE, 1, 1)] [shader("compute")] -void main(uint3 invocationID : SV_DispatchThreadID) +void main() { - const uint invID = invocationID.x; - if (invID >= TEST_COUNT) - return; - + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID(); TestExecutor executor; executor(inputTestValues[invID], outputTestValues[invID]); } diff --git a/73_Mortons/main.cpp b/73_Mortons/main.cpp index 4eb28231e..f5db65952 100644 --- a/73_Mortons/main.cpp +++ b/73_Mortons/main.cpp @@ -36,7 +36,7 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp return false; if (!asset_base_t::onAppInitialized(std::move(system))) return false; - + CTester::PipelineSetupData pplnSetupData; pplnSetupData.device = m_device; pplnSetupData.api = m_api; @@ -47,7 +47,7 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator // Blocked by https://github.com/KhronosGroup/SPIRV-Tools/issues/6104 { - CTester mortonTester(1000); + CTester mortonTester(1); // 4 * 128 = 512 tests pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; mortonTester.setupPipeline(pplnSetupData); mortonTester.performTestsAndVerifyResults(); diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index ad7833cbe..42c789a5a 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -5,6 +5,59 @@ using namespace nbl; +template +class TestValueToTextConverter +{ +public: + std::string operator()(const T& value) + { + return std::to_string(value); + } +}; + +template<> +class TestValueToTextConverter +{ +public: + std::string operator()(const hlsl::emulated_uint64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template<> +class TestValueToTextConverter +{ +public: + std::string operator()(const hlsl::emulated_int64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template +class TestValueToTextConverter> +{ +public: + std::string operator()(const hlsl::vector& value) + { + std::stringstream output; + output << "{ "; + for (int i = 0; i < N; ++i) + { + TestValueToTextConverter vecComponentToTextConverter; + + output << vecComponentToTextConverter(value[i]); + + if (i < N - 1) + output << ", "; + } + output << " }"; + + return output.str(); + } +}; + template class ITester { @@ -56,9 +109,6 @@ class ITester assert(assets.size() == 1); core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - // TODO: `pipleineSetupData.testCommonDataPath` is a path to a custom user provided file containing implementation of structures needed for the shader to work, this file need to be included somehow - // to the test shader - auto overridenSource = asset::CHLSLCompiler::createOverridenCopy( source.get(), "#define WORKGROUP_SIZE %d\n#define TEST_COUNT %d\n", m_WorkgroupSize, m_testIterationCount @@ -221,7 +271,7 @@ class ITester GPU }; - virtual void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, TestType testType) = 0; + virtual void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) = 0; virtual InputTestValues generateInputTestValues() = 0; @@ -245,8 +295,8 @@ class ITester video::IQueue* m_queue; uint64_t m_semaphoreCounter; - ITester(const uint32_t testIterationCount) - : m_testIterationCount(testIterationCount) {}; + ITester(const uint32_t testBatchCount) + : m_testIterationCount(testBatchCount * m_WorkgroupSize) {}; void dispatchGpuTests(const core::vector& input, core::vector& output) { @@ -296,7 +346,7 @@ class ITester } template - void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) + void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const size_t testIteration, const uint32_t seed, const TestType testType) { if (expectedVal == testVal) return; @@ -312,6 +362,10 @@ class ITester } ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n'; + ss << "TEST ITERATION: " << testIteration << " SEED: " << seed << '\n'; + + TestValueToTextConverter toTextConverter; + ss << "EXPECTED VALUE: " << toTextConverter(expectedVal) << " TEST VALUE: " << toTextConverter(testVal) << '\n'; m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); } @@ -347,13 +401,13 @@ class ITester { for (int i = 0; i < m_testIterationCount; ++i) { - verifyTestResults(exceptedTestReults[i], cpuTestReults[i], ITester::TestType::CPU); - verifyTestResults(exceptedTestReults[i], cpuTestReults[i], ITester::TestType::GPU); + verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, 0, ITester::TestType::CPU); + verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, 0, ITester::TestType::GPU); } } const size_t m_testIterationCount; - static constexpr size_t m_WorkgroupSize = 32u; + static constexpr size_t m_WorkgroupSize = 128u; }; #endif \ No newline at end of file From 8842299b81c2ab0a8951d042b1945372a930b863 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 16:13:35 +0100 Subject: [PATCH 081/219] Replaced `TestValueToTextConverter` with `nbl::system::to_string` --- 73_Mortons/CTester.h | 12 ---- 73_Mortons/main.cpp | 11 ++++ common/include/nbl/examples/Tester/ITester.h | 58 +------------------- 3 files changed, 13 insertions(+), 68 deletions(-) diff --git a/73_Mortons/CTester.h b/73_Mortons/CTester.h index eee0444dc..5aeb722b8 100644 --- a/73_Mortons/CTester.h +++ b/73_Mortons/CTester.h @@ -7,18 +7,6 @@ using namespace nbl; -template -class TestValueToTextConverter> -{ - using value_t = hlsl::morton::code; -public: - std::string operator()(const value_t& value) - { - TestValueToTextConverter mortonCodeDataToTextConverter; - return mortonCodeDataToTextConverter(value.value); - } -}; - class CTester final : public ITester { using base_t = ITester; diff --git a/73_Mortons/main.cpp b/73_Mortons/main.cpp index f5db65952..556a02900 100644 --- a/73_Mortons/main.cpp +++ b/73_Mortons/main.cpp @@ -12,6 +12,8 @@ #include "app_resources/common.hlsl" #include "CTester.h" +#include + using namespace nbl::core; using namespace nbl::hlsl; using namespace nbl::system; @@ -37,6 +39,15 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp if (!asset_base_t::onAppInitialized(std::move(system))) return false; + uint32_t3 a = { 1, 3, 5 }; + std::string astr = nbl::system::to_string(a); + m_logger->log(astr.c_str(), ILogger::ELL_ERROR); + + morton::code m; + m.value = 2; + astr = nbl::system::to_string(m); + m_logger->log(astr.c_str(), ILogger::ELL_ERROR); + CTester::PipelineSetupData pplnSetupData; pplnSetupData.device = m_device; pplnSetupData.api = m_api; diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 42c789a5a..08c056565 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -2,62 +2,10 @@ #define _NBL_COMMON_I_TESTER_INCLUDED_ #include +#include using namespace nbl; -template -class TestValueToTextConverter -{ -public: - std::string operator()(const T& value) - { - return std::to_string(value); - } -}; - -template<> -class TestValueToTextConverter -{ -public: - std::string operator()(const hlsl::emulated_uint64_t& value) - { - return std::to_string(static_cast(value)); - } -}; - -template<> -class TestValueToTextConverter -{ -public: - std::string operator()(const hlsl::emulated_int64_t& value) - { - return std::to_string(static_cast(value)); - } -}; - -template -class TestValueToTextConverter> -{ -public: - std::string operator()(const hlsl::vector& value) - { - std::stringstream output; - output << "{ "; - for (int i = 0; i < N; ++i) - { - TestValueToTextConverter vecComponentToTextConverter; - - output << vecComponentToTextConverter(value[i]); - - if (i < N - 1) - output << ", "; - } - output << " }"; - - return output.str(); - } -}; - template class ITester { @@ -363,9 +311,7 @@ class ITester ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n'; ss << "TEST ITERATION: " << testIteration << " SEED: " << seed << '\n'; - - TestValueToTextConverter toTextConverter; - ss << "EXPECTED VALUE: " << toTextConverter(expectedVal) << " TEST VALUE: " << toTextConverter(testVal) << '\n'; + ss << "EXPECTED VALUE: " << system::to_string(expectedVal) << " TEST VALUE: " << system::to_string(testVal) << '\n'; m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); } From 8b69e0dfd179453f793664146d151a1786c31044 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 16:46:01 +0100 Subject: [PATCH 082/219] Modified msg for failed tests to print iteration index and seed --- 73_Mortons/CTester.h | 19 +++++--------- 73_Mortons/main.cpp | 23 ++++++++++------- common/include/nbl/examples/Tester/ITester.h | 27 +++++++++++++++++--- 3 files changed, 44 insertions(+), 25 deletions(-) diff --git a/73_Mortons/CTester.h b/73_Mortons/CTester.h index 5aeb722b8..4514ca609 100644 --- a/73_Mortons/CTester.h +++ b/73_Mortons/CTester.h @@ -21,26 +21,22 @@ class CTester final : public ITester private: InputTestValues generateInputTestValues() override { - std::random_device rd; - std::mt19937 mt(rd()); - - std::uniform_int_distribution shortDistribution(uint16_t(0), std::numeric_limits::max()); std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); // Set input thest values that will be used in both CPU and GPU tests InputTestValues testInput; - testInput.generatedA = longDistribution(mt); - testInput.generatedB = longDistribution(mt); + testInput.generatedA = longDistribution(getRandomEngine()); + testInput.generatedB = longDistribution(getRandomEngine()); - uint32_t generatedShift = intDistribution(mt) & uint32_t(63); + uint32_t generatedShift = intDistribution(getRandomEngine()) & uint32_t(63); testInput.shift = generatedShift; - testInput.coordX = longDistribution(mt); - testInput.coordY = longDistribution(mt); - testInput.coordZ = longDistribution(mt); - testInput.coordW = longDistribution(mt); + testInput.coordX = longDistribution(getRandomEngine()); + testInput.coordY = longDistribution(getRandomEngine()); + testInput.coordZ = longDistribution(getRandomEngine()); + testInput.coordW = longDistribution(getRandomEngine()); return testInput; } @@ -247,7 +243,6 @@ class CTester final : public ITester expected.mortonSignedRightShift_full_4 = morton::code::create((Vec4ASignedFull >> int16_t(castedShift % fullBits_4)) & int16_t(fullBitsMask_4)); } - return {}; return expected; } diff --git a/73_Mortons/main.cpp b/73_Mortons/main.cpp index 556a02900..03af6285b 100644 --- a/73_Mortons/main.cpp +++ b/73_Mortons/main.cpp @@ -33,21 +33,26 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp bool onAppInitialized(smart_refctd_ptr&& system) override { + std::random_device rd; + std::mt19937 mt(rd()); + std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); + + uint32_t a = intDistribution(mt); + uint32_t b = intDistribution(mt); + uint32_t c = intDistribution(mt); + + std::mt19937 mt2(rd()); + + a = intDistribution(mt2); + b = intDistribution(mt2); + c = intDistribution(mt2); + // Remember to call the base class initialization! if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; if (!asset_base_t::onAppInitialized(std::move(system))) return false; - uint32_t3 a = { 1, 3, 5 }; - std::string astr = nbl::system::to_string(a); - m_logger->log(astr.c_str(), ILogger::ELL_ERROR); - - morton::code m; - m.value = 2; - astr = nbl::system::to_string(m); - m_logger->log(astr.c_str(), ILogger::ELL_ERROR); - CTester::PipelineSetupData pplnSetupData; pplnSetupData.device = m_device; pplnSetupData.api = m_api; diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 08c056565..bae891dd5 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -210,6 +210,7 @@ class ITester verifyAllTestResults(cpuTestResults, gpuTestResults, exceptedTestResults); m_logger->log("TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + reloadSeed(); } protected: @@ -225,6 +226,11 @@ class ITester virtual TestResults determineExpectedResults(const InputTestValues& testInput) = 0; + std::mt19937& getRandomEngine() + { + return m_mersenneTwister; + } + protected: uint32_t m_queueFamily; core::smart_refctd_ptr m_device; @@ -244,7 +250,10 @@ class ITester uint64_t m_semaphoreCounter; ITester(const uint32_t testBatchCount) - : m_testIterationCount(testBatchCount * m_WorkgroupSize) {}; + : m_testIterationCount(testBatchCount * m_WorkgroupSize) + { + reloadSeed(); + }; void dispatchGpuTests(const core::vector& input, core::vector& output) { @@ -310,7 +319,7 @@ class ITester } ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n'; - ss << "TEST ITERATION: " << testIteration << " SEED: " << seed << '\n'; + ss << "TEST ITERATION INDEX: " << testIteration << " SEED: " << seed << '\n'; ss << "EXPECTED VALUE: " << system::to_string(expectedVal) << " TEST VALUE: " << system::to_string(testVal) << '\n'; m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); @@ -347,13 +356,23 @@ class ITester { for (int i = 0; i < m_testIterationCount; ++i) { - verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, 0, ITester::TestType::CPU); - verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, 0, ITester::TestType::GPU); + verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, m_seed, ITester::TestType::CPU); + verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, m_seed, ITester::TestType::GPU); } } + void reloadSeed() + { + std::random_device rd; + m_seed = rd(); + m_mersenneTwister = std::mt19937(m_seed); + } + const size_t m_testIterationCount; static constexpr size_t m_WorkgroupSize = 128u; + // seed will change after every call to performTestsAndVerifyResults() + std::mt19937 m_mersenneTwister; + uint32_t m_seed; }; #endif \ No newline at end of file From 44fdbe8d35a9505ac3474b708200cc7e039aae31 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 17:04:49 +0100 Subject: [PATCH 083/219] Parallelized execution of the CPU tests --- 73_Mortons/CTester.h | 6 +++--- 73_Mortons/main.cpp | 14 -------------- common/include/nbl/examples/Tester/ITester.h | 10 ++++++++-- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/73_Mortons/CTester.h b/73_Mortons/CTester.h index 4514ca609..d44550862 100644 --- a/73_Mortons/CTester.h +++ b/73_Mortons/CTester.h @@ -333,9 +333,9 @@ class CTester final : public ITester verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testIteration, seed, testType); verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testIteration, seed, testType); //// Morton left-shift verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testIteration, seed, testType); diff --git a/73_Mortons/main.cpp b/73_Mortons/main.cpp index 03af6285b..c11672e6c 100644 --- a/73_Mortons/main.cpp +++ b/73_Mortons/main.cpp @@ -33,20 +33,6 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp bool onAppInitialized(smart_refctd_ptr&& system) override { - std::random_device rd; - std::mt19937 mt(rd()); - std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); - - uint32_t a = intDistribution(mt); - uint32_t b = intDistribution(mt); - uint32_t c = intDistribution(mt); - - std::mt19937 mt2(rd()); - - a = intDistribution(mt2); - b = intDistribution(mt2); - c = intDistribution(mt2); - // Remember to call the base class initialization! if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index bae891dd5..23b4df46e 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -3,6 +3,7 @@ #include #include +#include using namespace nbl; @@ -338,8 +339,13 @@ class ITester core::vector output(m_testIterationCount); TestExecutor testExecutor; - for (int i = 0; i < m_testIterationCount; ++i) - testExecutor(inputTestValues[i], output[i]); + auto iterations = std::views::iota(0ull, m_testIterationCount); + std::for_each(std::execution::par_unseq, iterations.begin(), iterations.end(), + [&](size_t i) + { + testExecutor(inputTestValues[i], output[i]); + } + ); return output; } From 92784f38d708b3577cfdff39341cd519052cfe9d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 3 Dec 2025 15:02:49 +0100 Subject: [PATCH 084/219] remove some bindings, use nbl::hlsl::bda deref_restrict on ies texture info, update compute shader with ies texel write api, ELPF_LOAD_METADATA_ONLY for assets TODO: now need to update cpps --- 50.IESViewer/AppInit.cpp | 1 + 50.IESViewer/app_resources/common.hlsl | 9 ++- 50.IESViewer/app_resources/ies.unified.hlsl | 77 +++------------------ 3 files changed, 16 insertions(+), 71 deletions(-) diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index a4ef73e2b..2ccbc2fac 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -30,6 +30,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) auto start = std::chrono::high_resolution_clock::now(); size_t loaded = {}, total = out.inputList.size(); IAssetLoader::SAssetLoadParams lp = {}; + lp.loaderFlags = IAssetLoader::E_LOADER_PARAMETER_FLAGS::ELPF_LOAD_METADATA_ONLY; lp.logger = system::logger_opt_ptr(m_logger.get()); for (const auto& in : out.inputList) diff --git a/50.IESViewer/app_resources/common.hlsl b/50.IESViewer/app_resources/common.hlsl index 5bd00ca4a..ba3a28f7c 100644 --- a/50.IESViewer/app_resources/common.hlsl +++ b/50.IESViewer/app_resources/common.hlsl @@ -2,6 +2,7 @@ #define _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/ies/profile.hlsl" #define QUANT_ERROR_ADMISSIBLE 1/1024 #define WORKGROUP_SIZE 256u @@ -28,15 +29,13 @@ struct CdcPC uint64_t hAnglesBDA; uint64_t vAnglesBDA; uint64_t dataBDA; + uint64_t txtInfoBDA; uint32_t mode : 8; - uint32_t symmetry : 8; - uint32_t texIx : 16; + uint32_t texIx : 24; uint32_t hAnglesCount; uint32_t vAnglesCount; - float32_t maxIValue; float32_t zAngleDegreeRotation; - - uint32_t pad; + nbl::hlsl::ies::ProfileProperties properties; }; struct SpherePC diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index 3e35d268c..2be9e451b 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -1,9 +1,7 @@ #include "common.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" -#include "nbl/builtin/hlsl/math/octahedral.hlsl" -#include "nbl/builtin/hlsl/math/polar.hlsl" -#include "nbl/builtin/hlsl/ies/sampler.hlsl" +#include "nbl/builtin/hlsl/ies/texture.hlsl" #include "nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl" using namespace nbl::hlsl; @@ -11,14 +9,9 @@ using namespace nbl::hlsl::this_example::ies; using namespace nbl::hlsl::ext::FullScreenTriangle; [[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MAX_IES_IMAGES]; -[[vk::binding(1, 0)]] Texture2D inSphericalCoordinatesImage[MAX_IES_IMAGES]; -[[vk::binding(2, 0)]] Texture2D inOUVProjectionDirectionImage[MAX_IES_IMAGES]; -[[vk::binding(3, 0)]] Texture2D inPassTMaskImage[MAX_IES_IMAGES]; [[vk::binding(0 + 10, 0)]] RWTexture2D outIESCandelaImage[MAX_IES_IMAGES]; -[[vk::binding(1 + 10, 0)]] RWTexture2D outSphericalCoordinatesImage[MAX_IES_IMAGES]; -[[vk::binding(2 + 10, 0)]] RWTexture2D outOUVProjectionDirectionImage[MAX_IES_IMAGES]; -[[vk::binding(3 + 10, 0)]] RWTexture2D outPassTMask[MAX_IES_IMAGES]; [[vk::binding(0 + 100, 0)]] SamplerState generalSampler; + [[vk::binding(0, 1)]] Buffer utbs[SpherePC::DescriptorCount]; [[vk::push_constant]] PushConstants pc; @@ -27,7 +20,6 @@ struct Accessor using key_t = uint32_t; using key_t2 = vector; using value_t = float32_t; - using symmetry_t = nbl::hlsl::ies::ProfileProperties::LuminairePlanesSymmetry; static key_t vAnglesCount() { return pc.cdc.vAnglesCount; } static key_t hAnglesCount() { return pc.cdc.hAnglesCount; } @@ -41,7 +33,7 @@ struct Accessor template) static inline value_t value(T ij) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.dataBDA) + vAnglesCount() * ij.x + ij.y).deref().load(); } - static inline symmetry_t symmetry() { return (symmetry_t)pc.cdc.symmetry; } + static inline nbl::hlsl::ies::ProfileProperties getProperties() { return pc.cdc.properties; } }; struct SInterpolants @@ -50,26 +42,8 @@ struct SInterpolants float32_t3 latDir : COLOR1; }; -using Octahedral = math::OctahedralTransform; -using Polar = math::Polar; -using CSampler = nbl::hlsl::ies::CandelaSampler; - -//! Checks if (x,y) /in [0,PI] x [-PI,PI] product -/* - IES vertical range is [0, 180] degrees - and horizontal range is [0, 360] degrees - but for easier computations (MIRROR & MIRROW_REPEAT operations) - we represent horizontal range as [-180, 180] given spherical coordinates -*/ - -bool domainPass(const float32_t2 p) -{ - NBL_CONSTEXPR float32_t M_PI = numbers::pi; - const float32_t2 lb = float32_t2(0, -M_PI); - const float32_t2 ub = float32_t2(M_PI, M_PI); - - return all(lb <= p) && all(p <= ub); -} +using octahedral_t = math::OctahedralTransform; +using texture_t = nbl::hlsl::ies::Texture; float32_t3 latLongDir(float32_t2 uv) { @@ -110,7 +84,7 @@ SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) [shader("pixel")] float32_t4 SpherePS(SInterpolants input) : SV_Target0 { - float32_t2 uv = 0.5f * Octahedral::dirToNDC(input.latDir) + 0.5f; + float32_t2 uv = 0.5f * octahedral_t::dirToNDC(input.latDir) + 0.5f; float32_t candela = inIESCandelaImage[pc.sphere.texIx].Sample(generalSampler, uv).r; float32_t v = 1.0f - exp(-candela); return float32_t4(v,v,v,1); @@ -123,36 +97,11 @@ void CdcCS(uint32_t3 ID : SV_DispatchThreadID) uint32_t2 destinationSize; outIESCandelaImage[pc.cdc.texIx].GetDimensions(destinationSize.x, destinationSize.y); const uint32_t2 pixelCoordinates = uint32_t2(glsl::gl_GlobalInvocationID().x, glsl::gl_GlobalInvocationID().y); - - const float32_t VERTICAL_INVERSE = 1.0f / float32_t(destinationSize.x); - const float32_t HORIZONTAL_INVERSE = 1.0f / float32_t(destinationSize.y); - if (all(pixelCoordinates < destinationSize)) { - const float32_t2 uv = float32_t2((float32_t(pixelCoordinates.x) + 0.5) * VERTICAL_INVERSE, (float32_t(pixelCoordinates.y) + 0.5) * HORIZONTAL_INVERSE); - const float32_t3 dir = Octahedral::uvToDir(uv); - Polar polar = Polar::createFromCartesian(dir); - - const float32_t normD = length(dir); - float32_t2 mask; - - if (1.0f - QUANT_ERROR_ADMISSIBLE <= normD && normD <= 1.0f + QUANT_ERROR_ADMISSIBLE) - mask.x = 1.f; // pass - else - mask.x = 0.f; - - const float32_t2 sCoords = float32_t2(polar.phi, polar.theta); - if (domainPass(sCoords)) - mask.y = 1.f; // pass - else - mask.y = 0.f; - - Accessor accessor; - CSampler candelaSampler; - outIESCandelaImage[pc.cdc.texIx][pixelCoordinates] = candelaSampler.sample(accessor, polar) / pc.cdc.maxIValue; - outSphericalCoordinatesImage[pc.cdc.texIx][pixelCoordinates] = sCoords; - outOUVProjectionDirectionImage[pc.cdc.texIx][pixelCoordinates] = dir; - outPassTMask[pc.cdc.texIx][pixelCoordinates] = mask; + Accessor accessor; texture_t txt; + typename texture_t::SInfo info = (nbl::hlsl::bda::__ptr::create(pc.cdc.txtInfoBDA) + pc.cdc.texIx).deref_restrict().load(); + outIESCandelaImage[pc.cdc.texIx][pixelCoordinates] = txt.eval(accessor, info, pixelCoordinates); } } @@ -178,7 +127,7 @@ float32_t f(float32_t2 uv) dir.z ); } - return inIESCandelaImage[pc.cdc.texIx].Sample(generalSampler, (0.5f * Octahedral::dirToNDC(dir) + 0.5f)).x; + return inIESCandelaImage[pc.cdc.texIx].Sample(generalSampler, (0.5f * octahedral_t::dirToNDC(dir) + 0.5f)).x; } #include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" @@ -203,11 +152,7 @@ float32_t4 CdcPS(SVertexAttributes input) : SV_Target0 } case 1: return float32_t4(inIESCandelaImage[pc.cdc.texIx].Sample(generalSampler, input.uv).x, 0.f, 0.f, 1.f); - case 2: - return float32_t4(inSphericalCoordinatesImage[pc.cdc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); - case 3: - return float32_t4(inOUVProjectionDirectionImage[pc.cdc.texIx].Sample(generalSampler, input.uv).xyz, 1.f); default: - return float32_t4(inPassTMaskImage[pc.cdc.texIx].Sample(generalSampler, input.uv).xy, 0.f, 1.f); + return float32_t4(0.f, 0.f, 0.f, 0.f); } } From f18160276e78f860f64c45111c874e3351b44ffb Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 3 Dec 2025 23:24:18 +0300 Subject: [PATCH 085/219] New example, copy of 61_UI, updated a lot, visualizer, still not "solid angle", rest should be shader work --- 72_SolidAngleVisualizer/CMakeLists.txt | 20 + 72_SolidAngleVisualizer/README.md | 0 .../hlsl/SolidAngleVis.frag.hlsl | 175 +++ .../app_resources/hlsl/common.hlsl | 14 + 72_SolidAngleVisualizer/config.json.template | 28 + 72_SolidAngleVisualizer/include/common.hpp | 20 + 72_SolidAngleVisualizer/include/transform.hpp | 172 +++ 72_SolidAngleVisualizer/main.cpp | 1105 +++++++++++++++++ 72_SolidAngleVisualizer/pipeline.groovy | 50 + 72_SolidAngleVisualizer/src/transform.cpp | 0 CMakeLists.txt | 1 + 11 files changed, 1585 insertions(+) create mode 100644 72_SolidAngleVisualizer/CMakeLists.txt create mode 100644 72_SolidAngleVisualizer/README.md create mode 100644 72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl create mode 100644 72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl create mode 100644 72_SolidAngleVisualizer/config.json.template create mode 100644 72_SolidAngleVisualizer/include/common.hpp create mode 100644 72_SolidAngleVisualizer/include/transform.hpp create mode 100644 72_SolidAngleVisualizer/main.cpp create mode 100644 72_SolidAngleVisualizer/pipeline.groovy create mode 100644 72_SolidAngleVisualizer/src/transform.cpp diff --git a/72_SolidAngleVisualizer/CMakeLists.txt b/72_SolidAngleVisualizer/CMakeLists.txt new file mode 100644 index 000000000..5d0021f61 --- /dev/null +++ b/72_SolidAngleVisualizer/CMakeLists.txt @@ -0,0 +1,20 @@ +if(NBL_BUILD_IMGUI) + set(NBL_EXTRA_SOURCES + "${CMAKE_CURRENT_SOURCE_DIR}/src/transform.cpp" + ) + + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + list(APPEND NBL_LIBRARIES + imtestengine + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" + ) + + # TODO; Arek I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here, doesn't this macro have 4 arguments anyway !? + nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") + # TODO: Arek temporarily disabled cause I haven't figured out how to make this target yet + # LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) +endif() \ No newline at end of file diff --git a/72_SolidAngleVisualizer/README.md b/72_SolidAngleVisualizer/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl new file mode 100644 index 000000000..d783a5b37 --- /dev/null +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -0,0 +1,175 @@ +#pragma wave shader_stage(fragment) + +#include "common.hlsl" + +#include + +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +[[vk::push_constant]] struct PushConstants pc; + +static const float CIRCLE_RADIUS = 0.45f; + +// --- Geometry Utils --- + +// Adjacency of edges to faces +static const int2 edgeToFaces[12] = { + {4,2}, {3,4}, {2,5}, {5,3}, + {2,0}, {0,3}, {1,2}, {3,1}, + {0,4}, {5,0}, {4,1}, {1,5} +}; + +static const float3 localNormals[6] = { + float3(0, 0, -1), // Face 0 (Z-) + float3(0, 0, 1), // Face 1 (Z+) + float3(-1, 0, 0), // Face 2 (X-) + float3(1, 0, 0), // Face 3 (X+) + float3(0, -1, 0), // Face 4 (Y-) + float3(0, 1, 0) // Face 5 (Y+) +}; + +static float3 corners[8]; +static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), + float3(0,0,0), float3(0,0,0), float3(0,0,0) }; +static float2 projCorners[8]; + + +// Converts UV into centered, aspect-corrected NDC circle space +float2 toCircleSpace(float2 uv) +{ + float aspect = pc.viewport.z / pc.viewport.w; + float2 centered = uv - 0.5f; + centered.x *= aspect; + return centered; +} + +// Distance to a 2D line segment +float sdSegment(float2 p, float2 a, float2 b) +{ + float2 pa = p - a; + float2 ba = b - a; + float h = clamp(dot(pa, ba) / dot(ba, ba), 0.0f, 1.0f); + return length(pa - ba * h); +} + +// TODO: Hemispherical Projection (Solid Angle / Orthographic/Lambertian Projection) +float2 project(float3 p) +{ + return normalize(p).xy; +} + +void computeCubeGeo() +{ + for (int i = 0; i < 8; i++) + { + float3 localPos = float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f; + float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; + + corners[i] = worldPos; + + faceCenters[i/4] += worldPos / 4.0f; + faceCenters[2+i%2] += worldPos / 4.0f; + faceCenters[4+(i/2)%2] += worldPos / 4.0f; + + float3 viewPos = worldPos; + projCorners[i] = project(viewPos); + } +} + +int getVisibilityCount(int2 faces, float3 cameraPos) +{ + float3x3 rotMatrix = (float3x3)pc.modelMatrix; + float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); + float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); + + float3 viewVec_f1 = faceCenters[faces.x] - cameraPos; + float3 viewVec_f2 = faceCenters[faces.y] - cameraPos; + + // Face is visible if its outward normal points towards the origin (camera). + bool visible1 = dot(n_world_f1, viewVec_f1) < 0.0f; + bool visible2 = dot(n_world_f2, viewVec_f2) < 0.0f; + + // Determine Line Style: + bool isSilhouette = visible1 != visible2; // One face visible, the other hidden + bool isInner = visible1 && visible2; // Both faces visible + + int visibilityCount = 0; + if (isSilhouette) + { + visibilityCount = 1; + } + else if (isInner) + { + visibilityCount = 2; + } + + return visibilityCount; +} + +void drawLine(float2 p, int a, int b, int visibilityCount, inout float4 color, float aaWidth) +{ + if (visibilityCount > 0) + { + float3 A = corners[a]; + float3 B = corners[b]; + + float avgDepth = (length(A) + length(B)) * 0.5f; + float referenceDepth = 3.0f; + float depthScale = referenceDepth / avgDepth; + + float baseWidth = (visibilityCount == 1) ? 0.005f : 0.002f; + float intensity = (visibilityCount == 1) ? 1.0f : 0.5f; + float4 edgeColor = (visibilityCount == 1) ? float4(0.0f, 0.5f, 1.0f, 1.0f) : float4(1.0f, 0.0f, 0.0f, 1.0f); // Blue vs Red + + float width = min(baseWidth * depthScale, 0.03f); + + float dist = sdSegment(p, projCorners[a], projCorners[b]); + + float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); + + color += edgeColor * alpha * intensity; + } +} + +void drawRing(float2 p, inout float4 color, float aaWidth) +{ + float positionLength = length(p); + + // Mask to cut off drawing outside the circle + // float circleMask = 1.0f - smoothstep(CIRCLE_RADIUS, CIRCLE_RADIUS + aaWidth, positionLength); + // color *= circleMask; + + // Add a white background circle ring + float ringWidth = 0.005f; + float ringDistance = abs(positionLength - CIRCLE_RADIUS); + float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); + + // Ring color is now white + color = max(color, float4(1.0, 1.0, 1.0, 1.0) * ringAlpha); +} + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 +{ + float3 cameraPos = float3(0, 0, 0); // Camera at origin + float2 p = toCircleSpace(vx.uv); + float4 color = float4(0, 0, 0, 0); + + computeCubeGeo(); + + float aaWidth = max(fwidth(p.x), fwidth(p.y)); + + for (int j = 0; j < 12; j++) + { + int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? j % 2 : 0); + int b = a + (4 >> (j / 4)); + + int2 faces = edgeToFaces[j]; + int visibilityCount = getVisibilityCount(faces, cameraPos); + drawLine(p, a, b, visibilityCount, color, aaWidth); + } + + drawRing(p, color, aaWidth); + + return color; +} \ No newline at end of file diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl new file mode 100644 index 000000000..80368d08f --- /dev/null +++ b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -0,0 +1,14 @@ +#ifndef _SOLID_ANGLE_VIS_COMMON_HLSL_ +#define _SOLID_ANGLE_VIS_COMMON_HLSL_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + + + +struct PushConstants +{ + nbl::hlsl::float32_t3x4 modelMatrix; + nbl::hlsl::float32_t4 viewport; +}; + + +#endif // _SOLID_ANGLE_VIS_COMMON_HLSL_ diff --git a/72_SolidAngleVisualizer/config.json.template b/72_SolidAngleVisualizer/config.json.template new file mode 100644 index 000000000..f961745c1 --- /dev/null +++ b/72_SolidAngleVisualizer/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/72_SolidAngleVisualizer/include/common.hpp b/72_SolidAngleVisualizer/include/common.hpp new file mode 100644 index 000000000..2e8e985dd --- /dev/null +++ b/72_SolidAngleVisualizer/include/common.hpp @@ -0,0 +1,20 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ + + +#include "nbl/examples/examples.hpp" + +// the example's headers +#include "transform.hpp" +#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/72_SolidAngleVisualizer/include/transform.hpp b/72_SolidAngleVisualizer/include/transform.hpp new file mode 100644 index 000000000..002a9d215 --- /dev/null +++ b/72_SolidAngleVisualizer/include/transform.hpp @@ -0,0 +1,172 @@ +#ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ + + +#include "nbl/ui/ICursorControl.h" + +#include "nbl/ext/ImGui/ImGui.h" + +#include "imgui/imgui_internal.h" +#include "imguizmo/ImGuizmo.h" + + +struct TransformRequestParams +{ + float camDistance = 8.f; + uint8_t sceneTexDescIx = ~0; + bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = false; +}; + +struct TransformReturnInfo +{ + nbl::hlsl::uint16_t2 sceneResolution = { 2048,1024 }; + bool isGizmoWindowHovered; + bool isGizmoBeingUsed; +}; + +TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) +{ + static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE); + static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL); + static bool useSnap = false; + static float snap[3] = { 1.f, 1.f, 1.f }; + static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f }; + static float boundsSnap[] = { 0.1f, 0.1f, 0.1f }; + static bool boundSizing = false; + static bool boundSizingSnap = false; + + if (params.editTransformDecomposition) + { + if (ImGui::IsKeyPressed(ImGuiKey_T)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + if (ImGui::IsKeyPressed(ImGuiKey_R)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + if (ImGui::IsKeyPressed(ImGuiKey_S)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) + mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; + float matrixTranslation[3], matrixRotation[3], matrixScale[3]; + ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); + ImGui::InputFloat3("Tr", matrixTranslation); + ImGui::InputFloat3("Rt", matrixRotation); + ImGui::InputFloat3("Sc", matrixScale); + ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); + + if (mCurrentGizmoOperation != ImGuizmo::SCALE) + { + if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL)) + mCurrentGizmoMode = ImGuizmo::LOCAL; + ImGui::SameLine(); + if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD)) + mCurrentGizmoMode = ImGuizmo::WORLD; + } + if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift)) + useSnap = !useSnap; + ImGui::Checkbox("##UseSnap", &useSnap); + ImGui::SameLine(); + + switch (mCurrentGizmoOperation) + { + case ImGuizmo::TRANSLATE: + ImGui::InputFloat3("Snap", &snap[0]); + break; + case ImGuizmo::ROTATE: + ImGui::InputFloat("Angle Snap", &snap[0]); + break; + case ImGuizmo::SCALE: + ImGui::InputFloat("Scale Snap", &snap[0]); + break; + } + ImGui::Checkbox("Bound Sizing", &boundSizing); + if (boundSizing) + { + ImGui::PushID(3); + ImGui::Checkbox("##BoundSizing", &boundSizingSnap); + ImGui::SameLine(); + ImGui::InputFloat3("Snap", boundsSnap); + ImGui::PopID(); + } + } + + ImGuiIO& io = ImGui::GetIO(); + float viewManipulateRight = io.DisplaySize.x; + float viewManipulateTop = 0; + static ImGuiWindowFlags gizmoWindowFlags = 0; + + /* + for the "useWindow" case we just render to a gui area, + otherwise to fake full screen transparent window + + note that for both cases we make sure gizmo being + rendered is aligned to our texture scene using + imgui "cursor" screen positions + */ +// TODO: this shouldn't be handled here I think + SImResourceInfo info; + info.textureID = params.sceneTexDescIx; + info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + + TransformReturnInfo retval; + if (params.useWindow) + { + ImGui::SetNextWindowSize(ImVec2(800, 800), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing); + ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f)); + ImGui::Begin("Gizmo", 0, gizmoWindowFlags); + ImGuizmo::SetDrawlist(); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 windowPos = ImGui::GetWindowPos(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval.sceneResolution = {contentRegionSize.x,contentRegionSize.y}; + retval.isGizmoWindowHovered = ImGui::IsWindowHovered(); + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + + ImGuiWindow* window = ImGui::GetCurrentWindow(); + gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0); + } + else + { + ImGui::SetNextWindowPos(ImVec2(0, 0)); + ImGui::SetNextWindowSize(io.DisplaySize); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window + ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval.sceneResolution = {contentRegionSize.x,contentRegionSize.y}; + retval.isGizmoWindowHovered = ImGui::IsWindowHovered(); + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + } + + ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL); + retval.isGizmoBeingUsed = ImGuizmo::IsOver() || (ImGuizmo::IsUsing() && ImGui::IsMouseDown(ImGuiMouseButton_Left)); + + if(params.enableViewManipulate) + ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + + ImGui::End(); + ImGui::PopStyleColor(); + + return retval; +} + +#endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ \ No newline at end of file diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp new file mode 100644 index 000000000..b6d723e70 --- /dev/null +++ b/72_SolidAngleVisualizer/main.cpp @@ -0,0 +1,1105 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + + +#include "common.hpp" +#include "app_resources/hlsl/common.hlsl" + +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + +/* +Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window. + +Written with Nabla's UI extension and got integrated with ImGuizmo to handle scene's object translations. +*/ +class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; + + inline static std::string SolidAngleVisShaderPath = "app_resources/hlsl/SolidAngleVis.frag.hlsl"; +public: + inline SolidAngleVisualizer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({ 2048,1024 }, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for (auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!pool) + return logFail("Couldn't create Command Pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i,1 })) + return logFail("Couldn't create Command Buffer!"); + } + + const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; + m_scene = CGeometryCreatorScene::create( + { + .transferQueue = getTransferUpQueue(), + .utilities = m_utils.get(), + .logger = m_logger.get(), + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies + }, + CSimpleDebugRenderer::DefaultPolygonGeometryPatch + ); + + // for the scene drawing pass + { + IGPURenderpass::SCreationParams params = {}; + const IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { + {{ + { + .format = sceneRenderDepthFormat, + .samples = IGPUImage::ESCF_1_BIT, + .mayAlias = false + }, + /*.loadOp =*/ {IGPURenderpass::LOAD_OP::CLEAR}, + /*.storeOp =*/ {IGPURenderpass::STORE_OP::STORE}, + /*.initialLayout =*/ {IGPUImage::LAYOUT::UNDEFINED}, + /*.finalLayout =*/ {IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} + }}, + IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd + }; + params.depthStencilAttachments = depthAttachments; + const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = { + {{ + { + .format = finalSceneRenderFormat, + .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, + .mayAlias = false + }, + /*.loadOp =*/ IGPURenderpass::LOAD_OP::CLEAR, + /*.storeOp =*/ IGPURenderpass::STORE_OP::STORE, + /*.initialLayout =*/ IGPUImage::LAYOUT::UNDEFINED, + /*.finalLayout =*/ IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read + }}, + IGPURenderpass::SCreationParams::ColorAttachmentsEnd + }; + params.colorAttachments = colorAttachments; + IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { + {}, + IGPURenderpass::SCreationParams::SubpassesEnd + }; + subpasses[0].depthStencilAttachment = { {.render = {.attachmentIndex = 0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}} }; + subpasses[0].colorAttachments[0] = { .render = {.attachmentIndex = 0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} }; + params.subpasses = subpasses; + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + // while color is sampled by ImGUI + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + // don't want any writes to be available, as we are clearing both attachments + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available, also won't be using depth so don't care about it being visible to anyone else + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT, + // the ImGUI will sample the color, then next frame we overwrite both attachments + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT | PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, + // but we only care about the availability-visibility chain between renderpass and imgui + .dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + params.dependencies = dependencies; + auto solidAngleRenderpassParams = params; + m_mainRenderpass = m_device->createRenderpass(std::move(params)); + if (!m_mainRenderpass) + return logFail("Failed to create Main Renderpass!"); + + m_solidAngleRenderpass = m_device->createRenderpass(std::move(solidAngleRenderpassParams)); + if (!m_solidAngleRenderpass) + return logFail("Failed to create Solid Angle Renderpass!"); + + } + + const auto& geometries = m_scene->getInitParams().geometries; + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), m_solidAngleRenderpass.get(), 0, { &geometries.front().get(),geometries.size() }); + // special case + { + const auto& pipelines = m_renderer->getInitParams().pipelines; + auto ix = 0u; + for (const auto& name : m_scene->getInitParams().geometryNames) + { + if (name == "Cone") + m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone]; + ix++; + } + } + // we'll only display one thing at a time + m_renderer->m_instances.resize(1); + + // Create graphics pipeline + { + auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "") -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.workingDirectory = localInputCWD; + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + auto source = smart_refctd_ptr_static_cast(assets[0]); + // The down-cast should not fail! + assert(source); + + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_FRAGMENT; + options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#endif + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + core::vector defines; + if (!defineMacro.empty()) + defines.push_back({ defineMacro, "" }); + + options.preprocessorOptions.extraDefines = defines; + + source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + + auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); + if (!shader) + { + m_logger->log("HLSL shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + return shader; + }; + + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Fragment Shader + auto fragmentShader = loadAndCompileHLSLShader(SolidAngleVisShaderPath); + if (!fragmentShader) + return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), + .entryPoint = "main" + }; + + const asset::SPushConstantRange ranges[] = { { + .stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(PushConstants) + } }; + + auto visualizationLayout = m_device->createPipelineLayout( + ranges, + nullptr, + nullptr, + nullptr, + nullptr + ); + m_visualizationPipeline = fsTriProtoPPln.createPipeline(fragSpec, visualizationLayout.get(), m_solidAngleRenderpass.get()); + if (!m_visualizationPipeline) + return logFail("Could not create Graphics Pipeline!"); + + } + + // Create ImGUI + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::imgui::UI::SCreationParameters params = {}; + params.resources.texturesInfo = { .setIx = 0u,.bindingIx = TexturesImGUIBindingIndex }; + params.resources.samplersInfo = { .setIx = 0u,.bindingIx = 1u }; + params.utilities = m_utils; + params.transfer = getTransferUpQueue(); + params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxImGUITextures); + params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); + params.subpassIx = 0u; + params.pipelineCache = nullptr; + interface.imGUI = ext::imgui::UI::create(std::move(params)); + if (!interface.imGUI) + return logFail("Failed to create `nbl::ext::imgui::UI` class"); + } + + // create rest of User Interface + { + auto* imgui = interface.imGUI.get(); + // create the suballocated descriptor set + { + // note that we use default layout provided by our extension, but you are free to create your own by filling ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* layout = interface.imGUI->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, { &layout,1 }); + auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); + interface.subAllocDS = make_smart_refctd_ptr(std::move(ds)); + if (!interface.subAllocDS) + return logFail("Failed to create the descriptor set"); + // make sure Texture Atlas slot is taken for eternity + { + auto dummy = SubAllocatedDescriptorSet::invalid_value; + interface.subAllocDS->multi_allocate(0, 1, &dummy); + assert(dummy == ext::imgui::UI::FontAtlasTexId); + } + // write constant descriptors, note we don't create info & write pair for the samplers because UI extension's are immutable and baked into DS layout + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = smart_refctd_ptr(interface.imGUI->getFontAtlasView()); + info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write = { + .dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = ext::imgui::UI::FontAtlasTexId, + .count = 1, + .info = &info + }; + if (!m_device->updateDescriptorSets({ &write,1 }, {})) + return logFail("Failed to write the descriptor set"); + } + imgui->registerListener([this]() {interface(); }); + } + + interface.camera.mapKeysToWASD(); + + onAppInitializedFinish(); + return true; + } + + // + virtual inline bool onAppTerminated() + { + SubAllocatedDescriptorSet::value_type fontAtlasDescIx = ext::imgui::UI::FontAtlasTexId; + IGPUDescriptorSet::SDropDescriptorSet dummy[1]; + interface.subAllocDS->multi_deallocate(dummy, TexturesImGUIBindingIndex, 1, &fontAtlasDescIx); + return device_base_t::onAppTerminated(); + } + + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override + { + // CPU events + update(nextPresentationTimestamp); + + const auto& virtualWindowRes = interface.transformReturnInfo.sceneResolution; + // TODO: check main frame buffer too + if (!m_solidAngleViewFramebuffer || m_solidAngleViewFramebuffer->getCreationParameters().width != virtualWindowRes[0] || m_solidAngleViewFramebuffer->getCreationParameters().height != virtualWindowRes[1]) + recreateFramebuffer(virtualWindowRes); + + // + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + auto* const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // clear to black for both things + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; + if (m_solidAngleViewFramebuffer) + { + cb->beginDebugMarker("Draw Circle View Frame"); + { + const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = m_solidAngleViewFramebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0,0}, + .extent = {virtualWindowRes[0],virtualWindowRes[1]} + } + }; + beginRenderpass(cb, renderpassInfo); + } + // draw scene + { + PushConstants pc{ + .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), + .viewport = { 0.f,0.f,static_cast(virtualWindowRes[0]),static_cast(virtualWindowRes[1]) } + }; + auto pipeline = m_visualizationPipeline; + cb->bindGraphicsPipeline(pipeline.get()); + cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); + //cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 3, 1, &ds); + ext::FullScreenTriangle::recordDrawCall(cb); + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + // draw main view + if (m_mainViewFramebuffer) + { + cb->beginDebugMarker("Main Scene Frame"); + { + const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = m_mainViewFramebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0,0}, + .extent = {virtualWindowRes[0],virtualWindowRes[1]} + } + }; + beginRenderpass(cb, renderpassInfo); + } + // draw scene + { + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + // TODO: get rid of legacy matrices + { + const auto& camera = interface.camera; + memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); + memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); + } + const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix); + + // tear down scene every frame + auto& instance = m_renderer->m_instances[0]; + auto transposed = hlsl::transpose(interface.m_OBBModelMatrix); + memcpy(&instance.world, &transposed, sizeof(instance.world)); + instance.packedGeo = m_renderer->getGeometries().data();// +interface.gcIndex; + m_renderer->render(cb, viewParams); // draw the cube/OBB + + + // TODO: a better way to get identity matrix + float32_t3x4 origin = { + 0.2f,0.0f,0.0f,0.0f, + 0.0f,0.2f,0.0f,0.0f, + 0.0f,0.0f,0.2f,0.0f + }; + memcpy(&instance.world, &origin, sizeof(instance.world)); + instance.packedGeo = m_renderer->getGeometries().data() + 3; // sphere + m_renderer->render(cb, viewParams); + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + { + cb->beginDebugMarker("SolidAngleVisualizer IMGUI Frame"); + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + } + }; + beginRenderpass(cb, renderpassInfo); + } + // draw ImGUI + { + auto* imgui = interface.imGUI.get(); + auto* pipeline = imgui->getPipeline(); + cb->bindGraphicsPipeline(pipeline); + // note that we use default UI pipeline layout where uiParams.resources.textures.setIx == uiParams.resources.samplers.setIx + const auto* ds = interface.subAllocDS->getDescriptorSet(); + cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); + // a timepoint in the future to release streaming resources for geometry + const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + if (!imgui->render(cb, drawFinished)) + { + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); + return {}; + } + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + cb->end(); + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + + m_window->setCaption("[Nabla Engine] UI App Test Demo"); + return retval; + } + +protected: + const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override + { + // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // don't want any writes to be available, we'll clear, only thing to worry about is the layout transition + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway + .srcAccessMask = ACCESS_FLAGS::NONE, + // layout transition needs to finish before the color write + .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // want layout transition to begin after all color output is done + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + return dependencies; + } + +private: + inline void update(const std::chrono::microseconds nextPresentationTimestamp) + { + auto& camera = interface.camera; + camera.setMoveSpeed(interface.moveSpeed); + camera.setRotateSpeed(interface.rotateSpeed); + + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } uiEvents; + + // TODO: should be a member really + static std::chrono::microseconds previousEventTimestamp{}; + + // I think begin/end should always be called on camera, just events shouldn't be fed, why? + // If you stop begin/end, whatever keys were up/down get their up/down values frozen leading to + // `perActionDt` becoming obnoxiously large the first time the even processing resumes due to + // `timeDiff` being computed since `lastVirtualUpTimeStamp` + camera.beginInputProcessing(nextPresentationTimestamp); + { + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (interface.move) + camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + uiEvents.mouse.emplace_back(e); + + //if (e.type == nbl::ui::SMouseEvent::EET_SCROLL && m_renderer) + //{ + // interface.gcIndex += int16_t(core::sign(e.scrollEvent.verticalScroll)); + // interface.gcIndex = core::clamp(interface.gcIndex, 0ull, m_renderer->getGeometries().size() - 1); + //} + } + }, + m_logger.get() + ); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + //if (interface.move) + camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + uiEvents.keyboard.emplace_back(e); + } + }, + m_logger.get() + ); + } + camera.endInputProcessing(nextPresentationTimestamp); + + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + + ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), + .displaySize = {m_window->getWidth(),m_window->getHeight()}, + .mouseEvents = uiEvents.mouse, + .keyboardEvents = uiEvents.keyboard + }; + + //interface.objectName = m_scene->getInitParams().geometryNames[interface.gcIndex]; + interface.imGUI->update(params); + } + + void recreateFramebuffer(const uint16_t2 resolution) + { + auto createImageAndView = [&](E_FORMAT format)->smart_refctd_ptr + { + auto image = m_device->createImage({ { + .type = IGPUImage::ET_2D, + .samples = IGPUImage::ESCF_1_BIT, + .format = format, + .extent = {resolution.x,resolution.y,1}, + .mipLevels = 1, + .arrayLayers = 1, + .usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT + } }); + if (!m_device->allocate(image->getMemoryReqs(), image.get()).isValid()) + return nullptr; + IGPUImageView::SCreationParams params = { + .image = std::move(image), + .viewType = IGPUImageView::ET_2D, + .format = format + }; + params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? IGPUImage::EAF_DEPTH_BIT : IGPUImage::EAF_COLOR_BIT; + return m_device->createImageView(std::move(params)); + }; + + smart_refctd_ptr solidAngleView; + smart_refctd_ptr mainView; + // detect window minimization + if (resolution.x < 0x4000 && resolution.y < 0x4000) + { + solidAngleView = createImageAndView(finalSceneRenderFormat); + auto solidAngleDepthView = createImageAndView(sceneRenderDepthFormat); + m_solidAngleViewFramebuffer = m_device->createFramebuffer({ { + .renderpass = m_solidAngleRenderpass, + .depthStencilAttachments = &solidAngleDepthView.get(), + .colorAttachments = &solidAngleView.get(), + .width = resolution.x, + .height = resolution.y + } }); + + mainView = createImageAndView(finalSceneRenderFormat); + auto mainDepthView = createImageAndView(sceneRenderDepthFormat); + m_mainViewFramebuffer = m_device->createFramebuffer({ { + .renderpass = m_mainRenderpass, + .depthStencilAttachments = &mainDepthView.get(), + .colorAttachments = &mainView.get(), + .width = resolution.x, + .height = resolution.y + } }); + + } + else + { + m_solidAngleViewFramebuffer = nullptr; + m_mainViewFramebuffer = nullptr; + } + + // release previous slot and its image + interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices, { .semaphore = m_semaphore.get(),.value = m_realFrameIx }); + // + if (solidAngleView) + { + interface.subAllocDS->multi_allocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices); + // update descriptor set + IGPUDescriptorSet::SDescriptorInfo infos[static_cast(CInterface::Count)] = {}; + infos[0].desc = solidAngleView; + infos[0].info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; + infos[1].desc = mainView; + infos[1].info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write[static_cast(CInterface::Count)] = { + {.dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)], + .count = 1, + .info = &infos[static_cast(CInterface::ERV_MAIN_VIEW)] + }, + { + .dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_MAIN_VIEW)], + .count = 1, + .info = &infos[1] + } + }; + m_device->updateDescriptorSets({ write, static_cast(CInterface::Count) }, {}); + } + interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndices[CInterface::ERV_MAIN_VIEW]; + } + + inline void beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& info) + { + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + cb->setScissor(0, 1, &info.renderArea); + const SViewport viewport = { + .x = 0, + .y = 0, + .width = static_cast(info.renderArea.extent.width), + .height = static_cast(info.renderArea.extent.height) + }; + cb->setViewport(0u, 1u, &viewport); + } + + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint32_t MaxFramesInFlight = 3u; + constexpr static inline auto sceneRenderDepthFormat = EF_D32_SFLOAT; + constexpr static inline auto finalSceneRenderFormat = EF_R8G8B8A8_SRGB; + constexpr static inline auto TexturesImGUIBindingIndex = 0u; + // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes + constexpr static inline auto MaxImGUITextures = 2u + MaxFramesInFlight; + + // + smart_refctd_ptr m_scene; + smart_refctd_ptr m_solidAngleRenderpass; + smart_refctd_ptr m_mainRenderpass; + smart_refctd_ptr m_renderer; + smart_refctd_ptr m_solidAngleViewFramebuffer; + smart_refctd_ptr m_mainViewFramebuffer; + smart_refctd_ptr m_visualizationPipeline; + // + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + // + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + // UI stuff + struct CInterface + { + void cameraToHome() + { + core::vectorSIMDf cameraPosition(-3.0f, 3.0f, 6.0f); + core::vectorSIMDf cameraTarget(0.f, 0.f, 6.f); + const static core::vectorSIMDf up(0.f, 1.f, 0.f); + + camera.setPosition(cameraPosition); + camera.setTarget(cameraTarget); + camera.setBackupUpVector(up); + + camera.recomputeViewMatrix(); + } + + void operator()() + { + ImGuiIO& io = ImGui::GetIO(); + + // TODO: why is this a lambda and not just an assignment in a scope ? + camera.setProjectionMatrix([&]() + { + matrix4SIMD projection; + + if (isPerspective) + if (isLH) + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + else + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + else + { + float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; + + if (isLH) + projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar); + else + projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar); + } + + return projection; + }()); + + ImGuizmo::SetOrthographic(false); + ImGuizmo::BeginFrame(); + + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Editor"); + + //if (ImGui::RadioButton("Full view", !transformParams.useWindow)) + // transformParams.useWindow = false; + + //ImGui::SameLine(); + + //if (ImGui::RadioButton("Window", transformParams.useWindow)) + // transformParams.useWindow = true; + + ImGui::Text("Camera"); + bool viewDirty = false; + + if (ImGui::RadioButton("LH", isLH)) + isLH = true; + + ImGui::SameLine(); + + if (ImGui::RadioButton("RH", !isLH)) + isLH = false; + + if (ImGui::RadioButton("Perspective", isPerspective)) + isPerspective = true; + + ImGui::SameLine(); + + if (ImGui::RadioButton("Orthographic", !isPerspective)) + isPerspective = false; + + ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate); + //ImGui::Checkbox("Enable camera movement", &move); + ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); + + // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case + + if (isPerspective) + ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); + else + ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20); + + ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); + + viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f); + + if (viewDirty || firstFrame) + { + cameraToHome(); + } + firstFrame = false; + + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + if (ImGuizmo::IsUsing()) + { + ImGui::Text("Using gizmo"); + } + else + { + ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : ""); + } + ImGui::Separator(); + + /* + * ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout + * and Nabla uses row major matricies - 3x4 matrix for view & 4x4 for projection + + - VIEW: + + ImGuizmo + + | X[0] Y[0] Z[0] 0.0f | + | X[1] Y[1] Z[1] 0.0f | + | X[2] Y[2] Z[2] 0.0f | + | -Dot(X, eye) -Dot(Y, eye) -Dot(Z, eye) 1.0f | + + Nabla + + | X[0] X[1] X[2] -Dot(X, eye) | + | Y[0] Y[1] Y[2] -Dot(Y, eye) | + | Z[0] Z[1] Z[2] -Dot(Z, eye) | + + = transpose(nbl::core::matrix4SIMD()) + + - PERSPECTIVE [PROJECTION CASE]: + + ImGuizmo + + | (temp / temp2) (0.0) (0.0) (0.0) | + | (0.0) (temp / temp3) (0.0) (0.0) | + | ((right + left) / temp2) ((top + bottom) / temp3) ((-zfar - znear) / temp4) (-1.0f) | + | (0.0) (0.0) ((-temp * zfar) / temp4) (0.0) | + + Nabla + + | w (0.0) (0.0) (0.0) | + | (0.0) -h (0.0) (0.0) | + | (0.0) (0.0) (-zFar/(zFar-zNear)) (-zNear*zFar/(zFar-zNear)) | + | (0.0) (0.0) (-1.0) (0.0) | + + = transpose() + + * + * the ViewManipulate final call (inside EditTransform) returns world space column major matrix for an object, + * note it also modifies input view matrix but projection matrix is immutable + */ + + if (ImGui::IsKeyPressed(ImGuiKey_Home)) + { + cameraToHome(); + } + + if (ImGui::IsKeyPressed(ImGuiKey_End)) + { + m_OBBModelMatrix = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 12.0f, 1.0f + }; + } + + static struct + { + float32_t4x4 view, projection, model; + } imguizmoM16InOut; + + ImGuizmo::SetID(0u); + + // TODO: camera will return hlsl::float32_tMxN + auto view = *reinterpret_cast(camera.getViewMatrix().pointer()); + imguizmoM16InOut.view = hlsl::transpose(getMatrix3x4As4x4(view)); + + // TODO: camera will return hlsl::float32_tMxN + imguizmoM16InOut.projection = hlsl::transpose(*reinterpret_cast(camera.getProjectionMatrix().pointer())); + imguizmoM16InOut.model = m_OBBModelMatrix; + + { + if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates + imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ + + transformParams.editTransformDecomposition = true; + transformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); + + // TODO: camera stops when cursor hovers gizmo, but we also want to stop when gizmo is being used + move = (ImGui::IsMouseDown(ImGuiMouseButton_Left) || transformReturnInfo.isGizmoWindowHovered) && (!transformReturnInfo.isGizmoBeingUsed); + } + + // to Nabla + update camera & model matrices + // TODO: make it more nicely, extract: + // - Position by computing inverse of the view matrix and grabbing its translation + // - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position + // But then set the view matrix this way anyway, because up-vector may not be compatible + //const auto& view = camera.getViewMatrix(); + //const_cast(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) + m_OBBModelMatrix = imguizmoM16InOut.model; + + // object meta display + //{ + // ImGui::Begin("Object"); + // ImGui::Text("type: \"%s\"", objectName.data()); + // ImGui::End(); + //} + + // solid angle view window + { + ImGui::SetNextWindowSize(ImVec2(800, 800), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(1240, 20), ImGuiCond_Appearing); + static bool isOpen = true; + ImGui::Begin("Solid angle view", &isOpen, 0); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImGui::Image({ renderColorViewDescIndices[ERV_SOLID_ANGLE_VIEW] }, contentRegionSize); + ImGui::End(); + } + + // view matrices editor + { + ImGui::Begin("Matrices"); + + auto addMatrixTable = [&](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true) + { + ImGui::Text(topText); + if (ImGui::BeginTable(tableName, columns)) + { + for (int y = 0; y < rows; ++y) + { + ImGui::TableNextRow(); + for (int x = 0; x < columns; ++x) + { + ImGui::TableSetColumnIndex(x); + ImGui::Text("%.3f", *(pointer + (y * columns) + x)); + } + } + ImGui::EndTable(); + } + + if (withSeparator) + ImGui::Separator(); + }; + + addMatrixTable("Model Matrix", "ModelMatrixTable", 4, 4, &m_OBBModelMatrix[0][0]); + addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, camera.getViewMatrix().pointer()); + addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, camera.getProjectionMatrix().pointer(), false); + + ImGui::End(); + } + + // Nabla Imgui backend MDI buffer info + // To be 100% accurate and not overly conservative we'd have to explicitly `cull_frees` and defragment each time, + // so unless you do that, don't use this basic info to optimize the size of your IMGUI buffer. + { + auto* streaminingBuffer = imGUI->getStreamingBuffer(); + + const size_t total = streaminingBuffer->get_total_size(); // total memory range size for which allocation can be requested + const size_t freeSize = streaminingBuffer->getAddressAllocator().get_free_size(); // max total free bloock memory size we can still allocate from total memory available + const size_t consumedMemory = total - freeSize; // memory currently consumed by streaming buffer + + float freePercentage = 100.0f * (float)(freeSize) / (float)total; + float allocatedPercentage = (float)(consumedMemory) / (float)total; + + ImVec2 barSize = ImVec2(400, 30); + float windowPadding = 10.0f; + float verticalPadding = ImGui::GetStyle().FramePadding.y; + + ImGui::SetNextWindowSize(ImVec2(barSize.x + 2 * windowPadding, 110 + verticalPadding), ImGuiCond_Always); + ImGui::Begin("Nabla Imgui MDI Buffer Info", nullptr, ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoScrollbar); + + ImGui::Text("Total Allocated Size: %zu bytes", total); + ImGui::Text("In use: %zu bytes", consumedMemory); + ImGui::Text("Buffer Usage:"); + + ImGui::SetCursorPosX(windowPadding); + + if (freePercentage > 70.0f) + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(0.0f, 1.0f, 0.0f, 0.4f)); // Green + else if (freePercentage > 30.0f) + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 1.0f, 0.0f, 0.4f)); // Yellow + else + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 0.0f, 0.0f, 0.4f)); // Red + + ImGui::ProgressBar(allocatedPercentage, barSize, ""); + + ImGui::PopStyleColor(); + + ImDrawList* drawList = ImGui::GetWindowDrawList(); + + ImVec2 progressBarPos = ImGui::GetItemRectMin(); + ImVec2 progressBarSize = ImGui::GetItemRectSize(); + + const char* text = "%.2f%% free"; + char textBuffer[64]; + snprintf(textBuffer, sizeof(textBuffer), text, freePercentage); + + ImVec2 textSize = ImGui::CalcTextSize(textBuffer); + ImVec2 textPos = ImVec2 + ( + progressBarPos.x + (progressBarSize.x - textSize.x) * 0.5f, + progressBarPos.y + (progressBarSize.y - textSize.y) * 0.5f + ); + + ImVec4 bgColor = ImGui::GetStyleColorVec4(ImGuiCol_WindowBg); + drawList->AddRectFilled + ( + ImVec2(textPos.x - 5, textPos.y - 2), + ImVec2(textPos.x + textSize.x + 5, textPos.y + textSize.y + 2), + ImGui::GetColorU32(bgColor) + ); + + ImGui::SetCursorScreenPos(textPos); + ImGui::Text("%s", textBuffer); + + ImGui::Dummy(ImVec2(0.0f, verticalPadding)); + + ImGui::End(); + } + ImGui::End(); + } + + smart_refctd_ptr imGUI; + + // descriptor set + smart_refctd_ptr subAllocDS; + enum E_RENDER_VIEWS : uint8_t + { + ERV_MAIN_VIEW, + ERV_SOLID_ANGLE_VIEW, + Count + }; + SubAllocatedDescriptorSet::value_type renderColorViewDescIndices[E_RENDER_VIEWS::Count] = { SubAllocatedDescriptorSet::invalid_value, SubAllocatedDescriptorSet::invalid_value }; + // + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + // mutables + float32_t4x4 m_OBBModelMatrix{ + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 12.0f, 1.0f + }; + + //std::string_view objectName; + TransformRequestParams transformParams; + TransformReturnInfo transformReturnInfo; + + float fov = 90.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 90.f / 180.f * 3.14159f; + float camXAngle = 0.f / 180.f * 3.14159f; + //uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed + bool isPerspective = true, isLH = true, flipGizmoY = true, move = true; + bool firstFrame = true; + } interface; +}; + +NBL_MAIN_FUNC(SolidAngleVisualizer) \ No newline at end of file diff --git a/72_SolidAngleVisualizer/pipeline.groovy b/72_SolidAngleVisualizer/pipeline.groovy new file mode 100644 index 000000000..7b7c9702a --- /dev/null +++ b/72_SolidAngleVisualizer/pipeline.groovy @@ -0,0 +1,50 @@ +import org.DevshGraphicsProgramming.Agent +import org.DevshGraphicsProgramming.BuilderInfo +import org.DevshGraphicsProgramming.IBuilder + +class CUIBuilder extends IBuilder +{ + public CUIBuilder(Agent _agent, _info) + { + super(_agent, _info) + } + + @Override + public boolean prepare(Map axisMapping) + { + return true + } + + @Override + public boolean build(Map axisMapping) + { + IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") + IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") + + def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) + def nameOfConfig = getNameOfConfig(config) + + agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") + + return true + } + + @Override + public boolean test(Map axisMapping) + { + return true + } + + @Override + public boolean install(Map axisMapping) + { + return true + } +} + +def create(Agent _agent, _info) +{ + return new CUIBuilder(_agent, _info) +} + +return this \ No newline at end of file diff --git a/72_SolidAngleVisualizer/src/transform.cpp b/72_SolidAngleVisualizer/src/transform.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/CMakeLists.txt b/CMakeLists.txt index 574925e97..fddafdac1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) + add_subdirectory(72_SolidAngleVisualizer) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From 49b18aa4ec453fb53cf9bab6f28f83860818ff67 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 4 Dec 2025 15:31:14 +0100 Subject: [PATCH 086/219] SBufferBinding for texture info, wipe bindings not used anymore, mode as global state, createBuffer updates --- 50.IESViewer/App.hpp | 33 +++++++++++++++++- 50.IESViewer/AppEvent.cpp | 10 ++---- 50.IESViewer/AppGPU.cpp | 20 ++++++----- 50.IESViewer/AppInit.cpp | 47 ++++++++++++-------------- 50.IESViewer/AppRender.cpp | 10 +++--- 50.IESViewer/AppUI.cpp | 5 +-- 50.IESViewer/CSimpleIESRenderer.hpp | 1 - 50.IESViewer/IES.cpp | 26 ++++---------- 50.IESViewer/IES.hpp | 15 ++++---- 50.IESViewer/app_resources/common.hlsl | 2 ++ 10 files changed, 90 insertions(+), 79 deletions(-) diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp index 9a2cc3717..a1b0904cf 100644 --- a/50.IESViewer/App.hpp +++ b/50.IESViewer/App.hpp @@ -17,6 +17,18 @@ NBL_EXPOSE_NAMESPACES +template +concept AppIESByteCount = std::unsigned_integral; + +template +concept AppIESContainer = std::ranges::sized_range && + (std::same_as, float> || + std::same_as, IESTextureInfo>); +static_assert(alignof(IESTextureInfo) == 4u, "IESTextureInfo must be 4 byte aligned"); + +template +concept AppIESBufferCreationAllowed = AppIESByteCount || AppIESContainer; + class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApplication { using device_base_t = MonoWindowApplication; @@ -60,13 +72,32 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp smart_refctd_ptr descriptor; } ui; + IES::E_MODE mode = IES::EM_CDC; + void processMouse(const IMouseEventChannel::range_t& events); void processKeyboard(const IKeyboardEventChannel::range_t& events); smart_refctd_ptr createImageView(const size_t width, const size_t height, E_FORMAT format, std::string name, bitflag usage = bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_STORAGE_BIT, bitflag aspectFlags = bitflag(IImage::EAF_COLOR_BIT)); - smart_refctd_ptr createBuffer(const core::vector& in, std::string name); + + template + requires AppIESBufferCreationAllowed + smart_refctd_ptr createBuffer(const T& in, std::string name, bool unmap = true) + { + const void* src = nullptr; size_t bytes = {}; + if constexpr (AppIESByteCount) + bytes = static_cast(in); + else if (AppIESContainer) + { + using element_t = std::ranges::range_value_t; + static_assert(alignof(element_t) == 4u, "IESViewer::createBuffer: AppIESContainer's \"T\" must be 4 byte aligned"); + bytes = sizeof(element_t) * in.size(); + src = static_cast(std::data(in)); + } + return implCreateBuffer(src, bytes, name, unmap); + } + smart_refctd_ptr implCreateBuffer(const void* src, size_t bytes, const std::string& name, bool unmap); void uiListener(); }; diff --git a/50.IESViewer/AppEvent.cpp b/50.IESViewer/AppEvent.cpp index 894a11d47..b02284c7f 100644 --- a/50.IESViewer/AppEvent.cpp +++ b/50.IESViewer/AppEvent.cpp @@ -37,15 +37,9 @@ void IESViewer::processKeyboard(const nbl::ui::IKeyboardEventChannel::range_t& e auto& ies = m_assets[m_activeAssetIx]; if (ev.keyCode == nbl::ui::EKC_C) - ies.mode = IES::EM_CDC; + mode = IES::EM_CDC; else if (ev.keyCode == nbl::ui::EKC_V) - ies.mode = IES::EM_IES_C; - else if (ev.keyCode == nbl::ui::EKC_S) - ies.mode = IES::EM_SPERICAL_C; - else if (ev.keyCode == nbl::ui::EKC_D) - ies.mode = IES::EM_DIRECTION; - else if (ev.keyCode == nbl::ui::EKC_M) - ies.mode = IES::EM_PASS_T_MASK; + mode = IES::EM_OCTAHEDRAL_MAP; if (ev.keyCode == nbl::ui::EKC_Q) m_running = false; diff --git a/50.IESViewer/AppGPU.cpp b/50.IESViewer/AppGPU.cpp index 1dfff8648..c9fa20e76 100644 --- a/50.IESViewer/AppGPU.cpp +++ b/50.IESViewer/AppGPU.cpp @@ -53,11 +53,11 @@ core::smart_refctd_ptr IESViewer::createImageView(const size_t wi return imageView; } -core::smart_refctd_ptr IESViewer::createBuffer(const core::vector& in, std::string name) +core::smart_refctd_ptr IESViewer::implCreateBuffer(const void* src, size_t bytes, const std::string& name, bool unmap) { IGPUBuffer::SCreationParams bufferParams = {}; - bufferParams.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT /*TODO: <- double check*/;; - bufferParams.size = sizeof(float) * in.size(); + bufferParams.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT; + bufferParams.size = bytes; auto buffer = m_device->createBuffer(std::move(bufferParams)); buffer->setObjectDebugName(name.c_str()); @@ -88,13 +88,15 @@ core::smart_refctd_ptr IESViewer::createBuffer(const core::vectorgetSize()); + if(src) + memcpy(mappedPointer, src, buffer->getSize()); - if (not allocation.memory->unmap()) - { - m_logger->log("Failed to unmap device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); - return nullptr; - } + if(unmap) + if (not allocation.memory->unmap()) + { + m_logger->log("Failed to unmap device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } return buffer; } \ No newline at end of file diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index 2ccbc2fac..cb51bf87a 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -65,19 +65,25 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) m_logger->log("Creating GPU IES resources..", system::ILogger::ELL_INFO); { auto start = std::chrono::high_resolution_clock::now(); - for (auto& ies : m_assets) + + auto textureInfos = createBuffer(m_assets.size() * sizeof(IESTextureInfo), "IES Textures Info", false); + if(!textureInfos) return false; + auto* textureInfosMapped = static_cast(textureInfos->getBoundMemory().memory->getMappedPointer()); + + for (size_t i = 0u; i < m_assets.size(); ++i) { + auto& ies = m_assets[i]; const auto* profile = ies.getProfile(); const auto& accessor = profile->getAccessor(); const auto& resolution = accessor.properties.optimalIESResolution; + textureInfosMapped[i] = CIESProfile::texture_t::createInfo(accessor, resolution, 0.f, true); + ies.buffers.textureInfo.buffer = textureInfos; + ies.buffers.textureInfo.offset = i * sizeof(IESTextureInfo); #define CREATE_VIEW(VIEW, FORMAT, NAME) \ if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT, NAME + ies.key) )) return false; - CREATE_VIEW(ies.views.candela, asset::EF_R16_UNORM, "IES Candela Data Image: ") - CREATE_VIEW(ies.views.spherical, asset::EF_R32G32_SFLOAT, "IES Spherical Data Image: ") - CREATE_VIEW(ies.views.direction, asset::EF_R32G32B32A32_SFLOAT, "IES Direction Data Image: ") - CREATE_VIEW(ies.views.mask, asset::EF_R8G8_UNORM, "IES Mask Data Image: ") + CREATE_VIEW(ies.views.candelaOctahedralMap, asset::EF_R16_UNORM, "IES Candela Octahedral Map Image: ") #define CREATE_BUFFER(BUFFER, DATA, NAME) \ if (!(BUFFER = createBuffer(DATA, NAME + ies.key) )) return false; @@ -148,10 +154,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) #define BINDING_SAMPLER(IX) { .binding = IX, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = StageFlags, .count = 1u, .immutableSamplers = nullptr } static constexpr auto bindings = std::to_array ({ - BINDING_TEXTURE(0u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(0u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // candela - BINDING_TEXTURE(1u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(1u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // spherical - BINDING_TEXTURE(2u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(2u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // direction - BINDING_TEXTURE(3u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(3u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // mask + BINDING_TEXTURE(0u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(0u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // candela octahedral map BINDING_SAMPLER(0u + 100u) }); @@ -245,8 +248,9 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dscLayoutPtrs); pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), m_descriptors.data()); { - std::array, 4u + 1u> infos; -#define FILL_INFO(DESC, IX) \ + constexpr auto ViewsCount = 1u; // used to be 4u with debug maps (counted x2 for RO & RW binding but one descriptor) + std::array, ViewsCount + 1u> infos; + #define FILL_INFO(DESC, IX) \ { \ auto& info = infos[IX].emplace_back(); \ info.desc = DESC; \ @@ -257,17 +261,14 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) { auto& ies = m_assets[i]; - FILL_INFO(ies.views.candela, 0u) - FILL_INFO(ies.views.spherical, 1u) - FILL_INFO(ies.views.direction, 2u) - FILL_INFO(ies.views.mask, 3u) + FILL_INFO(ies.views.candelaOctahedralMap, 0u) } - FILL_INFO(generalSampler, 4u); + FILL_INFO(generalSampler, ViewsCount); auto* samplerInfo = infos.back().data(); samplerInfo->info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - std::array writes; - for (uint32_t i = 0; i < 4u; ++i) + std::array writes; + for (uint32_t i = 0; i < ViewsCount; ++i) { auto& write = writes[i]; write.count = m_assets.size(); @@ -277,9 +278,9 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) write.binding = i; } - for (uint32_t i = 4u; i < 8u; ++i) + for (uint32_t i = ViewsCount; i < ViewsCount*2u; ++i) { - auto ix = i - 4u; + auto ix = i - ViewsCount; auto& write = writes[i] = writes[ix]; write.binding = ix + 10u; } @@ -467,7 +468,6 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) auto* ix = addresses.data(); infos[*ix].desc = smart_refctd_ptr(imgui->getFontAtlasView()); ++ix; - for (uint8_t i = 0u; i < MaxFramesInFlight; ++i, ++ix) infos[*ix].desc = m_frameBuffers2D[i]->getCreationParameters().colorAttachments[0u]; for (uint8_t i = 0u; i < MaxFramesInFlight; ++i, ++ix) infos[*ix].desc = m_frameBuffers3D[i]->getCreationParameters().colorAttachments[0u]; @@ -523,10 +523,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) { auto& ies = m_assets[i]; - images.emplace_back() = ies.views.candela->getCreationParameters().image.get(); - images.emplace_back() = ies.views.spherical->getCreationParameters().image.get(); - images.emplace_back() = ies.views.direction->getCreationParameters().image.get(); - images.emplace_back() = ies.views.mask->getCreationParameters().image.get(); + images.emplace_back() = ies.views.candelaOctahedralMap->getCreationParameters().image.get(); } auto* cb = cbs.front().get(); diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index 301a0ed53..cbc6a0238 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -58,13 +58,13 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi .hAnglesBDA = ies.buffers.hAngles->getDeviceAddress(), .vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(), .dataBDA = ies.buffers.data->getDeviceAddress(), - .mode = ies.mode, - .symmetry = (uint32_t)accessor.symmetry(), + .txtInfoBDA = ies.buffers.textureInfo.buffer->getDeviceAddress(), + .mode = mode, .texIx = (uint32_t)m_activeAssetIx, .hAnglesCount = accessor.hAnglesCount(), .vAnglesCount = accessor.vAnglesCount(), - .maxIValue = accessor.properties.maxCandelaValue, - .zAngleDegreeRotation = ies.zDegree + .zAngleDegreeRotation = ies.zDegree, + .properties = accessor.getProperties() }; for (auto& buffer : { ies.buffers.data, ies.buffers.hAngles, ies.buffers.vAngles }) // flush request for sanity @@ -78,7 +78,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi } auto* const descriptor = m_descriptors[0].get(); - auto* image = ies.getActiveImage(); + auto* image = ies.getActiveImage(mode); // Compute { diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp index 0115da42b..59938d1c1 100644 --- a/50.IESViewer/AppUI.cpp +++ b/50.IESViewer/AppUI.cpp @@ -19,6 +19,7 @@ void IESViewer::uiListener() const auto name = path(ies.key).filename().string(); auto* profile = ies.getProfile(); const auto& accessor = profile->getAccessor(); + const auto& properties = accessor.getProperties(); const float lowerBound = accessor.hAngles.front(); const float upperBound = accessor.hAngles.back(); @@ -33,10 +34,10 @@ void IESViewer::uiListener() float x = vp->Pos.x + 8.f; float y = vp->Pos.y + 8.f; - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::modeToRS(ies.mode)); + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::modeToRS(mode)); y += ImGui::GetTextLineHeightWithSpacing(); - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::symmetryToRS(accessor.symmetry())); + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::symmetryToRS(properties.getSymmetry())); y += ImGui::GetTextLineHeightWithSpacing(); fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), name.c_str()); diff --git a/50.IESViewer/CSimpleIESRenderer.hpp b/50.IESViewer/CSimpleIESRenderer.hpp index 7e75240b8..fe75c071a 100644 --- a/50.IESViewer/CSimpleIESRenderer.hpp +++ b/50.IESViewer/CSimpleIESRenderer.hpp @@ -213,7 +213,6 @@ class CSimpleIESRenderer final : public core::IReferenceCounted enum PipelineType : uint8_t { SphereTriangleStrip, - // TODO: I would also like to project onto cube in which a sphere is put Count }; diff --git a/50.IESViewer/IES.cpp b/50.IESViewer/IES.cpp index c1f67a161..92b82fec0 100644 --- a/50.IESViewer/IES.cpp +++ b/50.IESViewer/IES.cpp @@ -13,18 +13,12 @@ const asset::CIESProfile* IES::getProfile() const return nullptr; } -video::IGPUImage* IES::getActiveImage() const +video::IGPUImage* IES::getActiveImage(E_MODE mode) const { switch (mode) { - case EM_IES_C: - return views.candela->getCreationParameters().image.get(); - case EM_SPERICAL_C: - return views.spherical->getCreationParameters().image.get(); - case EM_DIRECTION: - return views.direction->getCreationParameters().image.get(); - case EM_PASS_T_MASK: - return views.mask->getCreationParameters().image.get(); + case EM_OCTAHEDRAL_MAP: + return views.candelaOctahedralMap->getCreationParameters().image.get(); case EM_CDC: default: @@ -38,16 +32,10 @@ const char* IES::modeToRS(E_MODE mode) { case IES::EM_CDC: return "Candlepower Distribution Curve"; - case IES::EM_IES_C: - return "Sample IES Candela"; - case IES::EM_SPERICAL_C: - return "Sample Spherical Coordinates"; - case IES::EM_DIRECTION: - return "Sample Direction"; - case IES::EM_PASS_T_MASK: - return "Sample Pass Mask"; - default: - return "ERROR (mode)"; + case IES::EM_OCTAHEDRAL_MAP: + return "Candela Octahedral Map"; + default: + return "ERROR (mode)"; } } diff --git a/50.IESViewer/IES.hpp b/50.IESViewer/IES.hpp index 6dd5de936..0684c53e2 100644 --- a/50.IESViewer/IES.hpp +++ b/50.IESViewer/IES.hpp @@ -13,33 +13,30 @@ struct IES { enum E_MODE : uint32_t { - EM_CDC, //! Candlepower Distribution Curve - EM_IES_C, //! IES Candela - EM_SPERICAL_C, //! Sperical coordinates - EM_DIRECTION, //! Sample direction - EM_PASS_T_MASK, //! Test mask + EM_CDC, //! Candlepower Distribution Curve + EM_OCTAHEDRAL_MAP, //! Candela Octahedral Map EM_SIZE }; struct { - smart_refctd_ptr candela = nullptr, spherical = nullptr, direction = nullptr, mask = nullptr; + smart_refctd_ptr candelaOctahedralMap = nullptr; } views; struct { - smart_refctd_ptr vAngles = nullptr, hAngles = nullptr, data = nullptr; + smart_refctd_ptr vAngles = nullptr, hAngles = nullptr, data = nullptr; // allocation per ies + SBufferBinding textureInfo; // shared allocation for all ies } buffers; SAssetBundle bundle; std::string key; float zDegree = 0.f; - E_MODE mode = EM_CDC; const asset::CIESProfile* getProfile() const; - video::IGPUImage* getActiveImage() const; + video::IGPUImage* getActiveImage(E_MODE mode) const; static const char* modeToRS(E_MODE mode); static const char* symmetryToRS(CIESProfile::properties_t::LuminairePlanesSymmetry symmetry); diff --git a/50.IESViewer/app_resources/common.hlsl b/50.IESViewer/app_resources/common.hlsl index ba3a28f7c..360485180 100644 --- a/50.IESViewer/app_resources/common.hlsl +++ b/50.IESViewer/app_resources/common.hlsl @@ -36,6 +36,8 @@ struct CdcPC uint32_t vAnglesCount; float32_t zAngleDegreeRotation; nbl::hlsl::ies::ProfileProperties properties; + + float32_t pad; }; struct SpherePC From 57bcf320167405e32c4ad54f9e37106c1cd3a428 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 4 Dec 2025 16:27:56 +0100 Subject: [PATCH 087/219] replace old lat plot with octahedral utils --- 50.IESViewer/app_resources/ies.unified.hlsl | 37 ++++++--------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index 2be9e451b..f4bbb86cf 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -45,33 +45,16 @@ struct SInterpolants using octahedral_t = math::OctahedralTransform; using texture_t = nbl::hlsl::ies::Texture; -float32_t3 latLongDir(float32_t2 uv) -{ - const float32_t phi = 6.28318530718f * uv.x; - const float32_t th = 3.14159265359f * uv.y; - const float32_t s = sin(th), c = cos(th); - return float32_t3(s * cos(phi), c, s * sin(phi)); -} - [shader("vertex")] -SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) +SInterpolants SphereVS(uint32_t vIx : SV_VertexID) { - uint32_t2 resolution; - inIESCandelaImage[pc.sphere.texIx].GetDimensions(resolution.x, resolution.y); - - const uint32_t W = resolution.x, H = resolution.y; - const uint32_t i = VertexIndex % W, j = VertexIndex / W; - - // for sphere geometry created from our grid we need to make sure the surface is closed, aligned at U/V edges - const float32_t2 uv = float32_t2( - (float32_t(i)) / float32_t(W), - (float32_t(j)) / float32_t(H) - ); - const float32_t vPos = (j == 0u) ? 0.0f : (j == H - 1u) ? 1.0f : uv.y; - const float32_t uPos = (i == W - 1u) ? 1.0f : uv.x; - const float32_t2 uvPos = float32_t2(uPos, vPos); - - const float32_t3 dir = latLongDir(uvPos); + uint32_t2 res; + inIESCandelaImage[pc.sphere.texIx].GetDimensions(res.x, res.y); + + const float32_t2 inv = float32_t2(1.f, 1.f) / float32_t2(res - 1u); + const float32_t2 uv = float32_t2(vIx % res.x, vIx / res.x) * inv; + + const float32_t3 dir = octahedral_t::uvToDir(uv); const float32_t3 pos = pc.sphere.radius * dir; SInterpolants o; @@ -85,8 +68,8 @@ SInterpolants SphereVS(uint32_t VertexIndex : SV_VertexID) float32_t4 SpherePS(SInterpolants input) : SV_Target0 { float32_t2 uv = 0.5f * octahedral_t::dirToNDC(input.latDir) + 0.5f; - float32_t candela = inIESCandelaImage[pc.sphere.texIx].Sample(generalSampler, uv).r; - float32_t v = 1.0f - exp(-candela); + float32_t intensity = inIESCandelaImage[pc.sphere.texIx].Sample(generalSampler, uv).r; + float32_t v = 1.0f - exp(-intensity); return float32_t4(v,v,v,1); } From fa797e4e17eb02c203227c58f805896cd65997c4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 5 Dec 2025 14:49:51 +0100 Subject: [PATCH 088/219] add new interpolate and false color modes to sphere plot, fix runtime artifacts, sample with corner UVs --- 50.IESViewer/App.hpp | 5 +- 50.IESViewer/AppEvent.cpp | 4 +- 50.IESViewer/AppRender.cpp | 6 +- 50.IESViewer/AppUI.cpp | 108 ++++++++++++++------ 50.IESViewer/CSimpleIESRenderer.hpp | 4 +- 50.IESViewer/app_resources/common.hlsl | 10 +- 50.IESViewer/app_resources/ies.unified.hlsl | 105 +++++++++++++++---- 7 files changed, 180 insertions(+), 62 deletions(-) diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp index a1b0904cf..6d7577016 100644 --- a/50.IESViewer/App.hpp +++ b/50.IESViewer/App.hpp @@ -72,7 +72,10 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp smart_refctd_ptr descriptor; } ui; - IES::E_MODE mode = IES::EM_CDC; + struct { + IES::E_MODE view = IES::EM_CDC; + bitflag sphere = this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE; + } mode; void processMouse(const IMouseEventChannel::range_t& events); void processKeyboard(const IKeyboardEventChannel::range_t& events); diff --git a/50.IESViewer/AppEvent.cpp b/50.IESViewer/AppEvent.cpp index b02284c7f..672a77e21 100644 --- a/50.IESViewer/AppEvent.cpp +++ b/50.IESViewer/AppEvent.cpp @@ -37,9 +37,9 @@ void IESViewer::processKeyboard(const nbl::ui::IKeyboardEventChannel::range_t& e auto& ies = m_assets[m_activeAssetIx]; if (ev.keyCode == nbl::ui::EKC_C) - mode = IES::EM_CDC; + mode.view = IES::EM_CDC; else if (ev.keyCode == nbl::ui::EKC_V) - mode = IES::EM_OCTAHEDRAL_MAP; + mode.view = IES::EM_OCTAHEDRAL_MAP; if (ev.keyCode == nbl::ui::EKC_Q) m_running = false; diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index cbc6a0238..4074c7f0c 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -59,7 +59,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi .vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(), .dataBDA = ies.buffers.data->getDeviceAddress(), .txtInfoBDA = ies.buffers.textureInfo.buffer->getDeviceAddress(), - .mode = mode, + .mode = mode.view, .texIx = (uint32_t)m_activeAssetIx, .hAnglesCount = accessor.hAnglesCount(), .vAnglesCount = accessor.vAnglesCount(), @@ -78,7 +78,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi } auto* const descriptor = m_descriptors[0].get(); - auto* image = ies.getActiveImage(mode); + auto* image = ies.getActiveImage(mode.view); // Compute { @@ -167,7 +167,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); } const auto viewParams = CSimpleIESRenderer::SViewParams(viewMatrix, viewProjMatrix); - const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = 100.f, .ds = m_descriptors[0u].get(), .texID = (uint16_t)m_activeAssetIx }); + const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = 100.f, .ds = m_descriptors[0u].get(), .texID = (uint16_t)m_activeAssetIx, .mode = mode.sphere.value }); // tear down scene every frame m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data() + m_activeAssetIx; diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp index 59938d1c1..308c945d2 100644 --- a/50.IESViewer/AppUI.cpp +++ b/50.IESViewer/AppUI.cpp @@ -7,6 +7,8 @@ #include "app_resources/common.hlsl" #include "app_resources/imgui.opts.hlsl" +using namespace this_example; + void IESViewer::uiListener() { const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; @@ -34,7 +36,7 @@ void IESViewer::uiListener() float x = vp->Pos.x + 8.f; float y = vp->Pos.y + 8.f; - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::modeToRS(mode)); + fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::modeToRS(mode.view)); y += ImGui::GetTextLineHeightWithSpacing(); fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::symmetryToRS(properties.getSymmetry())); @@ -141,35 +143,77 @@ void IESViewer::uiListener() ies.zDegree = angle; - // 3D plot - { - info.textureID += device_base_t::MaxFramesInFlight; - - { - const ImVec2 imageCenter( - vp->Pos.x + vp->Size.x * 0.5f, - vp->Pos.y + vp->Size.y * 0.75f - ); - - ImGui::SetNextWindowPos(imageCenter, ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f)); - - ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0.f, 0.f)); - ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); - - ImGuiWindowFlags imgFlags = - ImGuiWindowFlags_NoSavedSettings | - ImGuiWindowFlags_NoBringToFrontOnFocus | - ImGuiWindowFlags_NoNav | - ImGuiWindowFlags_NoScrollbar | - ImGuiWindowFlags_NoScrollWithMouse; - - if (ImGui::Begin("3D Plot", nullptr, imgFlags)) - { - ImGui::Image(info, imageSize); - } - ImGui::End(); - - ImGui::PopStyleVar(2); - } - } + // 3D plot + { + info.textureID += device_base_t::MaxFramesInFlight; + + { + const ImVec2 imageCenter( + vp->Pos.x + vp->Size.x * 0.5f, + vp->Pos.y + vp->Size.y * 0.75f + ); + + ImGui::SetNextWindowPos(imageCenter, ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f)); + + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0.f, 0.f)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); + + ImGuiWindowFlags imgFlags = + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoBringToFrontOnFocus | + ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_NoScrollbar | + ImGuiWindowFlags_NoScrollWithMouse; + + if (ImGui::Begin("3D Plot", nullptr, imgFlags)) + { + ImVec2 imgPos = ImGui::GetCursorScreenPos(); + ImGui::Image(info, imageSize); + + const ImGuiStyle& style = ImGui::GetStyle(); + float frameH = ImGui::GetFrameHeight(); + float margin = 6.0f; + + ImVec2 overlayPos( + imgPos.x + margin, + imgPos.y + margin + ); + + bool interpolateCandela = + mode.sphere.hasFlags(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); + + ImGui::SetCursorScreenPos(overlayPos); + if (ImGui::Checkbox("interpolate candelas", &interpolateCandela)) + { + if (interpolateCandela) + mode.sphere |= this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE; + else + mode.sphere &= static_cast( + ~this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE + ); + } + + bool falseColor = + mode.sphere.hasFlags(this_example::ies::ESM_FALSE_COLOR); + + ImVec2 overlayPos2( + overlayPos.x, + overlayPos.y + frameH + margin + ); + ImGui::SetCursorScreenPos(overlayPos2); + if (ImGui::Checkbox("false color", &falseColor)) + { + if (falseColor) + mode.sphere |= this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR; + else + mode.sphere &= static_cast( + ~this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR + ); + } + } + ImGui::End(); + + ImGui::PopStyleVar(2); + } + } } \ No newline at end of file diff --git a/50.IESViewer/CSimpleIESRenderer.hpp b/50.IESViewer/CSimpleIESRenderer.hpp index fe75c071a..9af4a07ca 100644 --- a/50.IESViewer/CSimpleIESRenderer.hpp +++ b/50.IESViewer/CSimpleIESRenderer.hpp @@ -52,7 +52,8 @@ class CSimpleIESRenderer final : public core::IReferenceCounted { hlsl::float32_t radius = 1.f; IGPUDescriptorSet* ds = nullptr; - uint16_t texID; + uint16_t texID = 0u; + uint16_t mode = this_example::ies::ESM_NONE; }; // struct SPackedGeometry @@ -78,6 +79,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted .positionView = packedGeo->positionView, .normalView = packedGeo->normalView, .radius = iesParams.radius, + .mode = iesParams.mode, .texIx = iesParams.texID }; } diff --git a/50.IESViewer/app_resources/common.hlsl b/50.IESViewer/app_resources/common.hlsl index 360485180..bfb3fc007 100644 --- a/50.IESViewer/app_resources/common.hlsl +++ b/50.IESViewer/app_resources/common.hlsl @@ -40,6 +40,13 @@ struct CdcPC float32_t pad; }; +enum E_SPHERE_MODE : uint16_t +{ + ESM_NONE = 0, + ESM_OCTAHEDRAL_UV_INTERPOLATE = 1u << 0, + ESM_FALSE_COLOR = 1u << 1 +}; + struct SpherePC { NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1<<16)-1; @@ -47,7 +54,8 @@ struct SpherePC uint32_t positionView : 16; uint32_t normalView : 16; float32_t radius; - uint16_t texIx; + uint32_t mode : 8; + uint32_t texIx : 24; }; struct PushConstants diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index f4bbb86cf..c57ca95b3 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -40,6 +40,7 @@ struct SInterpolants { float32_t4 ndc : SV_Position; float32_t3 latDir : COLOR1; + float32_t2 uv : TEXCOORD0; }; using octahedral_t = math::OctahedralTransform; @@ -51,8 +52,8 @@ SInterpolants SphereVS(uint32_t vIx : SV_VertexID) uint32_t2 res; inIESCandelaImage[pc.sphere.texIx].GetDimensions(res.x, res.y); - const float32_t2 inv = float32_t2(1.f, 1.f) / float32_t2(res - 1u); - const float32_t2 uv = float32_t2(vIx % res.x, vIx / res.x) * inv; + const float32_t2 inv = float32_t2(1.f, 1.f) / float32_t2(res - 1u); + const float32_t2 uv = float32_t2(vIx % res.x, vIx / res.x) * inv; const float32_t3 dir = octahedral_t::uvToDir(uv); const float32_t3 pos = pc.sphere.radius * dir; @@ -60,17 +61,72 @@ SInterpolants SphereVS(uint32_t vIx : SV_VertexID) SInterpolants o; o.ndc = math::linalg::promoted_mul(pc.sphere.matrices.worldViewProj, pos); o.latDir = dir; + o.uv = uv; return o; } +float32_t3 falseColor(float32_t v) +{ + v = saturate(v); + v = pow(v, 0.8f); + + const float32_t3 c0 = float32_t3(0.0f, 0.0f, 0.0f); // 0.00 - black + const float32_t3 c1 = float32_t3(0.0f, 0.0f, 0.35f); // 0.15 - very dark blue + const float32_t3 c2 = float32_t3(0.10f, 0.20f, 0.90f); // 0.35 - bright blue + const float32_t3 c3 = float32_t3(0.70f, 0.00f, 0.80f); // 0.55 - violet/magenta + const float32_t3 c4 = float32_t3(1.00f, 0.30f, 1.00f); // 0.75 - bright pink + const float32_t3 c5 = float32_t3(1.00f, 1.00f, 1.00f); // 1.00 - white + + if (v < 0.15f) + { + float32_t t = v / 0.15f; + return lerp(c0, c1, t); + } + else if (v < 0.35f) + { + float32_t t = (v - 0.15f) / (0.35f - 0.15f); + return lerp(c1, c2, t); + } + else if (v < 0.55f) + { + float32_t t = (v - 0.35f) / (0.55f - 0.35f); + return lerp(c2, c3, t); + } + else if (v < 0.75f) + { + float32_t t = (v - 0.55f) / (0.75f - 0.55f); + return lerp(c3, c4, t); + } + else + { + float32_t t = (v - 0.75f) / (1.0f - 0.75f); + return lerp(c4, c5, t); + } +} + [shader("pixel")] float32_t4 SpherePS(SInterpolants input) : SV_Target0 { - float32_t2 uv = 0.5f * octahedral_t::dirToNDC(input.latDir) + 0.5f; - float32_t intensity = inIESCandelaImage[pc.sphere.texIx].Sample(generalSampler, uv).r; - float32_t v = 1.0f - exp(-intensity); - return float32_t4(v,v,v,1); + uint32_t2 res; + inIESCandelaImage[pc.sphere.texIx].GetDimensions(res.x, res.y); + float32_t2 uv = input.uv; + + const bool dontInterpolateUV = (pc.sphere.mode & ESM_OCTAHEDRAL_UV_INTERPOLATE) == 0; + if (dontInterpolateUV) + { + float32_t2 pixel = floor(uv * float32_t2(res) + 0.5f); + uv = pixel / float32_t2(res); + } + + float32_t2 scale = 1.0f - 1.0f / float32_t2(res); + float32_t2 uvCorner = (uv - 0.5f) * scale + 0.5f; + + float32_t I = inIESCandelaImage[pc.sphere.texIx].SampleLevel(generalSampler, uvCorner, 0.0f).r; + const bool useFalseColor = (pc.sphere.mode & ESM_FALSE_COLOR) != 0; + float32_t3 col = useFalseColor ? falseColor(I) : float32_t3(I, I, I); + + return float32_t4(col, 1.0f); } [numthreads(WORKGROUP_DIMENSION, WORKGROUP_DIMENSION, 1)] @@ -96,21 +152,26 @@ float32_t plot(float32_t cand, float32_t pct, float32_t bold) // vertical cut of IES (i.e. cut by plane x = 0) float32_t f(float32_t2 uv) { - float32_t3 dir = normalize(float32_t3(uv.x, 0.001, uv.y)); - if (pc.cdc.zAngleDegreeRotation != 0.f) - { - float32_t rad = radians(pc.cdc.zAngleDegreeRotation); - float32_t s = sin(rad); - float32_t c = cos(rad); - - // rotate around Z axis - dir = float32_t3( - c * dir.x - s * dir.y, - s * dir.x + c * dir.y, - dir.z - ); - } - return inIESCandelaImage[pc.cdc.texIx].Sample(generalSampler, (0.5f * octahedral_t::dirToNDC(dir) + 0.5f)).x; + float32_t3 dir = normalize(float32_t3(uv.x, 0.001, uv.y)); + if (pc.cdc.zAngleDegreeRotation != 0.f) + { + float32_t rad = radians(pc.cdc.zAngleDegreeRotation); + float32_t s = sin(rad); + float32_t c = cos(rad); + + dir = float32_t3( + c * dir.x - s * dir.y, + s * dir.x + c * dir.y, + dir.z + ); + } + + uint32_t2 res; + inIESCandelaImage[pc.cdc.texIx].GetDimensions(res.x, res.y); + float32_t2 halfMinusHalfPixel = 0.5f - 0.5f / float32_t2(res); + float32_t2 uvCorner = octahedral_t::toCornerSampledUV(dir, halfMinusHalfPixel); + + return inIESCandelaImage[pc.cdc.texIx].SampleLevel(generalSampler, uvCorner, 0u).x; } #include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" @@ -138,4 +199,4 @@ float32_t4 CdcPS(SVertexAttributes input) : SV_Target0 default: return float32_t4(0.f, 0.f, 0.f, 0.f); } -} +} \ No newline at end of file From 93861bd59f85721993472e3de67f23bec6170363 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sat, 6 Dec 2025 21:02:46 +0300 Subject: [PATCH 089/219] Make camera account for up direction, corrected framebuffer resolutions for both views, solid angle shader now outputs correct cube vertices correctly --- .../hlsl/SolidAngleVis.frag.hlsl | 157 +++++++++++------- 72_SolidAngleVisualizer/include/transform.hpp | 2 +- 72_SolidAngleVisualizer/main.cpp | 134 ++++++++------- .../include/nbl/examples/cameras/CCamera.hpp | 50 +++--- 4 files changed, 190 insertions(+), 153 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index d783a5b37..2ad766c8a 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -9,7 +9,7 @@ using namespace ext::FullScreenTriangle; [[vk::push_constant]] struct PushConstants pc; -static const float CIRCLE_RADIUS = 0.45f; +static const float CIRCLE_RADIUS = 0.75f; // --- Geometry Utils --- @@ -33,17 +33,23 @@ static float3 corners[8]; static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0) }; static float2 projCorners[8]; +static bool cornerVisible[8]; // Converts UV into centered, aspect-corrected NDC circle space float2 toCircleSpace(float2 uv) { - float aspect = pc.viewport.z / pc.viewport.w; - float2 centered = uv - 0.5f; - centered.x *= aspect; - return centered; + // Map [0,1] UV to [-1,1] + float2 p = uv * 2.0f - 1.0f; + + // Correct aspect ratio + float aspect = pc.viewport.z / pc.viewport.w; // width / height + p.x *= aspect; + + return p; } + // Distance to a 2D line segment float sdSegment(float2 p, float2 a, float2 b) { @@ -54,9 +60,18 @@ float sdSegment(float2 p, float2 a, float2 b) } // TODO: Hemispherical Projection (Solid Angle / Orthographic/Lambertian Projection) -float2 project(float3 p) +bool projectToOrthoSphere(float3 p, out float2 uv) { - return normalize(p).xy; + float3 n = normalize(p); // direction to sphere + + // hemisphere (Z > 0) + if (n.z <= 0.0) + return false; + + // orthographic projection (drop Z) + uv = n.xy; + + return true; // valid } void computeCubeGeo() @@ -66,71 +81,72 @@ void computeCubeGeo() float3 localPos = float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f; float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; - corners[i] = worldPos; + corners[i] = worldPos.xyz; faceCenters[i/4] += worldPos / 4.0f; faceCenters[2+i%2] += worldPos / 4.0f; faceCenters[4+(i/2)%2] += worldPos / 4.0f; - float3 viewPos = worldPos; - projCorners[i] = project(viewPos); + float3 viewPos = worldPos.xyz; + cornerVisible[i] = projectToOrthoSphere(viewPos, projCorners[i]); + projCorners[i] *= CIRCLE_RADIUS; // scale to circle radius } } -int getVisibilityCount(int2 faces, float3 cameraPos) -{ - float3x3 rotMatrix = (float3x3)pc.modelMatrix; - float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); - float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); +// int getVisibilityCount(int2 faces, float3 cameraPos) +// { +// float3x3 rotMatrix = (float3x3)pc.modelMatrix; +// float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); +// float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); - float3 viewVec_f1 = faceCenters[faces.x] - cameraPos; - float3 viewVec_f2 = faceCenters[faces.y] - cameraPos; +// float3 viewVec_f1 = faceCenters[faces.x] - cameraPos; +// float3 viewVec_f2 = faceCenters[faces.y] - cameraPos; - // Face is visible if its outward normal points towards the origin (camera). - bool visible1 = dot(n_world_f1, viewVec_f1) < 0.0f; - bool visible2 = dot(n_world_f2, viewVec_f2) < 0.0f; +// // Face is visible if its outward normal points towards the origin (camera). +// bool visible1 = dot(n_world_f1, viewVec_f1) < 0.0f; +// bool visible2 = dot(n_world_f2, viewVec_f2) < 0.0f; - // Determine Line Style: - bool isSilhouette = visible1 != visible2; // One face visible, the other hidden - bool isInner = visible1 && visible2; // Both faces visible +// // Determine Line Style: +// bool isSilhouette = visible1 != visible2; // One face visible, the other hidden +// bool isInner = visible1 && visible2; // Both faces visible - int visibilityCount = 0; - if (isSilhouette) - { - visibilityCount = 1; - } - else if (isInner) - { - visibilityCount = 2; - } - - return visibilityCount; -} - -void drawLine(float2 p, int a, int b, int visibilityCount, inout float4 color, float aaWidth) -{ - if (visibilityCount > 0) - { - float3 A = corners[a]; - float3 B = corners[b]; - - float avgDepth = (length(A) + length(B)) * 0.5f; - float referenceDepth = 3.0f; - float depthScale = referenceDepth / avgDepth; - - float baseWidth = (visibilityCount == 1) ? 0.005f : 0.002f; - float intensity = (visibilityCount == 1) ? 1.0f : 0.5f; - float4 edgeColor = (visibilityCount == 1) ? float4(0.0f, 0.5f, 1.0f, 1.0f) : float4(1.0f, 0.0f, 0.0f, 1.0f); // Blue vs Red +// int visibilityCount = 0; +// if (isSilhouette) +// { +// visibilityCount = 1; +// } +// else if (isInner) +// { +// visibilityCount = 2; +// } + +// return visibilityCount; +// } + +// void drawLine(float2 p, int a, int b, int visibilityCount, inout float4 color, float aaWidth) +// { +// if (visibilityCount > 0) +// { +// float3 A = corners[a]; +// float3 B = corners[b]; + +// float avgDepth = (length(A) + length(B)) * 0.5f; +// float referenceDepth = 3.0f; +// float depthScale = referenceDepth / avgDepth; + +// float baseWidth = (visibilityCount == 1) ? 0.005f : 0.002f; +// float intensity = (visibilityCount == 1) ? 1.0f : 0.5f; +// float4 edgeColor = (visibilityCount == 1) ? float4(0.0f, 0.5f, 1.0f, 1.0f) : float4(1.0f, 0.0f, 0.0f, 1.0f); // Blue vs Red - float width = min(baseWidth * depthScale, 0.03f); +// float width = min(baseWidth * depthScale, 0.03f); - float dist = sdSegment(p, projCorners[a], projCorners[b]); +// float dist = sdSegment(p, projCorners[a], projCorners[b]); - float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); +// float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); - color += edgeColor * alpha * intensity; - } -} +// color += edgeColor * alpha * intensity; +// } +// } void drawRing(float2 p, inout float4 color, float aaWidth) { @@ -149,6 +165,12 @@ void drawRing(float2 p, inout float4 color, float aaWidth) color = max(color, float4(1.0, 1.0, 1.0, 1.0) * ringAlpha); } +float plotPoint(float2 uv, float2 p, float r) +{ + return step(length(uv - p), r); +} + + [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { float3 cameraPos = float3(0, 0, 0); // Camera at origin @@ -159,16 +181,25 @@ void drawRing(float2 p, inout float4 color, float aaWidth) float aaWidth = max(fwidth(p.x), fwidth(p.y)); - for (int j = 0; j < 12; j++) + float pointMask = 0.0; + for (int i=0; i<8; i++) { - int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? j % 2 : 0); - int b = a + (4 >> (j / 4)); - - int2 faces = edgeToFaces[j]; - int visibilityCount = getVisibilityCount(faces, cameraPos); - drawLine(p, a, b, visibilityCount, color, aaWidth); + if (cornerVisible[i]) + pointMask += plotPoint(p, projCorners[i], 0.015f); } + color += pointMask * float4(1,0,0,1); // red points + + // for (int j = 0; j < 12; j++) + // { + // int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? j % 2 : 0); + // int b = a + (4 >> (j / 4)); + + // // int2 faces = edgeToFaces[j]; + // // int visibilityCount = getVisibilityCount(faces, cameraPos); + // // drawLine(p, a, b, visibilityCount, color, aaWidth); + // } + drawRing(p, color, aaWidth); return color; diff --git a/72_SolidAngleVisualizer/include/transform.hpp b/72_SolidAngleVisualizer/include/transform.hpp index 002a9d215..5061ebd49 100644 --- a/72_SolidAngleVisualizer/include/transform.hpp +++ b/72_SolidAngleVisualizer/include/transform.hpp @@ -19,7 +19,7 @@ struct TransformRequestParams struct TransformReturnInfo { - nbl::hlsl::uint16_t2 sceneResolution = { 2048,1024 }; + nbl::hlsl::uint16_t2 sceneResolution = { 0, 0 }; bool isGizmoWindowHovered; bool isGizmoBeingUsed; }; diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index b6d723e70..1025eb067 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -5,7 +5,6 @@ #include "common.hpp" #include "app_resources/hlsl/common.hlsl" - #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" /* @@ -319,10 +318,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // CPU events update(nextPresentationTimestamp); - const auto& virtualWindowRes = interface.transformReturnInfo.sceneResolution; - // TODO: check main frame buffer too - if (!m_solidAngleViewFramebuffer || m_solidAngleViewFramebuffer->getCreationParameters().width != virtualWindowRes[0] || m_solidAngleViewFramebuffer->getCreationParameters().height != virtualWindowRes[1]) - recreateFramebuffer(virtualWindowRes); + { + const auto& virtualSolidAngleWindowRes = interface.solidAngleViewTransformReturnInfo.sceneResolution; + const auto& virtualMainWindowRes = interface.mainViewTransformReturnInfo.sceneResolution; + if (!m_solidAngleViewFramebuffer || m_solidAngleViewFramebuffer->getCreationParameters().width != virtualSolidAngleWindowRes[0] || m_solidAngleViewFramebuffer->getCreationParameters().height != virtualSolidAngleWindowRes[1] || + !m_mainViewFramebuffer || m_mainViewFramebuffer->getCreationParameters().width != virtualMainWindowRes[0] || m_mainViewFramebuffer->getCreationParameters().height != virtualMainWindowRes[1]) + recreateFramebuffer(); + } // const auto resourceIx = m_realFrameIx % MaxFramesInFlight; @@ -334,6 +336,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; if (m_solidAngleViewFramebuffer) { + auto creationParams = m_solidAngleViewFramebuffer->getCreationParameters(); cb->beginDebugMarker("Draw Circle View Frame"); { const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; @@ -344,7 +347,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .depthStencilClearValues = &farValue, .renderArea = { .offset = {0,0}, - .extent = {virtualWindowRes[0],virtualWindowRes[1]} + .extent = {creationParams.width, creationParams.height} } }; beginRenderpass(cb, renderpassInfo); @@ -353,7 +356,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { PushConstants pc{ .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), - .viewport = { 0.f,0.f,static_cast(virtualWindowRes[0]),static_cast(virtualWindowRes[1]) } + .viewport = { 0.f,0.f,static_cast(creationParams.width),static_cast(creationParams.height) } }; auto pipeline = m_visualizationPipeline; cb->bindGraphicsPipeline(pipeline.get()); @@ -369,6 +372,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { cb->beginDebugMarker("Main Scene Frame"); { + auto creationParams = m_mainViewFramebuffer->getCreationParameters(); const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = { @@ -377,7 +381,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .depthStencilClearValues = &farValue, .renderArea = { .offset = {0,0}, - .extent = {virtualWindowRes[0],virtualWindowRes[1]} + .extent = {creationParams.width, creationParams.height} } }; beginRenderpass(cb, renderpassInfo); @@ -404,12 +408,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // TODO: a better way to get identity matrix float32_t3x4 origin = { - 0.2f,0.0f,0.0f,0.0f, - 0.0f,0.2f,0.0f,0.0f, - 0.0f,0.0f,0.2f,0.0f + 1.0f,0.0f,0.0f,0.0f, + 0.0f,1.0f,0.0f,0.0f, + 0.0f,0.0f,1.0f,0.0f }; memcpy(&instance.world, &origin, sizeof(instance.world)); - instance.packedGeo = m_renderer->getGeometries().data() + 3; // sphere + instance.packedGeo = m_renderer->getGeometries().data() + 2; // disk m_renderer->render(cb, viewParams); } cb->endRenderPass(); @@ -575,7 +579,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ); keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { - //if (interface.move) + if (interface.move) camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl for (const auto& e : events) // here capture @@ -606,9 +610,10 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR interface.imGUI->update(params); } - void recreateFramebuffer(const uint16_t2 resolution) + void recreateFramebuffer() { - auto createImageAndView = [&](E_FORMAT format)->smart_refctd_ptr + + auto createImageAndView = [&](const uint16_t2 resolution, E_FORMAT format)->smart_refctd_ptr { auto image = m_device->createImage({ { .type = IGPUImage::ET_2D, @@ -632,29 +637,32 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR smart_refctd_ptr solidAngleView; smart_refctd_ptr mainView; + const uint16_t2 solidAngleViewRes = interface.solidAngleViewTransformReturnInfo.sceneResolution; + const uint16_t2 mainViewRes = interface.mainViewTransformReturnInfo.sceneResolution; + // detect window minimization - if (resolution.x < 0x4000 && resolution.y < 0x4000) + if (solidAngleViewRes.x < 0x4000 && solidAngleViewRes.y < 0x4000 || + mainViewRes.x < 0x4000 && mainViewRes.y < 0x4000) { - solidAngleView = createImageAndView(finalSceneRenderFormat); - auto solidAngleDepthView = createImageAndView(sceneRenderDepthFormat); + solidAngleView = createImageAndView(solidAngleViewRes, finalSceneRenderFormat); + auto solidAngleDepthView = createImageAndView(solidAngleViewRes, sceneRenderDepthFormat); m_solidAngleViewFramebuffer = m_device->createFramebuffer({ { .renderpass = m_solidAngleRenderpass, .depthStencilAttachments = &solidAngleDepthView.get(), .colorAttachments = &solidAngleView.get(), - .width = resolution.x, - .height = resolution.y + .width = solidAngleViewRes.x, + .height = solidAngleViewRes.y } }); - mainView = createImageAndView(finalSceneRenderFormat); - auto mainDepthView = createImageAndView(sceneRenderDepthFormat); + mainView = createImageAndView(mainViewRes, finalSceneRenderFormat); + auto mainDepthView = createImageAndView(mainViewRes, sceneRenderDepthFormat); m_mainViewFramebuffer = m_device->createFramebuffer({ { .renderpass = m_mainRenderpass, .depthStencilAttachments = &mainDepthView.get(), .colorAttachments = &mainView.get(), - .width = resolution.x, - .height = resolution.y + .width = mainViewRes.x, + .height = mainViewRes.y } }); - } else { @@ -715,6 +723,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes constexpr static inline auto MaxImGUITextures = 2u + MaxFramesInFlight; + constexpr static inline float32_t4x4 OBBModelMatrixDefault + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 6.0f, 1.0f + }; // smart_refctd_ptr m_scene; smart_refctd_ptr m_solidAngleRenderpass; @@ -722,7 +737,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR smart_refctd_ptr m_renderer; smart_refctd_ptr m_solidAngleViewFramebuffer; smart_refctd_ptr m_mainViewFramebuffer; - smart_refctd_ptr m_visualizationPipeline; + smart_refctd_ptr m_visualizationPipeline; // smart_refctd_ptr m_semaphore; uint64_t m_realFrameIx = 0; @@ -733,19 +748,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // UI stuff struct CInterface { - void cameraToHome() - { - core::vectorSIMDf cameraPosition(-3.0f, 3.0f, 6.0f); - core::vectorSIMDf cameraTarget(0.f, 0.f, 6.f); - const static core::vectorSIMDf up(0.f, 1.f, 0.f); - - camera.setPosition(cameraPosition); - camera.setTarget(cameraTarget); - camera.setBackupUpVector(up); - - camera.recomputeViewMatrix(); - } - void operator()() { ImGuiIO& io = ImGui::GetIO(); @@ -773,7 +775,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR return projection; }()); - ImGuizmo::SetOrthographic(false); + ImGuizmo::SetOrthographic(!isPerspective); ImGuizmo::BeginFrame(); ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); @@ -830,7 +832,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR if (viewDirty || firstFrame) { - cameraToHome(); + camera.setPosition(cameraIntialPosition); + camera.setTarget(cameraInitialTarget); + camera.setBackupUpVector(cameraInitialUp); + camera.setUpVector(cameraInitialUp); + + camera.recomputeViewMatrix(); } firstFrame = false; @@ -895,19 +902,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR * note it also modifies input view matrix but projection matrix is immutable */ - if (ImGui::IsKeyPressed(ImGuiKey_Home)) - { - cameraToHome(); - } + // No need because camera already has this functionality + // if (ImGui::IsKeyPressed(ImGuiKey_Home)) + // { + // cameraToHome(); + // } if (ImGui::IsKeyPressed(ImGuiKey_End)) { - m_OBBModelMatrix = { - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 12.0f, 1.0f - }; + m_OBBModelMatrix = OBBModelMatrixDefault; } static struct @@ -930,10 +933,14 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ transformParams.editTransformDecomposition = true; - transformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); + mainViewTransformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); + // MODEL: Zup -> Yup + + m_OBBModelMatrix = imguizmoM16InOut.model; // TODO: camera stops when cursor hovers gizmo, but we also want to stop when gizmo is being used - move = (ImGui::IsMouseDown(ImGuiMouseButton_Left) || transformReturnInfo.isGizmoWindowHovered) && (!transformReturnInfo.isGizmoBeingUsed); + move = (ImGui::IsMouseDown(ImGuiMouseButton_Left) || mainViewTransformReturnInfo.isGizmoWindowHovered) && (!mainViewTransformReturnInfo.isGizmoBeingUsed); + } // to Nabla + update camera & model matrices @@ -957,9 +964,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::SetNextWindowSize(ImVec2(800, 800), ImGuiCond_Appearing); ImGui::SetNextWindowPos(ImVec2(1240, 20), ImGuiCond_Appearing); static bool isOpen = true; - ImGui::Begin("Solid angle view", &isOpen, 0); + ImGui::Begin("Projected Solid Angle View", &isOpen, 0); ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + solidAngleViewTransformReturnInfo.sceneResolution = uint16_t2(static_cast(contentRegionSize.x), static_cast(contentRegionSize.y)); + solidAngleViewTransformReturnInfo.isGizmoBeingUsed = false; // not used in this view + solidAngleViewTransformReturnInfo.isGizmoWindowHovered = false; // not used in this view ImGui::Image({ renderColorViewDescIndices[ERV_SOLID_ANGLE_VIEW] }, contentRegionSize); ImGui::End(); } @@ -1081,21 +1091,19 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); // mutables - float32_t4x4 m_OBBModelMatrix{ - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 12.0f, 1.0f - }; + float32_t4x4 m_OBBModelMatrix = OBBModelMatrixDefault; //std::string_view objectName; TransformRequestParams transformParams; - TransformReturnInfo transformReturnInfo; + TransformReturnInfo mainViewTransformReturnInfo; + TransformReturnInfo solidAngleViewTransformReturnInfo; + + const static inline core::vectorSIMDf cameraIntialPosition{ -3.0f, 6.0f, 3.0f }; + const static inline core::vectorSIMDf cameraInitialTarget{ 0.f, 0.0f, 3.f }; + const static inline core::vectorSIMDf cameraInitialUp{ 0.f, 0.f, 1.f }; float fov = 90.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; float viewWidth = 10.f; - float camYAngle = 90.f / 180.f * 3.14159f; - float camXAngle = 0.f / 180.f * 3.14159f; //uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed bool isPerspective = true, isLH = true, flipGizmoY = true, move = true; bool firstFrame = true; diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index 3b3cd38d8..f35cd341a 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -149,38 +149,36 @@ class Camera if(ev.type == nbl::ui::SMouseEvent::EET_MOVEMENT && mouseDown) { nbl::core::vectorSIMDf pos = getPosition(); - nbl::core::vectorSIMDf localTarget = getTarget() - pos; - - // Get Relative Rotation for localTarget in Radians - float relativeRotationX, relativeRotationY; - relativeRotationY = atan2(localTarget.X, localTarget.Z); - const double z1 = nbl::core::sqrt(localTarget.X*localTarget.X + localTarget.Z*localTarget.Z); - relativeRotationX = atan2(z1, localTarget.Y) - nbl::core::PI()/2; - - constexpr float RotateSpeedScale = 0.003f; - relativeRotationX -= ev.movementEvent.relativeMovementY * rotateSpeed * RotateSpeedScale * -1.0f; - float tmpYRot = ev.movementEvent.relativeMovementX * rotateSpeed * RotateSpeedScale * -1.0f; + nbl::core::vectorSIMDf upVector = getUpVector(); + nbl::core::vectorSIMDf forward = nbl::core::normalize(getTarget() - pos); + + nbl::core::vectorSIMDf right = nbl::core::normalize(nbl::core::cross(forward, upVector)); + nbl::core::vectorSIMDf up = nbl::core::normalize(nbl::core::cross(right, forward)); + + constexpr float RotateSpeedScale = 0.003f; + float pitchDelta = ev.movementEvent.relativeMovementY * rotateSpeed * RotateSpeedScale * -1.0f; + float yawDelta = ev.movementEvent.relativeMovementX * rotateSpeed * RotateSpeedScale * -1.0f; if (leftHanded) - relativeRotationY -= tmpYRot; - else - relativeRotationY += tmpYRot; + yawDelta = -yawDelta; - const double MaxVerticalAngle = nbl::core::radians(88.0f); + // Clamp pitch BEFORE applying rotation + const float MaxVerticalAngle = nbl::core::radians(88.0f); + float currentPitch = asin(nbl::core::dot(forward, upVector).X); + float newPitch = nbl::core::clamp(currentPitch + pitchDelta, -MaxVerticalAngle, MaxVerticalAngle); + pitchDelta = newPitch - currentPitch; - if (relativeRotationX > MaxVerticalAngle*2 && relativeRotationX < 2 * nbl::core::PI()-MaxVerticalAngle) - relativeRotationX = 2 * nbl::core::PI()-MaxVerticalAngle; - else - if (relativeRotationX > MaxVerticalAngle && relativeRotationX < 2 * nbl::core::PI()-MaxVerticalAngle) - relativeRotationX = MaxVerticalAngle; + // Create rotation quaternions using axis-angle method + nbl::core::quaternion pitchRot = nbl::core::quaternion::fromAngleAxis(pitchDelta, right); + nbl::core::quaternion yawRot = nbl::core::quaternion::fromAngleAxis(yawDelta, upVector); + nbl::core::quaternion combinedRot = yawRot * pitchRot; - localTarget.set(0,0, nbl::core::max(1.f, nbl::core::length(pos)[0]), 1.f); + // Apply to forward vector + forward = nbl::core::normalize(combinedRot.transformVect(forward)); - nbl::core::matrix3x4SIMD mat; - mat.setRotation(nbl::core::quaternion(relativeRotationX, relativeRotationY, 0)); - mat.transformVect(localTarget); - - setTarget(localTarget + pos); + // Set new target + float targetDistance = nbl::core::length(getTarget() - pos).X; + setTarget(pos + forward * targetDistance); } } } From adb15edd201e82cbc9ed3526bbfccfc67ccdf4ff Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sun, 7 Dec 2025 00:12:56 +0300 Subject: [PATCH 090/219] sphere arc "cube edge" in solid angle view, more reliable resizing of windows --- .../hlsl/SolidAngleVis.frag.hlsl | 218 ++++++++---------- 72_SolidAngleVisualizer/main.cpp | 24 +- 2 files changed, 107 insertions(+), 135 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index 2ad766c8a..badf1e4be 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -32,8 +32,7 @@ static const float3 localNormals[6] = { static float3 corners[8]; static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0) }; -static float2 projCorners[8]; -static bool cornerVisible[8]; + // Converts UV into centered, aspect-corrected NDC circle space @@ -46,32 +45,7 @@ float2 toCircleSpace(float2 uv) float aspect = pc.viewport.z / pc.viewport.w; // width / height p.x *= aspect; - return p; -} - - -// Distance to a 2D line segment -float sdSegment(float2 p, float2 a, float2 b) -{ - float2 pa = p - a; - float2 ba = b - a; - float h = clamp(dot(pa, ba) / dot(ba, ba), 0.0f, 1.0f); - return length(pa - ba * h); -} - -// TODO: Hemispherical Projection (Solid Angle / Orthographic/Lambertian Projection) -bool projectToOrthoSphere(float3 p, out float2 uv) -{ - float3 n = normalize(p); // direction to sphere - - // hemisphere (Z > 0) - if (n.z <= 0.0) - return false; - - // orthographic projection (drop Z) - uv = n.xy; - - return true; // valid + return p * CIRCLE_RADIUS; } void computeCubeGeo() @@ -86,121 +60,121 @@ void computeCubeGeo() faceCenters[i/4] += worldPos / 4.0f; faceCenters[2+i%2] += worldPos / 4.0f; faceCenters[4+(i/2)%2] += worldPos / 4.0f; - - float3 viewPos = worldPos.xyz; - cornerVisible[i] = projectToOrthoSphere(viewPos, projCorners[i]); - projCorners[i] *= CIRCLE_RADIUS; // scale to circle radius } } -// int getVisibilityCount(int2 faces, float3 cameraPos) -// { -// float3x3 rotMatrix = (float3x3)pc.modelMatrix; -// float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); -// float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); - -// float3 viewVec_f1 = faceCenters[faces.x] - cameraPos; -// float3 viewVec_f2 = faceCenters[faces.y] - cameraPos; - -// // Face is visible if its outward normal points towards the origin (camera). -// bool visible1 = dot(n_world_f1, viewVec_f1) < 0.0f; -// bool visible2 = dot(n_world_f2, viewVec_f2) < 0.0f; - -// // Determine Line Style: -// bool isSilhouette = visible1 != visible2; // One face visible, the other hidden -// bool isInner = visible1 && visible2; // Both faces visible - -// int visibilityCount = 0; -// if (isSilhouette) -// { -// visibilityCount = 1; -// } -// else if (isInner) -// { -// visibilityCount = 2; -// } - -// return visibilityCount; -// } - -// void drawLine(float2 p, int a, int b, int visibilityCount, inout float4 color, float aaWidth) -// { -// if (visibilityCount > 0) -// { -// float3 A = corners[a]; -// float3 B = corners[b]; - -// float avgDepth = (length(A) + length(B)) * 0.5f; -// float referenceDepth = 3.0f; -// float depthScale = referenceDepth / avgDepth; - -// float baseWidth = (visibilityCount == 1) ? 0.005f : 0.002f; -// float intensity = (visibilityCount == 1) ? 1.0f : 0.5f; -// float4 edgeColor = (visibilityCount == 1) ? float4(0.0f, 0.5f, 1.0f, 1.0f) : float4(1.0f, 0.0f, 0.0f, 1.0f); // Blue vs Red - -// float width = min(baseWidth * depthScale, 0.03f); - -// float dist = sdSegment(p, projCorners[a], projCorners[b]); - -// float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); - -// color += edgeColor * alpha * intensity; -// } -// } - -void drawRing(float2 p, inout float4 color, float aaWidth) +float4 drawRing(float2 p, float aaWidth) { float positionLength = length(p); - - // Mask to cut off drawing outside the circle - // float circleMask = 1.0f - smoothstep(CIRCLE_RADIUS, CIRCLE_RADIUS + aaWidth, positionLength); - // color *= circleMask; // Add a white background circle ring - float ringWidth = 0.005f; + float ringWidth = 0.01f; float ringDistance = abs(positionLength - CIRCLE_RADIUS); float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); - // Ring color is now white - color = max(color, float4(1.0, 1.0, 1.0, 1.0) * ringAlpha); + return ringAlpha.xxxx; } -float plotPoint(float2 uv, float2 p, float r) +// Check if a face on the hemisphere is visible from camera at origin +bool isFaceVisible(float3 faceCenter, float3 faceNormal) { - return step(length(uv - p), r); + // Face is visible if normal points toward camera (at origin) + float3 viewVec = -normalize(faceCenter); // Vector from face to camera + return dot(faceNormal, viewVec) > 0.0f; } +int getEdgeVisibility(int edgeIdx, float3 cameraPos) +{ + int2 faces = edgeToFaces[edgeIdx]; + + // Transform normals to world space + float3x3 rotMatrix = (float3x3)pc.modelMatrix; + float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); + float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); + + bool visible1 = isFaceVisible(faceCenters[faces.x], n_world_f1); + bool visible2 = isFaceVisible(faceCenters[faces.y], n_world_f2); + + // Silhouette: exactly one face visible + if (visible1 != visible2) return 1; + + // Inner edge: both faces visible + if (visible1 && visible2) return 2; + + // Hidden edge: both faces hidden + return 0; +} + +// Draw great circle arc in fragment shader +float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float aaWidth) +{ + if (visibility == 0) return float4(0,0,0,0); // Hidden edge + + float3 v0 = normalize(corners[edgeVerts.x]); + float3 v1 = normalize(corners[edgeVerts.y]); + float3 p = normalize(fragPos); // Current point on hemisphere + + // Great circle plane normal + float3 arcNormal = normalize(cross(v0, v1)); + + // Distance to great circle + float dist = abs(dot(p, arcNormal)); + + // Check if point is within arc bounds + float dotMid = dot(v0, v1); + bool onArc = (dot(p, v0) >= dotMid) && (dot(p, v1) >= dotMid); + + if (!onArc) return float4(0,0,0,0); + + // Depth-based width scaling + float avgDepth = (length(corners[edgeVerts.x]) + length(corners[edgeVerts.y])) * 0.5f; + float depthScale = 3.0f / avgDepth; + + float baseWidth = (visibility == 1) ? 0.01f : 0.005f; + float width = min(baseWidth * depthScale, 0.02f); + + float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); + + float4 edgeColor = (visibility == 1) ? + float4(0.0f, 0.5f, 1.0f, 1.0f) : // Silhouette: blue + float4(1.0f, 0.0f, 0.0f, 1.0f); // Inner: red + + float intensity = (visibility == 1) ? 1.0f : 0.5f; + return edgeColor * alpha * intensity; +} [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { - float3 cameraPos = float3(0, 0, 0); // Camera at origin - float2 p = toCircleSpace(vx.uv); + float3 cameraPos = float3(0, 0, 0); float4 color = float4(0, 0, 0, 0); - - computeCubeGeo(); + float2 p = toCircleSpace(vx.uv); - float aaWidth = max(fwidth(p.x), fwidth(p.y)); - - float pointMask = 0.0; - for (int i=0; i<8; i++) + // Convert 2D disk position to 3D hemisphere position + // p is in range [-CIRCLE_RADIUS, CIRCLE_RADIUS] + float2 normalized = p / CIRCLE_RADIUS; // Now in range [-1, 1] + float r2 = dot(normalized, normalized); + + if (r2 > 1.0f) + discard; + + // Convert UV to 3D position on hemisphere + float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); + + computeCubeGeo(); // Your existing function + + float aaWidth = length(float2(ddx(p.x), ddy(p.y))); + + // Draw edges as great circle arcs + for (int j = 0; j < 12; j++) { - if (cornerVisible[i]) - pointMask += plotPoint(p, projCorners[i], 0.015f); + int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? j % 2 : 0); + int b = a + (4 >> (j / 4)); + + int visibility = getEdgeVisibility(j, cameraPos); + color += drawGreatCircleArc(spherePos, int2(a, b), visibility, aaWidth); } - - color += pointMask * float4(1,0,0,1); // red points - - // for (int j = 0; j < 12; j++) - // { - // int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? j % 2 : 0); - // int b = a + (4 >> (j / 4)); - - // // int2 faces = edgeToFaces[j]; - // // int visibilityCount = getVisibilityCount(faces, cameraPos); - // // drawLine(p, a, b, visibilityCount, color, aaWidth); - // } - - drawRing(p, color, aaWidth); - + + color += drawRing(p, aaWidth); + return color; } \ No newline at end of file diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index 1025eb067..8fb8bf144 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -323,7 +323,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR const auto& virtualMainWindowRes = interface.mainViewTransformReturnInfo.sceneResolution; if (!m_solidAngleViewFramebuffer || m_solidAngleViewFramebuffer->getCreationParameters().width != virtualSolidAngleWindowRes[0] || m_solidAngleViewFramebuffer->getCreationParameters().height != virtualSolidAngleWindowRes[1] || !m_mainViewFramebuffer || m_mainViewFramebuffer->getCreationParameters().width != virtualMainWindowRes[0] || m_mainViewFramebuffer->getCreationParameters().height != virtualMainWindowRes[1]) - recreateFramebuffer(); + recreateFramebuffers(); } // @@ -402,10 +402,9 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR auto& instance = m_renderer->m_instances[0]; auto transposed = hlsl::transpose(interface.m_OBBModelMatrix); memcpy(&instance.world, &transposed, sizeof(instance.world)); - instance.packedGeo = m_renderer->getGeometries().data();// +interface.gcIndex; + instance.packedGeo = m_renderer->getGeometries().data(); // cube // +interface.gcIndex; m_renderer->render(cb, viewParams); // draw the cube/OBB - // TODO: a better way to get identity matrix float32_t3x4 origin = { 1.0f,0.0f,0.0f,0.0f, @@ -536,7 +535,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR camera.setMoveSpeed(interface.moveSpeed); camera.setRotateSpeed(interface.rotateSpeed); - m_inputSystem->getDefaultMouse(&mouse); m_inputSystem->getDefaultKeyboard(&keyboard); @@ -610,7 +608,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR interface.imGUI->update(params); } - void recreateFramebuffer() + void recreateFramebuffers() { auto createImageAndView = [&](const uint16_t2 resolution, E_FORMAT format)->smart_refctd_ptr @@ -671,30 +669,30 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } // release previous slot and its image - interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices, { .semaphore = m_semaphore.get(),.value = m_realFrameIx }); + interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices, { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1 }); // - if (solidAngleView) + if (solidAngleView && mainView) { interface.subAllocDS->multi_allocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices); // update descriptor set IGPUDescriptorSet::SDescriptorInfo infos[static_cast(CInterface::Count)] = {}; - infos[0].desc = solidAngleView; + infos[0].desc = mainView; infos[0].info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; - infos[1].desc = mainView; + infos[1].desc = solidAngleView; infos[1].info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; const IGPUDescriptorSet::SWriteDescriptorSet write[static_cast(CInterface::Count)] = { {.dstSet = interface.subAllocDS->getDescriptorSet(), .binding = TexturesImGUIBindingIndex, - .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)], + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_MAIN_VIEW)], .count = 1, .info = &infos[static_cast(CInterface::ERV_MAIN_VIEW)] }, { .dstSet = interface.subAllocDS->getDescriptorSet(), .binding = TexturesImGUIBindingIndex, - .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_MAIN_VIEW)], + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)], .count = 1, - .info = &infos[1] + .info = &infos[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)] } }; m_device->updateDescriptorSets({ write, static_cast(CInterface::Count) }, {}); @@ -728,7 +726,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 6.0f, 1.0f + 0.0f, 0.0f, 3.0f, 1.0f }; // smart_refctd_ptr m_scene; From 008e2ee154b6cf5ba725752a3f1b4dac5d37ff42 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sun, 7 Dec 2025 00:29:22 +0300 Subject: [PATCH 091/219] Scaling by pressing G to prevent conflict with WASD camera movement, also added Q and E for moving up and down --- 72_SolidAngleVisualizer/include/transform.hpp | 4 +++- common/include/nbl/examples/cameras/CCamera.hpp | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/72_SolidAngleVisualizer/include/transform.hpp b/72_SolidAngleVisualizer/include/transform.hpp index 5061ebd49..639c0fa3a 100644 --- a/72_SolidAngleVisualizer/include/transform.hpp +++ b/72_SolidAngleVisualizer/include/transform.hpp @@ -35,13 +35,15 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti static bool boundSizing = false; static bool boundSizingSnap = false; + ImGui::Text("Press T/R/G to change gizmo mode"); + if (params.editTransformDecomposition) { if (ImGui::IsKeyPressed(ImGuiKey_T)) mCurrentGizmoOperation = ImGuizmo::TRANSLATE; if (ImGui::IsKeyPressed(ImGuiKey_R)) mCurrentGizmoOperation = ImGuizmo::ROTATE; - if (ImGui::IsKeyPressed(ImGuiKey_S)) + if (ImGui::IsKeyPressed(ImGuiKey_G)) mCurrentGizmoOperation = ImGuizmo::SCALE; if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE)) mCurrentGizmoOperation = ImGuizmo::TRANSLATE; diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index f35cd341a..e5f077e46 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -39,6 +39,8 @@ class Camera enum E_CAMERA_MOVE_KEYS : uint8_t { ECMK_MOVE_FORWARD = 0, + ECMK_MOVE_UP, + ECMK_MOVE_DOWN, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT, @@ -47,6 +49,8 @@ class Camera inline void mapKeysToWASD() { + keysMap[ECMK_MOVE_UP] = nbl::ui::EKC_E; + keysMap[ECMK_MOVE_DOWN] = nbl::ui::EKC_Q; keysMap[ECMK_MOVE_FORWARD] = nbl::ui::EKC_W; keysMap[ECMK_MOVE_BACKWARD] = nbl::ui::EKC_S; keysMap[ECMK_MOVE_LEFT] = nbl::ui::EKC_A; @@ -211,7 +215,7 @@ class Camera assert(timeDiff >= 0); // handle camera movement - for (const auto logicalKey : { ECMK_MOVE_FORWARD, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT }) + for (const auto logicalKey : { ECMK_MOVE_FORWARD, ECMK_MOVE_UP, ECMK_MOVE_DOWN, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT }) { const auto code = keysMap[logicalKey]; @@ -275,6 +279,9 @@ class Camera up = nbl::core::normalize(backupUpVector); } + pos += up * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_UP] * moveSpeed * MoveSpeedScale; + pos -= up * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_DOWN] * moveSpeed * MoveSpeedScale; + nbl::core::vectorSIMDf strafevect = localTarget; if (leftHanded) strafevect = nbl::core::cross(strafevect, up); From 4290f4ab26360fbf8dac4c45c395fc4a20faf6e3 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sun, 7 Dec 2025 16:33:09 +0300 Subject: [PATCH 092/219] better clipping of arcs behind the hemisphere --- .../app_resources/hlsl/SolidAngleVis.frag.hlsl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index badf1e4be..c12c007a0 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -114,6 +114,10 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float float3 v1 = normalize(corners[edgeVerts.y]); float3 p = normalize(fragPos); // Current point on hemisphere + // Skip fragment if not in front of hemisphere or edge if both endpoints are behind horizon + if (p.z < 0.0f || (v0.z < 0.0f && v1.z < 0.0f)) + return float4(0,0,0,0); + // Great circle plane normal float3 arcNormal = normalize(cross(v0, v1)); From ba068c44c08a777bb6794b3e0f019cbdc3605480 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Mon, 8 Dec 2025 08:47:02 +0300 Subject: [PATCH 093/219] WIP quick push for shader code --- .../hlsl/SolidAngleVis.frag.hlsl | 154 +++++++++++++++--- 1 file changed, 135 insertions(+), 19 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index c12c007a0..7c96a8316 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -20,6 +20,25 @@ static const int2 edgeToFaces[12] = { {0,4}, {5,0}, {4,1}, {1,5} }; +//float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f +static const float3 constCorners[8] = { + float3(-1, -1, -1), // 0 + float3( 1, -1, -1), // 1 + float3(-1, 1, -1), // 2 + float3( 1, 1, -1), // 3 + float3(-1, -1, 1), // 4 + float3( 1, -1, 1), // 5 + float3(-1, 1, 1), // 6 + float3( 1, 1, 1) // 7 +}; + +// All 12 edges of the cube (vertex index pairs) +static const int2 allEdges[12] = { + {0, 1}, {2, 3}, {4, 5}, {6, 7}, // Edges along X axis + {0, 2}, {1, 3}, {4, 6}, {5, 7}, // Edges along Y axis + {0, 4}, {1, 5}, {2, 6}, {3, 7} // Edges along Z axis +}; + static const float3 localNormals[6] = { float3(0, 0, -1), // Face 0 (Z-) float3(0, 0, 1), // Face 1 (Z+) @@ -34,6 +53,30 @@ static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0) }; +static const float3 colorLUT[8] = { + float3(0, 0, 0), // 0: Black + float3(1, 0, 0), // 1: Red + float3(0, 1, 0), // 2: Green + float3(1, 1, 0), // 3: Yellow + float3(0, 0, 1), // 4: Blue + float3(1, 0, 1), // 5: Magenta + float3(0, 1, 1), // 6: Cyan + float3(1, 1, 1) // 7: White +}; + + + +// Vertices are ordered CCW relative to the camera view. +static const int silhouettes[8][6] = { + {2, 3, 1, 5, 4, 6}, // 0: Black + {6, 7, 5, 1, 0, 2}, // 1: Red + {7, 6, 4, 0, 1, 3}, // 2: Green + {3, 7, 5, 4, 0, 2}, // 3: Yellow + {3, 2, 0, 4, 5, 7}, // 4: Cyan + {1, 3, 7, 6, 4, 0}, // 5: Magenta + {0, 1, 5, 7, 6, 2}, // 6: White + {4, 6, 2, 3, 1, 5} // 7: Gray +}; // Converts UV into centered, aspect-corrected NDC circle space float2 toCircleSpace(float2 uv) @@ -52,7 +95,7 @@ void computeCubeGeo() { for (int i = 0; i < 8; i++) { - float3 localPos = float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f; + float3 localPos = constCorners[i]; //float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f; float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; corners[i] = worldPos.xyz; @@ -72,7 +115,7 @@ float4 drawRing(float2 p, float aaWidth) float ringDistance = abs(positionLength - CIRCLE_RADIUS); float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); - return ringAlpha.xxxx; + return ringAlpha * float4(1, 1, 1, 1); } // Check if a face on the hemisphere is visible from camera at origin @@ -105,7 +148,7 @@ int getEdgeVisibility(int edgeIdx, float3 cameraPos) return 0; } -// Draw great circle arc in fragment shader +// Draw great circle arc in fragment shader with horizon clipping float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float aaWidth) { if (visibility == 0) return float4(0,0,0,0); // Hidden edge @@ -114,8 +157,12 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float float3 v1 = normalize(corners[edgeVerts.y]); float3 p = normalize(fragPos); // Current point on hemisphere - // Skip fragment if not in front of hemisphere or edge if both endpoints are behind horizon - if (p.z < 0.0f || (v0.z < 0.0f && v1.z < 0.0f)) + // HORIZON CLIPPING: Current fragment must be on front hemisphere + if (p.z < 0.0f) + return float4(0,0,0,0); + + // HORIZON CLIPPING: Skip edge if both endpoints are behind horizon + if (v0.z < 0.0f && v1.z < 0.0f) return float4(0,0,0,0); // Great circle plane normal @@ -149,36 +196,105 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { - float3 cameraPos = float3(0, 0, 0); float4 color = float4(0, 0, 0, 0); float2 p = toCircleSpace(vx.uv); // Convert 2D disk position to 3D hemisphere position - // p is in range [-CIRCLE_RADIUS, CIRCLE_RADIUS] - float2 normalized = p / CIRCLE_RADIUS; // Now in range [-1, 1] + float2 normalized = p / CIRCLE_RADIUS; float r2 = dot(normalized, normalized); - if (r2 > 1.0f) - discard; - // Convert UV to 3D position on hemisphere float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); - computeCubeGeo(); // Your existing function + computeCubeGeo(); + + float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; + + float3 viewDir = obbCenter; + + // Is this correct? + float dotX = dot(viewDir, float3(pc.modelMatrix[0][0], pc.modelMatrix[1][0], pc.modelMatrix[2][0])); + float dotY = dot(viewDir, float3(pc.modelMatrix[0][1], pc.modelMatrix[1][1], pc.modelMatrix[2][1])); + float dotZ = dot(viewDir, float3(pc.modelMatrix[0][2], pc.modelMatrix[1][2], pc.modelMatrix[2][2])); + + // Determine octant from ray direction signs + int octant = (dotX >= 0 ? 4 : 0) + + (dotY >= 0 ? 2 : 0) + + (dotZ >= 0 ? 1 : 0); + + if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) + { + return float4(colorLUT[octant], 1.0f); + } + + float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); - float aaWidth = length(float2(ddx(p.x), ddy(p.y))); + + // Draw the 6 silhouette edges + for (int i = 0; i < 6; i++) + { + int v0Idx = silhouettes[octant][i]; + int v1Idx = silhouettes[octant][(i + 1) % 6]; + + float4 edgeContribution = drawGreatCircleArc(spherePos, int2(v0Idx, v1Idx), 1, aaWidth); + color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); + } - // Draw edges as great circle arcs - for (int j = 0; j < 12; j++) + // Draw the remaining edges (non-silhouette) in a different color + float3 hiddenEdgeColor = float3(0.3, 0.3, 0.3); // Gray color for hidden edges + + for (int i = 0; i < 12; i++) { - int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? j % 2 : 0); - int b = a + (4 >> (j / 4)); + int2 edge = allEdges[i]; + + // Check if this edge is already drawn as a silhouette edge + bool isSilhouette = false; + for (int j = 0; j < 6; j++) + { + int v0 = silhouettes[octant][j]; + int v1 = silhouettes[octant][(j + 1) % 6]; + + if ((edge.x == v0 && edge.y == v1) || (edge.x == v1 && edge.y == v0)) + { + isSilhouette = true; + break; + } + } - int visibility = getEdgeVisibility(j, cameraPos); - color += drawGreatCircleArc(spherePos, int2(a, b), visibility, aaWidth); + // Only draw if it's not a silhouette edge + if (!isSilhouette) + { + float4 edgeContribution = drawGreatCircleArc(spherePos, edge, 1, aaWidth); + color += float4(hiddenEdgeColor * edgeContribution.a, edgeContribution.a); + } + } + + // Draw corner labels for debugging + for (int i = 0; i < 8; i++) + { + float3 corner = normalize(corners[i]); + float2 cornerPos = corner.xy; + // Project corner onto 2D circle space + + // Distance from current fragment to corner + float dist = length(spherePos.xy - cornerPos); + + // Draw a small colored dot at the corner + float dotSize = 0.03f; + float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); + + if (dotAlpha > 0.0f) + { + float brightness = float(i) / 7.0f; + float3 dotColor = colorLUT[i]; + color += float4(dotColor * dotAlpha, dotAlpha); + } } color += drawRing(p, aaWidth); + + // if (r2 > 1.1f) + // color.a = 0.0f; // Outside circle, make transparent return color; } \ No newline at end of file From 91ae8657dee9b4de82c81b97b23b83d3824a6011 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:20:01 +0300 Subject: [PATCH 094/219] Fixed main camera aspect ratio, added 27 configurations for cube silhouette --- .../hlsl/SolidAngleVis.frag.hlsl | 248 ++++++++++++------ 72_SolidAngleVisualizer/include/transform.hpp | 2 +- 72_SolidAngleVisualizer/main.cpp | 9 +- 3 files changed, 167 insertions(+), 92 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index 7c96a8316..fa0805356 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -53,29 +53,84 @@ static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0) }; -static const float3 colorLUT[8] = { +static const float3 colorLUT[27] = { + // Row 1: Pure and bright colors float3(0, 0, 0), // 0: Black - float3(1, 0, 0), // 1: Red - float3(0, 1, 0), // 2: Green - float3(1, 1, 0), // 3: Yellow - float3(0, 0, 1), // 4: Blue - float3(1, 0, 1), // 5: Magenta - float3(0, 1, 1), // 6: Cyan - float3(1, 1, 1) // 7: White + float3(1, 1, 1), // 1: White + float3(0.5, 0.5, 0.5), // 2: Gray + + // Row 2: Primary colors + float3(1, 0, 0), // 3: Red + float3(0, 1, 0), // 4: Green + float3(0, 0, 1), // 5: Blue + + // Row 3: Secondary colors + float3(1, 1, 0), // 6: Yellow + float3(1, 0, 1), // 7: Magenta + float3(0, 1, 1), // 8: Cyan + + // Row 4: Orange family + float3(1, 0.5, 0), // 9: Orange + float3(1, 0.65, 0), // 10: Light Orange + float3(0.8, 0.4, 0), // 11: Dark Orange + + // Row 5: Pink/Rose family + float3(1, 0.4, 0.7), // 12: Pink + float3(1, 0.75, 0.8), // 13: Light Pink + float3(0.7, 0.1, 0.3), // 14: Deep Rose + + // Row 6: Purple/Violet family + float3(0.5, 0, 0.5), // 15: Purple + float3(0.6, 0.4, 0.8), // 16: Light Purple + float3(0.3, 0, 0.5), // 17: Indigo + + // Row 7: Green variations + float3(0, 0.5, 0), // 18: Dark Green + float3(0.5, 1, 0), // 19: Lime + float3(0, 0.5, 0.25), // 20: Forest Green + + // Row 8: Blue variations + float3(0, 0, 0.5), // 21: Navy + float3(0.3, 0.7, 1), // 22: Sky Blue + float3(0, 0.4, 0.6), // 23: Teal + + // Row 9: Earth tones + float3(0.6, 0.4, 0.2), // 24: Brown + float3(0.8, 0.7, 0.3), // 25: Tan/Beige + float3(0.4, 0.3, 0.1) // 26: Dark Brown }; // Vertices are ordered CCW relative to the camera view. -static const int silhouettes[8][6] = { - {2, 3, 1, 5, 4, 6}, // 0: Black - {6, 7, 5, 1, 0, 2}, // 1: Red - {7, 6, 4, 0, 1, 3}, // 2: Green - {3, 7, 5, 4, 0, 2}, // 3: Yellow - {3, 2, 0, 4, 5, 7}, // 4: Cyan - {1, 3, 7, 6, 4, 0}, // 5: Magenta - {0, 1, 5, 7, 6, 2}, // 6: White - {4, 6, 2, 3, 1, 5} // 7: Gray +static const int silhouettes[27][7] = { + {6, 1, 3, 2, 6, 4, 5}, // 0: Black + {6, 2, 6, 4, 5, 7, 3}, // 1: White + {6, 0, 4, 5, 7, 3, 2}, // 2: Gray + {6, 1, 3, 7, 6, 4, 5,}, // 3: Red + {4, 4, 5, 7, 6, -1, -1}, // 4: Green + {6, 0, 4, 5, 7, 6, 2}, // 5: Blue + {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow + {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta + {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan + {6, 1, 3, 2, 6, 7, 5}, // 9: Orange + {4, 2, 6, 7, 3, -1, -1}, // 10: Light Orange + {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange + {4, 1, 3, 7, 5, -1, -1}, // 12: Pink + {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink + {4, 0, 4, 6, 2, -1, -1}, // 14: Deep Rose + {6, 0, 1, 3, 7, 5, 4}, // 15: Purple + {4, 0, 1, 5, 4, -1, -1}, // 16: Light Purple + {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo + {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green + {6, 0, 2, 6, 7, 3, 1}, // 19: Lime + {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green + {6, 0, 2, 3, 7, 5, 1}, // 21: Navy + {4, 0, 2, 3, 1, -1, -1}, // 22: Sky Blue + {6, 0, 4, 6, 2, 3, 1}, // 23: Teal + {6, 0, 2, 3, 7, 5, 4}, // 24: Brown + {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige + {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown }; // Converts UV into centered, aspect-corrected NDC circle space @@ -106,6 +161,33 @@ void computeCubeGeo() } } +float4 drawCorners(float3 spherePos, float aaWidth) +{ + float4 color = float4(0,0,0,0); + // Draw corner labels for debugging + for (int i = 0; i < 8; i++) + { + float3 corner = normalize(corners[i]); + float2 cornerPos = corner.xy; + // Project corner onto 2D circle space + + // Distance from current fragment to corner + float dist = length(spherePos.xy - cornerPos); + + // Draw a small colored dot at the corner + float dotSize = 0.03f; + float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); + + if (dotAlpha > 0.0f) + { + float brightness = float(i) / 7.0f; + float3 dotColor = colorLUT[i]; + color += float4(dotColor * dotAlpha, dotAlpha); + } + } + return color; +} + float4 drawRing(float2 p, float aaWidth) { float positionLength = length(p); @@ -194,54 +276,11 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float return edgeColor * alpha * intensity; } -[[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 +float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) { - float4 color = float4(0, 0, 0, 0); - float2 p = toCircleSpace(vx.uv); - - // Convert 2D disk position to 3D hemisphere position - float2 normalized = p / CIRCLE_RADIUS; - float r2 = dot(normalized, normalized); - - // Convert UV to 3D position on hemisphere - float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); - - computeCubeGeo(); - - float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; - - float3 viewDir = obbCenter; - - // Is this correct? - float dotX = dot(viewDir, float3(pc.modelMatrix[0][0], pc.modelMatrix[1][0], pc.modelMatrix[2][0])); - float dotY = dot(viewDir, float3(pc.modelMatrix[0][1], pc.modelMatrix[1][1], pc.modelMatrix[2][1])); - float dotZ = dot(viewDir, float3(pc.modelMatrix[0][2], pc.modelMatrix[1][2], pc.modelMatrix[2][2])); - - // Determine octant from ray direction signs - int octant = (dotX >= 0 ? 4 : 0) + - (dotY >= 0 ? 2 : 0) + - (dotZ >= 0 ? 1 : 0); - - if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) - { - return float4(colorLUT[octant], 1.0f); - } - - float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); - - - // Draw the 6 silhouette edges - for (int i = 0; i < 6; i++) - { - int v0Idx = silhouettes[octant][i]; - int v1Idx = silhouettes[octant][(i + 1) % 6]; - - float4 edgeContribution = drawGreatCircleArc(spherePos, int2(v0Idx, v1Idx), 1, aaWidth); - color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); - } - + float4 color = float4(0,0,0,0); // Draw the remaining edges (non-silhouette) in a different color - float3 hiddenEdgeColor = float3(0.3, 0.3, 0.3); // Gray color for hidden edges + float3 hiddenEdgeColor = float3(0.3, 0.3, 0); // dark yellow color for hidden edges for (int i = 0; i < 12; i++) { @@ -249,12 +288,14 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float // Check if this edge is already drawn as a silhouette edge bool isSilhouette = false; - for (int j = 0; j < 6; j++) + int vertexCount = silhouettes[configIndex][0]; + // Draw the 6 silhouette edges + for (int i = 0; i < vertexCount; i++) { - int v0 = silhouettes[octant][j]; - int v1 = silhouettes[octant][(j + 1) % 6]; + int v0Idx = silhouettes[configIndex][i + 1]; + int v1Idx = silhouettes[configIndex][((i + 1) % vertexCount) + 1]; - if ((edge.x == v0 && edge.y == v1) || (edge.x == v1 && edge.y == v0)) + if ((edge.x == v0Idx && edge.y == v1Idx) || (edge.x == v1Idx && edge.y == v0Idx)) { isSilhouette = true; break; @@ -268,33 +309,66 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float color += float4(hiddenEdgeColor * edgeContribution.a, edgeContribution.a); } } + return color; +} - // Draw corner labels for debugging - for (int i = 0; i < 8; i++) +[[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 +{ + float4 color = float4(0, 0, 0, 0); + float2 p = toCircleSpace(vx.uv); + + // Convert 2D disk position to 3D hemisphere position + float2 normalized = p / CIRCLE_RADIUS; + float r2 = dot(normalized, normalized); + float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); + + if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) { - float3 corner = normalize(corners[i]); - float2 cornerPos = corner.xy; - // Project corner onto 2D circle space - - // Distance from current fragment to corner - float dist = length(spherePos.xy - cornerPos); - - // Draw a small colored dot at the corner - float dotSize = 0.03f; - float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); + return float4(colorLUT[configIndex], 1.0f); + } + + // Convert UV to 3D position on hemisphere + float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); + + computeCubeGeo(); + + // Get OBB center in world space + float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; + + float3x3 rotMatrix = (float3x3)pc.modelMatrix; + float3 proj = mul(obbCenter, rotMatrix); // Get all 3 projections at once + + // Get squared column lengths + float lenSqX = dot(rotMatrix[0], rotMatrix[0]); + float lenSqY = dot(rotMatrix[1], rotMatrix[1]); + float lenSqZ = dot(rotMatrix[2], rotMatrix[2]); + + int3 region = int3( + proj.x < -lenSqX ? 0 : (proj.x > lenSqX ? 2 : 1), + proj.y < -lenSqY ? 0 : (proj.y > lenSqY ? 2 : 1), + proj.z < -lenSqZ ? 0 : (proj.z > lenSqZ ? 2 : 1) + ); + + int configIndex = region.x + region.y * 3 + region.z * 9; // 0-26 + + int vertexCount = silhouettes[configIndex][0]; + for (int i = 0; i < vertexCount; i++) + { + int v0Idx = silhouettes[configIndex][i + 1]; + int v1Idx = silhouettes[configIndex][((i + 1) % vertexCount) + 1]; - if (dotAlpha > 0.0f) - { - float brightness = float(i) / 7.0f; - float3 dotColor = colorLUT[i]; - color += float4(dotColor * dotAlpha, dotAlpha); - } + float4 edgeContribution = drawGreatCircleArc(spherePos, int2(v0Idx, v1Idx), 1, aaWidth); + color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); } + color += drawHiddenEdges(spherePos, configIndex, aaWidth); + + color += drawCorners(spherePos, aaWidth); + color += drawRing(p, aaWidth); - // if (r2 > 1.1f) - // color.a = 0.0f; // Outside circle, make transparent + if (r2 > 1.1f) + color.a = 0.0f; // Outside circle, make transparent return color; } \ No newline at end of file diff --git a/72_SolidAngleVisualizer/include/transform.hpp b/72_SolidAngleVisualizer/include/transform.hpp index 639c0fa3a..105b2f757 100644 --- a/72_SolidAngleVisualizer/include/transform.hpp +++ b/72_SolidAngleVisualizer/include/transform.hpp @@ -19,7 +19,7 @@ struct TransformRequestParams struct TransformReturnInfo { - nbl::hlsl::uint16_t2 sceneResolution = { 0, 0 }; + nbl::hlsl::uint16_t2 sceneResolution = { 1, 1 }; bool isGizmoWindowHovered; bool isGizmoBeingUsed; }; diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index 8fb8bf144..5f73797a6 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -753,16 +753,17 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // TODO: why is this a lambda and not just an assignment in a scope ? camera.setProjectionMatrix([&]() { - matrix4SIMD projection; + const auto& sceneRes = mainViewTransformReturnInfo.sceneResolution; + matrix4SIMD projection; if (isPerspective) if (isLH) - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), sceneRes.x / sceneRes.y, zNear, zFar); else - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), sceneRes.x / sceneRes.y, zNear, zFar); else { - float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; + float viewHeight = viewWidth * sceneRes.y / sceneRes.x; if (isLH) projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar); From 0124cc9c0ad83d4a38f1e8ac3ddcdf56125740ac Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:30:34 +0300 Subject: [PATCH 095/219] Shader fixes, bast uint16 resolutionf to float --- .../app_resources/hlsl/SolidAngleVis.frag.hlsl | 16 +++++++++------- 72_SolidAngleVisualizer/main.cpp | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index fa0805356..ec30c2b64 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -322,10 +322,7 @@ float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) float r2 = dot(normalized, normalized); float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); - if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) - { - return float4(colorLUT[configIndex], 1.0f); - } + // Convert UV to 3D position on hemisphere float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); @@ -350,7 +347,7 @@ float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) ); int configIndex = region.x + region.y * 3 + region.z * 9; // 0-26 - + int vertexCount = silhouettes[configIndex][0]; for (int i = 0; i < vertexCount; i++) { @@ -367,8 +364,13 @@ float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) color += drawRing(p, aaWidth); - if (r2 > 1.1f) - color.a = 0.0f; // Outside circle, make transparent + if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) + { + return float4(colorLUT[configIndex], 1.0f); + } + + // if (r2 > 1.1f) + // color.a = 0.0f; // Outside circle, make transparent return color; } \ No newline at end of file diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index 5f73797a6..85685e705 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -753,7 +753,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // TODO: why is this a lambda and not just an assignment in a scope ? camera.setProjectionMatrix([&]() { - const auto& sceneRes = mainViewTransformReturnInfo.sceneResolution; + const auto& sceneRes = float16_t2(mainViewTransformReturnInfo.sceneResolution); matrix4SIMD projection; if (isPerspective) From a35eddd1bd83fbf636e820b59c6eef939ed09668 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:44:42 +0300 Subject: [PATCH 096/219] Better color for non-silhouette edges --- .../app_resources/hlsl/SolidAngleVis.frag.hlsl | 2 +- 72_SolidAngleVisualizer/main.cpp | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index ec30c2b64..51cb1946d 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -280,7 +280,7 @@ float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) { float4 color = float4(0,0,0,0); // Draw the remaining edges (non-silhouette) in a different color - float3 hiddenEdgeColor = float3(0.3, 0.3, 0); // dark yellow color for hidden edges + float3 hiddenEdgeColor = float3(0.1, 0.1, 0.1); // dark yellow color for hidden edges for (int i = 0; i < 12; i++) { diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index 85685e705..e9266520d 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -933,9 +933,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR transformParams.editTransformDecomposition = true; mainViewTransformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); - // MODEL: Zup -> Yup - - m_OBBModelMatrix = imguizmoM16InOut.model; // TODO: camera stops when cursor hovers gizmo, but we also want to stop when gizmo is being used move = (ImGui::IsMouseDown(ImGuiMouseButton_Left) || mainViewTransformReturnInfo.isGizmoWindowHovered) && (!mainViewTransformReturnInfo.isGizmoBeingUsed); From b253e4a2a403e46e0d3b9f02bd7e2265563ef7c6 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 9 Dec 2025 18:22:53 +0100 Subject: [PATCH 097/219] Fixed camera --- .../include/nbl/examples/cameras/CCamera.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index c0965b1e9..90abc06ce 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -12,8 +12,7 @@ #include #include -#include -#include +#include class Camera { @@ -77,7 +76,7 @@ class Camera { leftHanded = nbl::hlsl::determinant(hlslMatMap) < 0.f; } - concatMatrix = nbl::hlsl::mul(projMatrix, nbl::hlsl::getMatrix3x4As4x4(viewMatrix)); + concatMatrix = nbl::hlsl::mul(projMatrix, nbl::hlsl::math::linalg::promote_affine<4,4,3,4>(viewMatrix)); } inline void setPosition(const nbl::core::vectorSIMDf& pos) @@ -123,16 +122,17 @@ class Camera // problem. so solve this problem: nbl::hlsl::float32_t3 up = nbl::core::convertToHLSLVector(nbl::core::normalize(upVector)).xyz; nbl::hlsl::float32_t3 cross = nbl::hlsl::cross(localTarget, up); - const bool upVectorNeedsChange = nbl::hlsl::lengthsquared(cross) == 0; + const float squaredLength = dot(cross, cross); + const bool upVectorNeedsChange = squaredLength == 0; if (upVectorNeedsChange) up = nbl::core::convertToHLSLVector(nbl::core::normalize(backupUpVector)); if (leftHanded) - viewMatrix = nbl::hlsl::buildCameraLookAtMatrixLH(pos, _target, up); + viewMatrix = nbl::hlsl::math::linalg::lhLookAt(pos, _target, up); else - viewMatrix = nbl::hlsl::buildCameraLookAtMatrixRH(pos, _target, up); + viewMatrix = nbl::hlsl::math::linalg::rhLookAt(pos, _target, up); - concatMatrix = nbl::hlsl::mul(projMatrix, nbl::hlsl::getMatrix3x4As4x4(viewMatrix)); + concatMatrix = nbl::hlsl::mul(projMatrix, nbl::hlsl::math::linalg::promote_affine<4, 4, 3, 4>(viewMatrix)); } inline bool getLeftHanded() const { return leftHanded; } @@ -182,8 +182,8 @@ class Camera pos.w = 0; localTarget = nbl::hlsl::float32_t4(0, 0, nbl::core::max(1.f, nbl::hlsl::length(pos)), 1.0f); - nbl::hlsl::float32_t3x4 mat; - nbl::hlsl::setRotation(mat, nbl::hlsl::quaternion::create(relativeRotationX, relativeRotationY, 0)); + const nbl::hlsl::math::quaternion quat = nbl::hlsl::math::quaternion::create(relativeRotationX, relativeRotationY, 0); + nbl::hlsl::float32_t3x4 mat = nbl::hlsl::math::linalg::promote_affine<3, 4, 3, 3>(quat.constructMatrix()); localTarget = nbl::hlsl::float32_t4(nbl::hlsl::mul(mat, localTarget), 1.0f); From aa8c079d50e4761db67ad97f6e5df10ee754a4d2 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 10 Dec 2025 14:20:13 +0100 Subject: [PATCH 098/219] Fixed another camera bug --- 09_GeometryCreator/main.cpp | 6 +++--- common/include/nbl/examples/cameras/CCamera.hpp | 12 +++++------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/09_GeometryCreator/main.cpp b/09_GeometryCreator/main.cpp index eaa1f78f4..06521a6d2 100644 --- a/09_GeometryCreator/main.cpp +++ b/09_GeometryCreator/main.cpp @@ -2,8 +2,8 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include -#include +#include +#include #include "common.hpp" @@ -75,7 +75,7 @@ class GeometryCreatorApp final : public MonoWindowApplication, public BuiltinRes { core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); - float32_t4x4 projectionMatrix = hlsl::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(m_initialResolution.x) / m_initialResolution.y, 0.1f, 10000.0f); + float32_t4x4 projectionMatrix = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(60.0f), float(m_initialResolution.x) / m_initialResolution.y, 0.1f, 10000.0f); camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); } diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index 90abc06ce..2f4e2472a 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -13,6 +13,7 @@ #include #include +#include class Camera { @@ -28,6 +29,7 @@ class Camera , rotateSpeed(rotateSpeed) , upVector(upVec) , backupUpVector(backupUpVec) + , viewMatrix(nbl::hlsl::math::linalg::diagonal(1.0f)) { initDefaultKeysMap(); allKeysUp(); @@ -71,12 +73,8 @@ class Camera inline void setProjectionMatrix(const nbl::hlsl::float32_t4x4& projection) { projMatrix = projection; - - const auto hlslMatMap = *reinterpret_cast(&projMatrix); // TEMPORARY TILL THE CAMERA CLASS IS REFACTORED TO WORK WITH HLSL MATRICIES! - { - leftHanded = nbl::hlsl::determinant(hlslMatMap) < 0.f; - } - concatMatrix = nbl::hlsl::mul(projMatrix, nbl::hlsl::math::linalg::promote_affine<4,4,3,4>(viewMatrix)); + leftHanded = nbl::hlsl::determinant(projMatrix) < 0.f; + concatMatrix = nbl::hlsl::math::linalg::promoted_mul(projMatrix, viewMatrix); } inline void setPosition(const nbl::core::vectorSIMDf& pos) @@ -132,7 +130,7 @@ class Camera else viewMatrix = nbl::hlsl::math::linalg::rhLookAt(pos, _target, up); - concatMatrix = nbl::hlsl::mul(projMatrix, nbl::hlsl::math::linalg::promote_affine<4, 4, 3, 4>(viewMatrix)); + concatMatrix = nbl::hlsl::math::linalg::promoted_mul(projMatrix, viewMatrix); } inline bool getLeftHanded() const { return leftHanded; } From 1dec4de5e5e92040150bf529ec311183efff3c8c Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 10 Dec 2025 17:25:22 +0100 Subject: [PATCH 099/219] Fixed multiple examples --- 12_MeshLoaders/main.cpp | 4 ++-- 61_UI/main.cpp | 20 ++++++++++---------- 67_RayQueryGeometry/include/common.hpp | 4 ++-- 67_RayQueryGeometry/main.cpp | 20 +++++++++----------- 70_FLIPFluids/main.cpp | 8 ++++---- 5 files changed, 27 insertions(+), 29 deletions(-) diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index a012d0675..241fa5117 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -5,7 +5,7 @@ #include "common.hpp" #include "../3rdparty/portable-file-dialogs/portable-file-dialogs.h" -#include +#include #ifdef NBL_BUILD_MITSUBA_LOADER #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" @@ -452,7 +452,7 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc { const auto measure = hlsl::length(diagonal); const auto aspectRatio = float(m_window->getWidth()) / float(m_window->getHeight()); - camera.setProjectionMatrix(hlsl::buildProjectionMatrixPerspectiveFovRH(1.2f, aspectRatio, distance * measure * 0.1, measure * 4.0)); + camera.setProjectionMatrix(hlsl::math::thin_lens::rhPerspectiveFovMatrix(1.2f, aspectRatio, distance * measure * 0.1, measure * 4.0)); camera.setMoveSpeed(measure * 0.04); } const auto pos = bound.maxVx + diagonal * distance; diff --git a/61_UI/main.cpp b/61_UI/main.cpp index 88f41fe6b..503a2e421 100644 --- a/61_UI/main.cpp +++ b/61_UI/main.cpp @@ -3,7 +3,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" -#include +#include /* Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window. @@ -570,17 +570,17 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA if (isPerspective) if(isLH) - projection = hlsl::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); else - projection = hlsl::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); else { float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; if(isLH) - projection = hlsl::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar); + projection = hlsl::math::thin_lens::lhPerspectiveFovMatrix(viewWidth, viewHeight, zNear, zFar); else - projection = hlsl::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar); + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(viewWidth, viewHeight, zNear, zFar); } return projection; @@ -723,9 +723,9 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA ImGuizmo::SetID(0u); - imguizmoM16InOut.view = hlsl::transpose(hlsl::getMatrix3x4As4x4(camera.getViewMatrix())); + imguizmoM16InOut.view = hlsl::transpose(hlsl::math::linalg::promote_affine<4,4,3,4>(camera.getViewMatrix())); imguizmoM16InOut.projection = hlsl::transpose(camera.getProjectionMatrix()); - imguizmoM16InOut.model = hlsl::transpose(hlsl::getMatrix3x4As4x4(model)); + imguizmoM16InOut.model = hlsl::transpose(hlsl::math::linalg::promote_affine<4,4,3,4>(model)); { if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ @@ -734,14 +734,14 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA sceneResolution = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); } - model = hlsl::extractSub3x4From4x4Matrix(hlsl::transpose(imguizmoM16InOut.model)); + model = hlsl::math::linalg::truncate<3,4,4,4>(hlsl::transpose(imguizmoM16InOut.model)); // to Nabla + update camera & model matrices // TODO: make it more nicely, extract: // - Position by computing inverse of the view matrix and grabbing its translation // - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position // But then set the view matrix this way anyway, because up-vector may not be compatible const auto& view = camera.getViewMatrix(); - const_cast(view) = hlsl::extractSub3x4From4x4Matrix(hlsl::transpose(imguizmoM16InOut.view)); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) + const_cast(view) = hlsl::math::linalg::truncate<3,4,4,4>(hlsl::transpose(imguizmoM16InOut.view)); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) // update concatanated matrix const auto& projection = camera.getProjectionMatrix(); camera.setProjectionMatrix(projection); @@ -864,7 +864,7 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA // Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); // mutables - hlsl::float32_t3x4 model = identity(); + hlsl::float32_t3x4 model = hlsl::math::linalg::diagonal(1.0f); std::string_view objectName; TransformRequestParams transformParams; uint16_t2 sceneResolution = {1280,720}; diff --git a/67_RayQueryGeometry/include/common.hpp b/67_RayQueryGeometry/include/common.hpp index 84b0a3dcf..ac774b0df 100644 --- a/67_RayQueryGeometry/include/common.hpp +++ b/67_RayQueryGeometry/include/common.hpp @@ -23,9 +23,9 @@ using GeometryCollectionData = core::smart_refctd_ptr; using GeometryData = std::variant; struct ReferenceObjectCpu { - core::matrix3x4SIMD transform; + hlsl::float32_t3x4 transform; GeometryData data; - uint32_t instanceID; + uint32_t instanceID; }; } diff --git a/67_RayQueryGeometry/main.cpp b/67_RayQueryGeometry/main.cpp index 2783385f2..2463bac85 100644 --- a/67_RayQueryGeometry/main.cpp +++ b/67_RayQueryGeometry/main.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" +#include class RayQueryGeometryApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { @@ -197,7 +198,7 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built { core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(WIN_W) / WIN_H, 0.1, 1000); + hlsl::float32_t4x4 projectionMatrix = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(60.0f), float(WIN_W) / WIN_H, 0.1f, 1000.0f); camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); } @@ -266,13 +267,10 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built const auto projectionMatrix = camera.getProjectionMatrix(); const auto viewProjectionMatrix = camera.getConcatenatedMatrix(); - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - modelMatrix.setRotation(quaternion(0, 0, 0)); + hlsl::float32_t3x4 modelMatrix = hlsl::math::linalg::identity(); - core::matrix4SIMD modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); - core::matrix4SIMD invModelViewProjectionMatrix; - modelViewProjectionMatrix.getInverseTransform(invModelViewProjectionMatrix); + hlsl::float32_t4x4 modelViewProjectionMatrix = nbl::hlsl::math::linalg::promoted_mul(viewProjectionMatrix, modelMatrix); + hlsl::float32_t4x4 invModelViewProjectionMatrix = hlsl::inverse(modelViewProjectionMatrix); auto* queue = getGraphicsQueue(); @@ -305,7 +303,7 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built const core::vector3df camPos = camera.getPosition().getAsVector3df(); pc.camPos = { camPos.X, camPos.Y, camPos.Z }; - memcpy(&pc.invMVP, invModelViewProjectionMatrix.pointer(), sizeof(pc.invMVP)); + pc.invMVP = invModelViewProjectionMatrix; pc.scaleNDC = { 2.f / WIN_W, -2.f / WIN_H }; pc.offsetNDC = { -1.f, 1.f }; @@ -494,8 +492,8 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built auto transform_i = 0; auto nextTransform = [&transform_i]() { - core::matrix3x4SIMD transform; - transform.setTranslation(nbl::core::vectorSIMDf(5.f * transform_i, 0, 0, 0)); + hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); + hlsl::math::linalg::setTranslation(transform, hlsl::float32_t3(5.f * transform_i, 0.0f, 0.0f)); transform_i++; return transform; }; @@ -981,7 +979,7 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); video::CDumbPresentationOracle oracle; smart_refctd_ptr geometryInfoBuffer; diff --git a/70_FLIPFluids/main.cpp b/70_FLIPFluids/main.cpp index a867bf353..4e7a9aeae 100644 --- a/70_FLIPFluids/main.cpp +++ b/70_FLIPFluids/main.cpp @@ -6,7 +6,7 @@ #include "nbl/examples/examples.hpp" // TODO: why is it not in nabla.h ? #include "nbl/asset/metadata/CHLSLMetadata.h" -#include "nbl/builtin/hlsl/projection/projection.hlsl" +#include using namespace nbl; using namespace nbl::core; @@ -232,7 +232,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso float zNear = 0.1f, zFar = 10000.f; core::vectorSIMDf cameraPosition(14, 8, 12); core::vectorSIMDf cameraTarget(0, 0, 0); - hlsl::float32_t4x4 projectionMatrix = hlsl::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(WIN_WIDTH) / WIN_HEIGHT, zNear, zFar); + hlsl::float32_t4x4 projectionMatrix = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(60.0f), float(WIN_WIDTH) / WIN_HEIGHT, zNear, zFar); camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); m_pRenderParams.zNear = zNear; @@ -885,12 +885,12 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso const auto projectionMatrix = camera.getProjectionMatrix(); const auto viewProjectionMatrix = camera.getConcatenatedMatrix(); - hlsl::float32_t3x4 modelMatrix = hlsl::identity(); + hlsl::float32_t3x4 modelMatrix = hlsl::math::linalg::identity(); hlsl::float32_t3x4 modelViewMatrix = viewMatrix; hlsl::float32_t4x4 modelViewProjectionMatrix = viewProjectionMatrix; - auto modelMat = hlsl::getMatrix3x4As4x4(modelMatrix); + auto modelMat = hlsl::math::linalg::promote_affine<4, 4, 3, 4>(modelMatrix); const core::vector3df camPos = camera.getPosition().getAsVector3df(); From 8eeca4fb996ae591c22e1d726f5064e4c2496838 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 12 Dec 2025 15:41:04 +0700 Subject: [PATCH 100/219] some crappy basic tests converting rot mat to quat and back --- 22_CppCompat/CTgmathTester.h | 29 +++++++++++++++++++++++++++++ 22_CppCompat/main.cpp | 3 +++ 2 files changed, 32 insertions(+) diff --git a/22_CppCompat/CTgmathTester.h b/22_CppCompat/CTgmathTester.h index 63b0e483e..3ab69f53f 100644 --- a/22_CppCompat/CTgmathTester.h +++ b/22_CppCompat/CTgmathTester.h @@ -7,6 +7,8 @@ #include "app_resources/common.hlsl" #include "ITester.h" +#include "nbl/builtin/hlsl/math/quaternions.hlsl" + using namespace nbl; @@ -355,6 +357,33 @@ class CTgmathTester final : public ITester verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.exponent, testValues.frexpStruct.exponent, testType); verifyTestVector3dValue("frexpStructVec", expectedTestValues.frexpStructVec.significand, testValues.frexpStructVec.significand, testType); verifyTestVector3dValue("frexpStructVec", expectedTestValues.frexpStructVec.exponent, testValues.frexpStructVec.exponent, testType); + + + { + float32_t angle = 0.5; + float32_t2 dir = float32_t2{ cos(angle), sin(angle) }; + float32_t3x3 rotateMat = + { + dir.x, -dir.y, 0.0, + dir.y, dir.x, 0.0, + 0.0, 0.0, 1.0 + }; + + float32_t scale = 100.0; + float32_t3x3 scaleMat = + { + scale, 0.0, 0.0, + 0.0, scale, 0.0, + 0.0, 0.0, scale + }; + + float32_t3x3 expectedTransform = nbl::hlsl::mul(rotateMat, scaleMat); + + math::quaternion quat = math::quaternion::create(expectedTransform); + float32_t3x3 testTransform = quat.constructMatrix(); + + verifyTestMatrix3x3Value("quaternion create from matrix", expectedTransform, testTransform, testType); + } } }; diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index 70c8d7b3a..896899a46 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -67,6 +67,9 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat pplnSetupData.physicalDevice = m_physicalDevice; pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + if (!concepts::Matricial) + return logFail("assert on float32_t3x3 doesn't work!\n"); + { CTgmathTester tgmathTester; pplnSetupData.testShaderPath = "app_resources/tgmathTest.comp.hlsl"; From df06c150d44c6e8ac20f07bb9de57d8b8ea14d02 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 13:10:09 +0100 Subject: [PATCH 101/219] Fixed a warning --- 73_Mortons/app_resources/test.comp.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/73_Mortons/app_resources/test.comp.hlsl b/73_Mortons/app_resources/test.comp.hlsl index 13b5a32f0..591915109 100644 --- a/73_Mortons/app_resources/test.comp.hlsl +++ b/73_Mortons/app_resources/test.comp.hlsl @@ -12,7 +12,7 @@ [shader("compute")] void main() { - const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID(); + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; TestExecutor executor; executor(inputTestValues[invID], outputTestValues[invID]); } From cc4f871dce0ccf56b54118c4e90ecf2b3107d19e Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 16:08:29 +0100 Subject: [PATCH 102/219] Fixed compilation errors, adapted example 14 to new testing interface --- 14_Mortons/CTester.h | 585 +++++++++---------- 14_Mortons/ITester.h | 383 ------------ 14_Mortons/app_resources/test2.comp.hlsl | 2 +- 14_Mortons/app_resources/testCommon.hlsl | 35 +- 14_Mortons/app_resources/testCommon2.hlsl | 32 +- 14_Mortons/main.cpp | 36 +- common/include/nbl/examples/Tester/ITester.h | 27 +- 7 files changed, 364 insertions(+), 736 deletions(-) delete mode 100644 14_Mortons/ITester.h diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h index 5b9720a7d..e25fa58a2 100644 --- a/14_Mortons/CTester.h +++ b/14_Mortons/CTester.h @@ -3,8 +3,8 @@ #include #include "app_resources/testCommon.hlsl" -#include "nbl/examples/Tester/ITester.h" #include "app_resources/testCommon2.hlsl" +#include "nbl/examples/Tester/ITester.h" using namespace nbl; @@ -13,9 +13,6 @@ class CTester final : public ITester using base_t = ITester; public: - /** - * @param testBatchCount one test batch is 128 tests - */ CTester(const uint32_t testBatchCount) : base_t(testBatchCount) {}; @@ -58,6 +55,7 @@ class CTester final : public ITester expected.emulatedNot = _static_cast(~generatedA); expected.emulatedPlus = _static_cast(generatedA + generatedB); expected.emulatedMinus = _static_cast(generatedA - generatedB); + expected.emulatedUnaryMinus = _static_cast(-generatedA); expected.emulatedLess = uint32_t(generatedA < generatedB); expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); expected.emulatedGreater = uint32_t(generatedA > generatedB); @@ -71,88 +69,88 @@ class CTester final : public ITester uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; - uint16_t2 Vec2ASmall = uint16_t2(Vec2A & smallBitsMask_2); - uint16_t2 Vec2BSmall = uint16_t2(Vec2B & smallBitsMask_2); - uint16_t2 Vec2AMedium = uint16_t2(Vec2A & mediumBitsMask_2); - uint16_t2 Vec2BMedium = uint16_t2(Vec2B & mediumBitsMask_2); - uint32_t2 Vec2AFull = uint32_t2(Vec2A & fullBitsMask_2); - uint32_t2 Vec2BFull = uint32_t2(Vec2B & fullBitsMask_2); + uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; - uint16_t3 Vec3ASmall = uint16_t3(Vec3A & smallBitsMask_3); - uint16_t3 Vec3BSmall = uint16_t3(Vec3B & smallBitsMask_3); - uint16_t3 Vec3AMedium = uint16_t3(Vec3A & mediumBitsMask_3); - uint16_t3 Vec3BMedium = uint16_t3(Vec3B & mediumBitsMask_3); - uint32_t3 Vec3AFull = uint32_t3(Vec3A & fullBitsMask_3); - uint32_t3 Vec3BFull = uint32_t3(Vec3B & fullBitsMask_3); + uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; - uint16_t4 Vec4ASmall = uint16_t4(Vec4A & smallBitsMask_4); - uint16_t4 Vec4BSmall = uint16_t4(Vec4B & smallBitsMask_4); - uint16_t4 Vec4AMedium = uint16_t4(Vec4A & mediumBitsMask_4); - uint16_t4 Vec4BMedium = uint16_t4(Vec4B & mediumBitsMask_4); - uint16_t4 Vec4AFull = uint16_t4(Vec4A & fullBitsMask_4); - uint16_t4 Vec4BFull = uint16_t4(Vec4B & fullBitsMask_4); + uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them // so their highest bits are all 0s or 1s depending on the sign of the number they encode - int16_t2 Vec2ASignedSmall = int16_t2(Vec2ASmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); - int16_t2 Vec2BSignedSmall = int16_t2(Vec2BSmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); - int16_t2 Vec2ASignedMedium = int16_t2(Vec2AMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); - int16_t2 Vec2BSignedMedium = int16_t2(Vec2BMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); - int32_t2 Vec2ASignedFull = int32_t2(Vec2AFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); - int32_t2 Vec2BSignedFull = int32_t2(Vec2BFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); - - int16_t3 Vec3ASignedSmall = int16_t3(Vec3ASmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); - int16_t3 Vec3BSignedSmall = int16_t3(Vec3BSmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); - int16_t3 Vec3ASignedMedium = int16_t3(Vec3AMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); - int16_t3 Vec3BSignedMedium = int16_t3(Vec3BMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); - int32_t3 Vec3ASignedFull = int32_t3(Vec3AFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); - int32_t3 Vec3BSignedFull = int32_t3(Vec3BFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); - - int16_t4 Vec4ASignedSmall = int16_t4(Vec4ASmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); - int16_t4 Vec4BSignedSmall = int16_t4(Vec4BSmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); - int16_t4 Vec4ASignedMedium = int16_t4(Vec4AMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); - int16_t4 Vec4BSignedMedium = int16_t4(Vec4BMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); - int16_t4 Vec4ASignedFull = int16_t4(Vec4AFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); - int16_t4 Vec4BSignedFull = int16_t4(Vec4BFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); + int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); // Plus - expected.mortonPlus_small_2 = morton::code::create(Vec2ASmall + Vec2BSmall); - expected.mortonPlus_medium_2 = morton::code::create(Vec2AMedium + Vec2BMedium); - expected.mortonPlus_full_2 = morton::code::create(Vec2AFull + Vec2BFull); - expected.mortonPlus_emulated_2 = morton::code::create(Vec2AFull + Vec2BFull); + expected.mortonPlus_small_2 = createMortonFromU64Vec(Vec2ASmall + Vec2BSmall); + expected.mortonPlus_medium_2 = createMortonFromU64Vec(Vec2AMedium + Vec2BMedium); + expected.mortonPlus_full_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); + expected.mortonPlus_emulated_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); - expected.mortonPlus_small_3 = morton::code::create(Vec3ASmall + Vec3BSmall); - expected.mortonPlus_medium_3 = morton::code::create(Vec3AMedium + Vec3BMedium); - expected.mortonPlus_full_3 = morton::code::create(Vec3AFull + Vec3BFull); - expected.mortonPlus_emulated_3 = morton::code::create(Vec3AFull + Vec3BFull); + expected.mortonPlus_small_3 = createMortonFromU64Vec(Vec3ASmall + Vec3BSmall); + expected.mortonPlus_medium_3 = createMortonFromU64Vec(Vec3AMedium + Vec3BMedium); + expected.mortonPlus_full_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); + expected.mortonPlus_emulated_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); - expected.mortonPlus_small_4 = morton::code::create(Vec4ASmall + Vec4BSmall); - expected.mortonPlus_medium_4 = morton::code::create(Vec4AMedium + Vec4BMedium); - expected.mortonPlus_full_4 = morton::code::create(Vec4AFull + Vec4BFull); - expected.mortonPlus_emulated_4 = morton::code::create(Vec4AFull + Vec4BFull); + expected.mortonPlus_small_4 = createMortonFromU64Vec(Vec4ASmall + Vec4BSmall); + expected.mortonPlus_medium_4 = createMortonFromU64Vec(Vec4AMedium + Vec4BMedium); + expected.mortonPlus_full_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); + expected.mortonPlus_emulated_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); // Minus - expected.mortonMinus_small_2 = morton::code::create(Vec2ASmall - Vec2BSmall); - expected.mortonMinus_medium_2 = morton::code::create(Vec2AMedium - Vec2BMedium); - expected.mortonMinus_full_2 = morton::code::create(Vec2AFull - Vec2BFull); - expected.mortonMinus_emulated_2 = morton::code::create(Vec2AFull - Vec2BFull); + expected.mortonMinus_small_2 = createMortonFromU64Vec(Vec2ASmall - Vec2BSmall); + expected.mortonMinus_medium_2 = createMortonFromU64Vec(Vec2AMedium - Vec2BMedium); + expected.mortonMinus_full_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); + expected.mortonMinus_emulated_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); - expected.mortonMinus_small_3 = morton::code::create(Vec3ASmall - Vec3BSmall); - expected.mortonMinus_medium_3 = morton::code::create(Vec3AMedium - Vec3BMedium); - expected.mortonMinus_full_3 = morton::code::create(Vec3AFull - Vec3BFull); - expected.mortonMinus_emulated_3 = morton::code::create(Vec3AFull - Vec3BFull); + expected.mortonMinus_small_3 = createMortonFromU64Vec(Vec3ASmall - Vec3BSmall); + expected.mortonMinus_medium_3 = createMortonFromU64Vec(Vec3AMedium - Vec3BMedium); + expected.mortonMinus_full_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); + expected.mortonMinus_emulated_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); - expected.mortonMinus_small_4 = morton::code::create(Vec4ASmall - Vec4BSmall); - expected.mortonMinus_medium_4 = morton::code::create(Vec4AMedium - Vec4BMedium); - expected.mortonMinus_full_4 = morton::code::create(Vec4AFull - Vec4BFull); - expected.mortonMinus_emulated_4 = morton::code::create(Vec4AFull - Vec4BFull); + expected.mortonMinus_small_4 = createMortonFromU64Vec(Vec4ASmall - Vec4BSmall); + expected.mortonMinus_medium_4 = createMortonFromU64Vec(Vec4AMedium - Vec4BMedium); + expected.mortonMinus_full_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); + expected.mortonMinus_emulated_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); // Coordinate-wise equality expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); @@ -168,6 +166,7 @@ class CTester final : public ITester expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + expected.mortonEqual_emulated_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); // Coordinate-wise unsigned inequality (just testing with less) expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); @@ -183,65 +182,72 @@ class CTester final : public ITester expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); // Coordinate-wise signed inequality expected.mortonSignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); uint16_t castedShift = uint16_t(testInput.shift); // Left-shift - expected.mortonLeftShift_small_2 = morton::code::create((Vec2ASmall << uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); - expected.mortonLeftShift_medium_2 = morton::code::create((Vec2AMedium << uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); - expected.mortonLeftShift_full_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); - expected.mortonLeftShift_emulated_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + expected.mortonLeftShift_small_2 = createMortonFromU64Vec(Vec2ASmall << uint16_t(castedShift % smallBits_2)); + expected.mortonLeftShift_medium_2 = createMortonFromU64Vec(Vec2AMedium << uint16_t(castedShift % mediumBits_2)); + expected.mortonLeftShift_full_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); + expected.mortonLeftShift_emulated_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); - expected.mortonLeftShift_small_3 = morton::code::create((Vec3ASmall << uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); - expected.mortonLeftShift_medium_3 = morton::code::create((Vec3AMedium << uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); - expected.mortonLeftShift_full_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); - expected.mortonLeftShift_emulated_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + expected.mortonLeftShift_small_3 = createMortonFromU64Vec(Vec3ASmall << uint16_t(castedShift % smallBits_3)); + expected.mortonLeftShift_medium_3 = createMortonFromU64Vec(Vec3AMedium << uint16_t(castedShift % mediumBits_3)); + expected.mortonLeftShift_full_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); + expected.mortonLeftShift_emulated_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); - expected.mortonLeftShift_small_4 = morton::code::create((Vec4ASmall << uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); - expected.mortonLeftShift_medium_4 = morton::code::create((Vec4AMedium << uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); - expected.mortonLeftShift_full_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); - expected.mortonLeftShift_emulated_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + expected.mortonLeftShift_small_4 = createMortonFromU64Vec(Vec4ASmall << uint16_t(castedShift % smallBits_4)); + expected.mortonLeftShift_medium_4 = createMortonFromU64Vec(Vec4AMedium << uint16_t(castedShift % mediumBits_4)); + expected.mortonLeftShift_full_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); + expected.mortonLeftShift_emulated_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); // Unsigned right-shift - expected.mortonUnsignedRightShift_small_2 = morton::code::create((Vec2ASmall >> uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); - expected.mortonUnsignedRightShift_medium_2 = morton::code::create((Vec2AMedium >> uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); - expected.mortonUnsignedRightShift_full_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); - expected.mortonUnsignedRightShift_emulated_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + expected.mortonUnsignedRightShift_small_2 = morton::code::create(Vec2ASmall >> uint16_t(castedShift % smallBits_2)); + expected.mortonUnsignedRightShift_medium_2 = morton::code::create(Vec2AMedium >> uint16_t(castedShift % mediumBits_2)); + expected.mortonUnsignedRightShift_full_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); + expected.mortonUnsignedRightShift_emulated_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); - expected.mortonUnsignedRightShift_small_3 = morton::code::create((Vec3ASmall >> uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); - expected.mortonUnsignedRightShift_medium_3 = morton::code::create((Vec3AMedium >> uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); - expected.mortonUnsignedRightShift_full_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); - expected.mortonUnsignedRightShift_emulated_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + expected.mortonUnsignedRightShift_small_3 = morton::code::create(Vec3ASmall >> uint16_t(castedShift % smallBits_3)); + expected.mortonUnsignedRightShift_medium_3 = morton::code::create(Vec3AMedium >> uint16_t(castedShift % mediumBits_3)); + expected.mortonUnsignedRightShift_full_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); + expected.mortonUnsignedRightShift_emulated_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); - expected.mortonUnsignedRightShift_small_4 = morton::code::create((Vec4ASmall >> uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); - expected.mortonUnsignedRightShift_medium_4 = morton::code::create((Vec4AMedium >> uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); - expected.mortonUnsignedRightShift_full_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); - expected.mortonUnsignedRightShift_emulated_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + expected.mortonUnsignedRightShift_small_4 = morton::code::create(Vec4ASmall >> uint16_t(castedShift % smallBits_4)); + expected.mortonUnsignedRightShift_medium_4 = morton::code::create(Vec4AMedium >> uint16_t(castedShift % mediumBits_4)); + expected.mortonUnsignedRightShift_full_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); + expected.mortonUnsignedRightShift_emulated_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); // Signed right-shift - expected.mortonSignedRightShift_small_2 = morton::code::create((Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)) & int16_t(smallBitsMask_2)); - expected.mortonSignedRightShift_medium_2 = morton::code::create((Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)) & int16_t(mediumBitsMask_2)); - expected.mortonSignedRightShift_full_2 = morton::code::create((Vec2ASignedFull >> int32_t(castedShift % fullBits_2)) & int32_t(fullBitsMask_2)); - - expected.mortonSignedRightShift_small_3 = morton::code::create((Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)) & int16_t(smallBitsMask_3)); - expected.mortonSignedRightShift_medium_3 = morton::code::create((Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)) & int16_t(mediumBitsMask_3)); - expected.mortonSignedRightShift_full_3 = morton::code::create((Vec3ASignedFull >> int32_t(castedShift % fullBits_3)) & int32_t(fullBitsMask_3)); - - expected.mortonSignedRightShift_small_4 = morton::code::create((Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)) & int16_t(smallBitsMask_4)); - expected.mortonSignedRightShift_medium_4 = morton::code::create((Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)) & int16_t(mediumBitsMask_4)); - expected.mortonSignedRightShift_full_4 = morton::code::create((Vec4ASignedFull >> int16_t(castedShift % fullBits_4)) & int16_t(fullBitsMask_4)); + expected.mortonSignedRightShift_small_2 = morton::code::create(Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)); + expected.mortonSignedRightShift_medium_2 = morton::code::create(Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)); + expected.mortonSignedRightShift_full_2 = morton::code::create(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + + expected.mortonSignedRightShift_small_3 = morton::code::create(Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)); + expected.mortonSignedRightShift_medium_3 = morton::code::create(Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)); + expected.mortonSignedRightShift_full_3 = morton::code::create(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + + expected.mortonSignedRightShift_small_4 = morton::code::create(Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)); + expected.mortonSignedRightShift_medium_4 = morton::code::create(Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)); + expected.mortonSignedRightShift_full_4 = morton::code::create(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); } return expected; @@ -249,6 +255,7 @@ class CTester final : public ITester void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, ITester::TestType testType) override { + // Some verification is commented out and moved to CTester2 due to bug in dxc. Uncomment them when the bug is fixed. verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testIteration, seed, testType); verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testIteration, seed, testType); verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testIteration, seed, testType); @@ -262,229 +269,221 @@ class CTester final : public ITester verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testIteration, seed, testType); verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testIteration, seed, testType); verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testIteration, seed, testType); + verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testIteration, seed, testType); - //// Morton Plus + // Morton Plus verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testIteration, seed, testType); verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testIteration, seed, testType); verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testIteration, seed, testType); verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testIteration, seed, testType); verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testIteration, seed, testType); - verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testIteration, seed, testType); - verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testIteration, seed, testType); - verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testIteration, seed, testType); - - verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testIteration, seed, testType); - verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testIteration, seed, testType); - verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testIteration, seed, testType); - verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testIteration, seed, testType); - - //// Morton Minus - verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testIteration, seed, testType); - verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testIteration, seed, testType); - verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testIteration, seed, testType); - verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testIteration, seed, testType); - - verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testIteration, seed, testType); - verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testIteration, seed, testType); - verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testIteration, seed, testType); - verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testIteration, seed, testType); - - verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testIteration, seed, testType); - verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testIteration, seed, testType); - verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testIteration, seed, testType); - verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testIteration, seed, testType); - - //// Morton coordinate-wise equality - verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testIteration, seed, testType); - verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testIteration, seed, testType); - verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testIteration, seed, testType); - verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testIteration, seed, testType); - - verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testIteration, seed, testType); - verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testIteration, seed, testType); - verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testIteration, seed, testType); - verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testIteration, seed, testType); - - verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testIteration, seed, testType); - verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testIteration, seed, testType); - verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testIteration, seed, testType); - - //// Morton coordinate-wise unsigned inequality - verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testIteration, seed, testType); - - verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testIteration, seed, testType); - - verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testIteration, seed, testType); - - //// Morton coordinate-wise signed inequality - verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testIteration, seed, testType); - - verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testIteration, seed, testType); - - verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testIteration, seed, testType); - - //// Morton left-shift - verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testIteration, seed, testType); - - verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testIteration, seed, testType); - - verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testIteration, seed, testType); - - //// Morton unsigned right-shift - verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testIteration, seed, testType); - - verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testIteration, seed, testType); - - verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testIteration, seed, testType); + verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testIteration, seed, testType); + verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testIteration, seed, testType); + verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testIteration, seed, testType); + verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testIteration, seed, testType); + verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testIteration, seed, testType); + verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testIteration, seed, testType); + + // Morton Minus + verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testIteration, seed, testType); + verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testIteration, seed, testType); + verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testIteration, seed, testType); + verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testIteration, seed, testType); + verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testIteration, seed, testType); + verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testIteration, seed, testType); + verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testIteration, seed, testType); + verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testIteration, seed, testType); + verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testIteration, seed, testType); + verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testIteration, seed, testType); + + // Morton coordinate-wise equality + verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testIteration, seed, testType); + verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testIteration, seed, testType); + verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testIteration, seed, testType); + verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testIteration, seed, testType); + verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testIteration, seed, testType); + verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testIteration, seed, testType); + verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testIteration, seed, testType); + verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testIteration, seed, testType); + verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testIteration, seed, testType); + verifyTestValue("mortonEqual_emulated_4", expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testIteration, seed, testType); + + // Morton coordinate-wise unsigned inequality + verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testIteration, seed, testType); + // verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testIteration, seed, testType); + + // Morton coordinate-wise signed inequality + verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testIteration, seed, testType); + // verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testIteration, seed, testType); + // verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testIteration, seed, testType); + // verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testIteration, seed, testType); + + // Morton left-shift + verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testIteration, seed, testType); + + // Morton unsigned right-shift + verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testIteration, seed, testType); // Morton signed right-shift - verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testIteration, seed, testType); - - verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testIteration, seed, testType); - - verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testIteration, seed, testType); + // verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testIteration, seed, testType); + //verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testIteration, seed, testType); + // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testIteration, seed, testType); } }; // Some hlsl code will result in compilation error if mixed together due to some bug in dxc. So we separate them into multiple shader compilation and test. -class CTester2 final : public ITester +class CTester2 final : public ITester { + using base_t = ITester; public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); + CTester2(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; +private: + InputTestValues generateInputTestValues() override + { std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); - m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - InputTestValues testInput; - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - TestValues expected; - - uint32_t generatedShift = intDistribution(mt) & uint32_t(63); - testInput.shift = generatedShift; - { - testInput.coordX = longDistribution(mt); - testInput.coordY = longDistribution(mt); - testInput.coordZ = longDistribution(mt); - testInput.coordW = longDistribution(mt); - - uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; - uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; - - uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; - uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; - - uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; - uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; - - uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - - int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); - - int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); - int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); - - int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput; - expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); - - expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); - expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); - expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + testInput.generatedA = longDistribution(getRandomEngine()); + testInput.generatedB = longDistribution(getRandomEngine()); - uint16_t castedShift = uint16_t(generatedShift); - expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); - expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); - expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + uint32_t generatedShift = intDistribution(getRandomEngine()) & uint32_t(63); + testInput.shift = generatedShift; - } + testInput.coordX = longDistribution(getRandomEngine()); + testInput.coordY = longDistribution(getRandomEngine()); + testInput.coordZ = longDistribution(getRandomEngine()); + testInput.coordW = longDistribution(getRandomEngine()); - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); - } - m_logger->log("SECOND TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + return testInput; } -private: - inline static constexpr int Iterations = 100u; - - void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + TestValues determineExpectedResults(const InputTestValues& testInput) override { - TestValues cpuTestValues; - - fillTestValues2(commonTestInputValues, cpuTestValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + const uint32_t generatedShift = testInput.shift; + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(generatedShift); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + return expected; } - void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, ITester::TestType testType) override { - TestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); - } + verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testIteration, seed, testType); - void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) - { - - verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testType); - - verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); - verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); - verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); - - verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); - verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); - verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); - + verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testIteration, seed, testType); + + verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testIteration, seed, testType); } }; #endif \ No newline at end of file diff --git a/14_Mortons/ITester.h b/14_Mortons/ITester.h deleted file mode 100644 index 6291bf3de..000000000 --- a/14_Mortons/ITester.h +++ /dev/null @@ -1,383 +0,0 @@ -#ifndef _NBL_COMMON_I_TESTER_INCLUDED_ -#define _NBL_COMMON_I_TESTER_INCLUDED_ - -#include -#include -#include - -using namespace nbl; - -template -class ITester -{ -public: - virtual ~ITester() - { - m_outputBufferAllocation.memory->unmap(); - }; - - struct PipelineSetupData - { - std::string testShaderPath; - core::smart_refctd_ptr device; - core::smart_refctd_ptr api; - core::smart_refctd_ptr assetMgr; - core::smart_refctd_ptr logger; - video::IPhysicalDevice* physicalDevice; - uint32_t computeFamilyIndex; - }; - - void setupPipeline(const PipelineSetupData& pipleineSetupData) - { - // setting up pipeline in the constructor - m_device = core::smart_refctd_ptr(pipleineSetupData.device); - m_physicalDevice = pipleineSetupData.physicalDevice; - m_api = core::smart_refctd_ptr(pipleineSetupData.api); - m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr); - m_logger = core::smart_refctd_ptr(pipleineSetupData.logger); - m_queueFamily = pipleineSetupData.computeFamilyIndex; - m_semaphoreCounter = 0; - m_semaphore = m_device->createSemaphore(0); - m_cmdpool = m_device->createCommandPool(m_queueFamily, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) - logFail("Failed to create Command Buffers!\n"); - - // Load shaders, set up pipeline - core::smart_refctd_ptr shader; - { - asset::IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return logFail("Could not load shader!"); - - // It would be super weird if loading a shader from a file produced more than 1 asset - assert(assets.size() == 1); - core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - - auto overridenSource = asset::CHLSLCompiler::createOverridenCopy( - source.get(), "#define WORKGROUP_SIZE %d\n#define TEST_COUNT %d\n", - m_WorkgroupSize, m_testIterationCount - ); - - shader = m_device->compileShader({overridenSource.get()}); - } - - if (!shader) - logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V we're feeding it!\n"); - - video::IGPUDescriptorSetLayout::SBinding bindings[2] = { - { - .binding = 0, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - }, - { - .binding = 1, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - } - }; - - core::smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); - if (!dsLayout) - logFail("Failed to create a Descriptor Layout!\n"); - - m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout)); - if (!m_pplnLayout) - logFail("Failed to create a Pipeline Layout!\n"); - - { - video::IGPUComputePipeline::SCreationParams params = {}; - params.layout = m_pplnLayout.get(); - params.shader.entryPoint = "main"; - params.shader.shader = shader.get(); - if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) - logFail("Failed to create pipelines (compile & link shaders)!\n"); - } - - // Allocate memory of the input buffer - { - const size_t BufferSize = sizeof(InputTestValues) * m_testIterationCount; - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr inputBuff = m_device->createBuffer(std::move(params)); - if (!inputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - - inputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_inputBufferAllocation.isValid()) - logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); - - assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - m_ds = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(inputBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - // Allocate memory of the output buffer - { - const size_t BufferSize = sizeof(TestResults) * m_testIterationCount; - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr outputBuff = m_device->createBuffer(std::move(params)); - if (!outputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - - outputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_outputBufferAllocation = m_device->allocate(reqs, outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_outputBufferAllocation.isValid()) - logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); - - assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(outputBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 1,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - if (!m_outputBufferAllocation.memory->map({ 0ull,m_outputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize()); - if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - assert(memoryRange.valid() && memoryRange.length >= sizeof(TestResults)); - - m_queue = m_device->getQueue(m_queueFamily, 0); - } - - void performTestsAndVerifyResults() - { - core::vector inputTestValues; - core::vector exceptedTestResults; - - inputTestValues.reserve(m_testIterationCount); - exceptedTestResults.reserve(m_testIterationCount); - - m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < m_testIterationCount; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - InputTestValues testInput = generateInputTestValues(); - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - TestValues expected = determineExpectedResults(testInput); - - inputTestValues.push_back(testInput); - exceptedTestResults.push_back(expected); - } - - core::vector cpuTestResults = performCpuTests(inputTestValues); - core::vector gpuTestResults = performGpuTests(inputTestValues); - - verifyAllTestResults(cpuTestResults, gpuTestResults, exceptedTestResults); - - m_logger->log("TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - reloadSeed(); - } - -protected: - enum class TestType - { - CPU, - GPU - }; - - virtual void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) = 0; - - virtual InputTestValues generateInputTestValues() = 0; - - virtual TestResults determineExpectedResults(const InputTestValues& testInput) = 0; - - std::mt19937& getRandomEngine() - { - return m_mersenneTwister; - } - -protected: - uint32_t m_queueFamily; - core::smart_refctd_ptr m_device; - core::smart_refctd_ptr m_api; - video::IPhysicalDevice* m_physicalDevice; - core::smart_refctd_ptr m_assetMgr; - core::smart_refctd_ptr m_logger; - video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {}; - video::IDeviceMemoryAllocator::SAllocation m_outputBufferAllocation = {}; - core::smart_refctd_ptr m_cmdbuf = nullptr; - core::smart_refctd_ptr m_cmdpool = nullptr; - core::smart_refctd_ptr m_ds = nullptr; - core::smart_refctd_ptr m_pplnLayout = nullptr; - core::smart_refctd_ptr m_pipeline; - core::smart_refctd_ptr m_semaphore; - video::IQueue* m_queue; - uint64_t m_semaphoreCounter; - - ITester(const uint32_t testBatchCount) - : m_testIterationCount(testBatchCount * m_WorkgroupSize) - { - reloadSeed(); - }; - - void dispatchGpuTests(const core::vector& input, core::vector& output) - { - // Update input buffer - if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize()); - if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - assert(m_testIterationCount == input.size()); - const size_t inputDataSize = sizeof(InputTestValues) * m_testIterationCount; - std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), input.data(), inputDataSize); - - m_inputBufferAllocation.memory->unmap(); - - // record command buffer - const uint32_t dispatchSizeX = (m_testIterationCount + (m_WorkgroupSize - 1)) / m_WorkgroupSize; - m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); - m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); - m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); - m_cmdbuf->bindComputePipeline(m_pipeline.get()); - m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); - m_cmdbuf->dispatch(dispatchSizeX, 1, 1); - m_cmdbuf->endDebugMarker(); - m_cmdbuf->end(); - - video::IQueue::SSubmitInfo submitInfos[1] = {}; - const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; - submitInfos[0].commandBuffers = cmdbufs; - const video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; - submitInfos[0].signalSemaphores = signals; - - m_api->startCapture(); - m_queue->submit(submitInfos); - m_api->endCapture(); - - m_device->waitIdle(); - - // save test results - assert(m_testIterationCount == output.size()); - const size_t outputDataSize = sizeof(InputTestValues) * m_testIterationCount; - std::memcpy(output.data(), static_cast(m_outputBufferAllocation.memory->getMappedPointer()), outputDataSize); - - m_device->waitIdle(); - } - - template - void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const size_t testIteration, const uint32_t seed, const TestType testType) - { - if (expectedVal == testVal) - return; - - std::stringstream ss; - switch (testType) - { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; - break; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; - } - - ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n'; - ss << "TEST ITERATION INDEX: " << testIteration << " SEED: " << seed << '\n'; - ss << "EXPECTED VALUE: " << system::to_string(expectedVal) << " TEST VALUE: " << system::to_string(testVal) << '\n'; - - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); - } - -private: - template - inline void logFail(const char* msg, Args&&... args) - { - m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); - exit(-1); - } - - core::vector performCpuTests(const core::vector& inputTestValues) - { - core::vector output(m_testIterationCount); - TestExecutor testExecutor; - - auto iterations = std::views::iota(0ull, m_testIterationCount); - std::for_each(std::execution::par_unseq, iterations.begin(), iterations.end(), - [&](size_t i) - { - testExecutor(inputTestValues[i], output[i]); - } - ); - - return output; - } - - core::vector performGpuTests(const core::vector& inputTestValues) - { - core::vector output(m_testIterationCount); - dispatchGpuTests(inputTestValues, output); - - return output; - } - - void verifyAllTestResults(const core::vector& cpuTestReults, const core::vector& gpuTestReults, const core::vector& exceptedTestReults) - { - for (int i = 0; i < m_testIterationCount; ++i) - { - verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, m_seed, ITester::TestType::CPU); - verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, m_seed, ITester::TestType::GPU); - } - } - - void reloadSeed() - { - std::random_device rd; - m_seed = rd(); - m_mersenneTwister = std::mt19937(m_seed); - } - - const size_t m_testIterationCount; - static constexpr size_t m_WorkgroupSize = 128u; - // seed will change after every call to performTestsAndVerifyResults() - std::mt19937 m_mersenneTwister; - uint32_t m_seed; -}; - -#endif \ No newline at end of file diff --git a/14_Mortons/app_resources/test2.comp.hlsl b/14_Mortons/app_resources/test2.comp.hlsl index faf63f678..045ba1bdc 100644 --- a/14_Mortons/app_resources/test2.comp.hlsl +++ b/14_Mortons/app_resources/test2.comp.hlsl @@ -13,6 +13,6 @@ void main(uint3 invocationID : SV_DispatchThreadID) { const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; - TestExecutor executor; + TestExecutor2 executor; executor(inputTestValues[invID], outputTestValues[invID]); } diff --git a/14_Mortons/app_resources/testCommon.hlsl b/14_Mortons/app_resources/testCommon.hlsl index 20809071b..b285bd8cd 100644 --- a/14_Mortons/app_resources/testCommon.hlsl +++ b/14_Mortons/app_resources/testCommon.hlsl @@ -46,7 +46,7 @@ struct TestExecutor uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); @@ -64,7 +64,7 @@ struct TestExecutor int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); - int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); @@ -91,7 +91,7 @@ struct TestExecutor morton::code morton_medium_2B = createMortonFromU64Vec(Vec2B); morton::code morton_full_2B = createMortonFromU64Vec(Vec2B); morton::code morton_emulated_2B = createMortonFromU64Vec(Vec2B); - + morton::code morton_small_3A = createMortonFromU64Vec(Vec3A); morton::code morton_medium_3A = createMortonFromU64Vec(Vec3A); morton::code morton_full_3A = createMortonFromU64Vec(Vec3A); @@ -100,7 +100,7 @@ struct TestExecutor morton::code morton_medium_3B = createMortonFromU64Vec(Vec3B); morton::code morton_full_3B = createMortonFromU64Vec(Vec3B); morton::code morton_emulated_3B = createMortonFromU64Vec(Vec3B); - + morton::code morton_small_4A = createMortonFromU64Vec(Vec4A); morton::code morton_medium_4A = createMortonFromU64Vec(Vec4A); morton::code morton_full_4A = createMortonFromU64Vec(Vec4A); @@ -109,23 +109,23 @@ struct TestExecutor morton::code morton_medium_4B = createMortonFromU64Vec(Vec4B); morton::code morton_full_4B = createMortonFromU64Vec(Vec4B); morton::code morton_emulated_4B = createMortonFromU64Vec(Vec4B); - + morton::code morton_small_2_signed = createMortonFromU64Vec(Vec2A); morton::code morton_medium_2_signed = createMortonFromU64Vec(Vec2A); morton::code morton_full_2_signed = createMortonFromU64Vec(Vec2A); morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); - + morton::code morton_small_3_signed = createMortonFromU64Vec(Vec3A); morton::code morton_medium_3_signed = createMortonFromU64Vec(Vec3A); morton::code morton_full_3_signed = createMortonFromU64Vec(Vec3A); morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); - + morton::code morton_small_4_signed = createMortonFromU64Vec(Vec4A); morton::code morton_medium_4_signed = createMortonFromU64Vec(Vec4A); morton::code morton_full_4_signed = createMortonFromU64Vec(Vec4A); morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); - // Some test and operation is moved to testCommon2.hlsl due to dxc bug that cause compilation failure. Uncomment when the bug is fixed. + // Some test and operation is moved to testCommon2.hlsl due to dxc bug that cause compilation failure. Uncomment when the bug is fixed. // Plus output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; @@ -172,7 +172,7 @@ struct TestExecutor output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(Vec4BSmall)); output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(Vec4BMedium)); output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(Vec4BFull)); - output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(Vec4BFull)); + output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(Vec4BFull)); // Coordinate-wise unsigned inequality (just testing with less) output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(Vec2BSmall)); @@ -227,7 +227,7 @@ struct TestExecutor output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); left_shift_operator > leftShiftEmulated3; output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); - + left_shift_operator > leftShiftSmall4; output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); left_shift_operator > leftShiftMedium4; @@ -236,7 +236,7 @@ struct TestExecutor output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % fullBits_4); left_shift_operator > leftShiftEmulated4; output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - + // Unsigned right-shift arithmetic_right_shift_operator > rightShiftSmall2; output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); @@ -246,7 +246,7 @@ struct TestExecutor output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); arithmetic_right_shift_operator > rightShiftEmulated2; output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); - + arithmetic_right_shift_operator > rightShiftSmall3; output.mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); arithmetic_right_shift_operator > rightShiftMedium3; @@ -255,7 +255,7 @@ struct TestExecutor output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); arithmetic_right_shift_operator > rightShiftEmulated3; output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); - + arithmetic_right_shift_operator > rightShiftSmall4; output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); arithmetic_right_shift_operator > rightShiftMedium4; @@ -264,7 +264,7 @@ struct TestExecutor output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); arithmetic_right_shift_operator > rightShiftEmulated4; output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - + // Signed right-shift arithmetic_right_shift_operator > rightShiftSignedSmall2; output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); @@ -274,7 +274,7 @@ struct TestExecutor output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); // arithmetic_right_shift_operator > rightShiftSignedEmulated2; // output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); - + arithmetic_right_shift_operator > rightShiftSignedSmall3; output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); arithmetic_right_shift_operator > rightShiftSignedMedium3; @@ -283,7 +283,7 @@ struct TestExecutor output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); // arithmetic_right_shift_operator > rightShiftSignedEmulated3; // output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); - + arithmetic_right_shift_operator > rightShiftSignedSmall4; output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); arithmetic_right_shift_operator > rightShiftSignedMedium4; @@ -293,4 +293,5 @@ struct TestExecutor // arithmetic_right_shift_operator > rightShiftSignedEmulated4; // output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); -} + } +}; diff --git a/14_Mortons/app_resources/testCommon2.hlsl b/14_Mortons/app_resources/testCommon2.hlsl index 42beeb749..5c2a953ac 100644 --- a/14_Mortons/app_resources/testCommon2.hlsl +++ b/14_Mortons/app_resources/testCommon2.hlsl @@ -1,42 +1,42 @@ #include "common.hlsl" -struct TestExecutor +struct TestExecutor2 { void operator()(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) { uint64_t2 Vec2A = { input.coordX, input.coordY }; uint64_t2 Vec2B = { input.coordZ, input.coordW }; - + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; - + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; - + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - + morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); - - + + output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); - - output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); - output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); - output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); - + + output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); + output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); + output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); + uint16_t castedShift = uint16_t(input.shift); - + arithmetic_right_shift_operator > rightShiftSignedEmulated2; - output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); + output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); arithmetic_right_shift_operator > rightShiftSignedEmulated3; - output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); + output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); arithmetic_right_shift_operator > rightShiftSignedEmulated4; output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); } -} +}; diff --git a/14_Mortons/main.cpp b/14_Mortons/main.cpp index 286718b9a..e7d350721 100644 --- a/14_Mortons/main.cpp +++ b/14_Mortons/main.cpp @@ -10,8 +10,6 @@ #include "app_resources/common.hlsl" #include "CTester.h" -#include - using namespace nbl::core; using namespace nbl::hlsl; using namespace nbl::system; @@ -36,26 +34,36 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp return false; if (!asset_base_t::onAppInitialized(std::move(system))) return false; - - CTester::PipelineSetupData pplnSetupData; - pplnSetupData.device = m_device; - pplnSetupData.api = m_api; - pplnSetupData.assetMgr = m_assetMgr; - pplnSetupData.logger = m_logger; - pplnSetupData.physicalDevice = m_physicalDevice; - pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator // Blocked by https://github.com/KhronosGroup/SPIRV-Tools/issues/6104 { - CTester mortonTester(1); // 4 * 128 = 512 tests + CTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; + + CTester mortonTester(1); // 4 * 128 = 512 tests mortonTester.setupPipeline(pplnSetupData); mortonTester.performTestsAndVerifyResults(); - CTester2 mortonTester2(1); + } + { + CTester2::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/test2.comp.hlsl"; - mortonTester.setupPipeline(pplnSetupData); - mortonTester.performTestsAndVerifyResults(); + + CTester2 mortonTester2(1); + mortonTester2.setupPipeline(reinterpret_cast(pplnSetupData)); + mortonTester2.performTestsAndVerifyResults(); } return true; diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 6291bf3de..397c2a8ac 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -11,11 +11,6 @@ template class ITester { public: - virtual ~ITester() - { - m_outputBufferAllocation.memory->unmap(); - }; - struct PipelineSetupData { std::string testShaderPath; @@ -213,6 +208,11 @@ class ITester reloadSeed(); } + virtual ~ITester() + { + m_outputBufferAllocation.memory->unmap(); + }; + protected: enum class TestType { @@ -220,8 +220,17 @@ class ITester GPU }; + /** + * @param testBatchCount one test batch is equal to m_WorkgroupSize, so number of tests performed will be m_WorkgroupSize * testbatchCount + */ + ITester(const uint32_t testBatchCount) + : m_testIterationCount(testBatchCount* m_WorkgroupSize) + { + reloadSeed(); + }; + virtual void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) = 0; - + virtual InputTestValues generateInputTestValues() = 0; virtual TestResults determineExpectedResults(const InputTestValues& testInput) = 0; @@ -248,12 +257,6 @@ class ITester core::smart_refctd_ptr m_semaphore; video::IQueue* m_queue; uint64_t m_semaphoreCounter; - - ITester(const uint32_t testBatchCount) - : m_testIterationCount(testBatchCount * m_WorkgroupSize) - { - reloadSeed(); - }; void dispatchGpuTests(const core::vector& input, core::vector& output) { From ab5e466db43ff94e748bae478d0c0e28a492dfc8 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 17:07:55 +0100 Subject: [PATCH 103/219] Fixed ITester.h --- common/include/nbl/examples/Tester/ITester.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 397c2a8ac..d0aa2e63a 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -182,7 +182,7 @@ class ITester void performTestsAndVerifyResults() { core::vector inputTestValues; - core::vector exceptedTestResults; + core::vector exceptedTestResults; inputTestValues.reserve(m_testIterationCount); exceptedTestResults.reserve(m_testIterationCount); @@ -193,14 +193,14 @@ class ITester // Set input thest values that will be used in both CPU and GPU tests InputTestValues testInput = generateInputTestValues(); // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - TestValues expected = determineExpectedResults(testInput); + TestResults expected = determineExpectedResults(testInput); inputTestValues.push_back(testInput); exceptedTestResults.push_back(expected); } - core::vector cpuTestResults = performCpuTests(inputTestValues); - core::vector gpuTestResults = performGpuTests(inputTestValues); + core::vector cpuTestResults = performCpuTests(inputTestValues); + core::vector gpuTestResults = performGpuTests(inputTestValues); verifyAllTestResults(cpuTestResults, gpuTestResults, exceptedTestResults); @@ -229,7 +229,7 @@ class ITester reloadSeed(); }; - virtual void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) = 0; + virtual void verifyTestResults(const TestResults& expectedTestValues, const TestResults& testValues, const size_t testIteration, const uint32_t seed, TestType testType) = 0; virtual InputTestValues generateInputTestValues() = 0; @@ -336,9 +336,9 @@ class ITester exit(-1); } - core::vector performCpuTests(const core::vector& inputTestValues) + core::vector performCpuTests(const core::vector& inputTestValues) { - core::vector output(m_testIterationCount); + core::vector output(m_testIterationCount); TestExecutor testExecutor; auto iterations = std::views::iota(0ull, m_testIterationCount); @@ -352,15 +352,15 @@ class ITester return output; } - core::vector performGpuTests(const core::vector& inputTestValues) + core::vector performGpuTests(const core::vector& inputTestValues) { - core::vector output(m_testIterationCount); + core::vector output(m_testIterationCount); dispatchGpuTests(inputTestValues, output); return output; } - void verifyAllTestResults(const core::vector& cpuTestReults, const core::vector& gpuTestReults, const core::vector& exceptedTestReults) + void verifyAllTestResults(const core::vector& cpuTestReults, const core::vector& gpuTestReults, const core::vector& exceptedTestReults) { for (int i = 0; i < m_testIterationCount; ++i) { From c593979c42627b49524690ea7a7717a2d7ca5fdf Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 17:18:49 +0100 Subject: [PATCH 104/219] Another ITester.h fix --- common/include/nbl/examples/Tester/ITester.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index d0aa2e63a..0027b8b70 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -299,7 +299,7 @@ class ITester // save test results assert(m_testIterationCount == output.size()); - const size_t outputDataSize = sizeof(InputTestValues) * m_testIterationCount; + const size_t outputDataSize = sizeof(TestResults) * m_testIterationCount; std::memcpy(output.data(), static_cast(m_outputBufferAllocation.memory->getMappedPointer()), outputDataSize); m_device->waitIdle(); From 8114cb0740323bbde03375c731bce34d6eeeb8d9 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 19:43:39 +0100 Subject: [PATCH 105/219] Adepated example 22 to new testing interface --- 14_Mortons/main.cpp | 4 +- 22_CppCompat/CIntrinsicsTester.h | 462 ++++++------- 22_CppCompat/CTgmathTester.h | 627 +++++++++--------- 22_CppCompat/ITester.h | 337 ---------- 22_CppCompat/app_resources/common.hlsl | 254 +++---- .../app_resources/intrinsicsTest.comp.hlsl | 13 +- .../app_resources/tgmathTest.comp.hlsl | 13 +- 22_CppCompat/main.cpp | 36 +- 8 files changed, 690 insertions(+), 1056 deletions(-) delete mode 100644 22_CppCompat/ITester.h diff --git a/14_Mortons/main.cpp b/14_Mortons/main.cpp index e7d350721..a36db8a19 100644 --- a/14_Mortons/main.cpp +++ b/14_Mortons/main.cpp @@ -46,7 +46,7 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; - CTester mortonTester(1); // 4 * 128 = 512 tests + CTester mortonTester(4); // 4 * 128 = 512 tests mortonTester.setupPipeline(pplnSetupData); mortonTester.performTestsAndVerifyResults(); @@ -61,7 +61,7 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/test2.comp.hlsl"; - CTester2 mortonTester2(1); + CTester2 mortonTester2(4); mortonTester2.setupPipeline(reinterpret_cast(pplnSetupData)); mortonTester2.performTestsAndVerifyResults(); } diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index f014bd1cb..c92df0079 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -5,19 +5,21 @@ #include "nbl/examples/examples.hpp" #include "app_resources/common.hlsl" -#include "ITester.h" using namespace nbl; -class CIntrinsicsTester final : public ITester +class CIntrinsicsTester final : public ITester { + using base_t = ITester; + public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); + CIntrinsicsTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; +private: + IntrinsicsIntputTestValues generateInputTestValues() override + { std::uniform_real_distribution realDistributionNeg(-50.0f, -1.0f); std::uniform_real_distribution realDistributionPos(1.0f, 50.0f); std::uniform_real_distribution realDistributionZeroToOne(0.0f, 1.0f); @@ -26,262 +28,232 @@ class CIntrinsicsTester final : public ITester std::uniform_int_distribution intDistribution(-100, 100); std::uniform_int_distribution uintDistribution(0, 100); - m_logger->log("intrinsics.hlsl TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - IntrinsicsIntputTestValues testInput; - testInput.bitCount = intDistribution(mt); - testInput.crossLhs = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.crossRhs = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.clampVal = realDistribution(mt); - testInput.clampMin = realDistributionNeg(mt); - testInput.clampMax = realDistributionPos(mt); - testInput.length = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.normalize = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.dotLhs = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.dotRhs = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.determinant = float32_t3x3( - realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt), - realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt), - realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt) - ); - testInput.findMSB = realDistribution(mt); - testInput.findLSB = realDistribution(mt); - testInput.inverse = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.transpose = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.mulLhs = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.mulRhs = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.minA = realDistribution(mt); - testInput.minB = realDistribution(mt); - testInput.maxA = realDistribution(mt); - testInput.maxB = realDistribution(mt); - testInput.rsqrt = realDistributionPos(mt); - testInput.bitReverse = realDistribution(mt); - testInput.frac = realDistribution(mt); - testInput.mixX = realDistributionNeg(mt); - testInput.mixY = realDistributionPos(mt); - testInput.mixA = realDistributionZeroToOne(mt); - testInput.sign = realDistribution(mt); - testInput.radians = realDistribution(mt); - testInput.degrees = realDistribution(mt); - testInput.stepEdge = realDistribution(mt); - testInput.stepX = realDistribution(mt); - testInput.smoothStepEdge0 = realDistributionNeg(mt); - testInput.smoothStepEdge1 = realDistributionPos(mt); - testInput.smoothStepX = realDistribution(mt); - testInput.addCarryA = std::numeric_limits::max() - uintDistribution(mt); - testInput.addCarryB = uintDistribution(mt); - testInput.subBorrowA = uintDistribution(mt); - testInput.subBorrowB = uintDistribution(mt); - - testInput.bitCountVec = int32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); - testInput.clampValVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.clampMinVec = float32_t3(realDistributionNeg(mt), realDistributionNeg(mt), realDistributionNeg(mt)); - testInput.clampMaxVec = float32_t3(realDistributionPos(mt), realDistributionPos(mt), realDistributionPos(mt)); - testInput.findMSBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.findLSBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.minAVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.minBVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.maxAVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.maxBVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.rsqrtVec = float32_t3(realDistributionPos(mt), realDistributionPos(mt), realDistributionPos(mt)); - testInput.bitReverseVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.fracVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.mixXVec = float32_t3(realDistributionNeg(mt), realDistributionNeg(mt), realDistributionNeg(mt)); - testInput.mixYVec = float32_t3(realDistributionPos(mt), realDistributionPos(mt), realDistributionPos(mt)); - testInput.mixAVec = float32_t3(realDistributionZeroToOne(mt), realDistributionZeroToOne(mt), realDistributionZeroToOne(mt)); - - testInput.signVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.radiansVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.degreesVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.stepEdgeVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.stepXVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.smoothStepEdge0Vec = float32_t3(realDistributionNeg(mt), realDistributionNeg(mt), realDistributionNeg(mt)); - testInput.smoothStepEdge1Vec = float32_t3(realDistributionPos(mt), realDistributionPos(mt), realDistributionPos(mt)); - testInput.smoothStepXVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.faceForwardN = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.faceForwardI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.faceForwardNref = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.reflectI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.reflectN = glm::normalize(float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt))); - testInput.refractI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.refractN = glm::normalize(float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt))); - testInput.refractEta = realDistribution(mt); - testInput.addCarryAVec = uint32_t3(std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt)); - testInput.addCarryBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.subBorrowAVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.subBorrowBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - IntrinsicsTestValues expected; - expected.bitCount = glm::bitCount(testInput.bitCount); - expected.clamp = glm::clamp(testInput.clampVal, testInput.clampMin, testInput.clampMax); - expected.length = glm::length(testInput.length); - expected.dot = glm::dot(testInput.dotLhs, testInput.dotRhs); - expected.determinant = glm::determinant(reinterpret_cast(testInput.determinant)); - expected.findMSB = glm::findMSB(testInput.findMSB); - expected.findLSB = glm::findLSB(testInput.findLSB); - expected.min = glm::min(testInput.minA, testInput.minB); - expected.max = glm::max(testInput.maxA, testInput.maxB); - expected.rsqrt = (1.0f / std::sqrt(testInput.rsqrt)); - expected.mix = std::lerp(testInput.mixX, testInput.mixY, testInput.mixA); - expected.sign = glm::sign(testInput.sign); - expected.radians = glm::radians(testInput.radians); - expected.degrees = glm::degrees(testInput.degrees); - expected.step = glm::step(testInput.stepEdge, testInput.stepX); - expected.smoothStep = glm::smoothstep(testInput.smoothStepEdge0, testInput.smoothStepEdge1, testInput.smoothStepX); - - expected.addCarry.result = glm::uaddCarry(testInput.addCarryA, testInput.addCarryB, expected.addCarry.carry); - expected.subBorrow.result = glm::usubBorrow(testInput.subBorrowA, testInput.subBorrowB, expected.subBorrow.borrow); + IntrinsicsIntputTestValues testInput; + testInput.bitCount = intDistribution(getRandomEngine()); + testInput.crossLhs = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.crossRhs = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.clampVal = realDistribution(getRandomEngine()); + testInput.clampMin = realDistributionNeg(getRandomEngine()); + testInput.clampMax = realDistributionPos(getRandomEngine()); + testInput.length = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.normalize = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.dotLhs = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.dotRhs = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.determinant = float32_t3x3( + realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), + realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), + realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()) + ); + testInput.findMSB = realDistribution(getRandomEngine()); + testInput.findLSB = realDistribution(getRandomEngine()); + testInput.inverse = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.transpose = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.mulLhs = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.mulRhs = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.minA = realDistribution(getRandomEngine()); + testInput.minB = realDistribution(getRandomEngine()); + testInput.maxA = realDistribution(getRandomEngine()); + testInput.maxB = realDistribution(getRandomEngine()); + testInput.rsqrt = realDistributionPos(getRandomEngine()); + testInput.bitReverse = realDistribution(getRandomEngine()); + testInput.frac = realDistribution(getRandomEngine()); + testInput.mixX = realDistributionNeg(getRandomEngine()); + testInput.mixY = realDistributionPos(getRandomEngine()); + testInput.mixA = realDistributionZeroToOne(getRandomEngine()); + testInput.sign = realDistribution(getRandomEngine()); + testInput.radians = realDistribution(getRandomEngine()); + testInput.degrees = realDistribution(getRandomEngine()); + testInput.stepEdge = realDistribution(getRandomEngine()); + testInput.stepX = realDistribution(getRandomEngine()); + testInput.smoothStepEdge0 = realDistributionNeg(getRandomEngine()); + testInput.smoothStepEdge1 = realDistributionPos(getRandomEngine()); + testInput.smoothStepX = realDistribution(getRandomEngine()); - expected.frac = testInput.frac - std::floor(testInput.frac); - expected.bitReverse = glm::bitfieldReverse(testInput.bitReverse); + testInput.bitCountVec = int32_t3(intDistribution(getRandomEngine()), intDistribution(getRandomEngine()), intDistribution(getRandomEngine())); + testInput.clampValVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.clampMinVec = float32_t3(realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine())); + testInput.clampMaxVec = float32_t3(realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.findMSBVec = uint32_t3(uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine())); + testInput.findLSBVec = uint32_t3(uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine())); + testInput.minAVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.minBVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.maxAVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.maxBVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.rsqrtVec = float32_t3(realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.bitReverseVec = uint32_t3(uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine())); + testInput.fracVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.mixXVec = float32_t3(realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine())); + testInput.mixYVec = float32_t3(realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.mixAVec = float32_t3(realDistributionZeroToOne(getRandomEngine()), realDistributionZeroToOne(getRandomEngine()), realDistributionZeroToOne(getRandomEngine())); - expected.normalize = glm::normalize(testInput.normalize); - expected.cross = glm::cross(testInput.crossLhs, testInput.crossRhs); - expected.bitCountVec = int32_t3(glm::bitCount(testInput.bitCountVec.x), glm::bitCount(testInput.bitCountVec.y), glm::bitCount(testInput.bitCountVec.z)); - expected.clampVec = float32_t3( - glm::clamp(testInput.clampValVec.x, testInput.clampMinVec.x, testInput.clampMaxVec.x), - glm::clamp(testInput.clampValVec.y, testInput.clampMinVec.y, testInput.clampMaxVec.y), - glm::clamp(testInput.clampValVec.z, testInput.clampMinVec.z, testInput.clampMaxVec.z) - ); - expected.findMSBVec = glm::findMSB(testInput.findMSBVec); - expected.findLSBVec = glm::findLSB(testInput.findLSBVec); - expected.minVec = float32_t3( - glm::min(testInput.minAVec.x, testInput.minBVec.x), - glm::min(testInput.minAVec.y, testInput.minBVec.y), - glm::min(testInput.minAVec.z, testInput.minBVec.z) - ); - expected.maxVec = float32_t3( - glm::max(testInput.maxAVec.x, testInput.maxBVec.x), - glm::max(testInput.maxAVec.y, testInput.maxBVec.y), - glm::max(testInput.maxAVec.z, testInput.maxBVec.z) - ); - expected.rsqrtVec = float32_t3(1.0f / std::sqrt(testInput.rsqrtVec.x), 1.0f / std::sqrt(testInput.rsqrtVec.y), 1.0f / std::sqrt(testInput.rsqrtVec.z)); - expected.bitReverseVec = glm::bitfieldReverse(testInput.bitReverseVec); - expected.fracVec = float32_t3( - testInput.fracVec.x - std::floor(testInput.fracVec.x), - testInput.fracVec.y - std::floor(testInput.fracVec.y), - testInput.fracVec.z - std::floor(testInput.fracVec.z)); - expected.mixVec.x = std::lerp(testInput.mixXVec.x, testInput.mixYVec.x, testInput.mixAVec.x); - expected.mixVec.y = std::lerp(testInput.mixXVec.y, testInput.mixYVec.y, testInput.mixAVec.y); - expected.mixVec.z = std::lerp(testInput.mixXVec.z, testInput.mixYVec.z, testInput.mixAVec.z); + testInput.signVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.radiansVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.degreesVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.stepEdgeVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.stepXVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.smoothStepEdge0Vec = float32_t3(realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine())); + testInput.smoothStepEdge1Vec = float32_t3(realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.smoothStepXVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.faceForwardN = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.faceForwardI = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.faceForwardNref = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.reflectI = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.reflectN = glm::normalize(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()))); + testInput.refractI = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.refractN = glm::normalize(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()))); + testInput.refractEta = realDistribution(getRandomEngine()); - expected.signVec = glm::sign(testInput.signVec); - expected.radiansVec = glm::radians(testInput.radiansVec); - expected.degreesVec = glm::degrees(testInput.degreesVec); - expected.stepVec = glm::step(testInput.stepEdgeVec, testInput.stepXVec); - expected.smoothStepVec = glm::smoothstep(testInput.smoothStepEdge0Vec, testInput.smoothStepEdge1Vec, testInput.smoothStepXVec); - expected.faceForward = glm::faceforward(testInput.faceForwardN, testInput.faceForwardI, testInput.faceForwardNref); - expected.reflect = glm::reflect(testInput.reflectI, testInput.reflectN); - expected.refract = glm::refract(testInput.refractI, testInput.refractN, testInput.refractEta); + return testInput; + } - expected.addCarryVec.result = glm::uaddCarry(testInput.addCarryAVec, testInput.addCarryBVec, expected.addCarryVec.carry); - expected.subBorrowVec.result = glm::usubBorrow(testInput.subBorrowAVec, testInput.subBorrowBVec, expected.subBorrowVec.borrow); + IntrinsicsTestValues determineExpectedResults(const IntrinsicsIntputTestValues& testInput) override + { + IntrinsicsTestValues expected; + expected.bitCount = glm::bitCount(testInput.bitCount); + expected.clamp = glm::clamp(testInput.clampVal, testInput.clampMin, testInput.clampMax); + expected.length = glm::length(testInput.length); + expected.dot = glm::dot(testInput.dotLhs, testInput.dotRhs); + expected.determinant = glm::determinant(reinterpret_cast(testInput.determinant)); + expected.findMSB = glm::findMSB(testInput.findMSB); + expected.findLSB = glm::findLSB(testInput.findLSB); + expected.min = glm::min(testInput.minA, testInput.minB); + expected.max = glm::max(testInput.maxA, testInput.maxB); + expected.rsqrt = (1.0f / std::sqrt(testInput.rsqrt)); + expected.mix = std::lerp(testInput.mixX, testInput.mixY, testInput.mixA); + expected.sign = glm::sign(testInput.sign); + expected.radians = glm::radians(testInput.radians); + expected.degrees = glm::degrees(testInput.degrees); + expected.step = glm::step(testInput.stepEdge, testInput.stepX); + expected.smoothStep = glm::smoothstep(testInput.smoothStepEdge0, testInput.smoothStepEdge1, testInput.smoothStepX); - auto mulGlm = nbl::hlsl::mul(testInput.mulLhs, testInput.mulRhs); - expected.mul = reinterpret_cast(mulGlm); - auto transposeGlm = glm::transpose(reinterpret_cast(testInput.transpose)); - expected.transpose = reinterpret_cast(transposeGlm); - auto inverseGlm = glm::inverse(reinterpret_cast(testInput.inverse)); - expected.inverse = reinterpret_cast(inverseGlm); + expected.addCarry.result = glm::uaddCarry(testInput.addCarryA, testInput.addCarryB, expected.addCarry.carry); + expected.subBorrow.result = glm::usubBorrow(testInput.subBorrowA, testInput.subBorrowB, expected.subBorrow.borrow); - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); - } - m_logger->log("intrinsics.hlsl TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - } + expected.frac = testInput.frac - std::floor(testInput.frac); + expected.bitReverse = glm::bitfieldReverse(testInput.bitReverse); -private: - inline static constexpr int Iterations = 100u; + expected.normalize = glm::normalize(testInput.normalize); + expected.cross = glm::cross(testInput.crossLhs, testInput.crossRhs); + expected.bitCountVec = int32_t3(glm::bitCount(testInput.bitCountVec.x), glm::bitCount(testInput.bitCountVec.y), glm::bitCount(testInput.bitCountVec.z)); + expected.clampVec = float32_t3( + glm::clamp(testInput.clampValVec.x, testInput.clampMinVec.x, testInput.clampMaxVec.x), + glm::clamp(testInput.clampValVec.y, testInput.clampMinVec.y, testInput.clampMaxVec.y), + glm::clamp(testInput.clampValVec.z, testInput.clampMinVec.z, testInput.clampMaxVec.z) + ); + expected.findMSBVec = glm::findMSB(testInput.findMSBVec); + expected.findLSBVec = glm::findLSB(testInput.findLSBVec); + expected.minVec = float32_t3( + glm::min(testInput.minAVec.x, testInput.minBVec.x), + glm::min(testInput.minAVec.y, testInput.minBVec.y), + glm::min(testInput.minAVec.z, testInput.minBVec.z) + ); + expected.maxVec = float32_t3( + glm::max(testInput.maxAVec.x, testInput.maxBVec.x), + glm::max(testInput.maxAVec.y, testInput.maxBVec.y), + glm::max(testInput.maxAVec.z, testInput.maxBVec.z) + ); + expected.rsqrtVec = float32_t3(1.0f / std::sqrt(testInput.rsqrtVec.x), 1.0f / std::sqrt(testInput.rsqrtVec.y), 1.0f / std::sqrt(testInput.rsqrtVec.z)); + expected.bitReverseVec = glm::bitfieldReverse(testInput.bitReverseVec); + expected.fracVec = float32_t3( + testInput.fracVec.x - std::floor(testInput.fracVec.x), + testInput.fracVec.y - std::floor(testInput.fracVec.y), + testInput.fracVec.z - std::floor(testInput.fracVec.z)); + expected.mixVec.x = std::lerp(testInput.mixXVec.x, testInput.mixYVec.x, testInput.mixAVec.x); + expected.mixVec.y = std::lerp(testInput.mixXVec.y, testInput.mixYVec.y, testInput.mixAVec.y); + expected.mixVec.z = std::lerp(testInput.mixXVec.z, testInput.mixYVec.z, testInput.mixAVec.z); - void performCpuTests(const IntrinsicsIntputTestValues& commonTestInputValues, const IntrinsicsTestValues& expectedTestValues) - { - IntrinsicsTestValues cpuTestValues; + expected.signVec = glm::sign(testInput.signVec); + expected.radiansVec = glm::radians(testInput.radiansVec); + expected.degreesVec = glm::degrees(testInput.degreesVec); + expected.stepVec = glm::step(testInput.stepEdgeVec, testInput.stepXVec); + expected.smoothStepVec = glm::smoothstep(testInput.smoothStepEdge0Vec, testInput.smoothStepEdge1Vec, testInput.smoothStepXVec); + expected.faceForward = glm::faceforward(testInput.faceForwardN, testInput.faceForwardI, testInput.faceForwardNref); + expected.reflect = glm::reflect(testInput.reflectI, testInput.reflectN); + expected.refract = glm::refract(testInput.refractI, testInput.refractN, testInput.refractEta); - cpuTestValues.fillTestValues(commonTestInputValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + expected.addCarryVec.result = glm::uaddCarry(testInput.addCarryAVec, testInput.addCarryBVec, expected.addCarryVec.carry); + expected.subBorrowVec.result = glm::usubBorrow(testInput.subBorrowAVec, testInput.subBorrowBVec, expected.subBorrowVec.borrow); - } + auto mulGlm = nbl::hlsl::mul(testInput.mulLhs, testInput.mulRhs); + expected.mul = reinterpret_cast(mulGlm); + auto transposeGlm = glm::transpose(reinterpret_cast(testInput.transpose)); + expected.transpose = reinterpret_cast(transposeGlm); + auto inverseGlm = glm::inverse(reinterpret_cast(testInput.inverse)); + expected.inverse = reinterpret_cast(inverseGlm); - void performGpuTests(const IntrinsicsIntputTestValues& commonTestInputValues, const IntrinsicsTestValues& expectedTestValues) - { - IntrinsicsTestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + return expected; } - void verifyTestValues(const IntrinsicsTestValues& expectedTestValues, const IntrinsicsTestValues& testValues, ITester::TestType testType) + void verifyTestResults(const IntrinsicsTestValues& expectedTestValues, const IntrinsicsTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override { - verifyTestValue("bitCount", expectedTestValues.bitCount, testValues.bitCount, testType); - verifyTestValue("clamp", expectedTestValues.clamp, testValues.clamp, testType); - verifyTestValue("length", expectedTestValues.length, testValues.length, testType); - verifyTestValue("dot", expectedTestValues.dot, testValues.dot, testType); - verifyTestValue("determinant", expectedTestValues.determinant, testValues.determinant, testType); - verifyTestValue("findMSB", expectedTestValues.findMSB, testValues.findMSB, testType); - verifyTestValue("findLSB", expectedTestValues.findLSB, testValues.findLSB, testType); - verifyTestValue("min", expectedTestValues.min, testValues.min, testType); - verifyTestValue("max", expectedTestValues.max, testValues.max, testType); - verifyTestValue("rsqrt", expectedTestValues.rsqrt, testValues.rsqrt, testType); - verifyTestValue("frac", expectedTestValues.frac, testValues.frac, testType); - verifyTestValue("bitReverse", expectedTestValues.bitReverse, testValues.bitReverse, testType); - verifyTestValue("mix", expectedTestValues.mix, testValues.mix, testType); - verifyTestValue("sign", expectedTestValues.sign, testValues.sign, testType); - verifyTestValue("radians", expectedTestValues.radians, testValues.radians, testType); - verifyTestValue("degrees", expectedTestValues.degrees, testValues.degrees, testType); - verifyTestValue("step", expectedTestValues.step, testValues.step, testType); - verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testType); - verifyTestValue("addCarryResult", expectedTestValues.addCarry.result, testValues.addCarry.result, testType); - verifyTestValue("addCarryCarry", expectedTestValues.addCarry.carry, testValues.addCarry.carry, testType); - verifyTestValue("subBorrowResult", expectedTestValues.subBorrow.result, testValues.subBorrow.result, testType); - verifyTestValue("subBorrowBorrow", expectedTestValues.subBorrow.borrow, testValues.subBorrow.borrow, testType); + verifyTestValue("bitCount", expectedTestValues.bitCount, testValues.bitCount, testIteration, seed, testType); + verifyTestValue("clamp", expectedTestValues.clamp, testValues.clamp, testIteration, seed, testType); + verifyTestValue("length", expectedTestValues.length, testValues.length, testIteration, seed, testType); + verifyTestValue("dot", expectedTestValues.dot, testValues.dot, testIteration, seed, testType); + verifyTestValue("determinant", expectedTestValues.determinant, testValues.determinant, testIteration, seed, testType); + verifyTestValue("findMSB", expectedTestValues.findMSB, testValues.findMSB, testIteration, seed, testType); + verifyTestValue("findLSB", expectedTestValues.findLSB, testValues.findLSB, testIteration, seed, testType); + verifyTestValue("min", expectedTestValues.min, testValues.min, testIteration, seed, testType); + verifyTestValue("max", expectedTestValues.max, testValues.max, testIteration, seed, testType); + verifyTestValue("rsqrt", expectedTestValues.rsqrt, testValues.rsqrt, testIteration, seed, testType); + verifyTestValue("frac", expectedTestValues.frac, testValues.frac, testIteration, seed, testType); + verifyTestValue("bitReverse", expectedTestValues.bitReverse, testValues.bitReverse, testIteration, seed, testType); + verifyTestValue("mix", expectedTestValues.mix, testValues.mix, testIteration, seed, testType); + verifyTestValue("sign", expectedTestValues.sign, testValues.sign, testIteration, seed, testType); + verifyTestValue("radians", expectedTestValues.radians, testValues.radians, testIteration, seed, testType); + verifyTestValue("degrees", expectedTestValues.degrees, testValues.degrees, testIteration, seed, testType); + verifyTestValue("step", expectedTestValues.step, testValues.step, testIteration, seed, testType); + verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testIteration, seed, testType); + verifyTestValue("addCarryResult", expectedTestValues.addCarry.result, testValues.addCarry.result, testIteration, seed, testType); + verifyTestValue("addCarryCarry", expectedTestValues.addCarry.carry, testValues.addCarry.carry, testIteration, seed, testType); + verifyTestValue("subBorrowResult", expectedTestValues.subBorrow.result, testValues.subBorrow.result, testIteration, seed, testType); + verifyTestValue("subBorrowBorrow", expectedTestValues.subBorrow.borrow, testValues.subBorrow.borrow, testIteration, seed, testType); - verifyTestVector3dValue("normalize", expectedTestValues.normalize, testValues.normalize, testType); - verifyTestVector3dValue("cross", expectedTestValues.cross, testValues.cross, testType); - verifyTestVector3dValue("bitCountVec", expectedTestValues.bitCountVec, testValues.bitCountVec, testType); - verifyTestVector3dValue("clampVec", expectedTestValues.clampVec, testValues.clampVec, testType); - verifyTestVector3dValue("findMSBVec", expectedTestValues.findMSBVec, testValues.findMSBVec, testType); - verifyTestVector3dValue("findLSBVec", expectedTestValues.findLSBVec, testValues.findLSBVec, testType); - verifyTestVector3dValue("minVec", expectedTestValues.minVec, testValues.minVec, testType); - verifyTestVector3dValue("maxVec", expectedTestValues.maxVec, testValues.maxVec, testType); - verifyTestVector3dValue("rsqrtVec", expectedTestValues.rsqrtVec, testValues.rsqrtVec, testType); - verifyTestVector3dValue("bitReverseVec", expectedTestValues.bitReverseVec, testValues.bitReverseVec, testType); - verifyTestVector3dValue("fracVec", expectedTestValues.fracVec, testValues.fracVec, testType); - verifyTestVector3dValue("mixVec", expectedTestValues.mixVec, testValues.mixVec, testType); + verifyTestValue("normalize", expectedTestValues.normalize, testValues.normalize, testIteration, seed, testType); + verifyTestValue("cross", expectedTestValues.cross, testValues.cross, testIteration, seed, testType); + verifyTestValue("bitCountVec", expectedTestValues.bitCountVec, testValues.bitCountVec, testIteration, seed, testType); + verifyTestValue("clampVec", expectedTestValues.clampVec, testValues.clampVec, testIteration, seed, testType); + verifyTestValue("findMSBVec", expectedTestValues.findMSBVec, testValues.findMSBVec, testIteration, seed, testType); + verifyTestValue("findLSBVec", expectedTestValues.findLSBVec, testValues.findLSBVec, testIteration, seed, testType); + verifyTestValue("minVec", expectedTestValues.minVec, testValues.minVec, testIteration, seed, testType); + verifyTestValue("maxVec", expectedTestValues.maxVec, testValues.maxVec, testIteration, seed, testType); + verifyTestValue("rsqrtVec", expectedTestValues.rsqrtVec, testValues.rsqrtVec, testIteration, seed, testType); + verifyTestValue("bitReverseVec", expectedTestValues.bitReverseVec, testValues.bitReverseVec, testIteration, seed, testType); + verifyTestValue("fracVec", expectedTestValues.fracVec, testValues.fracVec, testIteration, seed, testType); + verifyTestValue("mixVec", expectedTestValues.mixVec, testValues.mixVec, testIteration, seed, testType); - verifyTestVector3dValue("signVec", expectedTestValues.signVec, testValues.signVec, testType); - verifyTestVector3dValue("radiansVec", expectedTestValues.radiansVec, testValues.radiansVec, testType); - verifyTestVector3dValue("degreesVec", expectedTestValues.degreesVec, testValues.degreesVec, testType); - verifyTestVector3dValue("stepVec", expectedTestValues.stepVec, testValues.stepVec, testType); - verifyTestVector3dValue("smoothStepVec", expectedTestValues.smoothStepVec, testValues.smoothStepVec, testType); - verifyTestVector3dValue("faceForward", expectedTestValues.faceForward, testValues.faceForward, testType); - verifyTestVector3dValue("reflect", expectedTestValues.reflect, testValues.reflect, testType); - verifyTestVector3dValue("refract", expectedTestValues.refract, testValues.refract, testType); - verifyTestVector3dValue("addCarryVecResult", expectedTestValues.addCarryVec.result, testValues.addCarryVec.result, testType); - verifyTestVector3dValue("addCarryVecCarry", expectedTestValues.addCarryVec.carry, testValues.addCarryVec.carry, testType); - verifyTestVector3dValue("subBorrowVecResult", expectedTestValues.subBorrowVec.result, testValues.subBorrowVec.result, testType); - verifyTestVector3dValue("subBorrowVecBorrow", expectedTestValues.subBorrowVec.borrow, testValues.subBorrowVec.borrow, testType); + verifyTestValue("signVec", expectedTestValues.signVec, testValues.signVec, testIteration, seed, testType); + verifyTestValue("radiansVec", expectedTestValues.radiansVec, testValues.radiansVec, testIteration, seed, testType); + verifyTestValue("degreesVec", expectedTestValues.degreesVec, testValues.degreesVec, testIteration, seed, testType); + verifyTestValue("stepVec", expectedTestValues.stepVec, testValues.stepVec, testIteration, seed, testType); + verifyTestValue("smoothStepVec", expectedTestValues.smoothStepVec, testValues.smoothStepVec, testIteration, seed, testType); + verifyTestValue("faceForward", expectedTestValues.faceForward, testValues.faceForward, testIteration, seed, testType); + verifyTestValue("reflect", expectedTestValues.reflect, testValues.reflect, testIteration, seed, testType); + verifyTestValue("refract", expectedTestValues.refract, testValues.refract, testIteration, seed, testType); + verifyTestValue("addCarryVecResult", expectedTestValues.addCarryVec.result, testValues.addCarryVec.result, testIteration, seed, testType); + verifyTestValue("addCarryVecCarry", expectedTestValues.addCarryVec.carry, testValues.addCarryVec.carry, testIteration, seed, testType); + verifyTestValue("subBorrowVecResult", expectedTestValues.subBorrowVec.result, testValues.subBorrowVec.result, testIteration, seed, testType); + verifyTestValue("subBorrowVecBorrow", expectedTestValues.subBorrowVec.borrow, testValues.subBorrowVec.borrow, testIteration, seed, testType); - verifyTestMatrix3x3Value("mul", expectedTestValues.mul, testValues.mul, testType); - verifyTestMatrix3x3Value("transpose", expectedTestValues.transpose, testValues.transpose, testType); - verifyTestMatrix3x3Value("inverse", expectedTestValues.inverse, testValues.inverse, testType); + verifyTestValue("mul", expectedTestValues.mul, testValues.mul, testIteration, seed, testType); + verifyTestValue("transpose", expectedTestValues.transpose, testValues.transpose, testIteration, seed, testType); + verifyTestValue("inverse", expectedTestValues.inverse, testValues.inverse, testIteration, seed, testType); } }; diff --git a/22_CppCompat/CTgmathTester.h b/22_CppCompat/CTgmathTester.h index 63b0e483e..aa6c81d1c 100644 --- a/22_CppCompat/CTgmathTester.h +++ b/22_CppCompat/CTgmathTester.h @@ -3,358 +3,337 @@ #include "nbl/examples/examples.hpp" - #include "app_resources/common.hlsl" -#include "ITester.h" - +#include "nbl/examples/Tester/ITester.h" using namespace nbl; -class CTgmathTester final : public ITester +class CTgmathTester final : public ITester { + using base_t = ITester; + public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); + CTgmathTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; - std::uniform_real_distribution realDistributionNeg(-50.0f, -1.0f); - std::uniform_real_distribution realDistributionPos(1.0f, 50.0f); +private: + TgmathIntputTestValues generateInputTestValues() override + { std::uniform_real_distribution realDistribution(-100.0f, 100.0f); std::uniform_real_distribution realDistributionSmall(1.0f, 4.0f); std::uniform_int_distribution intDistribution(-100, 100); std::uniform_int_distribution coinFlipDistribution(0, 1); - m_logger->log("tgmath.hlsl TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - TgmathIntputTestValues testInput; - testInput.floor = realDistribution(mt); - testInput.isnan = coinFlipDistribution(mt) ? realDistribution(mt) : std::numeric_limits::quiet_NaN(); - testInput.isinf = coinFlipDistribution(mt) ? realDistribution(mt) : std::numeric_limits::infinity(); - testInput.powX = realDistributionSmall(mt); - testInput.powY = realDistributionSmall(mt); - testInput.exp = realDistributionSmall(mt); - testInput.exp2 = realDistributionSmall(mt); - testInput.log = realDistribution(mt); - testInput.log2 = realDistribution(mt); - testInput.absF = realDistribution(mt); - testInput.absI = intDistribution(mt); - testInput.sqrt = realDistribution(mt); - testInput.sin = realDistribution(mt); - testInput.cos = realDistribution(mt); - testInput.tan = realDistribution(mt); - testInput.asin = realDistribution(mt); - testInput.atan = realDistribution(mt); - testInput.sinh = realDistribution(mt); - testInput.cosh = realDistribution(mt); - testInput.tanh = realDistribution(mt); - testInput.asinh = realDistribution(mt); - testInput.acosh = realDistribution(mt); - testInput.atanh = realDistribution(mt); - testInput.atan2X = realDistribution(mt); - testInput.atan2Y = realDistribution(mt); - testInput.acos = realDistribution(mt); - testInput.modf = realDistribution(mt); - testInput.round = realDistribution(mt); - testInput.roundEven = coinFlipDistribution(mt) ? realDistributionSmall(mt) : (static_cast(intDistribution(mt) / 2) + 0.5f); - testInput.trunc = realDistribution(mt); - testInput.ceil = realDistribution(mt); - testInput.fmaX = realDistribution(mt); - testInput.fmaY = realDistribution(mt); - testInput.fmaZ = realDistribution(mt); - testInput.ldexpArg = realDistributionSmall(mt); - testInput.ldexpExp = intDistribution(mt); - testInput.erf = realDistribution(mt); - testInput.erfInv = realDistribution(mt); - - testInput.floorVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.isnanVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.isinfVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.powXVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.powYVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.expVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.exp2Vec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.logVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.log2Vec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.absFVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.absIVec = int32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); - testInput.sqrtVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.sinVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.cosVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.tanVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.asinVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atanVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.sinhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.coshVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.tanhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.asinhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.acoshVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atanhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atan2XVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atan2YVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.acosVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.modfVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.ldexpArgVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.ldexpExpVec = float32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); - testInput.erfVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.erfInvVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - - testInput.modfStruct = realDistribution(mt); - testInput.modfStructVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.frexpStruct = realDistribution(mt); - testInput.frexpStructVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); + TgmathIntputTestValues testInput; + testInput.floor = realDistribution(getRandomEngine()); + testInput.isnan = coinFlipDistribution(getRandomEngine()) ? realDistribution(getRandomEngine()) : std::numeric_limits::quiet_NaN(); + testInput.isinf = coinFlipDistribution(getRandomEngine()) ? realDistribution(getRandomEngine()) : std::numeric_limits::infinity(); + testInput.powX = realDistributionSmall(getRandomEngine()); + testInput.powY = realDistributionSmall(getRandomEngine()); + testInput.exp = realDistributionSmall(getRandomEngine()); + testInput.exp2 = realDistributionSmall(getRandomEngine()); + testInput.log = realDistribution(getRandomEngine()); + testInput.log2 = realDistribution(getRandomEngine()); + testInput.absF = realDistribution(getRandomEngine()); + testInput.absI = intDistribution(getRandomEngine()); + testInput.sqrt = realDistribution(getRandomEngine()); + testInput.sin = realDistribution(getRandomEngine()); + testInput.cos = realDistribution(getRandomEngine()); + testInput.tan = realDistribution(getRandomEngine()); + testInput.asin = realDistribution(getRandomEngine()); + testInput.atan = realDistribution(getRandomEngine()); + testInput.sinh = realDistribution(getRandomEngine()); + testInput.cosh = realDistribution(getRandomEngine()); + testInput.tanh = realDistribution(getRandomEngine()); + testInput.asinh = realDistribution(getRandomEngine()); + testInput.acosh = realDistribution(getRandomEngine()); + testInput.atanh = realDistribution(getRandomEngine()); + testInput.atan2X = realDistribution(getRandomEngine()); + testInput.atan2Y = realDistribution(getRandomEngine()); + testInput.acos = realDistribution(getRandomEngine()); + testInput.modf = realDistribution(getRandomEngine()); + testInput.round = realDistribution(getRandomEngine()); + testInput.roundEven = coinFlipDistribution(getRandomEngine()) ? realDistributionSmall(getRandomEngine()) : (static_cast(intDistribution(getRandomEngine()) / 2) + 0.5f); + testInput.trunc = realDistribution(getRandomEngine()); + testInput.ceil = realDistribution(getRandomEngine()); + testInput.fmaX = realDistribution(getRandomEngine()); + testInput.fmaY = realDistribution(getRandomEngine()); + testInput.fmaZ = realDistribution(getRandomEngine()); + testInput.ldexpArg = realDistributionSmall(getRandomEngine()); + testInput.ldexpExp = intDistribution(getRandomEngine()); + testInput.erf = realDistribution(getRandomEngine()); + testInput.erfInv = realDistribution(getRandomEngine()); + + testInput.floorVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.isnanVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.isinfVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.powXVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.powYVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.expVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.exp2Vec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.logVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.log2Vec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.absFVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.absIVec = int32_t3(intDistribution(getRandomEngine()), intDistribution(getRandomEngine()), intDistribution(getRandomEngine())); + testInput.sqrtVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.sinVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.cosVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.tanVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.asinVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.atanVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.sinhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.coshVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.tanhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.asinhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.acoshVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.atanhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.atan2XVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.atan2YVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.acosVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.modfVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.ldexpArgVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.ldexpExpVec = float32_t3(intDistribution(getRandomEngine()), intDistribution(getRandomEngine()), intDistribution(getRandomEngine())); + testInput.erfVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.erfInvVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + + testInput.modfStruct = realDistribution(getRandomEngine()); + testInput.modfStructVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.frexpStruct = realDistribution(getRandomEngine()); + testInput.frexpStructVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + + return testInput; + } - // use std library functions to determine expected test values, the output of functions from tgmath.hlsl will be verified against these values - TgmathTestValues expected; - expected.floor = std::floor(testInput.floor); - expected.isnan = std::isnan(testInput.isnan); - expected.isinf = std::isinf(testInput.isinf); - expected.pow = std::pow(testInput.powX, testInput.powY); - expected.exp = std::exp(testInput.exp); - expected.exp2 = std::exp2(testInput.exp2); - expected.log = std::log(testInput.log); - expected.log2 = std::log2(testInput.log2); - expected.absF = std::abs(testInput.absF); - expected.absI = std::abs(testInput.absI); - expected.sqrt = std::sqrt(testInput.sqrt); - expected.sin = std::sin(testInput.sin); - expected.cos = std::cos(testInput.cos); - expected.acos = std::acos(testInput.acos); - expected.tan = std::tan(testInput.tan); - expected.asin = std::asin(testInput.asin); - expected.atan = std::atan(testInput.atan); - expected.sinh = std::sinh(testInput.sinh); - expected.cosh = std::cosh(testInput.cosh); - expected.tanh = std::tanh(testInput.tanh); - expected.asinh = std::asinh(testInput.asinh); - expected.acosh = std::acosh(testInput.acosh); - expected.atanh = std::atanh(testInput.atanh); - expected.atan2 = std::atan2(testInput.atan2Y, testInput.atan2X); - expected.erf = std::erf(testInput.erf); + TgmathTestValues determineExpectedResults(const TgmathIntputTestValues& testInput) override + { + // use std library functions to determine expected test values, the output of functions from tgmath.hlsl will be verified against these values + TgmathTestValues expected; + expected.floor = std::floor(testInput.floor); + expected.isnan = std::isnan(testInput.isnan); + expected.isinf = std::isinf(testInput.isinf); + expected.pow = std::pow(testInput.powX, testInput.powY); + expected.exp = std::exp(testInput.exp); + expected.exp2 = std::exp2(testInput.exp2); + expected.log = std::log(testInput.log); + expected.log2 = std::log2(testInput.log2); + expected.absF = std::abs(testInput.absF); + expected.absI = std::abs(testInput.absI); + expected.sqrt = std::sqrt(testInput.sqrt); + expected.sin = std::sin(testInput.sin); + expected.cos = std::cos(testInput.cos); + expected.acos = std::acos(testInput.acos); + expected.tan = std::tan(testInput.tan); + expected.asin = std::asin(testInput.asin); + expected.atan = std::atan(testInput.atan); + expected.sinh = std::sinh(testInput.sinh); + expected.cosh = std::cosh(testInput.cosh); + expected.tanh = std::tanh(testInput.tanh); + expected.asinh = std::asinh(testInput.asinh); + expected.acosh = std::acosh(testInput.acosh); + expected.atanh = std::atanh(testInput.atanh); + expected.atan2 = std::atan2(testInput.atan2Y, testInput.atan2X); + expected.erf = std::erf(testInput.erf); + { + float tmp; + expected.modf = std::modf(testInput.modf, &tmp); + } + expected.round = std::round(testInput.round); + // TODO: uncomment when C++23 + //expected.roundEven = std::roundeven(testInput.roundEven); + // TODO: remove when C++23 + auto roundeven = [](const float& val) -> float { float tmp; - expected.modf = std::modf(testInput.modf, &tmp); - } - expected.round = std::round(testInput.round); - // TODO: uncomment when C++23 - //expected.roundEven = std::roundeven(testInput.roundEven); - // TODO: remove when C++23 - auto roundeven = [](const float& val) -> float + if (std::abs(std::modf(val, &tmp)) == 0.5f) { - float tmp; - if (std::abs(std::modf(val, &tmp)) == 0.5f) - { - int32_t result = static_cast(val); - if (result % 2 != 0) - result >= 0 ? ++result : --result; - return result; - } - - return std::round(val); - }; - expected.roundEven = roundeven(testInput.roundEven); - - expected.trunc = std::trunc(testInput.trunc); - expected.ceil = std::ceil(testInput.ceil); - expected.fma = std::fma(testInput.fmaX, testInput.fmaY, testInput.fmaZ); - expected.ldexp = std::ldexp(testInput.ldexpArg, testInput.ldexpExp); - - expected.floorVec = float32_t3(std::floor(testInput.floorVec.x), std::floor(testInput.floorVec.y), std::floor(testInput.floorVec.z)); - - expected.isnanVec = float32_t3(std::isnan(testInput.isnanVec.x), std::isnan(testInput.isnanVec.y), std::isnan(testInput.isnanVec.z)); - expected.isinfVec = float32_t3(std::isinf(testInput.isinfVec.x), std::isinf(testInput.isinfVec.y), std::isinf(testInput.isinfVec.z)); - - expected.powVec.x = std::pow(testInput.powXVec.x, testInput.powYVec.x); - expected.powVec.y = std::pow(testInput.powXVec.y, testInput.powYVec.y); - expected.powVec.z = std::pow(testInput.powXVec.z, testInput.powYVec.z); - - expected.expVec = float32_t3(std::exp(testInput.expVec.x), std::exp(testInput.expVec.y), std::exp(testInput.expVec.z)); - expected.exp2Vec = float32_t3(std::exp2(testInput.exp2Vec.x), std::exp2(testInput.exp2Vec.y), std::exp2(testInput.exp2Vec.z)); - expected.logVec = float32_t3(std::log(testInput.logVec.x), std::log(testInput.logVec.y), std::log(testInput.logVec.z)); - expected.log2Vec = float32_t3(std::log2(testInput.log2Vec.x), std::log2(testInput.log2Vec.y), std::log2(testInput.log2Vec.z)); - expected.absFVec = float32_t3(std::abs(testInput.absFVec.x), std::abs(testInput.absFVec.y), std::abs(testInput.absFVec.z)); - expected.absIVec = float32_t3(std::abs(testInput.absIVec.x), std::abs(testInput.absIVec.y), std::abs(testInput.absIVec.z)); - expected.sqrtVec = float32_t3(std::sqrt(testInput.sqrtVec.x), std::sqrt(testInput.sqrtVec.y), std::sqrt(testInput.sqrtVec.z)); - expected.cosVec = float32_t3(std::cos(testInput.cosVec.x), std::cos(testInput.cosVec.y), std::cos(testInput.cosVec.z)); - expected.sinVec = float32_t3(std::sin(testInput.sinVec.x), std::sin(testInput.sinVec.y), std::sin(testInput.sinVec.z)); - expected.tanVec = float32_t3(std::tan(testInput.tanVec.x), std::tan(testInput.tanVec.y), std::tan(testInput.tanVec.z)); - expected.asinVec = float32_t3(std::asin(testInput.asinVec.x), std::asin(testInput.asinVec.y), std::asin(testInput.asinVec.z)); - expected.atanVec = float32_t3(std::atan(testInput.atanVec.x), std::atan(testInput.atanVec.y), std::atan(testInput.atanVec.z)); - expected.sinhVec = float32_t3(std::sinh(testInput.sinhVec.x), std::sinh(testInput.sinhVec.y), std::sinh(testInput.sinhVec.z)); - expected.coshVec = float32_t3(std::cosh(testInput.coshVec.x), std::cosh(testInput.coshVec.y), std::cosh(testInput.coshVec.z)); - expected.tanhVec = float32_t3(std::tanh(testInput.tanhVec.x), std::tanh(testInput.tanhVec.y), std::tanh(testInput.tanhVec.z)); - expected.asinhVec = float32_t3(std::asinh(testInput.asinhVec.x), std::asinh(testInput.asinhVec.y), std::asinh(testInput.asinhVec.z)); - expected.acoshVec = float32_t3(std::acosh(testInput.acoshVec.x), std::acosh(testInput.acoshVec.y), std::acosh(testInput.acoshVec.z)); - expected.atanhVec = float32_t3(std::atanh(testInput.atanhVec.x), std::atanh(testInput.atanhVec.y), std::atanh(testInput.atanhVec.z)); - expected.atan2Vec = float32_t3(std::atan2(testInput.atan2YVec.x, testInput.atan2XVec.x), std::atan2(testInput.atan2YVec.y, testInput.atan2XVec.y), std::atan2(testInput.atan2YVec.z, testInput.atan2XVec.z)); - expected.acosVec = float32_t3(std::acos(testInput.acosVec.x), std::acos(testInput.acosVec.y), std::acos(testInput.acosVec.z)); - expected.erfVec = float32_t3(std::erf(testInput.erfVec.x), std::erf(testInput.erfVec.y), std::erf(testInput.erfVec.z)); - { - float tmp; - expected.modfVec = float32_t3(std::modf(testInput.modfVec.x, &tmp), std::modf(testInput.modfVec.y, &tmp), std::modf(testInput.modfVec.z, &tmp)); - } - expected.roundVec = float32_t3( - std::round(testInput.roundVec.x), - std::round(testInput.roundVec.y), - std::round(testInput.roundVec.z) - ); - // TODO: uncomment when C++23 - //expected.roundEven = float32_t( - // std::roundeven(testInput.roundEvenVec.x), - // std::roundeven(testInput.roundEvenVec.y), - // std::roundeven(testInput.roundEvenVec.z) - // ); - // TODO: remove when C++23 - expected.roundEvenVec = float32_t3( - roundeven(testInput.roundEvenVec.x), - roundeven(testInput.roundEvenVec.y), - roundeven(testInput.roundEvenVec.z) - ); - - expected.truncVec = float32_t3(std::trunc(testInput.truncVec.x), std::trunc(testInput.truncVec.y), std::trunc(testInput.truncVec.z)); - expected.ceilVec = float32_t3(std::ceil(testInput.ceilVec.x), std::ceil(testInput.ceilVec.y), std::ceil(testInput.ceilVec.z)); - expected.fmaVec = float32_t3( - std::fma(testInput.fmaXVec.x, testInput.fmaYVec.x, testInput.fmaZVec.x), - std::fma(testInput.fmaXVec.y, testInput.fmaYVec.y, testInput.fmaZVec.y), - std::fma(testInput.fmaXVec.z, testInput.fmaYVec.z, testInput.fmaZVec.z) - ); - expected.ldexpVec = float32_t3( - std::ldexp(testInput.ldexpArgVec.x, testInput.ldexpExpVec.x), - std::ldexp(testInput.ldexpArgVec.y, testInput.ldexpExpVec.y), - std::ldexp(testInput.ldexpArgVec.z, testInput.ldexpExpVec.z) - ); - - { - ModfOutput expectedModfStructOutput; - expectedModfStructOutput.fractionalPart = std::modf(testInput.modfStruct, &expectedModfStructOutput.wholeNumberPart); - expected.modfStruct = expectedModfStructOutput; - - ModfOutput expectedModfStructOutputVec; - for (int i = 0; i < 3; ++i) - expectedModfStructOutputVec.fractionalPart[i] = std::modf(testInput.modfStructVec[i], &expectedModfStructOutputVec.wholeNumberPart[i]); - expected.modfStructVec = expectedModfStructOutputVec; - } - - { - FrexpOutput expectedFrexpStructOutput; - expectedFrexpStructOutput.significand = std::frexp(testInput.frexpStruct, &expectedFrexpStructOutput.exponent); - expected.frexpStruct = expectedFrexpStructOutput; - - FrexpOutput expectedFrexpStructOutputVec; - for (int i = 0; i < 3; ++i) - expectedFrexpStructOutputVec.significand[i] = std::frexp(testInput.frexpStructVec[i], &expectedFrexpStructOutputVec.exponent[i]); - expected.frexpStructVec = expectedFrexpStructOutputVec; - } - - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); + int32_t result = static_cast(val); + if (result % 2 != 0) + result >= 0 ? ++result : --result; + return result; + } + + return std::round(val); + }; + expected.roundEven = roundeven(testInput.roundEven); + + expected.trunc = std::trunc(testInput.trunc); + expected.ceil = std::ceil(testInput.ceil); + expected.fma = std::fma(testInput.fmaX, testInput.fmaY, testInput.fmaZ); + expected.ldexp = std::ldexp(testInput.ldexpArg, testInput.ldexpExp); + + expected.floorVec = float32_t3(std::floor(testInput.floorVec.x), std::floor(testInput.floorVec.y), std::floor(testInput.floorVec.z)); + + expected.isnanVec = float32_t3(std::isnan(testInput.isnanVec.x), std::isnan(testInput.isnanVec.y), std::isnan(testInput.isnanVec.z)); + expected.isinfVec = float32_t3(std::isinf(testInput.isinfVec.x), std::isinf(testInput.isinfVec.y), std::isinf(testInput.isinfVec.z)); + + expected.powVec.x = std::pow(testInput.powXVec.x, testInput.powYVec.x); + expected.powVec.y = std::pow(testInput.powXVec.y, testInput.powYVec.y); + expected.powVec.z = std::pow(testInput.powXVec.z, testInput.powYVec.z); + + expected.expVec = float32_t3(std::exp(testInput.expVec.x), std::exp(testInput.expVec.y), std::exp(testInput.expVec.z)); + expected.exp2Vec = float32_t3(std::exp2(testInput.exp2Vec.x), std::exp2(testInput.exp2Vec.y), std::exp2(testInput.exp2Vec.z)); + expected.logVec = float32_t3(std::log(testInput.logVec.x), std::log(testInput.logVec.y), std::log(testInput.logVec.z)); + expected.log2Vec = float32_t3(std::log2(testInput.log2Vec.x), std::log2(testInput.log2Vec.y), std::log2(testInput.log2Vec.z)); + expected.absFVec = float32_t3(std::abs(testInput.absFVec.x), std::abs(testInput.absFVec.y), std::abs(testInput.absFVec.z)); + expected.absIVec = float32_t3(std::abs(testInput.absIVec.x), std::abs(testInput.absIVec.y), std::abs(testInput.absIVec.z)); + expected.sqrtVec = float32_t3(std::sqrt(testInput.sqrtVec.x), std::sqrt(testInput.sqrtVec.y), std::sqrt(testInput.sqrtVec.z)); + expected.cosVec = float32_t3(std::cos(testInput.cosVec.x), std::cos(testInput.cosVec.y), std::cos(testInput.cosVec.z)); + expected.sinVec = float32_t3(std::sin(testInput.sinVec.x), std::sin(testInput.sinVec.y), std::sin(testInput.sinVec.z)); + expected.tanVec = float32_t3(std::tan(testInput.tanVec.x), std::tan(testInput.tanVec.y), std::tan(testInput.tanVec.z)); + expected.asinVec = float32_t3(std::asin(testInput.asinVec.x), std::asin(testInput.asinVec.y), std::asin(testInput.asinVec.z)); + expected.atanVec = float32_t3(std::atan(testInput.atanVec.x), std::atan(testInput.atanVec.y), std::atan(testInput.atanVec.z)); + expected.sinhVec = float32_t3(std::sinh(testInput.sinhVec.x), std::sinh(testInput.sinhVec.y), std::sinh(testInput.sinhVec.z)); + expected.coshVec = float32_t3(std::cosh(testInput.coshVec.x), std::cosh(testInput.coshVec.y), std::cosh(testInput.coshVec.z)); + expected.tanhVec = float32_t3(std::tanh(testInput.tanhVec.x), std::tanh(testInput.tanhVec.y), std::tanh(testInput.tanhVec.z)); + expected.asinhVec = float32_t3(std::asinh(testInput.asinhVec.x), std::asinh(testInput.asinhVec.y), std::asinh(testInput.asinhVec.z)); + expected.acoshVec = float32_t3(std::acosh(testInput.acoshVec.x), std::acosh(testInput.acoshVec.y), std::acosh(testInput.acoshVec.z)); + expected.atanhVec = float32_t3(std::atanh(testInput.atanhVec.x), std::atanh(testInput.atanhVec.y), std::atanh(testInput.atanhVec.z)); + expected.atan2Vec = float32_t3(std::atan2(testInput.atan2YVec.x, testInput.atan2XVec.x), std::atan2(testInput.atan2YVec.y, testInput.atan2XVec.y), std::atan2(testInput.atan2YVec.z, testInput.atan2XVec.z)); + expected.acosVec = float32_t3(std::acos(testInput.acosVec.x), std::acos(testInput.acosVec.y), std::acos(testInput.acosVec.z)); + expected.erfVec = float32_t3(std::erf(testInput.erfVec.x), std::erf(testInput.erfVec.y), std::erf(testInput.erfVec.z)); + { + float tmp; + expected.modfVec = float32_t3(std::modf(testInput.modfVec.x, &tmp), std::modf(testInput.modfVec.y, &tmp), std::modf(testInput.modfVec.z, &tmp)); } - m_logger->log("tgmath.hlsl TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - } + expected.roundVec = float32_t3( + std::round(testInput.roundVec.x), + std::round(testInput.roundVec.y), + std::round(testInput.roundVec.z) + ); + // TODO: uncomment when C++23 + //expected.roundEven = float32_t( + // std::roundeven(testInput.roundEvenVec.x), + // std::roundeven(testInput.roundEvenVec.y), + // std::roundeven(testInput.roundEvenVec.z) + // ); + // TODO: remove when C++23 + expected.roundEvenVec = float32_t3( + roundeven(testInput.roundEvenVec.x), + roundeven(testInput.roundEvenVec.y), + roundeven(testInput.roundEvenVec.z) + ); + + expected.truncVec = float32_t3(std::trunc(testInput.truncVec.x), std::trunc(testInput.truncVec.y), std::trunc(testInput.truncVec.z)); + expected.ceilVec = float32_t3(std::ceil(testInput.ceilVec.x), std::ceil(testInput.ceilVec.y), std::ceil(testInput.ceilVec.z)); + expected.fmaVec = float32_t3( + std::fma(testInput.fmaXVec.x, testInput.fmaYVec.x, testInput.fmaZVec.x), + std::fma(testInput.fmaXVec.y, testInput.fmaYVec.y, testInput.fmaZVec.y), + std::fma(testInput.fmaXVec.z, testInput.fmaYVec.z, testInput.fmaZVec.z) + ); + expected.ldexpVec = float32_t3( + std::ldexp(testInput.ldexpArgVec.x, testInput.ldexpExpVec.x), + std::ldexp(testInput.ldexpArgVec.y, testInput.ldexpExpVec.y), + std::ldexp(testInput.ldexpArgVec.z, testInput.ldexpExpVec.z) + ); -private: - inline static constexpr int Iterations = 100u; + { + ModfOutput expectedModfStructOutput; + expectedModfStructOutput.fractionalPart = std::modf(testInput.modfStruct, &expectedModfStructOutput.wholeNumberPart); + expected.modfStruct = expectedModfStructOutput; + + ModfOutput expectedModfStructOutputVec; + for (int i = 0; i < 3; ++i) + expectedModfStructOutputVec.fractionalPart[i] = std::modf(testInput.modfStructVec[i], &expectedModfStructOutputVec.wholeNumberPart[i]); + expected.modfStructVec = expectedModfStructOutputVec; + } - void performCpuTests(const TgmathIntputTestValues& commonTestInputValues, const TgmathTestValues& expectedTestValues) - { - TgmathTestValues cpuTestValues; - cpuTestValues.fillTestValues(commonTestInputValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); - - } + { + FrexpOutput expectedFrexpStructOutput; + expectedFrexpStructOutput.significand = std::frexp(testInput.frexpStruct, &expectedFrexpStructOutput.exponent); + expected.frexpStruct = expectedFrexpStructOutput; + + FrexpOutput expectedFrexpStructOutputVec; + for (int i = 0; i < 3; ++i) + expectedFrexpStructOutputVec.significand[i] = std::frexp(testInput.frexpStructVec[i], &expectedFrexpStructOutputVec.exponent[i]); + expected.frexpStructVec = expectedFrexpStructOutputVec; + } - void performGpuTests(const TgmathIntputTestValues& commonTestInputValues, const TgmathTestValues& expectedTestValues) - { - TgmathTestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + return expected; } - void verifyTestValues(const TgmathTestValues& expectedTestValues, const TgmathTestValues& testValues, ITester::TestType testType) + void verifyTestResults(const TgmathTestValues& expectedTestValues, const TgmathTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override { // TODO: figure out input for functions: sinh, cosh so output isn't a crazy low number // very low numbers generate comparison errors - verifyTestValue("floor", expectedTestValues.floor, testValues.floor, testType); - verifyTestValue("isnan", expectedTestValues.isnan, testValues.isnan, testType); - verifyTestValue("isinf", expectedTestValues.isinf, testValues.isinf, testType); - verifyTestValue("pow", expectedTestValues.pow, testValues.pow, testType); - verifyTestValue("exp", expectedTestValues.exp, testValues.exp, testType); - verifyTestValue("exp2", expectedTestValues.exp2, testValues.exp2, testType); - verifyTestValue("log", expectedTestValues.log, testValues.log, testType); - verifyTestValue("log2", expectedTestValues.log2, testValues.log2, testType); - verifyTestValue("absF", expectedTestValues.absF, testValues.absF, testType); - verifyTestValue("absI", expectedTestValues.absI, testValues.absI, testType); - verifyTestValue("sqrt", expectedTestValues.sqrt, testValues.sqrt, testType); - verifyTestValue("sin", expectedTestValues.sin, testValues.sin, testType); - verifyTestValue("cos", expectedTestValues.cos, testValues.cos, testType); - verifyTestValue("acos", expectedTestValues.acos, testValues.acos, testType); - verifyTestValue("tan", expectedTestValues.tan, testValues.tan, testType); - verifyTestValue("asin", expectedTestValues.asin, testValues.asin, testType); - verifyTestValue("atan", expectedTestValues.atan, testValues.atan, testType); - //verifyTestValue("sinh", expectedTestValues.sinh, testValues.sinh, testType); - //verifyTestValue("cosh", expectedTestValues.cosh, testValues.cosh, testType); - verifyTestValue("tanh", expectedTestValues.tanh, testValues.tanh, testType); - verifyTestValue("asinh", expectedTestValues.asinh, testValues.asinh, testType); - verifyTestValue("acosh", expectedTestValues.acosh, testValues.acosh, testType); - verifyTestValue("atanh", expectedTestValues.atanh, testValues.atanh, testType); - verifyTestValue("atan2", expectedTestValues.atan2, testValues.atan2, testType); - verifyTestValue("modf", expectedTestValues.modf, testValues.modf, testType); - verifyTestValue("round", expectedTestValues.round, testValues.round, testType); - verifyTestValue("roundEven", expectedTestValues.roundEven, testValues.roundEven, testType); - verifyTestValue("trunc", expectedTestValues.trunc, testValues.trunc, testType); - verifyTestValue("ceil", expectedTestValues.ceil, testValues.ceil, testType); - verifyTestValue("fma", expectedTestValues.fma, testValues.fma, testType); - verifyTestValue("ldexp", expectedTestValues.ldexp, testValues.ldexp, testType); - verifyTestValue("erf", expectedTestValues.erf, testValues.erf, testType); - //verifyTestValue("erfInv", expectedTestValues.erfInv, testValues.erfInv, testType); - - verifyTestVector3dValue("floorVec", expectedTestValues.floorVec, testValues.floorVec, testType); - verifyTestVector3dValue("isnanVec", expectedTestValues.isnanVec, testValues.isnanVec, testType); - verifyTestVector3dValue("isinfVec", expectedTestValues.isinfVec, testValues.isinfVec, testType); - verifyTestVector3dValue("powVec", expectedTestValues.powVec, testValues.powVec, testType); - verifyTestVector3dValue("expVec", expectedTestValues.expVec, testValues.expVec, testType); - verifyTestVector3dValue("exp2Vec", expectedTestValues.exp2Vec, testValues.exp2Vec, testType); - verifyTestVector3dValue("logVec", expectedTestValues.logVec, testValues.logVec, testType); - verifyTestVector3dValue("log2Vec", expectedTestValues.log2Vec, testValues.log2Vec, testType); - verifyTestVector3dValue("absFVec", expectedTestValues.absFVec, testValues.absFVec, testType); - verifyTestVector3dValue("absIVec", expectedTestValues.absIVec, testValues.absIVec, testType); - verifyTestVector3dValue("sqrtVec", expectedTestValues.sqrtVec, testValues.sqrtVec, testType); - verifyTestVector3dValue("sinVec", expectedTestValues.sinVec, testValues.sinVec, testType); - verifyTestVector3dValue("cosVec", expectedTestValues.cosVec, testValues.cosVec, testType); - verifyTestVector3dValue("acosVec", expectedTestValues.acosVec, testValues.acosVec, testType); - verifyTestVector3dValue("modfVec", expectedTestValues.modfVec, testValues.modfVec, testType); - verifyTestVector3dValue("roundVec", expectedTestValues.roundVec, testValues.roundVec, testType); - verifyTestVector3dValue("roundEvenVec", expectedTestValues.roundEvenVec, testValues.roundEvenVec, testType); - verifyTestVector3dValue("truncVec", expectedTestValues.truncVec, testValues.truncVec, testType); - verifyTestVector3dValue("ceilVec", expectedTestValues.ceilVec, testValues.ceilVec, testType); - verifyTestVector3dValue("fmaVec", expectedTestValues.fmaVec, testValues.fmaVec, testType); - verifyTestVector3dValue("ldexp", expectedTestValues.ldexpVec, testValues.ldexpVec, testType); - verifyTestVector3dValue("tanVec", expectedTestValues.tanVec, testValues.tanVec, testType); - verifyTestVector3dValue("asinVec", expectedTestValues.asinVec, testValues.asinVec, testType); - verifyTestVector3dValue("atanVec", expectedTestValues.atanVec, testValues.atanVec, testType); - //verifyTestVector3dValue("sinhVec", expectedTestValues.sinhVec, testValues.sinhVec, testType); - //verifyTestVector3dValue("coshVec", expectedTestValues.coshVec, testValues.coshVec, testType); - verifyTestVector3dValue("tanhVec", expectedTestValues.tanhVec, testValues.tanhVec, testType); - verifyTestVector3dValue("asinhVec", expectedTestValues.asinhVec, testValues.asinhVec, testType); - verifyTestVector3dValue("acoshVec", expectedTestValues.acoshVec, testValues.acoshVec, testType); - verifyTestVector3dValue("atanhVec", expectedTestValues.atanhVec, testValues.atanhVec, testType); - verifyTestVector3dValue("atan2Vec", expectedTestValues.atan2Vec, testValues.atan2Vec, testType); - verifyTestVector3dValue("erfVec", expectedTestValues.erfVec, testValues.erfVec, testType); - //verifyTestVector3dValue("erfInvVec", expectedTestValues.erfInvVec, testValues.erfInvVec, testType); + verifyTestValue("floor", expectedTestValues.floor, testValues.floor, testIteration, seed, testType); + verifyTestValue("isnan", expectedTestValues.isnan, testValues.isnan, testIteration, seed, testType); + verifyTestValue("isinf", expectedTestValues.isinf, testValues.isinf, testIteration, seed, testType); + verifyTestValue("pow", expectedTestValues.pow, testValues.pow, testIteration, seed, testType); + verifyTestValue("exp", expectedTestValues.exp, testValues.exp, testIteration, seed, testType); + verifyTestValue("exp2", expectedTestValues.exp2, testValues.exp2, testIteration, seed, testType); + verifyTestValue("log", expectedTestValues.log, testValues.log, testIteration, seed, testType); + verifyTestValue("log2", expectedTestValues.log2, testValues.log2, testIteration, seed, testType); + verifyTestValue("absF", expectedTestValues.absF, testValues.absF, testIteration, seed, testType); + verifyTestValue("absI", expectedTestValues.absI, testValues.absI, testIteration, seed, testType); + verifyTestValue("sqrt", expectedTestValues.sqrt, testValues.sqrt, testIteration, seed, testType); + verifyTestValue("sin", expectedTestValues.sin, testValues.sin, testIteration, seed, testType); + verifyTestValue("cos", expectedTestValues.cos, testValues.cos, testIteration, seed, testType); + verifyTestValue("acos", expectedTestValues.acos, testValues.acos, testIteration, seed, testType); + verifyTestValue("tan", expectedTestValues.tan, testValues.tan, testIteration, seed, testType); + verifyTestValue("asin", expectedTestValues.asin, testValues.asin, testIteration, seed, testType); + verifyTestValue("atan", expectedTestValues.atan, testValues.atan, testIteration, seed, testType); + //verifyTestValue("sinh", expectedTestValues.sinh, testValues.sinh, testIteration, seed, testType); + //verifyTestValue("cosh", expectedTestValues.cosh, testValues.cosh, testIteration, seed, testType); + verifyTestValue("tanh", expectedTestValues.tanh, testValues.tanh, testIteration, seed, testType); + verifyTestValue("asinh", expectedTestValues.asinh, testValues.asinh, testIteration, seed, testType); + verifyTestValue("acosh", expectedTestValues.acosh, testValues.acosh, testIteration, seed, testType); + verifyTestValue("atanh", expectedTestValues.atanh, testValues.atanh, testIteration, seed, testType); + verifyTestValue("atan2", expectedTestValues.atan2, testValues.atan2, testIteration, seed, testType); + verifyTestValue("modf", expectedTestValues.modf, testValues.modf, testIteration, seed, testType); + verifyTestValue("round", expectedTestValues.round, testValues.round, testIteration, seed, testType); + verifyTestValue("roundEven", expectedTestValues.roundEven, testValues.roundEven, testIteration, seed, testType); + verifyTestValue("trunc", expectedTestValues.trunc, testValues.trunc, testIteration, seed, testType); + verifyTestValue("ceil", expectedTestValues.ceil, testValues.ceil, testIteration, seed, testType); + verifyTestValue("fma", expectedTestValues.fma, testValues.fma, testIteration, seed, testType); + verifyTestValue("ldexp", expectedTestValues.ldexp, testValues.ldexp, testIteration, seed, testType); + verifyTestValue("erf", expectedTestValues.erf, testValues.erf, testIteration, seed, testType); + //verifyTestValue("erfInv", expectedTestValues.erfInv, testValues.erfInv, testIteration, seed, testType); + + verifyTestValue("floorVec", expectedTestValues.floorVec, testValues.floorVec, testIteration, seed, testType); + verifyTestValue("isnanVec", expectedTestValues.isnanVec, testValues.isnanVec, testIteration, seed, testType); + verifyTestValue("isinfVec", expectedTestValues.isinfVec, testValues.isinfVec, testIteration, seed, testType); + verifyTestValue("powVec", expectedTestValues.powVec, testValues.powVec, testIteration, seed, testType); + verifyTestValue("expVec", expectedTestValues.expVec, testValues.expVec, testIteration, seed, testType); + verifyTestValue("exp2Vec", expectedTestValues.exp2Vec, testValues.exp2Vec, testIteration, seed, testType); + verifyTestValue("logVec", expectedTestValues.logVec, testValues.logVec, testIteration, seed, testType); + verifyTestValue("log2Vec", expectedTestValues.log2Vec, testValues.log2Vec, testIteration, seed, testType); + verifyTestValue("absFVec", expectedTestValues.absFVec, testValues.absFVec, testIteration, seed, testType); + verifyTestValue("absIVec", expectedTestValues.absIVec, testValues.absIVec, testIteration, seed, testType); + verifyTestValue("sqrtVec", expectedTestValues.sqrtVec, testValues.sqrtVec, testIteration, seed, testType); + verifyTestValue("sinVec", expectedTestValues.sinVec, testValues.sinVec, testIteration, seed, testType); + verifyTestValue("cosVec", expectedTestValues.cosVec, testValues.cosVec, testIteration, seed, testType); + verifyTestValue("acosVec", expectedTestValues.acosVec, testValues.acosVec, testIteration, seed, testType); + verifyTestValue("modfVec", expectedTestValues.modfVec, testValues.modfVec, testIteration, seed, testType); + verifyTestValue("roundVec", expectedTestValues.roundVec, testValues.roundVec, testIteration, seed, testType); + verifyTestValue("roundEvenVec", expectedTestValues.roundEvenVec, testValues.roundEvenVec, testIteration, seed, testType); + verifyTestValue("truncVec", expectedTestValues.truncVec, testValues.truncVec, testIteration, seed, testType); + verifyTestValue("ceilVec", expectedTestValues.ceilVec, testValues.ceilVec, testIteration, seed, testType); + verifyTestValue("fmaVec", expectedTestValues.fmaVec, testValues.fmaVec, testIteration, seed, testType); + verifyTestValue("ldexp", expectedTestValues.ldexpVec, testValues.ldexpVec, testIteration, seed, testType); + verifyTestValue("tanVec", expectedTestValues.tanVec, testValues.tanVec, testIteration, seed, testType); + verifyTestValue("asinVec", expectedTestValues.asinVec, testValues.asinVec, testIteration, seed, testType); + verifyTestValue("atanVec", expectedTestValues.atanVec, testValues.atanVec, testIteration, seed, testType); + //verifyTestValue("sinhVec", expectedTestValues.sinhVec, testValues.sinhVec, testIteration, seed, testType); + //verifyTestValue("coshVec", expectedTestValues.coshVec, testValues.coshVec, testIteration, seed, testType); + verifyTestValue("tanhVec", expectedTestValues.tanhVec, testValues.tanhVec, testIteration, seed, testType); + verifyTestValue("asinhVec", expectedTestValues.asinhVec, testValues.asinhVec, testIteration, seed, testType); + verifyTestValue("acoshVec", expectedTestValues.acoshVec, testValues.acoshVec, testIteration, seed, testType); + verifyTestValue("atanhVec", expectedTestValues.atanhVec, testValues.atanhVec, testIteration, seed, testType); + verifyTestValue("atan2Vec", expectedTestValues.atan2Vec, testValues.atan2Vec, testIteration, seed, testType); + verifyTestValue("erfVec", expectedTestValues.erfVec, testValues.erfVec, testIteration, seed, testType); + //verifyTestValue("erfInvVec", expectedTestValues.erfInvVec, testValues.erfInvVec, testIteration, seed, testType); // verify output of struct producing functions - verifyTestValue("modfStruct", expectedTestValues.modfStruct.fractionalPart, testValues.modfStruct.fractionalPart, testType); - verifyTestValue("modfStruct", expectedTestValues.modfStruct.wholeNumberPart, testValues.modfStruct.wholeNumberPart, testType); - verifyTestVector3dValue("modfStructVec", expectedTestValues.modfStructVec.fractionalPart, testValues.modfStructVec.fractionalPart, testType); - verifyTestVector3dValue("modfStructVec", expectedTestValues.modfStructVec.wholeNumberPart, testValues.modfStructVec.wholeNumberPart, testType); - - verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.significand, testValues.frexpStruct.significand, testType); - verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.exponent, testValues.frexpStruct.exponent, testType); - verifyTestVector3dValue("frexpStructVec", expectedTestValues.frexpStructVec.significand, testValues.frexpStructVec.significand, testType); - verifyTestVector3dValue("frexpStructVec", expectedTestValues.frexpStructVec.exponent, testValues.frexpStructVec.exponent, testType); + verifyTestValue("modfStruct", expectedTestValues.modfStruct.fractionalPart, testValues.modfStruct.fractionalPart, testIteration, seed, testType); + verifyTestValue("modfStruct", expectedTestValues.modfStruct.wholeNumberPart, testValues.modfStruct.wholeNumberPart, testIteration, seed, testType); + verifyTestValue("modfStructVec", expectedTestValues.modfStructVec.fractionalPart, testValues.modfStructVec.fractionalPart, testIteration, seed, testType); + verifyTestValue("modfStructVec", expectedTestValues.modfStructVec.wholeNumberPart, testValues.modfStructVec.wholeNumberPart, testIteration, seed, testType); + + verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.significand, testValues.frexpStruct.significand, testIteration, seed, testType); + verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.exponent, testValues.frexpStruct.exponent, testIteration, seed, testType); + verifyTestValue("frexpStructVec", expectedTestValues.frexpStructVec.significand, testValues.frexpStructVec.significand, testIteration, seed, testType); + verifyTestValue("frexpStructVec", expectedTestValues.frexpStructVec.exponent, testValues.frexpStructVec.exponent, testIteration, seed, testType); } }; diff --git a/22_CppCompat/ITester.h b/22_CppCompat/ITester.h deleted file mode 100644 index 39ceb8141..000000000 --- a/22_CppCompat/ITester.h +++ /dev/null @@ -1,337 +0,0 @@ -#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ -#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ - - -#include "nbl/examples/examples.hpp" - -#include "app_resources/common.hlsl" -#include "nbl/asset/metadata/CHLSLMetadata.h" - - -using namespace nbl; - -class ITester -{ -public: - virtual ~ITester() - { - m_outputBufferAllocation.memory->unmap(); - }; - - struct PipelineSetupData - { - std::string testShaderPath; - - core::smart_refctd_ptr device; - core::smart_refctd_ptr api; - core::smart_refctd_ptr assetMgr; - core::smart_refctd_ptr logger; - video::IPhysicalDevice* physicalDevice; - uint32_t computeFamilyIndex; - }; - - template - void setupPipeline(const PipelineSetupData& pipleineSetupData) - { - // setting up pipeline in the constructor - m_device = core::smart_refctd_ptr(pipleineSetupData.device); - m_physicalDevice = pipleineSetupData.physicalDevice; - m_api = core::smart_refctd_ptr(pipleineSetupData.api); - m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr); - m_logger = core::smart_refctd_ptr(pipleineSetupData.logger); - m_queueFamily = pipleineSetupData.computeFamilyIndex; - m_semaphoreCounter = 0; - m_semaphore = m_device->createSemaphore(0); - m_cmdpool = m_device->createCommandPool(m_queueFamily, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) - logFail("Failed to create Command Buffers!\n"); - - // Load shaders, set up pipeline - core::smart_refctd_ptr shader; - auto shaderStage = ESS_UNKNOWN; - { - asset::IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty() || assetBundle.getAssetType() != asset::IAsset::ET_SHADER) - { - logFail("Could not load shader!"); - assert(0); - } - - // It would be super weird if loading a shader from a file produced more than 1 asset - assert(assets.size() == 1); - core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - const auto hlslMetadata = static_cast(assetBundle.getMetadata()); - shaderStage = hlslMetadata->shaderStages->front(); - - auto* compilerSet = m_assetMgr->getCompilerSet(); - - asset::IShaderCompiler::SCompilerOptions options = {}; - options.stage = shaderStage; - options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - options.debugInfoFlags |= asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = m_logger.get(); - options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); - - shader = compilerSet->compileToSPIRV(source.get(), options); - } - - if (!shader) - logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V we're feeding it!\n"); - - video::IGPUDescriptorSetLayout::SBinding bindings[2] = { - { - .binding = 0, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - }, - { - .binding = 1, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - } - }; - - core::smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); - if (!dsLayout) - logFail("Failed to create a Descriptor Layout!\n"); - - m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout)); - if (!m_pplnLayout) - logFail("Failed to create a Pipeline Layout!\n"); - - { - video::IGPUComputePipeline::SCreationParams params = {}; - params.layout = m_pplnLayout.get(); - params.shader.entryPoint = "main"; - params.shader.shader = shader.get(); - if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) - logFail("Failed to create pipelines (compile & link shaders)!\n"); - } - - // Allocate memory of the input buffer - { - constexpr size_t BufferSize = sizeof(InputStruct); - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr inputBuff = m_device->createBuffer(std::move(params)); - if (!inputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - - inputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_inputBufferAllocation.isValid()) - logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); - - assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - m_ds = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(inputBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - // Allocate memory of the output buffer - { - constexpr size_t BufferSize = sizeof(OutputStruct); - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr outputBuff = m_device->createBuffer(std::move(params)); - if (!outputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - - outputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_outputBufferAllocation = m_device->allocate(reqs, outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_outputBufferAllocation.isValid()) - logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); - - assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(outputBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 1,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - if (!m_outputBufferAllocation.memory->map({ 0ull,m_outputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize()); - if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - assert(memoryRange.valid() && memoryRange.length >= sizeof(OutputStruct)); - - m_queue = m_device->getQueue(m_queueFamily, 0); - } - - enum class TestType - { - CPU, - GPU - }; - - template - void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) - { - static constexpr float MaxAllowedError = 0.1f; - if (std::abs(double(expectedVal) - double(testVal)) <= MaxAllowedError) - return; - - std::stringstream ss; - switch (testType) - { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; - break; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; - } - - ss << "nbl::hlsl::" << memberName << " produced incorrect output! test value: " << testVal << " expected value: " << expectedVal << '\n'; - - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); - } - - template - void verifyTestVector3dValue(const std::string& memberName, const nbl::hlsl::vector& expectedVal, const nbl::hlsl::vector& testVal, const TestType testType) - { - static constexpr float MaxAllowedError = 0.1f; - if (std::abs(double(expectedVal.x) - double(testVal.x)) <= MaxAllowedError && - std::abs(double(expectedVal.y) - double(testVal.y)) <= MaxAllowedError && - std::abs(double(expectedVal.z) - double(testVal.z)) <= MaxAllowedError) - return; - - std::stringstream ss; - switch (testType) - { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; - } - - ss << "nbl::hlsl::" << memberName << " produced incorrect output! test value: " << - testVal.x << ' ' << testVal.y << ' ' << testVal.z << - " expected value: " << expectedVal.x << ' ' << expectedVal.y << ' ' << expectedVal.z << '\n'; - - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); - } - - template - void verifyTestMatrix3x3Value(const std::string& memberName, const nbl::hlsl::matrix& expectedVal, const nbl::hlsl::matrix& testVal, const TestType testType) - { - for (int i = 0; i < 3; ++i) - { - auto expectedValRow = expectedVal[i]; - auto testValRow = testVal[i]; - verifyTestVector3dValue(memberName, expectedValRow, testValRow, testType); - } - } - -protected: - uint32_t m_queueFamily; - core::smart_refctd_ptr m_device; - core::smart_refctd_ptr m_api; - video::IPhysicalDevice* m_physicalDevice; - core::smart_refctd_ptr m_assetMgr; - core::smart_refctd_ptr m_logger; - video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {}; - video::IDeviceMemoryAllocator::SAllocation m_outputBufferAllocation = {}; - core::smart_refctd_ptr m_cmdbuf = nullptr; - core::smart_refctd_ptr m_cmdpool = nullptr; - core::smart_refctd_ptr m_ds = nullptr; - core::smart_refctd_ptr m_pplnLayout = nullptr; - core::smart_refctd_ptr m_pipeline; - core::smart_refctd_ptr m_semaphore; - video::IQueue* m_queue; - uint64_t m_semaphoreCounter; - - template - OutputStruct dispatch(const InputStruct& input) - { - // Update input buffer - if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize()); - if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), &input, sizeof(InputStruct)); - - m_inputBufferAllocation.memory->unmap(); - - // record command buffer - m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); - m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); - m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); - m_cmdbuf->bindComputePipeline(m_pipeline.get()); - m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); - m_cmdbuf->dispatch(1, 1, 1); - m_cmdbuf->endDebugMarker(); - m_cmdbuf->end(); - - video::IQueue::SSubmitInfo submitInfos[1] = {}; - const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; - submitInfos[0].commandBuffers = cmdbufs; - const video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; - submitInfos[0].signalSemaphores = signals; - - m_api->startCapture(); - m_queue->submit(submitInfos); - m_api->endCapture(); - - m_device->waitIdle(); - OutputStruct output; - std::memcpy(&output, static_cast(m_outputBufferAllocation.memory->getMappedPointer()), sizeof(OutputStruct)); - m_device->waitIdle(); - - return output; - } - -private: - template - inline void logFail(const char* msg, Args&&... args) - { - m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); - exit(-1); - } -}; - -#endif \ No newline at end of file diff --git a/22_CppCompat/app_resources/common.hlsl b/22_CppCompat/app_resources/common.hlsl index dc3ff5fcd..7fed20bbe 100644 --- a/22_CppCompat/app_resources/common.hlsl +++ b/22_CppCompat/app_resources/common.hlsl @@ -208,82 +208,6 @@ struct TgmathTestValues ModfOutput modfStructVec; FrexpOutput frexpStruct; FrexpOutput frexpStructVec; - - void fillTestValues(NBL_CONST_REF_ARG(TgmathIntputTestValues) input) - { - floor = nbl::hlsl::floor(input.floor); - isnan = nbl::hlsl::isnan(input.isnan); - isinf = nbl::hlsl::isinf(input.isinf); - pow = nbl::hlsl::pow(input.powX, input.powY); - exp = nbl::hlsl::exp(input.exp); - exp2 = nbl::hlsl::exp2(input.exp2); - log = nbl::hlsl::log(input.log); - log2 = nbl::hlsl::log2(input.log2); - absF = nbl::hlsl::abs(input.absF); - absI = nbl::hlsl::abs(input.absI); - sqrt = nbl::hlsl::sqrt(input.sqrt); - sin = nbl::hlsl::sin(input.sin); - cos = nbl::hlsl::cos(input.cos); - tan = nbl::hlsl::tan(input.tan); - asin = nbl::hlsl::asin(input.asin); - atan = nbl::hlsl::atan(input.atan); - sinh = nbl::hlsl::sinh(input.sinh); - cosh = nbl::hlsl::cosh(input.cosh); - tanh = nbl::hlsl::tanh(input.tanh); - asinh = nbl::hlsl::asinh(input.asinh); - acosh = nbl::hlsl::acosh(input.acosh); - atanh = nbl::hlsl::atanh(input.atanh); - atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); - erf = nbl::hlsl::erf(input.erf); - erfInv = nbl::hlsl::erfInv(input.erfInv); - acos = nbl::hlsl::acos(input.acos); - modf = nbl::hlsl::modf(input.modf); - round = nbl::hlsl::round(input.round); - roundEven = nbl::hlsl::roundEven(input.roundEven); - trunc = nbl::hlsl::trunc(input.trunc); - ceil = nbl::hlsl::ceil(input.ceil); - fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); - ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); - - floorVec = nbl::hlsl::floor(input.floorVec); - isnanVec = nbl::hlsl::isnan(input.isnanVec); - isinfVec = nbl::hlsl::isinf(input.isinfVec); - powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); - expVec = nbl::hlsl::exp(input.expVec); - exp2Vec = nbl::hlsl::exp2(input.exp2Vec); - logVec = nbl::hlsl::log(input.logVec); - log2Vec = nbl::hlsl::log2(input.log2Vec); - absFVec = nbl::hlsl::abs(input.absFVec); - absIVec = nbl::hlsl::abs(input.absIVec); - sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); - sinVec = nbl::hlsl::sin(input.sinVec); - cosVec = nbl::hlsl::cos(input.cosVec); - tanVec = nbl::hlsl::tan(input.tanVec); - asinVec = nbl::hlsl::asin(input.asinVec); - atanVec = nbl::hlsl::atan(input.atanVec); - sinhVec = nbl::hlsl::sinh(input.sinhVec); - coshVec = nbl::hlsl::cosh(input.coshVec); - tanhVec = nbl::hlsl::tanh(input.tanhVec); - asinhVec = nbl::hlsl::asinh(input.asinhVec); - acoshVec = nbl::hlsl::acosh(input.acoshVec); - atanhVec = nbl::hlsl::atanh(input.atanhVec); - atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); - acosVec = nbl::hlsl::acos(input.acosVec); - modfVec = nbl::hlsl::modf(input.modfVec); - roundVec = nbl::hlsl::round(input.roundVec); - roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); - truncVec = nbl::hlsl::trunc(input.truncVec); - ceilVec = nbl::hlsl::ceil(input.ceilVec); - fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); - ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); - erfVec = nbl::hlsl::erf(input.erfVec); - erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); - - modfStruct = nbl::hlsl::modfStruct(input.modfStruct); - modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); - frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); - frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); - } }; struct IntrinsicsIntputTestValues @@ -416,58 +340,140 @@ struct IntrinsicsTestValues spirv::SubBorrowOutput subBorrow; spirv::AddCarryOutput addCarryVec; spirv::SubBorrowOutput subBorrowVec; +}; - void fillTestValues(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input) +struct IntrinsicsTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input, NBL_REF_ARG(IntrinsicsTestValues) output) { - bitCount = nbl::hlsl::bitCount(input.bitCount); - cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); - clamp = nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); - length = nbl::hlsl::length(input.length); - normalize = nbl::hlsl::normalize(input.normalize); - dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); - determinant = nbl::hlsl::determinant(input.determinant); - findMSB = nbl::hlsl::findMSB(input.findMSB); - findLSB = nbl::hlsl::findLSB(input.findLSB); - inverse = nbl::hlsl::inverse(input.inverse); - transpose = nbl::hlsl::transpose(input.transpose); - mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); + output.bitCount = nbl::hlsl::bitCount(input.bitCount); + output.cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); + output.clamp = nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); + output.length = nbl::hlsl::length(input.length); + output.normalize = nbl::hlsl::normalize(input.normalize); + output.dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); + output.determinant = nbl::hlsl::determinant(input.determinant); + output.findMSB = nbl::hlsl::findMSB(input.findMSB); + output.findLSB = nbl::hlsl::findLSB(input.findLSB); + output.inverse = nbl::hlsl::inverse(input.inverse); + output.transpose = nbl::hlsl::transpose(input.transpose); + output.mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); // TODO: fix min and max - min = nbl::hlsl::min(input.minA, input.minB); - max = nbl::hlsl::max(input.maxA, input.maxB); - rsqrt = nbl::hlsl::rsqrt(input.rsqrt); - bitReverse = nbl::hlsl::bitReverse(input.bitReverse); - frac = nbl::hlsl::fract(input.frac); - mix = nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); - sign = nbl::hlsl::sign(input.sign); - radians = nbl::hlsl::radians(input.radians); - degrees = nbl::hlsl::degrees(input.degrees); - step = nbl::hlsl::step(input.stepEdge, input.stepX); - smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); - - bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); - clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); - findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); - findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); + output.min = nbl::hlsl::min(input.minA, input.minB); + output.max = nbl::hlsl::max(input.maxA, input.maxB); + output.rsqrt = nbl::hlsl::rsqrt(input.rsqrt); + output.bitReverse = nbl::hlsl::bitReverse(input.bitReverse); + output.frac = nbl::hlsl::fract(input.frac); + output.mix = nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); + output.sign = nbl::hlsl::sign(input.sign); + output.radians = nbl::hlsl::radians(input.radians); + output.degrees = nbl::hlsl::degrees(input.degrees); + output.step = nbl::hlsl::step(input.stepEdge, input.stepX); + output.smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); + + output.bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); + output.clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); + output.findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); + output.findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); // TODO: fix min and max - minVec = nbl::hlsl::min(input.minAVec, input.minBVec); - maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); - rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); - bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); - fracVec = nbl::hlsl::fract(input.fracVec); - mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); - - signVec = nbl::hlsl::sign(input.signVec); - radiansVec = nbl::hlsl::radians(input.radiansVec); - degreesVec = nbl::hlsl::degrees(input.degreesVec); - stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); - smoothStepVec = nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); - faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); - reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); - refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); - addCarry = nbl::hlsl::addCarry(input.addCarryA, input.addCarryB); - subBorrow = nbl::hlsl::subBorrow(input.subBorrowA, input.subBorrowB); - addCarryVec = nbl::hlsl::addCarry(input.addCarryAVec, input.addCarryBVec); - subBorrowVec = nbl::hlsl::subBorrow(input.subBorrowAVec, input.subBorrowBVec); + output.minVec = nbl::hlsl::min(input.minAVec, input.minBVec); + output.maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); + output.rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); + output.bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); + output.fracVec = nbl::hlsl::fract(input.fracVec); + output.mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); + + output.signVec = nbl::hlsl::sign(input.signVec); + output.radiansVec = nbl::hlsl::radians(input.radiansVec); + output.degreesVec = nbl::hlsl::degrees(input.degreesVec); + output.stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); + output.smoothStepVec = nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); + output.faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); + output.reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); + output.refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); + output.addCarry = nbl::hlsl::addCarry(input.addCarryA, input.addCarryB); + output.subBorrow = nbl::hlsl::subBorrow(input.subBorrowA, input.subBorrowB); + output.addCarryVec = nbl::hlsl::addCarry(input.addCarryAVec, input.addCarryBVec); + output.subBorrowVec = nbl::hlsl::subBorrow(input.subBorrowAVec, input.subBorrowBVec); + } +}; + +struct TgmathTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(TgmathIntputTestValues) input, NBL_REF_ARG(TgmathTestValues) output) + { + output.floor = nbl::hlsl::floor(input.floor); + output.isnan = nbl::hlsl::isnan(input.isnan); + output.isinf = nbl::hlsl::isinf(input.isinf); + output.pow = nbl::hlsl::pow(input.powX, input.powY); + output.exp = nbl::hlsl::exp(input.exp); + output.exp2 = nbl::hlsl::exp2(input.exp2); + output.log = nbl::hlsl::log(input.log); + output.log2 = nbl::hlsl::log2(input.log2); + output.absF = nbl::hlsl::abs(input.absF); + output.absI = nbl::hlsl::abs(input.absI); + output.sqrt = nbl::hlsl::sqrt(input.sqrt); + output.sin = nbl::hlsl::sin(input.sin); + output.cos = nbl::hlsl::cos(input.cos); + output.tan = nbl::hlsl::tan(input.tan); + output.asin = nbl::hlsl::asin(input.asin); + output.atan = nbl::hlsl::atan(input.atan); + output.sinh = nbl::hlsl::sinh(input.sinh); + output.cosh = nbl::hlsl::cosh(input.cosh); + output.tanh = nbl::hlsl::tanh(input.tanh); + output.asinh = nbl::hlsl::asinh(input.asinh); + output.acosh = nbl::hlsl::acosh(input.acosh); + output.atanh = nbl::hlsl::atanh(input.atanh); + output.atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); + output.erf = nbl::hlsl::erf(input.erf); + output.erfInv = nbl::hlsl::erfInv(input.erfInv); + output.acos = nbl::hlsl::acos(input.acos); + output.modf = nbl::hlsl::modf(input.modf); + output.round = nbl::hlsl::round(input.round); + output.roundEven = nbl::hlsl::roundEven(input.roundEven); + output.trunc = nbl::hlsl::trunc(input.trunc); + output.ceil = nbl::hlsl::ceil(input.ceil); + output.fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); + output.ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); + + output.floorVec = nbl::hlsl::floor(input.floorVec); + output.isnanVec = nbl::hlsl::isnan(input.isnanVec); + output.isinfVec = nbl::hlsl::isinf(input.isinfVec); + output.powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); + output.expVec = nbl::hlsl::exp(input.expVec); + output.exp2Vec = nbl::hlsl::exp2(input.exp2Vec); + output.logVec = nbl::hlsl::log(input.logVec); + output.log2Vec = nbl::hlsl::log2(input.log2Vec); + output.absFVec = nbl::hlsl::abs(input.absFVec); + output.absIVec = nbl::hlsl::abs(input.absIVec); + output.sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); + output.sinVec = nbl::hlsl::sin(input.sinVec); + output.cosVec = nbl::hlsl::cos(input.cosVec); + output.tanVec = nbl::hlsl::tan(input.tanVec); + output.asinVec = nbl::hlsl::asin(input.asinVec); + output.atanVec = nbl::hlsl::atan(input.atanVec); + output.sinhVec = nbl::hlsl::sinh(input.sinhVec); + output.coshVec = nbl::hlsl::cosh(input.coshVec); + output.tanhVec = nbl::hlsl::tanh(input.tanhVec); + output.asinhVec = nbl::hlsl::asinh(input.asinhVec); + output.acoshVec = nbl::hlsl::acosh(input.acoshVec); + output.atanhVec = nbl::hlsl::atanh(input.atanhVec); + output.atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); + output.acosVec = nbl::hlsl::acos(input.acosVec); + output.modfVec = nbl::hlsl::modf(input.modfVec); + output.roundVec = nbl::hlsl::round(input.roundVec); + output.roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); + output.truncVec = nbl::hlsl::trunc(input.truncVec); + output.ceilVec = nbl::hlsl::ceil(input.ceilVec); + output.fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); + output.ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); + output.erfVec = nbl::hlsl::erf(input.erfVec); + output.erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); + + output.modfStruct = nbl::hlsl::modfStruct(input.modfStruct); + output.modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); + output.frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); + output.frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); } }; diff --git a/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl b/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl index df7cef1cf..5fe3b4c20 100644 --- a/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl +++ b/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl @@ -4,13 +4,16 @@ #pragma shader_stage(compute) #include "common.hlsl" +#include [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(256, 1, 1)] -void main(uint3 invocationID : SV_DispatchThreadID) +[numthreads(WORKGROUP_SIZE, 1, 1)] +[shader("compute")] +void main() { - if(invocationID.x == 0) - outputTestValues[0].fillTestValues(inputTestValues[0]); -} + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + IntrinsicsTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +} \ No newline at end of file diff --git a/22_CppCompat/app_resources/tgmathTest.comp.hlsl b/22_CppCompat/app_resources/tgmathTest.comp.hlsl index 5d93ffb64..6115eebc6 100644 --- a/22_CppCompat/app_resources/tgmathTest.comp.hlsl +++ b/22_CppCompat/app_resources/tgmathTest.comp.hlsl @@ -4,13 +4,16 @@ #pragma shader_stage(compute) #include "common.hlsl" +#include [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(256, 1, 1)] -void main(uint3 invocationID : SV_DispatchThreadID) +[numthreads(WORKGROUP_SIZE, 1, 1)] +[shader("compute")] +void main() { - if(invocationID.x == 0) - outputTestValues[0].fillTestValues(inputTestValues[0]); -} + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + TgmathTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +} \ No newline at end of file diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index 70c8d7b3a..9bfcbb894 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -59,25 +59,33 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat if (!asset_base_t::onAppInitialized(std::move(system))) return false; - ITester::PipelineSetupData pplnSetupData; - pplnSetupData.device = m_device; - pplnSetupData.api = m_api; - pplnSetupData.assetMgr = m_assetMgr; - pplnSetupData.logger = m_logger; - pplnSetupData.physicalDevice = m_physicalDevice; - pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); - { - CTgmathTester tgmathTester; + CTgmathTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/tgmathTest.comp.hlsl"; - tgmathTester.setupPipeline(pplnSetupData); - tgmathTester.performTests(); + + CTgmathTester tgmathTester(4); + tgmathTester.setupPipeline(pplnSetupData); + tgmathTester.performTestsAndVerifyResults(); } { - CIntrinsicsTester intrinsicsTester; + CIntrinsicsTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/intrinsicsTest.comp.hlsl"; - intrinsicsTester.setupPipeline(pplnSetupData); - intrinsicsTester.performTests(); + + CIntrinsicsTester intrinsicsTester(4); + intrinsicsTester.setupPipeline(pplnSetupData); + intrinsicsTester.performTestsAndVerifyResults(); } m_queue = m_device->getQueue(0, 0); From 2a7a800195f945981ce8ade4f07c31f14925cfb5 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 17 Dec 2025 18:36:53 +0100 Subject: [PATCH 106/219] Implemented relative approx compare --- 22_CppCompat/CIntrinsicsTester.h | 18 ++++++------ 22_CppCompat/main.cpp | 2 ++ 64_EmulatedFloatTest/main.cpp | 1 + common/include/nbl/examples/Tester/ITester.h | 31 ++++++++++++++++++-- 4 files changed, 41 insertions(+), 11 deletions(-) diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index c92df0079..dfc1ab5e0 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -204,8 +204,8 @@ class CIntrinsicsTester final : public ITester&& system) override { + ieee754::isZero(-0.0f); + // Remember to call the base class initialization! if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index a4f177f16..74155465a 100644 --- a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -872,6 +872,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso EmulatedFloat64TestOutput output; // cpu validation + testValInfo.expectedTestValues.additionVal = 0; output.cpuTestsSucceed = compareEmulatedFloat64TestValues(testValInfo.expectedTestValues, cpuTestValues); // gpu validation diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 0027b8b70..c5869151d 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -4,9 +4,23 @@ #include #include #include +#include using namespace nbl; +#include +template // TODO: require to be float +struct RelativeFloatingPointComparator +{ + bool operator()(const T a, const T b, const T epsilon) + { + if (ieee754::isSubnormal(a) && ieee754::isSubnormal(b)) + return true; + + return max(abs(a / b), abs(b / a)) <= 1.f + epsilon; + } +}; + template class ITester { @@ -306,9 +320,10 @@ class ITester } template - void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const size_t testIteration, const uint32_t seed, const TestType testType) + void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, + const size_t testIteration, const uint32_t seed, const TestType testType, const float64_t maxAllowedDifference = 0.0) { - if (expectedVal == testVal) + if (compareTestValues(expectedVal, testVal, maxAllowedDifference)) return; std::stringstream ss; @@ -376,6 +391,18 @@ class ITester m_mersenneTwister = std::mt19937(m_seed); } + template requires concepts::IntegralLikeScalar || concepts::IntegralLikeVectorial || (concepts::Matricial && concepts::IntegralLikeScalar::scalar_type>) + bool compareTestValues(const T& lhs, const T& rhs, const float64_t maxAllowedDifference) + { + // no difference allowed for integers + return lhs == rhs; + } + template requires concepts::FloatingPointLikeScalar || concepts::FloatingPointLikeVectorial || (concepts::Matricial && concepts::FloatingPointLikeScalar::scalar_type>) + bool compareTestValues(const T& lhs, const T& rhs, const float64_t maxAllowedDifference) + { + return nbl::hlsl::testing::relativeApproxCompare(lhs, rhs, maxAllowedDifference); + } + const size_t m_testIterationCount; static constexpr size_t m_WorkgroupSize = 128u; // seed will change after every call to performTestsAndVerifyResults() From ab4ae7d2ac92030437477e3172866804587b6c14 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 17 Dec 2025 19:43:45 +0100 Subject: [PATCH 107/219] Testers now save logs to files --- 14_Mortons/main.cpp | 4 +-- 22_CppCompat/main.cpp | 10 +++---- 64_EmulatedFloatTest/main.cpp | 30 ++++++++++++++------ common/include/nbl/examples/Tester/ITester.h | 24 +++++++--------- 4 files changed, 38 insertions(+), 30 deletions(-) diff --git a/14_Mortons/main.cpp b/14_Mortons/main.cpp index a36db8a19..8dc0d0146 100644 --- a/14_Mortons/main.cpp +++ b/14_Mortons/main.cpp @@ -48,7 +48,7 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp CTester mortonTester(4); // 4 * 128 = 512 tests mortonTester.setupPipeline(pplnSetupData); - mortonTester.performTestsAndVerifyResults(); + mortonTester.performTestsAndVerifyResults("MortonTestLog.txt"); } { @@ -63,7 +63,7 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp CTester2 mortonTester2(4); mortonTester2.setupPipeline(reinterpret_cast(pplnSetupData)); - mortonTester2.performTestsAndVerifyResults(); + mortonTester2.performTestsAndVerifyResults("MortonTestLog2.txt"); } return true; diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index 440c003b3..b2a22d0e3 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -53,8 +53,6 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat bool onAppInitialized(smart_refctd_ptr&& system) override { - ieee754::isZero(-0.0f); - // Remember to call the base class initialization! if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; @@ -71,9 +69,9 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/tgmathTest.comp.hlsl"; - CTgmathTester tgmathTester(4); + CTgmathTester tgmathTester(8); tgmathTester.setupPipeline(pplnSetupData); - tgmathTester.performTestsAndVerifyResults(); + tgmathTester.performTestsAndVerifyResults("TgmathTestLog.txt"); } { CIntrinsicsTester::PipelineSetupData pplnSetupData; @@ -85,9 +83,9 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/intrinsicsTest.comp.hlsl"; - CIntrinsicsTester intrinsicsTester(4); + CIntrinsicsTester intrinsicsTester(8); intrinsicsTester.setupPipeline(pplnSetupData); - intrinsicsTester.performTestsAndVerifyResults(); + intrinsicsTester.performTestsAndVerifyResults("IntrinsicsTestLog.txt"); } m_queue = m_device->getQueue(0, 0); diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index 74155465a..f3a4a8fd3 100644 --- a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -42,13 +42,11 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso // since emulated_float64_t rounds to zero std::fesetround(FE_TOWARDZERO); - // Remember to call the base class initialization! if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; if (!asset_base_t::onAppInitialized(std::move(system))) return false; - - // In contrast to fences, we just need one semaphore to rule all dispatches + return true; } @@ -97,10 +95,14 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso auto printOnFailure = [this](EmulatedFloatTestDevice device) { + std::string errorMsgPrefix = ""; if (device == EmulatedFloatTestDevice::CPU) - m_logger->log("CPU test fail:", ILogger::ELL_ERROR); + errorMsgPrefix = "CPU test fail:"; else - m_logger->log("GPU test fail:", ILogger::ELL_ERROR); + errorMsgPrefix = "GPU test fail:"; + + m_logger->log(errorMsgPrefix.c_str(), ILogger::ELL_ERROR); + m_logFile << errorMsgPrefix << '\n'; }; auto printOnArithmeticFailure = [this](const char* valName, uint64_t expectedValue, uint64_t testValue, uint64_t a, uint64_t b) @@ -121,8 +123,9 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso ss << std::bitset<64>(testValue) << " - testValue bit pattern \n"; m_logger->log(ss.str().c_str(), ILogger::ELL_ERROR); + m_logFile << ss.str() << '\n'; - std::cout << "ULP error: " << std::max(expectedValue, testValue) - std::min(expectedValue, testValue) << "\n\n"; + //std::cout << "ULP error: " << std::max(expectedValue, testValue) - std::min(expectedValue, testValue) << "\n\n"; }; @@ -133,7 +136,10 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso auto printOnComparisonFailure = [this](const char* valName, int expectedValue, int testValue, double a, double b) { - m_logger->log("for input values: A = %f B = %f", ILogger::ELL_ERROR, a, b); + std::string inputValuesStr = std::string("for input values: A = ") + std::to_string(a) + std::string(" B = ") + std::to_string(b); + + m_logger->log(inputValuesStr.c_str() , ILogger::ELL_ERROR); + m_logFile << inputValuesStr << '\n'; std::stringstream ss; ss << valName << " not equal!"; @@ -141,6 +147,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso ss << "\ntest value: " << std::boolalpha << bool(testValue); m_logger->log(ss.str().c_str(), ILogger::ELL_ERROR); + m_logFile << ss.str() << '\n'; }; if (calcULPError(expectedValues.int32CreateVal, testValues.int32CreateVal) > 1u) @@ -438,6 +445,10 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso m_logger->log("Correct GPU determinated values!", ILogger::ELL_PERFORMANCE); }; + m_logFile.open("EmulatedFloatTestLog.txt", std::ios::out | std::ios::trunc); + if (!m_logFile.is_open()) + m_logger->log("Failed to open log file!", system::ILogger::ELL_ERROR); + printTestOutput("emulatedFloat64RandomValuesTest", emulatedFloat64RandomValuesTest(submitter)); printTestOutput("emulatedFloat64RandomValuesTestContrastingExponents", emulatedFloat64RandomValuesTestContrastingExponents(submitter)); printTestOutput("emulatedFloat64NegAndPosZeroTest", emulatedFloat64NegAndPosZeroTest(submitter)); @@ -450,6 +461,8 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso printTestOutput("emulatedFloat64BNaNTest", emulatedFloat64BNaNTest(submitter)); printTestOutput("emulatedFloat64BInfTest", emulatedFloat64OneValIsZeroTest(submitter)); printTestOutput("emulatedFloat64BNegInfTest", emulatedFloat64OneValIsNegZeroTest(submitter)); + + m_logFile.close(); } template @@ -872,7 +885,6 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso EmulatedFloat64TestOutput output; // cpu validation - testValInfo.expectedTestValues.additionVal = 0; output.cpuTestsSucceed = compareEmulatedFloat64TestValues(testValInfo.expectedTestValues, cpuTestValues); // gpu validation @@ -1172,6 +1184,8 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso m_logger->log(msg, ILogger::ELL_ERROR, std::forward(args)...); return false; } + + std::ofstream m_logFile; }; NBL_MAIN_FUNC(CompatibilityTest) \ No newline at end of file diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index c5869151d..560ca562b 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -9,17 +9,6 @@ using namespace nbl; #include -template // TODO: require to be float -struct RelativeFloatingPointComparator -{ - bool operator()(const T a, const T b, const T epsilon) - { - if (ieee754::isSubnormal(a) && ieee754::isSubnormal(b)) - return true; - - return max(abs(a / b), abs(b / a)) <= 1.f + epsilon; - } -}; template class ITester @@ -193,8 +182,12 @@ class ITester m_queue = m_device->getQueue(m_queueFamily, 0); } - void performTestsAndVerifyResults() + void performTestsAndVerifyResults(const std::string& logFileName) { + m_logFile.open(logFileName, std::ios::out | std::ios::trunc); + if (!m_logFile.is_open()) + m_logger->log("Failed to open log file!", system::ILogger::ELL_ERROR); + core::vector inputTestValues; core::vector exceptedTestResults; @@ -220,6 +213,8 @@ class ITester m_logger->log("TESTS DONE.", system::ILogger::ELL_PERFORMANCE); reloadSeed(); + + m_logFile.close(); } virtual ~ITester() @@ -341,6 +336,7 @@ class ITester ss << "EXPECTED VALUE: " << system::to_string(expectedVal) << " TEST VALUE: " << system::to_string(testVal) << '\n'; m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); + m_logFile << ss.str() << '\n'; } private: @@ -391,10 +387,9 @@ class ITester m_mersenneTwister = std::mt19937(m_seed); } - template requires concepts::IntegralLikeScalar || concepts::IntegralLikeVectorial || (concepts::Matricial && concepts::IntegralLikeScalar::scalar_type>) + template bool compareTestValues(const T& lhs, const T& rhs, const float64_t maxAllowedDifference) { - // no difference allowed for integers return lhs == rhs; } template requires concepts::FloatingPointLikeScalar || concepts::FloatingPointLikeVectorial || (concepts::Matricial && concepts::FloatingPointLikeScalar::scalar_type>) @@ -408,6 +403,7 @@ class ITester // seed will change after every call to performTestsAndVerifyResults() std::mt19937 m_mersenneTwister; uint32_t m_seed; + std::ofstream m_logFile; }; #endif \ No newline at end of file From 1c6458d81b83aea176ac7ebda7450a9b395a85bd Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 17 Dec 2025 22:23:10 +0300 Subject: [PATCH 108/219] A lot more debuggability, and: - Camera movement is disabled correctly - Hacked ViewManipulate to use for the cube itself - Added a storage buffer for debugging and getting stuff from GPU to CPU - Most importantly, disabled skew, used TRS for that - Random OBB buttons - Detection of mismatch of silhouette vertices (between slow more correct algo vs fast LUT based algo) --- .../app_resources/hlsl/Drawing.hlsl | 172 +++++ .../hlsl/SolidAngleVis.frag.hlsl | 644 +++++++++--------- .../app_resources/hlsl/common.hlsl | 49 +- .../app_resources/hlsl/utils.hlsl | 23 + 72_SolidAngleVisualizer/include/transform.hpp | 73 +- 72_SolidAngleVisualizer/main.cpp | 375 ++++++++-- .../include/nbl/examples/cameras/CCamera.hpp | 5 + 7 files changed, 939 insertions(+), 402 deletions(-) create mode 100644 72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl create mode 100644 72_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl new file mode 100644 index 000000000..c3cb5befa --- /dev/null +++ b/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl @@ -0,0 +1,172 @@ +#ifndef _DEBUG_HLSL_ +#define _DEBUG_HLSL_ +#include "common.hlsl" + +float2 sphereToCircle(float3 spherePoint) +{ + if (spherePoint.z >= 0.0f) + { + return spherePoint.xy * CIRCLE_RADIUS; + } + else + { + float r2 = (1.0f - spherePoint.z) / (1.0f + spherePoint.z); + float uv2Plus1 = r2 + 1.0f; + return (spherePoint.xy * uv2Plus1 / 2.0f) * CIRCLE_RADIUS; + } +} + +float4 drawGreatCircleArc(float3 fragPos, float3 points[2], int visibility, float aaWidth) +{ + if (visibility == 0) return float4(0,0,0,0); + + float3 v0 = normalize(points[0]); + float3 v1 = normalize(points[1]); + float3 p = normalize(fragPos); + + float3 arcNormal = normalize(cross(v0, v1)); + float dist = abs(dot(p, arcNormal)); + + float dotMid = dot(v0, v1); + bool onArc = (dot(p, v0) >= dotMid) && (dot(p, v1) >= dotMid); + + if (!onArc) return float4(0,0,0,0); + + float avgDepth = (length(points[0]) + length(points[1])) * 0.5f; + float depthScale = 3.0f / avgDepth; + + float baseWidth = (visibility == 1) ? 0.01f : 0.005f; + float width = min(baseWidth * depthScale, 0.02f); + + float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); + + float4 edgeColor = (visibility == 1) ? + float4(0.0f, 0.5f, 1.0f, 1.0f) : + float4(1.0f, 0.0f, 0.0f, 1.0f); + + float intensity = (visibility == 1) ? 1.0f : 0.5f; + return edgeColor * alpha * intensity; +} + +float4 drawHiddenEdges(float3 spherePos, uint32_t silEdgeMask, float aaWidth) +{ + float4 color = float4(0,0,0,0); + float3 hiddenEdgeColor = float3(0.1, 0.1, 0.1); + + for (int i = 0; i < 12; i++) + { + if ((silEdgeMask & (1u << i)) == 0) + { + int2 edge = allEdges[i]; + float3 edgePoints[2] = { corners[edge.x], corners[edge.y] }; + float4 edgeContribution = drawGreatCircleArc(spherePos, edgePoints, 1, aaWidth); + color += float4(hiddenEdgeColor * edgeContribution.a, edgeContribution.a); + } + } + return color; +} + +float4 drawCorners(float3 spherePos, float2 p, float aaWidth) +{ + float4 color = float4(0,0,0,0); + for (int i = 0; i < 8; i++) + { + float3 corner3D = normalize(corners[i]); + float2 cornerPos = sphereToCircle(corner3D); + float dist = length(p - cornerPos); + float dotSize = 0.02f; + float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); + if (dotAlpha > 0.0f) + { + float3 dotColor = colorLUT[i]; + color += float4(dotColor * dotAlpha, dotAlpha); + } + } + return color; +} + +float4 drawRing(float2 p, float aaWidth) +{ + float positionLength = length(p); + float ringWidth = 0.002f; + float ringDistance = abs(positionLength - CIRCLE_RADIUS); + float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); + return ringAlpha * float4(1, 1, 1, 1); +} + +// Check if a face on the hemisphere is visible from camera at origin +bool isFaceVisible(float3 faceCenter, float3 faceNormal) +{ + float3 viewVec = normalize(-faceCenter); // Vector from camera to face + return dot(faceNormal, viewVec) > 0.0f; +} + +int getEdgeVisibility(int edgeIdx) +{ + int2 faces = edgeToFaces[edgeIdx]; + + // Transform normals to world space + float3x3 rotMatrix = (float3x3)pc.modelMatrix; + float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); + float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); + + bool visible1 = isFaceVisible(faceCenters[faces.x], n_world_f1); + bool visible2 = isFaceVisible(faceCenters[faces.y], n_world_f2); + + // Silhouette: exactly one face visible + if (visible1 != visible2) return 1; + + // Inner edge: both faces visible + if (visible1 && visible2) return 2; + + // Hidden edge: both faces hidden + return 0; +} + +#if DEBUG_DATA +uint32_t computeGroundTruthEdgeMask() +{ + uint32_t mask = 0u; + NBL_UNROLL + for (int j = 0; j < 12; j++) + { + // getEdgeVisibility returns 1 for a silhouette edge based on 3D geometry + if (getEdgeVisibility(j) == 1) + { + mask |= (1u << j); + } + } + return mask; +} + +void validateEdgeVisibility(uint32_t sil, int vertexCount, uint32_t generatedSilMask) +{ + uint32_t mismatchAccumulator = 0; + + // The Ground Truth now represents the full 3D silhouette, clipped or not. + uint32_t groundTruthMask = computeGroundTruthEdgeMask(); + + // The comparison checks if the generated mask perfectly matches the full 3D ground truth. + uint32_t mismatchMask = groundTruthMask ^ generatedSilMask; + + if (mismatchMask != 0) + { + NBL_UNROLL + for (int j = 0; j < 12; j++) + { + if ((mismatchMask >> j) & 1u) + { + int2 edge = allEdges[j]; + // Accumulate vertex indices where error occurred + mismatchAccumulator |= (1u << edge.x) | (1u << edge.y); + } + } + } + + // Simple Write (assuming all fragments calculate the same result) + InterlockedOr(DebugDataBuffer[0].edgeVisibilityMismatch, mismatchAccumulator); +} +#endif + + +#endif // _DEBUG_HLSL_ diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index 51cb1946d..cd291dbd2 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -1,376 +1,374 @@ #pragma wave shader_stage(fragment) #include "common.hlsl" - #include +#include "utils.hlsl" using namespace nbl::hlsl; using namespace ext::FullScreenTriangle; [[vk::push_constant]] struct PushConstants pc; +[[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; -static const float CIRCLE_RADIUS = 0.75f; +static const float CIRCLE_RADIUS = 0.5f; // --- Geometry Utils --- -// Adjacency of edges to faces -static const int2 edgeToFaces[12] = { - {4,2}, {3,4}, {2,5}, {5,3}, - {2,0}, {0,3}, {1,2}, {3,1}, - {0,4}, {5,0}, {4,1}, {1,5} -}; - -//float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f static const float3 constCorners[8] = { - float3(-1, -1, -1), // 0 - float3( 1, -1, -1), // 1 - float3(-1, 1, -1), // 2 - float3( 1, 1, -1), // 3 - float3(-1, -1, 1), // 4 - float3( 1, -1, 1), // 5 - float3(-1, 1, 1), // 6 - float3( 1, 1, 1) // 7 + float3(-1, -1, -1), float3(1, -1, -1), float3(-1, 1, -1), float3(1, 1, -1), + float3(-1, -1, 1), float3(1, -1, 1), float3(-1, 1, 1), float3(1, 1, 1) }; -// All 12 edges of the cube (vertex index pairs) static const int2 allEdges[12] = { - {0, 1}, {2, 3}, {4, 5}, {6, 7}, // Edges along X axis - {0, 2}, {1, 3}, {4, 6}, {5, 7}, // Edges along Y axis - {0, 4}, {1, 5}, {2, 6}, {3, 7} // Edges along Z axis + {0, 1}, {2, 3}, {4, 5}, {6, 7}, // X axis + {0, 2}, {1, 3}, {4, 6}, {5, 7}, // Y axis + {0, 4}, {1, 5}, {2, 6}, {3, 7} // Z axis }; -static const float3 localNormals[6] = { - float3(0, 0, -1), // Face 0 (Z-) - float3(0, 0, 1), // Face 1 (Z+) - float3(-1, 0, 0), // Face 2 (X-) - float3(1, 0, 0), // Face 3 (X+) - float3(0, -1, 0), // Face 4 (Y-) - float3(0, 1, 0) // Face 5 (Y+) +// Adjacency of edges to faces +// Corrected Adjacency of edges to faces +static const int2 edgeToFaces[12] = { + // Edge Index: | allEdges[i] | Shared Faces: + + /* 0 (0-1) */ {4, 0}, // Y- (4) and Z- (0) + /* 1 (2-3) */ {5, 0}, // Y+ (5) and Z- (0) + /* 2 (4-5) */ {4, 1}, // Y- (4) and Z+ (1) + /* 3 (6-7) */ {5, 1}, // Y+ (5) and Z+ (1) + + /* 4 (0-2) */ {2, 0}, // X- (2) and Z- (0) + /* 5 (1-3) */ {3, 0}, // X+ (3) and Z- (0) + /* 6 (4-6) */ {2, 1}, // X- (2) and Z+ (1) + /* 7 (5-7) */ {3, 1}, // X+ (3) and Z+ (1) + + /* 8 (0-4) */ {2, 4}, // X- (2) and Y- (4) + /* 9 (1-5) */ {3, 4}, // X+ (3) and Y- (4) + /* 10 (2-6) */ {2, 5}, // X- (2) and Y+ (5) + /* 11 (3-7) */ {3, 5} // X+ (3) and Y+ (5) }; - static float3 corners[8]; -static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), - float3(0,0,0), float3(0,0,0), float3(0,0,0) }; - - -static const float3 colorLUT[27] = { - // Row 1: Pure and bright colors - float3(0, 0, 0), // 0: Black - float3(1, 1, 1), // 1: White - float3(0.5, 0.5, 0.5), // 2: Gray - - // Row 2: Primary colors - float3(1, 0, 0), // 3: Red - float3(0, 1, 0), // 4: Green - float3(0, 0, 1), // 5: Blue - - // Row 3: Secondary colors - float3(1, 1, 0), // 6: Yellow - float3(1, 0, 1), // 7: Magenta - float3(0, 1, 1), // 8: Cyan - - // Row 4: Orange family - float3(1, 0.5, 0), // 9: Orange - float3(1, 0.65, 0), // 10: Light Orange - float3(0.8, 0.4, 0), // 11: Dark Orange - - // Row 5: Pink/Rose family - float3(1, 0.4, 0.7), // 12: Pink - float3(1, 0.75, 0.8), // 13: Light Pink - float3(0.7, 0.1, 0.3), // 14: Deep Rose - - // Row 6: Purple/Violet family - float3(0.5, 0, 0.5), // 15: Purple - float3(0.6, 0.4, 0.8), // 16: Light Purple - float3(0.3, 0, 0.5), // 17: Indigo - - // Row 7: Green variations - float3(0, 0.5, 0), // 18: Dark Green - float3(0.5, 1, 0), // 19: Lime - float3(0, 0.5, 0.25), // 20: Forest Green - - // Row 8: Blue variations - float3(0, 0, 0.5), // 21: Navy - float3(0.3, 0.7, 1), // 22: Sky Blue - float3(0, 0.4, 0.6), // 23: Teal - - // Row 9: Earth tones - float3(0.6, 0.4, 0.2), // 24: Brown - float3(0.8, 0.7, 0.3), // 25: Tan/Beige - float3(0.4, 0.3, 0.1) // 26: Dark Brown +static float3 faceCenters[6] = { + float3(0,0,0), float3(0,0,0), float3(0,0,0), + float3(0,0,0), float3(0,0,0), float3(0,0,0) +}; + +static const float3 localNormals[6] = { + float3(0, 0, -1), // Face 0 (Z-) + float3(0, 0, 1), // Face 1 (Z+) + float3(-1, 0, 0), // Face 2 (X-) + float3(1, 0, 0), // Face 3 (X+) + float3(0, -1, 0), // Face 4 (Y-) + float3(0, 1, 0) // Face 5 (Y+) }; - +// TODO: unused, remove later // Vertices are ordered CCW relative to the camera view. static const int silhouettes[27][7] = { - {6, 1, 3, 2, 6, 4, 5}, // 0: Black - {6, 2, 6, 4, 5, 7, 3}, // 1: White - {6, 0, 4, 5, 7, 3, 2}, // 2: Gray - {6, 1, 3, 7, 6, 4, 5,}, // 3: Red - {4, 4, 5, 7, 6, -1, -1}, // 4: Green - {6, 0, 4, 5, 7, 6, 2}, // 5: Blue - {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow - {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta - {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan - {6, 1, 3, 2, 6, 7, 5}, // 9: Orange - {4, 2, 6, 7, 3, -1, -1}, // 10: Light Orange - {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange - {4, 1, 3, 7, 5, -1, -1}, // 12: Pink - {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink - {4, 0, 4, 6, 2, -1, -1}, // 14: Deep Rose - {6, 0, 1, 3, 7, 5, 4}, // 15: Purple - {4, 0, 1, 5, 4, -1, -1}, // 16: Light Purple - {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo - {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green - {6, 0, 2, 6, 7, 3, 1}, // 19: Lime - {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green - {6, 0, 2, 3, 7, 5, 1}, // 21: Navy - {4, 0, 2, 3, 1, -1, -1}, // 22: Sky Blue - {6, 0, 4, 6, 2, 3, 1}, // 23: Teal - {6, 0, 2, 3, 7, 5, 4}, // 24: Brown - {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige - {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown + {6, 1, 3, 2, 6, 4, 5}, // 0: Black + {6, 2, 6, 4, 5, 7, 3}, // 1: White + {6, 0, 4, 5, 7, 3, 2}, // 2: Gray + {6, 1, 3, 7, 6, 4, 5,}, // 3: Red + {4, 4, 5, 7, 6, -1, -1}, // 4: Green + {6, 0, 4, 5, 7, 6, 2}, // 5: Blue + {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow + {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta + {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan + {6, 1, 3, 2, 6, 7, 5}, // 9: Orange + {4, 2, 6, 7, 3, -1, -1}, // 10: Light Orange + {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange + {4, 1, 3, 7, 5, -1, -1}, // 12: Pink + {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink + {4, 0, 4, 6, 2, -1, -1}, // 14: Deep Rose + {6, 0, 1, 3, 7, 5, 4}, // 15: Purple + {4, 0, 1, 5, 4, -1, -1}, // 16: Light Purple + {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo + {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green + {6, 0, 2, 6, 7, 3, 1}, // 19: Lime + {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green + {6, 0, 2, 3, 7, 5, 1}, // 21: Navy + {4, 0, 2, 3, 1, -1, -1}, // 22: Sky Blue + {6, 0, 4, 6, 2, 3, 1}, // 23: Teal + {6, 0, 2, 3, 7, 5, 4}, // 24: Brown + {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige + {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown }; -// Converts UV into centered, aspect-corrected NDC circle space -float2 toCircleSpace(float2 uv) -{ - // Map [0,1] UV to [-1,1] - float2 p = uv * 2.0f - 1.0f; - - // Correct aspect ratio - float aspect = pc.viewport.z / pc.viewport.w; // width / height - p.x *= aspect; - - return p * CIRCLE_RADIUS; -} +// Binary packed silhouettes +static const uint32_t binSilhouettes[27] = { + 0b11000000000000101100110010011001, + 0b11000000000000011111101100110010, + 0b11000000000000010011111101100000, + 0b11000000000000101100110111011001, + 0b10000000000000000000110111101100, + 0b11000000000000010110111101100000, + 0b11000000000000100110111011001000, + 0b11000000000000100110111101001000, + 0b11000000000000010110111101001000, + 0b11000000000000101111110010011001, + 0b10000000000000000000011111110010, + 0b11000000000000010011111110100000, + 0b10000000000000000000101111011001, + 0b11000000000000010011111110100000, + 0b10000000000000000000010110100000, + 0b11000000000000100101111011001000, + 0b10000000000000000000100101001000, + 0b11000000000000010110100101001000, + 0b11000000000000001101111110010000, + 0b11000000000000001011111110010000, + 0b11000000000000001011111110100000, + 0b11000000000000001101111011010000, + 0b10000000000000000000001011010000, + 0b11000000000000001011010110100000, + 0b11000000000000100101111011010000, + 0b11000000000000100101001011010000, + 0b11000000000000011010110100101001, +}; -void computeCubeGeo() +int getSilhouetteVertex(uint32_t packedSil, int index) { - for (int i = 0; i < 8; i++) - { - float3 localPos = constCorners[i]; //float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f; - float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; - - corners[i] = worldPos.xyz; - - faceCenters[i/4] += worldPos / 4.0f; - faceCenters[2+i%2] += worldPos / 4.0f; - faceCenters[4+(i/2)%2] += worldPos / 4.0f; - } + return (packedSil >> (3 * index)) & 0x7; } -float4 drawCorners(float3 spherePos, float aaWidth) +// Get silhouette size +int getSilhouetteSize(uint32_t sil) { - float4 color = float4(0,0,0,0); - // Draw corner labels for debugging - for (int i = 0; i < 8; i++) - { - float3 corner = normalize(corners[i]); - float2 cornerPos = corner.xy; - // Project corner onto 2D circle space - - // Distance from current fragment to corner - float dist = length(spherePos.xy - cornerPos); - - // Draw a small colored dot at the corner - float dotSize = 0.03f; - float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); - - if (dotAlpha > 0.0f) - { - float brightness = float(i) / 7.0f; - float3 dotColor = colorLUT[i]; - color += float4(dotColor * dotAlpha, dotAlpha); - } - } - return color; + return (sil >> 29) & 0x7; + } -float4 drawRing(float2 p, float aaWidth) +// Check if vertex has negative z +bool getVertexZNeg(int vertexIdx) { - float positionLength = length(p); - - // Add a white background circle ring - float ringWidth = 0.01f; - float ringDistance = abs(positionLength - CIRCLE_RADIUS); - float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); - - return ringAlpha * float4(1, 1, 1, 1); + return normalize(corners[vertexIdx]).z < 0.0f; } -// Check if a face on the hemisphere is visible from camera at origin -bool isFaceVisible(float3 faceCenter, float3 faceNormal) +#include "Drawing.hlsl" + + +void setDebugData(uint32_t sil, int3 region, int configIndex, uint32_t clippedVertexCount) { - // Face is visible if normal points toward camera (at origin) - float3 viewVec = -normalize(faceCenter); // Vector from face to camera - return dot(faceNormal, viewVec) > 0.0f; +#if DEBUG_DATA + DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); + DebugDataBuffer[0].region = uint3(region); + DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); + DebugDataBuffer[0].clippedVertexCount = clippedVertexCount; + for (int i = 0; i < 6; i++) + { + DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); + } + DebugDataBuffer[0].silhouette = sil; +#endif } -int getEdgeVisibility(int edgeIdx, float3 cameraPos) +float2 toCircleSpace(float2 uv) { - int2 faces = edgeToFaces[edgeIdx]; - - // Transform normals to world space - float3x3 rotMatrix = (float3x3)pc.modelMatrix; - float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); - float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); - - bool visible1 = isFaceVisible(faceCenters[faces.x], n_world_f1); - bool visible2 = isFaceVisible(faceCenters[faces.y], n_world_f2); - - // Silhouette: exactly one face visible - if (visible1 != visible2) return 1; - - // Inner edge: both faces visible - if (visible1 && visible2) return 2; - - // Hidden edge: both faces hidden - return 0; + float2 p = uv * 2.0f - 1.0f; + float aspect = pc.viewport.z / pc.viewport.w; + p.x *= aspect; + return p; } -// Draw great circle arc in fragment shader with horizon clipping -float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float aaWidth) +uint32_t packSilhouette(const int s[7]) { - if (visibility == 0) return float4(0,0,0,0); // Hidden edge - - float3 v0 = normalize(corners[edgeVerts.x]); - float3 v1 = normalize(corners[edgeVerts.y]); - float3 p = normalize(fragPos); // Current point on hemisphere - - // HORIZON CLIPPING: Current fragment must be on front hemisphere - if (p.z < 0.0f) - return float4(0,0,0,0); - - // HORIZON CLIPPING: Skip edge if both endpoints are behind horizon - if (v0.z < 0.0f && v1.z < 0.0f) - return float4(0,0,0,0); - - // Great circle plane normal - float3 arcNormal = normalize(cross(v0, v1)); - - // Distance to great circle - float dist = abs(dot(p, arcNormal)); - - // Check if point is within arc bounds - float dotMid = dot(v0, v1); - bool onArc = (dot(p, v0) >= dotMid) && (dot(p, v1) >= dotMid); - - if (!onArc) return float4(0,0,0,0); - - // Depth-based width scaling - float avgDepth = (length(corners[edgeVerts.x]) + length(corners[edgeVerts.y])) * 0.5f; - float depthScale = 3.0f / avgDepth; - - float baseWidth = (visibility == 1) ? 0.01f : 0.005f; - float width = min(baseWidth * depthScale, 0.02f); - - float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); - - float4 edgeColor = (visibility == 1) ? - float4(0.0f, 0.5f, 1.0f, 1.0f) : // Silhouette: blue - float4(1.0f, 0.0f, 0.0f, 1.0f); // Inner: red - - float intensity = (visibility == 1) ? 1.0f : 0.5f; - return edgeColor * alpha * intensity; + uint32_t packed = 0; + int size = s[0] & 0x7; // 3 bits for size + + // Pack vertices LSB-first (vertex1 in lowest 3 bits above size) + for (int i = 1; i <= 6; ++i) { + int v = s[i]; + if (v < 0) v = 0; // replace unused vertices with 0 + packed |= (v & 0x7) << (3 * (i - 1)); // vertex i-1 shifted by 3*(i-1) + } + + // Put size in the MSB (bits 29-31 for a 32-bit uint, leaving 29 bits for vertices) + packed |= (size & 0x7) << 29; + + return packed; } -float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) +void computeCubeGeo() { - float4 color = float4(0,0,0,0); - // Draw the remaining edges (non-silhouette) in a different color - float3 hiddenEdgeColor = float3(0.1, 0.1, 0.1); // dark yellow color for hidden edges - - for (int i = 0; i < 12; i++) - { - int2 edge = allEdges[i]; - - // Check if this edge is already drawn as a silhouette edge - bool isSilhouette = false; - int vertexCount = silhouettes[configIndex][0]; - // Draw the 6 silhouette edges - for (int i = 0; i < vertexCount; i++) - { - int v0Idx = silhouettes[configIndex][i + 1]; - int v1Idx = silhouettes[configIndex][((i + 1) % vertexCount) + 1]; - - if ((edge.x == v0Idx && edge.y == v1Idx) || (edge.x == v1Idx && edge.y == v0Idx)) - { - isSilhouette = true; - break; - } - } - - // Only draw if it's not a silhouette edge - if (!isSilhouette) - { - float4 edgeContribution = drawGreatCircleArc(spherePos, edge, 1, aaWidth); - color += float4(hiddenEdgeColor * edgeContribution.a, edgeContribution.a); - } - } - return color; + for (int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) + { + float3 localPos = constCorners[i]; + float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; + corners[i] = worldPos.xyz; + faceCenters[i / 4] += worldPos / 4.0f; + faceCenters[2 + i % 2] += worldPos / 4.0f; + faceCenters[4 + (i / 2) % 2] += worldPos / 4.0f; + } } [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { - float4 color = float4(0, 0, 0, 0); - float2 p = toCircleSpace(vx.uv); - - // Convert 2D disk position to 3D hemisphere position - float2 normalized = p / CIRCLE_RADIUS; - float r2 = dot(normalized, normalized); - float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); - - - - // Convert UV to 3D position on hemisphere - float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); - - computeCubeGeo(); - - // Get OBB center in world space - float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; - - float3x3 rotMatrix = (float3x3)pc.modelMatrix; - float3 proj = mul(obbCenter, rotMatrix); // Get all 3 projections at once - - // Get squared column lengths - float lenSqX = dot(rotMatrix[0], rotMatrix[0]); - float lenSqY = dot(rotMatrix[1], rotMatrix[1]); - float lenSqZ = dot(rotMatrix[2], rotMatrix[2]); - - int3 region = int3( - proj.x < -lenSqX ? 0 : (proj.x > lenSqX ? 2 : 1), - proj.y < -lenSqY ? 0 : (proj.y > lenSqY ? 2 : 1), - proj.z < -lenSqZ ? 0 : (proj.z > lenSqZ ? 2 : 1) - ); - - int configIndex = region.x + region.y * 3 + region.z * 9; // 0-26 - - int vertexCount = silhouettes[configIndex][0]; - for (int i = 0; i < vertexCount; i++) - { - int v0Idx = silhouettes[configIndex][i + 1]; - int v1Idx = silhouettes[configIndex][((i + 1) % vertexCount) + 1]; - - float4 edgeContribution = drawGreatCircleArc(spherePos, int2(v0Idx, v1Idx), 1, aaWidth); - color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); - } - - color += drawHiddenEdges(spherePos, configIndex, aaWidth); - - color += drawCorners(spherePos, aaWidth); - - color += drawRing(p, aaWidth); - - if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) - { - return float4(colorLUT[configIndex], 1.0f); - } - - // if (r2 > 1.1f) - // color.a = 0.0f; // Outside circle, make transparent - - return color; + float4 color = float4(0, 0, 0, 0); + float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); + float2 p = toCircleSpace(vx.uv); + + float2 normalized = p / CIRCLE_RADIUS; + float r2 = dot(normalized, normalized); + + float3 spherePos; + if (r2 <= 1.0f) + { + spherePos = float3(normalized.x, normalized.y, sqrt(1.0f - r2)); + } + else + { + float uv2Plus1 = r2 + 1.0f; + spherePos = float3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; + } + spherePos = normalize(spherePos); + + computeCubeGeo(); + + float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; + + float3x3 upper3x3 = (float3x3)pc.modelMatrix; + +#if 1 + // Compute reciprocal scales + float3 rcpScales = rsqrt(float3( + dot(upper3x3[0], upper3x3[0]), + dot(upper3x3[1], upper3x3[1]), + dot(upper3x3[2], upper3x3[2]) + )); + + // Build inverse-rotation-only matrix + float3x3 invRot; + invRot[0] = upper3x3[0] * rcpScales.x; + invRot[1] = upper3x3[1] * rcpScales.y; + invRot[2] = upper3x3[2] * rcpScales.z; + + // Project center into OBB local space + float3 normalizedProj = mul(invRot, obbCenter); +#else + float3 normalizedProj = mul(inverse(upper3x3), obbCenter); +#endif + int3 region = int3( + normalizedProj.x < -1.0f ? 0 : (normalizedProj.x > 1.0f ? 2 : 1), + normalizedProj.y < -1.0f ? 0 : (normalizedProj.y > 1.0f ? 2 : 1), + normalizedProj.z < -1.0f ? 0 : (normalizedProj.z > 1.0f ? 2 : 1) + ); + int configIndex = region.x + region.y * 3 + region.z * 9; + + // uint32_t sil = packSilhouette(silhouettes[configIndex]); + uint32_t sil = binSilhouettes[configIndex]; + + int vertexCount = getSilhouetteSize(sil); + bool longSilhouette = (vertexCount == 6); + uint32_t silEdgeMask = 0; + +#if DEBUG_DATA + { + for (int i = 0; i < vertexCount; i++) + { + int vIdx = i % vertexCount; + int v1Idx = (i + 1) % vertexCount; + + int v0Corner = getSilhouetteVertex(sil, vIdx); + int v1Corner = getSilhouetteVertex(sil, v1Idx); + // Mark edge as part of silhouette + for (int e = 0; e < 12; e++) + { + int2 edge = allEdges[e]; + if ((edge.x == v0Corner && edge.y == v1Corner) || + (edge.x == v1Corner && edge.y == v0Corner)) + { + silEdgeMask |= (1u << e); + } + } + } + validateEdgeVisibility(sil, vertexCount, silEdgeMask); + } +#endif + // Build clip mask for vertices below horizon (z < 0) + uint32_t clipMask = 0u; + NBL_UNROLL + for (int i = 0; i < 6; i++) + { + if (i >= vertexCount) break; + clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; + } + + int clipCount = countbits(clipMask); + + // Total clipped vertices + int clippedVertCount = vertexCount + (clipMask != 0u ? (2 - clipCount) : 0); + + // Find rotation amount to place positive vertices first + int rotateAmount = 0; + if (clipMask != 0u) + { + uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); + bool wrapAround = ((clipMask & 1u) != 0u) && ((clipMask >> (vertexCount - 1)) & 1u); + + rotateAmount = wrapAround ? + ((firstbithigh(invertedMask) + 1) % vertexCount) : + firstbitlow(clipMask); + } + + // Rotate silhouette bits + uint32_t vertexBits = sil & 0x1FFFFFFF; + uint32_t rotatedVertexBits = rotr(vertexBits, rotateAmount * 3, vertexCount * 3); + uint32_t rotatedSil = (sil & 0xE0000000) | rotatedVertexBits; + + // Rotate the clip mask to match + uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); + + // Draw clipped silhouette edges + for (int i = 0; i < clippedVertCount; i++) + { + int nextI = (i + 1) % clippedVertCount; + + int vIdx = i % vertexCount; + int v1Idx = nextI % vertexCount; + + // Extract clip bits directly + bool v0Clipped = (rotatedClipMask >> vIdx) & 1u; + bool v1Clipped = (rotatedClipMask >> v1Idx) & 1u; + + // Skip if both clipped + if (v0Clipped && v1Clipped) continue; + + int v0Corner = getSilhouetteVertex(rotatedSil, vIdx); + int v1Corner = getSilhouetteVertex(rotatedSil, v1Idx); + + float3 v0 = normalize(corners[v0Corner]); + float3 v1 = normalize(corners[v1Corner]); + + float3 points[2] = { corners[v0Corner], corners[v1Corner] }; + + // Clip using bit state + if (v0Clipped) + { + float t = v0.z / (v0.z - v1.z); + points[0] = normalize(lerp(corners[v0Corner], corners[v1Corner], t)); + } + else if (v1Clipped) + { + float t = v0.z / (v0.z - v1.z); + points[1] = normalize(lerp(corners[v0Corner], corners[v1Corner], t)); + } + + // Draw edge + float4 edgeContribution = drawGreatCircleArc(spherePos, points, 1, aaWidth); + color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); + + } + + + setDebugData(sil, region, configIndex, clippedVertCount); + + color += drawHiddenEdges(spherePos, silEdgeMask, aaWidth); + color += drawCorners(spherePos, p, aaWidth); + color += drawRing(p, aaWidth); + + if (all(vx.uv >= float2(0.49f, 0.49f)) && all(vx.uv <= float2(0.51f, 0.51f))) + { + return float4(colorLUT[configIndex], 1.0f); + } + + return color; } \ No newline at end of file diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl index 80368d08f..3c87a48bc 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -2,13 +2,52 @@ #define _SOLID_ANGLE_VIS_COMMON_HLSL_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#define DEBUG_DATA 1 - -struct PushConstants +namespace nbl { - nbl::hlsl::float32_t3x4 modelMatrix; - nbl::hlsl::float32_t4 viewport; -}; + namespace hlsl + { + + struct ResultData + { + uint32_t3 region; + uint32_t silhouetteIndex; + + uint32_t silhouetteVertexCount; + uint32_t silhouette; + uint32_t clippedVertexCount; + uint32_t edgeVisibilityMismatch; + + uint32_t vertices[6]; + }; + + struct PushConstants + { + float32_t3x4 modelMatrix; + float32_t4 viewport; + }; + static const float32_t3 colorLUT[27] = { + float32_t3(0, 0, 0), float32_t3(1, 1, 1), float32_t3(0.5, 0.5, 0.5), + float32_t3(1, 0, 0), float32_t3(0, 1, 0), float32_t3(0, 0, 1), + float32_t3(1, 1, 0), float32_t3(1, 0, 1), float32_t3(0, 1, 1), + float32_t3(1, 0.5, 0), float32_t3(1, 0.65, 0), float32_t3(0.8, 0.4, 0), + float32_t3(1, 0.4, 0.7), float32_t3(1, 0.75, 0.8), float32_t3(0.7, 0.1, 0.3), + float32_t3(0.5, 0, 0.5), float32_t3(0.6, 0.4, 0.8), float32_t3(0.3, 0, 0.5), + float32_t3(0, 0.5, 0), float32_t3(0.5, 1, 0), float32_t3(0, 0.5, 0.25), + float32_t3(0, 0, 0.5), float32_t3(0.3, 0.7, 1), float32_t3(0, 0.4, 0.6), + float32_t3(0.6, 0.4, 0.2), float32_t3(0.8, 0.7, 0.3), float32_t3(0.4, 0.3, 0.1) + }; +#ifndef __HLSL_VERSION + static const char* colorNames[27] = {"Black", + "White", "Gray", "Red", "Green", "Blue", "Yellow", "Magenta", "Cyan", + "Orange", "Light Orange", "Dark Orange", "Pink", "Light Pink", "Deep Rose", "Purple", "Light Purple", + "Indigo", "Dark Green", "Lime", "Forest Green", "Navy", "Sky Blue", "Teal", "Brown", + "Tan/Beige", "Dark Brown" + }; +#endif // __HLSL_VERSION + } +} #endif // _SOLID_ANGLE_VIS_COMMON_HLSL_ diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl new file mode 100644 index 000000000..4031e048f --- /dev/null +++ b/72_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl @@ -0,0 +1,23 @@ +#ifndef _UTILS_HLSL_ +#define _UTILS_HLSL_ + +// TODO: implemented somewhere else? +// Bit rotation helpers +uint32_t rotl(uint32_t value, uint32_t bits, uint32_t width) +{ + bits = bits % width; + uint32_t mask = (1u << width) - 1u; + value &= mask; + return ((value << bits) | (value >> (width - bits))) & mask; +} + +uint32_t rotr(uint32_t value, uint32_t bits, uint32_t width) +{ + bits = bits % width; + uint32_t mask = (1u << width) - 1u; + value &= mask; + return ((value >> bits) | (value << (width - bits))) & mask; +} + + +#endif // _UTILS_HLSL_ diff --git a/72_SolidAngleVisualizer/include/transform.hpp b/72_SolidAngleVisualizer/include/transform.hpp index 105b2f757..538173223 100644 --- a/72_SolidAngleVisualizer/include/transform.hpp +++ b/72_SolidAngleVisualizer/include/transform.hpp @@ -1,27 +1,21 @@ #ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ #define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ - #include "nbl/ui/ICursorControl.h" - #include "nbl/ext/ImGui/ImGui.h" - #include "imgui/imgui_internal.h" #include "imguizmo/ImGuizmo.h" - struct TransformRequestParams { - float camDistance = 8.f; uint8_t sceneTexDescIx = ~0; - bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = false; + bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = true; }; struct TransformReturnInfo { nbl::hlsl::uint16_t2 sceneResolution = { 1, 1 }; - bool isGizmoWindowHovered; - bool isGizmoBeingUsed; + bool allowCameraMovement = false; }; TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) @@ -35,7 +29,7 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti static bool boundSizing = false; static bool boundSizingSnap = false; - ImGui::Text("Press T/R/G to change gizmo mode"); + ImGui::Text("Use gizmo (T/R/G) or ViewManipulate widget to transform the cube"); if (params.editTransformDecomposition) { @@ -55,11 +49,13 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti mCurrentGizmoOperation = ImGuizmo::SCALE; if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; + + // For UI editing, decompose temporarily float matrixTranslation[3], matrixRotation[3], matrixScale[3]; ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); - ImGui::InputFloat3("Tr", matrixTranslation); - ImGui::InputFloat3("Rt", matrixRotation); - ImGui::InputFloat3("Sc", matrixScale); + ImGui::DragFloat3("Tr", matrixTranslation, 0.01f); + ImGui::DragFloat3("Rt", matrixRotation, 0.01f); + ImGui::DragFloat3("Sc", matrixScale, 0.01f); ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); if (mCurrentGizmoOperation != ImGuizmo::SCALE) @@ -101,17 +97,18 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti ImGuiIO& io = ImGui::GetIO(); float viewManipulateRight = io.DisplaySize.x; float viewManipulateTop = 0; + bool isWindowHovered = false; static ImGuiWindowFlags gizmoWindowFlags = 0; /* - for the "useWindow" case we just render to a gui area, + for the "useWindow" case we just render to a gui area, otherwise to fake full screen transparent window - note that for both cases we make sure gizmo being - rendered is aligned to our texture scene using - imgui "cursor" screen positions + note that for both cases we make sure gizmo being + rendered is aligned to our texture scene using + imgui "cursor" screen positions */ -// TODO: this shouldn't be handled here I think + // TODO: this shouldn't be handled here I think SImResourceInfo info; info.textureID = params.sceneTexDescIx; info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; @@ -128,17 +125,17 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); ImVec2 windowPos = ImGui::GetWindowPos(); ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + isWindowHovered = ImGui::IsWindowHovered(); ImGui::Image(info, contentRegionSize); ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); - retval.sceneResolution = {contentRegionSize.x,contentRegionSize.y}; - retval.isGizmoWindowHovered = ImGui::IsWindowHovered(); + retval.sceneResolution = { contentRegionSize.x,contentRegionSize.y }; viewManipulateRight = cursorPos.x + contentRegionSize.x; viewManipulateTop = cursorPos.y; ImGuiWindow* window = ImGui::GetCurrentWindow(); - gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0); + gizmoWindowFlags = (isWindowHovered && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0); } else { @@ -149,21 +146,45 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + isWindowHovered = ImGui::IsWindowHovered(); ImGui::Image(info, contentRegionSize); ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); - retval.sceneResolution = {contentRegionSize.x,contentRegionSize.y}; - retval.isGizmoWindowHovered = ImGui::IsWindowHovered(); + retval.sceneResolution = { contentRegionSize.x,contentRegionSize.y }; viewManipulateRight = cursorPos.x + contentRegionSize.x; viewManipulateTop = cursorPos.y; } + // Standard Manipulate gizmo - let ImGuizmo modify the matrix directly ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL); - retval.isGizmoBeingUsed = ImGuizmo::IsOver() || (ImGuizmo::IsUsing() && ImGui::IsMouseDown(ImGuiMouseButton_Left)); - if(params.enableViewManipulate) - ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + retval.allowCameraMovement = isWindowHovered && !ImGuizmo::IsUsing(); + + // ViewManipulate for rotating the view + if (params.enableViewManipulate) + { + // Store original translation and scale before ViewManipulate + // Decompose original matrix + nbl::hlsl::float32_t3 translation, rotation, scale; + ImGuizmo::DecomposeMatrixToComponents(matrix, &translation.x, &rotation.x, &scale.x); + + float temp[16]; + nbl::hlsl::float32_t3 baseTranslation(0.0f); + nbl::hlsl::float32_t3 baseScale(1.0f); + ImGuizmo::RecomposeMatrixFromComponents(&baseTranslation.x, &rotation.x, &baseScale.x, temp); + // Manipulate rotation only + ImGuizmo::ViewManipulate(temp, 1.0f, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + + // Extract rotation from manipulated temp + nbl::hlsl::float32_t3 newRot; + ImGuizmo::DecomposeMatrixToComponents(temp, &baseTranslation.x, &newRot.x, &baseScale.x); + + // Recompose original matrix with new rotation but keep translation & scale + ImGuizmo::RecomposeMatrixFromComponents(&translation.x, &newRot.x, &scale.x, matrix); + + retval.allowCameraMovement &= isWindowHovered && !ImGuizmo::IsUsingViewManipulate(); + } ImGui::End(); ImGui::PopStyleColor(); @@ -171,4 +192,4 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti return retval; } -#endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ \ No newline at end of file +#endif // _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ \ No newline at end of file diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index e9266520d..1c52547af 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -211,7 +211,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR return shader; }; - auto scRes = static_cast(m_surface->getSwapchainResources()); ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); if (!fsTriProtoPPln) return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); @@ -232,17 +231,73 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .size = sizeof(PushConstants) } }; - auto visualizationLayout = m_device->createPipelineLayout( - ranges, - nullptr, - nullptr, - nullptr, - nullptr + nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { + { + .binding = 0, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_FRAGMENT, + .count = 1 + } + }; + smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + logFail("Failed to create a Descriptor Layout!\n"); + + + auto visualizationLayout = m_device->createPipelineLayout(ranges +#if DEBUG_DATA + , dsLayout +#endif ); m_visualizationPipeline = fsTriProtoPPln.createPipeline(fragSpec, visualizationLayout.get(), m_solidAngleRenderpass.get()); if (!m_visualizationPipeline) return logFail("Could not create Graphics Pipeline!"); + // Allocate the memory +#if DEBUG_DATA + { + constexpr size_t BufferSize = sizeof(ResultData); + + nbl::video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + m_outputStorageBuffer = m_device->createBuffer(std::move(params)); + if (!m_outputStorageBuffer) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + m_outputStorageBuffer->setObjectDebugName("ResultData output buffer"); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = m_outputStorageBuffer->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_allocation = m_device->allocate(reqs, m_outputStorageBuffer.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_allocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(m_outputStorageBuffer->getBoundMemory().memory == m_allocation.memory.get()); + smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + + m_ds = pool->createDescriptorSet(std::move(dsLayout)); + { + IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = smart_refctd_ptr(m_outputStorageBuffer); + info[0].info.buffer = { .offset = 0,.size = BufferSize }; + IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} + }; + m_device->updateDescriptorSets(writes, {}); + } + } + + if (!m_allocation.memory->map({ 0ull,m_allocation.memory->getAllocationSize() }, IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches + const ILogicalDevice::MappedMemoryRange memoryRange(m_allocation.memory.get(), 0ull, m_allocation.memory->getAllocationSize()); + if (!m_allocation.memory->getMemoryPropertyFlags().hasFlags(IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); +#endif } // Create ImGUI @@ -336,6 +391,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; if (m_solidAngleViewFramebuffer) { +#if DEBUG_DATA + asset::SBufferRange range + { + .offset = 0, + .size = m_outputStorageBuffer->getSize(), + .buffer = m_outputStorageBuffer + }; + cb->fillBuffer(range, 0u); +#endif auto creationParams = m_solidAngleViewFramebuffer->getCreationParameters(); cb->beginDebugMarker("Draw Circle View Frame"); { @@ -361,11 +425,17 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR auto pipeline = m_visualizationPipeline; cb->bindGraphicsPipeline(pipeline.get()); cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); - //cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 3, 1, &ds); + cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 0, 1, &m_ds.get()); ext::FullScreenTriangle::recordDrawCall(cb); } cb->endRenderPass(); cb->endDebugMarker(); + +#if DEBUG_DATA + m_device->waitIdle(); + std::memcpy(&m_GPUOutResulData, static_cast(m_allocation.memory->getMappedPointer()), sizeof(ResultData)); + m_device->waitIdle(); +#endif } // draw main view if (m_mainViewFramebuffer) @@ -557,6 +627,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { if (interface.move) camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + else + camera.mouseKeysUp(); for (const auto& e : events) // here capture { @@ -713,6 +785,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR cb->setViewport(0u, 1u, &viewport); } +#if DEBUG_DATA + ~SolidAngleVisualizer() override + { + m_allocation.memory->unmap(); + } +#endif + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers constexpr static inline uint32_t MaxFramesInFlight = 3u; constexpr static inline auto sceneRenderDepthFormat = EF_D32_SFLOAT; @@ -721,13 +800,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes constexpr static inline auto MaxImGUITextures = 2u + MaxFramesInFlight; - constexpr static inline float32_t4x4 OBBModelMatrixDefault - { - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 3.0f, 1.0f - }; + static inline ResultData m_GPUOutResulData; // smart_refctd_ptr m_scene; smart_refctd_ptr m_solidAngleRenderpass; @@ -737,6 +810,9 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR smart_refctd_ptr m_mainViewFramebuffer; smart_refctd_ptr m_visualizationPipeline; // + nbl::video::IDeviceMemoryAllocator::SAllocation m_allocation = {}; + smart_refctd_ptr m_outputStorageBuffer; + smart_refctd_ptr m_ds = nullptr; smart_refctd_ptr m_semaphore; uint64_t m_realFrameIx = 0; std::array, MaxFramesInFlight> m_cmdBufs; @@ -794,7 +870,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // transformParams.useWindow = true; ImGui::Text("Camera"); - bool viewDirty = false; if (ImGui::RadioButton("LH", isLH)) isLH = true; @@ -827,13 +902,11 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); - viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f); - if (viewDirty || firstFrame) + if (firstFrame) { camera.setPosition(cameraIntialPosition); camera.setTarget(cameraInitialTarget); - camera.setBackupUpVector(cameraInitialUp); camera.setUpVector(cameraInitialUp); camera.recomputeViewMatrix(); @@ -909,45 +982,35 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR if (ImGui::IsKeyPressed(ImGuiKey_End)) { - m_OBBModelMatrix = OBBModelMatrixDefault; + m_TRS = TRS{}; } - static struct { - float32_t4x4 view, projection, model; - } imguizmoM16InOut; + static struct + { + float32_t4x4 view, projection, model; + } imguizmoM16InOut; - ImGuizmo::SetID(0u); + ImGuizmo::SetID(0u); - // TODO: camera will return hlsl::float32_tMxN - auto view = *reinterpret_cast(camera.getViewMatrix().pointer()); - imguizmoM16InOut.view = hlsl::transpose(getMatrix3x4As4x4(view)); + // TODO: camera will return hlsl::float32_tMxN + auto view = *reinterpret_cast(camera.getViewMatrix().pointer()); + imguizmoM16InOut.view = hlsl::transpose(getMatrix3x4As4x4(view)); - // TODO: camera will return hlsl::float32_tMxN - imguizmoM16InOut.projection = hlsl::transpose(*reinterpret_cast(camera.getProjectionMatrix().pointer())); - imguizmoM16InOut.model = m_OBBModelMatrix; + // TODO: camera will return hlsl::float32_tMxN + imguizmoM16InOut.projection = hlsl::transpose(*reinterpret_cast(camera.getProjectionMatrix().pointer())); + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &imguizmoM16InOut.model[0][0]); - { if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ transformParams.editTransformDecomposition = true; mainViewTransformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); + move = mainViewTransformReturnInfo.allowCameraMovement; - // TODO: camera stops when cursor hovers gizmo, but we also want to stop when gizmo is being used - move = (ImGui::IsMouseDown(ImGuiMouseButton_Left) || mainViewTransformReturnInfo.isGizmoWindowHovered) && (!mainViewTransformReturnInfo.isGizmoBeingUsed); - + ImGuizmo::DecomposeMatrixToComponents(&imguizmoM16InOut.model[0][0], &m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x); + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &imguizmoM16InOut.model[0][0]); } - - // to Nabla + update camera & model matrices - // TODO: make it more nicely, extract: - // - Position by computing inverse of the view matrix and grabbing its translation - // - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position - // But then set the view matrix this way anyway, because up-vector may not be compatible - //const auto& view = camera.getViewMatrix(); - //const_cast(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) - m_OBBModelMatrix = imguizmoM16InOut.model; - // object meta display //{ // ImGui::Begin("Object"); @@ -964,12 +1027,193 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); solidAngleViewTransformReturnInfo.sceneResolution = uint16_t2(static_cast(contentRegionSize.x), static_cast(contentRegionSize.y)); - solidAngleViewTransformReturnInfo.isGizmoBeingUsed = false; // not used in this view - solidAngleViewTransformReturnInfo.isGizmoWindowHovered = false; // not used in this view + solidAngleViewTransformReturnInfo.allowCameraMovement = false; // not used in this view ImGui::Image({ renderColorViewDescIndices[ERV_SOLID_ANGLE_VIEW] }, contentRegionSize); ImGui::End(); } + // Show data coming from GPU +#if DEBUG_DATA + { + if (ImGui::Begin("Result Data")) + { + auto drawColorField = [&](const char* fieldName, uint32_t index) + { + ImGui::Text("%s: %u", fieldName, index); + + if (index >= 27) + { + ImGui::SameLine(); + ImGui::Text(""); + return; + } + + const auto& c = colorLUT[index]; // uses the combined LUT we made earlier + + ImGui::SameLine(); + + // Color preview button + ImGui::ColorButton( + fieldName, + ImVec4(c.r, c.g, c.b, 1.0f), + 0, + ImVec2(20, 20) + ); + + ImGui::SameLine(); + ImGui::Text("%s", colorNames[index]); + }; + + // Vertices + if (ImGui::CollapsingHeader("Vertices", ImGuiTreeNodeFlags_DefaultOpen)) + { + for (uint32_t i = 0; i < 6; ++i) + { + if (i < m_GPUOutResulData.silhouetteVertexCount) + { + ImGui::Text("corners[%u]", i); + ImGui::SameLine(); + drawColorField(":", m_GPUOutResulData.vertices[i]); + ImGui::SameLine(); + static const float32_t3 constCorners[8] = { + float32_t3(-1, -1, -1), float32_t3(1, -1, -1), float32_t3(-1, 1, -1), float32_t3(1, 1, -1), + float32_t3(-1, -1, 1), float32_t3(1, -1, 1), float32_t3(-1, 1, 1), float32_t3(1, 1, 1) + }; + float32_t3 vertexLocation = constCorners[m_GPUOutResulData.vertices[i]]; + ImGui::Text(" : (%.3f, %.3f, %.3f", vertexLocation.x, vertexLocation.y, vertexLocation.z); + } + else + { + ImGui::Text("corners[%u] :: ", i); + ImGui::SameLine(); + ImGui::ColorButton( + "", + ImVec4(0.0f, 0.0f, 0.0f, 0.0f), + 0, + ImVec2(20, 20) + ); + ImGui::SameLine(); + ImGui::Text(""); + + } + + } + } + + if (ImGui::CollapsingHeader("Color LUT Map")) + { + for (int i = 0; i < 27; i++) + drawColorField(" ", i); + } + + ImGui::Separator(); + + // Silhouette info + drawColorField("silhouetteIndex", m_GPUOutResulData.silhouetteIndex); + + ImGui::Text("silhouette Vertex Count: %u", m_GPUOutResulData.silhouetteVertexCount); + ImGui::Text("silhouette Clipped VertexCount: %u", m_GPUOutResulData.clippedVertexCount); + ImGui::Text("Silhouette Mismatch: %s", m_GPUOutResulData.edgeVisibilityMismatch ? "true" : "false"); + + { + float32_t3 xAxis = m_OBBModelMatrix[0].xyz; + float32_t3 yAxis = m_OBBModelMatrix[1].xyz; + float32_t3 zAxis = m_OBBModelMatrix[2].xyz; + + float32_t3 nx = normalize(xAxis); + float32_t3 ny = normalize(yAxis); + float32_t3 nz = normalize(zAxis); + + const float epsilon = 1e-4; + bool hasSkew = false; + if (abs(dot(nx, ny)) > epsilon || abs(dot(nx, nz)) > epsilon || abs(dot(ny, nz)) > epsilon) + hasSkew = true; + ImGui::Text("Matrix Has Skew: %s", hasSkew ? "true" : "false"); + } + + static bool modalShown = false; + static uint32_t lastSilhouetteIndex = ~0u; + + // Reset modal flag if silhouette configuration changed + if (m_GPUOutResulData.silhouetteIndex != lastSilhouetteIndex) + { + modalShown = false; + lastSilhouetteIndex = m_GPUOutResulData.silhouetteIndex; + } + + if (!m_GPUOutResulData.edgeVisibilityMismatch) + { + // Reset flag when mismatch is cleared + modalShown = false; + } + if (m_GPUOutResulData.edgeVisibilityMismatch && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown) // 13 means we're inside the cube, so don't care + { + // Open modal popup only once per configuration + ImGui::OpenPopup("Edge Visibility Mismatch Warning"); + modalShown = true; + } + + // Modal popup + if (ImGui::BeginPopupModal("Edge Visibility Mismatch Warning", NULL, ImGuiWindowFlags_AlwaysAutoResize)) + { + ImGui::TextColored(ImVec4(1.0f, 0.5f, 0.0f, 1.0f), "Warning: Edge Visibility Mismatch Detected!"); + ImGui::Separator(); + + ImGui::Text("The silhouette lookup table (LUT) does not match the computed edge visibility."); + ImGui::Text("This indicates the pre-computed silhouette data may be incorrect."); + ImGui::Spacing(); + + // Show configuration info + ImGui::TextWrapped("Configuration Index: %u", m_GPUOutResulData.silhouetteIndex); + ImGui::TextWrapped("Region: (%d, %d, %d)", + m_GPUOutResulData.region.x, + m_GPUOutResulData.region.y, + m_GPUOutResulData.region.z); + ImGui::Spacing(); + + ImGui::Text("Mismatched Vertices (bitmask): 0x%08X", m_GPUOutResulData.edgeVisibilityMismatch); + + // Show which specific vertices are mismatched + ImGui::Text("Vertices involved in mismatched edges:"); + ImGui::Indent(); + for (int i = 0; i < 8; i++) + { + if (m_GPUOutResulData.edgeVisibilityMismatch & (1u << i)) + { + ImGui::BulletText("Vertex %d", i); + } + } + ImGui::Unindent(); + ImGui::Spacing(); + + if (ImGui::Button("OK", ImVec2(120, 0))) + { + ImGui::CloseCurrentPopup(); + } + + ImGui::EndPopup(); + } + + ImGui::Separator(); + + // Region (uint32_t3) + ImGui::Text("region: (%u, %u, %u)", + m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); + + ImGui::Separator(); + + // Silhouette mask printed in binary + char buf[33]; + for (int i = 0; i < 32; i++) + buf[i] = (m_GPUOutResulData.silhouette & (1u << (31 - i))) ? '1' : '0'; + buf[32] = '\0'; + + ImGui::Text("silhouette: 0x%08X", m_GPUOutResulData.silhouette); + ImGui::Text("binary: %s", buf); + } + ImGui::End(); + } +#endif // view matrices editor { ImGui::Begin("Matrices"); @@ -995,6 +1239,32 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::Separator(); }; + static RandomSampler rng(69); // Initialize RNG with seed + if (ImGui::Button("Randomize Translation")) + { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + } + ImGui::SameLine(); + + if (ImGui::Button("Randomize Rotation")) + { + m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); + } + ImGui::SameLine(); + + if (ImGui::Button("Randomize Scale")) + { + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + } + + ImGui::SameLine(); + if (ImGui::Button("Randomize All")) + { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + } + addMatrixTable("Model Matrix", "ModelMatrixTable", 4, 4, &m_OBBModelMatrix[0][0]); addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, camera.getViewMatrix().pointer()); addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, camera.getProjectionMatrix().pointer(), false); @@ -1071,6 +1341,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::End(); } ImGui::End(); + + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &m_OBBModelMatrix[0][0]); } smart_refctd_ptr imGUI; @@ -1085,15 +1357,22 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR }; SubAllocatedDescriptorSet::value_type renderColorViewDescIndices[E_RENDER_VIEWS::Count] = { SubAllocatedDescriptorSet::invalid_value, SubAllocatedDescriptorSet::invalid_value }; // - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera camera = Camera(cameraIntialPosition, cameraInitialTarget, core::matrix4SIMD(), 1, 1, nbl::core::vectorSIMDf(0.0f, 0.0f, 1.0f)); // mutables - float32_t4x4 m_OBBModelMatrix = OBBModelMatrixDefault; + struct TRS // Source of truth + { + float32_t3 translation{ 0.0f, 0.0f, 3.0f }; + float32_t3 rotation{ 0.0f }; // MUST stay orthonormal + float32_t3 scale{ 1.0f }; + } m_TRS; + float32_t4x4 m_OBBModelMatrix; // always overwritten from TRS //std::string_view objectName; TransformRequestParams transformParams; TransformReturnInfo mainViewTransformReturnInfo; TransformReturnInfo solidAngleViewTransformReturnInfo; + const static inline core::vectorSIMDf cameraIntialPosition{ -3.0f, 6.0f, 3.0f }; const static inline core::vectorSIMDf cameraInitialTarget{ 0.f, 0.0f, 3.f }; const static inline core::vectorSIMDf cameraInitialUp{ 0.f, 0.f, 1.f }; diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index e5f077e46..c61f93333 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -302,6 +302,11 @@ class Camera lastVirtualUpTimeStamp = nextPresentationTimeStamp; } + // TODO: temporary but a good fix for the camera events when mouse stops dragging gizmo + void mouseKeysUp() + { + mouseDown = false; + } private: inline void initDefaultKeysMap() { mapKeysToWASD(); } From 2e306fc96bfae85a9669ad552751cece33d1b383 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Thu, 18 Dec 2025 01:10:56 +0300 Subject: [PATCH 109/219] better (still not perfect) manual inverse of rotation matrix --- .../hlsl/SolidAngleVis.frag.hlsl | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index cd291dbd2..bf58e3231 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -228,21 +228,13 @@ void computeCubeGeo() float3x3 upper3x3 = (float3x3)pc.modelMatrix; #if 1 - // Compute reciprocal scales - float3 rcpScales = rsqrt(float3( - dot(upper3x3[0], upper3x3[0]), - dot(upper3x3[1], upper3x3[1]), - dot(upper3x3[2], upper3x3[2]) - )); - - // Build inverse-rotation-only matrix - float3x3 invRot; - invRot[0] = upper3x3[0] * rcpScales.x; - invRot[1] = upper3x3[1] * rcpScales.y; - invRot[2] = upper3x3[2] * rcpScales.z; - - // Project center into OBB local space - float3 normalizedProj = mul(invRot, obbCenter); +float3 rcpScales = rsqrt(float3( + dot(upper3x3[0], upper3x3[0]), + dot(upper3x3[1], upper3x3[1]), + dot(upper3x3[2], upper3x3[2]) +)); + +float3 normalizedProj = mul(transpose(upper3x3), obbCenter) * rcpScales; #else float3 normalizedProj = mul(inverse(upper3x3), obbCenter); #endif From 12486d4670f0453722351814996d91f198a16749 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Thu, 18 Dec 2025 02:24:41 +0300 Subject: [PATCH 110/219] Fixed faster inverse of rotation matrix, thanks Matt! --- .../hlsl/SolidAngleVis.frag.hlsl | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index bf58e3231..01d166aac 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -223,21 +223,20 @@ void computeCubeGeo() computeCubeGeo(); - float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; + float4x3 columnModel = transpose(pc.modelMatrix); - float3x3 upper3x3 = (float3x3)pc.modelMatrix; + float3 obbCenter = columnModel[3].xyz; -#if 1 -float3 rcpScales = rsqrt(float3( - dot(upper3x3[0], upper3x3[0]), - dot(upper3x3[1], upper3x3[1]), - dot(upper3x3[2], upper3x3[2]) -)); + float3x3 upper3x3 = (float3x3)columnModel; + + float3 rcpScales = rcp(float3( + dot(upper3x3[0], upper3x3[0]), + dot(upper3x3[1], upper3x3[1]), + dot(upper3x3[2], upper3x3[2]) + )); + + float3 normalizedProj = mul(upper3x3, obbCenter) * rcpScales; -float3 normalizedProj = mul(transpose(upper3x3), obbCenter) * rcpScales; -#else - float3 normalizedProj = mul(inverse(upper3x3), obbCenter); -#endif int3 region = int3( normalizedProj.x < -1.0f ? 0 : (normalizedProj.x > 1.0f ? 2 : 1), normalizedProj.y < -1.0f ? 0 : (normalizedProj.y > 1.0f ? 2 : 1), From ec1f5a5a6f805c5213499d6611a7e7785ee60aaf Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 18 Dec 2025 15:12:20 +0100 Subject: [PATCH 111/219] Example 22 now uses precompiled shaders --- 22_CppCompat/CIntrinsicsTester.h | 2 +- 22_CppCompat/CMakeLists.txt | 63 ++++++++++++++++++- 22_CppCompat/CTgmathTester.h | 4 +- .../app_resources/intrinsicsTest.comp.hlsl | 2 +- .../app_resources/tgmathTest.comp.hlsl | 2 +- 22_CppCompat/main.cpp | 10 +-- common/include/nbl/examples/Tester/ITester.h | 23 +++---- 7 files changed, 80 insertions(+), 26 deletions(-) diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index dfc1ab5e0..00e343d90 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -244,7 +244,7 @@ class CIntrinsicsTester final : public ITester inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(WORKGROUP_SIZE, 1, 1)] +[numthreads(256, 1, 1)] [shader("compute")] void main() { diff --git a/22_CppCompat/app_resources/tgmathTest.comp.hlsl b/22_CppCompat/app_resources/tgmathTest.comp.hlsl index 6115eebc6..4aeecb91d 100644 --- a/22_CppCompat/app_resources/tgmathTest.comp.hlsl +++ b/22_CppCompat/app_resources/tgmathTest.comp.hlsl @@ -9,7 +9,7 @@ [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(WORKGROUP_SIZE, 1, 1)] +[numthreads(256, 1, 1)] [shader("compute")] void main() { diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index b2a22d0e3..5f7e09f01 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -1,7 +1,7 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "app_resources/common.hlsl" @@ -67,25 +67,25 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat pplnSetupData.logger = m_logger; pplnSetupData.physicalDevice = m_physicalDevice; pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); - pplnSetupData.testShaderPath = "app_resources/tgmathTest.comp.hlsl"; + pplnSetupData.shaderKey = nbl::this_example::builtin::build::get_spirv_key<"tgmathTest">(m_device.get()); CTgmathTester tgmathTester(8); tgmathTester.setupPipeline(pplnSetupData); tgmathTester.performTestsAndVerifyResults("TgmathTestLog.txt"); } { - CIntrinsicsTester::PipelineSetupData pplnSetupData; + /*CIntrinsicsTester::PipelineSetupData pplnSetupData; pplnSetupData.device = m_device; pplnSetupData.api = m_api; pplnSetupData.assetMgr = m_assetMgr; pplnSetupData.logger = m_logger; pplnSetupData.physicalDevice = m_physicalDevice; pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); - pplnSetupData.testShaderPath = "app_resources/intrinsicsTest.comp.hlsl"; + pplnSetupData.shaderKey = "intrinsicsTest"; CIntrinsicsTester intrinsicsTester(8); intrinsicsTester.setupPipeline(pplnSetupData); - intrinsicsTester.performTestsAndVerifyResults("IntrinsicsTestLog.txt"); + intrinsicsTester.performTestsAndVerifyResults("IntrinsicsTestLog.txt");*/ } m_queue = m_device->getQueue(0, 0); diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 560ca562b..aedd2f083 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -16,7 +16,7 @@ class ITester public: struct PipelineSetupData { - std::string testShaderPath; + std::string shaderKey; core::smart_refctd_ptr device; core::smart_refctd_ptr api; core::smart_refctd_ptr assetMgr; @@ -45,8 +45,8 @@ class ITester { asset::IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); + lp.workingDirectory = "app_resources"; // virtual root + auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.shaderKey.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return logFail("Could not load shader!"); @@ -55,17 +55,9 @@ class ITester assert(assets.size() == 1); core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - auto overridenSource = asset::CHLSLCompiler::createOverridenCopy( - source.get(), "#define WORKGROUP_SIZE %d\n#define TEST_COUNT %d\n", - m_WorkgroupSize, m_testIterationCount - ); - - shader = m_device->compileShader({overridenSource.get()}); + shader = m_device->compileShader({ source.get() }); } - if (!shader) - logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V we're feeding it!\n"); - video::IGPUDescriptorSetLayout::SBinding bindings[2] = { { .binding = 0, @@ -233,7 +225,7 @@ class ITester * @param testBatchCount one test batch is equal to m_WorkgroupSize, so number of tests performed will be m_WorkgroupSize * testbatchCount */ ITester(const uint32_t testBatchCount) - : m_testIterationCount(testBatchCount* m_WorkgroupSize) + : m_testBatchCount(testBatchCount), m_testIterationCount(testBatchCount * m_WorkgroupSize) { reloadSeed(); }; @@ -284,7 +276,7 @@ class ITester m_inputBufferAllocation.memory->unmap(); // record command buffer - const uint32_t dispatchSizeX = (m_testIterationCount + (m_WorkgroupSize - 1)) / m_WorkgroupSize; + const uint32_t dispatchSizeX = m_testBatchCount; m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); @@ -399,7 +391,8 @@ class ITester } const size_t m_testIterationCount; - static constexpr size_t m_WorkgroupSize = 128u; + const uint32_t m_testBatchCount; + static constexpr size_t m_WorkgroupSize = 256u; // seed will change after every call to performTestsAndVerifyResults() std::mt19937 m_mersenneTwister; uint32_t m_seed; From e4366b1885f05d85addfdec067444c7a0d7c0bec Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 19 Dec 2025 10:41:13 +0700 Subject: [PATCH 112/219] removed commented out bits --- 34_DebugDraw/include/common.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/34_DebugDraw/include/common.hpp b/34_DebugDraw/include/common.hpp index e70eb47a8..aad9bdb1d 100644 --- a/34_DebugDraw/include/common.hpp +++ b/34_DebugDraw/include/common.hpp @@ -8,7 +8,6 @@ #include "nbl/examples/common/CEventCallback.hpp" #include "nbl/examples/examples.hpp" -//#include "nbl/CDrawAABB.h" #include "nbl/ext/DebugDraw/CDrawAABB.h" using namespace nbl; From ee9913d8f621d30a09188f8027f5eb590e5664a9 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 19 Dec 2025 13:18:45 +0100 Subject: [PATCH 113/219] Examples 14 and 22 now use precompiled shaders --- 14_Mortons/CMakeLists.txt | 60 ++++++++++++++++++++++- 14_Mortons/app_resources/test.comp.hlsl | 4 +- 14_Mortons/app_resources/test2.comp.hlsl | 6 ++- 14_Mortons/main.cpp | 6 ++- 22_CppCompat/app_resources/test.comp.hlsl | 3 ++ 22_CppCompat/main.cpp | 6 +-- 6 files changed, 76 insertions(+), 9 deletions(-) diff --git a/14_Mortons/CMakeLists.txt b/14_Mortons/CMakeLists.txt index a434ff32a..1c595e8bb 100644 --- a/14_Mortons/CMakeLists.txt +++ b/14_Mortons/CMakeLists.txt @@ -21,4 +21,62 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +if(MSVC) + target_compile_options("${EXECUTABLE_NAME}" PUBLIC "/fp:strict") +else() + target_compile_options("${EXECUTABLE_NAME}" PUBLIC -ffloat-store -frounding-math -fsignaling-nans -ftrapping-math) +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/testCommon.hlsl + app_resources/testCommon2.hlsl + app_resources/test.comp.hlsl + app_resources/test2.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/test.comp.hlsl", + "KEY": "test", + }, + { + "INPUT": "app_resources/test2.comp.hlsl", + "KEY": "test2", + }, +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/14_Mortons/app_resources/test.comp.hlsl b/14_Mortons/app_resources/test.comp.hlsl index 591915109..2a2c465f4 100644 --- a/14_Mortons/app_resources/test.comp.hlsl +++ b/14_Mortons/app_resources/test.comp.hlsl @@ -1,6 +1,7 @@ //// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. //// This file is part of the "Nabla Engine". //// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) #include "testCommon.hlsl" #include @@ -8,7 +9,7 @@ [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(WORKGROUP_SIZE, 1, 1)] +[numthreads(256, 1, 1)] [shader("compute")] void main() { @@ -16,3 +17,4 @@ void main() TestExecutor executor; executor(inputTestValues[invID], outputTestValues[invID]); } + diff --git a/14_Mortons/app_resources/test2.comp.hlsl b/14_Mortons/app_resources/test2.comp.hlsl index 045ba1bdc..8561faf83 100644 --- a/14_Mortons/app_resources/test2.comp.hlsl +++ b/14_Mortons/app_resources/test2.comp.hlsl @@ -1,6 +1,7 @@ //// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. //// This file is part of the "Nabla Engine". //// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) #include "testCommon2.hlsl" #include @@ -8,11 +9,12 @@ [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(1, 1, 1)] +[numthreads(256, 1, 1)] [shader("compute")] -void main(uint3 invocationID : SV_DispatchThreadID) +void main() { const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; TestExecutor2 executor; executor(inputTestValues[invID], outputTestValues[invID]); } + diff --git a/14_Mortons/main.cpp b/14_Mortons/main.cpp index 8dc0d0146..d1f9557d2 100644 --- a/14_Mortons/main.cpp +++ b/14_Mortons/main.cpp @@ -4,6 +4,8 @@ #include #include +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + #include "nbl/application_templates/MonoDeviceApplication.hpp" #include "nbl/examples/common/BuiltinResourcesApplication.hpp" @@ -44,7 +46,7 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp pplnSetupData.logger = m_logger; pplnSetupData.physicalDevice = m_physicalDevice; pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); - pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; + pplnSetupData.shaderKey = nbl::this_example::builtin::build::get_spirv_key<"test">(m_device.get()); CTester mortonTester(4); // 4 * 128 = 512 tests mortonTester.setupPipeline(pplnSetupData); @@ -59,7 +61,7 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp pplnSetupData.logger = m_logger; pplnSetupData.physicalDevice = m_physicalDevice; pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); - pplnSetupData.testShaderPath = "app_resources/test2.comp.hlsl"; + pplnSetupData.shaderKey = nbl::this_example::builtin::build::get_spirv_key<"test2">(m_device.get()); CTester2 mortonTester2(4); mortonTester2.setupPipeline(reinterpret_cast(pplnSetupData)); diff --git a/22_CppCompat/app_resources/test.comp.hlsl b/22_CppCompat/app_resources/test.comp.hlsl index 98be76c53..9fc6cd75f 100644 --- a/22_CppCompat/app_resources/test.comp.hlsl +++ b/22_CppCompat/app_resources/test.comp.hlsl @@ -1,6 +1,8 @@ //// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. //// This file is part of the "Nabla Engine". //// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) + #include "app_resources/common.hlsl" template @@ -88,6 +90,7 @@ struct device_capabilities2 }; [numthreads(8, 8, 1)] +[shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { fill(invocationID, 1); diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index 5f7e09f01..a0404f3eb 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -74,18 +74,18 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat tgmathTester.performTestsAndVerifyResults("TgmathTestLog.txt"); } { - /*CIntrinsicsTester::PipelineSetupData pplnSetupData; + CIntrinsicsTester::PipelineSetupData pplnSetupData; pplnSetupData.device = m_device; pplnSetupData.api = m_api; pplnSetupData.assetMgr = m_assetMgr; pplnSetupData.logger = m_logger; pplnSetupData.physicalDevice = m_physicalDevice; pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); - pplnSetupData.shaderKey = "intrinsicsTest"; + pplnSetupData.shaderKey = nbl::this_example::builtin::build::get_spirv_key<"intrinsicsTest">(m_device.get()); CIntrinsicsTester intrinsicsTester(8); intrinsicsTester.setupPipeline(pplnSetupData); - intrinsicsTester.performTestsAndVerifyResults("IntrinsicsTestLog.txt");*/ + intrinsicsTester.performTestsAndVerifyResults("IntrinsicsTestLog.txt"); } m_queue = m_device->getQueue(0, 0); From 04627c7bb708cfed00ddb6de3f289a37bd7a1ff1 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 19 Dec 2025 13:52:04 +0100 Subject: [PATCH 114/219] Every ILogger::log call now uses string literal as its first argument --- 14_Mortons/main.cpp | 1 - 24_ColorSpaceTest/main.cpp | 2 +- 64_EmulatedFloatTest/main.cpp | 8 ++++---- common/include/nbl/examples/Tester/ITester.h | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/14_Mortons/main.cpp b/14_Mortons/main.cpp index d1f9557d2..df3126359 100644 --- a/14_Mortons/main.cpp +++ b/14_Mortons/main.cpp @@ -51,7 +51,6 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp CTester mortonTester(4); // 4 * 128 = 512 tests mortonTester.setupPipeline(pplnSetupData); mortonTester.performTestsAndVerifyResults("MortonTestLog.txt"); - } { CTester2::PipelineSetupData pplnSetupData; diff --git a/24_ColorSpaceTest/main.cpp b/24_ColorSpaceTest/main.cpp index 750756321..15bc3b6da 100644 --- a/24_ColorSpaceTest/main.cpp +++ b/24_ColorSpaceTest/main.cpp @@ -561,7 +561,7 @@ class ColorSpaceTestSampleApp final : public SimpleWindowedApplication, public B const std::string prettyJson = current.data.dump(4); if (options.verbose) - m_logger->log(prettyJson, ILogger::ELL_INFO); + m_logger->log("%s", ILogger::ELL_INFO, prettyJson); system::ISystem::future_t> future; m_system->createFile(future, current.path, system::IFileBase::ECF_WRITE); diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index f3a4a8fd3..ea8def7ba 100644 --- a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -101,7 +101,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso else errorMsgPrefix = "GPU test fail:"; - m_logger->log(errorMsgPrefix.c_str(), ILogger::ELL_ERROR); + m_logger->log("%s", ILogger::ELL_ERROR, errorMsgPrefix.c_str()); m_logFile << errorMsgPrefix << '\n'; }; @@ -122,7 +122,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso ss << std::bitset<64>(expectedValue) << " - expectedValue bit pattern\n"; ss << std::bitset<64>(testValue) << " - testValue bit pattern \n"; - m_logger->log(ss.str().c_str(), ILogger::ELL_ERROR); + m_logger->log("%s", ILogger::ELL_ERROR, ss.str().c_str()); m_logFile << ss.str() << '\n'; //std::cout << "ULP error: " << std::max(expectedValue, testValue) - std::min(expectedValue, testValue) << "\n\n"; @@ -138,7 +138,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso { std::string inputValuesStr = std::string("for input values: A = ") + std::to_string(a) + std::string(" B = ") + std::to_string(b); - m_logger->log(inputValuesStr.c_str() , ILogger::ELL_ERROR); + m_logger->log("%s", ILogger::ELL_ERROR, inputValuesStr.c_str()); m_logFile << inputValuesStr << '\n'; std::stringstream ss; @@ -146,7 +146,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso ss << "\nexpected value: " << std::boolalpha << bool(expectedValue); ss << "\ntest value: " << std::boolalpha << bool(testValue); - m_logger->log(ss.str().c_str(), ILogger::ELL_ERROR); + m_logger->log("%s", ILogger::ELL_ERROR, ss.str().c_str()); m_logFile << ss.str() << '\n'; }; diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index aedd2f083..01c4973fc 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -327,7 +327,7 @@ class ITester ss << "TEST ITERATION INDEX: " << testIteration << " SEED: " << seed << '\n'; ss << "EXPECTED VALUE: " << system::to_string(expectedVal) << " TEST VALUE: " << system::to_string(testVal) << '\n'; - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); + m_logger->log("%s", system::ILogger::ELL_ERROR, ss.str().c_str()); m_logFile << ss.str() << '\n'; } From 5acd05964180897127d63d68b3db504ea8e46cc2 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 19 Dec 2025 14:37:58 +0100 Subject: [PATCH 115/219] All shaders in example 22 are precompiled shaders now --- 22_CppCompat/main.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index a0404f3eb..6a8e51cf2 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -96,8 +96,9 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset("app_resources/test.comp.hlsl", lp); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key<"intrinsicsTest">(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return logFail("Could not load shader!"); From 1961a898fd0a91c8e4d5c1a3fcb02df9142e8388 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sat, 20 Dec 2025 10:18:48 +0300 Subject: [PATCH 116/219] Fast clipping, less branches, also - More debug data going to imgui - Little bit of shader code refactoring - "Revert to last" button to go back to last random transformation of the OBB - Added getVertexZNeg() and getVertex() preprocessor branches for faster versions --- .../app_resources/hlsl/Drawing.hlsl | 122 ++-- .../hlsl/SolidAngleVis.frag.hlsl | 639 ++++++++++-------- .../app_resources/hlsl/common.hlsl | 42 +- 72_SolidAngleVisualizer/main.cpp | 90 ++- 4 files changed, 532 insertions(+), 361 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl index c3cb5befa..f3f1b4e96 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl @@ -16,79 +16,124 @@ float2 sphereToCircle(float3 spherePoint) } } -float4 drawGreatCircleArc(float3 fragPos, float3 points[2], int visibility, float aaWidth) +float drawGreatCircleArc(float3 fragPos, float3 points[2], float aaWidth, float width = 0.01f) { - if (visibility == 0) return float4(0,0,0,0); - float3 v0 = normalize(points[0]); float3 v1 = normalize(points[1]); float3 p = normalize(fragPos); - + float3 arcNormal = normalize(cross(v0, v1)); float dist = abs(dot(p, arcNormal)); - + float dotMid = dot(v0, v1); bool onArc = (dot(p, v0) >= dotMid) && (dot(p, v1) >= dotMid); - - if (!onArc) return float4(0,0,0,0); - + + if (!onArc) + return 0.0f; + float avgDepth = (length(points[0]) + length(points[1])) * 0.5f; float depthScale = 3.0f / avgDepth; - - float baseWidth = (visibility == 1) ? 0.01f : 0.005f; - float width = min(baseWidth * depthScale, 0.02f); - + + width = min(width * depthScale, 0.02f); float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); - - float4 edgeColor = (visibility == 1) ? - float4(0.0f, 0.5f, 1.0f, 1.0f) : - float4(1.0f, 0.0f, 0.0f, 1.0f); - - float intensity = (visibility == 1) ? 1.0f : 0.5f; - return edgeColor * alpha * intensity; + + return alpha; } float4 drawHiddenEdges(float3 spherePos, uint32_t silEdgeMask, float aaWidth) { - float4 color = float4(0,0,0,0); + float4 color = 0; float3 hiddenEdgeColor = float3(0.1, 0.1, 0.1); - + + NBL_UNROLL for (int i = 0; i < 12; i++) { - if ((silEdgeMask & (1u << i)) == 0) + // skip silhouette edges + if (silEdgeMask & (1u << i)) + continue; + + int2 edge = allEdges[i]; + + float3 v0 = normalize(getVertex(edge.x)); + float3 v1 = normalize(getVertex(edge.y)); + + bool neg0 = v0.z < 0.0f; + bool neg1 = v1.z < 0.0f; + + // fully hidden + if (neg0 && neg1) + continue; + + float3 p0 = v0; + float3 p1 = v1; + + // clip if needed + if (neg0 ^ neg1) { - int2 edge = allEdges[i]; - float3 edgePoints[2] = { corners[edge.x], corners[edge.y] }; - float4 edgeContribution = drawGreatCircleArc(spherePos, edgePoints, 1, aaWidth); - color += float4(hiddenEdgeColor * edgeContribution.a, edgeContribution.a); + float t = v0.z / (v0.z - v1.z); + float3 clip = normalize(lerp(v0, v1, t)); + + p0 = neg0 ? clip : v0; + p1 = neg1 ? clip : v1; } + + float3 pts[2] = {p0, p1}; + float4 c = drawGreatCircleArc(spherePos, pts, aaWidth, 0.005f); + color += float4(hiddenEdgeColor * c.a, c.a); } + return color; } float4 drawCorners(float3 spherePos, float2 p, float aaWidth) { - float4 color = float4(0,0,0,0); + float4 color = 0; + + float dotSize = 0.02f; + float innerDotSize = dotSize * 0.5f; + for (int i = 0; i < 8; i++) { - float3 corner3D = normalize(corners[i]); + float3 corner3D = normalize(getVertex(i)); float2 cornerPos = sphereToCircle(corner3D); + float dist = length(p - cornerPos); - float dotSize = 0.02f; - float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); - if (dotAlpha > 0.0f) + + // outer dot + float outerAlpha = 1.0f - smoothstep(dotSize - aaWidth, + dotSize + aaWidth, + dist); + + if (outerAlpha <= 0.0f) + continue; + + float3 dotColor = colorLUT[i]; + color += float4(dotColor * outerAlpha, outerAlpha); + + // ------------------------------------------------- + // inner black dot for hidden corners + // ------------------------------------------------- + if (corner3D.z < 0.0f) { - float3 dotColor = colorLUT[i]; - color += float4(dotColor * dotAlpha, dotAlpha); + float innerAlpha = 1.0f - smoothstep(innerDotSize - aaWidth, + innerDotSize + aaWidth, + dist); + + // ensure it stays inside the outer dot + innerAlpha *= outerAlpha; + + float3 innerColor = float3(0.0, 0.0, 0.0); + color -= float4(innerAlpha.xxx, 0.0f); } } + return color; } float4 drawRing(float2 p, float aaWidth) { float positionLength = length(p); - float ringWidth = 0.002f; + float ringWidth = 0.003f; float ringDistance = abs(positionLength - CIRCLE_RADIUS); float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); return ringAlpha * float4(1, 1, 1, 1); @@ -114,10 +159,12 @@ int getEdgeVisibility(int edgeIdx) bool visible2 = isFaceVisible(faceCenters[faces.y], n_world_f2); // Silhouette: exactly one face visible - if (visible1 != visible2) return 1; + if (visible1 != visible2) + return 1; // Inner edge: both faces visible - if (visible1 && visible2) return 2; + if (visible1 && visible2) + return 2; // Hidden edge: both faces hidden return 0; @@ -162,11 +209,10 @@ void validateEdgeVisibility(uint32_t sil, int vertexCount, uint32_t generatedSil } } } - + // Simple Write (assuming all fragments calculate the same result) InterlockedOr(DebugDataBuffer[0].edgeVisibilityMismatch, mismatchAccumulator); } #endif - #endif // _DEBUG_HLSL_ diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index 01d166aac..d7ceed943 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -15,351 +15,438 @@ static const float CIRCLE_RADIUS = 0.5f; // --- Geometry Utils --- static const float3 constCorners[8] = { - float3(-1, -1, -1), float3(1, -1, -1), float3(-1, 1, -1), float3(1, 1, -1), - float3(-1, -1, 1), float3(1, -1, 1), float3(-1, 1, 1), float3(1, 1, 1) -}; + float3(-1, -1, -1), float3(1, -1, -1), float3(-1, 1, -1), float3(1, 1, -1), + float3(-1, -1, 1), float3(1, -1, 1), float3(-1, 1, 1), float3(1, 1, 1)}; static const int2 allEdges[12] = { - {0, 1}, {2, 3}, {4, 5}, {6, 7}, // X axis - {0, 2}, {1, 3}, {4, 6}, {5, 7}, // Y axis - {0, 4}, {1, 5}, {2, 6}, {3, 7} // Z axis + {0, 1}, {2, 3}, {4, 5}, {6, 7}, // X axis + {0, 2}, + {1, 3}, + {4, 6}, + {5, 7}, // Y axis + {0, 4}, + {1, 5}, + {2, 6}, + {3, 7} // Z axis }; // Adjacency of edges to faces // Corrected Adjacency of edges to faces static const int2 edgeToFaces[12] = { - // Edge Index: | allEdges[i] | Shared Faces: - - /* 0 (0-1) */ {4, 0}, // Y- (4) and Z- (0) - /* 1 (2-3) */ {5, 0}, // Y+ (5) and Z- (0) - /* 2 (4-5) */ {4, 1}, // Y- (4) and Z+ (1) - /* 3 (6-7) */ {5, 1}, // Y+ (5) and Z+ (1) - - /* 4 (0-2) */ {2, 0}, // X- (2) and Z- (0) - /* 5 (1-3) */ {3, 0}, // X+ (3) and Z- (0) - /* 6 (4-6) */ {2, 1}, // X- (2) and Z+ (1) - /* 7 (5-7) */ {3, 1}, // X+ (3) and Z+ (1) - - /* 8 (0-4) */ {2, 4}, // X- (2) and Y- (4) - /* 9 (1-5) */ {3, 4}, // X+ (3) and Y- (4) - /* 10 (2-6) */ {2, 5}, // X- (2) and Y+ (5) - /* 11 (3-7) */ {3, 5} // X+ (3) and Y+ (5) + // Edge Index: | allEdges[i] | Shared Faces: + + /* 0 (0-1) */ {4, 0}, // Y- (4) and Z- (0) + /* 1 (2-3) */ {5, 0}, // Y+ (5) and Z- (0) + /* 2 (4-5) */ {4, 1}, // Y- (4) and Z+ (1) + /* 3 (6-7) */ {5, 1}, // Y+ (5) and Z+ (1) + + /* 4 (0-2) */ {2, 0}, // X- (2) and Z- (0) + /* 5 (1-3) */ {3, 0}, // X+ (3) and Z- (0) + /* 6 (4-6) */ {2, 1}, // X- (2) and Z+ (1) + /* 7 (5-7) */ {3, 1}, // X+ (3) and Z+ (1) + + /* 8 (0-4) */ {2, 4}, // X- (2) and Y- (4) + /* 9 (1-5) */ {3, 4}, // X+ (3) and Y- (4) + /* 10 (2-6) */ {2, 5}, // X- (2) and Y+ (5) + /* 11 (3-7) */ {3, 5} // X+ (3) and Y+ (5) }; static float3 corners[8]; static float3 faceCenters[6] = { - float3(0,0,0), float3(0,0,0), float3(0,0,0), - float3(0,0,0), float3(0,0,0), float3(0,0,0) -}; + float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), + float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0)}; static const float3 localNormals[6] = { - float3(0, 0, -1), // Face 0 (Z-) - float3(0, 0, 1), // Face 1 (Z+) - float3(-1, 0, 0), // Face 2 (X-) - float3(1, 0, 0), // Face 3 (X+) - float3(0, -1, 0), // Face 4 (Y-) - float3(0, 1, 0) // Face 5 (Y+) + float3(0, 0, -1), // Face 0 (Z-) + float3(0, 0, 1), // Face 1 (Z+) + float3(-1, 0, 0), // Face 2 (X-) + float3(1, 0, 0), // Face 3 (X+) + float3(0, -1, 0), // Face 4 (Y-) + float3(0, 1, 0) // Face 5 (Y+) }; - // TODO: unused, remove later // Vertices are ordered CCW relative to the camera view. static const int silhouettes[27][7] = { - {6, 1, 3, 2, 6, 4, 5}, // 0: Black - {6, 2, 6, 4, 5, 7, 3}, // 1: White - {6, 0, 4, 5, 7, 3, 2}, // 2: Gray - {6, 1, 3, 7, 6, 4, 5,}, // 3: Red - {4, 4, 5, 7, 6, -1, -1}, // 4: Green - {6, 0, 4, 5, 7, 6, 2}, // 5: Blue - {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow - {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta - {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan - {6, 1, 3, 2, 6, 7, 5}, // 9: Orange - {4, 2, 6, 7, 3, -1, -1}, // 10: Light Orange - {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange - {4, 1, 3, 7, 5, -1, -1}, // 12: Pink - {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink - {4, 0, 4, 6, 2, -1, -1}, // 14: Deep Rose - {6, 0, 1, 3, 7, 5, 4}, // 15: Purple - {4, 0, 1, 5, 4, -1, -1}, // 16: Light Purple - {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo - {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green - {6, 0, 2, 6, 7, 3, 1}, // 19: Lime - {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green - {6, 0, 2, 3, 7, 5, 1}, // 21: Navy - {4, 0, 2, 3, 1, -1, -1}, // 22: Sky Blue - {6, 0, 4, 6, 2, 3, 1}, // 23: Teal - {6, 0, 2, 3, 7, 5, 4}, // 24: Brown - {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige - {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown + {6, 1, 3, 2, 6, 4, 5}, // 0: Black + {6, 2, 6, 4, 5, 7, 3}, // 1: White + {6, 0, 4, 5, 7, 3, 2}, // 2: Gray + {6, 1, 3, 7, 6, 4, 5}, // 3: Red + {4, 4, 5, 7, 6, -1, -1}, // 4: Green + {6, 0, 4, 5, 7, 6, 2}, // 5: Blue + {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow + {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta + {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan + {6, 1, 3, 2, 6, 7, 5}, // 9: Orange + {4, 2, 6, 7, 3, -1, -1}, // 10: Light Orange + {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange + {4, 1, 3, 7, 5, -1, -1}, // 12: Pink + {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink + {4, 0, 4, 6, 2, -1, -1}, // 14: Deep Rose + {6, 0, 1, 3, 7, 5, 4}, // 15: Purple + {4, 0, 1, 5, 4, -1, -1}, // 16: Light Purple + {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo + {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green + {6, 0, 2, 6, 7, 3, 1}, // 19: Lime + {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green + {6, 0, 2, 3, 7, 5, 1}, // 21: Navy + {4, 0, 2, 3, 1, -1, -1}, // 22: Sky Blue + {6, 0, 4, 6, 2, 3, 1}, // 23: Teal + {6, 0, 2, 3, 7, 5, 4}, // 24: Brown + {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige + {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown }; // Binary packed silhouettes static const uint32_t binSilhouettes[27] = { - 0b11000000000000101100110010011001, - 0b11000000000000011111101100110010, - 0b11000000000000010011111101100000, - 0b11000000000000101100110111011001, - 0b10000000000000000000110111101100, - 0b11000000000000010110111101100000, - 0b11000000000000100110111011001000, - 0b11000000000000100110111101001000, - 0b11000000000000010110111101001000, - 0b11000000000000101111110010011001, - 0b10000000000000000000011111110010, - 0b11000000000000010011111110100000, - 0b10000000000000000000101111011001, - 0b11000000000000010011111110100000, - 0b10000000000000000000010110100000, - 0b11000000000000100101111011001000, - 0b10000000000000000000100101001000, - 0b11000000000000010110100101001000, - 0b11000000000000001101111110010000, - 0b11000000000000001011111110010000, - 0b11000000000000001011111110100000, - 0b11000000000000001101111011010000, - 0b10000000000000000000001011010000, - 0b11000000000000001011010110100000, - 0b11000000000000100101111011010000, - 0b11000000000000100101001011010000, - 0b11000000000000011010110100101001, + 0b11000000000000101100110010011001, + 0b11000000000000011111101100110010, + 0b11000000000000010011111101100000, + 0b11000000000000101100110111011001, + 0b10000000000000000000110111101100, + 0b11000000000000010110111101100000, + 0b11000000000000100110111011001000, + 0b11000000000000100110111101001000, + 0b11000000000000010110111101001000, + 0b11000000000000101111110010011001, + 0b10000000000000000000011111110010, + 0b11000000000000010011111110100000, + 0b10000000000000000000101111011001, + 0b11000000000000010011111110100000, + 0b10000000000000000000010110100000, + 0b11000000000000100101111011001000, + 0b10000000000000000000100101001000, + 0b11000000000000010110100101001000, + 0b11000000000000001101111110010000, + 0b11000000000000001011111110010000, + 0b11000000000000001011111110100000, + 0b11000000000000001101111011010000, + 0b10000000000000000000001011010000, + 0b11000000000000001011010110100000, + 0b11000000000000100101111011010000, + 0b11000000000000100101001011010000, + 0b11000000000000011010110100101001, }; int getSilhouetteVertex(uint32_t packedSil, int index) { - return (packedSil >> (3 * index)) & 0x7; + return (packedSil >> (3 * index)) & 0x7; } // Get silhouette size int getSilhouetteSize(uint32_t sil) { - return (sil >> 29) & 0x7; - + return (sil >> 29) & 0x7; } // Check if vertex has negative z bool getVertexZNeg(int vertexIdx) { - return normalize(corners[vertexIdx]).z < 0.0f; +#if FAST + float3 localPos = float3( + (vertexIdx & 1) ? 1.0f : -1.0f, + (vertexIdx & 2) ? 1.0f : -1.0f, + (vertexIdx & 4) ? 1.0f : -1.0f); + + float transformedZ = dot(pc.modelMatrix[2].xyz, localPos) + pc.modelMatrix[2].w; + return transformedZ < 0.0f; +#else + return corners[vertexIdx].z < 0.0f; +#endif } -#include "Drawing.hlsl" +float3 getVertex(int vertexIdx) +{ +#if FAST + // Reconstruct local cube corner from index bits + float sx = (vertexIdx & 1) ? 1.0f : -1.0f; + float sy = (vertexIdx & 2) ? 1.0f : -1.0f; + float sz = (vertexIdx & 4) ? 1.0f : -1.0f; + + float4x3 model = transpose(pc.modelMatrix); + + // Transform to world + // Full position, not just Z like getVertexZNeg + return model[0].xyz * sx + + model[1].xyz * sy + + model[2].xyz * sz + + model[3].xyz; + // return mul(pc.modelMatrix, float4(sx, sy, sz, 1.0f)); +#else + return corners[vertexIdx]; +#endif +} +#include "Drawing.hlsl" -void setDebugData(uint32_t sil, int3 region, int configIndex, uint32_t clippedVertexCount) +void setDebugData(uint32_t sil, int3 region, int configIndex) { #if DEBUG_DATA - DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); - DebugDataBuffer[0].region = uint3(region); - DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); - DebugDataBuffer[0].clippedVertexCount = clippedVertexCount; - for (int i = 0; i < 6; i++) - { - DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); - } - DebugDataBuffer[0].silhouette = sil; + DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); + DebugDataBuffer[0].region = uint3(region); + DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); + for (int i = 0; i < 6; i++) + { + DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); + } + DebugDataBuffer[0].silhouette = sil; #endif } float2 toCircleSpace(float2 uv) { - float2 p = uv * 2.0f - 1.0f; - float aspect = pc.viewport.z / pc.viewport.w; - p.x *= aspect; - return p; + float2 p = uv * 2.0f - 1.0f; + float aspect = pc.viewport.z / pc.viewport.w; + p.x *= aspect; + return p; } -uint32_t packSilhouette(const int s[7]) +uint32_t packSilhouette(const int s[7]) { - uint32_t packed = 0; - int size = s[0] & 0x7; // 3 bits for size - - // Pack vertices LSB-first (vertex1 in lowest 3 bits above size) - for (int i = 1; i <= 6; ++i) { - int v = s[i]; - if (v < 0) v = 0; // replace unused vertices with 0 - packed |= (v & 0x7) << (3 * (i - 1)); // vertex i-1 shifted by 3*(i-1) - } - - // Put size in the MSB (bits 29-31 for a 32-bit uint, leaving 29 bits for vertices) - packed |= (size & 0x7) << 29; - - return packed; + uint32_t packed = 0; + int size = s[0] & 0x7; // 3 bits for size + + // Pack vertices LSB-first (vertex1 in lowest 3 bits above size) + for (int i = 1; i <= 6; ++i) + { + int v = s[i]; + if (v < 0) + v = 0; // replace unused vertices with 0 + packed |= (v & 0x7) << (3 * (i - 1)); // vertex i-1 shifted by 3*(i-1) + } + + // Put size in the MSB (bits 29-31 for a 32-bit uint, leaving 29 bits for vertices) + packed |= (size & 0x7) << 29; + + return packed; } void computeCubeGeo() { - for (int i = 0; i < 8; i++) - for (int i = 0; i < 8; i++) - { - float3 localPos = constCorners[i]; - float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; - corners[i] = worldPos.xyz; - faceCenters[i / 4] += worldPos / 4.0f; - faceCenters[2 + i % 2] += worldPos / 4.0f; - faceCenters[4 + (i / 2) % 2] += worldPos / 4.0f; - } + for (int i = 0; i < 8; i++) + { + float3 localPos = constCorners[i]; + float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; + corners[i] = worldPos.xyz; + faceCenters[i / 4] += worldPos / 4.0f; + faceCenters[2 + i % 2] += worldPos / 4.0f; + faceCenters[4 + (i / 2) % 2] += worldPos / 4.0f; + } +} + +// Helper to draw an edge with proper color mapping +float4 drawEdge(int originalEdgeIdx, float3 pts[2], float3 spherePos, float aaWidth, float width = 0.01f) +{ + float4 edgeContribution = drawGreatCircleArc(spherePos, pts, aaWidth, width); + return float4(colorLUT[originalEdgeIdx] * edgeContribution.a, edgeContribution.a); +}; + +float4 drawSilhouette(uint32_t vertexCount, uint32_t sil, float3 spherePos, float aaWidth) +{ + float4 color = 0; + + // Build clip mask (z < 0) + uint32_t clipMask = 0u; + NBL_UNROLL + for (int i = 0; i < 4; i++) + clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; + + if (vertexCount == 6) + { + NBL_UNROLL + for (int i = 4; i < 6; i++) + clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; + } + + int clipCount = countbits(clipMask); + + // Early exit if fully clipped + if (clipCount == vertexCount) + return color; + + // No clipping needed - fast path + if (clipCount == 0) + { + for (int i = 0; i < vertexCount; i++) + { + int i0 = i; + int i1 = (i + 1) % vertexCount; + + float3 v0 = getVertex(getSilhouetteVertex(sil, i0)); + float3 v1 = getVertex(getSilhouetteVertex(sil, i1)); + float3 pts[2] = {v0, v1}; + + color += drawEdge(i1, pts, spherePos, aaWidth); + } + return color; + } + + // Rotate clip mask so positives come first + uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); + bool wrapAround = ((clipMask & 1u) != 0u) && + ((clipMask & (1u << (vertexCount - 1))) != 0u); + int rotateAmount = wrapAround + ? firstbitlow(invertedMask) // -> First POSITIVE + : firstbithigh(clipMask) + 1; // -> First vertex AFTER last negative + + uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); + uint32_t rotatedSil = rotr(sil, rotateAmount * 3, vertexCount * 3); + + int positiveCount = vertexCount - clipCount; + + // ALWAYS compute both clip points + int lastPosIdx = positiveCount - 1; + int firstNegIdx = positiveCount; + float3 vLastPos = getVertex(getSilhouetteVertex(rotatedSil, lastPosIdx)); + float3 vFirstNeg = getVertex(getSilhouetteVertex(rotatedSil, firstNegIdx)); + float t = vLastPos.z / (vLastPos.z - vFirstNeg.z); + float3 clipA = lerp(vLastPos, vFirstNeg, t); + + float3 vLastNeg = getVertex(getSilhouetteVertex(rotatedSil, vertexCount - 1)); + float3 vFirstPos = getVertex(getSilhouetteVertex(rotatedSil, 0)); + t = vLastNeg.z / (vLastNeg.z - vFirstPos.z); + float3 clipB = lerp(vLastNeg, vFirstPos, t); + + // Draw positive edges + NBL_UNROLL + for (int i = 0; i < positiveCount; i++) + { + + float3 v0 = getVertex(getSilhouetteVertex(rotatedSil, i)); + bool useClipA = (i == positiveCount - 1); + float3 v1 = useClipA ? clipA : getVertex(getSilhouetteVertex(rotatedSil, (i + 1) % vertexCount)); + + float3 pts[2] = {v0, v1}; + color += drawEdge(i + 1, pts, spherePos, aaWidth); + } + + // NP edge + if (clipCount > 0 && clipCount < vertexCount) + { + float3 vFirst = getVertex(getSilhouetteVertex(rotatedSil, 0)); + float3 npPts[2] = {clipB, vFirst}; + color += drawEdge(0, npPts, spherePos, aaWidth); + } + + // Horizon arc + if (clipCount > 0 && clipCount < vertexCount) + { + float3 arcPts[2] = {clipA, clipB}; + color += drawEdge(23, arcPts, spherePos, aaWidth, 0.6f); + } + +#if DEBUG_DATA + DebugDataBuffer[0].clipMask = clipMask; + DebugDataBuffer[0].clipCount = clipCount; + { + int transitions = 0; + for (int i = 0; i < vertexCount; i++) + { + bool a = (rotatedClipMask >> i) & 1u; + bool b = (rotatedClipMask >> ((i + 1) % vertexCount)) & 1u; + if (a != b) + transitions++; + } + // transitions must be 0 or 2 + DebugDataBuffer[0].MoreThanTwoBitTransitions = transitions > 2; + DebugDataBuffer[0].rotatedClipMask = rotatedClipMask; + DebugDataBuffer[0].rotateAmount = rotateAmount; + DebugDataBuffer[0].positiveVertCount = positiveCount; + DebugDataBuffer[0].wrapAround = (uint32_t)wrapAround; + DebugDataBuffer[0].rotatedSil = rotatedSil; + } +#endif + return color; } [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { - float4 color = float4(0, 0, 0, 0); - float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); - float2 p = toCircleSpace(vx.uv); + float4 color = float4(0, 0, 0, 0); + for (int i = 0; i < 1; i++) + { + + float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); + float2 p = toCircleSpace(vx.uv); - float2 normalized = p / CIRCLE_RADIUS; - float r2 = dot(normalized, normalized); + float2 normalized = p / CIRCLE_RADIUS; + float r2 = dot(normalized, normalized); - float3 spherePos; - if (r2 <= 1.0f) - { - spherePos = float3(normalized.x, normalized.y, sqrt(1.0f - r2)); - } - else - { - float uv2Plus1 = r2 + 1.0f; - spherePos = float3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; - } - spherePos = normalize(spherePos); + float3 spherePos; + if (r2 <= 1.0f) + { + spherePos = float3(normalized.x, normalized.y, sqrt(1.0f - r2)); + } + else + { + float uv2Plus1 = r2 + 1.0f; + spherePos = float3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; + } + spherePos = normalize(spherePos); - computeCubeGeo(); + computeCubeGeo(); - float4x3 columnModel = transpose(pc.modelMatrix); + float4x3 columnModel = transpose(pc.modelMatrix); - float3 obbCenter = columnModel[3].xyz; + float3 obbCenter = columnModel[3].xyz; - float3x3 upper3x3 = (float3x3)columnModel; + float3x3 upper3x3 = (float3x3)columnModel; - float3 rcpScales = rcp(float3( - dot(upper3x3[0], upper3x3[0]), - dot(upper3x3[1], upper3x3[1]), - dot(upper3x3[2], upper3x3[2]) - )); + float3 rcpSqScales = rcp(float3( + dot(upper3x3[0], upper3x3[0]), + dot(upper3x3[1], upper3x3[1]), + dot(upper3x3[2], upper3x3[2]))); - float3 normalizedProj = mul(upper3x3, obbCenter) * rcpScales; + float3 normalizedProj = mul(upper3x3, obbCenter) * rcpSqScales; - int3 region = int3( - normalizedProj.x < -1.0f ? 0 : (normalizedProj.x > 1.0f ? 2 : 1), - normalizedProj.y < -1.0f ? 0 : (normalizedProj.y > 1.0f ? 2 : 1), - normalizedProj.z < -1.0f ? 0 : (normalizedProj.z > 1.0f ? 2 : 1) - ); - int configIndex = region.x + region.y * 3 + region.z * 9; + int3 region = int3( + normalizedProj.x < -1.0f ? 0 : (normalizedProj.x > 1.0f ? 2 : 1), + normalizedProj.y < -1.0f ? 0 : (normalizedProj.y > 1.0f ? 2 : 1), + normalizedProj.z < -1.0f ? 0 : (normalizedProj.z > 1.0f ? 2 : 1)); - // uint32_t sil = packSilhouette(silhouettes[configIndex]); - uint32_t sil = binSilhouettes[configIndex]; + int configIndex = region.x + region.y * 3 + region.z * 9; - int vertexCount = getSilhouetteSize(sil); - bool longSilhouette = (vertexCount == 6); - uint32_t silEdgeMask = 0; + // uint32_t sil = packSilhouette(silhouettes[configIndex]); + uint32_t sil = binSilhouettes[configIndex]; + + int vertexCount = getSilhouetteSize(sil); + uint32_t silEdgeMask = 0; #if DEBUG_DATA - { - for (int i = 0; i < vertexCount; i++) - { - int vIdx = i % vertexCount; - int v1Idx = (i + 1) % vertexCount; - - int v0Corner = getSilhouetteVertex(sil, vIdx); - int v1Corner = getSilhouetteVertex(sil, v1Idx); - // Mark edge as part of silhouette - for (int e = 0; e < 12; e++) - { - int2 edge = allEdges[e]; - if ((edge.x == v0Corner && edge.y == v1Corner) || - (edge.x == v1Corner && edge.y == v0Corner)) - { - silEdgeMask |= (1u << e); - } - } - } - validateEdgeVisibility(sil, vertexCount, silEdgeMask); - } + { + for (int i = 0; i < vertexCount; i++) + { + int vIdx = i % vertexCount; + int v1Idx = (i + 1) % vertexCount; + + int v0Corner = getSilhouetteVertex(sil, vIdx); + int v1Corner = getSilhouetteVertex(sil, v1Idx); + // Mark edge as part of silhouette + for (int e = 0; e < 12; e++) + { + int2 edge = allEdges[e]; + if ((edge.x == v0Corner && edge.y == v1Corner) || + (edge.x == v1Corner && edge.y == v0Corner)) + { + silEdgeMask |= (1u << e); + } + } + } + validateEdgeVisibility(sil, vertexCount, silEdgeMask); + } #endif - // Build clip mask for vertices below horizon (z < 0) - uint32_t clipMask = 0u; - NBL_UNROLL - for (int i = 0; i < 6; i++) - { - if (i >= vertexCount) break; - clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; - } - - int clipCount = countbits(clipMask); - - // Total clipped vertices - int clippedVertCount = vertexCount + (clipMask != 0u ? (2 - clipCount) : 0); - - // Find rotation amount to place positive vertices first - int rotateAmount = 0; - if (clipMask != 0u) - { - uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); - bool wrapAround = ((clipMask & 1u) != 0u) && ((clipMask >> (vertexCount - 1)) & 1u); - - rotateAmount = wrapAround ? - ((firstbithigh(invertedMask) + 1) % vertexCount) : - firstbitlow(clipMask); - } - - // Rotate silhouette bits - uint32_t vertexBits = sil & 0x1FFFFFFF; - uint32_t rotatedVertexBits = rotr(vertexBits, rotateAmount * 3, vertexCount * 3); - uint32_t rotatedSil = (sil & 0xE0000000) | rotatedVertexBits; - - // Rotate the clip mask to match - uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); - - // Draw clipped silhouette edges - for (int i = 0; i < clippedVertCount; i++) - { - int nextI = (i + 1) % clippedVertCount; - - int vIdx = i % vertexCount; - int v1Idx = nextI % vertexCount; - - // Extract clip bits directly - bool v0Clipped = (rotatedClipMask >> vIdx) & 1u; - bool v1Clipped = (rotatedClipMask >> v1Idx) & 1u; - - // Skip if both clipped - if (v0Clipped && v1Clipped) continue; - - int v0Corner = getSilhouetteVertex(rotatedSil, vIdx); - int v1Corner = getSilhouetteVertex(rotatedSil, v1Idx); - - float3 v0 = normalize(corners[v0Corner]); - float3 v1 = normalize(corners[v1Corner]); - - float3 points[2] = { corners[v0Corner], corners[v1Corner] }; - - // Clip using bit state - if (v0Clipped) - { - float t = v0.z / (v0.z - v1.z); - points[0] = normalize(lerp(corners[v0Corner], corners[v1Corner], t)); - } - else if (v1Clipped) - { - float t = v0.z / (v0.z - v1.z); - points[1] = normalize(lerp(corners[v0Corner], corners[v1Corner], t)); - } - - // Draw edge - float4 edgeContribution = drawGreatCircleArc(spherePos, points, 1, aaWidth); - color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); - - } - - - setDebugData(sil, region, configIndex, clippedVertCount); - - color += drawHiddenEdges(spherePos, silEdgeMask, aaWidth); - color += drawCorners(spherePos, p, aaWidth); - color += drawRing(p, aaWidth); - - if (all(vx.uv >= float2(0.49f, 0.49f)) && all(vx.uv <= float2(0.51f, 0.51f))) - { - return float4(colorLUT[configIndex], 1.0f); - } - - return color; + + uint32_t positiveCount = 0; + color += drawSilhouette(vertexCount, sil, spherePos, aaWidth); + setDebugData(sil, region, configIndex); + + color += drawHiddenEdges(spherePos, silEdgeMask, aaWidth); + color += drawCorners(spherePos, p, aaWidth); + color += drawRing(p, aaWidth); + + if (all(vx.uv >= float2(0.49f, 0.49f)) && all(vx.uv <= float2(0.51f, 0.51f))) + { + return float4(colorLUT[configIndex], 1.0f); + } + } + + return color; } \ No newline at end of file diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl index 3c87a48bc..c8532e796 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -3,6 +3,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #define DEBUG_DATA 1 +#define FAST 1 namespace nbl { @@ -13,12 +14,19 @@ namespace nbl { uint32_t3 region; uint32_t silhouetteIndex; - + uint32_t silhouetteVertexCount; uint32_t silhouette; - uint32_t clippedVertexCount; + uint32_t positiveVertCount; uint32_t edgeVisibilityMismatch; + uint32_t clipMask; + uint32_t clipCount; + uint32_t rotatedSil; + uint32_t wrapAround; + uint32_t rotatedClipMask; + uint32_t rotateAmount; + uint32_t MoreThanTwoBitTransitions; uint32_t vertices[6]; }; @@ -29,24 +37,22 @@ namespace nbl }; static const float32_t3 colorLUT[27] = { - float32_t3(0, 0, 0), float32_t3(1, 1, 1), float32_t3(0.5, 0.5, 0.5), - float32_t3(1, 0, 0), float32_t3(0, 1, 0), float32_t3(0, 0, 1), - float32_t3(1, 1, 0), float32_t3(1, 0, 1), float32_t3(0, 1, 1), - float32_t3(1, 0.5, 0), float32_t3(1, 0.65, 0), float32_t3(0.8, 0.4, 0), - float32_t3(1, 0.4, 0.7), float32_t3(1, 0.75, 0.8), float32_t3(0.7, 0.1, 0.3), - float32_t3(0.5, 0, 0.5), float32_t3(0.6, 0.4, 0.8), float32_t3(0.3, 0, 0.5), - float32_t3(0, 0.5, 0), float32_t3(0.5, 1, 0), float32_t3(0, 0.5, 0.25), - float32_t3(0, 0, 0.5), float32_t3(0.3, 0.7, 1), float32_t3(0, 0.4, 0.6), - float32_t3(0.6, 0.4, 0.2), float32_t3(0.8, 0.7, 0.3), float32_t3(0.4, 0.3, 0.1) - }; + float32_t3(0, 0, 0), float32_t3(1, 1, 1), float32_t3(0.5, 0.5, 0.5), + float32_t3(1, 0, 0), float32_t3(0, 1, 0), float32_t3(0, 0, 1), + float32_t3(1, 1, 0), float32_t3(1, 0, 1), float32_t3(0, 1, 1), + float32_t3(1, 0.5, 0), float32_t3(1, 0.65, 0), float32_t3(0.8, 0.4, 0), + float32_t3(1, 0.4, 0.7), float32_t3(1, 0.75, 0.8), float32_t3(0.7, 0.1, 0.3), + float32_t3(0.5, 0, 0.5), float32_t3(0.6, 0.4, 0.8), float32_t3(0.3, 0, 0.5), + float32_t3(0, 0.5, 0), float32_t3(0.5, 1, 0), float32_t3(0, 0.5, 0.25), + float32_t3(0, 0, 0.5), float32_t3(0.3, 0.7, 1), float32_t3(0, 0.4, 0.6), + float32_t3(0.6, 0.4, 0.2), float32_t3(0.8, 0.7, 0.3), float32_t3(0.4, 0.3, 0.1)}; #ifndef __HLSL_VERSION - static const char* colorNames[27] = {"Black", - "White", "Gray", "Red", "Green", "Blue", "Yellow", "Magenta", "Cyan", - "Orange", "Light Orange", "Dark Orange", "Pink", "Light Pink", "Deep Rose", "Purple", "Light Purple", - "Indigo", "Dark Green", "Lime", "Forest Green", "Navy", "Sky Blue", "Teal", "Brown", - "Tan/Beige", "Dark Brown" - }; + static const char *colorNames[27] = {"Black", + "White", "Gray", "Red", "Green", "Blue", "Yellow", "Magenta", "Cyan", + "Orange", "Light Orange", "Dark Orange", "Pink", "Light Pink", "Deep Rose", "Purple", "Light Purple", + "Indigo", "Dark Green", "Lime", "Forest Green", "Navy", "Sky Blue", "Teal", "Brown", + "Tan/Beige", "Dark Brown"}; #endif // __HLSL_VERSION } } diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index 1c52547af..64f4cb100 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -475,13 +475,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR instance.packedGeo = m_renderer->getGeometries().data(); // cube // +interface.gcIndex; m_renderer->render(cb, viewParams); // draw the cube/OBB - // TODO: a better way to get identity matrix - float32_t3x4 origin = { - 1.0f,0.0f,0.0f,0.0f, - 0.0f,1.0f,0.0f,0.0f, - 0.0f,0.0f,1.0f,0.0f - }; - memcpy(&instance.world, &origin, sizeof(instance.world)); + instance.world = float32_t3x4(1.0f); instance.packedGeo = m_renderer->getGeometries().data() + 2; // disk m_renderer->render(cb, viewParams); } @@ -1112,8 +1106,9 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR drawColorField("silhouetteIndex", m_GPUOutResulData.silhouetteIndex); ImGui::Text("silhouette Vertex Count: %u", m_GPUOutResulData.silhouetteVertexCount); - ImGui::Text("silhouette Clipped VertexCount: %u", m_GPUOutResulData.clippedVertexCount); + ImGui::Text("silhouette Positive VertexCount: %u", m_GPUOutResulData.positiveVertCount); ImGui::Text("Silhouette Mismatch: %s", m_GPUOutResulData.edgeVisibilityMismatch ? "true" : "false"); + ImGui::Text("More Than Two Bit Transitions: %s", m_GPUOutResulData.MoreThanTwoBitTransitions ? "true" : "false"); { float32_t3 xAxis = m_OBBModelMatrix[0].xyz; @@ -1141,12 +1136,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR lastSilhouetteIndex = m_GPUOutResulData.silhouetteIndex; } - if (!m_GPUOutResulData.edgeVisibilityMismatch) + if (!m_GPUOutResulData.edgeVisibilityMismatch || !m_GPUOutResulData.MoreThanTwoBitTransitions) { // Reset flag when mismatch is cleared modalShown = false; } - if (m_GPUOutResulData.edgeVisibilityMismatch && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown) // 13 means we're inside the cube, so don't care + if ((m_GPUOutResulData.edgeVisibilityMismatch || m_GPUOutResulData.MoreThanTwoBitTransitions) && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown) // 13 means we're inside the cube, so don't care { // Open modal popup only once per configuration ImGui::OpenPopup("Edge Visibility Mismatch Warning"); @@ -1165,10 +1160,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // Show configuration info ImGui::TextWrapped("Configuration Index: %u", m_GPUOutResulData.silhouetteIndex); - ImGui::TextWrapped("Region: (%d, %d, %d)", - m_GPUOutResulData.region.x, - m_GPUOutResulData.region.y, - m_GPUOutResulData.region.z); + ImGui::TextWrapped("Region: (%u, %u, %u)", m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); ImGui::Spacing(); ImGui::Text("Mismatched Vertices (bitmask): 0x%08X", m_GPUOutResulData.edgeVisibilityMismatch); @@ -1203,13 +1195,26 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::Separator(); // Silhouette mask printed in binary - char buf[33]; - for (int i = 0; i < 32; i++) - buf[i] = (m_GPUOutResulData.silhouette & (1u << (31 - i))) ? '1' : '0'; - buf[32] = '\0'; - ImGui::Text("silhouette: 0x%08X", m_GPUOutResulData.silhouette); - ImGui::Text("binary: %s", buf); + + auto printBin = [](uint32_t bin, const char* name) + { + char buf[33]; + for (int i = 0; i < 32; i++) + buf[i] = (bin & (1u << (31 - i))) ? '1' : '0'; + buf[32] = '\0'; + ImGui::Text("%s: 0x%08X", name, bin); + ImGui::Text("binary: 0b%s", buf); + ImGui::Separator(); + }; + printBin(m_GPUOutResulData.silhouette, "Silhouette"); + printBin(m_GPUOutResulData.rotatedSil, "rotatedSilhouette"); + + printBin(m_GPUOutResulData.clipCount, "clipCount"); + printBin(m_GPUOutResulData.clipMask, "clipMask"); + printBin(m_GPUOutResulData.rotatedClipMask, "rotatedClipMask"); + printBin(m_GPUOutResulData.rotateAmount, "rotateAmount"); + printBin(m_GPUOutResulData.wrapAround, "wrapAround"); } ImGui::End(); } @@ -1240,29 +1245,56 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR }; static RandomSampler rng(69); // Initialize RNG with seed + + // Helper function to check if cube intersects unit sphere at origin + auto isCubeOutsideUnitSphere = [](const float32_t3& translation, const float32_t3& scale) -> bool { + float cubeRadius = glm::length(scale) * 0.5f; + float distanceToCenter = glm::length(translation); + return (distanceToCenter - cubeRadius) > 1.0f; + }; + + static TRS lastTRS = {}; if (ImGui::Button("Randomize Translation")) { - m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + lastTRS = m_TRS; // Backup before randomizing + int attempts = 0; + do { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); } ImGui::SameLine(); - if (ImGui::Button("Randomize Rotation")) { + lastTRS = m_TRS; // Backup before randomizing m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); } ImGui::SameLine(); - if (ImGui::Button("Randomize Scale")) { - m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + lastTRS = m_TRS; // Backup before randomizing + int attempts = 0; + do { + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); } - - ImGui::SameLine(); + //ImGui::SameLine(); if (ImGui::Button("Randomize All")) { - m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); - m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); - m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + lastTRS = m_TRS; // Backup before randomizing + int attempts = 0; + do { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); + } + ImGui::SameLine(); + if (ImGui::Button("Revert to Last")) + { + m_TRS = lastTRS; // Restore backed-up TRS } addMatrixTable("Model Matrix", "ModelMatrixTable", 4, 4, &m_OBBModelMatrix[0][0]); From 1cf7c7cf05e0c2aeb4c8e657fdfe62443a7bc898 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 22 Dec 2025 15:10:53 +0700 Subject: [PATCH 117/219] refactor with new changes --- 34_DebugDraw/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 153a387c5..86cbc8010 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -117,8 +117,8 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti SPushConstantRange simplePcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, - .offset = 0, - .size = sizeof(ext::debug_draw::SSinglePushConstants) + .offset = offsetof(ext::debug_draw::PushConstants, spc), + .size = sizeof(ext::debug_draw::SSinglePC) }; { ext::debug_draw::DrawAABB::SCreationParameters params = {}; From 12b3de5f3f60601c4c3d6c8cd21fe1b1e0edb600 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 22 Dec 2025 15:21:49 +0700 Subject: [PATCH 118/219] removed unused files, removed unused vars in ex 34 --- 34_DebugDraw/config.json.template | 28 --------------- 34_DebugDraw/main.cpp | 59 ++++--------------------------- 34_DebugDraw/pipeline.groovy | 50 -------------------------- 3 files changed, 6 insertions(+), 131 deletions(-) delete mode 100644 34_DebugDraw/config.json.template delete mode 100644 34_DebugDraw/pipeline.groovy diff --git a/34_DebugDraw/config.json.template b/34_DebugDraw/config.json.template deleted file mode 100644 index f961745c1..000000000 --- a/34_DebugDraw/config.json.template +++ /dev/null @@ -1,28 +0,0 @@ -{ - "enableParallelBuild": true, - "threadsPerBuildProcess" : 2, - "isExecuted": false, - "scriptPath": "", - "cmake": { - "configurations": [ "Release", "Debug", "RelWithDebInfo" ], - "buildModes": [], - "requiredOptions": [] - }, - "profiles": [ - { - "backend": "vulkan", - "platform": "windows", - "buildModes": [], - "runConfiguration": "Release", - "gpuArchitectures": [] - } - ], - "dependencies": [], - "data": [ - { - "dependencies": [], - "command": [""], - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 86cbc8010..6f699f091 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -53,9 +53,10 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti return false; { + constexpr float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; core::vectorSIMDf cameraPosition(14, 8, 12); core::vectorSIMDf cameraTarget(0, 0, 0); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(WIN_W) / WIN_H, zNear, zFar); + matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), float(WIN_W) / WIN_H, zNear, zFar); camera = Camera(cameraPosition, cameraTarget, projectionMatrix, moveSpeed, rotateSpeed); } @@ -239,7 +240,8 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti drawParams.commandBuffer = cmdbuf; drawParams.cameraMat = viewProjectionMatrix; - drawAABB->renderSingle(drawParams, testAABB, float32_t4{ 1, 0, 0, 1 }); + if (!drawAABB->renderSingle(drawParams, testAABB, float32_t4{ 1, 0, 0, 1 })) + m_logger->log("Unable to draw AABB with single draw pipeline!", ILogger::ELL_ERROR); { using aabb_t = hlsl::shapes::AABB<3, float>; using point_t = aabb_t::point_t; @@ -265,7 +267,8 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti } const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; - drawAABB->render(drawParams, drawFinished, aabbInstances); + if (!drawAABB->render(drawParams, drawFinished, aabbInstances)) + m_logger->log("Unable to draw AABBs with instanced draw pipeline!", ILogger::ELL_ERROR); } cmdbuf->endRenderPass(); @@ -341,52 +344,11 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti } private: - std::array getVerticesFromAABB(core::aabbox3d& aabb) - { - const auto& pMin = aabb.MinEdge; - const auto& pMax = aabb.MaxEdge; - - std::array vertices; - vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); - - vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[6] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); - - vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); - vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); - - vertices[12] = float32_t3(pMax.X, pMax.Y, pMax.Z); - vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); - vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); - vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); - - vertices[16] = float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); - - vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); - vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); - vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[23] = float32_t3(pMax.X, pMax.Y, pMax.Z); - - return vertices; - } - // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers constexpr static inline uint32_t MaxFramesInFlight = 3u; smart_refctd_ptr m_window; smart_refctd_ptr> m_surface; - smart_refctd_ptr m_pipeline; - smart_refctd_ptr m_streamingPipeline; smart_refctd_ptr m_semaphore; smart_refctd_ptr m_cmdPool; uint64_t m_realFrameIx = 0; @@ -397,20 +359,11 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; - core::smart_refctd_ptr m_descriptorSetPool; - Camera camera; video::CDumbPresentationOracle oracle; - uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed - - float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; - smart_refctd_ptr drawAABB; hlsl::shapes::AABB<3, float> testAABB = hlsl::shapes::AABB<3, float>{ { -5, -5, -5 }, { 10, 10, -10 } }; - - using streaming_buffer_t = video::StreamingTransientDataBufferST>; - smart_refctd_ptr streamingBuffer; }; NBL_MAIN_FUNC(DebugDrawSampleApp) \ No newline at end of file diff --git a/34_DebugDraw/pipeline.groovy b/34_DebugDraw/pipeline.groovy deleted file mode 100644 index 4c0efec03..000000000 --- a/34_DebugDraw/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder - -class CDebugDrawBuilder extends IBuilder -{ - public CDebugDrawBuilder(Agent _agent, _info) - { - super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - return true - } -} - -def create(Agent _agent, _info) -{ - return new CDebugDrawBuilder(_agent, _info) -} - -return this \ No newline at end of file From b8b2c638f0c3fcc6a34ec9ca10d4016e46095a64 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 22 Dec 2025 15:22:08 +0700 Subject: [PATCH 119/219] removed unused files in ex 12 --- 12_MeshLoaders/config.json.template | 28 ---------------- 12_MeshLoaders/pipeline.groovy | 50 ----------------------------- 2 files changed, 78 deletions(-) delete mode 100644 12_MeshLoaders/config.json.template delete mode 100644 12_MeshLoaders/pipeline.groovy diff --git a/12_MeshLoaders/config.json.template b/12_MeshLoaders/config.json.template deleted file mode 100644 index f961745c1..000000000 --- a/12_MeshLoaders/config.json.template +++ /dev/null @@ -1,28 +0,0 @@ -{ - "enableParallelBuild": true, - "threadsPerBuildProcess" : 2, - "isExecuted": false, - "scriptPath": "", - "cmake": { - "configurations": [ "Release", "Debug", "RelWithDebInfo" ], - "buildModes": [], - "requiredOptions": [] - }, - "profiles": [ - { - "backend": "vulkan", - "platform": "windows", - "buildModes": [], - "runConfiguration": "Release", - "gpuArchitectures": [] - } - ], - "dependencies": [], - "data": [ - { - "dependencies": [], - "command": [""], - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/12_MeshLoaders/pipeline.groovy b/12_MeshLoaders/pipeline.groovy deleted file mode 100644 index 7b7c9702a..000000000 --- a/12_MeshLoaders/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder - -class CUIBuilder extends IBuilder -{ - public CUIBuilder(Agent _agent, _info) - { - super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - return true - } -} - -def create(Agent _agent, _info) -{ - return new CUIBuilder(_agent, _info) -} - -return this \ No newline at end of file From d20b9c67bf5ef5c4d782a13709a78ce59b24e1e4 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 22 Dec 2025 12:20:20 +0100 Subject: [PATCH 120/219] update Mitsuba unit test cases --- media | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/media b/media index f895f4e3d..7501fce9e 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit f895f4e3d1f72c772267143fe60f891bfe9e8e82 +Subproject commit 7501fce9e2a23d863b30ea8f5bc8d9bee0925cf9 From 8333763f44407f45dfc47d5ae4b1bed3f6d8fb9e Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 24 Dec 2025 01:23:22 +0100 Subject: [PATCH 121/219] can parse all public mitsuba scenes! --- 14_MitsubaLoader/test_scenes.txt | 31 +++++++++++++++++++++++++++---- media | 2 +- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/14_MitsubaLoader/test_scenes.txt b/14_MitsubaLoader/test_scenes.txt index 4211e3cb6..c0228b4c2 100644 --- a/14_MitsubaLoader/test_scenes.txt +++ b/14_MitsubaLoader/test_scenes.txt @@ -1,7 +1,30 @@ ; Here is my Commented line that batch file will skip (started with semicolons) "../media/mitsuba/shapetest.xml" "../media/mitsuba/daily_pt.xml" -;"../media/mitsuba/brdf_eval_test.xml" -;"../media/mitsuba/brdf_eval_test_as.xml" -;"../media/mitsuba/brdf_eval_test_diffuse.xml" -;"../media/mitsuba/brdf_eval_test_lambert.xml" +"../media/mitsuba/brdf_eval_test.xml" +"../media/mitsuba/brdf_eval_test_as.xml" +"../media/mitsuba/brdf_eval_test_diffuse.xml" +"../media/mitsuba/brdf_eval_test_lambert.xml" +"../media/mitsuba/aniso_ies/72_render_0_2.xml" +"../media/mitsuba/bathroom/scene.xml" +"../media/mitsuba/bathroom2/scene.xml" +"../media/mitsuba/bedroom/scene.xml" +"../media/mitsuba/car2/scene.xml" +"../media/mitsuba/coffee/scene.xml" +;"../media/mitsuba/classroom/scene.xml" ; skip because is not supported +"../media/mitsuba/ditt/render_720p.xml" +"../media/mitsuba/ditt/render_2160p.xml" +"../media/mitsuba/ditt/render_cube_lh.xml" +"../media/mitsuba/ditt/render_cube_rh.xml" +"../media/mitsuba/glass-of-water/scene.xml" +"../media/mitsuba/kitchen/scene.xml" +;"../media/mitsuba/lamp/scene.xml" ; skip because is not supported +"../media/mitsuba/living-room/scene.xml" +"../media/mitsuba/living-room-2/scene.xml" +"../media/mitsuba/iso_ies/71_render_0_2.xml" +"../media/mitsuba/messed_up_uvs/31_scene_0_1.xml" +"../media/mitsuba/normalmap_test/render_withnormalmap.xml" +"../media/mitsuba/normalmap_test/render_withoutnormalmap.xml" +"../media/mitsuba/spaceship/scene.xml" +"../media/mitsuba/staircase/scene.xml" +;"../media/mitsuba/staircase2/scene.xml" ; skip because is not supported diff --git a/media b/media index 7501fce9e..9543ffa4f 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit 7501fce9e2a23d863b30ea8f5bc8d9bee0925cf9 +Subproject commit 9543ffa4f2deaf2d975bf18bba5782ce836bcc9e From 513c5a736539086c97227643c62c4fbcf2eafa1a Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 24 Dec 2025 02:01:36 +0100 Subject: [PATCH 122/219] oopsie pushed a dangling submodule pointer last commit --- 14_MitsubaLoader/test_scenes.txt | 2 +- media | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/14_MitsubaLoader/test_scenes.txt b/14_MitsubaLoader/test_scenes.txt index c0228b4c2..a5876752c 100644 --- a/14_MitsubaLoader/test_scenes.txt +++ b/14_MitsubaLoader/test_scenes.txt @@ -8,7 +8,7 @@ "../media/mitsuba/aniso_ies/72_render_0_2.xml" "../media/mitsuba/bathroom/scene.xml" "../media/mitsuba/bathroom2/scene.xml" -"../media/mitsuba/bedroom/scene.xml" +;"../media/mitsuba/bedroom/scene.xml" ; we'd need to commit uncompressed 100MB OBJ, and this example doesn't load from ZIP "../media/mitsuba/car2/scene.xml" "../media/mitsuba/coffee/scene.xml" ;"../media/mitsuba/classroom/scene.xml" ; skip because is not supported diff --git a/media b/media index 9543ffa4f..0f7ad42b3 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit 9543ffa4f2deaf2d975bf18bba5782ce836bcc9e +Subproject commit 0f7ad42b33abe3143a5d69c4d14b26cf3e538c88 From 3db6e3cef42467eb3fda53f59b78264e43c31ba8 Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 24 Dec 2025 02:13:13 +0100 Subject: [PATCH 123/219] make example 14 text parser compatible with CI inputs for odl example 22 --- 14_MitsubaLoader/main.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/14_MitsubaLoader/main.cpp b/14_MitsubaLoader/main.cpp index b698340f2..2bd96ce16 100644 --- a/14_MitsubaLoader/main.cpp +++ b/14_MitsubaLoader/main.cpp @@ -55,6 +55,7 @@ class MitsubaLoaderTest final : public BuiltinResourcesApplication while (++retval Date: Wed, 24 Dec 2025 02:20:07 +0100 Subject: [PATCH 124/219] renumber Mitsuba Loader example to 15 --- {14_MitsubaLoader => 15_MitsubaLoader}/CMakeLists.txt | 0 {14_MitsubaLoader => 15_MitsubaLoader}/main.cpp | 0 {14_MitsubaLoader => 15_MitsubaLoader}/test_scenes.txt | 0 CMakeLists.txt | 2 +- 4 files changed, 1 insertion(+), 1 deletion(-) rename {14_MitsubaLoader => 15_MitsubaLoader}/CMakeLists.txt (100%) rename {14_MitsubaLoader => 15_MitsubaLoader}/main.cpp (100%) rename {14_MitsubaLoader => 15_MitsubaLoader}/test_scenes.txt (100%) diff --git a/14_MitsubaLoader/CMakeLists.txt b/15_MitsubaLoader/CMakeLists.txt similarity index 100% rename from 14_MitsubaLoader/CMakeLists.txt rename to 15_MitsubaLoader/CMakeLists.txt diff --git a/14_MitsubaLoader/main.cpp b/15_MitsubaLoader/main.cpp similarity index 100% rename from 14_MitsubaLoader/main.cpp rename to 15_MitsubaLoader/main.cpp diff --git a/14_MitsubaLoader/test_scenes.txt b/15_MitsubaLoader/test_scenes.txt similarity index 100% rename from 14_MitsubaLoader/test_scenes.txt rename to 15_MitsubaLoader/test_scenes.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a7c6be29..5eb2769d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,7 +47,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(13_MaterialCompilerTest) # if (NBL_BUILD_MITSUBA_LOADER) - add_subdirectory(14_MitsubaLoader) + add_subdirectory(15_MitsubaLoader) endif() # Waiting for a refactor From 0a32a90dac1a7c694fb8ab2fe45e528f462c57b6 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 28 Dec 2025 04:37:09 +0100 Subject: [PATCH 125/219] UI updates & new modes --- 50.IESViewer/App.hpp | 20 +- 50.IESViewer/AppEvent.cpp | 6 +- 50.IESViewer/AppInit.cpp | 105 +-- 50.IESViewer/AppRender.cpp | 212 +++++- 50.IESViewer/AppUI.cpp | 640 ++++++++++++++---- 50.IESViewer/CMakeLists.txt | 3 +- 50.IESViewer/CSimpleIESRenderer.hpp | 15 +- 50.IESViewer/IES.cpp | 31 +- 50.IESViewer/IES.hpp | 13 +- 50.IESViewer/app_resources/common.hlsl | 3 +- 50.IESViewer/app_resources/false_color.hlsl | 74 ++ 50.IESViewer/app_resources/ies.unified.hlsl | 52 +- .../examples/common/MonoWindowApplication.hpp | 4 +- 13 files changed, 938 insertions(+), 240 deletions(-) create mode 100644 50.IESViewer/app_resources/false_color.hlsl diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp index 6d7577016..af6d48792 100644 --- a/50.IESViewer/App.hpp +++ b/50.IESViewer/App.hpp @@ -63,6 +63,19 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp smart_refctd_ptr m_scene; smart_refctd_ptr m_renderer; Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); // TODO: orbit would be better + uint32_t m_plot3DWidth = 640u; + uint32_t m_plot3DHeight = 640u; + float m_plotRadius = 100.0f; + float m_cameraMoveSpeed = 1.0f; + float m_cameraRotateSpeed = 1.0f; + float m_cameraFovDeg = 60.0f; + bool m_cameraControlEnabled = false; + bool m_cameraControlApplied = false; + bool m_fullscreen3D = false; + bool m_wireframeEnabled = false; + bool m_showOctaMapPreview = true; + std::vector m_assetLabels; + std::vector m_candelaDirty; InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; @@ -74,7 +87,8 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp struct { IES::E_MODE view = IES::EM_CDC; - bitflag sphere = this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE; + bitflag sphere = + bitflag(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE) | this_example::ies::ESM_FALSE_COLOR; } mode; void processMouse(const IMouseEventChannel::range_t& events); @@ -83,6 +97,8 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp smart_refctd_ptr createImageView(const size_t width, const size_t height, E_FORMAT format, std::string name, bitflag usage = bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_STORAGE_BIT, bitflag aspectFlags = bitflag(IImage::EAF_COLOR_BIT)); + bool recreate3DPlotFramebuffers(uint32_t width, uint32_t height); + void applyWindowMode(); template requires AppIESBufferCreationAllowed @@ -105,4 +121,4 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp void uiListener(); }; -#endif // _THIS_EXAMPLE_APP_HPP_ \ No newline at end of file +#endif // _THIS_EXAMPLE_APP_HPP_ diff --git a/50.IESViewer/AppEvent.cpp b/50.IESViewer/AppEvent.cpp index 672a77e21..ae040f9f2 100644 --- a/50.IESViewer/AppEvent.cpp +++ b/50.IESViewer/AppEvent.cpp @@ -40,9 +40,13 @@ void IESViewer::processKeyboard(const nbl::ui::IKeyboardEventChannel::range_t& e mode.view = IES::EM_CDC; else if (ev.keyCode == nbl::ui::EKC_V) mode.view = IES::EM_OCTAHEDRAL_MAP; + else if (ev.keyCode == nbl::ui::EKC_ESCAPE && m_cameraControlEnabled) + m_cameraControlEnabled = false; + else if (ev.keyCode == nbl::ui::EKC_SPACE) + m_cameraControlEnabled = !m_cameraControlEnabled; if (ev.keyCode == nbl::ui::EKC_Q) m_running = false; } } -} \ No newline at end of file +} diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index cb51bf87a..b0ef40473 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -7,7 +7,6 @@ #include "app_resources/common.hlsl" #include "app_resources/imgui.opts.hlsl" #include "nbl/this_example/builtin/build/spirv/keys.hpp" - #define MEDIA_ENTRY "../../media" #define INPUT_JSON_FILE "../inputs.json" @@ -61,6 +60,13 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) auto took = std::to_string(elapsed.count()); m_logger->log("Finished loading IES m_assets, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); } + { + m_assetLabels.clear(); + m_assetLabels.reserve(m_assets.size()); + for (const auto& ies : m_assets) + m_assetLabels.emplace_back(path(ies.key).filename().string()); + } + m_candelaDirty.assign(m_assets.size(), true); m_logger->log("Creating GPU IES resources..", system::ILogger::ELL_INFO); { @@ -76,7 +82,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) const auto* profile = ies.getProfile(); const auto& accessor = profile->getAccessor(); const auto& resolution = accessor.properties.optimalIESResolution; - textureInfosMapped[i] = CIESProfile::texture_t::createInfo(accessor, resolution, 0.f, true); + textureInfosMapped[i] = CIESProfile::texture_t::createInfo(accessor, resolution, ies.flatten, true); ies.buffers.textureInfo.buffer = textureInfos; ies.buffers.textureInfo.offset = i * sizeof(IESTextureInfo); @@ -248,42 +254,38 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dscLayoutPtrs); pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), m_descriptors.data()); { - constexpr auto ViewsCount = 1u; // used to be 4u with debug maps (counted x2 for RO & RW binding but one descriptor) - std::array, ViewsCount + 1u> infos; - #define FILL_INFO(DESC, IX) \ - { \ - auto& info = infos[IX].emplace_back(); \ - info.desc = DESC; \ - info.info.image.imageLayout = IImage::LAYOUT::GENERAL; \ - } + constexpr auto ViewsCount = 1u; // used to be 4u with debug maps (counted x2 for RO & RW binding but one descriptor) + std::array, ViewsCount * 2u + 1u> infos; + auto addInfo = [](auto& list, auto desc, IImage::LAYOUT layout) + { + auto& info = list.emplace_back(); + info.desc = desc; + info.info.image.imageLayout = layout; + }; for (uint32_t i = 0; i < m_assets.size(); ++i) { auto& ies = m_assets[i]; - - FILL_INFO(ies.views.candelaOctahedralMap, 0u) + addInfo(infos[0u], ies.views.candelaOctahedralMap, IImage::LAYOUT::READ_ONLY_OPTIMAL); + addInfo(infos[1u], ies.views.candelaOctahedralMap, IImage::LAYOUT::GENERAL); } - FILL_INFO(generalSampler, ViewsCount); + addInfo(infos.back(), generalSampler, IImage::LAYOUT::READ_ONLY_OPTIMAL); auto* samplerInfo = infos.back().data(); - samplerInfo->info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - std::array writes; - for (uint32_t i = 0; i < ViewsCount; ++i) - { - auto& write = writes[i]; - write.count = m_assets.size(); - write.info = infos[i].data(); - write.dstSet = m_descriptors[0u].get(); - write.arrayElement = 0u; - write.binding = i; - } - for (uint32_t i = ViewsCount; i < ViewsCount*2u; ++i) - { - auto ix = i - ViewsCount; - auto& write = writes[i] = writes[ix]; - write.binding = ix + 10u; - } + std::array writes = {}; + auto& sampledWrite = writes[0u]; + sampledWrite.count = m_assets.size(); + sampledWrite.info = infos[0u].data(); + sampledWrite.dstSet = m_descriptors[0u].get(); + sampledWrite.arrayElement = 0u; + sampledWrite.binding = 0u; + + auto& storageWrite = writes[1u]; + storageWrite.count = m_assets.size(); + storageWrite.info = infos[1u].data(); + storageWrite.dstSet = m_descriptors[0u].get(); + storageWrite.arrayElement = 0u; + storageWrite.binding = 10u; auto& write = writes.back(); write.count = 1u; @@ -311,10 +313,12 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) auto ixs = std::to_string(i); // TODO: may actually change it, temporary hardcoding - constexpr auto WIDTH = 640, HEIGHT = 640; + constexpr auto WIDTH = 640; + constexpr auto HEIGHT_2D = WIDTH * 2; + constexpr auto HEIGHT_3D = WIDTH; { - auto color = createImageView(WIDTH, HEIGHT, EF_R8G8B8A8_SRGB, "[2D Plot]: framebuffer[" + ixs + "].color attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IImage::EAF_COLOR_BIT); + auto color = createImageView(WIDTH, HEIGHT_2D, EF_R8G8B8A8_SRGB, "[2D Plot]: framebuffer[" + ixs + "].color attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IImage::EAF_COLOR_BIT); fb2D = m_device->createFramebuffer ( { { @@ -322,14 +326,14 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) .depthStencilAttachments = nullptr, .colorAttachments = &color.get(), .width = WIDTH, - .height = HEIGHT + .height = HEIGHT_2D } } ); } { - auto color = createImageView(WIDTH, HEIGHT, EF_R8G8B8A8_SRGB, "[3D Plot]: framebuffer[" + ixs + "].color attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IImage::EAF_COLOR_BIT); - auto depth = createImageView(WIDTH, HEIGHT, EF_D16_UNORM, "[3D Plot]: framebuffer[" + ixs + "].depth attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IGPUImage::EAF_DEPTH_BIT); + auto color = createImageView(WIDTH, HEIGHT_3D, EF_R8G8B8A8_SRGB, "[3D Plot]: framebuffer[" + ixs + "].color attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IImage::EAF_COLOR_BIT); + auto depth = createImageView(WIDTH, HEIGHT_3D, EF_D16_UNORM, "[3D Plot]: framebuffer[" + ixs + "].depth attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IGPUImage::EAF_DEPTH_BIT); fb3D = m_device->createFramebuffer ( @@ -338,7 +342,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) .depthStencilAttachments = &depth.get(), .colorAttachments = &color.get(), .width = WIDTH, - .height = HEIGHT + .height = HEIGHT_3D } } ); } @@ -411,14 +415,19 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); + const auto cameraOffset = cameraPosition - cameraTarget; + cameraPosition = cameraTarget + cameraOffset * 1.5f; #ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(m_window->getWidth()) / float(m_window->getHeight()), 0.1, 10000); + matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(m_cameraFovDeg), float(m_window->getWidth()) / float(m_window->getHeight()), 0.1, 10000); #else const auto& params = m_frameBuffers3D.front()->getCreationParameters(); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(params.width) / float(params.height), 0.1, 10000); + matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(m_cameraFovDeg), float(params.width) / float(params.height), 0.1, 10000); #endif camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); + m_cameraMoveSpeed = camera.getMoveSpeed(); + m_cameraRotateSpeed = camera.getRotateSpeed(); + m_cameraControlApplied = !m_cameraControlEnabled; } #ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY @@ -574,6 +583,22 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) } onAppInitializedFinish(); + if (m_window && m_winMgr) + applyWindowMode(); return true; -} \ No newline at end of file +} + +void IESViewer::applyWindowMode() +{ + if (!m_window || !m_winMgr) + return; + + m_winMgr->maximize(m_window.get()); + + if (m_surface) + { + if (auto* scRes = m_surface->getSwapchainResources()) + scRes->invalidate(); + } +} diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index 4074c7f0c..d4ff5538d 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -5,6 +5,85 @@ #include "App.hpp" #include "app_resources/common.hlsl" +bool IESViewer::recreate3DPlotFramebuffers(uint32_t width, uint32_t height) +{ +#ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY + return true; +#else + if (width == 0u || height == 0u) + return false; + + if (width == m_plot3DWidth && height == m_plot3DHeight) + return true; + + m_device->waitIdle(); + m_plot3DWidth = width; + m_plot3DHeight = height; + + auto* scRes = static_cast(m_surface->getSwapchainResources()); + auto renderpass = smart_refctd_ptr(scRes->getRenderpass()); + + for (uint32_t i = 0u; i < m_frameBuffers3D.size(); ++i) + { + auto& fb3D = m_frameBuffers3D[i]; + auto ixs = std::to_string(i); + + auto color = createImageView(width, height, EF_R8G8B8A8_SRGB, "[3D Plot]: framebuffer[" + ixs + "].color attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IImage::EAF_COLOR_BIT); + if (!color) + return false; + + auto depth = createImageView(width, height, EF_D16_UNORM, "[3D Plot]: framebuffer[" + ixs + "].depth attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IGPUImage::EAF_DEPTH_BIT); + if (!depth) + return false; + + fb3D = m_device->createFramebuffer + ( + { { + .renderpass = renderpass, + .depthStencilAttachments = &depth.get(), + .colorAttachments = &color.get(), + .width = width, + .height = height + } } + ); + if (!fb3D) + return false; + } + + if (ui.it && ui.descriptor) + { + std::array infos; + for (auto& it : infos) + it.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + auto* ix = infos.data(); + ix->desc = smart_refctd_ptr(ui.it->getFontAtlasView()); + ++ix; + for (uint8_t i = 0u; i < device_base_t::MaxFramesInFlight; ++i, ++ix) + ix->desc = m_frameBuffers2D[i]->getCreationParameters().colorAttachments[0u]; + for (uint8_t i = 0u; i < device_base_t::MaxFramesInFlight; ++i, ++ix) + ix->desc = m_frameBuffers3D[i]->getCreationParameters().colorAttachments[0u]; + + const auto texturesBinding = ui.it->getCreationParameters().resources.texturesInfo.bindingIx; + auto writes = std::to_array({ IGPUDescriptorSet::SWriteDescriptorSet{ + .dstSet = ui.descriptor->getDescriptorSet(), + .binding = texturesBinding, + .arrayElement = ext::imgui::UI::FontAtlasTexId, + .count = static_cast(infos.size()), + .info = infos.data() + } }); + + if (!m_device->updateDescriptorSets(writes, {})) + return false; + } + + matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(m_cameraFovDeg), float(width) / float(height), 0.1f, 10000.0f); + camera.setProjectionMatrix(projectionMatrix); + + return true; +#endif +} + IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::microseconds nextPresentationTimestamp) { const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; @@ -12,10 +91,36 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi auto scRes = static_cast(m_surface->getSwapchainResources()); + const bool windowFocused = m_window->hasInputFocus() || m_window->hasMouseFocus(); + if (!windowFocused && m_cameraControlEnabled) + m_cameraControlEnabled = false; + const bool wantCameraControl = m_cameraControlEnabled && windowFocused; + + const uint32_t windowWidth = m_window->getWidth(); + const uint32_t windowHeight = m_window->getHeight(); + if (windowWidth == 0u || windowHeight == 0u || m_window->isMinimized()) + return {}; + + if (m_cameraControlApplied != wantCameraControl) + { + m_cameraControlApplied = wantCameraControl; + const float moveSpeed = wantCameraControl ? m_cameraMoveSpeed : 0.0f; + const float rotateSpeed = wantCameraControl ? m_cameraRotateSpeed : 0.0f; + camera.setMoveSpeed(moveSpeed); + camera.setRotateSpeed(rotateSpeed); + } + + + #ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY IGPUFramebuffer* const fb2D = nullptr; auto* const fb3D = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); #else + const uint32_t desired3DWidth = windowWidth; + const uint32_t desired3DHeight = windowHeight; + if (!recreate3DPlotFramebuffers(desired3DWidth, desired3DHeight)) + return {}; + auto* const fb2D = m_frameBuffers2D[resourceIx].get(); auto* const fb3D = m_frameBuffers3D[resourceIx].get(); #endif @@ -32,11 +137,56 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi } captured; camera.beginInputProcessing(nextPresentationTimestamp); - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); processMouse(events); for (const auto& e : events) captured.mouse.emplace_back(e); }, m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); processKeyboard(events); for (const auto& e : events) captured.keyboard.emplace_back(e); }, m_logger.get()); + if (windowFocused) + { + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (wantCameraControl) + camera.mouseProcess(events); + processMouse(events); + for (const auto& e : events) + captured.mouse.emplace_back(e); + }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + camera.keyboardProcess(events); + processKeyboard(events); + for (const auto& e : events) + captured.keyboard.emplace_back(e); + }, m_logger.get()); + } + else + { + mouse.consumeEvents([&](const IMouseEventChannel::range_t&) -> void {}, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t&) -> void {}, m_logger.get()); + } camera.endInputProcessing(nextPresentationTimestamp); - const auto cursorPosition = m_window->getCursorControl()->getPosition(); + { + const float maxRadius = m_plotRadius * 0.98f; + const float clampRadius = maxRadius * 0.999f; + auto pos = camera.getPosition(); + const float dist = core::length(pos)[0]; + if (dist > maxRadius) + { + auto forward = camera.getTarget() - pos; + pos.makeSafe3D(); + pos = core::normalize(pos) * clampRadius; + camera.setPosition(pos); + camera.setTarget(pos + forward); + } + } + + auto* cursorControl = m_window->getCursorControl(); + const auto cursorPosition = cursorControl->getPosition(); + const int32_t windowX = m_window->getX(); + const int32_t windowY = m_window->getY(); + const int32_t windowW = static_cast(m_window->getWidth()); + const int32_t windowH = static_cast(m_window->getHeight()); + const bool cursorInsideWindow = + cursorPosition.x >= windowX && cursorPosition.x < windowX + windowW && + cursorPosition.y >= windowY && cursorPosition.y < windowY + windowH; + cursorControl->setVisible(!(cursorInsideWindow || m_cameraControlApplied)); ext::imgui::UI::SUpdateParameters params = { .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), @@ -50,6 +200,12 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi #endif } + if (m_cameraControlApplied) + { + if (auto* cursor = m_window->getCursorControl()) + cursor->setRelativePosition(m_window.get(), {0.5f, 0.5f}); + } + auto& ies = m_assets[m_activeAssetIx]; const auto* profile = ies.getProfile(); const auto& accessor = profile->getAccessor(); @@ -78,9 +234,13 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi } auto* const descriptor = m_descriptors[0].get(); - auto* image = ies.getActiveImage(mode.view); + auto* image = ies.getActiveImage(IES::EM_OCTAHEDRAL_MAP); - // Compute + bool needCompute = true; + if (m_activeAssetIx < m_candelaDirty.size()) + needCompute = m_candelaDirty[m_activeAssetIx]; + + if (needCompute) { cb->beginDebugMarker("IES::compute"); IES::barrier(cb, image); @@ -90,18 +250,20 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, offsetof(hlsl::this_example::ies::PushConstants, cdc), sizeof(pc), &pc); const auto xGroups = (ies.getProfile()->getAccessor().properties.optimalIESResolution.x - 1u) / WORKGROUP_DIMENSION + 1u; cb->dispatch(xGroups, xGroups, 1); + IES::barrier(cb, image); cb->endDebugMarker(); + if (m_activeAssetIx < m_candelaDirty.size()) + m_candelaDirty[m_activeAssetIx] = false; } // Graphics { - IES::barrier(cb, image); - #ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY asset::VkExtent3D extent = { m_window->getWidth(), m_window->getHeight() }; #else auto extent = fb2D->getCreationParameters().colorAttachments[0u]->getCreationParameters().image->getCreationParameters().extent; #endif + const uint32_t plotHeight = extent.height / 2u; asset::SViewport viewport; { @@ -144,8 +306,29 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi auto* layout = m_graphicsPipeline->getLayout(); cb->bindGraphicsPipeline(m_graphicsPipeline.get()); cb->bindDescriptorSets(EPBP_GRAPHICS, layout, 0, 1, &descriptor); - cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc), &pc); + asset::SViewport viewport2D = viewport; + viewport2D.width = static_cast(extent.width); + viewport2D.height = static_cast(plotHeight); + VkRect2D scissor2D = scissor; + scissor2D.extent = { extent.width, plotHeight }; + + auto pc2D = pc; + pc2D.mode = mode.view; + cb->setViewport(0u, 1u, &viewport2D); + cb->setScissor(0u, 1u, &scissor2D); + cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc2D), &pc2D); ext::FullScreenTriangle::recordDrawCall(cb); + + if (m_showOctaMapPreview) + { + viewport2D.y = static_cast(plotHeight); + scissor2D.offset.y = static_cast(plotHeight); + pc2D.mode = IES::EM_OCTAHEDRAL_MAP; + cb->setViewport(0u, 1u, &viewport2D); + cb->setScissor(0u, 1u, &scissor2D); + cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc2D), &pc2D); + ext::FullScreenTriangle::recordDrawCall(cb); + } } cb->endRenderPass(); cb->endDebugMarker(); @@ -156,6 +339,14 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi info3D.colorClearValues = &d3clearValue; // tmp info3D.depthStencilClearValues = &depthValue; info3D.framebuffer = fb3D; + auto extent3D = fb3D->getCreationParameters().colorAttachments[0u]->getCreationParameters().image->getCreationParameters().extent; + viewport.width = extent3D.width; + viewport.height = extent3D.height; + cb->setViewport(0u, 1u, &viewport); + scissor.extent = { extent3D.width, extent3D.height }; + cb->setScissor(0u, 1u, &scissor); + currentRenderArea.extent = { extent3D.width, extent3D.height }; + info3D.renderArea = currentRenderArea; cb->beginDebugMarker("IES::graphics 3D plot"); cb->beginRenderPass(info3D, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); { @@ -167,7 +358,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); } const auto viewParams = CSimpleIESRenderer::SViewParams(viewMatrix, viewProjMatrix); - const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = 100.f, .ds = m_descriptors[0u].get(), .texID = (uint16_t)m_activeAssetIx, .mode = mode.sphere.value }); + const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = m_plotRadius, .ds = m_descriptors[0u].get(), .texID = (uint16_t)m_activeAssetIx, .mode = mode.sphere.value, .wireframe = m_wireframeEnabled }); // tear down scene every frame m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data() + m_activeAssetIx; @@ -180,6 +371,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi cb->beginDebugMarker("IES::graphics ImGUI"); viewport.width = m_window->getWidth(); viewport.height = m_window->getHeight(); + cb->setViewport(0u, 1u, &viewport); scissor.extent = { m_window->getWidth(), m_window->getHeight() }; cb->setScissor(0u, 1u, &scissor); currentRenderArea.extent = { m_window->getWidth(),m_window->getHeight() }; @@ -285,4 +477,4 @@ const video::IGPURenderpass::SCreationParams::SSubpassDependency* IESViewer::get IGPURenderpass::SCreationParams::DependenciesEnd }; return dependencies; -} \ No newline at end of file +} diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp index 308c945d2..eeb673f8a 100644 --- a/50.IESViewer/AppUI.cpp +++ b/50.IESViewer/AppUI.cpp @@ -5,6 +5,7 @@ #include "App.hpp" #include "imgui/imgui_internal.h" #include "app_resources/common.hlsl" +#include "app_resources/false_color.hlsl" #include "app_resources/imgui.opts.hlsl" using namespace this_example; @@ -17,148 +18,340 @@ void IESViewer::uiListener() info.textureID = ext::imgui::UI::FontAtlasTexId + resourceIx + 1u; info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; - auto& ies = m_assets[m_activeAssetIx]; - const auto name = path(ies.key).filename().string(); + const ImGuiViewport* vp = ImGui::GetMainViewport(); + const ImVec2 viewportPos = vp->Pos; + const ImVec2 viewportSize = vp->Size; + auto* cursorControl = m_window->getCursorControl(); + const auto cursorPosition = cursorControl ? cursorControl->getPosition() : nbl::ui::ICursorControl::SPosition{}; + const int32_t windowX = m_window->getX(); + const int32_t windowY = m_window->getY(); + const int32_t windowW = static_cast(m_window->getWidth()); + const int32_t windowH = static_cast(m_window->getHeight()); + const bool cursorInsideWindow = cursorControl && + cursorPosition.x >= windowX && cursorPosition.x < windowX + windowW && + cursorPosition.y >= windowY && cursorPosition.y < windowY + windowH; + ImGui::GetIO().MouseDrawCursor = cursorInsideWindow && !m_cameraControlEnabled; + const ImVec2 bottomSize(viewportSize.x, viewportSize.y); + const ImVec2 bottomPos(viewportPos.x, viewportPos.y); + const auto legendColor = [&](float v, bool useFalseColor) -> ImU32 + { + const float clamped = ImClamp(v, 0.0f, 1.0f); + if (useFalseColor) + { + const auto col = this_example::ies::falseColor(clamped); + return ImGui::ColorConvertFloat4ToU32(ImVec4(col.x, col.y, col.z, 1.0f)); + } + return ImGui::ColorConvertFloat4ToU32(ImVec4(clamped, clamped, clamped, 1.0f)); + }; + std::vector assetLabelPtrs; + assetLabelPtrs.reserve(m_assetLabels.size()); + for (const auto& label : m_assetLabels) + assetLabelPtrs.push_back(label.c_str()); + + size_t activeIx = m_activeAssetIx; + if (activeIx >= m_assets.size()) + activeIx = 0u; + int activeIxUi = static_cast(activeIx); + float candelaValue = 0.0f; + bool candelaValid = false; + ImVec2 plotRectMin(0.f, 0.f); + ImVec2 plotRectMax(0.f, 0.f); + bool plotRectValid = false; + bool plotHovered = false; + + auto& ies = m_assets[activeIx]; auto* profile = ies.getProfile(); const auto& accessor = profile->getAccessor(); - const auto& properties = accessor.getProperties(); + const auto& properties = accessor.getProperties(); const float lowerBound = accessor.hAngles.front(); const float upperBound = accessor.hAngles.back(); const bool singleAngle = (upperBound == lowerBound); + constexpr float kMinFlatten = 0.0f; auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); - const ImGuiViewport* vp = ImGui::GetMainViewport(); - const ImVec2 imageSize(640.f, 640.f); - // 2D Plot + + auto updateCameraProjection = [&]() + { + if (m_plot3DWidth == 0u || m_plot3DHeight == 0u) + return; + const float aspect = float(m_plot3DWidth) / float(m_plot3DHeight); + auto projectionMatrix = core::matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(m_cameraFovDeg), aspect, 0.1f, 10000.0f); + camera.setProjectionMatrix(projectionMatrix); + }; + + auto draw3DControls = [&](float controlWidth) { - ImDrawList* fg = ImGui::GetForegroundDrawList(); - float x = vp->Pos.x + 8.f; - float y = vp->Pos.y + 8.f; + bool interpolateCandela = + mode.sphere.hasFlags(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::modeToRS(mode.view)); - y += ImGui::GetTextLineHeightWithSpacing(); + if (ImGui::Checkbox("interpolate candelas", &interpolateCandela)) + { + if (interpolateCandela) + mode.sphere |= this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE; + else + mode.sphere &= static_cast( + ~this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE + ); + } - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), IES::symmetryToRS(properties.getSymmetry())); - y += ImGui::GetTextLineHeightWithSpacing(); + bool falseColor = + mode.sphere.hasFlags(this_example::ies::ESM_FALSE_COLOR); - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), name.c_str()); - y += ImGui::GetTextLineHeightWithSpacing(); + if (ImGui::Checkbox("false color", &falseColor)) + { + if (falseColor) + mode.sphere |= this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR; + else + mode.sphere &= static_cast( + ~this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR + ); + } - char b1[64]; snprintf(b1, sizeof(b1), "%.3f\xC2\xB0", angle); - fg->AddText(ImVec2(x, y), ImGui::GetColorU32(ImGuiCol_Text), b1); - } + bool showOctaMap = m_showOctaMapPreview; + if (ImGui::Checkbox("octahedral map", &showOctaMap)) + m_showOctaMapPreview = showOctaMap; - { - const ImVec2 imageCenter( - vp->Pos.x + vp->Size.x * 0.5f, - vp->Pos.y + vp->Size.y * 0.25f - ); + bool cubePlot = + mode.sphere.hasFlags(this_example::ies::ESM_CUBE); - ImGui::SetNextWindowPos(imageCenter, ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f)); + if (ImGui::Checkbox("cube plot", &cubePlot)) + { + if (cubePlot) + mode.sphere |= this_example::ies::E_SPHERE_MODE::ESM_CUBE; + else + mode.sphere &= static_cast( + ~this_example::ies::E_SPHERE_MODE::ESM_CUBE + ); + } - ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0.f, 0.f)); - ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); + bool wireframe = m_wireframeEnabled; + if (ImGui::Checkbox("wireframe", &wireframe)) + m_wireframeEnabled = wireframe; - ImGuiWindowFlags imgFlags = - ImGuiWindowFlags_NoSavedSettings | - ImGuiWindowFlags_NoBringToFrontOnFocus | - ImGuiWindowFlags_NoNav | - ImGuiWindowFlags_NoScrollbar | - ImGuiWindowFlags_NoScrollWithMouse; + bool cameraControl = m_cameraControlEnabled; + if (ImGui::Checkbox("camera control (space)", &cameraControl)) + m_cameraControlEnabled = cameraControl; - if (ImGui::Begin("2D Plot", nullptr, imgFlags)) + bool speedChanged = false; + ImGui::SetNextItemWidth(controlWidth); + speedChanged |= ImGui::SliderFloat("move speed", &m_cameraMoveSpeed, 0.1f, 10.0f, "%.2f", ImGuiSliderFlags_AlwaysClamp); + ImGui::SetNextItemWidth(controlWidth); + speedChanged |= ImGui::SliderFloat("rotate speed", &m_cameraRotateSpeed, 0.1f, 5.0f, "%.2f", ImGuiSliderFlags_AlwaysClamp); + if (speedChanged && m_cameraControlEnabled) { - ImGui::Image(info, imageSize); + camera.setMoveSpeed(m_cameraMoveSpeed); + camera.setRotateSpeed(m_cameraRotateSpeed); } - ImGui::End(); - ImGui::PopStyleVar(2); - } - - { - const float pad = 8.f; - const float sliderW = 74.f; - const float sliderH = ImMin(vp->Size.y - pad * 2.f, 260.f); - ImGui::SetNextWindowPos(ImVec2(vp->Pos.x + vp->Size.x - sliderW - pad, vp->Pos.y + pad), ImGuiCond_Always); - ImGui::SetNextWindowSize(ImVec2(sliderW, sliderH), ImGuiCond_Always); - ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0, 0)); - ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); - ImGuiWindowFlags flags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoMove | - ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoBringToFrontOnFocus | - ImGuiWindowFlags_NoNav | ImGuiWindowFlags_NoBackground; - - if (ImGui::Begin("AngleSliderOverlay", nullptr, flags)) + bool fovChanged = false; + ImGui::SetNextItemWidth(controlWidth); + fovChanged |= ImGui::SliderFloat("fov", &m_cameraFovDeg, 30.0f, 120.0f, "%.0f", ImGuiSliderFlags_AlwaysClamp); + if (fovChanged) + updateCameraProjection(); + + float flatten = ImClamp(ies.flatten, kMinFlatten, 1.0f); + bool flattenChanged = false; + ImGui::SetNextItemWidth(controlWidth); + flattenChanged |= ImGui::SliderFloat("flatten", &flatten, kMinFlatten, 1.0f, "%.3f", ImGuiSliderFlags_AlwaysClamp); + ImGui::SameLine(); + ImGui::SetNextItemWidth(64.0f); + flattenChanged |= ImGui::InputFloat("##flatten_value", &flatten, 0.0f, 0.0f, "%.3f"); + if (flattenChanged) { - ImGui::InvisibleButton("##fader_area", ImGui::GetContentRegionAvail()); - ImVec2 rmin = ImGui::GetItemRectMin(); - ImVec2 rmax = ImGui::GetItemRectMax(); - ImDrawList* dl = ImGui::GetWindowDrawList(); - ImU32 col = IM_COL32(220, 60, 60, 255); + flatten = ImClamp(flatten, kMinFlatten, 1.0f); + ies.flatten = flatten; + if (m_activeAssetIx < m_candelaDirty.size()) + m_candelaDirty[m_activeAssetIx] = true; + auto* mapped = reinterpret_cast( + reinterpret_cast(ies.buffers.textureInfo.buffer->getBoundMemory().memory->getMappedPointer()) + + ies.buffers.textureInfo.offset); + const auto& resolution = accessor.properties.optimalIESResolution; + *mapped = CIESProfile::texture_t::createInfo(accessor, resolution, ies.flatten, true); + + auto bound = ies.buffers.textureInfo.buffer->getBoundMemory(); + if (bound.memory->haveToMakeVisible()) + { + const ILogicalDevice::MappedMemoryRange range( + bound.memory, + bound.offset + ies.buffers.textureInfo.offset, + sizeof(IESTextureInfo)); + m_device->flushMappedMemoryRanges(1, &range); + } + } + }; + + const float panelMargin = 8.f; + const float panelWidth = ImClamp(viewportSize.x * 0.25f, 260.0f, 420.0f); + const float panelMaxHeight = ImMax(240.0f, viewportSize.y * 0.9f); + ImGui::SetNextWindowPos(ImVec2(viewportPos.x + panelMargin, viewportPos.y + panelMargin), ImGuiCond_Always); + ImGui::SetNextWindowSizeConstraints(ImVec2(panelWidth, 0.0f), ImVec2(panelWidth, panelMaxHeight)); + ImGui::SetNextWindowBgAlpha(0.7f); + ImGuiWindowFlags panelFlags = + ImGuiWindowFlags_NoDecoration | + ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_AlwaysAutoResize | + ImGuiWindowFlags_NoResize; + + if (ImGui::Begin("IES Panel", nullptr, panelFlags)) + { + const auto& resolution = accessor.properties.optimalIESResolution; - float knobR = 7.f; - float trackX = rmax.x - 12.f; - float y0 = rmin.y + knobR + 1.f; - float y1 = rmax.y - knobR - 1.f; + char b1[64]; + snprintf(b1, sizeof(b1), "%.3f deg", angle); + if (ImGui::BeginTable("##profile_info", 2, ImGuiTableFlags_SizingStretchProp)) + { + ImGui::TableNextColumn(); + ImGui::TextUnformatted(IES::symmetryToRS(properties.getSymmetry())); + ImGui::TableNextColumn(); + ImGui::TextUnformatted(IES::typeToRS(properties.getType())); + + ImGui::TableNextColumn(); + ImGui::TextUnformatted(IES::versionToRS(properties.getVersion())); + ImGui::TableNextColumn(); + ImGui::TextUnformatted(assetLabelPtrs.empty() ? ies.key.c_str() : assetLabelPtrs[activeIx]); + + ImGui::TableNextColumn(); + ImGui::Text("angles: %u x %u", accessor.hAnglesCount(), accessor.vAnglesCount()); + ImGui::TableNextColumn(); + ImGui::Text("resolution: %u x %u", resolution.x, resolution.y); + + ImGui::TableNextColumn(); + ImGui::Text("max cd: %.3f", properties.maxCandelaValue); + ImGui::TableNextColumn(); + ImGui::Text("avg: %.3f", properties.avgEmmision); + + ImGui::TableNextColumn(); + ImGui::Text("avg full: %.3f", properties.fullDomainAvgEmission); + ImGui::TableNextColumn(); + ImGui::TextUnformatted(b1); + + ImGui::EndTable(); + } - dl->AddLine(ImVec2(trackX, y0), ImVec2(trackX, y1), col, 3.f); + ImGui::Separator(); - if (singleAngle) + const ImVec2 avail = ImGui::GetContentRegionAvail(); + ImVec2 plotSize(0.f, 0.f); + float plotSide = ImMax(0.0f, avail.x); + if (plotSide > 0.0f) + { + plotSize = ImVec2(plotSide, plotSide); + ImVec2 plotPos = ImGui::GetCursorScreenPos(); { - float y = (y0 + y1) * 0.5f; - dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); - char tb[32]; snprintf(tb, sizeof(tb), "%.0f", lowerBound); - ImVec2 ts = ImGui::CalcTextSize(tb); - dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); + const char* title = IES::modeToRS(mode.view); + const ImVec2 titleSize = ImGui::CalcTextSize(title); + const float titleX = ImMax(0.0f, (ImGui::GetContentRegionAvail().x - titleSize.x) * 0.5f); + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + titleX); + ImGui::TextUnformatted(title); } - else + + plotPos = ImGui::GetCursorScreenPos(); + ImGui::Image(info, plotSize, ImVec2(0.f, 0.f), ImVec2(1.f, 0.5f)); + + ImDrawList* dl = ImGui::GetWindowDrawList(); + + const float pad = 6.f; + const float barWidth = 16.f; + const float sliderH = ImMax(0.f, plotSize.y - pad * 2.f); + const float sliderX = plotPos.x + plotSize.x - barWidth - pad; + const float sliderY = plotPos.y + pad; + + if (sliderH > 0.0f) { - for (int i = 0; i < 5; ++i) + ImGui::SetCursorScreenPos(ImVec2(sliderX, sliderY)); + ImGui::InvisibleButton("##angle_slider", ImVec2(barWidth, sliderH)); + ImVec2 rmin = ImGui::GetItemRectMin(); + ImVec2 rmax = ImGui::GetItemRectMax(); + ImU32 col = IM_COL32(220, 60, 60, 255); + + float knobR = 7.f; + float trackX = rmax.x - barWidth * 0.5f; + float y0 = rmin.y + knobR + 1.f; + float y1 = rmax.y - knobR - 1.f; + + dl->AddLine(ImVec2(trackX, y0), ImVec2(trackX, y1), col, 3.f); + + if (singleAngle) { - float v = lowerBound + (upperBound - lowerBound) * (float(i) / 4.f); - float t = (v - lowerBound) / (upperBound - lowerBound); - float y = y1 - t * (y1 - y0); + float y = (y0 + y1) * 0.5f; dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); - char tb[32]; snprintf(tb, sizeof(tb), "%.0f", v); + char tb[32]; snprintf(tb, sizeof(tb), "%.0f", lowerBound); ImVec2 ts = ImGui::CalcTextSize(tb); dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); } - } + else + { + for (int i = 0; i < 5; ++i) + { + float v = lowerBound + (upperBound - lowerBound) * (float(i) / 4.f); + float t = (v - lowerBound) / (upperBound - lowerBound); + float y = y1 - t * (y1 - y0); + dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); + char tb[32]; snprintf(tb, sizeof(tb), "%.0f", v); + ImVec2 ts = ImGui::CalcTextSize(tb); + dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); + } + } - float t = singleAngle ? 0.5f : (angle - lowerBound) / (upperBound - lowerBound); - float knobY = y1 - t * (y1 - y0); - dl->AddCircleFilled(ImVec2(trackX, knobY), knobR, col); - dl->AddCircle(ImVec2(trackX, knobY), knobR, ImGui::GetColorU32(ImGuiCol_Border)); + float t = singleAngle ? 0.5f : (angle - lowerBound) / (upperBound - lowerBound); + float knobY = y1 - t * (y1 - y0); + dl->AddCircleFilled(ImVec2(trackX, knobY), knobR, col); + dl->AddCircle(ImVec2(trackX, knobY), knobR, ImGui::GetColorU32(ImGuiCol_Border)); + + if (!singleAngle && (ImGui::IsItemHovered() || ImGui::IsItemActive()) && ImGui::IsMouseDown(0)) + { + float my = ImClamp(ImGui::GetIO().MousePos.y, y0, y1); + float nt = (y1 - my) / (y1 - y0); + angle = lowerBound + nt * (upperBound - lowerBound); + } + } + } - if (!singleAngle && (ImGui::IsItemHovered() || ImGui::IsItemActive()) && ImGui::IsMouseDown(0)) + if (plotSize.x > 0.0f && plotSize.y > 0.0f && m_showOctaMapPreview) + { + ImGui::Spacing(); { - float my = ImClamp(ImGui::GetIO().MousePos.y, y0, y1); - float nt = (y1 - my) / (y1 - y0); - angle = lowerBound + nt * (upperBound - lowerBound); + const char* title = "Octahedral Map"; + const ImVec2 titleSize = ImGui::CalcTextSize(title); + const float titleX = ImMax(0.0f, (ImGui::GetContentRegionAvail().x - titleSize.x) * 0.5f); + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + titleX); + ImGui::TextUnformatted(title); } + ImGui::Image(info, plotSize, ImVec2(0.f, 0.5f), ImVec2(1.f, 1.f)); + } + + ImGui::Separator(); + draw3DControls(ImMax(120.0f, ImMin(panelWidth - panelMargin * 2.0f, 260.0f))); + ImGui::Separator(); + + if (!assetLabelPtrs.empty()) + { + ImGui::SetNextItemWidth(ImMin(260.0f, panelWidth - panelMargin * 2.0f)); + if (ImGui::Combo("profile", &activeIxUi, assetLabelPtrs.data(), static_cast(assetLabelPtrs.size()))) + activeIx = static_cast(activeIxUi); } - ImGui::End(); - ImGui::PopStyleVar(2); } + ImGui::End(); ies.zDegree = angle; - + m_activeAssetIx = activeIx; // 3D plot { info.textureID += device_base_t::MaxFramesInFlight; { - const ImVec2 imageCenter( - vp->Pos.x + vp->Size.x * 0.5f, - vp->Pos.y + vp->Size.y * 0.75f - ); - - ImGui::SetNextWindowPos(imageCenter, ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f)); + ImGui::SetNextWindowPos(bottomPos, ImGuiCond_Always); + ImGui::SetNextWindowSize(bottomSize, ImGuiCond_Always); ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0.f, 0.f)); ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); ImGuiWindowFlags imgFlags = + ImGuiWindowFlags_NoDecoration | + ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoNav | @@ -167,53 +360,232 @@ void IESViewer::uiListener() if (ImGui::Begin("3D Plot", nullptr, imgFlags)) { + const ImVec2 avail = ImGui::GetContentRegionAvail(); + const ImVec2 plotSize(ImMax(0.0f, avail.x), ImMax(0.0f, avail.y)); ImVec2 imgPos = ImGui::GetCursorScreenPos(); - ImGui::Image(info, imageSize); - - const ImGuiStyle& style = ImGui::GetStyle(); - float frameH = ImGui::GetFrameHeight(); - float margin = 6.0f; - - ImVec2 overlayPos( - imgPos.x + margin, - imgPos.y + margin - ); - - bool interpolateCandela = - mode.sphere.hasFlags(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); - - ImGui::SetCursorScreenPos(overlayPos); - if (ImGui::Checkbox("interpolate candelas", &interpolateCandela)) - { - if (interpolateCandela) - mode.sphere |= this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE; - else - mode.sphere &= static_cast( - ~this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE - ); - } - - bool falseColor = - mode.sphere.hasFlags(this_example::ies::ESM_FALSE_COLOR); - - ImVec2 overlayPos2( - overlayPos.x, - overlayPos.y + frameH + margin - ); - ImGui::SetCursorScreenPos(overlayPos2); - if (ImGui::Checkbox("false color", &falseColor)) - { - if (falseColor) - mode.sphere |= this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR; - else - mode.sphere &= static_cast( - ~this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR - ); - } + ImGui::Image(info, plotSize); + plotRectMin = ImGui::GetItemRectMin(); + plotRectMax = ImGui::GetItemRectMax(); + plotRectValid = true; + plotHovered = ImGui::IsItemHovered(); + + const float margin = 8.0f; + const float barWidth = 16.0f; + const float barHeight = ImMax(80.0f, plotSize.y - margin * 2.0f); + if (plotSize.x > barWidth + margin * 2.0f && plotSize.y > margin * 2.0f) + { + const bool useFalseColorLegend = mode.sphere.hasFlags(this_example::ies::ESM_FALSE_COLOR); + ImVec2 barMin(imgPos.x + plotSize.x - barWidth - margin, imgPos.y + margin); + ImVec2 barMax(barMin.x + barWidth, barMin.y + barHeight); + + ImDrawList* dl = ImGui::GetWindowDrawList(); + const int steps = 64; + for (int i = 0; i < steps; ++i) + { + const float t0 = float(i) / float(steps); + const float t1 = float(i + 1) / float(steps); + const float y0 = barMin.y + (1.0f - t1) * barHeight; + const float y1 = barMin.y + (1.0f - t0) * barHeight; + const float v = (t0 + t1) * 0.5f; + const ImU32 col = legendColor(v, useFalseColorLegend); + dl->AddRectFilled(ImVec2(barMin.x, y0), ImVec2(barMax.x, y1), col); + } + dl->AddRect(barMin, barMax, ImGui::GetColorU32(ImGuiCol_Border)); + + const ImU32 textCol = ImGui::GetColorU32(ImGuiCol_Text); + for (uint32_t i = 0u; i < this_example::ies::FalseColorStopCount; ++i) + { + const float stop = this_example::ies::falseColorStop(i); + const float y = barMin.y + (1.0f - stop) * barHeight; + dl->AddLine(ImVec2(barMin.x - 4.0f, y), ImVec2(barMin.x, y), textCol); + const float cdValue = stop * properties.maxCandelaValue; + char label[32]; + snprintf(label, sizeof(label), "%.0f cd", cdValue); + ImVec2 labelSize = ImGui::CalcTextSize(label); + dl->AddText(ImVec2(barMin.x - labelSize.x - 6.0f, y - labelSize.y * 0.5f), textCol, label); + } + } } ImGui::End(); ImGui::PopStyleVar(2); } } -} \ No newline at end of file + + if (plotRectValid && plotHovered && activeIx < m_assets.size()) + { + const float plotW = plotRectMax.x - plotRectMin.x; + const float plotH = plotRectMax.y - plotRectMin.y; + const ImVec2 mousePos = ImGui::GetIO().MousePos; + if (plotW > 1.0f && plotH > 1.0f && + mousePos.x >= plotRectMin.x && mousePos.x <= plotRectMax.x && + mousePos.y >= plotRectMin.y && mousePos.y <= plotRectMax.y) + { + const auto& iesCandela = m_assets[activeIx]; + const auto* profileCandela = iesCandela.getProfile(); + const auto& accessorCandela = profileCandela->getAccessor(); + const auto& propertiesCandela = accessorCandela.getProperties(); + const auto& resolutionCandela = accessorCandela.properties.optimalIESResolution; + + const float u = (mousePos.x - plotRectMin.x) / plotW; + const float v = (mousePos.y - plotRectMin.y) / plotH; + const float ndcX = u * 2.0f - 1.0f; + const float ndcY = v * 2.0f - 1.0f; + + core::matrix4SIMD invViewProj; + if (camera.getConcatenatedMatrix().getInverseTransform(invViewProj)) + { + core::vectorSIMDf nearPoint(ndcX, ndcY, 0.0f, 1.0f); + core::vectorSIMDf farPoint(ndcX, ndcY, 1.0f, 1.0f); + invViewProj.transformVect(nearPoint); + invViewProj.transformVect(farPoint); + nearPoint /= nearPoint.wwww(); + farPoint /= farPoint.wwww(); + + const core::vectorSIMDf origin = camera.getPosition(); + core::vectorSIMDf direction = farPoint - origin; + direction.makeSafe3D(); + direction = core::normalize(direction); + + core::vectorSIMDf hitPos; + bool hit = false; + const bool cubePlot = mode.sphere.hasFlags(this_example::ies::ESM_CUBE); + if (cubePlot) + { + float tmin = -1.0e20f; + float tmax = 1.0e20f; + auto update = [&](float originAxis, float dirAxis) -> bool + { + const float eps = 1.0e-6f; + if (core::abs(dirAxis) < eps) + { + if (originAxis < -m_plotRadius || originAxis > m_plotRadius) + return false; + return true; + } + float t1 = (-m_plotRadius - originAxis) / dirAxis; + float t2 = (m_plotRadius - originAxis) / dirAxis; + if (t1 > t2) + { + float tmp = t1; + t1 = t2; + t2 = tmp; + } + tmin = core::max(tmin, t1); + tmax = core::min(tmax, t2); + return tmin <= tmax; + }; + + if (update(origin.x, direction.x) && update(origin.y, direction.y) && update(origin.z, direction.z)) + { + float t = tmax; + if (t < 0.0f) + t = tmin; + if (t >= 0.0f) + { + hitPos = origin + direction * t; + hit = true; + } + } + } + else + { + const float b = core::dot(origin, direction)[0]; + const float c = core::dot(origin, origin)[0] - m_plotRadius * m_plotRadius; + const float disc = b * b - c; + if (disc >= 0.0f) + { + const float sqrtDisc = core::sqrt(disc); + float t = -b + sqrtDisc; + if (t < 0.0f) + t = -b - sqrtDisc; + if (t >= 0.0f) + { + hitPos = origin + direction * t; + hit = true; + } + } + } + + if (hit) + { + core::vectorSIMDf dir = core::normalize(hitPos); + const float sum = core::abs(dir.x) + core::abs(dir.y) + core::abs(dir.z); + core::vectorSIMDf s = dir / sum; + if (s.z < 0.0f) + { + const float sx = s.x; + const float sy = s.y; + s.x = (sx < 0.0f ? -1.0f : 1.0f) * (1.0f - core::abs(sy)); + s.y = (sy < 0.0f ? -1.0f : 1.0f) * (1.0f - core::abs(sx)); + } + + float uvx = s.x * 0.5f + 0.5f; + float uvy = s.y * 0.5f + 0.5f; + + const uint32_t resX = resolutionCandela.x; + const uint32_t resY = resolutionCandela.y; + if (resX > 0u && resY > 0u) + { + const float resFx = static_cast(resX); + const float resFy = static_cast(resY); + + const bool interpolateCandela = mode.sphere.hasFlags(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); + if (!interpolateCandela) + { + float px = core::floor(uvx * resFx + 0.5f); + float py = core::floor(uvy * resFy + 0.5f); + uvx = px / resFx; + uvy = py / resFy; + } + + const float scaleX = 1.0f - 1.0f / resFx; + const float scaleY = 1.0f - 1.0f / resFy; + const float uvCornerX = (uvx - 0.5f) * scaleX + 0.5f; + const float uvCornerY = (uvy - 0.5f) * scaleY + 0.5f; + + const float tx = uvCornerX * resFx - 0.5f; + const float ty = uvCornerY * resFy - 0.5f; + + int x0 = static_cast(core::floor(tx)); + int y0 = static_cast(core::floor(ty)); + int x1 = x0 + 1; + int y1 = y0 + 1; + const float fx = tx - static_cast(x0); + const float fy = ty - static_cast(y0); + + x0 = ImClamp(x0, 0, static_cast(resX - 1u)); + y0 = ImClamp(y0, 0, static_cast(resY - 1u)); + x1 = ImClamp(x1, 0, static_cast(resX - 1u)); + y1 = ImClamp(y1, 0, static_cast(resY - 1u)); + + const auto info = CIESProfile::texture_t::createInfo(accessorCandela, resolutionCandela, iesCandela.flatten, true); + const auto sample = [&](int x, int y) -> float + { + return CIESProfile::texture_t::eval(accessorCandela, info, nbl::hlsl::uint32_t2(static_cast(x), static_cast(y))); + }; + + const float c00 = sample(x0, y0); + const float c10 = sample(x1, y0); + const float c01 = sample(x0, y1); + const float c11 = sample(x1, y1); + + const float cx0 = c00 + (c10 - c00) * fx; + const float cx1 = c01 + (c11 - c01) * fx; + const float c = cx0 + (cx1 - cx0) * fy; + + candelaValue = c * propertiesCandela.maxCandelaValue; + candelaValid = true; + } + } + } + } + } + + if (candelaValid && !m_cameraControlEnabled) + { + ImGui::BeginTooltip(); + ImGui::Text("candela: %.3f cd", candelaValue); + ImGui::EndTooltip(); + } +} diff --git a/50.IESViewer/CMakeLists.txt b/50.IESViewer/CMakeLists.txt index 70ec73cf7..76108928d 100644 --- a/50.IESViewer/CMakeLists.txt +++ b/50.IESViewer/CMakeLists.txt @@ -19,6 +19,7 @@ target_link_libraries(${EXECUTABLE_NAME} PRIVATE nlohmann_json::nlohmann_json) set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") set(DEPENDS app_resources/common.hlsl + app_resources/false_color.hlsl app_resources/imgui.opts.hlsl app_resources/ies.unified.hlsl app_resources/imgui.unified.hlsl @@ -68,4 +69,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} ) -endif() \ No newline at end of file +endif() diff --git a/50.IESViewer/CSimpleIESRenderer.hpp b/50.IESViewer/CSimpleIESRenderer.hpp index 9af4a07ca..dc41e7776 100644 --- a/50.IESViewer/CSimpleIESRenderer.hpp +++ b/50.IESViewer/CSimpleIESRenderer.hpp @@ -54,6 +54,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted IGPUDescriptorSet* ds = nullptr; uint16_t texID = 0u; uint16_t mode = this_example::ies::ESM_NONE; + bool wireframe = false; }; // struct SPackedGeometry @@ -166,6 +167,8 @@ class CSimpleIESRenderer final : public core::IReferenceCounted IGPUGraphicsPipeline::SCreationParams params[pipeline_e::Count] = {}; params[pipeline_e::SphereTriangleStrip].vertexShader = { .shader = shader.get(),.entryPoint = "SphereVS" }; params[pipeline_e::SphereTriangleStrip].fragmentShader = { .shader = shader.get(),.entryPoint = "SpherePS" }; + params[pipeline_e::SphereTriangleStripWire].vertexShader = { .shader = shader.get(),.entryPoint = "SphereVS" }; + params[pipeline_e::SphereTriangleStripWire].fragmentShader = { .shader = shader.get(),.entryPoint = "SpherePS" }; for (auto i=0; ibindGraphicsPipeline(geo->pipeline.get()); + auto pipeline = geo->pipeline; + if (iesParams.wireframe) + pipeline = m_params.pipelines[SInitParams::PipelineType::SphereTriangleStripWire]; + cmdbuf->bindGraphicsPipeline(pipeline.get()); const auto pc = instance.computePushConstants(viewParams, iesParams); cmdbuf->pushConstants(layout,hlsl::ShaderStage::ESS_VERTEX|hlsl::ShaderStage::ESS_FRAGMENT,offsetof(hlsl::this_example::ies::PushConstants, sphere),sizeof(pc),&pc); if (geo->indexBuffer) @@ -412,4 +423,4 @@ class CSimpleIESRenderer final : public core::IReferenceCounted }; } -#endif // _NBL_EXAMPLES_C_SIMPLE_IES_RENDERER_H_INCLUDED_ \ No newline at end of file +#endif // _NBL_EXAMPLES_C_SIMPLE_IES_RENDERER_H_INCLUDED_ diff --git a/50.IESViewer/IES.cpp b/50.IESViewer/IES.cpp index 92b82fec0..85d87983e 100644 --- a/50.IESViewer/IES.cpp +++ b/50.IESViewer/IES.cpp @@ -56,4 +56,33 @@ const char* IES::symmetryToRS(CIESProfile::properties_t::LuminairePlanesSymmetry default: return "ERROR (symmetry)"; } -} \ No newline at end of file +} + +const char* IES::typeToRS(CIESProfile::properties_t::PhotometricType type) +{ + switch (type) + { + case asset::CIESProfile::properties_t::TYPE_C: + return "TYPE_C"; + case asset::CIESProfile::properties_t::TYPE_B: + return "TYPE_B"; + case asset::CIESProfile::properties_t::TYPE_A: + return "TYPE_A"; + case asset::CIESProfile::properties_t::TYPE_NONE: + default: + return "TYPE_NONE"; + } +} + +const char* IES::versionToRS(CIESProfile::properties_t::Version version) +{ + switch (version) + { + case asset::CIESProfile::properties_t::V_1995: + return "V_1995"; + case asset::CIESProfile::properties_t::V_2002: + return "V_2002"; + default: + return "V_UNKNOWN"; + } +} diff --git a/50.IESViewer/IES.hpp b/50.IESViewer/IES.hpp index 0684c53e2..330d9368d 100644 --- a/50.IESViewer/IES.hpp +++ b/50.IESViewer/IES.hpp @@ -34,12 +34,15 @@ struct IES std::string key; float zDegree = 0.f; + float flatten = 0.0f; const asset::CIESProfile* getProfile() const; video::IGPUImage* getActiveImage(E_MODE mode) const; static const char* modeToRS(E_MODE mode); static const char* symmetryToRS(CIESProfile::properties_t::LuminairePlanesSymmetry symmetry); + static const char* typeToRS(CIESProfile::properties_t::PhotometricType type); + static const char* versionToRS(CIESProfile::properties_t::Version version); template requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) @@ -77,8 +80,8 @@ struct IES if constexpr (newLayout == IImage::LAYOUT::GENERAL) { // READ_ONLY_OPTIMAL -> GENERAL, RW - it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - it.barrier.dep.srcAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; + it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS; + it.barrier.dep.srcAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; it.barrier.dep.dstAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; it.oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; @@ -88,8 +91,8 @@ struct IES // GENERAL -> READ_ONLY_OPTIMAL, RO it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; it.barrier.dep.srcAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; - it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - it.barrier.dep.dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT; + it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS; + it.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; it.oldLayout = IImage::LAYOUT::GENERAL; } @@ -112,4 +115,4 @@ struct IES } }; -#endif // _THIS_EXAMPLE_IES_HPP_ \ No newline at end of file +#endif // _THIS_EXAMPLE_IES_HPP_ diff --git a/50.IESViewer/app_resources/common.hlsl b/50.IESViewer/app_resources/common.hlsl index bfb3fc007..bc755befb 100644 --- a/50.IESViewer/app_resources/common.hlsl +++ b/50.IESViewer/app_resources/common.hlsl @@ -44,7 +44,8 @@ enum E_SPHERE_MODE : uint16_t { ESM_NONE = 0, ESM_OCTAHEDRAL_UV_INTERPOLATE = 1u << 0, - ESM_FALSE_COLOR = 1u << 1 + ESM_FALSE_COLOR = 1u << 1, + ESM_CUBE = 1u << 2 }; struct SpherePC diff --git a/50.IESViewer/app_resources/false_color.hlsl b/50.IESViewer/app_resources/false_color.hlsl new file mode 100644 index 000000000..ffc830ec2 --- /dev/null +++ b/50.IESViewer/app_resources/false_color.hlsl @@ -0,0 +1,74 @@ +#ifndef _THIS_EXAMPLE_FALSE_COLOR_HLSL_INCLUDED_ +#define _THIS_EXAMPLE_FALSE_COLOR_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/tgmath.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace this_example +{ +namespace ies +{ + +NBL_CONSTEXPR_STATIC_INLINE uint32_t FalseColorStopCount = 6u; + +inline float32_t falseColorStop(uint32_t idx) +{ + switch (idx) + { + case 0u: return 0.0f; + case 1u: return 0.15f; + case 2u: return 0.35f; + case 3u: return 0.55f; + case 4u: return 0.75f; + default: return 1.0f; + } +} + +inline float32_t3 falseColor(float32_t v) +{ + v = nbl::hlsl::clamp(v, float32_t(0.0f), float32_t(1.0f)); + v = nbl::hlsl::pow(v, float32_t(0.8f)); + + const float32_t3 c0 = float32_t3(0.0f, 0.0f, 0.0f); + const float32_t3 c1 = float32_t3(0.0f, 0.0f, 0.35f); + const float32_t3 c2 = float32_t3(0.10f, 0.20f, 0.90f); + const float32_t3 c3 = float32_t3(0.70f, 0.05f, 0.80f); + const float32_t3 c4 = float32_t3(1.00f, 0.30f, 1.00f); + const float32_t3 c5 = float32_t3(1.00f, 1.00f, 1.00f); + + if (v < 0.15f) + { + const float32_t t = v / 0.15f; + return c0 + (c1 - c0) * t; + } + else if (v < 0.35f) + { + const float32_t t = (v - 0.15f) / (0.35f - 0.15f); + return c1 + (c2 - c1) * t; + } + else if (v < 0.55f) + { + const float32_t t = (v - 0.35f) / (0.55f - 0.35f); + return c2 + (c3 - c2) * t; + } + else if (v < 0.75f) + { + const float32_t t = (v - 0.55f) / (0.75f - 0.55f); + return c3 + (c4 - c3) * t; + } + else + { + const float32_t t = (v - 0.75f) / (1.0f - 0.75f); + return c4 + (c5 - c4) * t; + } +} + +} +} +} +} + +#endif diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index c57ca95b3..7aafbc1f8 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -3,6 +3,7 @@ #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/builtin/hlsl/ies/texture.hlsl" #include "nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl" +#include "false_color.hlsl" using namespace nbl::hlsl; using namespace nbl::hlsl::this_example::ies; @@ -56,7 +57,15 @@ SInterpolants SphereVS(uint32_t vIx : SV_VertexID) const float32_t2 uv = float32_t2(vIx % res.x, vIx / res.x) * inv; const float32_t3 dir = octahedral_t::uvToDir(uv); - const float32_t3 pos = pc.sphere.radius * dir; + float32_t3 pos = dir; + const bool useCube = (pc.sphere.mode & ESM_CUBE) != 0; + if (useCube) + { + const float32_t3 ad = abs(dir); + const float32_t maxAxis = max(ad.x, max(ad.y, ad.z)); + pos = dir / maxAxis; + } + pos *= pc.sphere.radius; SInterpolants o; o.ndc = math::linalg::promoted_mul(pc.sphere.matrices.worldViewProj, pos); @@ -66,45 +75,6 @@ SInterpolants SphereVS(uint32_t vIx : SV_VertexID) return o; } -float32_t3 falseColor(float32_t v) -{ - v = saturate(v); - v = pow(v, 0.8f); - - const float32_t3 c0 = float32_t3(0.0f, 0.0f, 0.0f); // 0.00 - black - const float32_t3 c1 = float32_t3(0.0f, 0.0f, 0.35f); // 0.15 - very dark blue - const float32_t3 c2 = float32_t3(0.10f, 0.20f, 0.90f); // 0.35 - bright blue - const float32_t3 c3 = float32_t3(0.70f, 0.00f, 0.80f); // 0.55 - violet/magenta - const float32_t3 c4 = float32_t3(1.00f, 0.30f, 1.00f); // 0.75 - bright pink - const float32_t3 c5 = float32_t3(1.00f, 1.00f, 1.00f); // 1.00 - white - - if (v < 0.15f) - { - float32_t t = v / 0.15f; - return lerp(c0, c1, t); - } - else if (v < 0.35f) - { - float32_t t = (v - 0.15f) / (0.35f - 0.15f); - return lerp(c1, c2, t); - } - else if (v < 0.55f) - { - float32_t t = (v - 0.35f) / (0.55f - 0.35f); - return lerp(c2, c3, t); - } - else if (v < 0.75f) - { - float32_t t = (v - 0.55f) / (0.75f - 0.55f); - return lerp(c3, c4, t); - } - else - { - float32_t t = (v - 0.75f) / (1.0f - 0.75f); - return lerp(c4, c5, t); - } -} - [shader("pixel")] float32_t4 SpherePS(SInterpolants input) : SV_Target0 { @@ -199,4 +169,4 @@ float32_t4 CdcPS(SVertexAttributes input) : SV_Target0 default: return float32_t4(0.f, 0.f, 0.f, 0.f); } -} \ No newline at end of file +} diff --git a/common/include/nbl/examples/common/MonoWindowApplication.hpp b/common/include/nbl/examples/common/MonoWindowApplication.hpp index 0f18012c0..a2048b7b0 100644 --- a/common/include/nbl/examples/common/MonoWindowApplication.hpp +++ b/common/include/nbl/examples/common/MonoWindowApplication.hpp @@ -41,7 +41,7 @@ class MonoWindowApplication : public virtual SimpleWindowedApplication params.height = m_initialResolution[1]; params.x = 32; params.y = 32; - params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE | IWindow::ECF_CAN_MINIMIZE; params.windowCaption = "MonoWindowApplication"; params.callback = windowCallback; const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); @@ -186,4 +186,4 @@ class MonoWindowApplication : public virtual SimpleWindowedApplication }; } -#endif \ No newline at end of file +#endif From 59a434cc64179b223663a6ca6543a4e85a9f58e1 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 28 Dec 2025 18:47:14 +0100 Subject: [PATCH 126/219] more updates --- 50.IESViewer/App.hpp | 198 ++++++++++- 50.IESViewer/AppEvent.cpp | 10 + 50.IESViewer/AppInit.cpp | 46 +-- 50.IESViewer/AppRender.cpp | 98 +++--- 50.IESViewer/AppUI.cpp | 497 +++++++++++++++++----------- 50.IESViewer/CSimpleIESRenderer.hpp | 11 +- 50.IESViewer/main.cpp | 6 +- 7 files changed, 590 insertions(+), 276 deletions(-) diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp index af6d48792..ad7baf03f 100644 --- a/50.IESViewer/App.hpp +++ b/50.IESViewer/App.hpp @@ -6,17 +6,25 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/examples/examples.hpp" +#include "nbl/examples/common/SimpleWindowedApplication.hpp" +#include "nbl/examples/common/CSwapchainFramebuffersAndDepth.hpp" +#include "nbl/examples/common/CEventCallback.hpp" +#include "nbl/examples/common/InputSystem.hpp" #include "nbl/ui/ICursorControl.h" -#include "nbl/ext/ImGui/ImGui.h" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "IES.hpp" #include "CSimpleIESRenderer.hpp" +#include -// 3D plot only, full window render and no imgui -// #define DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY NBL_EXPOSE_NAMESPACES +namespace nbl::ext::imgui +{ + class UI; +} + template concept AppIESByteCount = std::unsigned_integral; @@ -29,9 +37,179 @@ static_assert(alignof(IESTextureInfo) == 4u, "IESTextureInfo must be 4 byte alig template concept AppIESBufferCreationAllowed = AppIESByteCount || AppIESContainer; -class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApplication +class IESWindowedApplication : public virtual SimpleWindowedApplication +{ + using base_t = SimpleWindowedApplication; + +public: + constexpr static inline uint8_t MaxFramesInFlight = 3; + + template + IESWindowedApplication(const hlsl::uint16_t2 _initialResolution, const asset::E_FORMAT _depthFormat, Args&&... args) : + base_t(std::forward(args)...), m_initialResolution(_initialResolution), m_depthFormat(_depthFormat) {} + + using surface_list_t = decltype(std::declval().getSurfaces()); + + inline surface_list_t getSurfaces() const override + { + if (!m_surface) + { + auto windowCallback = make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = make_smart_refctd_ptr(); + params.width = m_initialResolution[0]; + params.height = m_initialResolution[1]; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_CAN_MINIMIZE | IWindow::ECF_CAN_MAXIMIZE | IWindow::ECF_CAN_RESIZE; + params.windowCaption = "IESViewer"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = CSimpleResizeSurface::create(std::move(surface)); + + if (m_surface) + return { {m_surface->getSurface()} }; + + return {}; + } + + inline bool onAppInitialized(core::smart_refctd_ptr&& system) override + { + using namespace nbl::core; + using namespace nbl::video; + if (!MonoSystemMonoLoggerApplication::onAppInitialized(std::move(system))) + return false; + + m_inputSystem = make_smart_refctd_ptr(system::logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + if (!base_t::onAppInitialized(std::move(system))) + return false; + + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + auto scResources = std::make_unique(m_device.get(), m_depthFormat, swapchainParams.surfaceFormat.format, getDefaultSubpassDependencies()); + auto* renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + + return true; + } + + inline void workLoopBody() override final + { + using namespace nbl::core; + using namespace nbl::video; + if (m_window && m_surface && !m_window->isMinimized()) + { + if (auto* scRes = m_surface->getSwapchainResources()) + { + if (auto* sc = scRes->getSwapchain()) + { + const auto& params = sc->getCreationParameters().sharedParams; + if (params.width != m_window->getWidth() || params.height != m_window->getHeight()) + { + m_surface->recreateSwapchain(); + return; + } + } + } + } + + const uint32_t framesInFlightCount = hlsl::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + if (m_framesInFlight.size() >= framesInFlightCount) + { + const ISemaphore::SWaitInfo framesDone[] = + { + { + .semaphore = m_framesInFlight.front().semaphore.get(), + .value = m_framesInFlight.front().value + } + }; + if (m_device->blockForSemaphores(framesDone) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + m_framesInFlight.pop_front(); + } + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + oracle.reportEndFrameRecord(); + const auto timestamp = oracle.getNextPresentationTimeStamp(); + oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + if (!m_currentImageAcquire) + return; + + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = { renderFrame(nextPresentationTimestamp) }; + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + if (rendered->semaphore) + m_framesInFlight.emplace_back(smart_refctd_ptr(rendered->semaphore), rendered->value); + } + + inline bool keepRunning() override final + { + if (m_surface->irrecoverable()) + return false; + + return true; + } + + inline bool onAppTerminated() override + { + m_inputSystem = nullptr; + m_device->waitIdle(); + m_framesInFlight.clear(); + m_surface = nullptr; + m_window = nullptr; + return base_t::onAppTerminated(); + } + +protected: + inline void onAppInitializedFinish() + { + m_winMgr->show(m_window.get()); + oracle.reportBeginFrameRecord(); + } + inline const auto& getCurrentAcquire() const { return m_currentImageAcquire; } + + virtual const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const = 0; + virtual video::IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) = 0; + + const hlsl::uint16_t2 m_initialResolution; + const asset::E_FORMAT m_depthFormat; + core::smart_refctd_ptr m_inputSystem; + core::smart_refctd_ptr m_window; + core::smart_refctd_ptr> m_surface; + +private: + struct SSubmittedFrame + { + core::smart_refctd_ptr semaphore; + uint64_t value; + }; + core::deque m_framesInFlight; + video::ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + video::CDumbPresentationOracle oracle; +}; + +class IESViewer final : public IESWindowedApplication, public BuiltinResourcesApplication { - using device_base_t = MonoWindowApplication; + using device_base_t = IESWindowedApplication; using asset_base_t = BuiltinResourcesApplication; public: @@ -56,13 +234,11 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp smart_refctd_ptr m_semaphore; std::array, device_base_t::MaxFramesInFlight> m_cmdBuffers; -#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY std::array, device_base_t::MaxFramesInFlight> m_frameBuffers2D, m_frameBuffers3D; -#endif smart_refctd_ptr m_scene; smart_refctd_ptr m_renderer; - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); // TODO: orbit would be better + Camera camera; uint32_t m_plot3DWidth = 640u; uint32_t m_plot3DHeight = 640u; float m_plotRadius = 100.0f; @@ -74,6 +250,10 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp bool m_fullscreen3D = false; bool m_wireframeEnabled = false; bool m_showOctaMapPreview = true; + bool m_showHints = true; + bool m_plot2DRectValid = false; + hlsl::float32_t2 m_plot2DRectMin = hlsl::float32_t2(0.f, 0.f); + hlsl::float32_t2 m_plot2DRectMax = hlsl::float32_t2(0.f, 0.f); std::vector m_assetLabels; std::vector m_candelaDirty; @@ -81,7 +261,7 @@ class IESViewer final : public MonoWindowApplication, public BuiltinResourcesApp InputSystem::ChannelReader keyboard; struct { - smart_refctd_ptr it; + smart_refctd_ptr it; smart_refctd_ptr descriptor; } ui; diff --git a/50.IESViewer/AppEvent.cpp b/50.IESViewer/AppEvent.cpp index ae040f9f2..3fa8d056e 100644 --- a/50.IESViewer/AppEvent.cpp +++ b/50.IESViewer/AppEvent.cpp @@ -12,6 +12,16 @@ void IESViewer::processMouse(const nbl::ui::IMouseEventChannel::range_t& events) if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) { + auto* cursorControl = m_window ? m_window->getCursorControl() : nullptr; + if (!cursorControl || !m_plot2DRectValid) + continue; + const auto cursor = cursorControl->getPosition(); + const float cursorX = static_cast(cursor.x); + const float cursorY = static_cast(cursor.y); + if (cursorX < m_plot2DRectMin.x || cursorX > m_plot2DRectMax.x || + cursorY < m_plot2DRectMin.y || cursorY > m_plot2DRectMax.y) + continue; + auto& ies = m_assets[m_activeAssetIx]; const auto& accessor = ies.getProfile()->getAccessor(); diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index b0ef40473..d0517e2c3 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -3,9 +3,14 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "App.hpp" +#include +#include +#include #include "AppInputParser.hpp" #include "app_resources/common.hlsl" #include "app_resources/imgui.opts.hlsl" +#include "nbl/ext/ImGui/ImGui.h" +#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" #include "nbl/this_example/builtin/build/spirv/keys.hpp" #define MEDIA_ENTRY "../../media" #define INPUT_JSON_FILE "../inputs.json" @@ -199,7 +204,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) if (not descriptorSetLayout) return logFail("Failed to create descriptor set layout!"); - auto range = std::to_array({ {StageFlags.value, offsetof(hlsl::this_example::ies::PushConstants, cdc), sizeof(nbl::hlsl::this_example::ies::CdcPC)} }); + auto range = std::to_array({ {StageFlags.value, offsetof(hlsl::this_example::ies::PushConstants, cdc), sizeof(hlsl::this_example::ies::CdcPC)} }); auto pipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); if (not pipelineLayout) @@ -219,7 +224,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) // Graphics Pipeline { IGPUPipelineBase::SShaderEntryMap specConstants; - const auto orientationAsUint32 = static_cast(hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT); + const auto orientationAsUint32 = static_cast(SurfaceTransform::FLAG_BITS::IDENTITY_BIT); specConstants[0] = std::span{ reinterpret_cast(&orientationAsUint32), sizeof(orientationAsUint32) }; video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = @@ -300,7 +305,6 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) } } -#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY // frame buffers { // TODO: I will create my own @@ -348,8 +352,6 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) } } } -#endif - auto scRes = static_cast(m_surface->getSwapchainResources()); // geometries for 3D scene @@ -383,7 +385,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) ); const auto& geoParams = m_scene->getInitParams(); - core::vector> polygons(m_assets.size()); + std::vector> polygons(m_assets.size()); for (uint32_t i = 0u; i < m_assets.size(); ++i) { const auto& resolution = m_assets[i].getProfile()->getAccessor().properties.optimalIESResolution; @@ -413,24 +415,29 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) float32_t4(0, 0, 1, 0) ); - core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); - core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); + using core_vec_t = std::remove_cv_t>; + using core_mat_t = std::remove_cv_t>; + const auto toCoreVec3 = [](const float32_t3& v) -> core_vec_t + { + return core_vec_t(v.x, v.y, v.z); + }; + + float32_t3 cameraPosition(-5.81655884f, 2.58630896f, -4.23974705f); + float32_t3 cameraTarget(-0.349590302f, -0.213266611f, 0.317821503f); const auto cameraOffset = cameraPosition - cameraTarget; cameraPosition = cameraTarget + cameraOffset * 1.5f; -#ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(m_cameraFovDeg), float(m_window->getWidth()) / float(m_window->getHeight()), 0.1, 10000); -#else const auto& params = m_frameBuffers3D.front()->getCreationParameters(); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(m_cameraFovDeg), float(params.width) / float(params.height), 0.1, 10000); -#endif - camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); + const float aspect = float(params.width) / float(params.height); + const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(m_cameraFovDeg), aspect, 0.1f, 10000.0f); + core_mat_t coreProjection; + std::memcpy(coreProjection.pointer(), &projectionMatrix, sizeof(projectionMatrix)); + camera = Camera(toCoreVec3(cameraPosition), toCoreVec3(cameraTarget), coreProjection, 1.069f, 0.4f); m_cameraMoveSpeed = camera.getMoveSpeed(); m_cameraRotateSpeed = camera.getRotateSpeed(); m_cameraControlApplied = !m_cameraControlEnabled; } -#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY // imGUI { ext::imgui::UI::SCreationParameters params = {}; @@ -447,7 +454,9 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) using imgui_precompiled_spirv_t = ext::imgui::UI::SCreationParameters::PrecompiledShaders; params.spirv = std::make_optional(imgui_precompiled_spirv_t{ .vertex = shaders.imgui, .fragment = shaders.imgui }); - auto* imgui = (ui.it = ext::imgui::UI::create(std::move(params))).get(); + auto imguiPtr = ext::imgui::UI::create(std::move(params)); + auto* imgui = imguiPtr.get(); + ui.it = smart_refctd_ptr_static_cast(imguiPtr); if (not imgui) return logFail("Failed to create `nbl::ext::imgui::UI` class"); @@ -498,7 +507,6 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) uiListener(); }); } -#endif m_semaphore = m_device->createSemaphore(m_realFrameIx); if (!m_semaphore) @@ -598,7 +606,7 @@ void IESViewer::applyWindowMode() if (m_surface) { - if (auto* scRes = m_surface->getSwapchainResources()) - scRes->invalidate(); + m_surface->recreateSwapchain(); } } + diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index d4ff5538d..2d9239284 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -3,13 +3,14 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "App.hpp" +#include +#include +#include "nbl/ext/ImGui/ImGui.h" #include "app_resources/common.hlsl" +#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" bool IESViewer::recreate3DPlotFramebuffers(uint32_t width, uint32_t height) { -#ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY - return true; -#else if (width == 0u || height == 0u) return false; @@ -50,21 +51,22 @@ bool IESViewer::recreate3DPlotFramebuffers(uint32_t width, uint32_t height) return false; } - if (ui.it && ui.descriptor) + auto* imgui = static_cast(ui.it.get()); + if (imgui && ui.descriptor) { std::array infos; for (auto& it : infos) it.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; auto* ix = infos.data(); - ix->desc = smart_refctd_ptr(ui.it->getFontAtlasView()); + ix->desc = smart_refctd_ptr(imgui->getFontAtlasView()); ++ix; for (uint8_t i = 0u; i < device_base_t::MaxFramesInFlight; ++i, ++ix) ix->desc = m_frameBuffers2D[i]->getCreationParameters().colorAttachments[0u]; for (uint8_t i = 0u; i < device_base_t::MaxFramesInFlight; ++i, ++ix) ix->desc = m_frameBuffers3D[i]->getCreationParameters().colorAttachments[0u]; - const auto texturesBinding = ui.it->getCreationParameters().resources.texturesInfo.bindingIx; + const auto texturesBinding = imgui->getCreationParameters().resources.texturesInfo.bindingIx; auto writes = std::to_array({ IGPUDescriptorSet::SWriteDescriptorSet{ .dstSet = ui.descriptor->getDescriptorSet(), .binding = texturesBinding, @@ -77,11 +79,14 @@ bool IESViewer::recreate3DPlotFramebuffers(uint32_t width, uint32_t height) return false; } - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(m_cameraFovDeg), float(width) / float(height), 0.1f, 10000.0f); - camera.setProjectionMatrix(projectionMatrix); + const float aspect = float(width) / float(height); + const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(m_cameraFovDeg), aspect, 0.1f, 10000.0f); + using core_mat_t = std::remove_cv_t>; + core_mat_t coreProjection; + std::memcpy(coreProjection.pointer(), &projectionMatrix, sizeof(projectionMatrix)); + camera.setProjectionMatrix(coreProjection); return true; -#endif } IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::microseconds nextPresentationTimestamp) @@ -90,15 +95,25 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi auto* const cb = m_cmdBuffers.data()[resourceIx].get(); auto scRes = static_cast(m_surface->getSwapchainResources()); + auto* imgui = static_cast(ui.it.get()); const bool windowFocused = m_window->hasInputFocus() || m_window->hasMouseFocus(); if (!windowFocused && m_cameraControlEnabled) m_cameraControlEnabled = false; const bool wantCameraControl = m_cameraControlEnabled && windowFocused; - const uint32_t windowWidth = m_window->getWidth(); - const uint32_t windowHeight = m_window->getHeight(); - if (windowWidth == 0u || windowHeight == 0u || m_window->isMinimized()) + uint32_t renderWidth = m_window->getWidth(); + uint32_t renderHeight = m_window->getHeight(); + if (auto* sc = scRes->getSwapchain()) + { + const auto& params = sc->getCreationParameters().sharedParams; + if (params.width && params.height) + { + renderWidth = params.width; + renderHeight = params.height; + } + } + if (renderWidth == 0u || renderHeight == 0u || m_window->isMinimized()) return {}; if (m_cameraControlApplied != wantCameraControl) @@ -112,18 +127,13 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi -#ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY - IGPUFramebuffer* const fb2D = nullptr; - auto* const fb3D = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); -#else - const uint32_t desired3DWidth = windowWidth; - const uint32_t desired3DHeight = windowHeight; + const uint32_t desired3DWidth = renderWidth; + const uint32_t desired3DHeight = renderHeight; if (!recreate3DPlotFramebuffers(desired3DWidth, desired3DHeight)) return {}; auto* const fb2D = m_frameBuffers2D[resourceIx].get(); auto* const fb3D = m_frameBuffers3D[resourceIx].get(); -#endif cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); @@ -165,15 +175,24 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi { const float maxRadius = m_plotRadius * 0.98f; const float clampRadius = maxRadius * 0.999f; - auto pos = camera.getPosition(); - const float dist = core::length(pos)[0]; + using core_vec_t = std::remove_cv_t>; + const auto toHlslVec3 = [](const core_vec_t& v) + { + return float32_t3(v.x, v.y, v.z); + }; + const auto toCoreVec3 = [](const float32_t3& v) + { + return core_vec_t(v.x, v.y, v.z); + }; + auto pos = toHlslVec3(camera.getPosition()); + const float dist = length(pos); if (dist > maxRadius) { - auto forward = camera.getTarget() - pos; - pos.makeSafe3D(); - pos = core::normalize(pos) * clampRadius; - camera.setPosition(pos); - camera.setTarget(pos + forward); + const auto target = toHlslVec3(camera.getTarget()); + const auto forward = target - pos; + pos = normalize(pos) * clampRadius; + camera.setPosition(toCoreVec3(pos)); + camera.setTarget(toCoreVec3(pos + forward)); } } @@ -190,14 +209,13 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi ext::imgui::UI::SUpdateParameters params = { .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), - .displaySize = {m_window->getWidth(),m_window->getHeight()}, + .displaySize = {renderWidth,renderHeight}, .mouseEvents = captured.mouse, .keyboardEvents = captured.keyboard }; -#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY - ui.it->update(params); -#endif + if (imgui) + imgui->update(params); } if (m_cameraControlApplied) @@ -209,7 +227,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi auto& ies = m_assets[m_activeAssetIx]; const auto* profile = ies.getProfile(); const auto& accessor = profile->getAccessor(); - const auto pc = nbl::hlsl::this_example::ies::CdcPC + const auto pc = hlsl::this_example::ies::CdcPC { .hAnglesBDA = ies.buffers.hAngles->getDeviceAddress(), .vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(), @@ -258,11 +276,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi // Graphics { -#ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY - asset::VkExtent3D extent = { m_window->getWidth(), m_window->getHeight() }; -#else auto extent = fb2D->getCreationParameters().colorAttachments[0u]->getCreationParameters().image->getCreationParameters().extent; -#endif const uint32_t plotHeight = extent.height / 2u; asset::SViewport viewport; @@ -299,7 +313,6 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi .renderArea = currentRenderArea }; -#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY cb->beginDebugMarker("IES::graphics 2D plot"); cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); { @@ -332,7 +345,6 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi } cb->endRenderPass(); cb->endDebugMarker(); -#endif const IGPUCommandBuffer::SClearColorValue d3clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; auto info3D = info; @@ -367,21 +379,21 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi cb->endRenderPass(); cb->endDebugMarker(); -#ifndef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY cb->beginDebugMarker("IES::graphics ImGUI"); - viewport.width = m_window->getWidth(); viewport.height = m_window->getHeight(); + viewport.width = renderWidth; + viewport.height = renderHeight; cb->setViewport(0u, 1u, &viewport); - scissor.extent = { m_window->getWidth(), m_window->getHeight() }; + scissor.extent = { renderWidth, renderHeight }; cb->setScissor(0u, 1u, &scissor); - currentRenderArea.extent = { m_window->getWidth(),m_window->getHeight() }; + currentRenderArea.extent = { renderWidth, renderHeight }; info.framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); info.renderArea = currentRenderArea; cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + if (imgui) { - auto* imgui = ui.it.get(); auto* pipeline = imgui->getPipeline(); cb->bindGraphicsPipeline(pipeline); const auto* ds = ui.descriptor->getDescriptorSet(); @@ -395,7 +407,6 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi } cb->endRenderPass(); cb->endDebugMarker(); -#endif cb->end(); } @@ -478,3 +489,4 @@ const video::IGPURenderpass::SCreationParams::SSubpassDependency* IESViewer::get }; return dependencies; } + diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp index eeb673f8a..b4c47f5d7 100644 --- a/50.IESViewer/AppUI.cpp +++ b/50.IESViewer/AppUI.cpp @@ -3,10 +3,19 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "App.hpp" +#include +#include +#include +#include +#include "imgui/imgui.h" #include "imgui/imgui_internal.h" +#include "nbl/ext/ImGui/ImGui.h" #include "app_resources/common.hlsl" #include "app_resources/false_color.hlsl" #include "app_resources/imgui.opts.hlsl" +#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include "nbl/builtin/hlsl/math/octahedral.hlsl" using namespace this_example; @@ -16,13 +25,13 @@ void IESViewer::uiListener() SImResourceInfo info; info.textureID = ext::imgui::UI::FontAtlasTexId + resourceIx + 1u; - info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + info.samplerIx = (uint16_t)ext::imgui::UI::DefaultSamplerIx::USER; const ImGuiViewport* vp = ImGui::GetMainViewport(); const ImVec2 viewportPos = vp->Pos; const ImVec2 viewportSize = vp->Size; auto* cursorControl = m_window->getCursorControl(); - const auto cursorPosition = cursorControl ? cursorControl->getPosition() : nbl::ui::ICursorControl::SPosition{}; + const auto cursorPosition = cursorControl ? cursorControl->getPosition() : ICursorControl::SPosition{}; const int32_t windowX = m_window->getX(); const int32_t windowY = m_window->getY(); const int32_t windowW = static_cast(m_window->getWidth()); @@ -43,6 +52,16 @@ void IESViewer::uiListener() } return ImGui::ColorConvertFloat4ToU32(ImVec4(clamped, clamped, clamped, 1.0f)); }; + const auto showHint = [&](const char* text) + { + if (!m_showHints || !text || text[0] == '\0') + return; + if (!ImGui::IsItemHovered()) + return; + ImGui::BeginTooltip(); + ImGui::TextUnformatted(text); + ImGui::EndTooltip(); + }; std::vector assetLabelPtrs; assetLabelPtrs.reserve(m_assetLabels.size()); for (const auto& label : m_assetLabels) @@ -58,6 +77,7 @@ void IESViewer::uiListener() ImVec2 plotRectMax(0.f, 0.f); bool plotRectValid = false; bool plotHovered = false; + m_plot2DRectValid = false; auto& ies = m_assets[activeIx]; auto* profile = ies.getProfile(); @@ -69,6 +89,7 @@ void IESViewer::uiListener() const bool singleAngle = (upperBound == lowerBound); constexpr float kMinFlatten = 0.0f; + constexpr size_t kSmallBufSize = 32; auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); auto updateCameraProjection = [&]() @@ -76,14 +97,16 @@ void IESViewer::uiListener() if (m_plot3DWidth == 0u || m_plot3DHeight == 0u) return; const float aspect = float(m_plot3DWidth) / float(m_plot3DHeight); - auto projectionMatrix = core::matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(m_cameraFovDeg), aspect, 0.1f, 10000.0f); - camera.setProjectionMatrix(projectionMatrix); + const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(m_cameraFovDeg), aspect, 0.1f, 10000.0f); + using core_mat_t = std::remove_cv_t>; + core_mat_t coreProjection; + std::memcpy(coreProjection.pointer(), &projectionMatrix, sizeof(projectionMatrix)); + camera.setProjectionMatrix(coreProjection); }; - auto draw3DControls = [&](float controlWidth) + auto draw3DControls = [&]() { - bool interpolateCandela = - mode.sphere.hasFlags(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); + bool interpolateCandela = mode.sphere.hasFlags(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); if (ImGui::Checkbox("interpolate candelas", &interpolateCandela)) { @@ -94,9 +117,9 @@ void IESViewer::uiListener() ~this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE ); } + showHint("Interpolate candela values in the octahedral map."); - bool falseColor = - mode.sphere.hasFlags(this_example::ies::ESM_FALSE_COLOR); + bool falseColor = mode.sphere.hasFlags(this_example::ies::ESM_FALSE_COLOR); if (ImGui::Checkbox("false color", &falseColor)) { @@ -107,13 +130,19 @@ void IESViewer::uiListener() ~this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR ); } + showHint("Use false color palette for the 3D plot."); bool showOctaMap = m_showOctaMapPreview; if (ImGui::Checkbox("octahedral map", &showOctaMap)) m_showOctaMapPreview = showOctaMap; + showHint("Show octahedral map preview under the 2D plot."); + + bool showHints = m_showHints; + if (ImGui::Checkbox("show hints", &showHints)) + m_showHints = showHints; + showHint("Toggle help tooltips."); - bool cubePlot = - mode.sphere.hasFlags(this_example::ies::ESM_CUBE); + bool cubePlot = mode.sphere.hasFlags(this_example::ies::ESM_CUBE); if (ImGui::Checkbox("cube plot", &cubePlot)) { @@ -124,39 +153,84 @@ void IESViewer::uiListener() ~this_example::ies::E_SPHERE_MODE::ESM_CUBE ); } + showHint("Render the plot on a cube instead of a sphere."); bool wireframe = m_wireframeEnabled; if (ImGui::Checkbox("wireframe", &wireframe)) m_wireframeEnabled = wireframe; + showHint("Show wireframe topology in the 3D plot."); bool cameraControl = m_cameraControlEnabled; if (ImGui::Checkbox("camera control (space)", &cameraControl)) m_cameraControlEnabled = cameraControl; + showHint("Enable camera movement with mouse and keyboard."); + float flatten = ImClamp(ies.flatten, kMinFlatten, 1.0f); bool speedChanged = false; - ImGui::SetNextItemWidth(controlWidth); - speedChanged |= ImGui::SliderFloat("move speed", &m_cameraMoveSpeed, 0.1f, 10.0f, "%.2f", ImGuiSliderFlags_AlwaysClamp); - ImGui::SetNextItemWidth(controlWidth); - speedChanged |= ImGui::SliderFloat("rotate speed", &m_cameraRotateSpeed, 0.1f, 5.0f, "%.2f", ImGuiSliderFlags_AlwaysClamp); + bool fovChanged = false; + bool flattenChanged = false; + if (ImGui::BeginTable("##camera_controls", 2, ImGuiTableFlags_SizingStretchProp)) + { + float labelWidth = 0.0f; + labelWidth = ImMax(labelWidth, ImGui::CalcTextSize("move speed").x); + labelWidth = ImMax(labelWidth, ImGui::CalcTextSize("rotate speed").x); + labelWidth = ImMax(labelWidth, ImGui::CalcTextSize("fov").x); + labelWidth = ImMax(labelWidth, ImGui::CalcTextSize("flatten").x); + labelWidth += ImGui::GetStyle().CellPadding.x * 2.0f; + labelWidth = ImMin(labelWidth, ImGui::GetContentRegionAvail().x * 0.6f); + ImGui::TableSetupColumn("label", ImGuiTableColumnFlags_WidthFixed, labelWidth); + ImGui::TableSetupColumn("value", ImGuiTableColumnFlags_WidthStretch); + auto sliderRow = [&](const char* label, float* value, float min, float max, const char* fmt, const char* hint) + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::AlignTextToFramePadding(); + ImGui::TextUnformatted(label); + showHint(hint); + ImGui::TableSetColumnIndex(1); + ImGui::SetNextItemWidth(-FLT_MIN); + ImGui::PushID(label); + const bool changed = ImGui::SliderFloat("##value", value, min, max, fmt, ImGuiSliderFlags_AlwaysClamp); + ImGui::PopID(); + showHint(hint); + return changed; + }; + + speedChanged |= sliderRow("move speed", &m_cameraMoveSpeed, 0.1f, 10.0f, "%.2f", "Camera movement speed."); + speedChanged |= sliderRow("rotate speed", &m_cameraRotateSpeed, 0.1f, 5.0f, "%.2f", "Camera rotation speed."); + fovChanged |= sliderRow("fov", &m_cameraFovDeg, 30.0f, 120.0f, "%.0f", "Camera field of view."); + + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::AlignTextToFramePadding(); + ImGui::TextUnformatted("flatten"); + showHint("Flatten the profile (0..1)."); + ImGui::TableSetColumnIndex(1); + const float inputWidth = ImMax(64.0f, ImGui::CalcTextSize("0.000").x + ImGui::GetStyle().FramePadding.x * 2.0f); + const float spacing = ImGui::GetStyle().ItemInnerSpacing.x; + float sliderWidth = ImGui::GetContentRegionAvail().x - inputWidth - spacing; + if (sliderWidth < 40.0f) + sliderWidth = ImGui::GetContentRegionAvail().x; + ImGui::SetNextItemWidth(sliderWidth); + flattenChanged |= ImGui::SliderFloat("##flatten", &flatten, kMinFlatten, 1.0f, "%.3f", ImGuiSliderFlags_AlwaysClamp); + showHint("Flatten the profile (0..1)."); + ImGui::SameLine(); + ImGui::SetNextItemWidth(inputWidth); + flattenChanged |= ImGui::InputFloat("##flatten_value", &flatten, 0.0f, 0.0f, "%.3f"); + showHint("Enter flatten value manually."); + + ImGui::EndTable(); + } + if (speedChanged && m_cameraControlEnabled) { camera.setMoveSpeed(m_cameraMoveSpeed); camera.setRotateSpeed(m_cameraRotateSpeed); } - bool fovChanged = false; - ImGui::SetNextItemWidth(controlWidth); - fovChanged |= ImGui::SliderFloat("fov", &m_cameraFovDeg, 30.0f, 120.0f, "%.0f", ImGuiSliderFlags_AlwaysClamp); if (fovChanged) updateCameraProjection(); - float flatten = ImClamp(ies.flatten, kMinFlatten, 1.0f); - bool flattenChanged = false; - ImGui::SetNextItemWidth(controlWidth); - flattenChanged |= ImGui::SliderFloat("flatten", &flatten, kMinFlatten, 1.0f, "%.3f", ImGuiSliderFlags_AlwaysClamp); - ImGui::SameLine(); - ImGui::SetNextItemWidth(64.0f); - flattenChanged |= ImGui::InputFloat("##flatten_value", &flatten, 0.0f, 0.0f, "%.3f"); if (flattenChanged) { flatten = ImClamp(flatten, kMinFlatten, 1.0f); @@ -199,34 +273,79 @@ void IESViewer::uiListener() { const auto& resolution = accessor.properties.optimalIESResolution; - char b1[64]; - snprintf(b1, sizeof(b1), "%.3f deg", angle); - if (ImGui::BeginTable("##profile_info", 2, ImGuiTableFlags_SizingStretchProp)) + constexpr size_t kInfoBufSize = 64; + std::array bAngle{}; + std::array bAngles{}; + std::array bRes{}; + std::array bMax{}; + std::array bAvg{}; + std::array bAvgFull{}; + std::snprintf(bAngle.data(), bAngle.size(), "%.3f deg", angle); + std::snprintf(bAngles.data(), bAngles.size(), "angles: %u x %u", accessor.hAnglesCount(), accessor.vAnglesCount()); + std::snprintf(bRes.data(), bRes.size(), "resolution: %u x %u", resolution.x, resolution.y); + std::snprintf(bMax.data(), bMax.size(), "max cd: %.3f", properties.maxCandelaValue); + std::snprintf(bAvg.data(), bAvg.size(), "avg: %.3f", properties.avgEmmision); + std::snprintf(bAvgFull.data(), bAvgFull.size(), "avg full: %.3f", properties.fullDomainAvgEmission); + float leftWidth = 0.0f; + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(IES::symmetryToRS(properties.getSymmetry())).x); + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(IES::versionToRS(properties.getVersion())).x); + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(bAngles.data()).x); + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(bMax.data()).x); + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(bAvgFull.data()).x); + leftWidth += ImGui::GetStyle().CellPadding.x * 2.0f; + leftWidth = ImMin(leftWidth, ImGui::GetContentRegionAvail().x * 0.6f); + if (ImGui::BeginTable("##profile_info", 2, ImGuiTableFlags_SizingFixedFit)) { - ImGui::TableNextColumn(); - ImGui::TextUnformatted(IES::symmetryToRS(properties.getSymmetry())); - ImGui::TableNextColumn(); - ImGui::TextUnformatted(IES::typeToRS(properties.getType())); - - ImGui::TableNextColumn(); - ImGui::TextUnformatted(IES::versionToRS(properties.getVersion())); - ImGui::TableNextColumn(); - ImGui::TextUnformatted(assetLabelPtrs.empty() ? ies.key.c_str() : assetLabelPtrs[activeIx]); - - ImGui::TableNextColumn(); - ImGui::Text("angles: %u x %u", accessor.hAnglesCount(), accessor.vAnglesCount()); - ImGui::TableNextColumn(); - ImGui::Text("resolution: %u x %u", resolution.x, resolution.y); - - ImGui::TableNextColumn(); - ImGui::Text("max cd: %.3f", properties.maxCandelaValue); - ImGui::TableNextColumn(); - ImGui::Text("avg: %.3f", properties.avgEmmision); - - ImGui::TableNextColumn(); - ImGui::Text("avg full: %.3f", properties.fullDomainAvgEmission); - ImGui::TableNextColumn(); - ImGui::TextUnformatted(b1); + ImGui::TableSetupColumn("left", ImGuiTableColumnFlags_WidthFixed, leftWidth); + ImGui::TableSetupColumn("right", ImGuiTableColumnFlags_WidthStretch); + auto rightText = [&](const char* text, const char* hint) + { + const float avail = ImGui::GetContentRegionAvail().x; + const float textWidth = ImGui::CalcTextSize(text).x; + const char* displayText = text; + std::string clipped; + if (textWidth > avail && avail > 0.0f) + { + const char* ell = "..."; + const float ellW = ImGui::CalcTextSize(ell).x; + const float target = ImMax(0.0f, avail - ellW); + const int len = static_cast(std::strlen(text)); + int lo = 0; + int hi = len; + while (lo < hi) + { + int mid = (lo + hi + 1) / 2; + float w = ImGui::CalcTextSize(text, text + mid).x; + if (w <= target) + lo = mid; + else + hi = mid - 1; + } + clipped.assign(text, text + lo); + clipped.append(ell); + displayText = clipped.c_str(); + } + const float displayWidth = ImGui::CalcTextSize(displayText).x; + if (displayWidth < avail) + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + (avail - displayWidth)); + ImGui::TextUnformatted(displayText); + showHint(hint); + }; + auto row = [&](const char* left, const char* right, const char* leftHint, const char* rightHint) + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(left); + showHint(leftHint); + ImGui::TableSetColumnIndex(1); + rightText(right, rightHint); + }; + + row(IES::symmetryToRS(properties.getSymmetry()), IES::typeToRS(properties.getType()), "IES symmetry mode.", "IES photometric type."); + row(IES::versionToRS(properties.getVersion()), assetLabelPtrs.empty() ? ies.key.c_str() : assetLabelPtrs[activeIx], "IES standard/version.", "Active IES profile file."); + row(bAngles.data(), bRes.data(), "Horizontal and vertical angle count.", "Octahedral map resolution."); + row(bMax.data(), bAvg.data(), "Maximum candela value.", "Average candela value."); + row(bAvgFull.data(), bAngle.data(), "Average candela over full domain.", "Current horizontal angle."); ImGui::EndTable(); } @@ -246,10 +365,17 @@ void IESViewer::uiListener() const float titleX = ImMax(0.0f, (ImGui::GetContentRegionAvail().x - titleSize.x) * 0.5f); ImGui::SetCursorPosX(ImGui::GetCursorPosX() + titleX); ImGui::TextUnformatted(title); + showHint("2D candlepower distribution curve."); } plotPos = ImGui::GetCursorScreenPos(); ImGui::Image(info, plotSize, ImVec2(0.f, 0.f), ImVec2(1.f, 0.5f)); + const ImVec2 itemMin = ImGui::GetItemRectMin(); + const ImVec2 itemMax = ImGui::GetItemRectMax(); + m_plot2DRectMin = float32_t2(itemMin.x, itemMin.y); + m_plot2DRectMax = float32_t2(itemMax.x, itemMax.y); + m_plot2DRectValid = true; + showHint("2D candlepower distribution curve."); ImDrawList* dl = ImGui::GetWindowDrawList(); @@ -263,6 +389,7 @@ void IESViewer::uiListener() { ImGui::SetCursorScreenPos(ImVec2(sliderX, sliderY)); ImGui::InvisibleButton("##angle_slider", ImVec2(barWidth, sliderH)); + showHint("Adjust horizontal angle."); ImVec2 rmin = ImGui::GetItemRectMin(); ImVec2 rmax = ImGui::GetItemRectMax(); ImU32 col = IM_COL32(220, 60, 60, 255); @@ -278,9 +405,10 @@ void IESViewer::uiListener() { float y = (y0 + y1) * 0.5f; dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); - char tb[32]; snprintf(tb, sizeof(tb), "%.0f", lowerBound); - ImVec2 ts = ImGui::CalcTextSize(tb); - dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); + std::array tb{}; + std::snprintf(tb.data(), tb.size(), "%.0f", lowerBound); + ImVec2 ts = ImGui::CalcTextSize(tb.data()); + dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb.data()); } else { @@ -290,9 +418,10 @@ void IESViewer::uiListener() float t = (v - lowerBound) / (upperBound - lowerBound); float y = y1 - t * (y1 - y0); dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); - char tb[32]; snprintf(tb, sizeof(tb), "%.0f", v); - ImVec2 ts = ImGui::CalcTextSize(tb); - dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb); + std::array tb{}; + std::snprintf(tb.data(), tb.size(), "%.0f", v); + ImVec2 ts = ImGui::CalcTextSize(tb.data()); + dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb.data()); } } @@ -319,19 +448,37 @@ void IESViewer::uiListener() const float titleX = ImMax(0.0f, (ImGui::GetContentRegionAvail().x - titleSize.x) * 0.5f); ImGui::SetCursorPosX(ImGui::GetCursorPosX() + titleX); ImGui::TextUnformatted(title); + showHint("Octahedral map preview."); } ImGui::Image(info, plotSize, ImVec2(0.f, 0.5f), ImVec2(1.f, 1.f)); + showHint("Octahedral map preview."); } ImGui::Separator(); - draw3DControls(ImMax(120.0f, ImMin(panelWidth - panelMargin * 2.0f, 260.0f))); + draw3DControls(); ImGui::Separator(); if (!assetLabelPtrs.empty()) { - ImGui::SetNextItemWidth(ImMin(260.0f, panelWidth - panelMargin * 2.0f)); - if (ImGui::Combo("profile", &activeIxUi, assetLabelPtrs.data(), static_cast(assetLabelPtrs.size()))) + ImGui::TextUnformatted("profile"); + ImGui::SameLine(); + if (ImGui::ArrowButton("##profile_prev", ImGuiDir_Up)) + { + activeIx = (activeIx + assetLabelPtrs.size() - 1u) % assetLabelPtrs.size(); + activeIxUi = static_cast(activeIx); + } + ImGui::SameLine(); + if (ImGui::ArrowButton("##profile_next", ImGuiDir_Down)) + { + activeIx = (activeIx + 1u) % assetLabelPtrs.size(); + activeIxUi = static_cast(activeIx); + } + showHint("Select active IES profile. Use up/down arrows."); + ImGui::NewLine(); + ImGui::SetNextItemWidth(ImGui::GetContentRegionAvail().x); + if (ImGui::Combo("##profile", &activeIxUi, assetLabelPtrs.data(), static_cast(assetLabelPtrs.size()))) activeIx = static_cast(activeIxUi); + showHint("Select active IES profile."); } } ImGui::End(); @@ -399,10 +546,10 @@ void IESViewer::uiListener() const float y = barMin.y + (1.0f - stop) * barHeight; dl->AddLine(ImVec2(barMin.x - 4.0f, y), ImVec2(barMin.x, y), textCol); const float cdValue = stop * properties.maxCandelaValue; - char label[32]; - snprintf(label, sizeof(label), "%.0f cd", cdValue); - ImVec2 labelSize = ImGui::CalcTextSize(label); - dl->AddText(ImVec2(barMin.x - labelSize.x - 6.0f, y - labelSize.y * 0.5f), textCol, label); + std::array label{}; + std::snprintf(label.data(), label.size(), "%.0f cd", cdValue); + ImVec2 labelSize = ImGui::CalcTextSize(label.data()); + dl->AddText(ImVec2(barMin.x - labelSize.x - 6.0f, y - labelSize.y * 0.5f), textCol, label.data()); } } } @@ -424,7 +571,6 @@ void IESViewer::uiListener() const auto& iesCandela = m_assets[activeIx]; const auto* profileCandela = iesCandela.getProfile(); const auto& accessorCandela = profileCandela->getAccessor(); - const auto& propertiesCandela = accessorCandela.getProperties(); const auto& resolutionCandela = accessorCandela.properties.optimalIESResolution; const float u = (mousePos.x - plotRectMin.x) / plotW; @@ -432,151 +578,109 @@ void IESViewer::uiListener() const float ndcX = u * 2.0f - 1.0f; const float ndcY = v * 2.0f - 1.0f; - core::matrix4SIMD invViewProj; - if (camera.getConcatenatedMatrix().getInverseTransform(invViewProj)) + float32_t4x4 viewProj; + std::memcpy(&viewProj, camera.getConcatenatedMatrix().pointer(), sizeof(viewProj)); + const auto invViewProj = inverse(viewProj); + + const float32_t4 nearPoint(ndcX, ndcY, 0.0f, 1.0f); + const float32_t4 farPoint(ndcX, ndcY, 1.0f, 1.0f); + auto nearWorld = mul(invViewProj, nearPoint); + auto farWorld = mul(invViewProj, farPoint); + nearWorld /= nearWorld.w; + farWorld /= farWorld.w; + + using core_vec_t = std::remove_cv_t>; + const auto toHlslVec3 = [](const core_vec_t& v) { - core::vectorSIMDf nearPoint(ndcX, ndcY, 0.0f, 1.0f); - core::vectorSIMDf farPoint(ndcX, ndcY, 1.0f, 1.0f); - invViewProj.transformVect(nearPoint); - invViewProj.transformVect(farPoint); - nearPoint /= nearPoint.wwww(); - farPoint /= farPoint.wwww(); - - const core::vectorSIMDf origin = camera.getPosition(); - core::vectorSIMDf direction = farPoint - origin; - direction.makeSafe3D(); - direction = core::normalize(direction); - - core::vectorSIMDf hitPos; - bool hit = false; - const bool cubePlot = mode.sphere.hasFlags(this_example::ies::ESM_CUBE); - if (cubePlot) + return float32_t3(v.x, v.y, v.z); + }; + + const float32_t3 origin = toHlslVec3(camera.getPosition()); + const float32_t3 farPos = float32_t3(farWorld); + float32_t3 direction = normalize(farPos - origin); + + float32_t3 hitPos(0.f); + bool hit = false; + const bool cubePlot = mode.sphere.hasFlags(this_example::ies::ESM_CUBE); + if (cubePlot) + { + float tmin = -1.0e20f; + float tmax = 1.0e20f; + auto update = [&](float originAxis, float dirAxis) -> bool { - float tmin = -1.0e20f; - float tmax = 1.0e20f; - auto update = [&](float originAxis, float dirAxis) -> bool + const float eps = 1.0e-6f; + if (abs(dirAxis) < eps) { - const float eps = 1.0e-6f; - if (core::abs(dirAxis) < eps) - { - if (originAxis < -m_plotRadius || originAxis > m_plotRadius) - return false; - return true; - } - float t1 = (-m_plotRadius - originAxis) / dirAxis; - float t2 = (m_plotRadius - originAxis) / dirAxis; - if (t1 > t2) - { - float tmp = t1; - t1 = t2; - t2 = tmp; - } - tmin = core::max(tmin, t1); - tmax = core::min(tmax, t2); - return tmin <= tmax; - }; - - if (update(origin.x, direction.x) && update(origin.y, direction.y) && update(origin.z, direction.z)) + if (originAxis < -m_plotRadius || originAxis > m_plotRadius) + return false; + return true; + } + float t1 = (-m_plotRadius - originAxis) / dirAxis; + float t2 = (m_plotRadius - originAxis) / dirAxis; + if (t1 > t2) { - float t = tmax; - if (t < 0.0f) - t = tmin; - if (t >= 0.0f) - { - hitPos = origin + direction * t; - hit = true; - } + float tmp = t1; + t1 = t2; + t2 = tmp; } - } - else + tmin = hlsl::max(tmin, t1); + tmax = hlsl::min(tmax, t2); + return tmin <= tmax; + }; + + if (update(origin.x, direction.x) && update(origin.y, direction.y) && update(origin.z, direction.z)) { - const float b = core::dot(origin, direction)[0]; - const float c = core::dot(origin, origin)[0] - m_plotRadius * m_plotRadius; - const float disc = b * b - c; - if (disc >= 0.0f) + const float t = (tmax < 0.0f) ? tmin : tmax; + if (t >= 0.0f) { - const float sqrtDisc = core::sqrt(disc); - float t = -b + sqrtDisc; - if (t < 0.0f) - t = -b - sqrtDisc; - if (t >= 0.0f) - { - hitPos = origin + direction * t; - hit = true; - } + hitPos = origin + direction * t; + hit = true; } } - - if (hit) + } + else + { + const float b = dot(origin, direction); + const float c = dot(origin, origin) - m_plotRadius * m_plotRadius; + const float disc = b * b - c; + if (disc >= 0.0f) { - core::vectorSIMDf dir = core::normalize(hitPos); - const float sum = core::abs(dir.x) + core::abs(dir.y) + core::abs(dir.z); - core::vectorSIMDf s = dir / sum; - if (s.z < 0.0f) + const float sqrtDisc = sqrt(disc); + const float tFar = -b + sqrtDisc; + const float tNear = -b - sqrtDisc; + const float t = (tFar < 0.0f) ? tNear : tFar; + if (t >= 0.0f) { - const float sx = s.x; - const float sy = s.y; - s.x = (sx < 0.0f ? -1.0f : 1.0f) * (1.0f - core::abs(sy)); - s.y = (sy < 0.0f ? -1.0f : 1.0f) * (1.0f - core::abs(sx)); + hitPos = origin + direction * t; + hit = true; } + } + } - float uvx = s.x * 0.5f + 0.5f; - float uvy = s.y * 0.5f + 0.5f; + if (hit) + { + using octahedral_t = math::OctahedralTransform; + const float32_t3 dir = normalize(hitPos); + float32_t2 uv = octahedral_t::dirToNDC(dir) * 0.5f + float32_t2(0.5f, 0.5f); - const uint32_t resX = resolutionCandela.x; - const uint32_t resY = resolutionCandela.y; - if (resX > 0u && resY > 0u) + const uint32_t resX = resolutionCandela.x; + const uint32_t resY = resolutionCandela.y; + if (resX > 0u && resY > 0u) + { + const float32_t2 res(static_cast(resX), static_cast(resY)); + const bool interpolateCandela = mode.sphere.hasFlags(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); + if (!interpolateCandela) { - const float resFx = static_cast(resX); - const float resFy = static_cast(resY); - - const bool interpolateCandela = mode.sphere.hasFlags(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); - if (!interpolateCandela) - { - float px = core::floor(uvx * resFx + 0.5f); - float py = core::floor(uvy * resFy + 0.5f); - uvx = px / resFx; - uvy = py / resFy; - } - - const float scaleX = 1.0f - 1.0f / resFx; - const float scaleY = 1.0f - 1.0f / resFy; - const float uvCornerX = (uvx - 0.5f) * scaleX + 0.5f; - const float uvCornerY = (uvy - 0.5f) * scaleY + 0.5f; - - const float tx = uvCornerX * resFx - 0.5f; - const float ty = uvCornerY * resFy - 0.5f; - - int x0 = static_cast(core::floor(tx)); - int y0 = static_cast(core::floor(ty)); - int x1 = x0 + 1; - int y1 = y0 + 1; - const float fx = tx - static_cast(x0); - const float fy = ty - static_cast(y0); - - x0 = ImClamp(x0, 0, static_cast(resX - 1u)); - y0 = ImClamp(y0, 0, static_cast(resY - 1u)); - x1 = ImClamp(x1, 0, static_cast(resX - 1u)); - y1 = ImClamp(y1, 0, static_cast(resY - 1u)); - - const auto info = CIESProfile::texture_t::createInfo(accessorCandela, resolutionCandela, iesCandela.flatten, true); - const auto sample = [&](int x, int y) -> float - { - return CIESProfile::texture_t::eval(accessorCandela, info, nbl::hlsl::uint32_t2(static_cast(x), static_cast(y))); - }; - - const float c00 = sample(x0, y0); - const float c10 = sample(x1, y0); - const float c01 = sample(x0, y1); - const float c11 = sample(x1, y1); - - const float cx0 = c00 + (c10 - c00) * fx; - const float cx1 = c01 + (c11 - c01) * fx; - const float c = cx0 + (cx1 - cx0) * fy; - - candelaValue = c * propertiesCandela.maxCandelaValue; - candelaValid = true; + const auto pixel = floor(uv * res + float32_t2(0.5f, 0.5f)); + uv = pixel / res; } + + const auto info = CIESProfile::texture_t::createInfo(accessorCandela, resolutionCandela, iesCandela.flatten, true); + const float32_t2 scale = float32_t2(1.0f, 1.0f) - float32_t2(1.0f, 1.0f) / res; + const float32_t2 uvCorner = (uv - float32_t2(0.5f, 0.5f)) * scale + float32_t2(0.5f, 0.5f); + const float normalized = CIESProfile::texture_t::eval(accessorCandela, info, uvCorner); + candelaValue = info.maxValueRecip > 0.0f ? (normalized / info.maxValueRecip) : 0.0f; + candelaValid = true; } } } @@ -589,3 +693,6 @@ void IESViewer::uiListener() ImGui::EndTooltip(); } } + + + diff --git a/50.IESViewer/CSimpleIESRenderer.hpp b/50.IESViewer/CSimpleIESRenderer.hpp index dc41e7776..1b4b52e3d 100644 --- a/50.IESViewer/CSimpleIESRenderer.hpp +++ b/50.IESViewer/CSimpleIESRenderer.hpp @@ -6,6 +6,7 @@ #include "nbl/examples/examples.hpp" #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "app_resources/common.hlsl" +#include namespace nbl::examples { @@ -240,8 +241,8 @@ class CSimpleIESRenderer final : public core::IReferenceCounted return false; auto device = const_cast(m_params.layout->getOriginDevice()); - core::vector writes; - core::vector infos; + std::vector writes; + std::vector infos; bool anyFailed = false; auto allocateUTB = [&](const IGeometry::SDataView& view)->decltype(SubAllocatedDescriptorSet::invalid_value) { @@ -335,7 +336,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted if (ix>=m_geoms.size()) return; - core::vector deferredFree; + std::vector deferredFree; deferredFree.reserve(3); auto deallocate = [&](SubAllocatedDescriptorSet::value_type index)->void { @@ -400,7 +401,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted cmdbuf->endDebugMarker(); } - core::vector m_instances; + std::vector m_instances; protected: inline CSimpleIESRenderer(SInitParams&& _params) : m_params(std::move(_params)) {} @@ -418,7 +419,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted } SInitParams m_params; - core::vector m_geoms; + std::vector m_geoms; #undef EXPOSE_NABLA_NAMESPACES }; diff --git a/50.IESViewer/main.cpp b/50.IESViewer/main.cpp index cbb3ee535..0659c2fae 100644 --- a/50.IESViewer/main.cpp +++ b/50.IESViewer/main.cpp @@ -8,11 +8,7 @@ #define APP_WINDOW_WIDTH 669*2u #define APP_WINDOW_HEIGHT APP_WINDOW_WIDTH -#ifdef DEBUG_SWPCHAIN_FRAMEBUFFERS_ONLY -#define APP_DEPTH_BUFFER_FORMAT EF_D16_UNORM -#else #define APP_DEPTH_BUFFER_FORMAT EF_UNKNOWN -#endif IESViewer::IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), @@ -21,4 +17,4 @@ IESViewer::IESViewer(const path& _localInputCWD, const path& _localOutputCWD, co } -NBL_MAIN_FUNC(IESViewer) \ No newline at end of file +NBL_MAIN_FUNC(IESViewer) From 81449834f4efd4fe649f7dc706ab202ca92c45f3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 28 Dec 2025 22:36:34 +0100 Subject: [PATCH 127/219] correct namespaces --- 50.IESViewer/App.hpp | 4 +-- 50.IESViewer/AppUI.cpp | 38 ++++++++++++++--------------- 50.IESViewer/CSimpleIESRenderer.hpp | 2 +- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp index ad7baf03f..d5e05c6cb 100644 --- a/50.IESViewer/App.hpp +++ b/50.IESViewer/App.hpp @@ -267,8 +267,8 @@ class IESViewer final : public IESWindowedApplication, public BuiltinResourcesAp struct { IES::E_MODE view = IES::EM_CDC; - bitflag sphere = - bitflag(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE) | this_example::ies::ESM_FALSE_COLOR; + bitflag sphere = + bitflag(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE) | hlsl::this_example::ies::ESM_FALSE_COLOR; } mode; void processMouse(const IMouseEventChannel::range_t& events); diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp index b4c47f5d7..57591b697 100644 --- a/50.IESViewer/AppUI.cpp +++ b/50.IESViewer/AppUI.cpp @@ -17,8 +17,6 @@ #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/builtin/hlsl/math/octahedral.hlsl" -using namespace this_example; - void IESViewer::uiListener() { const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; @@ -47,7 +45,7 @@ void IESViewer::uiListener() const float clamped = ImClamp(v, 0.0f, 1.0f); if (useFalseColor) { - const auto col = this_example::ies::falseColor(clamped); + const auto col = hlsl::this_example::ies::falseColor(clamped); return ImGui::ColorConvertFloat4ToU32(ImVec4(col.x, col.y, col.z, 1.0f)); } return ImGui::ColorConvertFloat4ToU32(ImVec4(clamped, clamped, clamped, 1.0f)); @@ -106,28 +104,28 @@ void IESViewer::uiListener() auto draw3DControls = [&]() { - bool interpolateCandela = mode.sphere.hasFlags(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); + bool interpolateCandela = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); if (ImGui::Checkbox("interpolate candelas", &interpolateCandela)) { if (interpolateCandela) - mode.sphere |= this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE; + mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE; else - mode.sphere &= static_cast( - ~this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE + mode.sphere &= static_cast( + ~hlsl::this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE ); } showHint("Interpolate candela values in the octahedral map."); - bool falseColor = mode.sphere.hasFlags(this_example::ies::ESM_FALSE_COLOR); + bool falseColor = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_FALSE_COLOR); if (ImGui::Checkbox("false color", &falseColor)) { if (falseColor) - mode.sphere |= this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR; + mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR; else - mode.sphere &= static_cast( - ~this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR + mode.sphere &= static_cast( + ~hlsl::this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR ); } showHint("Use false color palette for the 3D plot."); @@ -142,15 +140,15 @@ void IESViewer::uiListener() m_showHints = showHints; showHint("Toggle help tooltips."); - bool cubePlot = mode.sphere.hasFlags(this_example::ies::ESM_CUBE); + bool cubePlot = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_CUBE); if (ImGui::Checkbox("cube plot", &cubePlot)) { if (cubePlot) - mode.sphere |= this_example::ies::E_SPHERE_MODE::ESM_CUBE; + mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_CUBE; else - mode.sphere &= static_cast( - ~this_example::ies::E_SPHERE_MODE::ESM_CUBE + mode.sphere &= static_cast( + ~hlsl::this_example::ies::E_SPHERE_MODE::ESM_CUBE ); } showHint("Render the plot on a cube instead of a sphere."); @@ -521,7 +519,7 @@ void IESViewer::uiListener() const float barHeight = ImMax(80.0f, plotSize.y - margin * 2.0f); if (plotSize.x > barWidth + margin * 2.0f && plotSize.y > margin * 2.0f) { - const bool useFalseColorLegend = mode.sphere.hasFlags(this_example::ies::ESM_FALSE_COLOR); + const bool useFalseColorLegend = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_FALSE_COLOR); ImVec2 barMin(imgPos.x + plotSize.x - barWidth - margin, imgPos.y + margin); ImVec2 barMax(barMin.x + barWidth, barMin.y + barHeight); @@ -540,9 +538,9 @@ void IESViewer::uiListener() dl->AddRect(barMin, barMax, ImGui::GetColorU32(ImGuiCol_Border)); const ImU32 textCol = ImGui::GetColorU32(ImGuiCol_Text); - for (uint32_t i = 0u; i < this_example::ies::FalseColorStopCount; ++i) + for (uint32_t i = 0u; i < hlsl::this_example::ies::FalseColorStopCount; ++i) { - const float stop = this_example::ies::falseColorStop(i); + const float stop = hlsl::this_example::ies::falseColorStop(i); const float y = barMin.y + (1.0f - stop) * barHeight; dl->AddLine(ImVec2(barMin.x - 4.0f, y), ImVec2(barMin.x, y), textCol); const float cdValue = stop * properties.maxCandelaValue; @@ -601,7 +599,7 @@ void IESViewer::uiListener() float32_t3 hitPos(0.f); bool hit = false; - const bool cubePlot = mode.sphere.hasFlags(this_example::ies::ESM_CUBE); + const bool cubePlot = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_CUBE); if (cubePlot) { float tmin = -1.0e20f; @@ -668,7 +666,7 @@ void IESViewer::uiListener() if (resX > 0u && resY > 0u) { const float32_t2 res(static_cast(resX), static_cast(resY)); - const bool interpolateCandela = mode.sphere.hasFlags(this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); + const bool interpolateCandela = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); if (!interpolateCandela) { const auto pixel = floor(uv * res + float32_t2(0.5f, 0.5f)); diff --git a/50.IESViewer/CSimpleIESRenderer.hpp b/50.IESViewer/CSimpleIESRenderer.hpp index 1b4b52e3d..a8cb39d65 100644 --- a/50.IESViewer/CSimpleIESRenderer.hpp +++ b/50.IESViewer/CSimpleIESRenderer.hpp @@ -54,7 +54,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted hlsl::float32_t radius = 1.f; IGPUDescriptorSet* ds = nullptr; uint16_t texID = 0u; - uint16_t mode = this_example::ies::ESM_NONE; + uint16_t mode = hlsl::this_example::ies::ESM_NONE; bool wireframe = false; }; // From 096030db385727f794e5487fbf9478c2f9a201ab Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 29 Dec 2025 10:24:41 +0700 Subject: [PATCH 128/219] Change geometry_inspector example number from 72 to 73 --- {72_GeometryInspector => 73_GeometryInspector}/CMakeLists.txt | 0 .../include/common.hpp | 0 .../include/transform.hpp | 0 {72_GeometryInspector => 73_GeometryInspector}/main.cpp | 0 CMakeLists.txt | 2 +- 5 files changed, 1 insertion(+), 1 deletion(-) rename {72_GeometryInspector => 73_GeometryInspector}/CMakeLists.txt (100%) rename {72_GeometryInspector => 73_GeometryInspector}/include/common.hpp (100%) rename {72_GeometryInspector => 73_GeometryInspector}/include/transform.hpp (100%) rename {72_GeometryInspector => 73_GeometryInspector}/main.cpp (100%) diff --git a/72_GeometryInspector/CMakeLists.txt b/73_GeometryInspector/CMakeLists.txt similarity index 100% rename from 72_GeometryInspector/CMakeLists.txt rename to 73_GeometryInspector/CMakeLists.txt diff --git a/72_GeometryInspector/include/common.hpp b/73_GeometryInspector/include/common.hpp similarity index 100% rename from 72_GeometryInspector/include/common.hpp rename to 73_GeometryInspector/include/common.hpp diff --git a/72_GeometryInspector/include/transform.hpp b/73_GeometryInspector/include/transform.hpp similarity index 100% rename from 72_GeometryInspector/include/transform.hpp rename to 73_GeometryInspector/include/transform.hpp diff --git a/72_GeometryInspector/main.cpp b/73_GeometryInspector/main.cpp similarity index 100% rename from 72_GeometryInspector/main.cpp rename to 73_GeometryInspector/main.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fc61648b1..aae2400b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,7 +91,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) add_subdirectory(72_GeometryInspector) - add_subdirectory(72_CooperativeBinarySearch) + add_subdirectory(73_CooperativeBinarySearch) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From 95e057ff89ae5cb3323f5dc1471f6502343ba1e2 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 29 Dec 2025 17:55:02 +0700 Subject: [PATCH 129/219] Fix example after merge --- 12_MeshLoaders/main.cpp | 17 ++++++++------- 73_GeometryInspector/CMakeLists.txt | 32 +---------------------------- 73_GeometryInspector/main.cpp | 4 ++-- CMakeLists.txt | 4 ++-- 4 files changed, 13 insertions(+), 44 deletions(-) diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index c588b0e61..7d4a59825 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -208,7 +208,6 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc drawParams.commandBuffer = cb; drawParams.cameraMat = viewProjMatrix; m_drawAABB->render(drawParams, drawFinished, m_drawBBMode == DBBM_OBB ? m_obbInstances : m_aabbInstances); ->>>>>>> master } #endif cb->endRenderPass(); @@ -480,13 +479,6 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc const auto transformed = hlsl::shapes::util::transform(promotedWorld,promoted); printAABB(transformed,"Transformed"); bound = hlsl::shapes::util::union_(transformed,bound); - const auto tmpWorld = hlsl::float32_t3x4(promotedWorld); - const auto world4x4 = float32_t4x4{ - tmpWorld[0], - tmpWorld[1], - tmpWorld[2], - float32_t4(0, 0, 0, 1) - }; #ifdef NBL_BUILD_DEBUG_DRAW @@ -495,12 +487,19 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc hlsl::float32_t3x4 aabbTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(tmpAabb); const auto tmpWorld = hlsl::float32_t3x4(promotedWorld); + const auto world4x4 = float32_t4x4{ + tmpWorld[0], + tmpWorld[1], + tmpWorld[2], + float32_t4(0, 0, 0, 1) + }; + aabbInst.color = { 1,1,1,1 }; aabbInst.transform[0] = tmpWorld[0]; aabbInst.transform[1] = tmpWorld[1]; aabbInst.transform[2] = tmpWorld[2]; aabbInst.transform[3] = float32_t4(0, 0, 0, 1); - aabbInst.transform = math::linalg::promoted_mul(aabbInst.transform, instanceTransform); + aabbInst.transform = math::linalg::promoted_mul(aabbInst.transform, aabbTransform); auto& obbInst = m_obbInstances[i]; const auto& cpuGeom = geometries[i].get(); diff --git a/73_GeometryInspector/CMakeLists.txt b/73_GeometryInspector/CMakeLists.txt index 697399e91..57e32dd63 100644 --- a/73_GeometryInspector/CMakeLists.txt +++ b/73_GeometryInspector/CMakeLists.txt @@ -14,38 +14,8 @@ if(NBL_BUILD_IMGUI AND NBL_BUILD_DEBUG_DRAW) "${NBL_EXT_IMGUI_UI_LIB}" ) - if (NBL_BUILD_MITSUBA_LOADER) - list(APPEND NBL_INCLUDE_SERACH_DIRECTORIES - "${NBL_EXT_MITSUBA_LOADER_INCLUDE_DIRS}" - ) - list(APPEND NBL_LIBRARIES - "${NBL_EXT_MITSUBA_LOADER_LIB}" - ) - endif() - nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") - if(NBL_EMBED_BUILTIN_RESOURCES) - set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) - set(RESOURCE_DIR "app_resources") - - get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) - - file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") - foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") - endforeach() - - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") - - LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) - endif() - - add_dependencies(${EXECUTABLE_NAME} ${NBL_EXT_DEBUG_DRAW_TARGET}) - target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_EXT_DEBUG_DRAW_TARGET}) - target_include_directories(${EXECUTABLE_NAME} PUBLIC $) - + target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::DebugDraw) endif() diff --git a/73_GeometryInspector/main.cpp b/73_GeometryInspector/main.cpp index 5fe0421da..029fe5d08 100644 --- a/73_GeometryInspector/main.cpp +++ b/73_GeometryInspector/main.cpp @@ -410,14 +410,14 @@ class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinR if (m_shouldDrawAABB) { const auto aabbTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(meshInstance.aabb); - debugDrawInstances.push_back(ext::debug_draw::InstanceData{ .transform = hlsl::mul(world4x4, aabbTransform), .color = float32_t4(1, 1, 1, 1)}); + debugDrawInstances.push_back(ext::debug_draw::InstanceData{ .transform = math::linalg::promoted_mul(world4x4, aabbTransform), .color = float32_t4(1, 1, 1, 1)}); } if (m_shouldDrawOBB) { const auto obbTransform = ext::debug_draw::DrawAABB::getTransformFromOBB(meshInstance.obb); debugDrawInstances.push_back(ext::debug_draw::InstanceData{ .transform = hlsl::mul(world4x4, obbTransform), .color = float32_t4(0, 0, 1, 1)}); } - m_bbRenderer->render(cb, drawFinished, debugDrawInstances, viewProjMatrix); + m_bbRenderer->render({ cb, viewProjMatrix }, drawFinished, debugDrawInstances); cb->beginDebugMarker("Render ImGui"); const auto uiParams = m_ui.manager->getCreationParameters(); diff --git a/CMakeLists.txt b/CMakeLists.txt index aae2400b8..8543fc152 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,8 +90,8 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) - add_subdirectory(72_GeometryInspector) - add_subdirectory(73_CooperativeBinarySearch) + add_subdirectory(72_CooperativeBinarySearch) + add_subdirectory(73_GeometryInspector) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From ab85572234a02e87b071ebf1a42c682a7ae8dbe3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 29 Dec 2025 19:27:22 +0700 Subject: [PATCH 130/219] Updated obbInst transform calculation to use math::linalg::promoted_mul --- 12_MeshLoaders/main.cpp | 8 ++------ 73_GeometryInspector/main.cpp | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index 7d4a59825..a04ede3f4 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -495,11 +495,7 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc }; aabbInst.color = { 1,1,1,1 }; - aabbInst.transform[0] = tmpWorld[0]; - aabbInst.transform[1] = tmpWorld[1]; - aabbInst.transform[2] = tmpWorld[2]; - aabbInst.transform[3] = float32_t4(0, 0, 0, 1); - aabbInst.transform = math::linalg::promoted_mul(aabbInst.transform, aabbTransform); + aabbInst.transform = math::linalg::promoted_mul(world4x4, aabbTransform); auto& obbInst = m_obbInstances[i]; const auto& cpuGeom = geometries[i].get(); @@ -513,7 +509,7 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc }); obbInst.color = { 0, 0, 1, 1 }; const auto obbTransform = ext::debug_draw::DrawAABB::getTransformFromOBB(obb); - obbInst.transform = hlsl::mul(world4x4, obbTransform); + obbInst.transform = math::linalg::promoted_mul(world4x4, obbTransform); #endif } diff --git a/73_GeometryInspector/main.cpp b/73_GeometryInspector/main.cpp index 029fe5d08..0ffcb6fa7 100644 --- a/73_GeometryInspector/main.cpp +++ b/73_GeometryInspector/main.cpp @@ -415,7 +415,7 @@ class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinR if (m_shouldDrawOBB) { const auto obbTransform = ext::debug_draw::DrawAABB::getTransformFromOBB(meshInstance.obb); - debugDrawInstances.push_back(ext::debug_draw::InstanceData{ .transform = hlsl::mul(world4x4, obbTransform), .color = float32_t4(0, 0, 1, 1)}); + debugDrawInstances.push_back(ext::debug_draw::InstanceData{ .transform = math::linalg::promoted_mul(world4x4, obbTransform), .color = float32_t4(0, 0, 1, 1)}); } m_bbRenderer->render({ cb, viewProjMatrix }, drawFinished, debugDrawInstances); From d669573a1cd83cd77c5a83d3610704df4b55613c Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 29 Dec 2025 15:57:30 +0100 Subject: [PATCH 131/219] start the example --- 40_PathTracer/CMakeLists.txt | 137 +++ 40_PathTracer/include/common.hpp | 35 + 40_PathTracer/main.cpp | 1526 ++++++++++++++++++++++++++++++ CMakeLists.txt | 11 +- 4 files changed, 1706 insertions(+), 3 deletions(-) create mode 100644 40_PathTracer/CMakeLists.txt create mode 100644 40_PathTracer/include/common.hpp create mode 100644 40_PathTracer/main.cpp diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt new file mode 100644 index 000000000..53d147399 --- /dev/null +++ b/40_PathTracer/CMakeLists.txt @@ -0,0 +1,137 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + + +set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" +) + +list(APPEND NBL_LIBRARIES + imtestengine + "${NBL_EXT_IMGUI_UI_LIB}" +) + +nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +# TODO: why not use GLOB_RECURSE from above ? +set(DEPENDS + app_resources/common.hlsl + app_resources/light_directional.rcall.hlsl + app_resources/light_point.rcall.hlsl + app_resources/light_spot.rcall.hlsl + app_resources/present.frag.hlsl + app_resources/raytrace.rahit.hlsl + app_resources/raytrace.rchit.hlsl + app_resources/raytrace.rgen.hlsl + app_resources/raytrace.rint.hlsl + app_resources/raytrace.rmiss.hlsl + app_resources/raytrace_procedural.rchit.hlsl + app_resources/raytrace_shadow.rahit.hlsl + app_resources/raytrace_shadow.rmiss.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/raytrace.rgen.hlsl", + "KEY": "raytrace_rgen", + }, + { + "INPUT": "app_resources/raytrace.rchit.hlsl", + "KEY": "raytrace_rchit", + }, + { + "INPUT": "app_resources/raytrace_procedural.rchit.hlsl", + "KEY": "raytrace_procedural_rchit", + }, + { + "INPUT": "app_resources/raytrace.rint.hlsl", + "KEY": "raytrace_rint", + }, + { + "INPUT": "app_resources/raytrace.rahit.hlsl", + "KEY": "raytrace_rahit", + }, + { + "INPUT": "app_resources/raytrace_shadow.rahit.hlsl", + "KEY": "raytrace_shadow_rahit", + }, + { + "INPUT": "app_resources/raytrace.rmiss.hlsl", + "KEY": "raytrace_rmiss", + }, + { + "INPUT": "app_resources/raytrace_shadow.rmiss.hlsl", + "KEY": "raytrace_shadow_rmiss", + }, + { + "INPUT": "app_resources/light_directional.rcall.hlsl", + "KEY": "light_directional_rcall", + }, + { + "INPUT": "app_resources/light_point.rcall.hlsl", + "KEY": "light_point_rcall", + }, + { + "INPUT": "app_resources/light_spot.rcall.hlsl", + "KEY": "light_spot_rcall", + }, + { + "INPUT": "app_resources/present.frag.hlsl", + "KEY": "present_frag", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) + + diff --git a/40_PathTracer/include/common.hpp b/40_PathTracer/include/common.hpp new file mode 100644 index 000000000..6727c879c --- /dev/null +++ b/40_PathTracer/include/common.hpp @@ -0,0 +1,35 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ + +#include "nbl/examples/examples.hpp" + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::application_templates; +using namespace nbl::examples; + +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" + +#include "app_resources/common.hlsl" + +namespace nbl::scene +{ + +struct ReferenceObjectCpu +{ + core::smart_refctd_ptr data; + Material material; + core::matrix3x4SIMD transform; + +}; + +} + +#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp new file mode 100644 index 000000000..ecaf53b7f --- /dev/null +++ b/40_PathTracer/main.cpp @@ -0,0 +1,1526 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "common.hpp" + +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "nbl/builtin/hlsl/indirect_commands.hlsl" + +#include "nbl/examples/common/BuiltinResourcesApplication.hpp" + + +class RaytracingPipelineApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication +{ + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; + using clock_t = std::chrono::steady_clock; + + constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; + constexpr static inline uint32_t MaxFramesInFlight = 3u; + constexpr static inline uint8_t MaxUITextureCount = 1u; + constexpr static inline uint32_t NumberOfProceduralGeometries = 5; + + static constexpr const char* s_lightTypeNames[E_LIGHT_TYPE::ELT_COUNT] = { + "Directional", + "Point", + "Spot" + }; + + struct ShaderBindingTable + { + SBufferRange raygenGroupRange; + SBufferRange hitGroupsRange; + uint32_t hitGroupsStride; + SBufferRange missGroupsRange; + uint32_t missGroupsStride; + SBufferRange callableGroupsRange; + uint32_t callableGroupsStride; + }; + + +public: + inline RaytracingPipelineApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) + { + } + + inline SPhysicalDeviceFeatures getRequiredDeviceFeatures() const override + { + auto retval = device_base_t::getRequiredDeviceFeatures(); + retval.rayTracingPipeline = true; + retval.accelerationStructure = true; + retval.rayQuery = true; + return retval; + } + + inline SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.accelerationStructureHostCommands = true; + return retval; + } + + inline core::vector getSurfaces() const override + { + if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = WIN_W; + params.height = WIN_H; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "RaytracingPipelineApp"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = CSimpleResizeSurface::create(std::move(surface)); + } + + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + + return {}; + } + + // so that we can use the same queue for asset converter and rendering + inline core::vector getQueueRequirements() const override + { + auto reqs = device_base_t::getQueueRequirements(); + reqs.front().requiredFlags |= IQueue::FAMILY_FLAGS::COMPUTE_BIT; + return reqs; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + // Load Custom Shader + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return nullptr; + + // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader + auto shader = IAsset::castDown(assets[0]); + if (!shader) + { + m_logger->log("Failed to load a precompiled shader.", ILogger::ELL_ERROR); + return nullptr; + } + + return shader; + }; + + // load shaders + const auto raygenShader = loadPrecompiledShader.operator()<"raytrace_rgen">(); // "app_resources/raytrace.rgen.hlsl" + const auto closestHitShader = loadPrecompiledShader.operator()<"raytrace_rchit">(); // "app_resources/raytrace.rchit.hlsl" + const auto proceduralClosestHitShader = loadPrecompiledShader.operator()<"raytrace_procedural_rchit">(); // "app_resources/raytrace_procedural.rchit.hlsl" + const auto intersectionHitShader = loadPrecompiledShader.operator()<"raytrace_rint">(); // "app_resources/raytrace.rint.hlsl" + const auto anyHitShaderColorPayload = loadPrecompiledShader.operator()<"raytrace_rahit">(); // "app_resources/raytrace.rahit.hlsl" + const auto anyHitShaderShadowPayload = loadPrecompiledShader.operator()<"raytrace_shadow_rahit">(); // "app_resources/raytrace_shadow.rahit.hlsl" + const auto missShader = loadPrecompiledShader.operator()<"raytrace_rmiss">(); // "app_resources/raytrace.rmiss.hlsl" + const auto missShadowShader = loadPrecompiledShader.operator()<"raytrace_shadow_rmiss">(); // "app_resources/raytrace_shadow.rmiss.hlsl" + const auto directionalLightCallShader = loadPrecompiledShader.operator()<"light_directional_rcall">(); // "app_resources/light_directional.rcall.hlsl" + const auto pointLightCallShader = loadPrecompiledShader.operator()<"light_point_rcall">(); // "app_resources/light_point.rcall.hlsl" + const auto spotLightCallShader = loadPrecompiledShader.operator()<"light_spot_rcall">(); // "app_resources/light_spot.rcall.hlsl" + const auto fragmentShader = loadPrecompiledShader.operator()<"present_frag">(); // "app_resources/present.frag.hlsl" + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto gQueue = getGraphicsQueue(); + + // Create renderpass and init surface + nbl::video::IGPURenderpass* renderpass; + { + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + { + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + } + + auto pool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + + m_converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); + + for (auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!pool) + return logFail("Couldn't create Command Pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) + return logFail("Couldn't create Command Buffer!"); + } + + m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); + m_surface->recreateSwapchain(); + + + // create output images + m_hdrImage = m_device->createImage({ + { + .type = IGPUImage::ET_2D, + .samples = ICPUImage::ESCF_1_BIT, + .format = EF_R16G16B16A16_SFLOAT, + .extent = {WIN_W, WIN_H, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .flags = IImage::ECF_NONE, + .usage = bitflag(IImage::EUF_STORAGE_BIT) | IImage::EUF_TRANSFER_SRC_BIT | IImage::EUF_SAMPLED_BIT + } + }); + + if (!m_hdrImage || !m_device->allocate(m_hdrImage->getMemoryReqs(), m_hdrImage.get()).isValid()) + return logFail("Could not create HDR Image"); + + m_hdrImageView = m_device->createImageView({ + .flags = IGPUImageView::ECF_NONE, + .subUsages = IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT | IGPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT, + .image = m_hdrImage, + .viewType = IGPUImageView::E_TYPE::ET_2D, + .format = asset::EF_R16G16B16A16_SFLOAT + }); + + + + // ray trace pipeline and descriptor set layout setup + { + const auto bindings = std::array{ + ICPUDescriptorSetLayout::SBinding{ + .binding = 0, + .type = asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, + .count = 1, + }, + { + .binding = 1, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, + .count = 1, + } + }; + auto cpuDescriptorSetLayout = core::make_smart_refctd_ptr(bindings); + + const SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, + .offset = 0u, + .size = sizeof(SPushConstants), + }; + const auto cpuPipelineLayout = core::make_smart_refctd_ptr(std::span({ pcRange }), std::move(cpuDescriptorSetLayout), nullptr, nullptr, nullptr); + + const auto pipeline = ICPURayTracingPipeline::create(cpuPipelineLayout.get()); + pipeline->getCachedCreationParams() = { + .flags = IGPURayTracingPipeline::SCreationParams::FLAGS::NO_NULL_INTERSECTION_SHADERS, + .maxRecursionDepth = 1, + .dynamicStackSize = true, + }; + + pipeline->getSpecInfos(ESS_RAYGEN)[0] = { + .shader = raygenShader, + .entryPoint = "main", + }; + + pipeline->getSpecInfoVector(ESS_MISS)->resize(EMT_COUNT); + const auto missGroups = pipeline->getSpecInfos(ESS_MISS); + missGroups[EMT_PRIMARY] = { .shader = missShader, .entryPoint = "main" }; + missGroups[EMT_OCCLUSION] = { .shader = missShadowShader, .entryPoint = "main" }; + + auto getHitGroupIndex = [](E_GEOM_TYPE geomType, E_RAY_TYPE rayType) + { + return geomType * ERT_COUNT + rayType; + }; + + const auto hitGroupCount = ERT_COUNT * EGT_COUNT; + pipeline->getSpecInfoVector(ESS_CLOSEST_HIT)->resize(hitGroupCount); + pipeline->getSpecInfoVector(ESS_ANY_HIT)->resize(hitGroupCount); + pipeline->getSpecInfoVector(ESS_INTERSECTION)->resize(hitGroupCount); + + const auto closestHitSpecs = pipeline->getSpecInfos(ESS_CLOSEST_HIT); + const auto anyHitSpecs = pipeline->getSpecInfos(ESS_ANY_HIT); + const auto intersectionSpecs = pipeline->getSpecInfos(ESS_INTERSECTION); + + closestHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = { .shader = closestHitShader, .entryPoint = "main" }; + anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = {.shader = anyHitShaderColorPayload, .entryPoint = "main"}; + + anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_OCCLUSION)] = { .shader = anyHitShaderShadowPayload, .entryPoint = "main" }; + + closestHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = proceduralClosestHitShader, .entryPoint = "main" }; + anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = anyHitShaderColorPayload, .entryPoint = "main" }; + intersectionSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = intersectionHitShader, .entryPoint = "main" }; + + anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = {.shader = anyHitShaderShadowPayload, .entryPoint = "main" }; + intersectionSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = { .shader = intersectionHitShader, .entryPoint = "main" }; + + pipeline->getSpecInfoVector(ESS_CALLABLE)->resize(ELT_COUNT); + const auto callableGroups = pipeline->getSpecInfos(ESS_CALLABLE); + callableGroups[ELT_DIRECTIONAL] = { .shader = directionalLightCallShader, .entryPoint = "main" }; + callableGroups[ELT_POINT] = { .shader = pointLightCallShader, .entryPoint = "main" }; + callableGroups[ELT_SPOT] = { .shader = spotLightCallShader, .entryPoint = "main" }; + + smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); + CAssetConverter::SInputs inputs = {}; + inputs.logger = m_logger.get(); + + const std::array cpuPipelines = { pipeline.get() }; + std::get>(inputs.assets) = cpuPipelines; + + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + auto reservation = converter->reserve(inputs); + auto future = reservation.convert(params); + if (future.copy() != IQueue::RESULT::SUCCESS) + { + m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); + return false; + } + + // assign gpu objects to output + auto&& pipelines = reservation.getGPUObjects(); + m_rayTracingPipeline = pipelines[0].value; + const auto* gpuDsLayout = m_rayTracingPipeline->getLayout()->getDescriptorSetLayouts()[0]; + + const std::array dsLayoutPtrs = { gpuDsLayout }; + m_rayTracingDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, std::span(dsLayoutPtrs.begin(), dsLayoutPtrs.end())); + m_rayTracingDs = m_rayTracingDsPool->createDescriptorSet(core::smart_refctd_ptr(gpuDsLayout)); + + calculateRayTracingStackSize(m_rayTracingPipeline); + + if (!createShaderBindingTable(m_rayTracingPipeline)) + return logFail("Could not create shader binding table"); + + } + + auto assetManager = make_smart_refctd_ptr(smart_refctd_ptr(system)); + + if (!createIndirectBuffer()) + return logFail("Could not create indirect buffer"); + + if (!createAccelerationStructuresFromGeometry()) + return logFail("Could not create acceleration structures from geometry creator"); + + ISampler::SParams samplerParams = { + .AnisotropicFilter = 0 + }; + auto defaultSampler = m_device->createSampler(samplerParams); + + { + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + .immutableSamplers = &defaultSampler + } + }; + auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; + const uint32_t setCounts[] = { 1u }; + m_presentDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + m_presentDs = m_presentDsPool->createDescriptorSet(gpuPresentDescriptorSetLayout); + + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), + .entryPoint = "main", + }; + + auto presentLayout = m_device->createPipelineLayout( + {}, + core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + } + + // write descriptors + IGPUDescriptorSet::SDescriptorInfo infos[3]; + infos[0].desc = m_gpuTlas; + + infos[1].desc = m_hdrImageView; + if (!infos[1].desc) + return logFail("Failed to create image view"); + infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + + infos[2].desc = m_hdrImageView; + infos[2].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SWriteDescriptorSet writes[] = { + {.dstSet = m_rayTracingDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[0]}, + {.dstSet = m_rayTracingDs.get(), .binding = 1, .arrayElement = 0, .count = 1, .info = &infos[1]}, + {.dstSet = m_presentDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[2] }, + }; + m_device->updateDescriptorSets(std::span(writes), {}); + + // gui descriptor setup + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ETC_REPEAT; + params.TextureWrapV = ETC_REPEAT; + params.TextureWrapW = ETC_REPEAT; + + m_ui.samplers.gui = m_device->createSampler(params); + m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); + } + + std::array, 69u> immutableSamplers; + for (auto& it : immutableSamplers) + it = smart_refctd_ptr(m_ui.samplers.scene); + + immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); + + nbl::ext::imgui::UI::SCreationParameters params; + + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; + params.assetManager = m_assetMgr; + params.pipelineCache = nullptr; + params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); + params.renderpass = smart_refctd_ptr(renderpass); + params.streamingBuffer = nullptr; + params.subpassIx = 0u; + params.transfer = getGraphicsQueue(); + params.utilities = m_utils; + { + m_ui.manager = ext::imgui::UI::create(std::move(params)); + + // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + const auto& params = m_ui.manager->getCreationParameters(); + + IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; + descriptorPoolInfo.maxSets = 1u; + descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; + + m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); + assert(m_guiDescriptorSetPool); + + m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); + assert(m_ui.descriptorSet); + } + } + + m_ui.manager->registerListener( + [this]() -> void { + ImGuiIO& io = ImGui::GetIO(); + + m_camera.setProjectionMatrix([&]() + { + static matrix4SIMD projection; + + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + core::radians(m_cameraSetting.fov), + io.DisplaySize.x / io.DisplaySize.y, + m_cameraSetting.zNear, + m_cameraSetting.zFar); + + return projection; + }()); + + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Controls"); + + ImGui::SameLine(); + + ImGui::Text("Camera"); + + ImGui::SliderFloat("Move speed", &m_cameraSetting.moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &m_cameraSetting.rotateSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Fov", &m_cameraSetting.fov, 20.f, 150.f); + ImGui::SliderFloat("zNear", &m_cameraSetting.zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &m_cameraSetting.zFar, 110.f, 10000.f); + Light m_oldLight = m_light; + int light_type = m_light.type; + ImGui::ListBox("LightType", &light_type, s_lightTypeNames, ELT_COUNT); + m_light.type = static_cast(light_type); + if (m_light.type == ELT_DIRECTIONAL) + { + ImGui::SliderFloat3("Light Direction", &m_light.direction.x, -1.f, 1.f); + } + else if (m_light.type == ELT_POINT) + { + ImGui::SliderFloat3("Light Position", &m_light.position.x, -20.f, 20.f); + } + else if (m_light.type == ELT_SPOT) + { + ImGui::SliderFloat3("Light Direction", &m_light.direction.x, -1.f, 1.f); + ImGui::SliderFloat3("Light Position", &m_light.position.x, -20.f, 20.f); + + float32_t dOuterCutoff = hlsl::degrees(acos(m_light.outerCutoff)); + if (ImGui::SliderFloat("Light Outer Cutoff", &dOuterCutoff, 0.0f, 45.0f)) + { + m_light.outerCutoff = cos(hlsl::radians(dOuterCutoff)); + } + } + ImGui::Checkbox("Use Indirect Command", &m_useIndirectCommand); + if (m_light != m_oldLight) + { + m_frameAccumulationCounter = 0; + } + + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + + ImGui::End(); + } + ); + + // Set Camera + { + core::vectorSIMDf cameraPosition(0, 5, -10); + matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + core::radians(60.0f), + WIN_W / WIN_H, + 0.01f, + 500.0f + ); + m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); + } + + m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); + m_surface->recreateSwapchain(); + m_winMgr->show(m_window.get()); + m_oracle.reportBeginFrameRecord(); + m_camera.mapKeysToWASD(); + + return true; + } + + bool updateGUIDescriptorSet() + { + // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout + static std::array descriptorInfo; + static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; + + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); + + for (uint32_t i = 0; i < descriptorInfo.size(); ++i) + { + writes[i].dstSet = m_ui.descriptorSet.get(); + writes[i].binding = 0u; + writes[i].arrayElement = i; + writes[i].count = 1u; + } + writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; + + return m_device->updateDescriptorSets(writes, {}); + } + + inline void workLoopBody() override + { + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] + if (m_realFrameIx >= framesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight + } + }; + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + m_api->startCapture(); + + update(); + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[resourceIx].get(); + + if (!keepRunning()) + return; + + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->beginDebugMarker("RaytracingPipelineApp Frame"); + + const auto viewMatrix = m_camera.getViewMatrix(); + const auto projectionMatrix = m_camera.getProjectionMatrix(); + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + + core::matrix3x4SIMD modelMatrix; + modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); + modelMatrix.setRotation(quaternion(0, 0, 0)); + + core::matrix4SIMD modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); + if (m_cachedModelViewProjectionMatrix != modelViewProjectionMatrix) + { + m_frameAccumulationCounter = 0; + m_cachedModelViewProjectionMatrix = modelViewProjectionMatrix; + } + core::matrix4SIMD invModelViewProjectionMatrix; + modelViewProjectionMatrix.getInverseTransform(invModelViewProjectionMatrix); + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; + imageBarriers[0].barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, // previous frame read from framgent shader + .srcAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }; + imageBarriers[0].image = m_hdrImage.get(); + imageBarriers[0].subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + imageBarriers[0].oldLayout = m_frameAccumulationCounter == 0 ? IImage::LAYOUT::UNDEFINED : IImage::LAYOUT::READ_ONLY_OPTIMAL; + imageBarriers[0].newLayout = IImage::LAYOUT::GENERAL; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); + } + + // Trace Rays Pass + { + SPushConstants pc; + pc.light = m_light; + pc.proceduralGeomInfoBuffer = m_proceduralGeomInfoBuffer->getDeviceAddress(); + pc.triangleGeomInfoBuffer = m_triangleGeomInfoBuffer->getDeviceAddress(); + pc.frameCounter = m_frameAccumulationCounter; + const core::vector3df camPos = m_camera.getPosition().getAsVector3df(); + pc.camPos = { camPos.X, camPos.Y, camPos.Z }; + memcpy(&pc.invMVP, invModelViewProjectionMatrix.pointer(), sizeof(pc.invMVP)); + + cmdbuf->bindRayTracingPipeline(m_rayTracingPipeline.get()); + cmdbuf->setRayTracingPipelineStackSize(m_rayTracingStackSize); + cmdbuf->pushConstants(m_rayTracingPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, 0, sizeof(SPushConstants), &pc); + cmdbuf->bindDescriptorSets(EPBP_RAY_TRACING, m_rayTracingPipeline->getLayout(), 0, 1, &m_rayTracingDs.get()); + if (m_useIndirectCommand) + { + cmdbuf->traceRaysIndirect( + SBufferBinding{ + .offset = 0, + .buffer = m_indirectBuffer, + }); + } + else + { + cmdbuf->traceRays( + m_shaderBindingTable.raygenGroupRange, + m_shaderBindingTable.missGroupsRange, m_shaderBindingTable.missGroupsStride, + m_shaderBindingTable.hitGroupsRange, m_shaderBindingTable.hitGroupsStride, + m_shaderBindingTable.callableGroupsRange, m_shaderBindingTable.callableGroupsStride, + WIN_W, WIN_H, 1); + } + } + + // pipeline barrier + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; + imageBarriers[0].barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }; + imageBarriers[0].image = m_hdrImage.get(); + imageBarriers[0].subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + imageBarriers[0].oldLayout = IImage::LAYOUT::GENERAL; + imageBarriers[0].newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); + } + + { + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = WIN_W; + viewport.height = WIN_H; + } + cmdbuf->setViewport(0u, 1u, &viewport); + + + VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; + cmdbuf->setScissor(defaultScisors); + + auto scRes = static_cast(m_surface->getSwapchainResources()); + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + const IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearColor, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + + cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDs.get()); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); + + const auto uiParams = m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cmdbuf->bindGraphicsPipeline(uiPipeline); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); + m_ui.manager->render(cmdbuf, waitInfo); + + cmdbuf->endRenderPass(); + + } + + cmdbuf->endDebugMarker(); + cmdbuf->end(); + + { + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + } + }; + { + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + + updateGUIDescriptorSet(); + + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + m_realFrameIx--; + } + } + + m_window->setCaption("[Nabla Engine] Ray Tracing Pipeline"); + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + } + m_api->endCapture(); + m_frameAccumulationCounter++; + } + + inline void update() + { + m_camera.setMoveSpeed(m_cameraSetting.moveSpeed); + m_camera.setRotateSpeed(m_cameraSetting.rotateSpeed); + + static std::chrono::microseconds previousEventTimestamp{}; + + m_inputSystem->getDefaultMouse(&m_mouse); + m_inputSystem->getDefaultKeyboard(&m_keyboard); + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + m_oracle.reportEndFrameRecord(); + const auto timestamp = m_oracle.getNextPresentationTimeStamp(); + m_oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } capturedEvents; + + m_camera.beginInputProcessing(nextPresentationTimestamp); + { + const auto& io = ImGui::GetIO(); + m_mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (!io.WantCaptureMouse) + m_camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.mouse.emplace_back(e); + + } + }, m_logger.get()); + + m_keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + if (!io.WantCaptureKeyboard) + m_camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.keyboard.emplace_back(e); + } + }, m_logger.get()); + + } + m_camera.endInputProcessing(nextPresentationTimestamp); + + const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); + + const ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), m_window->getHeight() }, + .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; + + m_ui.manager->update(params); + } + + inline bool keepRunning() override + { + if (m_surface->irrecoverable()) + return false; + + return true; + } + + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + +private: + uint32_t getWorkgroupCount(uint32_t dim, uint32_t size) + { + return (dim + size - 1) / size; + } + + bool createIndirectBuffer() + { + const auto getBufferRangeAddress = [](const SBufferRange& range) + { + return range.buffer->getDeviceAddress() + range.offset; + }; + const auto command = TraceRaysIndirectCommand_t{ + .raygenShaderRecordAddress = getBufferRangeAddress(m_shaderBindingTable.raygenGroupRange), + .raygenShaderRecordSize = m_shaderBindingTable.raygenGroupRange.size, + .missShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.missGroupsRange), + .missShaderBindingTableSize = m_shaderBindingTable.missGroupsRange.size, + .missShaderBindingTableStride = m_shaderBindingTable.missGroupsStride, + .hitShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.hitGroupsRange), + .hitShaderBindingTableSize = m_shaderBindingTable.hitGroupsRange.size, + .hitShaderBindingTableStride = m_shaderBindingTable.hitGroupsStride, + .callableShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.callableGroupsRange), + .callableShaderBindingTableSize = m_shaderBindingTable.callableGroupsRange.size, + .callableShaderBindingTableStride = m_shaderBindingTable.callableGroupsStride, + .width = WIN_W, + .height = WIN_H, + .depth = 1, + }; + IGPUBuffer::SCreationParams params; + params.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDIRECT_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + params.size = sizeof(TraceRaysIndirectCommand_t); + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, std::move(params), &command).move_into(m_indirectBuffer); + return true; + } + + void calculateRayTracingStackSize(const smart_refctd_ptr& pipeline) + { + const auto raygenStackSize = pipeline->getRaygenStackSize(); + auto getMaxSize = [&](auto ranges, auto valProj) -> uint16_t + { + auto maxValue = 0; + for (const auto& val : ranges) + { + maxValue = std::max(maxValue, std::invoke(valProj, val)); + } + return maxValue; + }; + + const auto closestHitStackMax = getMaxSize(pipeline->getHitStackSizes(), &IGPURayTracingPipeline::SHitGroupStackSize::closestHit); + const auto anyHitStackMax = getMaxSize(pipeline->getHitStackSizes(), &IGPURayTracingPipeline::SHitGroupStackSize::anyHit); + const auto intersectionStackMax = getMaxSize(pipeline->getHitStackSizes(), &IGPURayTracingPipeline::SHitGroupStackSize::intersection); + const auto missStackMax = getMaxSize(pipeline->getMissStackSizes(), std::identity{}); + const auto callableStackMax = getMaxSize(pipeline->getCallableStackSizes(), std::identity{}); + auto firstDepthStackSizeMax = std::max(closestHitStackMax, missStackMax); + firstDepthStackSizeMax = std::max(firstDepthStackSizeMax, intersectionStackMax + anyHitStackMax); + m_rayTracingStackSize = raygenStackSize + std::max(firstDepthStackSizeMax, callableStackMax); + } + + bool createShaderBindingTable(const smart_refctd_ptr& pipeline) + { + const auto& limits = m_device->getPhysicalDevice()->getLimits(); + const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; + const auto handleSizeAligned = nbl::core::alignUp(handleSize, limits.shaderGroupHandleAlignment); + + auto& raygenRange = m_shaderBindingTable.raygenGroupRange; + + auto& hitRange = m_shaderBindingTable.hitGroupsRange; + const auto hitHandles = pipeline->getHitHandles(); + + auto& missRange = m_shaderBindingTable.missGroupsRange; + const auto missHandles = pipeline->getMissHandles(); + + auto& callableRange = m_shaderBindingTable.callableGroupsRange; + const auto callableHandles = pipeline->getCallableHandles(); + + raygenRange = { + .offset = 0, + .size = core::alignUp(handleSizeAligned, limits.shaderGroupBaseAlignment) + }; + + missRange = { + .offset = raygenRange.size, + .size = core::alignUp(missHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), + }; + m_shaderBindingTable.missGroupsStride = handleSizeAligned; + + hitRange = { + .offset = missRange.offset + missRange.size, + .size = core::alignUp(hitHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), + }; + m_shaderBindingTable.hitGroupsStride = handleSizeAligned; + + callableRange = { + .offset = hitRange.offset + hitRange.size, + .size = core::alignUp(callableHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), + }; + m_shaderBindingTable.callableGroupsStride = handleSizeAligned; + + const auto bufferSize = raygenRange.size + missRange.size + hitRange.size + callableRange.size; + + ICPUBuffer::SCreationParams cpuBufferParams; + cpuBufferParams.size = bufferSize; + auto cpuBuffer = ICPUBuffer::create(std::move(cpuBufferParams)); + uint8_t* pData = reinterpret_cast(cpuBuffer->getPointer()); + + // copy raygen region + memcpy(pData, &pipeline->getRaygen(), handleSize); + + // copy miss region + uint8_t* pMissData = pData + missRange.offset; + for (const auto& handle : missHandles) + { + memcpy(pMissData, &handle, handleSize); + pMissData += m_shaderBindingTable.missGroupsStride; + } + + // copy hit region + uint8_t* pHitData = pData + hitRange.offset; + for (const auto& handle : hitHandles) + { + memcpy(pHitData, &handle, handleSize); + pHitData += m_shaderBindingTable.hitGroupsStride; + } + + // copy callable region + uint8_t* pCallableData = pData + callableRange.offset; + for (const auto& handle : callableHandles) + { + memcpy(pCallableData, &handle, handleSize); + pCallableData += m_shaderBindingTable.callableGroupsStride; + } + + { + IGPUBuffer::SCreationParams params; + params.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT; + params.size = bufferSize; + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, std::move(params), pData).move_into(raygenRange.buffer); + missRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); + hitRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); + callableRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); + } + + return true; + } + + bool createAccelerationStructuresFromGeometry() + { + auto queue = getGraphicsQueue(); + // get geometries into ICPUBuffers + auto pool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!pool) + return logFail("Couldn't create Command Pool for geometry creation!"); + + const auto defaultMaterial = Material{ + .ambient = {0.2, 0.1, 0.1}, + .diffuse = {0.8, 0.3, 0.3}, + .specular = {0.8, 0.8, 0.8}, + .shininess = 1.0f, + .alpha = 1.0f, + }; + + auto getTranslationMatrix = [](float32_t x, float32_t y, float32_t z) + { + core::matrix3x4SIMD transform; + transform.setTranslation(nbl::core::vectorSIMDf(x, y, z, 0)); + return transform; + }; + + core::matrix3x4SIMD planeTransform; + planeTransform.setRotation(quaternion::fromAngleAxis(core::radians(-90.0f), vector3df_SIMD{ 1, 0, 0 })); + + // triangles geometries + auto geometryCreator = make_smart_refctd_ptr(); + + const auto cpuObjects = std::array{ + scene::ReferenceObjectCpu { + .data = geometryCreator->createRectangle({10, 10}), + .material = defaultMaterial, + .transform = planeTransform, + }, + scene::ReferenceObjectCpu { + .data = geometryCreator->createCube({1, 1, 1}), + .material = defaultMaterial, + .transform = getTranslationMatrix(0, 0.5f, 0), + }, + scene::ReferenceObjectCpu { + .data = geometryCreator->createCube({1.5, 1.5, 1.5}), + .material = Material{ + .ambient = {0.1, 0.1, 0.2}, + .diffuse = {0.2, 0.2, 0.8}, + .specular = {0.8, 0.8, 0.8}, + .shininess = 1.0f, + .alpha = 1.0f, + }, + .transform = getTranslationMatrix(-5.0f, 1.0f, 0), + }, + scene::ReferenceObjectCpu { + .data = geometryCreator->createCube({1.5, 1.5, 1.5}), + .material = Material{ + .ambient = {0.1, 0.2, 0.1}, + .diffuse = {0.2, 0.8, 0.2}, + .specular = {0.8, 0.8, 0.8}, + .shininess = 1.0f, + .alpha = 0.2, + }, + .transform = getTranslationMatrix(5.0f, 1.0f, 0), + }, + }; + + // procedural geometries + using Aabb = IGPUBottomLevelAccelerationStructure::AABB_t; + + smart_refctd_ptr cpuProcBuffer; + { + ICPUBuffer::SCreationParams params; + params.size = NumberOfProceduralGeometries * sizeof(Aabb); + cpuProcBuffer = ICPUBuffer::create(std::move(params)); + } + + core::vector proceduralGeoms; + proceduralGeoms.reserve(NumberOfProceduralGeometries); + auto proceduralGeometries = reinterpret_cast(cpuProcBuffer->getPointer()); + for (int32_t i = 0; i < NumberOfProceduralGeometries; i++) + { + const auto middle_i = NumberOfProceduralGeometries / 2.0; + SProceduralGeomInfo sphere = { + .material = hlsl::_static_cast(Material{ + .ambient = {0.1, 0.05 * i, 0.1}, + .diffuse = {0.3, 0.2 * i, 0.3}, + .specular = {0.8, 0.8, 0.8}, + .shininess = 1.0f, + }), + .center = float32_t3((i - middle_i) * 4.0, 2, 5.0), + .radius = 1, + }; + + proceduralGeoms.push_back(sphere); + const auto sphereMin = sphere.center - sphere.radius; + const auto sphereMax = sphere.center + sphere.radius; + proceduralGeometries[i] = { + vector3d(sphereMin.x, sphereMin.y, sphereMin.z), + vector3d(sphereMax.x, sphereMax.y, sphereMax.z) + }; + } + + { + IGPUBuffer::SCreationParams params; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + params.size = proceduralGeoms.size() * sizeof(SProceduralGeomInfo); + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = queue }, std::move(params), proceduralGeoms.data()).move_into(m_proceduralGeomInfoBuffer); + } + + // get ICPUBuffers into ICPUBLAS + // TODO use one BLAS and multiple triangles/aabbs in one + const auto blasCount = std::size(cpuObjects) + 1; + const auto proceduralBlasIdx = std::size(cpuObjects); + + std::array, std::size(cpuObjects)+1u> cpuBlasList; + for (uint32_t i = 0; i < blasCount; i++) + { + auto& blas = cpuBlasList[i]; + blas = make_smart_refctd_ptr(); + + if (i == proceduralBlasIdx) + { + auto aabbs = make_refctd_dynamic_array>>(1u); + auto primitiveCounts = make_refctd_dynamic_array>(1u); + + auto& aabb = aabbs->front(); + auto& primCount = primitiveCounts->front(); + + primCount = NumberOfProceduralGeometries; + aabb.data = { .offset = 0, .buffer = cpuProcBuffer }; + aabb.stride = sizeof(IGPUBottomLevelAccelerationStructure::AABB_t); + aabb.geometryFlags = IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::OPAQUE_BIT; // only allow opaque for now + + blas->setGeometries(std::move(aabbs), std::move(primitiveCounts)); + } + else + { + auto triangles = make_refctd_dynamic_array>>(1u); + auto primitiveCounts = make_refctd_dynamic_array>(1u); + + auto& tri = triangles->front(); + + auto& primCount = primitiveCounts->front(); + primCount = cpuObjects[i].data->getPrimitiveCount(); + + tri = cpuObjects[i].data->exportForBLAS(); + tri.geometryFlags = cpuObjects[i].material.isTransparent() ? + IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::NO_DUPLICATE_ANY_HIT_INVOCATION_BIT : + IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::OPAQUE_BIT; + + blas->setGeometries(std::move(triangles), std::move(primitiveCounts)); + } + + auto blasFlags = bitflag(IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT) | IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::ALLOW_COMPACTION_BIT; + if (i == proceduralBlasIdx) + blasFlags |= IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT; + + blas->setBuildFlags(blasFlags); + blas->setContentHash(blas->computeContentHash()); + } + + auto geomInfoBuffer = ICPUBuffer::create({ std::size(cpuObjects) * sizeof(STriangleGeomInfo) }); + STriangleGeomInfo* geomInfos = reinterpret_cast(geomInfoBuffer->getPointer()); + + // get ICPUBLAS into ICPUTLAS + auto geomInstances = make_refctd_dynamic_array>(blasCount); + { + uint32_t i = 0; + for (auto instance = geomInstances->begin(); instance != geomInstances->end(); instance++, i++) + { + const auto isProceduralInstance = i == proceduralBlasIdx; + ICPUTopLevelAccelerationStructure::StaticInstance inst; + inst.base.blas = cpuBlasList[i]; + inst.base.flags = static_cast(IGPUTopLevelAccelerationStructure::INSTANCE_FLAGS::TRIANGLE_FACING_CULL_DISABLE_BIT); + inst.base.instanceCustomIndex = i; + inst.base.instanceShaderBindingTableRecordOffset = isProceduralInstance ? 2 : 0; + inst.base.mask = 0xFF; + inst.transform = isProceduralInstance ? matrix3x4SIMD() : cpuObjects[i].transform; + + instance->instance = inst; + } + } + + auto cpuTlas = make_smart_refctd_ptr(); + cpuTlas->setInstances(std::move(geomInstances)); + cpuTlas->setBuildFlags(IGPUTopLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT); + + // convert with asset converter + smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); + struct MyInputs : CAssetConverter::SInputs + { + // For the GPU Buffers to be directly writeable and so that we don't need a Transfer Queue submit at all + inline uint32_t constrainMemoryTypeBits(const size_t groupCopyID, const IAsset* canonicalAsset, const blake3_hash_t& contentHash, const IDeviceMemoryBacked* memoryBacked) const override + { + assert(memoryBacked); + return memoryBacked->getObjectType() != IDeviceMemoryBacked::EOT_BUFFER ? (~0u) : rebarMemoryTypes; + } + + uint32_t rebarMemoryTypes; + } inputs = {}; + inputs.logger = m_logger.get(); + inputs.rebarMemoryTypes = m_physicalDevice->getDirectVRAMAccessMemoryTypeBits(); + // the allocator needs to be overriden to hand out memory ranges which have already been mapped so that the ReBAR fast-path can kick in + // (multiple buffers can be bound to same memory, but memory can only be mapped once at one place, so Asset Converter can't do it) + struct MyAllocator final : public IDeviceMemoryAllocator + { + ILogicalDevice* getDeviceForAllocations() const override { return device; } + + SAllocation allocate(const SAllocateInfo& info) override + { + auto retval = device->allocate(info); + // map what is mappable by default so ReBAR checks succeed + if (retval.isValid() && retval.memory->isMappable()) + retval.memory->map({ .offset = 0,.length = info.size }); + return retval; + } + + ILogicalDevice* device; + } myalloc; + myalloc.device = m_device.get(); + inputs.allocator = &myalloc; + + std::array tmpTlas; + std::array tmpBuffers; + std::array tmpGeometries; + std::array, std::size(cpuObjects)> tmpGeometryPatches; + { + tmpTlas[0] = cpuTlas.get(); + tmpBuffers[0] = cpuProcBuffer.get(); + for (uint32_t i = 0; i < cpuObjects.size(); i++) + { + tmpGeometries[i] = cpuObjects[i].data.get(); + tmpGeometryPatches[i].indexBufferUsages= IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; + } + + std::get>(inputs.assets) = tmpTlas; + std::get>(inputs.assets) = tmpBuffers; + std::get>(inputs.assets) = tmpGeometries; + std::get>(inputs.patches) = tmpGeometryPatches; + } + + auto reservation = converter->reserve(inputs); + { + auto prepass = [&](const auto & references) -> bool + { + auto objects = reservation.getGPUObjects(); + uint32_t counter = {}; + for (auto& object : objects) + { + auto gpu = object.value; + auto* reference = references[counter]; + + if (reference) + { + if (!gpu) + { + m_logger->log("Failed to convert a CPU object to GPU!", ILogger::ELL_ERROR); + return false; + } + } + counter++; + } + return true; + }; + + prepass.template operator() < ICPUTopLevelAccelerationStructure > (tmpTlas); + prepass.template operator() < ICPUBuffer > (tmpBuffers); + prepass.template operator() < ICPUPolygonGeometry > (tmpGeometries); + } + + constexpr auto CompBufferCount = 2; + std::array, CompBufferCount> compBufs = {}; + std::array compBufInfos = {}; + { + auto pool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT | IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, compBufs); + compBufs.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + for (auto i = 0; i < CompBufferCount; i++) + compBufInfos[i].cmdbuf = compBufs[i].get(); + } + auto compSema = m_device->createSemaphore(0u); + SIntendedSubmitInfo compute = {}; + compute.queue = queue; + compute.scratchCommandBuffers = compBufInfos; + compute.scratchSemaphore = { + .semaphore = compSema.get(), + .value = 0u, + .stageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT | PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT + }; + // convert + { + smart_refctd_ptr scratchAlloc; + { + constexpr auto MaxAlignment = 256; + constexpr auto MinAllocationSize = 1024; + const auto scratchSize = core::alignUp(reservation.getMaxASBuildScratchSize(false), MaxAlignment); + + + IGPUBuffer::SCreationParams creationParams = {}; + creationParams.size = scratchSize; + creationParams.usage = IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + auto scratchBuffer = m_device->createBuffer(std::move(creationParams)); + + auto reqs = scratchBuffer->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getDirectVRAMAccessMemoryTypeBits(); + + auto allocation = m_device->allocate(reqs, scratchBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + allocation.memory->map({ .offset = 0,.length = reqs.size }); + + scratchAlloc = make_smart_refctd_ptr( + SBufferRange{0ull, scratchSize, std::move(scratchBuffer)}, + core::allocator(), MaxAlignment, MinAllocationSize + ); + } + + struct MyParams final : CAssetConverter::SConvertParams + { + inline uint32_t getFinalOwnerQueueFamily(const IGPUBuffer* buffer, const core::blake3_hash_t& createdFrom) override + { + return finalUser; + } + inline uint32_t getFinalOwnerQueueFamily(const IGPUAccelerationStructure* image, const core::blake3_hash_t& createdFrom) override + { + return finalUser; + } + + uint8_t finalUser; + } params = {}; + params.utilities = m_utils.get(); + params.compute = &compute; + params.scratchForDeviceASBuild = scratchAlloc.get(); + params.finalUser = queue->getFamilyIndex(); + + auto future = reservation.convert(params); + if (future.copy() != IQueue::RESULT::SUCCESS) + { + m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); + return false; + } + // 2 submits, BLAS build, TLAS build, DO NOT ADD COMPACTIONS IN THIS EXAMPLE! + if (compute.getFutureScratchSemaphore().value>3) + m_logger->log("Overflow submitted on Compute Queue despite using ReBAR (no transfer submits or usage of staging buffer) and providing a AS Build Scratch Buffer of correctly queried max size!",system::ILogger::ELL_ERROR); + + // assign gpu objects to output + auto&& tlases = reservation.getGPUObjects(); + m_gpuTlas = tlases[0].value; + auto&& buffers = reservation.getGPUObjects(); + m_proceduralAabbBuffer = buffers[0].value; + + auto&& gpuPolygonGeometries = reservation.getGPUObjects(); + m_gpuPolygons.resize(gpuPolygonGeometries.size()); + + for (uint32_t i = 0; i < gpuPolygonGeometries.size(); i++) + { + const auto& cpuObject = cpuObjects[i]; + const auto& gpuPolygon = gpuPolygonGeometries[i].value; + const auto gpuTriangles = gpuPolygon->exportForBLAS(); + + const auto& vertexBufferBinding = gpuTriangles.vertexData[0]; + const uint64_t vertexBufferAddress = vertexBufferBinding.buffer->getDeviceAddress() + vertexBufferBinding.offset; + + const auto& normalView = gpuPolygon->getNormalView(); + const uint64_t normalBufferAddress = normalView ? normalView.src.buffer->getDeviceAddress() + normalView.src.offset : 0; + auto normalType = NT_R32G32B32_SFLOAT; + if (normalView && normalView.composed.format == EF_R8G8B8A8_SNORM) + normalType = NT_R8G8B8A8_SNORM; + + const auto& indexBufferBinding = gpuTriangles.indexData; + auto& geomInfo = geomInfos[i]; + geomInfo = { + .material = hlsl::_static_cast(cpuObject.material), + .vertexBufferAddress = vertexBufferAddress, + .indexBufferAddress = indexBufferBinding.buffer ? indexBufferBinding.buffer->getDeviceAddress() + indexBufferBinding.offset : vertexBufferAddress, + .normalBufferAddress = normalBufferAddress, + .normalType = normalType, + .indexType = gpuTriangles.indexType, + }; + + m_gpuPolygons[i] = gpuPolygon; + } + } + + { + IGPUBuffer::SCreationParams params; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + params.size = geomInfoBuffer->getSize(); + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = queue }, std::move(params), geomInfos).move_into(m_triangleGeomInfoBuffer); + } + + return true; + } + + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + uint32_t m_frameAccumulationCounter = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + + core::smart_refctd_ptr m_inputSystem; + InputSystem::ChannelReader m_mouse; + InputSystem::ChannelReader m_keyboard; + + struct CameraSetting + { + float fov = 60.f; + float zNear = 0.1f; + float zFar = 10000.f; + float moveSpeed = 1.f; + float rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + + } m_cameraSetting; + Camera m_camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + + Light m_light = { + .direction = {-1.0f, -1.0f, -0.4f}, + .position = {10.0f, 15.0f, 8.0f}, + .outerCutoff = 0.866025404f, // {cos(radians(30.0f))}, + .type = ELT_DIRECTIONAL + }; + + video::CDumbPresentationOracle m_oracle; + + struct C_UI + { + nbl::core::smart_refctd_ptr manager; + + struct + { + core::smart_refctd_ptr gui, scene; + } samplers; + + core::smart_refctd_ptr descriptorSet; + } m_ui; + core::smart_refctd_ptr m_guiDescriptorSetPool; + + core::vector m_gpuIntersectionSpheres; + uint32_t m_intersectionHitGroupIdx; + + core::vector> m_gpuPolygons; + smart_refctd_ptr m_gpuTlas; + smart_refctd_ptr m_instanceBuffer; + + smart_refctd_ptr m_triangleGeomInfoBuffer; + smart_refctd_ptr m_proceduralGeomInfoBuffer; + smart_refctd_ptr m_proceduralAabbBuffer; + smart_refctd_ptr m_indirectBuffer; + + smart_refctd_ptr m_hdrImage; + smart_refctd_ptr m_hdrImageView; + + smart_refctd_ptr m_rayTracingDsPool; + smart_refctd_ptr m_rayTracingDs; + smart_refctd_ptr m_rayTracingPipeline; + uint64_t m_rayTracingStackSize; + ShaderBindingTable m_shaderBindingTable; + + smart_refctd_ptr m_presentDs; + smart_refctd_ptr m_presentDsPool; + smart_refctd_ptr m_presentPipeline; + + smart_refctd_ptr m_converter; + + + core::matrix4SIMD m_cachedModelViewProjectionMatrix; + bool m_useIndirectCommand = false; + +}; +NBL_MAIN_FUNC(RaytracingPipelineApp) \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 66b82f37f..ce81e505b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,9 +76,14 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(34_DebugDraw) add_subdirectory(38_EXRSplit) - # if (NBL_BUILD_MITSUBA_LOADER AND NBL_BUILD_OPTIX) - # add_subdirectory(39_DenoiserTonemapper) - # endif() + if (NBL_BUILD_MITSUBA_LOADER) + # if (NBL_BUILD_OPTIX) + # add_subdirectory(39_DenoiserTonemapper) + # endif() + if(NBL_BUILD_IMGUI) + add_subdirectory(40_PathTracer) + endif() + endif() #add_subdirectory(43_SumAndCDFFilters) add_subdirectory(47_DerivMapTest EXCLUDE_FROM_ALL) From 3155c5c395b4f6dd69085650348a53b3a50d8d7f Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 29 Dec 2025 16:59:59 +0100 Subject: [PATCH 132/219] remove whats not needed --- 22_RaytracedAO/SimpleJson.cpp | 150 ---------------------------------- 22_RaytracedAO/SimpleJson.h | 78 ------------------ 2 files changed, 228 deletions(-) delete mode 100644 22_RaytracedAO/SimpleJson.cpp delete mode 100644 22_RaytracedAO/SimpleJson.h diff --git a/22_RaytracedAO/SimpleJson.cpp b/22_RaytracedAO/SimpleJson.cpp deleted file mode 100644 index d478991df..000000000 --- a/22_RaytracedAO/SimpleJson.cpp +++ /dev/null @@ -1,150 +0,0 @@ -#include "SimpleJson.h" - -using namespace simplejson; - -// Hackery to emit JSON without using nlohmann/json C++ library (which requires a -// higher level of compiler compliance than is required by SPIRV-Cross -void Stream::begin_json_array() -{ - if (!stack.empty() && stack.top().second) - { - statement_inner(",\n"); - } - statement("["); - ++indent; - stack.emplace(Type::Array, false); -} - -void Stream::end_json_array() -{ - if (stack.empty() || stack.top().first != Type::Array) - std::cerr << "Invalid JSON state"; - if (stack.top().second) - { - statement_inner("\n"); - } - --indent; - statement_no_return("]"); - stack.pop(); - if (!stack.empty()) - { - stack.top().second = true; - } -} - -void Stream::emit_json_array_value(const std::string& value) -{ - if (stack.empty() || stack.top().first != Type::Array) - std::cerr << "Invalid JSON state"; - - if (stack.top().second) - statement_inner(",\n"); - - statement_no_return("\"", value, "\""); - stack.top().second = true; -} - -void Stream::emit_json_array_value(uint32_t value) -{ - if (stack.empty() || stack.top().first != Type::Array) - std::cerr << "Invalid JSON state"; - if (stack.top().second) - statement_inner(",\n"); - statement_no_return(std::to_string(value)); - stack.top().second = true; -} - -void Stream::emit_json_array_value(bool value) -{ - if (stack.empty() || stack.top().first != Type::Array) - std::cerr << "Invalid JSON state"; - if (stack.top().second) - statement_inner(",\n"); - statement_no_return(value ? "true" : "false"); - stack.top().second = true; -} - -void Stream::begin_json_object() -{ - if (!stack.empty() && stack.top().second) - { - statement_inner(",\n"); - } - statement("{"); - ++indent; - stack.emplace(Type::Object, false); -} - -void Stream::end_json_object() -{ - if (stack.empty() || stack.top().first != Type::Object) - std::cerr << "Invalid JSON state"; - if (stack.top().second) - { - statement_inner("\n"); - } - --indent; - statement_no_return("}"); - stack.pop(); - if (!stack.empty()) - { - stack.top().second = true; - } -} - -void Stream::emit_json_key(const std::string& key) -{ - if (stack.empty() || stack.top().first != Type::Object) - std::cerr << "Invalid JSON state"; - - if (stack.top().second) - statement_inner(",\n"); - statement_no_return("\"", key, "\" : "); - stack.top().second = true; -} - -void Stream::emit_json_key_value(const std::string& key, const std::string& value) -{ - emit_json_key(key); - statement_inner("\"", value, "\""); -} - -void Stream::emit_json_key_value(const std::string& key, uint32_t value) -{ - emit_json_key(key); - statement_inner(value); -} - -void Stream::emit_json_key_value(const std::string& key, int32_t value) -{ - emit_json_key(key); - statement_inner(value); -} - -void Stream::emit_json_key_value(const std::string& key, float value) -{ - emit_json_key(key); - statement_inner(to_string(value)); -} - -void Stream::emit_json_key_value(const std::string& key, bool value) -{ - emit_json_key(key); - statement_inner(value ? "true" : "false"); -} - -void Stream::emit_json_key_object(const std::string& key) -{ - emit_json_key(key); - statement_inner("{\n"); - ++indent; - stack.emplace(Type::Object, false); -} - -void Stream::emit_json_key_array(const std::string& key) -{ - emit_json_key(key); - statement_inner("[\n"); - ++indent; - stack.emplace(Type::Array, false); -} \ No newline at end of file diff --git a/22_RaytracedAO/SimpleJson.h b/22_RaytracedAO/SimpleJson.h deleted file mode 100644 index b85e5930c..000000000 --- a/22_RaytracedAO/SimpleJson.h +++ /dev/null @@ -1,78 +0,0 @@ -using namespace std; - -namespace simplejson -{ - enum class Type - { - Object, - Array, - }; - - using State = std::pair; - using Stack = std::stack; - - class Stream - { - Stack stack; - stringstream buffer; - uint32_t indent{ 0 }; - - public: - void begin_json_object(); - void end_json_object(); - void emit_json_key(const std::string& key); - void emit_json_key_value(const std::string& key, const std::string& value); - void emit_json_key_value(const std::string& key, bool value); - void emit_json_key_value(const std::string& key, uint32_t value); - void emit_json_key_value(const std::string& key, int32_t value); - void emit_json_key_value(const std::string& key, float value); - void emit_json_key_object(const std::string& key); - void emit_json_key_array(const std::string& key); - - void begin_json_array(); - void end_json_array(); - void emit_json_array_value(const std::string& value); - void emit_json_array_value(uint32_t value); - void emit_json_array_value(bool value); - - std::string str() const - { - return buffer.str(); - } - - private: - inline void statement_indent() - { - for (uint32_t i = 0; i < indent; i++) - buffer << " "; - } - - template - inline void statement_inner(T&& t) - { - buffer << std::forward(t); - } - - template - inline void statement_inner(T&& t, Ts &&... ts) - { - buffer << std::forward(t); - statement_inner(std::forward(ts)...); - } - - template - inline void statement(Ts &&... ts) - { - statement_indent(); - statement_inner(std::forward(ts)...); - buffer << '\n'; - } - - template - void statement_no_return(Ts &&... ts) - { - statement_indent(); - statement_inner(std::forward(ts)...); - } - }; -} // namespace simplejson From b0c3479fd183a27c6c6a94eb98ec33ad76bcb92f Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 29 Dec 2025 22:30:25 +0100 Subject: [PATCH 133/219] Draft out the Scene class a little bit --- 40_PathTracer/CMakeLists.txt | 14 +- 40_PathTracer/include/common.hpp | 4 - 40_PathTracer/include/renderer/CRenderer.h | 200 ++++ 40_PathTracer/include/renderer/CScene.h | 77 ++ 40_PathTracer/include/renderer/SAASequence.h | 1046 ++++++++++++++++++ 40_PathTracer/main.cpp | 57 +- 40_PathTracer/src/renderer/CRenderer.cpp | 61 + 7 files changed, 1419 insertions(+), 40 deletions(-) create mode 100644 40_PathTracer/include/renderer/CRenderer.h create mode 100644 40_PathTracer/include/renderer/CScene.h create mode 100644 40_PathTracer/include/renderer/SAASequence.h create mode 100644 40_PathTracer/src/renderer/CRenderer.cpp diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index 53d147399..3873c6641 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -6,14 +6,18 @@ endif() set(NBL_INCLUDE_SERACH_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/src" ) - -list(APPEND NBL_LIBRARIES - imtestengine +list(APPEND NBL_LIBRARIES + imguizmo "${NBL_EXT_IMGUI_UI_LIB}" ) - -nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") +list(APPEND NBL_EXAMPLE_SOURCES + "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CRenderer.cpp" +) +list(APPEND NBL_ +) +nbl_create_executable_project("${NBL_EXAMPLE_SOURCES}" "${}" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") if(NBL_EMBED_BUILTIN_RESOURCES) set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) diff --git a/40_PathTracer/include/common.hpp b/40_PathTracer/include/common.hpp index 6727c879c..b33e7abfa 100644 --- a/40_PathTracer/include/common.hpp +++ b/40_PathTracer/include/common.hpp @@ -1,8 +1,6 @@ #ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ #define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ -#include "nbl/examples/examples.hpp" - using namespace nbl; using namespace nbl::core; using namespace nbl::hlsl; @@ -10,8 +8,6 @@ using namespace nbl::system; using namespace nbl::asset; using namespace nbl::ui; using namespace nbl::video; -using namespace nbl::application_templates; -using namespace nbl::examples; #include "nbl/ui/ICursorControl.h" #include "nbl/ext/ImGui/ImGui.h" diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h new file mode 100644 index 000000000..dcd1caed0 --- /dev/null +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -0,0 +1,200 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_C_RENDERER_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_C_RENDERER_H_INCLUDED_ + + +#include "renderer/CScene.h" + +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + +#include +#include +#include + + +namespace nbl::this_example +{ + +class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovable +{ + public: + enum class RenderMode : uint8_t + { + Previs, + Beauty//, + //Albedo, + //Normal, + //Motion + }; + // TODO: move this somewhere else + struct DenoiserArgs + { + std::filesystem::path bloomFilePath; + float bloomScale = 0.0f; + float bloomIntensity = 0.0f; + std::string tonemapperArgs = ""; + }; + + // + constexpr static video::SPhysicalDeviceFeatures RequiredDeviceFeatures() + { + video::SPhysicalDeviceFeatures retval = {}; + retval.rayTracingPipeline = true; + retval.accelerationStructure = true; + return retval; + } + // + constexpr static video::SPhysicalDeviceFeatures PreferredDeviceFeatures() + { + auto retval = RequiredDeviceFeatures(); + retval.accelerationStructureHostCommands = true; + return retval; + } + + struct SCachedCreationParams + { + //! Brief guideline to good path depth limits + // Want to see stuff with indirect lighting on the other side of a pane of glass + // 5 = glass frontface->glass backface->diffuse surface->diffuse surface->light + // Want to see through a glass box, vase, or office + // 7 = glass frontface->glass backface->glass frontface->glass backface->diffuse surface->diffuse surface->light + // pick higher numbers for better GI and less bias + static inline constexpr uint32_t DefaultPathDepth = 8u; + // TODO: Upload only a subsection of the sample sequence to the GPU, so we can use more samples without trashing VRAM + static inline constexpr uint32_t MaxFreeviewSamples = 0x10000u; + + inline operator bool() const + { + if (!graphicsQueue || !computeQueue || !uploadQueue) + return false; + if (!utilities) + return false; + if (graphicsQueue->getOriginDevice()!=utilities->getLogicalDevice()) + return false; + if (computeQueue->getOriginDevice()!=utilities->getLogicalDevice()) + return false; + if (uploadQueue->getOriginDevice()!=utilities->getLogicalDevice()) + return false; + return true; + } + + video::IQueue* graphicsQueue = nullptr; + video::IQueue* computeQueue = nullptr; + video::IQueue* uploadQueue = nullptr; + // + core::smart_refctd_ptr utilities = nullptr; + }; + static core::smart_refctd_ptr create(SCachedCreationParams&& params); + + // + inline video::ILogicalDevice* getDevice() { return m_params.utilities->getLogicalDevice(); } + + protected: + struct SConstructorParams : SCachedCreationParams + { + core::smart_refctd_ptr converter; + + // per pipeline UBO, with fast updates + core::smart_refctd_ptr uboDS; + // descriptor set for a scene shall contain sampled textures and compiled materials + core::smart_refctd_ptr sceneDS; + + // rendering pipelines + core::smart_refctd_ptr preVis; + core::smart_refctd_ptr pathTracing; + + // rwmc resolve, autoexposure first pass + core::smart_refctd_ptr rwmcResolveAndLumaMeasure; // TODO: autoexposure, and first axis of FFT + // TODO: motion vector stuff + // compute and apply exposure, interleave into OptiX input formats, etc. + core::smart_refctd_ptr preOptiXDenoise; // TODO + // TODO: OIDN denoise + // deinterlave from OptiX output format, perform first axis of FFT + core::smart_refctd_ptr postOptiXDenoise; // TODO + // second axis FFT, spectrum multiply and iFFT + core::smart_refctd_ptr secondAxisBloom; // TODO + // first axis iFFT, tonemap, encode into final EXR format + core::smart_refctd_ptr secondAxisFFTTonemap; // TODO + + // Present + core::smart_refctd_ptr presentRenderpass; + core::smart_refctd_ptr regularPresent; + core::smart_refctd_ptr cubemapPresent; // TODO + }; + inline CRenderer(SConstructorParams&& _params) : m_params(std::move(_params)) {} + virtual inline ~CRenderer() {} + + SConstructorParams m_params; +#if 0 + // semi persistent data + nbl::io::path sampleSequenceCachePath; + struct SampleSequence + { + public: + static inline constexpr auto QuantizedDimensionsBytesize = sizeof(uint64_t); + SampleSequence() : bufferView() {} + + // one less because first path vertex uses a different sequence + static inline uint32_t computeQuantizedDimensions(uint32_t maxPathDepth) {return (maxPathDepth-1)*SAMPLING_STRATEGY_COUNT;} + nbl::core::smart_refctd_ptr createCPUBuffer(uint32_t quantizedDimensions, uint32_t sampleCount); + + // from cache + void createBufferView(nbl::video::IVideoDriver* driver, nbl::core::smart_refctd_ptr&& buff); + // regenerate + nbl::core::smart_refctd_ptr createBufferView(nbl::video::IVideoDriver* driver, uint32_t quantizedDimensions, uint32_t sampleCount); + + auto getBufferView() const {return bufferView;} + + private: + nbl::core::smart_refctd_ptr bufferView; + } sampleSequence; + uint16_t maxPathDepth; + uint16_t noRussianRouletteDepth : 15; + uint16_t hideEnvironment : 1; + uint32_t maxSensorSamples; + + nbl::core::matrix3x4SIMD m_prevView; + nbl::core::matrix4x3 m_prevCamTform; + nbl::core::aabbox3df m_sceneBound; + uint32_t m_framesDispatched; + float m_maxAreaLightLuma; + vec2 m_rcpPixelSize; + uint64_t m_totalRaysCast; + StaticViewData_t m_staticViewData; + RaytraceShaderCommonData_t m_raytraceCommonData; + + // Resources used for envmap sampling + nbl::ext::EnvmapImportanceSampling::EnvmapImportanceSampling m_envMapImportanceSampling; +#endif +}; + +} + +// +namespace nbl::system::impl +{ +template<> +struct to_string_helper +{ + private: + using enum_t = nbl::this_example::CRenderer::RenderMode; + + public: + static inline std::string __call(const enum_t value) + { + switch (value) + { + case enum_t::Beauty: + return "Beauty"; + case enum_t::Previs: + return "Previs"; + default: + break; + } + return ""; + } +}; +} +#endif diff --git a/40_PathTracer/include/renderer/CScene.h b/40_PathTracer/include/renderer/CScene.h new file mode 100644 index 000000000..1b37900be --- /dev/null +++ b/40_PathTracer/include/renderer/CScene.h @@ -0,0 +1,77 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_C_SCENE_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_C_SCENE_H_INCLUDED_ + + +#include "nabla.h" +// TODO: move to `io` +#include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" +#include "nbl/ext/MitsubaLoader/CSerializedLoader.h" + + +namespace nbl::this_example +{ + +class CScene : public core::IReferenceCounted, public core::InterfaceUnmovable +{ + public: + struct SCachedCreationParams + { + inline operator bool() const + { + if (!scene || !metadata) + return false; + return true; + } + + // + core::smart_refctd_ptr scene; + // + core::smart_refctd_ptr metadata; + }; + static core::smart_refctd_ptr create(SCachedCreationParams&& params); + + protected: + struct SConstructorParams : SCachedCreationParams + { + // descriptor set for a scene shall contain sampled textures and compiled materials + core::smart_refctd_ptr sceneDS; + +#if 0 + nbl::core::aabbox3df m_sceneBound; + float m_maxAreaLightLuma; + StaticViewData_t m_staticViewData; + RaytraceShaderCommonData_t m_raytraceCommonData; + // Resources used for envmap sampling + nbl::core::smart_refctd_ptr m_finalEnvmap; +#endif + }; + inline CScene(SConstructorParams&& _params) : m_params(std::move(_params)) {} + virtual inline ~CScene() {} + + SConstructorParams m_params; +#if 0 + // TODO: sensor stuff + uint16_t hideEnvironment : 1; + uint32_t maxSensorSamples; + + uint32_t m_framesDispatched; + vec2 m_rcpPixelSize; + uint64_t m_totalRaysCast; + StaticViewData_t m_staticViewData; + RaytraceShaderCommonData_t m_raytraceCommonData; + + nbl::core::smart_refctd_ptr m_accumulation,m_tonemapOutput; + nbl::core::smart_refctd_ptr m_albedoAcc,m_albedoRslv; + nbl::core::smart_refctd_ptr m_normalAcc,m_normalRslv; + nbl::core::smart_refctd_ptr m_maskAcc; + +#endif + // TODO: for Material Compiler + //std::future compileShadersFuture; +}; + +} +#endif diff --git a/40_PathTracer/include/renderer/SAASequence.h b/40_PathTracer/include/renderer/SAASequence.h new file mode 100644 index 000000000..460e9ee69 --- /dev/null +++ b/40_PathTracer/include/renderer/SAASequence.h @@ -0,0 +1,1046 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl::this_example +{ + +// +struct SAASequence +{ + using type_t = nbl::hlsl::float32_t2; + + inline operator std::span() const {return {Data,Size};} + + static constexpr inline uint32_t Size = 1024; + constexpr static inline type_t Data[Size] = + { + {0.229027962000000, 0.100901043000000}, + {0.934988661250000, 0.900492937500000}, + {0.693936740750000, 0.477888665000000}, + {0.396013875250000, 0.867381653000000}, + {0.151208663250000, 0.331649132250000}, + {0.919338615000000, 0.306386117750000}, + {0.454737456500000, 0.597940860250000}, + {0.911951413000000, 0.584874565000000}, + {0.471331207500000, 0.117509299250000}, + {0.724981748000000, 0.988645892000000}, + {0.227727943750000, 0.553082892250000}, + {0.927148254750000, 0.059077206250000}, + {0.170420940250000, 0.853803466500000}, + {0.369496963250000, 0.372492160250000}, + {0.709055501500000, 0.719526612750000}, + {0.708593019750000, 0.236308825250000}, + {0.053515783250000, 0.244794542562500}, + {0.759417624125000, 0.846532545187500}, + {0.572365454937500, 0.341559262437500}, + {0.269128942562500, 0.962581831375000}, + {0.246508261687500, 0.286661635812500}, + {0.819542439062500, 0.459099133812500}, + {0.411348913687500, 0.737420359250000}, + {0.896647944437500, 0.717554343125000}, + {0.358057598000000, 0.050206801437500}, + {0.605871046250000, 0.779868041500000}, + {0.036816445812500, 0.506511135625000}, + {0.806931985937500, 0.138270723062500}, + {0.045020470000000, 0.818334270875000}, + {0.433264399500000, 0.254739200375000}, + {0.556258709500000, 0.559776624000000}, + {0.611048395312500, 0.162518625750000}, + {0.028918631812500, 0.053438072375000}, + {0.856252533125000, 0.916712681500000}, + {0.580344816187500, 0.463534157062500}, + {0.291334488000000, 0.774756179000000}, + {0.157847279187500, 0.464948199125000}, + {0.775478249937500, 0.320623736250000}, + {0.306258709500000, 0.653526624000000}, + {0.798533046937500, 0.552896543187500}, + {0.349953270437500, 0.123764825500000}, + {0.534027961437500, 0.969931745937500}, + {0.122488661312500, 0.681742937625000}, + {0.849003468812500, 0.216845413250000}, + {0.145343900750000, 0.962506045625000}, + {0.395929912437500, 0.488477370312500}, + {0.675219736437500, 0.601237158875000}, + {0.728921568625000, 0.053308823500000}, + {0.153721825125000, 0.145597505062500}, + {0.852763510375000, 0.797682223125000}, + {0.644595719312500, 0.367380713687500}, + {0.475934665312500, 0.787623234375000}, + {0.037670496437500, 0.386130180750000}, + {0.916111850937500, 0.403604173437500}, + {0.307256453062500, 0.518207928812500}, + {0.836158139312500, 0.677526975812500}, + {0.291525812500000, 0.197831715312500}, + {0.632543215125000, 0.896220934750000}, + {0.039235045687500, 0.629605464812500}, + {0.927263875375000, 0.179881653187500}, + {0.036335975187500, 0.990626511375000}, + {0.458406617875000, 0.372877193062500}, + {0.545614665812500, 0.676662283062500}, + {0.606815968812500, 0.044970413250000}, + {0.031533697125000, 0.184836288625000}, + {0.943869562500000, 0.830155934062500}, + {0.607026984312500, 0.286243495000000}, + {0.385468447812500, 0.923477959062500}, + {0.211591778000000, 0.432717372437500}, + {0.959561740812500, 0.477888665062500}, + {0.340921091062500, 0.599871303750000}, + {0.770926812125000, 0.740443845937500}, + {0.492972183312500, 0.243769330562500}, + {0.520086204062500, 0.865883539250000}, + {0.194132187625000, 0.711586172812500}, + {0.867832801875000, 0.029377324812500}, + {0.018898352500000, 0.755166315812500}, + {0.294110519250000, 0.340476317312500}, + {0.645436781125000, 0.669120978187500}, + {0.537010584750000, 0.070669853500000}, + {0.161951413000000, 0.209874565062500}, + {0.786335975187500, 0.990626511375000}, + {0.525681985937500, 0.419520723062500}, + {0.287619562500000, 0.834550465312500}, + {0.100299557750000, 0.367542953000000}, + {0.787670496437500, 0.386130180750000}, + {0.425010132750000, 0.666850725937500}, + {0.959417841312500, 0.712724761625000}, + {0.259027114250000, 0.027505482375000}, + {0.706747124500000, 0.863983912687500}, + {0.118758709500000, 0.559776624000000}, + {0.979834653750000, 0.076596529437500}, + {0.076814113250000, 0.879551982187500}, + {0.458038607062500, 0.495297691687500}, + {0.676899749875000, 0.533654791000000}, + {0.739509651750000, 0.162886922875000}, + {0.130635833000000, 0.032884578937500}, + {0.995486845875000, 0.879726983937500}, + {0.681683761437500, 0.415213866187500}, + {0.471888733500000, 0.975077322375000}, + {0.002080578437500, 0.292317740812500}, + {0.982026984312500, 0.286243495000000}, + {0.291525812500000, 0.713456715312500}, + {0.803515783250000, 0.619794542562500}, + {0.363736251000000, 0.241491573500000}, + {0.581375603187500, 0.850024182625000}, + {0.126134788437500, 0.739345154625000}, + {0.807256990625000, 0.025225260812500}, + {0.214063133312500, 0.979178170312500}, + {0.279120068187500, 0.455460706437500}, + {0.521614411125000, 0.748128257250000}, + {0.541375661500000, 0.191916865812500}, + {0.092374240812500, 0.093123040062500}, + {0.819780017000000, 0.863865176562500}, + {0.723535390937500, 0.290673655562500}, + {0.333626471625000, 0.991508772375000}, + {0.180081879937500, 0.273337083437500}, + {0.884853249937500, 0.353826861250000}, + {0.486489450437500, 0.649922456187500}, + {0.970355124125000, 0.588720045187500}, + {0.411054041562500, 0.190728892687500}, + {0.670557598000000, 0.782628676437500}, + {0.176686781125000, 0.590995978187500}, + {0.923484185187500, 0.119472166250000}, + {0.229834653750000, 0.826596529437500}, + {0.402229645500000, 0.427815757250000}, + {0.614887300500000, 0.582390020187500}, + {0.721331207625000, 0.117509299250000}, + {0.221780261562500, 0.160787322875000}, + {0.980871046250000, 0.779868041500000}, + {0.521614411125000, 0.498128257250000}, + {0.462698109750000, 0.855009158437500}, + {0.102148981812500, 0.485181351500000}, + {0.790505847500000, 0.272359588500000}, + {0.357263913000000, 0.553624565062500}, + {0.852875617687500, 0.518271589687500}, + {0.412788910312500, 0.072860258937500}, + {0.739509651750000, 0.912886922875000}, + {0.244715387500000, 0.610882883562500}, + {0.931245437000000, 0.247161473000000}, + {0.118495619500000, 0.827404835625000}, + {0.356241537562500, 0.307793951312500}, + {0.739954645312500, 0.601971750750000}, + {0.652229645500000, 0.240315757250000}, + {0.085230272750000, 0.149967825937500}, + {0.790487853250000, 0.802468641250000}, + {0.742972183312500, 0.431269330562500}, + {0.338023546687500, 0.864140358375000}, + {0.161359195250000, 0.386030244500000}, + {0.979622565375000, 0.415764143437500}, + {0.344324410875000, 0.743490102812500}, + {0.850000234687500, 0.588036936812500}, + {0.478921568625000, 0.053308823500000}, + {0.575878945812500, 0.904948635625000}, + {0.066809364125000, 0.711985215062500}, + {0.842374240812500, 0.093123040062500}, + {0.072833622000000, 0.943689044750000}, + {0.473982478062500, 0.309619342562500}, + {0.643468702500000, 0.727011596187500}, + {0.661784804062500, 0.096504548000000}, + {0.075593410125000, 0.020665263437500}, + {0.846367111937500, 0.980869489750000}, + {0.584417841312500, 0.402177886625000}, + {0.419264650000000, 0.807665176000000}, + {0.108911798812500, 0.274823750687500}, + {0.842214949125000, 0.395649388625000}, + {0.424460011250000, 0.541515061937500}, + {0.914875915625000, 0.525088448500000}, + {0.276815978250000, 0.138406141250000}, + {0.682946765937500, 0.941192325375000}, + {0.243922631125000, 0.674414353500000}, + {0.983747165312500, 0.225123234375000}, + {0.209039534812500, 0.919381743937500}, + {0.317979460312500, 0.396931190375000}, + {0.595789254625000, 0.645833852812500}, + {0.589063133312500, 0.229178170312500}, + {0.201732996437500, 0.034567680750000}, + {0.911951413000000, 0.959874565062500}, + {0.669465614812500, 0.307270670687500}, + {0.442773254937500, 0.918452206312500}, + {0.228659227750000, 0.498372525687500}, + {0.864786425062500, 0.258916160937500}, + {0.366015783250000, 0.682294542562500}, + {0.832368054687500, 0.749523853312500}, + {0.475148582250000, 0.180782790250000}, + {0.543804934062500, 0.806559289687500}, + {0.041831345187500, 0.574164114062500}, + {0.981787063687500, 0.014769301562500}, + {0.167325380812500, 0.796656456375000}, + {0.305883847687500, 0.260168413875000}, + {0.736593646187500, 0.544303438875000}, + {0.595631980687500, 0.113338942562500}, + {0.233747165312500, 0.225123234375000}, + {0.881420496437500, 0.854880180750000}, + {0.514120916562500, 0.361726452125000}, + {0.262088408375000, 0.897305108937500}, + {0.040764352812500, 0.448613484812500}, + {0.882527922875000, 0.453355770312500}, + {0.486593646187500, 0.544303438875000}, + {0.944193581437500, 0.650074503000000}, + {0.403764392000000, 0.003513614062500}, + {0.647805652187500, 0.839498260375000}, + {0.004346402437500, 0.700568695812500}, + {0.863684364125000, 0.149485215062500}, + {0.075593410125000, 0.770665263437500}, + {0.260573230937500, 0.378374937125000}, + {0.606947383687500, 0.518907935937500}, + {0.522543332062500, 0.131538315687500}, + {0.115674527437500, 0.213752289562500}, + {0.978861550187500, 0.943531534250000}, + {0.716222608750000, 0.357993041500000}, + {0.396123640562500, 0.988911053812500}, + {0.116417439062500, 0.427849133812500}, + {0.960375986437500, 0.355143408875000}, + {0.396123640562500, 0.613911053812500}, + {0.771872079375000, 0.679610301562500}, + {0.407651999187500, 0.129979780250000}, + {0.610967390562500, 0.957538983500000}, + {0.099030910125000, 0.622227763437500}, + {0.792605235062500, 0.213450866625000}, + {0.231787063687500, 0.764769301562500}, + {0.345102996500000, 0.451097146437500}, + {0.537639116937500, 0.609792796562500}, + {0.670557598000000, 0.032628676437500}, + {0.161148929812500, 0.091845178375000}, + {0.915446201000000, 0.774126136937500}, + {0.542495165812500, 0.275647345250000}, + {0.316935423562500, 0.923529210750000}, + {0.209068937250000, 0.340693571625000}, + {0.770926812125000, 0.490443845937500}, + {0.462732614000000, 0.712224844000000}, + {0.887121046250000, 0.643149291500000}, + {0.302373640562500, 0.082661053812500}, + {0.728921568625000, 0.803308823500000}, + {0.181258709500000, 0.653526624000000}, + {0.977987853250000, 0.146218641250000}, + {0.016702341062500, 0.927996303750000}, + {0.467374240812500, 0.431013665062500}, + {0.706158315437500, 0.659556033875000}, + {0.669301523312500, 0.186625825562500}, + {0.039461819812500, 0.116237049750000}, + {0.798533046937500, 0.927896543187500}, + {0.636245165812500, 0.463147345250000}, + {0.358057598000000, 0.800206801437500}, + {0.057953992187500, 0.323742603312500}, + {0.838196808062500, 0.318240323625000}, + {0.288441098000000, 0.563378403500000}, + {0.981947383687500, 0.518907935937500}, + {0.348181288187500, 0.183212688250000}, + {0.506420496437500, 0.917380180750000}, + {0.164875915625000, 0.525088448500000}, + {0.787802165812500, 0.082912283062500}, + {0.134409779187500, 0.902448199125000}, + {0.408376747562500, 0.326245734125000}, + {0.584561740812500, 0.727888665062500}, + {0.534266910125000, 0.009900787187500}, + {0.192731703031250, 0.122610961484375}, + {0.886403666453125, 0.919165570765625}, + {0.740362459437500, 0.479082682703125}, + {0.381143881328125, 0.825372076343750}, + {0.177058600328125, 0.355660703968750}, + {0.880411986109375, 0.304388585781250}, + {0.489954645359375, 0.601971750750000}, + {0.931245437015625, 0.622161472968750}, + {0.495691442609375, 0.089534956375000}, + {0.748617226093750, 0.964032599359375}, + {0.200716102781250, 0.538594564312500}, + {0.902762098828125, 0.040629656437500}, + {0.167366403703125, 0.826817667671875}, + {0.326986691484375, 0.360298081343750}, + {0.725793624375000, 0.689620323765625}, + {0.725003755000000, 0.197653489734375}, + {0.019203528312500, 0.219887995546875}, + {0.789461819796875, 0.866237049781250}, + {0.602987853250000, 0.364968641265625}, + {0.286530910140625, 0.997227763453125}, + {0.197691088203125, 0.299653371203125}, + {0.864509651765625, 0.475386922921875}, + {0.409509265921875, 0.695098575390625}, + {0.924966150343750, 0.731175805390625}, + {0.366549151562500, 0.016182260046875}, + {0.620446765921875, 0.753692325390625}, + {0.004126035234375, 0.512365679515625}, + {0.759204111453125, 0.126627783906250}, + {0.039461819796875, 0.866237049781250}, + {0.385324830531250, 0.282537197156250}, + {0.529500805046875, 0.539659192578125}, + {0.620486845921875, 0.129726983984375}, + {0.040287232453125, 0.022961294593750}, + {0.871648411546875, 0.886075859718750}, + {0.567731703031250, 0.497610961484375}, + {0.271219649968750, 0.785940731265625}, + {0.149813081953125, 0.495059302156250}, + {0.752354406031250, 0.336633136296875}, + {0.267950605953125, 0.630717656421875}, + {0.763064970921875, 0.526211358984375}, + {0.350302165859375, 0.082912283093750}, + {0.541375661546875, 0.941916865828125}, + {0.067888875390625, 0.648631653203125}, + {0.817282235640625, 0.240645457843750}, + {0.176686781125000, 0.965995978171875}, + {0.414407018781250, 0.458335411421875}, + {0.635381359671875, 0.622395865796875}, + {0.696580598671875, 0.010305563015625}, + {0.146140435203125, 0.181972166265625}, + {0.853197227578125, 0.768215064734375}, + {0.631158461921875, 0.330667103468750}, + {0.443098689046875, 0.770526325000000}, + {0.008860189078125, 0.404241883828125}, + {0.920499240812500, 0.436873040078125}, + {0.274931749687500, 0.517395920968750}, + {0.872488661328125, 0.681742937687500}, + {0.273658139312500, 0.240026975812500}, + {0.686178846875000, 0.902720720890625}, + {0.022328994562500, 0.659601535312500}, + {0.889064677375000, 0.139944156000000}, + {0.041831345203125, 0.949164114093750}, + {0.443262299140625, 0.313206182765625}, + {0.553107937421875, 0.637161480234375}, + {0.576361266343750, 0.010049207671875}, + {0.024757727531250, 0.155556940859375}, + {0.954885609765625, 0.864774783453125}, + {0.576988498046875, 0.268435650828125}, + {0.378272153750000, 0.889096529468750}, + {0.243922631171875, 0.424414353515625}, + {0.993449504812500, 0.487462829328125}, + {0.315047772046875, 0.590538342781250}, + {0.757436479140625, 0.715431613031250}, + {0.454737456671875, 0.222940860375000}, + {0.506538910328125, 0.822860258968750}, + {0.223798019234375, 0.699851317375000}, + {0.839514399500000, 0.012551700359375}, + {0.013378945812500, 0.811198635640625}, + {0.259404890625000, 0.333637616328125}, + {0.674460011265625, 0.635265061968750}, + {0.552179912484375, 0.113477370312500}, + {0.133506990359375, 0.242482936484375}, + {0.792605235078125, 0.963450866640625}, + {0.556245437015625, 0.434661472968750}, + {0.302640369765625, 0.866357903265625}, + {0.104025812484375, 0.322831715312500}, + {0.788292439093750, 0.420036633796875}, + {0.383947288453125, 0.645595154640625}, + {0.987679025703125, 0.720586141078125}, + {0.310166960328125, 0.049274940406250}, + {0.692634651765625, 0.826949422921875}, + {0.066739180750000, 0.551367061812500}, + {0.954885609765625, 0.114774783453125}, + {0.106252533187500, 0.916712681484375}, + {0.490362459437500, 0.479082682703125}, + {0.646028602781250, 0.509297689312500}, + {0.696508847734375, 0.182043413890625}, + {0.167639399500000, 0.008157169109375}, + {0.942731703031250, 0.935110961484375}, + {0.682010510203125, 0.383364961312500}, + {0.444750986453125, 0.993815283906250}, + {0.012293182515625, 0.265019109265625}, + {0.943520700468750, 0.285664643703125}, + {0.256436740812500, 0.727888665078125}, + {0.792605235078125, 0.588450866640625}, + {0.323828669343750, 0.228345414000000}, + {0.589727949703125, 0.818705937671875}, + {0.146647944500000, 0.717554343109375}, + {0.763378945812500, 0.061198635640625}, + {0.245217761562500, 0.944967010375000}, + {0.302009651765625, 0.475386922921875}, + {0.508157018781250, 0.708335411421875}, + {0.552058600328125, 0.230660703968750}, + {0.076470961921875, 0.065042103468750}, + {0.839060384390625, 0.826948487828125}, + {0.743383847734375, 0.260168413890625}, + {0.361729406031250, 0.961633136296875}, + {0.130411986109375, 0.304388585781250}, + {0.913057411375000, 0.372578939312500}, + {0.450791960328125, 0.635700721656250}, + {0.994715387546875, 0.610882883562500}, + {0.396123640625000, 0.238911053828125}, + {0.635171568625000, 0.803308823531250}, + {0.134436274500000, 0.588235294109375}, + {0.893091363734375, 0.085389815609375}, + {0.204885609765625, 0.864774783453125}, + {0.419763913046875, 0.397374565093750}, + {0.589063133281250, 0.604178170375000}, + {0.692634651765625, 0.076949422921875}, + {0.192731703031250, 0.185110961484375}, + {0.951814247656250, 0.756306315203125}, + {0.506689186515625, 0.439765218421875}, + {0.456461826015625, 0.821001137000000}, + {0.083707248625000, 0.461775383828125}, + {0.764249240812500, 0.280623040078125}, + {0.323579912671875, 0.557221730578125}, + {0.818079645359375, 0.508221750750000}, + {0.435674328421875, 0.095052115796875}, + {0.725148582281250, 0.930782790234375}, + {0.211591777984375, 0.620217372437500}, + {0.901467761562500, 0.194967010375000}, + {0.114509651765625, 0.873824422921875}, + {0.350302165859375, 0.270412283093750}, + {0.713196808109375, 0.568240323640625}, + {0.631158461921875, 0.205667103468750}, + {0.121648411546875, 0.136075859718750}, + {0.807256990687500, 0.775225260828125}, + {0.748441255000000, 0.385153489734375}, + {0.322881990687500, 0.822100260828125}, + {0.170499240812500, 0.436873040078125}, + {0.976735045687500, 0.379605464812500}, + {0.326988498046875, 0.705935650828125}, + {0.849030910140625, 0.622227763453125}, + {0.456628942609375, 0.025081831375000}, + {0.603718978906250, 0.881272112125000}, + {0.087671365468750, 0.733711546609375}, + {0.858316099875000, 0.063684800093750}, + {0.079233855890625, 0.980882302687500}, + {0.498831558375000, 0.275019330593750}, + {0.683006746078125, 0.696560873750000}, + {0.634421685203125, 0.070155760015625}, + {0.100941098000000, 0.000878403515625}, + {0.868983666328125, 0.946905808593750}, + {0.622823333015625, 0.407884578921875}, + {0.380701413046875, 0.772374565093750}, + {0.070966777984375, 0.276467372437500}, + {0.869730392156250, 0.411764705875000}, + {0.390973727828125, 0.533716984406250}, + {0.934919978109375, 0.561328326953125}, + {0.267725152796875, 0.170350753109375}, + {0.650556467859375, 0.939171186375000}, + {0.208092013671875, 0.656130963328125}, + {0.939854406031250, 0.211633136296875}, + {0.227987853250000, 0.896218641265625}, + {0.362037827187500, 0.411778179156250}, + {0.575564970921875, 0.682461358984375}, + {0.603861550250000, 0.193531534234375}, + {0.245936791328125, 0.056280808593750}, + {0.931245437015625, 0.997161472968750}, + {0.674341363734375, 0.272889815609375}, + {0.475148582281250, 0.930782790234375}, + {0.196690920375000, 0.490305748390625}, + {0.823073230953125, 0.290484312156250}, + {0.349801672015625, 0.643614082703125}, + {0.816809364156250, 0.711985215093750}, + {0.442773254953125, 0.168452206343750}, + {0.559175124515625, 0.768645847968750}, + {0.012608312281250, 0.564660480046875}, + {0.951732996484375, 0.034567680765625}, + {0.130635833015625, 0.782884578921875}, + {0.295859197828125, 0.295320202140625}, + {0.712431749687500, 0.517395920968750}, + {0.572626461875000, 0.068089897125000}, + {0.211591777984375, 0.245217372437500}, + {0.901756746078125, 0.821560873750000}, + {0.512364601359375, 0.315328221531250}, + {0.275838593406250, 0.932598880093750}, + {0.007956102718750, 0.451497525703125}, + {0.924966150343750, 0.481175805390625}, + {0.454495282953125, 0.559257955593750}, + {0.978187903312500, 0.673257136171875}, + {0.416103249937500, 0.041326861265625}, + {0.664447403859375, 0.864416693968750}, + {0.033521988046875, 0.696631179015625}, + {0.852837228421875, 0.184355089812500}, + {0.090934062750000, 0.810372893375000}, + {0.275746963312500, 0.411554660312500}, + {0.588037245453125, 0.558795337875000}, + {0.554922654015625, 0.160357524531250}, + {0.072833622000000, 0.193689044750000}, + {0.964063133281250, 0.979178170375000}, + {0.708517234312500, 0.319548392906250}, + {0.432256990687500, 0.962725260828125}, + {0.068079645359375, 0.414471750750000}, + {0.963190877593750, 0.324420555781250}, + {0.411502470921875, 0.573086358984375}, + {0.800162124781250, 0.669611615750000}, + {0.387554934109375, 0.150309289718750}, + {0.579945004250000, 0.965966294140625}, + {0.065522102093750, 0.599326277234375}, + {0.761255117500000, 0.204583567718750}, + {0.196950541453125, 0.770728070765625}, + {0.344324410937500, 0.493490102843750}, + {0.510111550250000, 0.568531534234375}, + {0.636389399500000, 0.008157169109375}, + {0.128530229140625, 0.090431613031250}, + {0.883566727531250, 0.752475196796875}, + {0.552206093281250, 0.309003302484375}, + {0.348181288187500, 0.933212688265625}, + {0.227987853250000, 0.364968641265625}, + {0.771924527437500, 0.448127289609375}, + {0.489679912484375, 0.745313307812500}, + {0.927148254953125, 0.684077206343750}, + {0.264066255000000, 0.103903489734375}, + {0.740057568187500, 0.764054456484375}, + {0.148947313656250, 0.630208463203125}, + {0.974161986109375, 0.179388585781250}, + {0.010457836296875, 0.893541028515625}, + {0.498441255000000, 0.385153489734375}, + {0.744468904421875, 0.637380397421875}, + {0.678301531546875, 0.136423458078125}, + {0.010191088203125, 0.112153371203125}, + {0.774757727531250, 0.905556940859375}, + {0.674229406031250, 0.446008136296875}, + {0.319922419343750, 0.784986039000000}, + {0.011042923812500, 0.349437248625000}, + {0.821379264859375, 0.354136628500000}, + {0.257237357453125, 0.579287821171875}, + {0.948151308765625, 0.522112716656250}, + {0.318520700468750, 0.160664643703125}, + {0.543804934109375, 0.900309289718750}, + {0.130607996843750, 0.519919121093750}, + {0.811627065421875, 0.071665408953125}, + {0.160867175625000, 0.931752899046875}, + {0.428297719390625, 0.362355138953125}, + {0.609505036140625, 0.690144858781250}, + {0.504804041609375, 0.003228892687500}, + {0.216196606265625, 0.064729040234375}, + {0.901736845921875, 0.879726983984375}, + {0.708719649968750, 0.453909481265625}, + {0.415337952218750, 0.849024716109375}, + {0.134853249937500, 0.353826861265625}, + {0.903787097500000, 0.267080047484375}, + {0.479025812484375, 0.572831715312500}, + {0.876605124109375, 0.604345045187500}, + {0.456461826015625, 0.071001137000000}, + {0.709494562484375, 0.955644215312500}, + {0.231947383703125, 0.518907935984375}, + {0.932230392156250, 0.013327205875000}, + {0.145086204046875, 0.865883539265625}, + {0.350930456281250, 0.348899376265625}, + {0.725034838203125, 0.739106496203125}, + {0.739954645359375, 0.226971750750000}, + {0.042605235078125, 0.213450866640625}, + {0.811627065421875, 0.821665408953125}, + {0.587735274500000, 0.312719600875000}, + {0.307230392156250, 0.950827205875000}, + {0.217487057734375, 0.273792953031250}, + {0.854401308765625, 0.440081466656250}, + {0.384747995484375, 0.706561433531250}, + {0.899206688062500, 0.699456330843750}, + {0.334068937265625, 0.028193571656250}, + {0.576864665859375, 0.801662283093750}, + {0.041360180171875, 0.539240991515625}, + {0.780622165328125, 0.170435734406250}, + {0.025074889437500, 0.841885738250000}, + {0.412686740812500, 0.274763665078125}, + {0.551686781125000, 0.512870978171875}, + {0.574633261734375, 0.138224135796875}, + {0.058436791328125, 0.056280808593750}, + {0.822110274500000, 0.890844600875000}, + {0.618449504812500, 0.487462829328125}, + {0.264066255000000, 0.807028489734375}, + {0.132527922859375, 0.453355770328125}, + {0.807953992203125, 0.323742603312500}, + {0.302148254953125, 0.684077206343750}, + {0.794171695281250, 0.522748994546875}, + {0.372686140625000, 0.110004803828125}, + {0.507741981953125, 0.951245734125000}, + {0.107097304093750, 0.651192048015625}, + {0.822833622000000, 0.193689044750000}, + {0.181245437015625, 0.997161472968750}, + {0.384747995484375, 0.456561433531250}, + {0.662226998781250, 0.569583683906250}, + {0.727197083078125, 0.021632137984375}, + {0.184988661328125, 0.150492937687500}, + {0.873243045828125, 0.810942332640625}, + {0.684963615078125, 0.357045297593750}, + {0.461525611203125, 0.759528012437500}, + {0.025718904421875, 0.410817897421875}, + {0.897009311359375, 0.420948834468750}, + {0.263037063734375, 0.546019301578125}, + {0.857097304093750, 0.651192048015625}, + {0.252866199843750, 0.205957512640625}, + {0.665602115484375, 0.895166775859375}, + {0.056996963312500, 0.684992160312500}, + {0.918804934109375, 0.150309289718750}, + {0.019203528312500, 0.969887995546875}, + {0.485853633703125, 0.339220435984375}, + {0.509352115484375, 0.676416775859375}, + {0.564952190359375, 0.039350341546875}, + {0.061178846875000, 0.152720720890625}, + {0.979027962734375, 0.850901043359375}, + {0.618986693593750, 0.251067502968750}, + {0.416788412578125, 0.890801763843750}, + {0.191935423609375, 0.392279210781250}, + {0.959424527437500, 0.448127289609375}, + {0.360967390625000, 0.582538983515625}, + {0.802390435203125, 0.744472166265625}, + {0.498617226093750, 0.214032599359375}, + {0.542495165859375, 0.838147345281250}, + {0.211457644609375, 0.742513028953125}, + {0.837488317609375, 0.030941206375000}, + {0.038430456281250, 0.786399376265625}, + {0.290324504812500, 0.370275329328125}, + {0.678301531546875, 0.667673458078125}, + {0.510613942796875, 0.106955928328125}, + {0.170736691484375, 0.235298081343750}, + {0.759083993796875, 0.997656627843750}, + {0.539572313656250, 0.380208463203125}, + {0.255388875390625, 0.867381653203125}, + {0.071379264859375, 0.354136628500000}, + {0.758860189078125, 0.404241883828125}, + {0.420321786390625, 0.636298924375000}, + {0.978659227718750, 0.748372525703125}, + {0.287503755000000, 0.010153489734375}, + {0.739679912484375, 0.870313307812500}, + {0.089315978250000, 0.513406141265625}, + {0.943869562484375, 0.080155934062500}, + {0.076564677375000, 0.913381656000000}, + {0.444542439093750, 0.459099133796875}, + {0.633162999796875, 0.540307445062500}, + {0.709818581500000, 0.157887002984375}, + {0.167325380828125, 0.046656456390625}, + {0.977987853250000, 0.896218641265625}, + {0.652229645515625, 0.427815757218750}, + {0.492972183375000, 0.993769330593750}, + {0.040505847500000, 0.272359588500000}, + {0.962978249937500, 0.260076861265625}, + {0.302640369765625, 0.741357903265625}, + {0.768208405500000, 0.610922261187500}, + {0.318273390046875, 0.193320190000000}, + {0.619905641343750, 0.853941035859375}, + {0.181258709546875, 0.747276624031250}, + {0.757229657953125, 0.013359518093750}, + {0.244715387546875, 0.985882883562500}, + {0.259008847734375, 0.494543413890625}, + {0.554055652187500, 0.714498260375000}, + {0.534027961468750, 0.219931745984375}, + {0.069780017046875, 0.113865176609375}, + {0.848982478109375, 0.872119342578125}, + {0.713196808109375, 0.271365323640625}, + {0.363736251062500, 0.991491573531250}, + {0.150433761421875, 0.282401366203125}, + {0.901208663437500, 0.331649132359375}, + {0.463675308375000, 0.681269330593750}, + {0.949633261734375, 0.606974135796875}, + {0.384851754687500, 0.208333852843750}, + {0.668614601359375, 0.752828221531250}, + {0.181245437015625, 0.622161472968750}, + {0.895086204046875, 0.115883539265625}, + {0.193869562484375, 0.830155934062500}, + {0.387335340921875, 0.396347453890625}, + {0.564854406031250, 0.586633136296875}, + {0.745691442609375, 0.089534956375000}, + {0.245486845921875, 0.129726983984375}, + {0.982811359250000, 0.811790368250000}, + {0.536503468843750, 0.474657913296875}, + {0.489679912484375, 0.870313307812500}, + {0.067888875390625, 0.492381653203125}, + {0.751398662484375, 0.307813307812500}, + {0.352987853250000, 0.521218641265625}, + {0.869468904421875, 0.543630397421875}, + {0.400000938734375, 0.102147747421875}, + {0.716287097500000, 0.892080047484375}, + {0.204945004250000, 0.590966294140625}, + {0.883506990359375, 0.242482936484375}, + {0.065143307734375, 0.844593734281250}, + {0.327001686515625, 0.299140218421875}, + {0.748446786390625, 0.573798924375000}, + {0.665686274500000, 0.213235294109375}, + {0.115683153500000, 0.178056211000000}, + {0.757883424281250, 0.796209072156250}, + {0.713222390562500, 0.397222857359375}, + {0.362679025703125, 0.845586141078125}, + {0.147009311359375, 0.420948834468750}, + {0.954472218843750, 0.404345413296875}, + {0.363836204046875, 0.740883539265625}, + {0.821904890625000, 0.583637616328125}, + {0.492832801906250, 0.029377324812500}, + {0.599161986109375, 0.929388585781250}, + {0.096331207625000, 0.695634299281250}, + {0.848982478109375, 0.122119342578125}, + {0.099003468843750, 0.966845413296875}, + {0.462431749687500, 0.267395920968750}, + {0.677354460328125, 0.725544471656250}, + {0.650008709546875, 0.122276624031250}, + {0.123243045828125, 0.060942332640625}, + {0.822833622000000, 0.943689044750000}, + {0.586591777984375, 0.432717372437500}, + {0.408903485687500, 0.776738982078125}, + {0.080479406031250, 0.305383136296875}, + {0.818079645359375, 0.414471750750000}, + {0.431931985968750, 0.513270723078125}, + {0.903169756421875, 0.555146535265625}, + {0.285517059468750, 0.166546514046875}, + {0.675219736453125, 0.976237158906250}, + {0.238942076937500, 0.643155269500000}, + {0.964063133281250, 0.229178170375000}, + {0.192731703031250, 0.935110961484375}, + {0.348181288187500, 0.401962688265625}, + {0.618003220203125, 0.658636770343750}, + {0.588190877593750, 0.199420555781250}, + {0.216958200468750, 0.007344331203125}, + {0.883506990359375, 0.992482936484375}, + {0.636042923812500, 0.294749748625000}, + {0.481304934109375, 0.900309289718750}, + {0.237679025703125, 0.470586141078125}, + {0.831750421625000, 0.262285054609375}, + {0.341264392046875, 0.675388614109375}, + {0.864509651765625, 0.725386922921875}, + {0.481304934109375, 0.150309289718750}, + {0.507180456281250, 0.786399376265625}, + {0.033602444796875, 0.600612049781250}, + {0.962250867203125, 0.054211353312500}, + {0.151703992203125, 0.761242603312500}, + {0.261464799453125, 0.261330050531250}, + {0.740667841375000, 0.511552886640625}, + {0.615093996609375, 0.088785852218750}, + {0.228861550250000, 0.193531534234375}, + {0.920420940359375, 0.853803466546875}, + {0.541111850968750, 0.345010423484375}, + {0.305472183375000, 0.900019330593750}, + {0.044319650703125, 0.478886922328125}, + {0.892725152796875, 0.482850753109375}, + {0.490667841375000, 0.511552886640625}, + {0.971780261562500, 0.629537322875000}, + {0.387554934109375, 0.056559289718750}, + {0.662939186515625, 0.814765218421875}, + {0.052390435203125, 0.744472166265625}, + {0.826564677375000, 0.163381656000000}, + {0.103197227578125, 0.768215064734375}, + {0.306265020015625, 0.415613958984375}, + {0.575564970921875, 0.526211358984375}, + {0.505411986109375, 0.183294835781250}, + {0.079233855890625, 0.230882302687500}, + {0.939854406031250, 0.961633136296875}, + {0.747488661328125, 0.369242937687500}, + {0.399926992500000, 0.954583567718750}, + {0.122446606265625, 0.392854040234375}, + {0.987719736453125, 0.351237158906250}, + {0.425219736453125, 0.601237158906250}, + {0.774082801906250, 0.642658574812500}, + {0.416299322109375, 0.161398788656250}, + {0.602987853250000, 0.989968641265625}, + {0.089514399500000, 0.575051700359375}, + {0.786335975187500, 0.240626511406250}, + {0.231815968843750, 0.794970413296875}, + {0.318750014656250, 0.443002308546875}, + {0.505566445812500, 0.623698635640625}, + {0.637067640531250, 0.039603148984375}, + {0.180883847734375, 0.072668413890625}, + {0.917325380828125, 0.796656456390625}, + {0.526756746078125, 0.259060873750000}, + {0.334878599875000, 0.893762925093750}, + {0.225425998046875, 0.315310650828125}, + {0.794319650703125, 0.478886922328125}, + {0.445957801906250, 0.724689824812500}, + {0.915440720703125, 0.656963966125000}, + {0.302640369765625, 0.116357903265625}, + {0.698976641187500, 0.774455388406250}, + {0.177148254953125, 0.684077206343750}, + {0.949633261734375, 0.138224135796875}, + {0.045314677375000, 0.913381656000000}, + {0.458038607125000, 0.401547691687500}, + {0.713980484156250, 0.625879926296875}, + {0.643255797593750, 0.155068738921875}, + {0.025074889437500, 0.091885738250000}, + {0.766702341031250, 0.927996303765625}, + {0.664447403859375, 0.489416693968750}, + {0.352787232453125, 0.772961294593750}, + {0.025478249937500, 0.320623736265625}, + {0.854074830531250, 0.352849697156250}, + {0.286530910140625, 0.622227763453125}, + {0.977727943875000, 0.553082892328125}, + {0.350685461906250, 0.153596186453125}, + {0.557230392156250, 0.880514705875000}, + {0.153169756421875, 0.555146535265625}, + {0.789461819796875, 0.116237049781250}, + {0.168804934109375, 0.900309289718750}, + {0.377355421296875, 0.330038940000000}, + {0.612679025703125, 0.720586141078125}, + {0.508506990359375, 0.054982936484375}, + {0.196917624109375, 0.096532545187500}, + {0.910867175625000, 0.931752899046875}, + {0.728171685203125, 0.443690916265625}, + {0.435674328421875, 0.845052115796875}, + {0.182230392156250, 0.325827205875000}, + {0.884087952218750, 0.286524716109375}, + {0.448834987281250, 0.579381097156250}, + {0.884436274500000, 0.588235294109375}, + {0.439752211921875, 0.123635853468750}, + {0.688559795171875, 0.981591765453125}, + {0.198151308765625, 0.522112716656250}, + {0.901703992203125, 0.011242603312500}, + {0.128530229140625, 0.840431613031250}, + {0.317826892046875, 0.321872989109375}, + {0.696508847734375, 0.744543413890625}, + {0.688847218843750, 0.216845413296875}, + {0.004724588125000, 0.188791578953125}, + {0.787802165859375, 0.832912283093750}, + {0.567052101359375, 0.371480565281250}, + {0.302148254953125, 0.996577206343750}, + {0.243986693593750, 0.251067502968750}, + {0.825795788375000, 0.474201552750000}, + {0.430456688062500, 0.699456330843750}, + {0.931258709546875, 0.747276624031250}, + {0.325564970921875, 0.057461358984375}, + {0.575724486109375, 0.777044835781250}, + {0.058436791328125, 0.525030808593750}, + {0.759808761421875, 0.157401366203125}, + {0.010191088203125, 0.862153371203125}, + {0.411476654468750, 0.296344298265625}, + {0.505607996843750, 0.519919121093750}, + {0.563898662484375, 0.182813307812500}, + {0.007229657953125, 0.013359518093750}, + {0.826564677375000, 0.913381656000000}, + {0.615744562484375, 0.440019215312500}, + {0.273049196593750, 0.752824731078125}, + {0.126134788453125, 0.489345154640625}, + {0.776528750687500, 0.346344691359375}, + {0.271093019843750, 0.673808825390625}, + {0.766504140937500, 0.549462718109375}, + {0.321690920375000, 0.115305748390625}, + {0.552058600328125, 0.980660703968750}, + {0.086158139312500, 0.677526975812500}, + {0.868983666328125, 0.196905808593750}, + {0.133506990359375, 0.992482936484375}, + {0.428389216390625, 0.479768192062500}, + {0.642183234343750, 0.594837245046875}, + {0.693414534828125, 0.060006743953125}, + {0.177263875390625, 0.179881653203125}, + {0.822881453125000, 0.799457928828125}, + {0.657602996515625, 0.342698708937500}, + {0.447833622000000, 0.795251544750000}, + {0.049908315421875, 0.432993533953125}, + {0.884744019140625, 0.384203634359375}, + {0.303495121031250, 0.531469981562500}, + {0.829156508796875, 0.635903919875000}, + {0.279003945812500, 0.225261135640625}, + {0.626154293906250, 0.912625724750000}, + {0.009546365468750, 0.639961546609375}, + {0.886403666453125, 0.169165570765625}, + {0.052259883703125, 0.979845435984375}, + {0.459494562484375, 0.330644215312500}, + {0.524813081953125, 0.651309302156250}, + {0.606787063734375, 0.014769301578125}, + {0.010457836296875, 0.143541028515625}, + {0.947626461875000, 0.818089897125000}, + {0.571917624109375, 0.284032545187500}, + {0.416299322109375, 0.911398788656250}, + {0.236033046906250, 0.396646543203125}, + {0.990797719390625, 0.456105138953125}, + {0.333626471656250, 0.616508772390625}, + {0.787209695250000, 0.716482502812500}, + {0.439756778562500, 0.194376370593750}, + {0.552179912484375, 0.863477370312500}, + {0.243449504812500, 0.737462829328125}, + {0.822881453125000, 0.049457928828125}, + {0.058436791328125, 0.806280808593750}, + {0.267237456671875, 0.347940860375000}, + {0.659100916609375, 0.628228892687500}, + {0.525109796625000, 0.082597554609375}, + {0.135951233515625, 0.201639822421875}, + {0.761255117500000, 0.954583567718750}, + {0.556490320328125, 0.399823343937500}, + {0.258466777984375, 0.838967372437500}, + {0.104074830531250, 0.352849697156250}, + {0.803389216390625, 0.386018192062500}, + {0.392068237890625, 0.666186056234375}, + {0.984505036140625, 0.690144858781250}, + {0.277932351500000, 0.052908284062500}, + {0.713196808109375, 0.833865323640625}, + {0.102915496515625, 0.537034646437500}, + {0.979027962734375, 0.100901043359375}, + {0.106115002812500, 0.885378765484375}, + {0.473302101359375, 0.455953221531250}, + {0.685199429843750, 0.551526284734375}, + {0.706158315421875, 0.128306033953125}, + {0.147265783328125, 0.057294542578125}, + {0.959039534828125, 0.919381743953125}, + {0.646013875390625, 0.398631653203125}, + {0.458517234312500, 0.944548392906250}, + {0.056758487734375, 0.295253452109375}, + {0.988836204046875, 0.303383539265625}, + {0.254309364156250, 0.711985215093750}, + {0.775799306890625, 0.567053766171875}, + {0.340921091031250, 0.224871303765625}, + {0.620506746078125, 0.821560873750000}, + {0.160012464359375, 0.690720392781250}, + {0.788430456281250, 0.036399376265625}, + {0.220355124109375, 0.963720045187500}, + {0.287648582281250, 0.493282790234375}, + {0.536503468843750, 0.724657913296875}, + {0.552148254953125, 0.246577206343750}, + {0.116843560203125, 0.107753416265625}, + {0.842374240812500, 0.843123040078125}, + {0.696912145562500, 0.310820697562500}, + {0.330881907437500, 0.958779769828125}, + {0.169338615078125, 0.306386117906250}, + {0.927058600328125, 0.355660703968750}, + {0.477915496515625, 0.630784646437500}, + {0.947107614062500, 0.571599844062500}, + {0.411298019234375, 0.231101317375000}, + {0.634196201015625, 0.774126137000000}, + {0.133506990359375, 0.617482936484375}, + {0.911148929828125, 0.091845178421875}, + {0.229027962734375, 0.850901043359375}, + {0.433436791328125, 0.431280808593750}, + {0.570900611203125, 0.618903012437500}, + {0.691994851500000, 0.099783284062500}, + {0.212500058671875, 0.147009234203125}, + {0.951732996484375, 0.784567680765625}, + {0.538479240078125, 0.442006235093750}, + {0.495691442609375, 0.839534956375000}, + {0.092295940359375, 0.445600341546875}, + {0.806758487734375, 0.295253452109375}, + {0.374367956281250, 0.505149376265625}, + {0.822584307093750, 0.538276272453125}, + {0.381143881328125, 0.075372076343750}, + {0.746648411546875, 0.886075859718750}, + {0.225690524703125, 0.572657414093750}, + {0.908135803781250, 0.224055233687500}, + {0.100557411375000, 0.856953939312500}, + {0.326749240812500, 0.257185540078125}, + {0.703382877203125, 0.594522854968750}, + {0.635381359671875, 0.247395865796875}, + {0.091503945812500, 0.170573635640625}, + {0.773093560203125, 0.773280760015625}, + {0.699864601359375, 0.424703221531250}, + {0.337978249937500, 0.822576861265625}, + {0.177278602781250, 0.415547689312500}, + {0.950716102781250, 0.382344564312500}, + {0.345102996515625, 0.701097146437500}, + {0.850941098000000, 0.563378403515625}, + {0.447833622000000, 0.045251544750000}, + {0.584417841375000, 0.933427886640625}, + {0.117581880000000, 0.710837083484375}, + {0.864601654468750, 0.093219298265625}, + {0.115674527437500, 0.963752289609375}, + {0.438901999203125, 0.285253217765625}, + {0.641702341031250, 0.693621303765625}, + {0.662939186515625, 0.064765218421875}, + {0.102763510390625, 0.047682223171875}, + {0.821904890625000, 0.958637616328125}, + {0.617697313656250, 0.380208463203125}, + {0.387554934109375, 0.806559289718750}, + {0.096212938656250, 0.263020963203125}, + {0.867422654015625, 0.379107524531250}, + {0.383386910937500, 0.555990102843750}, + {0.880607996843750, 0.519919121093750}, + {0.252252211921875, 0.162698353468750}, + {0.642183234343750, 0.969837245046875}, + {0.196583993796875, 0.685156627843750}, + {0.963190877593750, 0.199420555781250}, + {0.199633261734375, 0.888224135796875}, + {0.350685461906250, 0.434846186453125}, + {0.614399749906250, 0.627404791062500}, + {0.608747165328125, 0.225123234406250}, + {0.244958663437500, 0.019149132359375}, + {0.880021551015625, 0.966470884812500}, + {0.686627065421875, 0.259165408953125}, + {0.463246963312500, 0.880304660312500}, + {0.209424527437500, 0.448127289609375}, + {0.850148582281250, 0.305782790234375}, + {0.321802423796875, 0.647870625703125}, + {0.853921568625000, 0.709558823531250}, + {0.496648411546875, 0.136075859718750}, + {0.534266910125000, 0.759900787234375}, + {0.018208405500000, 0.610922261187500}, + {0.981815968843750, 0.044970413296875}, + {0.147265783328125, 0.807294542578125}, + {0.278764594718750, 0.293912076453125}, + {0.697168203437500, 0.508447093109375}, + {0.569936479140625, 0.090431613031250}, + {0.199633261734375, 0.231974135796875}, + {0.878530229140625, 0.840431613031250}, + {0.530111691484375, 0.313423081343750}, + {0.287503755000000, 0.877340989734375}, + {0.020926812156250, 0.490443845953125}, + {0.925118529859375, 0.463551966546875}, + {0.460281853234375, 0.514086682671875}, + {0.946583993796875, 0.685156627843750}, + {0.432953992203125, 0.011242603312500}, + {0.681996963312500, 0.841242160312500}, + {0.035440756328125, 0.741967535328125}, + {0.826814113265625, 0.129551982203125}, + {0.102763510390625, 0.797682223171875}, + {0.257250986453125, 0.431315283906250}, + {0.601735045687500, 0.535855464812500}, + {0.523947313656250, 0.161458463203125}, + {0.089514399500000, 0.200051700359375}, + {0.998871711468750, 0.969931745984375}, + {0.725557411375000, 0.325703939312500}, + {0.411054041609375, 0.940728892687500}, + {0.092214949156250, 0.395649388656250}, + {0.947571808109375, 0.318240323640625}, + {0.385150528859375, 0.598791693968750}, + {0.760340045031250, 0.666060247875000}, + {0.385468447859375, 0.173477959078125}, + {0.619715387546875, 0.985882883562500}, + {0.110693313734375, 0.604613051578125}, + {0.759083993796875, 0.247656627843750}, + {0.198151308765625, 0.803362716656250}, + {0.368295322046875, 0.475490672062500}, + {0.522431987421875, 0.579676484406250}, + {0.668614601359375, 0.002828221531250}, + {0.159061291453125, 0.115786836312500}, + {0.885343915375000, 0.797979216453125}, + {0.502049350968750, 0.290322923484375}, + {0.361960011265625, 0.916515061968750}, + {0.212257727531250, 0.374306940859375}, + {0.808006746078125, 0.446560873750000}, + {0.489786425109375, 0.696416160921875}, + {0.914775229140625, 0.625807223171875}, + {0.272394756234375, 0.082105300000000}, + {0.697833622000000, 0.795251544750000}, + {0.146614411140625, 0.654378257218750}, + {0.959039534828125, 0.169381743953125}, + {0.056931985968750, 0.888270723078125}, + {0.493765020015625, 0.415613958984375}, + {0.727875617718750, 0.674521589734375}, + {0.646232996843750, 0.172262871093750}, + {0.052267234312500, 0.085173392906250}, + {0.794171695281250, 0.897748994546875}, + {0.664407018781250, 0.458335411421875}, + {0.318273390046875, 0.755820190000000}, + {0.046247165328125, 0.350123234406250}, + {0.865799202015625, 0.333466330906250}, + {0.285498339406250, 0.583855022421875}, + {0.956147102093750, 0.505576277234375}, + {0.318949826718750, 0.141763441546875}, + {0.510429611953125, 0.887119489765625}, + {0.184919978109375, 0.561328326953125}, + {0.759417624109375, 0.096532545187500}, + {0.130403602781250, 0.937032064312500}, + {0.394595719296875, 0.367380713734375}, + {0.567731703031250, 0.747610961484375}, + {0.538716806515625, 0.039836274843750} + }; +}; + +} \ No newline at end of file diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index ecaf53b7f..46ffed947 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -1,34 +1,43 @@ -// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "common.hpp" -#include "nbl/this_example/builtin/build/spirv/keys.hpp" +#include "nbl/examples/common/BuiltinResourcesApplication.hpp" + +#include "nbl/examples/examples.hpp" + +#include "renderer/CRenderer.h" + + +// TODO remove #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" +#include "common.hpp" #include "nbl/builtin/hlsl/indirect_commands.hlsl" -#include "nbl/examples/common/BuiltinResourcesApplication.hpp" +using namespace nbl::application_templates; +using namespace nbl::examples; +using namespace nbl::this_example; -class RaytracingPipelineApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication +class PathTracingApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { using device_base_t = SimpleWindowedApplication; using asset_base_t = BuiltinResourcesApplication; - using clock_t = std::chrono::steady_clock; - constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; + constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; // TODO: remove constexpr static inline uint32_t MaxFramesInFlight = 3u; - constexpr static inline uint8_t MaxUITextureCount = 1u; - constexpr static inline uint32_t NumberOfProceduralGeometries = 5; + constexpr static inline uint8_t MaxUITextureCount = 1u; // TODO: remove + constexpr static inline uint32_t NumberOfProceduralGeometries = 5; // TODO: remove - static constexpr const char* s_lightTypeNames[E_LIGHT_TYPE::ELT_COUNT] = { + static constexpr const char* s_lightTypeNames[E_LIGHT_TYPE::ELT_COUNT] = { // TODO: remove "Directional", "Point", "Spot" }; - struct ShaderBindingTable + struct ShaderBindingTable // TODO: remove { SBufferRange raygenGroupRange; SBufferRange hitGroupsRange; @@ -41,25 +50,19 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui public: - inline RaytracingPipelineApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) - { - } + inline PathTracingApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} inline SPhysicalDeviceFeatures getRequiredDeviceFeatures() const override { auto retval = device_base_t::getRequiredDeviceFeatures(); - retval.rayTracingPipeline = true; - retval.accelerationStructure = true; - retval.rayQuery = true; - return retval; + return retval.unionWith(CRenderer::RequiredDeviceFeatures()); } inline SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override { auto retval = device_base_t::getPreferredDeviceFeatures(); - retval.accelerationStructureHostCommands = true; - return retval; + return retval.unionWith(CRenderer::PreferredDeviceFeatures()); } inline core::vector getSurfaces() const override @@ -67,7 +70,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui if (!m_surface) { { - auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem),smart_refctd_ptr(m_logger)); IWindow::SCreationParams params = {}; params.callback = core::make_smart_refctd_ptr(); params.width = WIN_W; @@ -90,14 +93,6 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui return {}; } - // so that we can use the same queue for asset converter and rendering - inline core::vector getQueueRequirements() const override - { - auto reqs = device_base_t::getQueueRequirements(); - reqs.front().requiredFlags |= IQueue::FAMILY_FLAGS::COMPUTE_BIT; - return reqs; - } - inline bool onAppInitialized(smart_refctd_ptr&& system) override { m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); @@ -1523,4 +1518,4 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui bool m_useIndirectCommand = false; }; -NBL_MAIN_FUNC(RaytracingPipelineApp) \ No newline at end of file +NBL_MAIN_FUNC(PathTracingApp) \ No newline at end of file diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp new file mode 100644 index 000000000..396d3439c --- /dev/null +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -0,0 +1,61 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "renderer/CRenderer.h" +#include "renderer/SAASequence.h" + +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +namespace nbl::this_example +{ +using namespace nbl::asset; +using namespace nbl::video; + +// +core::smart_refctd_ptr CRenderer::create(SCachedCreationParams&& _params) +{ + if (!_params) + return nullptr; + SConstructorParams params = {std::move(_params)}; + + // + ILogicalDevice* device = params.utilities->getLogicalDevice(); + + // + params.converter = CAssetConverter::create({.device=device,.optimizer={}}); + + // create the layouts + { + // one descriptor layout to rule them all + { + // bindless textures + // bindless storage images + // bindless buffer views + // bindless buffer storage views + } + + // but many push constant ranges + // and first descriptor set layout for 1 UBO to put image indices and BDA (fast swap at will) + } + + // create the pipelines + { + // TODO + } + + // the renderpass: custom dependencies, but everything else fixed from outside (format, and number of subpasses) + { +// params.presentRenderpass = device->createRenderpass(); + } + + // present pipelines + { + // TODO + } + + return core::smart_refctd_ptr(new CRenderer(std::move(params)),core::dont_grab); +} + +} \ No newline at end of file From 57fcb0d5fa691705c00af9152ee95a84f474c702 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 29 Dec 2025 23:40:43 +0100 Subject: [PATCH 134/219] add the loaders, link mitsuba loader --- 40_PathTracer/CMakeLists.txt | 3 + 40_PathTracer/include/io/CSceneLoader.h | 73 +++++++++++++++++ 40_PathTracer/include/renderer/CRenderer.h | 28 +++---- 40_PathTracer/include/renderer/CScene.h | 29 +++---- 40_PathTracer/main.cpp | 20 +++++ 40_PathTracer/src/io/CSceneLoader.cpp | 91 ++++++++++++++++++++++ 40_PathTracer/src/renderer/CRenderer.cpp | 24 +++++- 7 files changed, 233 insertions(+), 35 deletions(-) create mode 100644 40_PathTracer/include/io/CSceneLoader.h create mode 100644 40_PathTracer/src/io/CSceneLoader.cpp diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index 3873c6641..aa11ee33e 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -5,14 +5,17 @@ endif() set(NBL_INCLUDE_SERACH_DIRECTORIES + "${NBL_EXT_MITSUBA_LOADER_INCLUDE_DIRS}" "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/src" ) list(APPEND NBL_LIBRARIES + "${NBL_EXT_MITSUBA_LOADER_LIB}" imguizmo "${NBL_EXT_IMGUI_UI_LIB}" ) list(APPEND NBL_EXAMPLE_SOURCES + "${CMAKE_CURRENT_SOURCE_DIR}/src/io/CSceneLoader.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CRenderer.cpp" ) list(APPEND NBL_ diff --git a/40_PathTracer/include/io/CSceneLoader.h b/40_PathTracer/include/io/CSceneLoader.h new file mode 100644 index 000000000..d2cccf8c7 --- /dev/null +++ b/40_PathTracer/include/io/CSceneLoader.h @@ -0,0 +1,73 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_C_SCENE_LOADER_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_C_SCENE_LOADER_H_INCLUDED_ + + +#include "nabla.h" + +#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" + + +namespace nbl::this_example +{ + +class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmovable +{ + public: + struct SCachedCreationParams + { + core::smart_refctd_ptr assMan = nullptr; + system::logger_opt_smart_ptr logger = nullptr; + }; + struct SCreationParams : SCachedCreationParams + { + inline operator bool() const + { + if (!assMan) + return false; + return true; + } + }; + static core::smart_refctd_ptr create(SCreationParams&& params); + + struct SLoadResult + { + struct SSensor + { + }; + + inline operator bool() const + { + if (!scene || !sensors.empty()) + return false; + return true; + } + + // + core::smart_refctd_ptr scene = {}; + // + core::vector sensors; + // TODO: for Material Compiler + //std::future compileShadersFuture = {}; + }; + struct SLoadParams + { + system::path relPath = ""; + system::path workingDirectory = ""; + }; + SLoadResult load(SLoadParams&& _params); + + protected: + struct SConstructorParams : SCachedCreationParams + { + }; + inline CSceneLoader(SConstructorParams&& _params) : m_params(std::move(_params)) {} + virtual inline ~CSceneLoader() {} + + SConstructorParams m_params; +}; + +} +#endif diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h index dcd1caed0..b3227fff7 100644 --- a/40_PathTracer/include/renderer/CRenderer.h +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -86,20 +86,24 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl // core::smart_refctd_ptr utilities = nullptr; }; - static core::smart_refctd_ptr create(SCachedCreationParams&& params); + struct SCreationParams : SCachedCreationParams + { + }; + static core::smart_refctd_ptr create(SCreationParams&& _params); // inline video::ILogicalDevice* getDevice() { return m_params.utilities->getLogicalDevice(); } + // + core::smart_refctd_ptr createScene(CScene::SCreationParams&& _params); + protected: struct SConstructorParams : SCachedCreationParams { - core::smart_refctd_ptr converter; - // per pipeline UBO, with fast updates - core::smart_refctd_ptr uboDS; + core::smart_refctd_ptr uboDSLayout; // descriptor set for a scene shall contain sampled textures and compiled materials - core::smart_refctd_ptr sceneDS; + core::smart_refctd_ptr sceneDSLayout; // rendering pipelines core::smart_refctd_ptr preVis; @@ -150,20 +154,6 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl private: nbl::core::smart_refctd_ptr bufferView; } sampleSequence; - uint16_t maxPathDepth; - uint16_t noRussianRouletteDepth : 15; - uint16_t hideEnvironment : 1; - uint32_t maxSensorSamples; - - nbl::core::matrix3x4SIMD m_prevView; - nbl::core::matrix4x3 m_prevCamTform; - nbl::core::aabbox3df m_sceneBound; - uint32_t m_framesDispatched; - float m_maxAreaLightLuma; - vec2 m_rcpPixelSize; - uint64_t m_totalRaysCast; - StaticViewData_t m_staticViewData; - RaytraceShaderCommonData_t m_raytraceCommonData; // Resources used for envmap sampling nbl::ext::EnvmapImportanceSampling::EnvmapImportanceSampling m_envMapImportanceSampling; diff --git a/40_PathTracer/include/renderer/CScene.h b/40_PathTracer/include/renderer/CScene.h index 1b37900be..e2ae0718b 100644 --- a/40_PathTracer/include/renderer/CScene.h +++ b/40_PathTracer/include/renderer/CScene.h @@ -5,10 +5,7 @@ #define _NBL_THIS_EXAMPLE_C_SCENE_H_INCLUDED_ -#include "nabla.h" -// TODO: move to `io` -#include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" -#include "nbl/ext/MitsubaLoader/CSerializedLoader.h" +#include "io/CSceneLoader.h" namespace nbl::this_example @@ -19,26 +16,34 @@ class CScene : public core::IReferenceCounted, public core::InterfaceUnmovable public: struct SCachedCreationParams { + }; + struct SCreationParams : SCachedCreationParams + { + CSceneLoader::SLoadResult load = {}; + video::CAssetConverter* converter = nullptr; + inline operator bool() const { - if (!scene || !metadata) + if (!load) return false; + // converter can be null, we can make a new one return true; } - - // - core::smart_refctd_ptr scene; - // - core::smart_refctd_ptr metadata; }; - static core::smart_refctd_ptr create(SCachedCreationParams&& params); + + // TODO: figure out whats constant, and whats state that can be passed around + inline std::span getSensors() const {return m_params.sensors;} + + // TODO: function to initialize per-sensor stuff protected: + friend class CRenderer; struct SConstructorParams : SCachedCreationParams { // descriptor set for a scene shall contain sampled textures and compiled materials core::smart_refctd_ptr sceneDS; + core::vector sensors; #if 0 nbl::core::aabbox3df m_sceneBound; float m_maxAreaLightLuma; @@ -69,8 +74,6 @@ class CScene : public core::IReferenceCounted, public core::InterfaceUnmovable nbl::core::smart_refctd_ptr m_maskAcc; #endif - // TODO: for Material Compiler - //std::future compileShadersFuture; }; } diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 46ffed947..35bde0709 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -17,6 +17,7 @@ #include "nbl/builtin/hlsl/indirect_commands.hlsl" +using namespace nbl::core; using namespace nbl::application_templates; using namespace nbl::examples; using namespace nbl::this_example; @@ -102,6 +103,25 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; + + // TODO: move new members + smart_refctd_ptr m_sceneLoader; + smart_refctd_ptr m_renderer; + + // set up the scene loader + m_sceneLoader = CSceneLoader::create({{ + .assMan = smart_refctd_ptr(m_assetMgr), + .logger = smart_refctd_ptr(m_logger) + }}); + + // + m_renderer = CRenderer::create({{ + .graphicsQueue = getGraphicsQueue(), + .computeQueue = getComputeQueue(), + .uploadQueue = getTransferUpQueue(), + .utilities = smart_refctd_ptr(m_utils) + }}); + // Load Custom Shader auto loadPrecompiledShader = [&]() -> smart_refctd_ptr diff --git a/40_PathTracer/src/io/CSceneLoader.cpp b/40_PathTracer/src/io/CSceneLoader.cpp new file mode 100644 index 000000000..ebf5ba7c3 --- /dev/null +++ b/40_PathTracer/src/io/CSceneLoader.cpp @@ -0,0 +1,91 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "io/CSceneLoader.h" + +#include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" +#include "nbl/ext/MitsubaLoader/CSerializedLoader.h" + +namespace nbl::this_example +{ +using namespace nbl::core; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ext::MitsubaLoader; + +// +smart_refctd_ptr CSceneLoader::create(SCreationParams&& _params) +{ + if (!_params) + return nullptr; + SConstructorParams params = {std::move(_params)}; + + // add the loaders + { + auto* const assMan = params.assMan.get(); + auto* const system = assMan->getSystem(); + + bool success = true; + success = success && assMan->addAssetLoader(make_smart_refctd_ptr(smart_refctd_ptr(system)))!=0xdeadbeefu; + // some of our test scenes won't load without the `.serialized` support + success = success && assMan->addAssetLoader(make_smart_refctd_ptr()) != 0xdeadbeefu; + + if (!success) + { + params.logger.log("Could not add Mitsuba Asset Loaders", ILogger::ELL_ERROR); + return nullptr; + } + } + + return core::smart_refctd_ptr(new CSceneLoader(std::move(params)),core::dont_grab); +} + +auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult +{ + IAssetLoader::SAssetLoadParams params = {}; + params.workingDirectory = _params.workingDirectory; + params.logger = m_params.logger.get().get(); + const auto relPath = _params.relPath.lexically_normal().string(); + auto asset = m_params.assMan->getAsset(relPath,params); + const auto type = asset.getAssetType(); + if (asset.getContents().empty() || type!=IAsset::E_TYPE::ET_SCENE) + { + m_params.logger.log( + "Failed to Load Mitsuba scene from \"%s\" with working directory \"%s\" type is %d", + ILogger::ELL_ERROR,relPath.c_str(),_params.workingDirectory.lexically_normal().string().c_str(),type // TODO: specialize `system::impl::to_string_helper` for IAsset::E_TYPE + ); + return {}; + } + m_params.logger.log("Loaded %s",ILogger::ELL_INFO,relPath.c_str()); + + const auto* const untypedMeta = asset.getMetadata(); + if (!untypedMeta || strcmpi(untypedMeta->getLoaderName(),CMitsubaMetadata::LoaderName)!=0) + { + params.logger.log("Loaded an ICPUScene but without `CMistubaMetadata`",ILogger::ELL_ERROR); + return {}; + } + const auto* const meta = static_cast(untypedMeta); + + // + core::vector sensors; + auto& _sensors = meta->m_global.m_sensors; + if (_sensors.empty()) + { + params.logger.log("The `CMistubaMetadata` contains no sensors",ILogger::ELL_ERROR); + return {}; + } + else + { + sensors.resize(_sensors.size()); + //for () // TODO: load the stuff + } + + // TODO: any CPU-side touch-ups we need to do, like Material IR options + + return { + .scene = IAsset::castDown(asset.getContents()[0]), + .sensors = std::move(sensors) + }; +} + +} \ No newline at end of file diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index 396d3439c..e30b8f7f0 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -14,7 +14,7 @@ using namespace nbl::asset; using namespace nbl::video; // -core::smart_refctd_ptr CRenderer::create(SCachedCreationParams&& _params) +core::smart_refctd_ptr CRenderer::create(SCreationParams&& _params) { if (!_params) return nullptr; @@ -23,8 +23,6 @@ core::smart_refctd_ptr CRenderer::create(SCachedCreationParams&& _par // ILogicalDevice* device = params.utilities->getLogicalDevice(); - // - params.converter = CAssetConverter::create({.device=device,.optimizer={}}); // create the layouts { @@ -58,4 +56,24 @@ core::smart_refctd_ptr CRenderer::create(SCachedCreationParams&& _par return core::smart_refctd_ptr(new CRenderer(std::move(params)),core::dont_grab); } + +core::smart_refctd_ptr CRenderer::createScene(CScene::SCreationParams&& _params) +{ + if (!_params) + return nullptr; + auto converter = core::smart_refctd_ptr(_params.converter); + + CScene::SConstructorParams params = {std::move(_params)}; + + // new cache if none provided + if (!converter) + converter = CAssetConverter::create({.device=getDevice(),.optimizer={}}); + + // build the BLAS and TLAS + { + } + + return core::smart_refctd_ptr(new CScene(std::move(params)),core::dont_grab); +} + } \ No newline at end of file From d0a557552696161cd84528325577b7e2a0700d36 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 30 Dec 2025 09:55:18 +0100 Subject: [PATCH 135/219] delete uneccessary files --- 22_RaytracedAO/config.json | 28 ----------- 22_RaytracedAO/config.json.template | 28 ----------- 22_RaytracedAO/cull.comp | 74 ----------------------------- 22_RaytracedAO/extractCubemap.bat | 40 ---------------- 22_RaytracedAO/fillVisBuffer.frag | 36 -------------- 22_RaytracedAO/mergeCubemap.bat | 32 ------------- 22_RaytracedAO/pipeline.groovy | 50 ------------------- 22_RaytracedAO/test_scenes.txt | 46 ------------------ 22_RaytracedAO/virtualGeometry.glsl | 42 ---------------- 9 files changed, 376 deletions(-) delete mode 100644 22_RaytracedAO/config.json delete mode 100644 22_RaytracedAO/config.json.template delete mode 100644 22_RaytracedAO/cull.comp delete mode 100644 22_RaytracedAO/extractCubemap.bat delete mode 100644 22_RaytracedAO/fillVisBuffer.frag delete mode 100644 22_RaytracedAO/mergeCubemap.bat delete mode 100644 22_RaytracedAO/pipeline.groovy delete mode 100644 22_RaytracedAO/test_scenes.txt delete mode 100644 22_RaytracedAO/virtualGeometry.glsl diff --git a/22_RaytracedAO/config.json b/22_RaytracedAO/config.json deleted file mode 100644 index be48a8b6c..000000000 --- a/22_RaytracedAO/config.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "enableParallelBuild": true, - "threadsPerBuildProcess" : 2, - "isExecuted": false, - "scriptPath": "", - "cmake": { - "requiredOptions": [ "NBL_BUILD_MITSUBA_LOADER", "NBL_BUILD_RADEON_RAYS" ] - }, - "profiles": [ - { - "backend": "", - "platform": "", - "buildModes": [], - "gpuArchitectures": [] - } - ], - "dependencies": [], - "data": [ - { - "dependencies": [ - ], - "command": [ - ], - "outputs": [ - ] - } - ] -} \ No newline at end of file diff --git a/22_RaytracedAO/config.json.template b/22_RaytracedAO/config.json.template deleted file mode 100644 index abfc8e387..000000000 --- a/22_RaytracedAO/config.json.template +++ /dev/null @@ -1,28 +0,0 @@ -{ - "enableParallelBuild": true, - "threadsPerBuildProcess" : 2, - "isExecuted": false, - "scriptPath": "", - "cmake": { - "configurations": [ "Release", "Debug", "RelWithDebInfo" ], - "buildModes": [], - "requiredOptions": [ "NBL_BUILD_MITSUBA_LOADER", "NBL_BUILD_RADEON_RAYS" ] - }, - "profiles": [ - { - "backend": "vulkan", - "platform": "windows", - "buildModes": [], - "runConfiguration": "Release", - "gpuArchitectures": [] - } - ], - "dependencies": [], - "data": [ - { - "dependencies": [], - "command": [""], - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/22_RaytracedAO/cull.comp b/22_RaytracedAO/cull.comp deleted file mode 100644 index 7e214e555..000000000 --- a/22_RaytracedAO/cull.comp +++ /dev/null @@ -1,74 +0,0 @@ -#version 430 core - -#include "rasterizationCommon.h" -layout(local_size_x = WORKGROUP_SIZE) in; - -#include - -#include -layout(set=1, binding=0, row_major) writeonly restrict buffer PerInstancePerCamera -{ - DrawData_t data[]; -} instanceDataPerCamera; -layout(set=1, binding=1, std430, row_major) restrict readonly buffer PerInstanceCull -{ - CullData_t cullData[]; -}; -layout(set=1, binding=2, std430) restrict coherent buffer IndirectDraws -{ - nbl_glsl_DrawElementsIndirectCommand_t draws[]; -} commandBuff[2]; - - - -layout(push_constant, row_major) uniform PushConstants -{ - CullShaderData_t data; -} pc; - - - -#include -#include - - -// base instance remains unchanged -// we just do atomic add on the instance count -void main() -{ - for (uint drawCommandGUID=gl_GlobalInvocationID.x; drawCommandGUID=pc.data.maxGlobalInstanceCount) - return; - - // fetch instance data - const CullData_t batchInstanceData = cullData[batchInstanceID]; - const uint batchInstanceGUID = batchInstanceData.batchInstanceGUID; - - const nbl_glsl_ext_Mitsuba_Loader_instance_data_t instanceData = InstData.data[batchInstanceGUID]; - const mat4x3 worldMatrix = instanceData.tform; - const mat4 MVP = nbl_glsl_pseudoMul4x4with4x3(pc.data.viewProjMatrix,worldMatrix); - - // cull - bool notCulled = true; - if (false) - { - const mat2x3 bbox = mat2x3(batchInstanceData.aabbMinEdge,batchInstanceData.aabbMaxEdge); - notCulled = nbl_glsl_couldBeVisible(MVP,bbox); - } - - // set up MDI - if (notCulled) - { - const uint drawCommandGUID = batchInstanceData.drawCommandGUID; - const uint drawInstanceID = commandBuff[pc.data.currentCommandBufferIx].draws[drawCommandGUID].baseInstance+ - atomicAdd(commandBuff[pc.data.currentCommandBufferIx].draws[drawCommandGUID].instanceCount,1u); - - instanceDataPerCamera.data[drawInstanceID].MVP = MVP; - // use the MSB to denote if face orientation should be flipped - instanceDataPerCamera.data[drawInstanceID].backfacingBit_batchInstanceGUID = batchInstanceGUID|((instanceData.determinantSignBit^floatBitsToUint(pc.data.viewProjDeterminant))&0x80000000u); - instanceDataPerCamera.data[drawInstanceID].firstIndex = commandBuff[pc.data.currentCommandBufferIx].draws[drawCommandGUID].firstIndex; - } -} \ No newline at end of file diff --git a/22_RaytracedAO/extractCubemap.bat b/22_RaytracedAO/extractCubemap.bat deleted file mode 100644 index b3ad104c4..000000000 --- a/22_RaytracedAO/extractCubemap.bat +++ /dev/null @@ -1,40 +0,0 @@ -REM @echo off - -REM examplary usage: -REM mergeCubemap.bat 64 64 mergedImage.png stripeFormat.png - -set cropOffsetX0=%1 -set cropOffsetY0=%2 - -set in=%3 -set out=%4 - -REM set extracted image size -for /f "tokens=*" %%s in ('magick identify -format "%%w" %in%') do set sz=%%s -set /a paddedSize = sz/3 - -set /a realSize = paddedSize-2*cropOffsetX0 - -set /a cropOffsetX1 = cropOffsetX0+paddedSize -set /a cropOffsetX2 = cropOffsetX0+paddedSize*2 -set /a cropOffsetX3 = cropOffsetX0+paddedSize*3 -set /a cropOffsetX4 = cropOffsetX0+paddedSize*4 -set /a cropOffsetX5 = cropOffsetX0+paddedSize*5 -set /a cropOffsetY1 = paddedSize+64 - -set /a x0 = 0 -set /a x1 = realSize -set /a x2 = 2*realSize -set /a x3 = 3*realSize -set /a x4 = 5*realSize -set /a x5 = 4*realSize - -set /a stripWidth = realSize*6 -magick convert -size %stripWidth%x%realSize% canvas:none ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX0%+%cropOffsetY1% -matte -virtual-pixel transparent -geometry %realSize%x%realSize%+%x0%+0 ) -composite ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX2%+%cropOffsetY1% -matte -virtual-pixel transparent -geometry %realSize%x%realSize%+%x1%+0 ) -composite ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX1%+%cropOffsetY0% -matte -virtual-pixel transparent -geometry %realSize%x%realSize%+%x2%+0 ) -composite ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX2%+%cropOffsetY0% -matte -virtual-pixel transparent -geometry %realSize%x%realSize%+%x3%+0 ) -composite ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX0%+%cropOffsetY0% -matte -virtual-pixel transparent -geometry %realSize%x%realSize%+%x4%+0 ) -composite ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX1%+%cropOffsetY1% -matte -virtual-pixel transparent -geometry %realSize%x%realSize%+%x5%+0 ) -composite ^ -%out% \ No newline at end of file diff --git a/22_RaytracedAO/fillVisBuffer.frag b/22_RaytracedAO/fillVisBuffer.frag deleted file mode 100644 index 88a18455a..000000000 --- a/22_RaytracedAO/fillVisBuffer.frag +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h -#version 460 core -#extension GL_EXT_shader_16bit_storage : require -#include - - -#define _NBL_GLSL_EXT_MITSUBA_LOADER_INSTANCE_DATA_BINDING_ 0 -#include "virtualGeometry.glsl" - -#include -layout(location = 2) flat in uint BackfacingBit_BatchInstanceGUID; -layout(location = 3) flat in uint drawCmdFirstIndex; - -uint nbl_glsl_barycentric_frag_getDrawID() {return BackfacingBit_BatchInstanceGUID&0x7fffffffu;} -vec3 nbl_glsl_barycentric_frag_getVertexPos(in uint batchInstanceGUID, in uint primID, in uint primsVx) -{ - const uint ix = nbl_glsl_VG_fetchTriangleVertexIndex(primID*3u+drawCmdFirstIndex,primsVx); - return nbl_glsl_fetchVtxPos(ix,InstData.data[batchInstanceGUID]); -} - - -layout(location = 0) out uvec4 frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2; // should it be called backfacing or frontfacing? - - -void main() -{ - vec2 bary = nbl_glsl_barycentric_frag_get(); - - const int triangleIDBitcount = findMSB(MAX_TRIANGLES_IN_BATCH-1)+1; - frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[0] = bitfieldInsert(BackfacingBit_BatchInstanceGUID,gl_PrimitiveID,31-triangleIDBitcount,triangleIDBitcount); - frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[1] = packUnorm2x16(bary); - frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[2] = packHalf2x16(dFdx(bary)); - frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[3] = packHalf2x16(dFdy(bary)); -} diff --git a/22_RaytracedAO/mergeCubemap.bat b/22_RaytracedAO/mergeCubemap.bat deleted file mode 100644 index eda734094..000000000 --- a/22_RaytracedAO/mergeCubemap.bat +++ /dev/null @@ -1,32 +0,0 @@ -@echo off - -REM the ordering of the cubemap faces is irrelevant as long as extractCubemap knows what has been merged together here -set first=%1 -set second=%2 -set third=%3 -set fourth=%4 -set fifth=%5 -set sixth=%6 -set output=%~dpn7 - -REM examplary usage: -REM mergeCubemap.bat first.png second.png third.png fourth.png fifth.png sixth.png outputImageName - -REM set image size -for /f "tokens=*" %%s in ('magick identify -format "%%w" %first%') do set sz=%%s - -REM set image fromat -for /f "tokens=*" %%s in ('magick identify -format "%%m" %first%') do set format=%%s - -set /a szx2=2*sz -set /a outputWidth=3*sz -set /a outputHeight=2*sz - -magick convert -size %outputwidth%x%outputHeight% canvas:none ^ --draw "image over 0,0 0,0 '%sixth%'" ^ --draw "image over %sz%,0 0,0 '%fourth%'" ^ --draw "image over %szx2%,0 0,0 '%third%'" ^ --draw "image over 0,%sz% 0,0 '%first%'" ^ --draw "image over %sz%,%sz% 0,0 '%fifth%'" ^ --draw "image over %szx2%,%sz% 0,0 '%second%'" ^ -%output%.%format% \ No newline at end of file diff --git a/22_RaytracedAO/pipeline.groovy b/22_RaytracedAO/pipeline.groovy deleted file mode 100644 index 04729bc71..000000000 --- a/22_RaytracedAO/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder - -class CRaytracedAOBuilder extends IBuilder -{ - public CRaytracedAOBuilder(Agent _agent, _info) - { - super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - return true - } -} - -def create(Agent _agent, _info) -{ - return new CRaytracedAOBuilder(_agent, _info) -} - -return this \ No newline at end of file diff --git a/22_RaytracedAO/test_scenes.txt b/22_RaytracedAO/test_scenes.txt deleted file mode 100644 index e751c9bbc..000000000 --- a/22_RaytracedAO/test_scenes.txt +++ /dev/null @@ -1,46 +0,0 @@ -; Here is my Commented line that batch file will skip (started with semicolons) -; "relative/dir/from/bin/folder/to/scene.zip something.xml -; Copy your test files into "Scenes" folder besides "bin" -"../../media/mitsuba/staircase2.zip scene.xml" -"..\Scenes\unity.zip 33_render_1_1.xml" -"..\Scenes\unity.zip 34_render_2_1.xml" -"..\Scenes\unity.zip 35_render_3_1.xml" -"..\Scenes\unity.zip 36_render_4_2.xml" -"..\Scenes\unity.zip 37_render_5_2.xml" -"..\Scenes\unity.zip 38_render_6_2.xml" -"..\Scenes\unity.zip 39_render_7_2.xml" -"..\Scenes\unity.zip 40_render_8_2.xml" -"..\Scenes\unity.zip 41_render_9_2.xml" -"..\Scenes\unity.zip 45_render_10_2.xml" -"..\Scenes\unity.zip 46_render_11_2.xml" -"..\Scenes\unity.zip 47_render_12_2.xml" -"..\Scenes\unity.zip 48_render_13_2.xml" -"..\Scenes\unity.zip 49_render_14_2.xml" -"..\Scenes\unity.zip 50_render_15_2.xml" -"..\Scenes\unity.zip 51_render_16_2.xml" -"..\Scenes\unity.zip 52_render_17_2.xml" -"..\Scenes\unity.zip 53_render_18_1.xml" -"..\Scenes\unity.zip 54_render_19_2.xml" -"..\Scenes\unity.zip 55_render_20_2.xml" -"..\Scenes\31521.zip 19_render_0_1.xml" -"..\Scenes\31797.zip 6_render_0_2.xml" -"..\Scenes\32222 does not stop loading.zip 12_render_9_1.xml" -"..\Scenes\4k strange - all normalmapped.zip" -"..\Scenes\bathroom.zip" -"..\Scenes\bathroom2.zip" -"..\Scenes\bedroom.zip" -"..\Scenes\coffee.zip" -"..\Scenes\cornell-box.zip" -"..\Scenes\glass-of-water.zip" -"..\Scenes\kitchen.zip" -"..\Scenes\lamp.zip" -"..\Scenes\living-room-2.zip" -"..\Scenes\living-room-3.zip" -"..\Scenes\living-room.zip" -"..\Scenes\spaceship.zip" -"..\Scenes\staircase.zip" -"..\Scenes\strangetexturedball.zip 22_render_0_1.xml" -"..\Scenes\sunscene.zip 2_render_0_2.xml" -"..\Scenes\t1 normals crash.zip 16_render_0_1.xml" -"..\Scenes\veach-ajar.zip" -"..\Scenes\veach-bidir.zip" \ No newline at end of file diff --git a/22_RaytracedAO/virtualGeometry.glsl b/22_RaytracedAO/virtualGeometry.glsl deleted file mode 100644 index 422c939f9..000000000 --- a/22_RaytracedAO/virtualGeometry.glsl +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef _VIRTUAL_GEOMETRY_GLSL_INCLUDED_ -#define _VIRTUAL_GEOMETRY_GLSL_INCLUDED_ - -#include "common.h" - -#define _NBL_VG_USE_SSBO -#define _NBL_VG_SSBO_DESCRIPTOR_SET 1 -#define _NBL_VG_USE_SSBO_UVEC2 -#define _NBL_VG_SSBO_UVEC2_BINDING 0 -#define _NBL_VG_USE_SSBO_INDEX -#define _NBL_VG_SSBO_INDEX_BINDING 1 -// TODO: remove after Doom Eternal position quantization trick -#define _NBL_VG_USE_SSBO_UVEC3 -#define _NBL_VG_SSBO_UVEC3_BINDING 2 -#include - - -#include - - -vec3 nbl_glsl_fetchVtxPos(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData) -{ - nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.padding1; - return nbl_glsl_VG_attribFetch_RGB32_SFLOAT(va,vtxID); -} - -vec3 nbl_glsl_fetchVtxNormal(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData) -{ - nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.determinantSignBit; - const uint codedNormal = nbl_glsl_VG_attribFetch3u(va,vtxID)[0]; - return normalize(nbl_glsl_decodeRGB10A2_SNORM(codedNormal).xyz); -} - -vec2 nbl_glsl_fetchVtxUV(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData) -{ - nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.determinantSignBit; - const uvec2 codedUV = nbl_glsl_VG_attribFetch3u(va,vtxID).yz; - return vec2(uintBitsToFloat(codedUV.x), uintBitsToFloat(codedUV.y)); -} - - -#endif From 2aa93fdb41386f01e6788fd97db1cf3ec4d473e8 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 30 Dec 2025 10:00:08 +0100 Subject: [PATCH 136/219] start loading scenes, ZIP included --- 40_PathTracer/main.cpp | 13 +++++++ 40_PathTracer/src/io/CSceneLoader.cpp | 48 ++++++++++++++++++++---- 40_PathTracer/src/renderer/CRenderer.cpp | 2 + 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 35bde0709..d02272774 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -122,6 +122,19 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes .utilities = smart_refctd_ptr(m_utils) }}); + // TODO: tmp code + auto scene_daily_pt = m_sceneLoader->load({ + .relPath = sharedInputCWD/"mitsuba/daily_pt.xml", + .workingDirectory = localOutputCWD + }); + // the UI would have you load the zip first, then present a dropdown of what to load + // but still need to support archive mount for cmdline load +#if 0 // this particular zip goes down an unsupported path in our zip loader + auto scene_bedroom = m_sceneLoader->load({ + .relPath = sharedInputCWD/"mitsuba/bedroom.zip/scene.xml", + .workingDirectory = localOutputCWD + }); +#endif // Load Custom Shader auto loadPrecompiledShader = [&]() -> smart_refctd_ptr diff --git a/40_PathTracer/src/io/CSceneLoader.cpp b/40_PathTracer/src/io/CSceneLoader.cpp index ebf5ba7c3..f4177fb75 100644 --- a/40_PathTracer/src/io/CSceneLoader.cpp +++ b/40_PathTracer/src/io/CSceneLoader.cpp @@ -45,18 +45,52 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult IAssetLoader::SAssetLoadParams params = {}; params.workingDirectory = _params.workingDirectory; params.logger = m_params.logger.get().get(); - const auto relPath = _params.relPath.lexically_normal().string(); - auto asset = m_params.assMan->getAsset(relPath,params); - const auto type = asset.getAssetType(); - if (asset.getContents().empty() || type!=IAsset::E_TYPE::ET_SCENE) + + auto* const assMan = m_params.assMan.get(); + // handle archive stuff + const auto relPath = _params.relPath.lexically_normal(); + auto* const system = assMan->getSystem(); + core::stack archiveStack; + for (auto it=relPath.begin(); it!=relPath.end();) + { + const auto ext = (it++)->extension().string(); + if (strcmpi(ext.c_str(),".zip")==0) + { + // some N4950 defect makes it impossible + //const auto archPath = system::path(relPath.begin(),it); + const auto archPath = std::accumulate(relPath.begin(),it,system::path(),[](const system::path& lhs, const system::path& rhs)->system::path + { + return lhs/rhs; + } + ); + auto archive = system->openFileArchive(archPath); + archiveStack.push(archive.get()); + system->mount(std::move(archive)); + } + } + + const auto relPathStr = relPath.string(); + auto asset = assMan->getAsset(relPathStr,params); + if (asset.getContents().empty()) { m_params.logger.log( - "Failed to Load Mitsuba scene from \"%s\" with working directory \"%s\" type is %d", - ILogger::ELL_ERROR,relPath.c_str(),_params.workingDirectory.lexically_normal().string().c_str(),type // TODO: specialize `system::impl::to_string_helper` for IAsset::E_TYPE + "Failed to Load Mitsuba scene from \"%s\" with working directory \"%s\"", + ILogger::ELL_ERROR,relPathStr.c_str(),_params.workingDirectory.lexically_normal().string().c_str() ); return {}; } - m_params.logger.log("Loaded %s",ILogger::ELL_INFO,relPath.c_str()); + m_params.logger.log("Loaded %s",ILogger::ELL_INFO,relPathStr.c_str()); + + // now unmount the archives + for (; !archiveStack.empty(); archiveStack.pop()) + system->unmount(archiveStack.top()); + + const auto type = asset.getAssetType(); + if (type!=IAsset::E_TYPE::ET_SCENE) + { + params.logger.log("But did not load an `ICPUScene` type is %S",ILogger::ELL_ERROR,system::to_string(type)); + return {}; + } const auto* const untypedMeta = asset.getMetadata(); if (!untypedMeta || strcmpi(untypedMeta->getLoaderName(),CMitsubaMetadata::LoaderName)!=0) diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index e30b8f7f0..de9c9a5c4 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -73,6 +73,8 @@ core::smart_refctd_ptr CRenderer::createScene(CScene::SCreationParams&& { } + // fill out the render classes but don't init yet + return core::smart_refctd_ptr(new CScene(std::move(params)),core::dont_grab); } From 3bcb7473fc3f277bd811683a328710a70c92f869 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 30 Dec 2025 11:36:00 +0100 Subject: [PATCH 137/219] print GitInfo in ex 40, flesh out the loaded Sensor struct --- 40_PathTracer/include/io/CSceneLoader.h | 75 +++++++++++++++++++++++++ 40_PathTracer/main.cpp | 38 +++++++++++++ 40_PathTracer/src/io/CSceneLoader.cpp | 11 +++- 3 files changed, 121 insertions(+), 3 deletions(-) diff --git a/40_PathTracer/include/io/CSceneLoader.h b/40_PathTracer/include/io/CSceneLoader.h index d2cccf8c7..f33babb42 100644 --- a/40_PathTracer/include/io/CSceneLoader.h +++ b/40_PathTracer/include/io/CSceneLoader.h @@ -6,6 +6,7 @@ #include "nabla.h" +#include "nbl/builtin/hlsl/cpp_compat/promote.hlsl" #include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" @@ -36,6 +37,80 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov { struct SSensor { + using type_e = ext::MitsubaLoader::CElementSensor::Type; + + struct SConstants + { + struct DenoiserArgs + { + // where the FFT bloom kernel is + system::path bloomFilePath = {}; + float bloomScale = 0.0f; + float bloomIntensity = 0.0f; + std::string tonemapperArgs = ""; + }; + + constexpr static inline uint32_t MaxWidth = 0x1u<<(sizeof(uint16_t)*8-2); + constexpr static inline uint32_t MaxHeight = MaxWidth; + constexpr static inline uint32_t MaxCascadeCount = 15; + + system::path outputFilePath = {}; + DenoiserArgs denoiserInfo = {}; + // + uint32_t width = 0u; + uint32_t height = 0u; + // do we need to keep the crops? + int32_t cropWidth = 0u; + int32_t cropHeight = 0u; + // could the offsets be dynamic ? + int32_t cropOffsetX = 0u; + int32_t cropOffsetY = 0u; + // + type_e type = type_e::INVALID; + // + uint8_t rightHandedCamera : 1 = true; + uint8_t cascadeCount : 4 = 1; + } constants = {}; + // these could theoretically change without recreating session resources + struct SMutable + { + constexpr static inline uint8_t MaxClipPlanes = 6; + + inline uint8_t getClipPlaneCount() + { + using namespace nbl::hlsl; + for (uint8_t i=0; i(0.f); + const auto& rhs = clipPlanes[i].xyz; + if (any(glsl::notEqual(lhs,rhs))) + continue; + return i; + } + return MaxClipPlanes; + } + + // + std::array clipPlanes = {}; + float cascadeLuminanceBase = core::nan(); + float cascadeLuminanceStart = core::nan(); + } mutableDefaults = {}; + // these can change without having to reset accumulations, etc. + struct SDynamic + { + constexpr static inline float DefaultRotateSpeed = 300.0f; + constexpr static inline float DefaultZoomSpeed = 1.0f; + constexpr static inline float DefaultMoveSpeed = 100.0f; + constexpr static inline float DefaultSceneDiagonal = 50.0f; // reference for default zoom and move speed; + + uint32_t samplesNeeded = 0u; + float moveSpeed = core::nan(); + float stepZoomSpeed = core::nan(); + float rotateSpeed = core::nan(); + float kappa = 0.f; + float Emin = 0.05f; + } dynamicDefaults = {}; + }; inline operator bool() const diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index d02272774..dfc57b8e1 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -9,6 +9,8 @@ #include "renderer/CRenderer.h" +#include "nlohmann/json.hpp" + // TODO remove #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" @@ -27,6 +29,40 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes using device_base_t = SimpleWindowedApplication; using asset_base_t = BuiltinResourcesApplication; + // TODO: move to Nabla proper + static inline void jsonizeGitInfo(nlohmann::json& target, const gtml::GitInfo& info) + { + target["isPopulated"] = info.isPopulated; + if (info.hasUncommittedChanges.has_value()) + target["hasUncommittedChanges"] = info.hasUncommittedChanges.value(); + else + target["hasUncommittedChanges"] = "UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"; + + target["commitAuthorName"] = info.commitAuthorName; + target["commitAuthorEmail"] = info.commitAuthorEmail; + target["commitHash"] = info.commitHash; + target["commitShortHash"] = info.commitShortHash; + target["commitDate"] = info.commitDate; + target["commitSubject"] = info.commitSubject; + target["commitBody"] = info.commitBody; + target["describe"] = info.describe; + target["branchName"] = info.branchName; + target["latestTag"] = info.latestTag; + target["latestTagName"] = info.latestTagName; + } + + inline void printGitInfos() const + { + nlohmann::json j; + + auto& modules = j["modules"]; + jsonizeGitInfo(modules["nabla"],gtml::nabla_git_info); + jsonizeGitInfo(modules["dxc"],gtml::dxc_git_info); + + m_logger->log("Build Info:\n%s",ILogger::ELL_INFO,j.dump(4).c_str()); + } + + // TODO: remove constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; // TODO: remove constexpr static inline uint32_t MaxFramesInFlight = 3u; constexpr static inline uint8_t MaxUITextureCount = 1u; // TODO: remove @@ -103,6 +139,8 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; + + printGitInfos(); // TODO: move new members smart_refctd_ptr m_sceneLoader; diff --git a/40_PathTracer/src/io/CSceneLoader.cpp b/40_PathTracer/src/io/CSceneLoader.cpp index f4177fb75..8189de6ca 100644 --- a/40_PathTracer/src/io/CSceneLoader.cpp +++ b/40_PathTracer/src/io/CSceneLoader.cpp @@ -88,14 +88,14 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult const auto type = asset.getAssetType(); if (type!=IAsset::E_TYPE::ET_SCENE) { - params.logger.log("But did not load an `ICPUScene` type is %S",ILogger::ELL_ERROR,system::to_string(type)); + m_params.logger.log("But did not load an `ICPUScene` type is %S",ILogger::ELL_ERROR,system::to_string(type)); return {}; } const auto* const untypedMeta = asset.getMetadata(); if (!untypedMeta || strcmpi(untypedMeta->getLoaderName(),CMitsubaMetadata::LoaderName)!=0) { - params.logger.log("Loaded an ICPUScene but without `CMistubaMetadata`",ILogger::ELL_ERROR); + m_params.logger.log("Loaded an ICPUScene but without `CMistubaMetadata`",ILogger::ELL_ERROR); return {}; } const auto* const meta = static_cast(untypedMeta); @@ -105,17 +105,22 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult auto& _sensors = meta->m_global.m_sensors; if (_sensors.empty()) { - params.logger.log("The `CMistubaMetadata` contains no sensors",ILogger::ELL_ERROR); + m_params.logger.log("The `CMistubaMetadata` contains no sensors",ILogger::ELL_ERROR); return {}; } else { sensors.resize(_sensors.size()); + m_params.logger.log("Total number of Sensors = %d",ILogger::ELL_INFO,sensors.size()); //for () // TODO: load the stuff } // TODO: any CPU-side touch-ups we need to do, like Material IR options + + // empty out the cache from individual images and meshes taht are not used by the scene + assMan->clearAllAssetCache(); + // return return { .scene = IAsset::castDown(asset.getContents()[0]), .sensors = std::move(sensors) From 086af9e6590119bd394f2622db80ab0054445502 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 31 Dec 2025 14:05:14 +0300 Subject: [PATCH 138/219] Sample and visualize samples on the OBB, - Correct manipulation of OBB using `ImGuizmo::ViewManipulate()` - More visualizations of cube faces and 2D Primary Sample Space --- .../app_resources/hlsl/Drawing.hlsl | 207 ++++++++++- .../app_resources/hlsl/Sampling.hlsl | 247 +++++++++++++ .../hlsl/SolidAngleVis.frag.hlsl | 333 +++++++++--------- .../app_resources/hlsl/common.hlsl | 18 +- 73_SolidAngleVisualizer/include/transform.hpp | 34 +- 73_SolidAngleVisualizer/main.cpp | 29 +- 6 files changed, 675 insertions(+), 193 deletions(-) create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl index f3f1b4e96..89dfd4ae6 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl @@ -40,13 +40,24 @@ float drawGreatCircleArc(float3 fragPos, float3 points[2], float aaWidth, float return alpha; } +float drawCross2D(float2 fragPos, float2 center, float size, float thickness) +{ + float2 p = abs(fragPos - center); + + // Check if point is inside the cross (horizontal or vertical bar) + bool inHorizontal = (p.x <= size && p.y <= thickness); + bool inVertical = (p.y <= size && p.x <= thickness); + + return (inHorizontal || inVertical) ? 1.0f : 0.0f; +} + float4 drawHiddenEdges(float3 spherePos, uint32_t silEdgeMask, float aaWidth) { float4 color = 0; float3 hiddenEdgeColor = float3(0.1, 0.1, 0.1); NBL_UNROLL - for (int i = 0; i < 12; i++) + for (int32_t i = 0; i < 12; i++) { // skip silhouette edges if (silEdgeMask & (1u << i)) @@ -85,14 +96,14 @@ float4 drawHiddenEdges(float3 spherePos, uint32_t silEdgeMask, float aaWidth) return color; } -float4 drawCorners(float3 spherePos, float2 p, float aaWidth) +float4 drawCorners(float2 p, float aaWidth) { float4 color = 0; float dotSize = 0.02f; float innerDotSize = dotSize * 0.5f; - for (int i = 0; i < 8; i++) + for (int32_t i = 0; i < 8; i++) { float3 corner3D = normalize(getVertex(i)); float2 cornerPos = sphereToCircle(corner3D); @@ -130,6 +141,34 @@ float4 drawCorners(float3 spherePos, float2 p, float aaWidth) return color; } +float4 drawClippedSilhouetteVertices(float2 p, ClippedSilhouette silhouette, float aaWidth) +{ + float4 color = 0; + float dotSize = 0.03f; + + for (uint i = 0; i < silhouette.count; i++) + { + float3 corner3D = normalize(silhouette.vertices[i]); + float2 cornerPos = sphereToCircle(corner3D); + + float dist = length(p - cornerPos); + + // Smooth circle for the vertex + float alpha = 1.0f - smoothstep(dotSize * 0.8f, dotSize, dist); + + if (alpha > 0.0f) + { + // Color gradient: Red (index 0) to Cyan (last index) + // This helps verify the CCW winding order visually + float t = float(i) / float(max(1u, silhouette.count - 1)); + float3 vertexColor = lerp(float3(1, 0, 0), float3(0, 1, 1), t); + + color += float4(vertexColor * alpha, alpha); + } + } + return color; +} + float4 drawRing(float2 p, float aaWidth) { float positionLength = length(p); @@ -139,6 +178,59 @@ float4 drawRing(float2 p, float aaWidth) return ringAlpha * float4(1, 1, 1, 1); } +// Returns the number of visible faces and populates the faceIndices array +uint getVisibleFaces(int3 region, out uint faceIndices[3]) +{ + uint count = 0; + + // Check X axis + if (region.x == 0) + faceIndices[count++] = 3; // X+ + else if (region.x == 2) + faceIndices[count++] = 2; // X- + + // Check Y axis + if (region.y == 0) + faceIndices[count++] = 5; // Y+ + else if (region.y == 2) + faceIndices[count++] = 4; // Y- + + // Check Z axis + if (region.z == 0) + faceIndices[count++] = 1; // Z+ + else if (region.z == 2) + faceIndices[count++] = 0; // Z- + + return count; +} + +float4 drawVisibleFaceOverlay(float3 spherePos, int3 region, float aaWidth) +{ + uint faceIndices[3]; + uint count = getVisibleFaces(region, faceIndices); + float4 color = 0; + + for (uint i = 0; i < count; i++) + { + uint fIdx = faceIndices[i]; + float3 n = localNormals[fIdx]; + + // Transform normal to world space (using the same logic as your corners) + float3 worldNormal = -normalize(mul((float3x3)pc.modelMatrix, n)); + worldNormal.z = -worldNormal.z; // Invert Z for correct orientation + + // Very basic visualization: highlight if the sphere position + // is generally pointing towards that face's normal + float alignment = dot(spherePos, worldNormal); + if (alignment > 0.95f) + { + // Use different colors for different face indices + color += float4(colorLUT[fIdx % 24], 0.5f); + } + } + return color; +} + // Check if a face on the hemisphere is visible from camera at origin bool isFaceVisible(float3 faceCenter, float3 faceNormal) { @@ -146,8 +238,109 @@ bool isFaceVisible(float3 faceCenter, float3 faceNormal) return dot(faceNormal, viewVec) > 0.0f; } -int getEdgeVisibility(int edgeIdx) +float4 drawFaces(float3 spherePos, float aaWidth) +{ + float4 color = 0.0f; + float3 p = normalize(spherePos); + + float3x3 rotMatrix = (float3x3)pc.modelMatrix; + + // Check each of the 6 faces + for (int32_t faceIdx = 0; faceIdx < 6; faceIdx++) + { + float3 n_world = mul(rotMatrix, localNormals[faceIdx]); + + // Check if face is visible + if (!isFaceVisible(faceCenters[faceIdx], n_world)) + continue; + + // Get the 4 corners of this face + float3 faceVerts[4]; + for (int32_t i = 0; i < 4; i++) + { + int32_t cornerIdx = faceToCorners[faceIdx][i]; + faceVerts[i] = normalize(getVertex(cornerIdx)); + } + + // Compute face center for winding + float3 faceCenter = float3(0, 0, 0); + for (int32_t i = 0; i < 4; i++) + faceCenter += faceVerts[i]; + faceCenter = normalize(faceCenter); + + // Check if point is inside this face + bool isInside = true; + float minDist = 1e10; + + for (int32_t i = 0; i < 4; i++) + { + float3 v0 = faceVerts[i]; + float3 v1 = faceVerts[(i + 1) % 4]; + + // Skip edges behind camera + if (v0.z < 0.0f && v1.z < 0.0f) + { + isInside = false; + break; + } + + // Great circle normal + float3 edgeNormal = normalize(cross(v0, v1)); + + // Ensure normal points inward + if (dot(edgeNormal, faceCenter) < 0.0f) + edgeNormal = -edgeNormal; + + float d = dot(p, edgeNormal); + + if (d < -1e-6f) + { + isInside = false; + break; + } + + minDist = min(minDist, abs(d)); + } + + if (isInside) + { + float alpha = smoothstep(0.0f, aaWidth * 2.0f, minDist); + + // Use colorLUT based on face index (0-5) + float3 faceColor = colorLUT[faceIdx]; + + float shading = saturate(p.z * 0.8f + 0.2f); + color += float4(faceColor * shading * alpha, alpha); + } + } + + return color; +} + +int32_t getEdgeVisibility(int32_t edgeIdx) { + + // Adjacency of edges to faces + // Corrected Adjacency of edges to faces + static const int2 edgeToFaces[12] = { + // Edge Index: | allEdges[i] | Shared Faces: + + /* 0 (0-1) */ {4, 0}, // Y- (4) and Z- (0) + /* 1 (2-3) */ {5, 0}, // Y+ (5) and Z- (0) + /* 2 (4-5) */ {4, 1}, // Y- (4) and Z+ (1) + /* 3 (6-7) */ {5, 1}, // Y+ (5) and Z+ (1) + + /* 4 (0-2) */ {2, 0}, // X- (2) and Z- (0) + /* 5 (1-3) */ {3, 0}, // X+ (3) and Z- (0) + /* 6 (4-6) */ {2, 1}, // X- (2) and Z+ (1) + /* 7 (5-7) */ {3, 1}, // X+ (3) and Z+ (1) + + /* 8 (0-4) */ {2, 4}, // X- (2) and Y- (4) + /* 9 (1-5) */ {3, 4}, // X+ (3) and Y- (4) + /* 10 (2-6) */ {2, 5}, // X- (2) and Y+ (5) + /* 11 (3-7) */ {3, 5} // X+ (3) and Y+ (5) + }; + int2 faces = edgeToFaces[edgeIdx]; // Transform normals to world space @@ -175,7 +368,7 @@ uint32_t computeGroundTruthEdgeMask() { uint32_t mask = 0u; NBL_UNROLL - for (int j = 0; j < 12; j++) + for (int32_t j = 0; j < 12; j++) { // getEdgeVisibility returns 1 for a silhouette edge based on 3D geometry if (getEdgeVisibility(j) == 1) @@ -186,7 +379,7 @@ uint32_t computeGroundTruthEdgeMask() return mask; } -void validateEdgeVisibility(uint32_t sil, int vertexCount, uint32_t generatedSilMask) +void validateEdgeVisibility(uint32_t sil, int32_t vertexCount, uint32_t generatedSilMask) { uint32_t mismatchAccumulator = 0; @@ -199,7 +392,7 @@ void validateEdgeVisibility(uint32_t sil, int vertexCount, uint32_t generatedSil if (mismatchMask != 0) { NBL_UNROLL - for (int j = 0; j < 12; j++) + for (int32_t j = 0; j < 12; j++) { if ((mismatchMask >> j) & 1u) { diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl new file mode 100644 index 000000000..d213d8b94 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl @@ -0,0 +1,247 @@ +#ifndef _SAMPLING_HLSL_ +#define _SAMPLING_HLSL_ + +// Include the spherical triangle utilities +#include +#include +#include "nbl/builtin/hlsl/random/pcg.hlsl" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" + +using namespace nbl::hlsl; +// Sampling mode enum +#define SAMPLING_MODE_SOLID_ANGLE 0 +#define SAMPLING_MODE_PROJECTED_SOLID_ANGLE 1 + +// Maximum number of triangles we can have after clipping +// Without clipping, max 3 faces can be visible at once +// With clipping, can be more. 7 - 2 = 5 max triangles because fanning from one vertex +#define MAX_TRIANGLES 5 + +struct SamplingData +{ + float32_t triangleWeights[MAX_TRIANGLES]; + uint32_t triangleIndices[MAX_TRIANGLES]; // Store the 'i' value for each valid triangle + uint32_t count; + float32_t totalWeight; +}; + +float32_t2 nextRandomUnorm2(inout nbl::hlsl::Xoroshiro64StarStar rnd) +{ + return float32_t2( + float32_t(rnd()) * 2.3283064365386963e-10, + float32_t(rnd()) * 2.3283064365386963e-10); +} + +float32_t computeProjectedSolidAngleFallback(float32_t3 v0, float32_t3 v1, float32_t3 v2, float32_t3 N) +{ + // 1. Get edge normals (unit vectors) + // We use the cross product of the vertices (unit vectors on sphere) + float32_t3 n0 = cross(v0, v1); + float32_t3 n1 = cross(v1, v2); + float32_t3 n2 = cross(v2, v0); + + // 2. Normalize edge normals (magnitude is sin of the arc length) + float32_t l0 = length(n0); + float32_t l1 = length(n1); + float32_t l2 = length(n2); + + // Guard against degenerate triangles + if (l0 < 1e-7 || l1 < 1e-7 || l2 < 1e-7) + return 0.0f; + + n0 /= l0; + n1 /= l1; + n2 /= l2; + + // 3. Get arc lengths (angles in radians) + float32_t a = asin(clamp(l0, -1.0, 1.0)); // side v0-v1 + float32_t b = asin(clamp(l1, -1.0, 1.0)); // side v1-v2 + float32_t c = asin(clamp(l2, -1.0, 1.0)); // side v2-v0 + + // Handle acos/asin quadrant if dot product is negative + if (dot(v0, v1) < 0) + a = 3.14159265 - a; + if (dot(v1, v2) < 0) + b = 3.14159265 - b; + if (dot(v2, v0) < 0) + c = 3.14159265 - c; + + // 4. Compute projected solid angle + float32_t Gamma = 0.5f * (a * dot(n0, N) + b * dot(n1, N) + c * dot(n2, N)); + + // Return the absolute value of the total (to handle CW/CCW triangles) + return abs(Gamma); +} + +// Build sampling data - store weights and vertex indices +SamplingData buildSamplingDataFromSilhouette(ClippedSilhouette silhouette, int32_t samplingMode) +{ + SamplingData data; + data.count = 0; + data.totalWeight = 0; + + if (silhouette.count < 3) + return data; + + float32_t3 v0 = silhouette.vertices[0]; + float32_t3 origin = float32_t3(0, 0, 0); + + for (uint32_t i = 1; i < silhouette.count - 1; i++) + { + float32_t3 v1 = silhouette.vertices[i]; + float32_t3 v2 = silhouette.vertices[i + 1]; + + shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin); + + if (shapeTri.pyramidAngles()) + continue; + + float32_t weight; + if (samplingMode == SAMPLING_MODE_PROJECTED_SOLID_ANGLE) + { + float32_t3 faceNormal = normalize(cross(v1 - v0, v2 - v0)); // TODO: precompute? + weight = computeProjectedSolidAngleFallback(normalize(v0), normalize(v1), normalize(v2), faceNormal); + } + else + { + weight = shapeTri.solidAngleOfTriangle(); + } + + if (weight <= 0.0f) + continue; + + data.triangleWeights[data.count] = weight; + data.triangleIndices[data.count] = i; // Store the original vertex index, we need to account for skipped degenerate triangles. + data.totalWeight += weight; + data.count++; + } + +#ifdef DEBUG_DATA + // Assert no edge has both vertices antipodal (lune case) + for (uint32_t i = 0; i < silhouette.count; i++) + { + uint32_t j = (i + 1) % silhouette.count; + float32_t3 n1 = normalize(silhouette.vertices[i]); + float32_t3 n2 = normalize(silhouette.vertices[j]); + + // Check if vertices are antipodal + bool antipodal = dot(n1, n2) < -0.99f; + + assert(false && "Spherical lune detected: antipodal silhouette edge"); + } +#endif + + DebugDataBuffer[0].maxTrianglesExcceded = data.count > MAX_TRIANGLES; + return data; +} + +float32_t3 sampleFromData(SamplingData data, ClippedSilhouette silhouette, float32_t2 xi, out float32_t pdf, out uint32_t selectedIdx) +{ + if (data.count == 0 || data.totalWeight <= 0.0f) + { + pdf = 0; + selectedIdx = 0; + return float32_t3(0, 0, 1); + } + + // Select triangle using uniform random sampling weighted by importance + float32_t toFind = xi.x * data.totalWeight; + uint32_t triIdx = 0; + float32_t cumulativeWeight = 0.0f; + float32_t prevCumulativeWeight = 0.0f; + + NBL_UNROLL + for (uint32_t i = 0; i < data.count; i++) + { + prevCumulativeWeight = cumulativeWeight; + cumulativeWeight += data.triangleWeights[i]; + if (toFind <= cumulativeWeight) + { + triIdx = i; + break; + } + } + + selectedIdx = triIdx; + + // Remap xi.x to [0,1] within the selected triangle's weight range + float32_t triMin = prevCumulativeWeight; + float32_t triMax = cumulativeWeight; + float32_t triWeight = triMax - triMin; + float32_t u = (toFind - triMin) / max(triWeight, 1e-7f); + + // Reconstruct the triangle using the stored vertex index + uint32_t vertexIdx = data.triangleIndices[triIdx]; // We need to account for skipped degenerate triangles. + float32_t3 v0 = silhouette.vertices[0]; + float32_t3 v1 = silhouette.vertices[vertexIdx]; + float32_t3 v2 = silhouette.vertices[vertexIdx + 1]; + float32_t3 origin = float32_t3(0, 0, 0); + + shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin); + sampling::SphericalTriangle samplingTri = sampling::SphericalTriangle::create(shapeTri); + + // Sample from the selected triangle using remapped u and original xi.y + float32_t rcpPdf; + float32_t3 direction = samplingTri.generate(rcpPdf, float32_t2(u, xi.y)); + + float32_t trianglePdf = 1.0f / rcpPdf; + pdf = trianglePdf * (data.triangleWeights[triIdx] / data.totalWeight); + + return normalize(direction); +} + +float32_t4 visualizeSamples(float32_t2 screenUV, float32_t3 spherePos, ClippedSilhouette silhouette, + int32_t samplingMode, SamplingData samplingData, int32_t numSamples) +{ + float32_t4 accumColor = 0; + + if (samplingData.count == 0) + return 0; + + float32_t2 pssSize = float32_t2(0.3, 0.3); // 30% of screen + float32_t2 pssPos = float32_t2(0.01, 0.01); // Offset from corner + bool isInsidePSS = all(and(screenUV >= pssPos, screenUV <= (pssPos + pssSize))); + + for (int32_t i = 0; i < numSamples; i++) + { + nbl::hlsl::random::PCG32 seedGen = nbl::hlsl::random::PCG32::construct(pc.frameIndex * 65536u + i); + const uint32_t seed1 = seedGen(); + const uint32_t seed2 = seedGen(); + nbl::hlsl::Xoroshiro64StarStar rnd = nbl::hlsl::Xoroshiro64StarStar::construct(uint32_t2(seed1, seed2)); + float32_t2 xi = nextRandomUnorm2(rnd); + + float32_t pdf; + uint32_t triIdx; + float32_t3 sampleDir = sampleFromData(samplingData, silhouette, xi, pdf, triIdx); + + float32_t dist3D = distance(sampleDir, normalize(spherePos)); + float32_t alpha3D = 1.0f - smoothstep(0.0f, 0.02f, dist3D); + + if (alpha3D > 0.0f && !isInsidePSS) + { + float32_t3 sampleColor = colorLUT[triIdx].rgb; + accumColor += float32_t4(sampleColor * alpha3D, alpha3D); + } + + if (isInsidePSS) + { + // Map the raw xi to the PSS square dimensions + float32_t2 xiPixelPos = pssPos + xi * pssSize; + float32_t dist2D = distance(screenUV, xiPixelPos); + + float32_t alpha2D = drawCross2D(screenUV, xiPixelPos, 0.005f, 0.001f); + if (alpha2D > 0.0f) + { + float32_t3 sampleColor = colorLUT[triIdx].rgb; + accumColor += float32_t4(sampleColor * alpha2D, alpha2D); + } + } + } + + // just the outline of the PSS + if (isInsidePSS && accumColor.a < 0.1) + accumColor = float32_t4(0.1, 0.1, 0.1, 1.0); + + return accumColor; +} +#endif diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index 8cc46bd25..31cbe577a 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -13,12 +13,17 @@ using namespace ext::FullScreenTriangle; static const float CIRCLE_RADIUS = 0.5f; // --- Geometry Utils --- +struct ClippedSilhouette +{ + float32_t3 vertices[7]; + uint32_t count; +}; -static const float3 constCorners[8] = { - float3(-1, -1, -1), float3(1, -1, -1), float3(-1, 1, -1), float3(1, 1, -1), - float3(-1, -1, 1), float3(1, -1, 1), float3(-1, 1, 1), float3(1, 1, 1)}; +static const float32_t3 constCorners[8] = { + float32_t3(-1, -1, -1), float32_t3(1, -1, -1), float32_t3(-1, 1, -1), float32_t3(1, 1, -1), + float32_t3(-1, -1, 1), float32_t3(1, -1, 1), float32_t3(-1, 1, 1), float32_t3(1, 1, 1)}; -static const int2 allEdges[12] = { +static const int32_t2 allEdges[12] = { {0, 1}, {2, 3}, {4, 5}, @@ -33,43 +38,33 @@ static const int2 allEdges[12] = { {3, 7}, // Z axis }; -// Adjacency of edges to faces -// Corrected Adjacency of edges to faces -static const int2 edgeToFaces[12] = { - // Edge Index: | allEdges[i] | Shared Faces: - - /* 0 (0-1) */ {4, 0}, // Y- (4) and Z- (0) - /* 1 (2-3) */ {5, 0}, // Y+ (5) and Z- (0) - /* 2 (4-5) */ {4, 1}, // Y- (4) and Z+ (1) - /* 3 (6-7) */ {5, 1}, // Y+ (5) and Z+ (1) - - /* 4 (0-2) */ {2, 0}, // X- (2) and Z- (0) - /* 5 (1-3) */ {3, 0}, // X+ (3) and Z- (0) - /* 6 (4-6) */ {2, 1}, // X- (2) and Z+ (1) - /* 7 (5-7) */ {3, 1}, // X+ (3) and Z+ (1) - - /* 8 (0-4) */ {2, 4}, // X- (2) and Y- (4) - /* 9 (1-5) */ {3, 4}, // X+ (3) and Y- (4) - /* 10 (2-6) */ {2, 5}, // X- (2) and Y+ (5) - /* 11 (3-7) */ {3, 5} // X+ (3) and Y+ (5) +// Maps face index (0-5) to its 4 corner indices in CCW order +static const uint32_t faceToCorners[6][4] = { + {0, 2, 3, 1}, // Face 0: Z- + {4, 5, 7, 6}, // Face 1: Z+ + {0, 4, 6, 2}, // Face 2: X- + {1, 3, 7, 5}, // Face 3: X+ + {0, 1, 5, 4}, // Face 4: Y- + {2, 6, 7, 3} // Face 5: Y+ }; -static float3 corners[8]; -static float3 faceCenters[6] = { - float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), - float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0)}; - -static const float3 localNormals[6] = { - float3(0, 0, -1), // Face 0 (Z-) - float3(0, 0, 1), // Face 1 (Z+) - float3(-1, 0, 0), // Face 2 (X-) - float3(1, 0, 0), // Face 3 (X+) - float3(0, -1, 0), // Face 4 (Y-) - float3(0, 1, 0) // Face 5 (Y+) + +static float32_t3 corners[8]; +static float32_t3 faceCenters[6] = { + float32_t3(0, 0, 0), float32_t3(0, 0, 0), float32_t3(0, 0, 0), + float32_t3(0, 0, 0), float32_t3(0, 0, 0), float32_t3(0, 0, 0)}; + +static const float32_t3 localNormals[6] = { + float32_t3(0, 0, -1), // Face 0 (Z-) + float32_t3(0, 0, 1), // Face 1 (Z+) + float32_t3(-1, 0, 0), // Face 2 (X-) + float32_t3(1, 0, 0), // Face 3 (X+) + float32_t3(0, -1, 0), // Face 4 (Y-) + float32_t3(0, 1, 0) // Face 5 (Y+) }; // TODO: unused, remove later // Vertices are ordered CCW relative to the camera view. -static const int silhouettes[27][7] = { +static const int32_t silhouettes[27][7] = { {6, 1, 3, 2, 6, 4, 5}, // 0: Black {6, 2, 6, 4, 5, 7, 3}, // 1: White {6, 0, 4, 5, 7, 3, 2}, // 2: Gray @@ -130,22 +125,22 @@ static const uint32_t binSilhouettes[27] = { 0b11000000000000011010110100101001, }; -int getSilhouetteVertex(uint32_t packedSil, int index) +int32_t getSilhouetteVertex(uint32_t packedSil, int32_t index) { return (packedSil >> (3 * index)) & 0x7; } // Get silhouette size -int getSilhouetteSize(uint32_t sil) +int32_t getSilhouetteSize(uint32_t sil) { return (sil >> 29) & 0x7; } // Check if vertex has negative z -bool getVertexZNeg(int vertexIdx) +bool getVertexZNeg(int32_t vertexIdx) { #if FAST - float3 localPos = float3( + float32_t3 localPos = float32_t3( (vertexIdx & 1) ? 1.0f : -1.0f, (vertexIdx & 2) ? 1.0f : -1.0f, (vertexIdx & 4) ? 1.0f : -1.0f); @@ -157,7 +152,8 @@ bool getVertexZNeg(int vertexIdx) #endif } -float3 getVertex(int vertexIdx) +// Get world position of cube vertex +float32_t3 getVertex(int32_t vertexIdx) { #if FAST // Reconstruct local cube corner from index bits @@ -165,7 +161,7 @@ float3 getVertex(int vertexIdx) float sy = (vertexIdx & 2) ? 1.0f : -1.0f; float sz = (vertexIdx & 4) ? 1.0f : -1.0f; - float4x3 model = transpose(pc.modelMatrix); + float32_t4x3 model = transpose(pc.modelMatrix); // Transform to world // Full position, not just Z like getVertexZNeg @@ -173,21 +169,22 @@ float3 getVertex(int vertexIdx) model[1].xyz * sy + model[2].xyz * sz + model[3].xyz; - // return mul(pc.modelMatrix, float4(sx, sy, sz, 1.0f)); + // return mul(pc.modelMatrix, float32_t4(sx, sy, sz, 1.0f)); #else return corners[vertexIdx]; #endif } #include "Drawing.hlsl" +#include "Sampling.hlsl" -void setDebugData(uint32_t sil, int3 region, int configIndex) +void setDebugData(uint32_t sil, int32_t3 region, int32_t configIndex) { #if DEBUG_DATA - DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); - DebugDataBuffer[0].region = uint3(region); + DebugDataBuffer[0].region = uint32_t3(region); DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); - for (int i = 0; i < 6; i++) + DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); + for (int32_t i = 0; i < 6; i++) { DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); } @@ -195,29 +192,29 @@ void setDebugData(uint32_t sil, int3 region, int configIndex) #endif } -float2 toCircleSpace(float2 uv) +float32_t2 toCircleSpace(float32_t2 uv) { - float2 p = uv * 2.0f - 1.0f; + float32_t2 p = uv * 2.0f - 1.0f; float aspect = pc.viewport.z / pc.viewport.w; p.x *= aspect; return p; } -uint32_t packSilhouette(const int s[7]) +uint32_t packSilhouette(const int32_t s[7]) { uint32_t packed = 0; - int size = s[0] & 0x7; // 3 bits for size + int32_t size = s[0] & 0x7; // 3 bits for size // Pack vertices LSB-first (vertex1 in lowest 3 bits above size) - for (int i = 1; i <= 6; ++i) + for (int32_t i = 1; i <= 6; ++i) { - int v = s[i]; + int32_t v = s[i]; if (v < 0) v = 0; // replace unused vertices with 0 packed |= (v & 0x7) << (3 * (i - 1)); // vertex i-1 shifted by 3*(i-1) } - // Put size in the MSB (bits 29-31 for a 32-bit uint, leaving 29 bits for vertices) + // Put size in the MSB (bits 29-31 for a 32-bit uint32_t, leaving 29 bits for vertices) packed |= (size & 0x7) << 29; return packed; @@ -225,211 +222,201 @@ uint32_t packSilhouette(const int s[7]) void computeCubeGeo() { - for (int i = 0; i < 8; i++) + for (int32_t i = 0; i < 8; i++) + corners[i] = mul(pc.modelMatrix, float32_t4(constCorners[i], 1.0f)).xyz; + + for (int32_t f = 0; f < 6; f++) { - float3 localPos = constCorners[i]; - float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; - corners[i] = worldPos.xyz; - faceCenters[i / 4] += worldPos / 4.0f; - faceCenters[2 + i % 2] += worldPos / 4.0f; - faceCenters[4 + (i / 2) % 2] += worldPos / 4.0f; + faceCenters[f] = float32_t3(0, 0, 0); + for (int32_t v = 0; v < 4; v++) + faceCenters[f] += corners[faceToCorners[f][v]]; + faceCenters[f] /= 4.0f; } } // Helper to draw an edge with proper color mapping -float4 drawEdge(int originalEdgeIdx, float3 pts[2], float3 spherePos, float aaWidth, float width = 0.01f) +float32_t4 drawEdge(int32_t originalEdgeIdx, float32_t3 pts[2], float32_t3 spherePos, float aaWidth, float width = 0.01f) { - float4 edgeContribution = drawGreatCircleArc(spherePos, pts, aaWidth, width); - return float4(colorLUT[originalEdgeIdx] * edgeContribution.a, edgeContribution.a); + float32_t4 edgeContribution = drawGreatCircleArc(spherePos, pts, aaWidth, width); + return float32_t4(colorLUT[originalEdgeIdx] * edgeContribution.a, edgeContribution.a); }; -float4 drawSilhouette(uint32_t vertexCount, uint32_t sil, float3 spherePos, float aaWidth) +float32_t4 computeSilhouette(uint32_t vertexCount, uint32_t sil, float32_t3 spherePos, float aaWidth, out ClippedSilhouette silhouette) { - float4 color = 0; + float32_t4 color = float32_t4(0, 0, 0, 0); + silhouette.count = 0; // Build clip mask (z < 0) - int clipMask = 0u; + int32_t clipMask = 0u; NBL_UNROLL - for (int i = 0; i < 4; i++) + for (int32_t i = 0; i < 4; i++) clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; if (vertexCount == 6) { NBL_UNROLL - for (int i = 4; i < 6; i++) + for (int32_t i = 4; i < 6; i++) clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; } - int clipCount = countbits(clipMask); + int32_t clipCount = countbits(clipMask); +#if 0 // Early exit if fully clipped - // if (clipCount == vertexCount) - // return color; + if (clipCount == vertexCount) + return color; // No clipping needed - fast path - // if (clipCount == 0) - // { - // for (int i = 0; i < vertexCount; i++) - // { - // int i0 = i; - // int i1 = (i + 1) % vertexCount; - - // float3 v0 = getVertex(getSilhouetteVertex(sil, i0)); - // float3 v1 = getVertex(getSilhouetteVertex(sil, i1)); - // float3 pts[2] = {v0, v1}; - - // color += drawEdge(i1, pts, spherePos, aaWidth); - // } - // return color; - // } + if (clipCount == 0) + { + for (int32_t i = 0; i < vertexCount; i++) + { + int32_t i0 = i; + int32_t i1 = (i + 1) % vertexCount; + + float32_t3 v0 = getVertex(getSilhouetteVertex(sil, i0)); + float32_t3 v1 = getVertex(getSilhouetteVertex(sil, i1)); + float32_t3 pts[2] = {v0, v1}; + + color += drawEdge(i1, pts, spherePos, aaWidth); + } + return color; + } +#endif // Rotate clip mask so positives come first uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); bool wrapAround = ((clipMask & 1u) != 0u) && ((clipMask & (1u << (vertexCount - 1))) != 0u); - int rotateAmount = wrapAround - ? firstbitlow(invertedMask) // -> First POSITIVE - : firstbithigh(clipMask) + 1; // -> First vertex AFTER last negative, + int32_t rotateAmount = wrapAround + ? firstbitlow(invertedMask) // -> First POSITIVE + : firstbithigh(clipMask) + 1; // -> First vertex AFTER last negative uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); uint32_t rotatedSil = rotr(sil, rotateAmount * 3, vertexCount * 3); - int positiveCount = vertexCount - clipCount; + int32_t positiveCount = vertexCount - clipCount; // ALWAYS compute both clip points - int lastPosIdx = positiveCount - 1; - int firstNegIdx = positiveCount; - float3 vLastPos = getVertex(getSilhouetteVertex(rotatedSil, lastPosIdx)); - float3 vFirstNeg = getVertex(getSilhouetteVertex(rotatedSil, firstNegIdx)); + int32_t lastPosIdx = positiveCount - 1; + int32_t firstNegIdx = positiveCount; + float32_t3 vLastPos = getVertex(getSilhouetteVertex(rotatedSil, lastPosIdx)); + float32_t3 vFirstNeg = getVertex(getSilhouetteVertex(rotatedSil, firstNegIdx)); float t = vLastPos.z / (vLastPos.z - vFirstNeg.z); - float3 clipA = lerp(vLastPos, vFirstNeg, t); + float32_t3 clipA = lerp(vLastPos, vFirstNeg, t); - float3 vLastNeg = getVertex(getSilhouetteVertex(rotatedSil, vertexCount - 1)); - float3 vFirstPos = getVertex(getSilhouetteVertex(rotatedSil, 0)); + float32_t3 vLastNeg = getVertex(getSilhouetteVertex(rotatedSil, vertexCount - 1)); + float32_t3 vFirstPos = getVertex(getSilhouetteVertex(rotatedSil, 0)); t = vLastNeg.z / (vLastNeg.z - vFirstPos.z); - float3 clipB = lerp(vLastNeg, vFirstPos, t); + float32_t3 clipB = lerp(vLastNeg, vFirstPos, t); // Draw positive edges NBL_UNROLL - for (int i = 0; i < positiveCount; i++) + for (int32_t i = 0; i < positiveCount; i++) { - float3 v0 = getVertex(getSilhouetteVertex(rotatedSil, i)); + float32_t3 v0 = getVertex(getSilhouetteVertex(rotatedSil, i)); // ONLY use clipA if we are at the end of the positive run AND there's a clip bool isLastPositive = (i == positiveCount - 1); bool useClipA = (clipCount > 0) && isLastPositive; // If not using clipA, wrap around to the next vertex - float3 v1 = useClipA ? clipA : getVertex(getSilhouetteVertex(rotatedSil, (i + 1) % vertexCount)); + float32_t3 v1 = useClipA ? clipA : getVertex(getSilhouetteVertex(rotatedSil, (i + 1) % vertexCount)); - float3 pts[2] = {v0, v1}; + float32_t3 pts[2] = {v0, v1}; color += drawEdge((i + 1) % vertexCount, pts, spherePos, aaWidth); + + silhouette.vertices[silhouette.count++] = v0; } - // NP edge if (clipCount > 0 && clipCount < vertexCount) { - float3 vFirst = getVertex(getSilhouetteVertex(rotatedSil, 0)); - float3 npPts[2] = {clipB, vFirst}; + // NP edge + float32_t3 vFirst = getVertex(getSilhouetteVertex(rotatedSil, 0)); + float32_t3 npPts[2] = {clipB, vFirst}; color += drawEdge(0, npPts, spherePos, aaWidth); - } - // Horizon arc - if (clipCount > 0 && clipCount < vertexCount) - { - float3 arcPts[2] = {clipA, clipB}; + // Horizon arc + float32_t3 arcPts[2] = {clipA, clipB}; color += drawEdge(23, arcPts, spherePos, aaWidth, 0.6f); + + silhouette.vertices[silhouette.count++] = clipA; + silhouette.vertices[silhouette.count++] = clipB; } #if DEBUG_DATA DebugDataBuffer[0].clipMask = clipMask; DebugDataBuffer[0].clipCount = clipCount; - { - int transitions = 0; - for (int i = 0; i < vertexCount; i++) - { - bool a = (rotatedClipMask >> i) & 1u; - bool b = (rotatedClipMask >> ((i + 1) % vertexCount)) & 1u; - if (a != b) - transitions++; - } - // transitions must be 0 or 2 - DebugDataBuffer[0].MoreThanTwoBitTransitions = transitions > 2; - DebugDataBuffer[0].rotatedClipMask = rotatedClipMask; - DebugDataBuffer[0].rotateAmount = rotateAmount; - DebugDataBuffer[0].positiveVertCount = positiveCount; - DebugDataBuffer[0].wrapAround = (uint32_t)wrapAround; - DebugDataBuffer[0].rotatedSil = rotatedSil; - } + DebugDataBuffer[0].rotatedClipMask = rotatedClipMask; + DebugDataBuffer[0].rotateAmount = rotateAmount; + DebugDataBuffer[0].positiveVertCount = positiveCount; + DebugDataBuffer[0].wrapAround = (uint32_t)wrapAround; + DebugDataBuffer[0].rotatedSil = rotatedSil; + #endif return color; } [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { - float4 color = float4(0, 0, 0, 0); - for (int i = 0; i < 1; i++) + float32_t4 color = float32_t4(0, 0, 0, 0); + for (int32_t i = 0; i < 1; i++) { + float aaWidth = length(float32_t2(ddx(vx.uv.x), ddy(vx.uv.y))); + float32_t2 p = toCircleSpace(vx.uv); - float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); - float2 p = toCircleSpace(vx.uv); - - float2 normalized = p / CIRCLE_RADIUS; + float32_t2 normalized = p / CIRCLE_RADIUS; float r2 = dot(normalized, normalized); - float3 spherePos; + float32_t3 spherePos; if (r2 <= 1.0f) { - spherePos = float3(normalized.x, normalized.y, sqrt(1.0f - r2)); + spherePos = float32_t3(normalized.x, normalized.y, sqrt(1.0f - r2)); } else { float uv2Plus1 = r2 + 1.0f; - spherePos = float3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; + spherePos = float32_t3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; } spherePos = normalize(spherePos); computeCubeGeo(); - float4x3 columnModel = transpose(pc.modelMatrix); - - float3 obbCenter = columnModel[3].xyz; - - float3x3 upper3x3 = (float3x3)columnModel; - - float3 rcpSqScales = rcp(float3( + float32_t4x3 columnModel = transpose(pc.modelMatrix); + float32_t3 obbCenter = columnModel[3].xyz; + float32_t3x3 upper3x3 = (float32_t3x3)columnModel; + float32_t3 rcpSqScales = rcp(float32_t3( dot(upper3x3[0], upper3x3[0]), dot(upper3x3[1], upper3x3[1]), dot(upper3x3[2], upper3x3[2]))); + float32_t3 normalizedProj = mul(upper3x3, obbCenter) * rcpSqScales; - float3 normalizedProj = mul(upper3x3, obbCenter) * rcpSqScales; - - int3 region = int3( + int32_t3 region = int32_t3( normalizedProj.x < -1.0f ? 0 : (normalizedProj.x > 1.0f ? 2 : 1), normalizedProj.y < -1.0f ? 0 : (normalizedProj.y > 1.0f ? 2 : 1), normalizedProj.z < -1.0f ? 0 : (normalizedProj.z > 1.0f ? 2 : 1)); - int configIndex = region.x + region.y * 3 + region.z * 9; + int32_t configIndex = region.x + region.y * 3 + region.z * 9; // uint32_t sil = packSilhouette(silhouettes[configIndex]); uint32_t sil = binSilhouettes[configIndex]; - int vertexCount = getSilhouetteSize(sil); - uint32_t silEdgeMask = 0; + int32_t vertexCount = getSilhouetteSize(sil); + uint32_t silEdgeMask = 0; // TODO: take from 'fast' computeSilhouette() #if DEBUG_DATA { - for (int i = 0; i < vertexCount; i++) + for (int32_t i = 0; i < vertexCount; i++) { - int vIdx = i % vertexCount; - int v1Idx = (i + 1) % vertexCount; + int32_t vIdx = i % vertexCount; + int32_t v1Idx = (i + 1) % vertexCount; - int v0Corner = getSilhouetteVertex(sil, vIdx); - int v1Corner = getSilhouetteVertex(sil, v1Idx); + int32_t v0Corner = getSilhouetteVertex(sil, vIdx); + int32_t v1Corner = getSilhouetteVertex(sil, v1Idx); // Mark edge as part of silhouette - for (int e = 0; e < 12; e++) + for (int32_t e = 0; e < 12; e++) { - int2 edge = allEdges[e]; + int32_t2 edge = allEdges[e]; if ((edge.x == v0Corner && edge.y == v1Corner) || (edge.x == v1Corner && edge.y == v0Corner)) { @@ -442,16 +429,36 @@ float4 drawSilhouette(uint32_t vertexCount, uint32_t sil, float3 spherePos, floa #endif uint32_t positiveCount = 0; - color += drawSilhouette(vertexCount, sil, spherePos, aaWidth); - setDebugData(sil, region, configIndex); - color += drawHiddenEdges(spherePos, silEdgeMask, aaWidth); - color += drawCorners(spherePos, p, aaWidth); + ClippedSilhouette silhouette; + color += computeSilhouette(vertexCount, sil, spherePos, aaWidth, silhouette); + // Draw clipped silhouette vertices + // color += drawClippedSilhouetteVertices(p, silhouette, aaWidth); + + SamplingData samplingData = buildSamplingDataFromSilhouette(silhouette, pc.samplingMode); + + uint32_t faceIndices[3]; + uint32_t visibleFaceCount = getVisibleFaces(region, faceIndices); + + // For debugging: Draw a small indicator of which faces are found + // color += drawVisibleFaceOverlay(spherePos, region, aaWidth); + + // color += drawFaces(spherePos, aaWidth); + + // Draw samples on sphere + color += visualizeSamples(vx.uv, spherePos, silhouette, pc.samplingMode, samplingData, 64); + + // Or draw 2D sample space (in a separate viewport) + // color += visualizePrimarySampleSpace(vx.uv, pc.samplingMode, 64, aaWidth); + + setDebugData(sil, region, configIndex); + // color += drawHiddenEdges(spherePos, silEdgeMask, aaWidth); + color += drawCorners(p, aaWidth); color += drawRing(p, aaWidth); - if (all(vx.uv >= float2(0.49f, 0.49f)) && all(vx.uv <= float2(0.51f, 0.51f))) + if (all(vx.uv >= float32_t2(0.49f, 0.49f)) && all(vx.uv <= float32_t2(0.51f, 0.51f))) { - return float4(colorLUT[configIndex], 1.0f); + return float32_t4(colorLUT[configIndex], 1.0f); } } diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl index c8532e796..dd0ab2d99 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -24,9 +24,11 @@ namespace nbl uint32_t clipCount; uint32_t rotatedSil; uint32_t wrapAround; + uint32_t rotatedClipMask; uint32_t rotateAmount; - uint32_t MoreThanTwoBitTransitions; + uint32_t maxTrianglesExcceded; + uint32_t vertices[6]; }; @@ -34,10 +36,15 @@ namespace nbl { float32_t3x4 modelMatrix; float32_t4 viewport; + uint32_t samplingMode; + uint32_t frameIndex; }; + // Sampling mode enum +#define SAMPLING_MODE_SOLID_ANGLE 0 +#define SAMPLING_MODE_PROJECTED_SOLID_ANGLE 1 static const float32_t3 colorLUT[27] = { - float32_t3(0, 0, 0), float32_t3(1, 1, 1), float32_t3(0.5, 0.5, 0.5), + float32_t3(0, 0, 0), float32_t3(0.5, 0.5, 0.5), float32_t3(1, 0, 0), float32_t3(0, 1, 0), float32_t3(0, 0, 1), float32_t3(1, 1, 0), float32_t3(1, 0, 1), float32_t3(0, 1, 1), float32_t3(1, 0.5, 0), float32_t3(1, 0.65, 0), float32_t3(0.8, 0.4, 0), @@ -45,14 +52,13 @@ namespace nbl float32_t3(0.5, 0, 0.5), float32_t3(0.6, 0.4, 0.8), float32_t3(0.3, 0, 0.5), float32_t3(0, 0.5, 0), float32_t3(0.5, 1, 0), float32_t3(0, 0.5, 0.25), float32_t3(0, 0, 0.5), float32_t3(0.3, 0.7, 1), float32_t3(0, 0.4, 0.6), - float32_t3(0.6, 0.4, 0.2), float32_t3(0.8, 0.7, 0.3), float32_t3(0.4, 0.3, 0.1)}; + float32_t3(0.6, 0.4, 0.2), float32_t3(0.8, 0.7, 0.3), float32_t3(0.4, 0.3, 0.1), float32_t3(1, 1, 1)}; #ifndef __HLSL_VERSION - static const char *colorNames[27] = {"Black", - "White", "Gray", "Red", "Green", "Blue", "Yellow", "Magenta", "Cyan", + static const char *colorNames[27] = {"Black", "Gray", "Red", "Green", "Blue", "Yellow", "Magenta", "Cyan", "Orange", "Light Orange", "Dark Orange", "Pink", "Light Pink", "Deep Rose", "Purple", "Light Purple", "Indigo", "Dark Green", "Lime", "Forest Green", "Navy", "Sky Blue", "Teal", "Brown", - "Tan/Beige", "Dark Brown"}; + "Tan/Beige", "Dark Brown", "White"}; #endif // __HLSL_VERSION } } diff --git a/73_SolidAngleVisualizer/include/transform.hpp b/73_SolidAngleVisualizer/include/transform.hpp index 538173223..e1ffcd764 100644 --- a/73_SolidAngleVisualizer/include/transform.hpp +++ b/73_SolidAngleVisualizer/include/transform.hpp @@ -168,18 +168,36 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti // Decompose original matrix nbl::hlsl::float32_t3 translation, rotation, scale; ImGuizmo::DecomposeMatrixToComponents(matrix, &translation.x, &rotation.x, &scale.x); - - float temp[16]; + // Create rotation-only matrix + nbl::hlsl::float32_t4x4 temp; nbl::hlsl::float32_t3 baseTranslation(0.0f); nbl::hlsl::float32_t3 baseScale(1.0f); - ImGuizmo::RecomposeMatrixFromComponents(&baseTranslation.x, &rotation.x, &baseScale.x, temp); - // Manipulate rotation only - ImGuizmo::ViewManipulate(temp, 1.0f, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + ImGuizmo::RecomposeMatrixFromComponents(&baseTranslation.x, &rotation.x, &baseScale.x, &temp[0][0]); + temp = nbl::hlsl::transpose(temp); - // Extract rotation from manipulated temp - nbl::hlsl::float32_t3 newRot; - ImGuizmo::DecomposeMatrixToComponents(temp, &baseTranslation.x, &newRot.x, &baseScale.x); + // Invert to make it "view-like" + nbl::hlsl::float32_t4x4 tempInv = nbl::hlsl::inverse(temp); + + // Create flip matrix (flip X to fix left/right) + nbl::hlsl::float32_t4x4 flip(1.0f); + flip[0][0] = -1.0f; // Flip X axis + + // Apply flip to the inverted matrix + tempInv = nbl::hlsl::mul(nbl::hlsl::mul(flip, tempInv), flip); + // Manipulate + ImGuizmo::ViewManipulate(&tempInv[0][0], 1.0f, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + + // Undo flip (flip is its own inverse, so multiply by flip again) + tempInv = nbl::hlsl::mul(nbl::hlsl::mul(flip, tempInv), flip); + + // Invert back to model space + temp = nbl::hlsl::inverse(tempInv); + temp = nbl::hlsl::transpose(temp); + + // Extract rotation + nbl::hlsl::float32_t3 newRot; + ImGuizmo::DecomposeMatrixToComponents(&temp[0][0], &baseTranslation.x, &newRot.x, &baseScale.x); // Recompose original matrix with new rotation but keep translation & scale ImGuizmo::RecomposeMatrixFromComponents(&translation.x, &newRot.x, &scale.x, matrix); diff --git a/73_SolidAngleVisualizer/main.cpp b/73_SolidAngleVisualizer/main.cpp index 64f4cb100..401ab71b3 100644 --- a/73_SolidAngleVisualizer/main.cpp +++ b/73_SolidAngleVisualizer/main.cpp @@ -420,7 +420,9 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { PushConstants pc{ .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), - .viewport = { 0.f,0.f,static_cast(creationParams.width),static_cast(creationParams.height) } + .viewport = { 0.f,0.f,static_cast(creationParams.width),static_cast(creationParams.height) }, + .samplingMode = m_samplingMode, + .frameIndex = m_frameSeeding ? static_cast(m_realFrameIx) : 0u }; auto pipeline = m_visualizationPipeline; cb->bindGraphicsPipeline(pipeline.get()); @@ -794,6 +796,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes constexpr static inline auto MaxImGUITextures = 2u + MaxFramesInFlight; + static inline uint32_t m_samplingMode = SAMPLING_MODE_SOLID_ANGLE; + static inline bool m_frameSeeding = true; static inline ResultData m_GPUOutResulData; // smart_refctd_ptr m_scene; @@ -855,13 +859,20 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); ImGui::Begin("Editor"); - //if (ImGui::RadioButton("Full view", !transformParams.useWindow)) - // transformParams.useWindow = false; + ImGui::Text("Sampling Mode: "); + ImGui::SameLine(); + + if (ImGui::RadioButton("Solid Angle", m_samplingMode == 0)) + m_samplingMode = SAMPLING_MODE_SOLID_ANGLE; + + ImGui::SameLine(); + + if (ImGui::RadioButton("Projected Solid Angle", m_samplingMode == 1)) + m_samplingMode = SAMPLING_MODE_PROJECTED_SOLID_ANGLE; - //ImGui::SameLine(); + ImGui::Checkbox("Frame seeding", &m_frameSeeding); - //if (ImGui::RadioButton("Window", transformParams.useWindow)) - // transformParams.useWindow = true; + ImGui::Separator(); ImGui::Text("Camera"); @@ -1108,7 +1119,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::Text("silhouette Vertex Count: %u", m_GPUOutResulData.silhouetteVertexCount); ImGui::Text("silhouette Positive VertexCount: %u", m_GPUOutResulData.positiveVertCount); ImGui::Text("Silhouette Mismatch: %s", m_GPUOutResulData.edgeVisibilityMismatch ? "true" : "false"); - ImGui::Text("More Than Two Bit Transitions: %s", m_GPUOutResulData.MoreThanTwoBitTransitions ? "true" : "false"); + ImGui::Text("More Than Two Bit Transitions: %s", m_GPUOutResulData.maxTrianglesExcceded ? "true" : "false"); { float32_t3 xAxis = m_OBBModelMatrix[0].xyz; @@ -1136,12 +1147,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR lastSilhouetteIndex = m_GPUOutResulData.silhouetteIndex; } - if (!m_GPUOutResulData.edgeVisibilityMismatch || !m_GPUOutResulData.MoreThanTwoBitTransitions) + if (!m_GPUOutResulData.edgeVisibilityMismatch || !m_GPUOutResulData.maxTrianglesExcceded) { // Reset flag when mismatch is cleared modalShown = false; } - if ((m_GPUOutResulData.edgeVisibilityMismatch || m_GPUOutResulData.MoreThanTwoBitTransitions) && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown) // 13 means we're inside the cube, so don't care + if ((m_GPUOutResulData.edgeVisibilityMismatch || m_GPUOutResulData.maxTrianglesExcceded) && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown) // 13 means we're inside the cube, so don't care { // Open modal popup only once per configuration ImGui::OpenPopup("Edge Visibility Mismatch Warning"); From a52cdb681f056bedb6759950981123949b48aec1 Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 31 Dec 2025 15:39:05 +0100 Subject: [PATCH 139/219] sensors and sessions --- 22_RaytracedAO/Renderer.h | 13 - 22_RaytracedAO/common.h | 36 --- 22_RaytracedAO/main.cpp | 314 +++------------------ 40_PathTracer/include/io/CSceneLoader.h | 26 +- 40_PathTracer/include/renderer/CRenderer.h | 14 + 5 files changed, 77 insertions(+), 326 deletions(-) diff --git a/22_RaytracedAO/Renderer.h b/22_RaytracedAO/Renderer.h index 5c8e45738..f3f7e43c7 100644 --- a/22_RaytracedAO/Renderer.h +++ b/22_RaytracedAO/Renderer.h @@ -24,20 +24,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac public: #include "rasterizationCommon.h" #include "raytraceCommon.h" - #ifdef __cplusplus - #undef uint - #undef vec4 - #undef mat4 - #undef mat4x3 - #endif - struct DenoiserArgs - { - std::filesystem::path bloomFilePath; - float bloomScale = 0.0f; - float bloomIntensity = 0.0f; - std::string tonemapperArgs = ""; - }; Renderer(nbl::video::IVideoDriver* _driver, nbl::asset::IAssetManager* _assetManager, nbl::scene::ISceneManager* _smgr, bool deferDenoise, bool useDenoiser = true); diff --git a/22_RaytracedAO/common.h b/22_RaytracedAO/common.h index 6e82bcb11..da7528213 100644 --- a/22_RaytracedAO/common.h +++ b/22_RaytracedAO/common.h @@ -2,42 +2,6 @@ #define _COMMON_INCLUDED_ -#define RAYCOUNT_N_BUFFERING_LOG2 2 -#define RAYCOUNT_N_BUFFERING (0x1< readBuffer = std::make_unique(readSize); - readFile.seekg(0, std::ios::beg); - readFile.read(reinterpret_cast(readBuffer.get()), readSize); - if (readFile.rdstate() == std::ios_base::goodbit) - { - uint64_t offset = 0; - - memcpy(&isBeauty, readBuffer.get() + offset, sizeof(bool)); - offset += sizeof(bool); - - memcpy(&isInteractiveMode, readBuffer.get() + offset, sizeof(bool)); - offset += sizeof(bool); - - memcpy(&isInteractiveViewMatrixLH, readBuffer.get() + offset, sizeof(bool)); - offset += sizeof(bool); - - memcpy(&startSensorID, readBuffer.get() + offset, sizeof(uint32_t)); - offset += sizeof(uint32_t); - - memcpy(&processSensorsBehaviour, readBuffer.get() + offset, sizeof(ProcessSensorsBehaviour)); - offset += sizeof(ProcessSensorsBehaviour); - - memcpy(&interactiveCameraViewMatrix, readBuffer.get() + offset, sizeof(core::matrix3x4SIMD)); - offset += sizeof(core::matrix3x4SIMD); - - const char* path = reinterpret_cast(readBuffer.get() + offset); - zipPath = std::string(path); - offset += zipPath.length() + 1; - - path = reinterpret_cast(readBuffer.get() + offset); - xmlPath = std::string(path); - offset += xmlPath.length() + 1; - - readSuccess = (offset == static_cast(readSize)); - } - } - - readFile.close(); - } - - return readSuccess; - } - - bool writeToDisk() const - { - bool writeSuccess = false; - std::ofstream outFile("lastRun.cache", std::ios::out | std::ios::binary); - if (outFile.is_open()) - { - const size_t writeSize = getSerializedMemorySize(); - - std::unique_ptr writeBuffer = std::make_unique(writeSize); - - uint64_t offset = 0; - - memcpy(writeBuffer.get() + offset, &isBeauty, sizeof(bool)); - offset += sizeof(bool); - - memcpy(writeBuffer.get() + offset, &isInteractiveMode, sizeof(bool)); - offset += sizeof(bool); - - memcpy(writeBuffer.get() + offset, &isInteractiveViewMatrixLH, sizeof(bool)); - offset += sizeof(bool); - - memcpy(writeBuffer.get() + offset, &startSensorID, sizeof(uint32_t)); - offset += sizeof(uint32_t); - - memcpy(writeBuffer.get() + offset, &processSensorsBehaviour, sizeof(ProcessSensorsBehaviour)); - offset += sizeof(ProcessSensorsBehaviour); - - memcpy(writeBuffer.get() + offset, &interactiveCameraViewMatrix, sizeof(core::matrix3x4SIMD)); - offset += sizeof(core::matrix3x4SIMD); - - memcpy(writeBuffer.get() + offset, zipPath.c_str(), zipPath.length() + 1); - offset += zipPath.length() + 1; - - memcpy(writeBuffer.get() + offset, xmlPath.c_str(), xmlPath.length() + 1); - offset += xmlPath.length() + 1; - - assert(offset == static_cast(writeSize)); - - outFile.write(reinterpret_cast(writeBuffer.get()), writeSize); - if (outFile.rdstate() == std::ios_base::goodbit) - writeSuccess = true; - - outFile.close(); - } - - if (!writeSuccess) - printf("[ERROR]: Failed to write the persistent state cache.\n"); - - return writeSuccess; - } - -private: - inline size_t getSerializedMemorySize() const - { - const size_t result = - sizeof(bool) + // isBeauty - sizeof(bool) + // isInteractiveMode - sizeof(bool) + // isInteractiveViewMatrixLH - sizeof(uint32_t) + // startSensorID - sizeof(ProcessSensorsBehaviour) + // processSensorsBehaviour - sizeof(core::matrix3x4SIMD) + // interactiveCameraViewMatrix - (zipPath.length() + 1) + - (xmlPath.length() + 1) ; - - return result; - } -}; int main(int argc, char** argv) { @@ -272,16 +139,7 @@ int main(int argc, char** argv) for (auto i = 1ul; i < argc; ++i) arguments.emplace_back(argv[i]); } - std::cout << std::endl; - std::cout << "-- Build URL:" << std::endl; - std::cout << NBL_BUILD_URL << std::endl; - std::cout << std::endl; - std::cout << "-- Build log:" << std::endl; - std::cout << NBL_GIT_LOG << std::endl; - std::cout << std::endl; - - bool applicationIsReloaded = false; - PersistentState applicationState; + { CommandLineHandler cmdHandler = CommandLineHandler(arguments); @@ -291,8 +149,6 @@ int main(int argc, char** argv) applicationState.isDenoiseDeferred = cmdHandler.getDeferredDenoiseFlag(); auto sceneDir = cmdHandler.getSceneDirectory(); - if ((sceneDir.size() == 1) && (sceneDir[0] == "")) // special condition for reloading the application - applicationIsReloaded = true; std::string filePath = (sceneDir.size() >= 1) ? sceneDir[0] : ""; // zip or xml std::string extraPath = (sceneDir.size() >= 2) ? sceneDir[1] : "";; // xml in zip @@ -315,34 +171,14 @@ int main(int argc, char** argv) bool takeScreenShots = true; std::string mainFileName; // std::filesystem::path(filePath).filename().string(); - // create device with full flexibility over creation parameters - // you can add more parameters if desired, check nbl::SIrrlichtCreationParameters - nbl::SIrrlichtCreationParameters params; - params.Bits = 24; //may have to set to 32bit for some platforms - params.ZBufferBits = 24; - params.DriverType = video::EDT_OPENGL; - params.Fullscreen = false; - params.Vsync = false; - params.Doublebuffer = true; - params.Stencilbuffer = false; //! This will not even be a choice soon - params.WindowSize = dimension2d(1920, 1080); - auto device = createDeviceEx(params); - if (!device) - return 1; // could not create selected driver. +// DEVICE CREATION EMITTED // asset::SAssetBundle meshes = {}; core::smart_refctd_ptr globalMeta; { - io::IFileSystem* fs = device->getFileSystem(); - asset::IAssetManager* am = device->getAssetManager(); - auto serializedLoader = core::make_smart_refctd_ptr(am); - auto mitsubaLoader = core::make_smart_refctd_ptr(am, fs); - serializedLoader->initialize(); - mitsubaLoader->initialize(); - am->addAssetLoader(std::move(serializedLoader)); - am->addAssetLoader(std::move(mitsubaLoader)); +// LOADER ADDITION EMITTED if (applicationState.zipPath.empty() && applicationState.xmlPath.empty() && !applicationIsReloaded) { @@ -360,19 +196,9 @@ int main(int argc, char** argv) auto loadScene = [&device, &am, &fs](const std::string& _zipPath, std::string& _xmlPath, std::string& _mainFileName) -> asset::SAssetBundle { asset::SAssetBundle result = {}; - if (_zipPath.empty() && _xmlPath.empty()) - return result; - _mainFileName = ""; - if (!_zipPath.empty()) - { - _mainFileName = std::filesystem::path(_zipPath).filename().string(); - _mainFileName = _mainFileName.substr(0u, _mainFileName.find_first_of('.')); +// ADD ARCHIVE AND VALIDATION EMITTED - io::IFileArchive* arch = nullptr; - device->getFileSystem()->addFileArchive(_zipPath.c_str(), io::EFAT_ZIP, "", &arch); - if (!arch) - return result; auto flist = arch->getFileList(); if (!flist) @@ -449,12 +275,7 @@ int main(int argc, char** argv) } _mainFileName += std::string("_") + std::filesystem::path(_xmlPath.c_str()).filename().replace_extension().string(); - } - else if (!_xmlPath.empty()) - { - _mainFileName = std::filesystem::path(_xmlPath).filename().string(); - _mainFileName = _mainFileName.substr(0u, _mainFileName.find_first_of('.')); - } + printf("[INFO]: Loading XML file: %s\n", _xmlPath.c_str()); @@ -471,87 +292,39 @@ int main(int argc, char** argv) }; meshes = loadScene(applicationState.zipPath, applicationState.xmlPath, mainFileName); - if (meshes.getContents().empty() || applicationIsReloaded) - { - if (meshes.getContents().empty() && !applicationState.xmlPath.empty()) - printf("[ERROR]: Failed to load asset at: %s\n", applicationState.xmlPath.c_str()); - - // Restore state to get new values for zipPath and xmlPath and try loading again - printf("[INFO]: Trying to restore the application to its previous state.\n"); - - bool restoreSuccess = false; - if (applicationState.readFromDisk()) - { - meshes = loadScene(applicationState.zipPath, applicationState.xmlPath, mainFileName); - if (!meshes.getContents().empty()) - restoreSuccess = true; - } - if (!restoreSuccess) - { - pfd::message("ERROR", "Cannot restore application to its previous state.", pfd::choice::ok); - return 2; - } - } +// APPLICATION RESTORE OMITTED - globalMeta = core::smart_refctd_ptr(meshes.getMetadata()->selfCast()); - if (!globalMeta) - { - std::cout << "[ERROR] Couldn't get global Meta"; - return 3; - } - - std::cout << "Total number of Sensors = " << globalMeta->m_global.m_sensors.size() << std::endl; - - if (globalMeta->m_global.m_sensors.empty()) - { - std::cout << "[ERROR] No Sensors found." << std::endl; - assert(false); - return 5; // return code? - } - - if (applicationState.startSensorID >= globalMeta->m_global.m_sensors.size()) - { - applicationState.startSensorID = 0; - printf("[WARNING]: A valid sensor ID was not found. Selecting the sensor: %u\n", applicationState.startSensorID); - } - - // empty out the cache from individual images and meshes taht are not used by the scene - am->clearAllAssetCache(); } - constexpr float DefaultRotateSpeed = 300.0f; - constexpr float DefaultZoomSpeed = 1.0f; - constexpr float DefaultMoveSpeed = 100.0f; - constexpr float DefaultSceneDiagonal = 50.0f; // reference for default zoom and move speed; struct SensorData { - int32_t width = 0u; - int32_t height = 0u; - int32_t cropWidth = 0u; - int32_t cropHeight = 0u; - int32_t cropOffsetX = 0u; - int32_t cropOffsetY = 0u; - bool rightHandedCamera = true; - uint32_t samplesNeeded = 0u; - float moveSpeed = core::nan(); - float stepZoomSpeed = core::nan(); - float rotateSpeed = core::nan(); +int32_t width = 0u; +int32_t height = 0u; +int32_t cropWidth = 0u; +int32_t cropHeight = 0u; +int32_t cropOffsetX = 0u; +int32_t cropOffsetY = 0u; +bool rightHandedCamera = true; +uint32_t samplesNeeded = 0u; +float moveSpeed = core::nan(); +float stepZoomSpeed = core::nan(); +float rotateSpeed = core::nan(); scene::ICameraSceneNode * staticCamera; scene::ICameraSceneNode * interactiveCamera; - std::filesystem::path outputFilePath; - ext::MitsubaLoader::CElementSensor::Type type; +std::filesystem::path outputFilePath; +ext::MitsubaLoader::CElementSensor::Type type; ext::MitsubaLoader::CElementFilm::FileFormat fileFormat; - Renderer::DenoiserArgs denoiserInfo = {}; - int32_t cascadeCount = 1; - float cascadeLuminanceBase = core::nan(); - float cascadeLuminanceStart = core::nan(); - float kappa = 0.f; - float Emin = 0.05f; +Renderer::DenoiserArgs denoiserInfo = {}; +int32_t cascadeCount = 1; +float cascadeLuminanceBase = core::nan(); +float cascadeLuminanceStart = core::nan(); +float kappa = 0.f; +float Emin = 0.05f; bool envmap = false; float envmapRegFactor = 0.0f; - core::vector clipPlanes; +core::vector clipPlanes; scene::CSceneNodeAnimatorCameraModifiedMaya* getInteractiveCameraAnimator() { @@ -575,12 +348,12 @@ int main(int argc, char** argv) } }; - struct CubemapRender - { - uint32_t sensorIdx = 0u; - uint32_t getSensorsBeginIdx() const { return sensorIdx; } - uint32_t getSensorsEndIdx() const { return sensorIdx + 5; } - }; +struct CubemapRender +{ + uint32_t sensorIdx = 0u; + uint32_t getSensorsBeginIdx() const { return sensorIdx; } + uint32_t getSensorsEndIdx() const { return sensorIdx + 5; } +}; auto smgr = device->getSceneManager(); @@ -893,6 +666,7 @@ int main(int argc, char** argv) std::cout << "[WARN] CropOffsets are non-zero. cropping is not supported for non cubemap renders." << std::endl; } +#if 0 // camera setup non spherical mainSensorData.staticCamera = smgr->addCameraSceneNode(nullptr); auto& staticCamera = mainSensorData.staticCamera; @@ -975,6 +749,7 @@ int main(int argc, char** argv) { assert(false); } +#endif mainSensorData.resetInteractiveCamera(); sensors.push_back(mainSensorData); @@ -983,26 +758,17 @@ int main(int argc, char** argv) return true; }; - // Always add all the sensors because the interactive mode wants all the sensors. - for(uint32_t s = 0u; s < globalMeta->m_global.m_sensors.size(); ++s) - { - std::cout << "Sensors[" << s << "] = " << std::endl; - const auto& sensor = globalMeta->m_global.m_sensors[s]; - extractAndAddToSensorData(sensor, s); - } - auto driver = device->getVideoDriver(); core::smart_refctd_ptr renderer = core::make_smart_refctd_ptr(driver,device->getAssetManager(),smgr,applicationState.isDenoiseDeferred); renderer->initSceneResources(meshes,"LowDiscrepancySequenceCache.bin"); - // free memory - meshes = {}; - device->getAssetManager()->clearAllGPUObjects(); - - RaytracerExampleEventReceiver receiver; - device->setEventReceiver(&receiver); - // Deduce Move and Zoom Speeds if it is nan +// free memory +meshes = {}; +device->getAssetManager()->clearAllGPUObjects(); + + +// Deduce Move and Zoom Speeds if it is nan auto sceneBoundsExtent = renderer->getSceneBound().getExtent(); auto sceneDiagonal = sceneBoundsExtent.getLength(); diff --git a/40_PathTracer/include/io/CSceneLoader.h b/40_PathTracer/include/io/CSceneLoader.h index f33babb42..3d036ceaf 100644 --- a/40_PathTracer/include/io/CSceneLoader.h +++ b/40_PathTracer/include/io/CSceneLoader.h @@ -92,6 +92,7 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov // std::array clipPlanes = {}; + // float cascadeLuminanceBase = core::nan(); float cascadeLuminanceStart = core::nan(); } mutableDefaults = {}; @@ -103,10 +104,29 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov constexpr static inline float DefaultMoveSpeed = 100.0f; constexpr static inline float DefaultSceneDiagonal = 50.0f; // reference for default zoom and move speed; - uint32_t samplesNeeded = 0u; + // + union Raygen + { + hlsl::float32_t4x4 linearProj = {}; + } raygen; + union + { + struct SOrientable // spherical can't move + { + hlsl::float32_t3 up = {}; + float speed = core::nan(); + } orientable = {}; + }; + union + { + struct SZoomable // spherical can't zoom + { + float speed = core::nan(); + } zoomable = {}; + }; float moveSpeed = core::nan(); - float stepZoomSpeed = core::nan(); - float rotateSpeed = core::nan(); + // + uint32_t samplesNeeded = 0u; float kappa = 0.f; float Emin = 0.05f; } dynamicDefaults = {}; diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h index b3227fff7..e86c56aa5 100644 --- a/40_PathTracer/include/renderer/CRenderer.h +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -97,6 +97,20 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl // core::smart_refctd_ptr createScene(CScene::SCreationParams&& _params); + // session object + class CSession final : public core::IReferenceCounted, public core::InterfaceUnmovable + { + // sensor data + struct STransients + { +// core::smart_refctd_ptr<>; + } transients = {}; + + public: + // init + // deinit + }; + protected: struct SConstructorParams : SCachedCreationParams { From 58b42cfc87274db606d593d5baec787b626bb945 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 3 Jan 2026 17:10:03 +0100 Subject: [PATCH 140/219] remove all DEPENDS for NSC rules, now we have depfiles and glob for headers is done automatically --- .../CMakeLists.txt | 9 +------- 07_StagingAndMultipleQueues/CMakeLists.txt | 9 +------- 10_CountingSort/CMakeLists.txt | 8 ------- 11_FFT/CMakeLists.txt | 9 +------- 14_Mortons/CMakeLists.txt | 12 +--------- 22_CppCompat/CMakeLists.txt | 11 +-------- 24_ColorSpaceTest/CMakeLists.txt | 9 +------- 27_MPMCScheduler/CMakeLists.txt | 12 +--------- 62_CAD/CMakeLists.txt | 17 +------------- 64_EmulatedFloatTest/CMakeLists.txt | 11 +-------- 67_RayQueryGeometry/CMakeLists.txt | 9 +------- 70_FLIPFluids/CMakeLists.txt | 23 +------------------ 71_RayTracingPipeline/CMakeLists.txt | 18 --------------- common/src/nbl/examples/CMakeLists.txt | 5 +--- 14 files changed, 12 insertions(+), 150 deletions(-) diff --git a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt index 55ebaf41d..6e90f86cb 100644 --- a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt +++ b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt @@ -24,12 +24,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/shader.comp.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -50,7 +44,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -66,4 +59,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/07_StagingAndMultipleQueues/CMakeLists.txt b/07_StagingAndMultipleQueues/CMakeLists.txt index fe063be7c..b5648de8f 100644 --- a/07_StagingAndMultipleQueues/CMakeLists.txt +++ b/07_StagingAndMultipleQueues/CMakeLists.txt @@ -24,12 +24,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/comp_shader.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -50,7 +44,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -66,4 +59,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/10_CountingSort/CMakeLists.txt b/10_CountingSort/CMakeLists.txt index 14bde428d..1c23744fe 100644 --- a/10_CountingSort/CMakeLists.txt +++ b/10_CountingSort/CMakeLists.txt @@ -24,13 +24,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/prefix_sum_shader.comp.hlsl - app_resources/scatter_shader.comp.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(REQUIRED_CAPS [=[ @@ -72,7 +65,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} diff --git a/11_FFT/CMakeLists.txt b/11_FFT/CMakeLists.txt index ca9fe8428..6b6304ed8 100644 --- a/11_FFT/CMakeLists.txt +++ b/11_FFT/CMakeLists.txt @@ -24,12 +24,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/shader.comp.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -50,7 +44,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -66,4 +59,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/14_Mortons/CMakeLists.txt b/14_Mortons/CMakeLists.txt index 1c595e8bb..8229b36b5 100644 --- a/14_Mortons/CMakeLists.txt +++ b/14_Mortons/CMakeLists.txt @@ -30,15 +30,6 @@ else() endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/testCommon.hlsl - app_resources/testCommon2.hlsl - app_resources/test.comp.hlsl - app_resources/test2.comp.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -63,7 +54,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -79,4 +69,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/22_CppCompat/CMakeLists.txt b/22_CppCompat/CMakeLists.txt index 86a1c34fc..d7a203d2d 100644 --- a/22_CppCompat/CMakeLists.txt +++ b/22_CppCompat/CMakeLists.txt @@ -30,14 +30,6 @@ else() endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/test.comp.hlsl - app_resources/intrinsicsTest.comp.hlsl - app_resources/tgmathTest.comp.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -66,7 +58,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -82,4 +73,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/24_ColorSpaceTest/CMakeLists.txt b/24_ColorSpaceTest/CMakeLists.txt index a2feb2cb8..da95b3d8e 100644 --- a/24_ColorSpaceTest/CMakeLists.txt +++ b/24_ColorSpaceTest/CMakeLists.txt @@ -35,12 +35,6 @@ add_test(NAME NBL_IMAGE_HASH_RUN_TESTS ) set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/present.frag.hlsl - app_resources/push_constants.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -61,7 +55,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -77,4 +70,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/27_MPMCScheduler/CMakeLists.txt b/27_MPMCScheduler/CMakeLists.txt index 92531a8d5..7d7cfd71c 100644 --- a/27_MPMCScheduler/CMakeLists.txt +++ b/27_MPMCScheduler/CMakeLists.txt @@ -3,14 +3,6 @@ include(common) nbl_create_executable_project("" "" "" "") set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/mpmc_queue.hlsl - app_resources/schedulers/mpmc.hlsl - app_resources/shader.comp.hlsl - app_resources/workgroup/pool_allocator.hlsl - app_resources/workgroup/stack.hlsl -) set(JSON [=[ [ @@ -18,7 +10,6 @@ set(JSON [=[ "INPUT": "app_resources/shader.comp.hlsl", "KEY": "shader", "COMPILE_OPTIONS": ["-T", "cs_6_8"], - "DEPENDS": [], "CAPS": [] } ] @@ -27,7 +18,6 @@ set(JSON [=[ NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} @@ -43,4 +33,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/62_CAD/CMakeLists.txt b/62_CAD/CMakeLists.txt index 0928d3b61..7a700b861 100644 --- a/62_CAD/CMakeLists.txt +++ b/62_CAD/CMakeLists.txt @@ -64,20 +64,6 @@ else() endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - shaders/globals.hlsl - shaders/runtimeDeviceConfigCaps.hlsl - shaders/main_pipeline/common.hlsl - shaders/main_pipeline/dtm.hlsl - shaders/main_pipeline/fragment.hlsl - shaders/main_pipeline/fragment_shader.hlsl - shaders/main_pipeline/fragment_shader_debug.hlsl - shaders/main_pipeline/line_style.hlsl - shaders/main_pipeline/resolve_alphas.hlsl - shaders/main_pipeline/vertex_shader.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(REQUIRED_CAPS [=[ @@ -113,7 +99,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -129,4 +114,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/64_EmulatedFloatTest/CMakeLists.txt b/64_EmulatedFloatTest/CMakeLists.txt index af46da896..bd4de23ce 100644 --- a/64_EmulatedFloatTest/CMakeLists.txt +++ b/64_EmulatedFloatTest/CMakeLists.txt @@ -30,14 +30,6 @@ else() endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/test.comp.hlsl - app_resources/benchmark/benchmark.comp.hlsl - app_resources/benchmark/common.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -62,7 +54,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -78,4 +69,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/67_RayQueryGeometry/CMakeLists.txt b/67_RayQueryGeometry/CMakeLists.txt index 1fdfc03ce..768379100 100644 --- a/67_RayQueryGeometry/CMakeLists.txt +++ b/67_RayQueryGeometry/CMakeLists.txt @@ -28,12 +28,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/render.comp.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -54,7 +48,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -70,4 +63,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/70_FLIPFluids/CMakeLists.txt b/70_FLIPFluids/CMakeLists.txt index 842492167..96eb752c3 100644 --- a/70_FLIPFluids/CMakeLists.txt +++ b/70_FLIPFluids/CMakeLists.txt @@ -24,26 +24,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/compute/advectParticles.comp.hlsl - app_resources/compute/applyBodyForces.comp.hlsl - app_resources/compute/diffusion.comp.hlsl - app_resources/compute/genParticleVertices.comp.hlsl - app_resources/compute/particlesInit.comp.hlsl - app_resources/compute/prepareCellUpdate.comp.hlsl - app_resources/compute/pressureSolver.comp.hlsl - app_resources/compute/updateFluidCells.comp.hlsl - app_resources/cellUtils.hlsl - app_resources/common.hlsl - app_resources/descriptor_bindings.hlsl - app_resources/fluidParticles.fragment.hlsl - app_resources/fluidParticles.vertex.hlsl - app_resources/gridSampling.hlsl - app_resources/gridUtils.hlsl - app_resources/render_common.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -101,7 +81,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -117,4 +96,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/71_RayTracingPipeline/CMakeLists.txt b/71_RayTracingPipeline/CMakeLists.txt index d7bb13671..ca361af31 100644 --- a/71_RayTracingPipeline/CMakeLists.txt +++ b/71_RayTracingPipeline/CMakeLists.txt @@ -35,23 +35,6 @@ if(NBL_BUILD_IMGUI) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/light_directional.rcall.hlsl - app_resources/light_point.rcall.hlsl - app_resources/light_spot.rcall.hlsl - app_resources/present.frag.hlsl - app_resources/raytrace.rahit.hlsl - app_resources/raytrace.rchit.hlsl - app_resources/raytrace.rgen.hlsl - app_resources/raytrace.rint.hlsl - app_resources/raytrace.rmiss.hlsl - app_resources/raytrace_procedural.rchit.hlsl - app_resources/raytrace_shadow.rahit.hlsl - app_resources/raytrace_shadow.rmiss.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -116,7 +99,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} diff --git a/common/src/nbl/examples/CMakeLists.txt b/common/src/nbl/examples/CMakeLists.txt index e486b2b22..f3ad7fe91 100644 --- a/common/src/nbl/examples/CMakeLists.txt +++ b/common/src/nbl/examples/CMakeLists.txt @@ -29,7 +29,6 @@ set(JSON [=[ "INPUT": "shaders/geometry/unified.hlsl", "KEY": "shaders/geometry/unified", "COMPILE_OPTIONS": ["-T", "lib_6_6"], - "DEPENDS": [], "CAPS": [] } ] @@ -39,11 +38,10 @@ NBL_CREATE_NSC_COMPILE_RULES(${ARGS} INPUTS ${JSON}) set(JSON [=[ [ - { + { "INPUT": "shaders/geometry/unified.hlsl", "KEY": "shaders/geometry/unified-caps", "COMPILE_OPTIONS": ["-T", "lib_6_6"], - "DEPENDS": [], "CAPS": [ { "name": "shaderFloat64", @@ -61,7 +59,6 @@ set(JSON [=[ "INPUT": "shaders/geometry/unified.hlsl", "KEY": "shaders/geometry/unified-caps-2", "COMPILE_OPTIONS": ["-T", "lib_6_6"], - "DEPENDS": [], "CAPS": [ { "name": "shaderFloat64", From f90e26683f20e1bd67dd166f3a2e0478921087d4 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 6 Jan 2026 10:50:06 +0700 Subject: [PATCH 141/219] fix bug in ITester --- common/include/nbl/examples/Tester/ITester.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 01c4973fc..16572acdf 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -368,7 +368,7 @@ class ITester for (int i = 0; i < m_testIterationCount; ++i) { verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, m_seed, ITester::TestType::CPU); - verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, m_seed, ITester::TestType::GPU); + verifyTestResults(exceptedTestReults[i], gpuTestReults[i], i, m_seed, ITester::TestType::GPU); } } From b70d47266041dcdd848a7dd11e74fee15ab7c9d5 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 6 Jan 2026 16:26:12 +0700 Subject: [PATCH 142/219] setup quaternion unit test + some basic tests --- 59_QuaternionTests/CMakeLists.txt | 68 ++ 59_QuaternionTests/CQuaternionTester.h | 95 +++ 59_QuaternionTests/app_resources/common.hlsl | 74 ++ .../app_resources/quaternionTest.comp.hlsl | 19 + 59_QuaternionTests/main.cpp | 783 ++++++++++++++++++ CMakeLists.txt | 1 + 6 files changed, 1040 insertions(+) create mode 100644 59_QuaternionTests/CMakeLists.txt create mode 100644 59_QuaternionTests/CQuaternionTester.h create mode 100644 59_QuaternionTests/app_resources/common.hlsl create mode 100644 59_QuaternionTests/app_resources/quaternionTest.comp.hlsl create mode 100644 59_QuaternionTests/main.cpp diff --git a/59_QuaternionTests/CMakeLists.txt b/59_QuaternionTests/CMakeLists.txt new file mode 100644 index 000000000..84152c9b8 --- /dev/null +++ b/59_QuaternionTests/CMakeLists.txt @@ -0,0 +1,68 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() + +if(MSVC) + target_compile_options("${EXECUTABLE_NAME}" PUBLIC "/fp:strict") +else() + target_compile_options("${EXECUTABLE_NAME}" PUBLIC -ffloat-store -frounding-math -fsignaling-nans -ftrapping-math) +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/quaternionTest.comp.hlsl", + "KEY": "quaternionTest", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h new file mode 100644 index 000000000..cb6fecb2e --- /dev/null +++ b/59_QuaternionTests/CQuaternionTester.h @@ -0,0 +1,95 @@ +#ifndef _NBL_EXAMPLES_TESTS_59_QUATERNION_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_59_QUATERNION_TESTER_INCLUDED_ + + +#include +#include +#define GLM_ENABLE_EXPERIMENTAL +#include + +#include "nbl/examples/examples.hpp" +#include "app_resources/common.hlsl" +#include "nbl/examples/Tester/ITester.h" + +using namespace nbl; + +class CQuaternionTester final : public ITester +{ + using base_t = ITester; + +public: + CQuaternionTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; + +private: + QuaternionInputTestValues generateInputTestValues() override + { + std::uniform_real_distribution realDistribution(-100.0f, 100.0f); + std::uniform_real_distribution realDistributionSmall(1.0f, 4.0f); + std::uniform_int_distribution intDistribution(-100, 100); + std::uniform_int_distribution coinFlipDistribution(0, 1); + + QuaternionInputTestValues testInput; + testInput.axis = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.angle = realDistribution(getRandomEngine()); + testInput.quat0 = math::quaternion::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); + testInput.quat0 = hlsl::normalize(testInput.quat0); + testInput.quat1 = math::quaternion::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); + testInput.quat1 = hlsl::normalize(testInput.quat1); + testInput.pitch = realDistribution(getRandomEngine()); + testInput.yaw = realDistribution(getRandomEngine()); + testInput.roll = realDistribution(getRandomEngine()); + + return testInput; + } + + QuaternionTestValues determineExpectedResults(const QuaternionInputTestValues& testInput) override + { + // use std library functions to determine expected test values, the output of functions from tgmath.hlsl will be verified against these values + QuaternionTestValues expected; + { + const auto glmquat = glm::angleAxis(testInput.angle, testInput.axis); + expected.quatFromAngleAxis.data.x = glmquat.data.data[0]; + expected.quatFromAngleAxis.data.y = glmquat.data.data[1]; + expected.quatFromAngleAxis.data.z = glmquat.data.data[2]; + expected.quatFromAngleAxis.data.w = glmquat.data.data[3]; + } + { + const auto rotmat = glm::yawPitchRoll(testInput.yaw, testInput.pitch, testInput.roll); + const auto glmquat = glm::quat_cast(rotmat); + expected.quatFromEulerAngles.data.x = glmquat.data.data[0]; + expected.quatFromEulerAngles.data.y = glmquat.data.data[1]; + expected.quatFromEulerAngles.data.z = glmquat.data.data[2]; + expected.quatFromEulerAngles.data.w = glmquat.data.data[3]; + } + { + const auto glmquat = glm::quat(testInput.quat0.data.w, testInput.quat0.data.x, testInput.quat0.data.y, testInput.quat0.data.z); + const auto rotmat = glm::mat3_cast(glmquat); + expected.rotationMat[0] = rotmat[0]; + expected.rotationMat[1] = rotmat[1]; + expected.rotationMat[2] = rotmat[2]; + } + { + const auto glmquat0 = glm::quat(testInput.quat0.data.w, testInput.quat0.data.x, testInput.quat0.data.y, testInput.quat0.data.z); + const auto glmquat1 = glm::quat(testInput.quat1.data.w, testInput.quat1.data.x, testInput.quat1.data.y, testInput.quat1.data.z); + const auto mult = glmquat0 * glmquat1; + expected.quatMult.data.x = mult.data.data[0]; + expected.quatMult.data.y = mult.data.data[1]; + expected.quatMult.data.z = mult.data.data[2]; + expected.quatMult.data.w = mult.data.data[3]; + } + + return expected; + } + + void verifyTestResults(const QuaternionTestValues& expectedTestValues, const QuaternionTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override + { + verifyTestValue("create from axis angle", expectedTestValues.quatFromAngleAxis.data, testValues.quatFromAngleAxis.data, testIteration, seed, testType, 1e-2); + verifyTestValue("create from Euler angles", expectedTestValues.quatFromEulerAngles.data, testValues.quatFromEulerAngles.data, testIteration, seed, testType, 1e-2); + verifyTestValue("multiply quat", expectedTestValues.quatMult.data, testValues.quatMult.data, testIteration, seed, testType, 1e-2); + + verifyTestValue("construct matrix", expectedTestValues.rotationMat, testValues.rotationMat, testIteration, seed, testType, 1e-2); + } +}; + +#endif diff --git a/59_QuaternionTests/app_resources/common.hlsl b/59_QuaternionTests/app_resources/common.hlsl new file mode 100644 index 000000000..683f04150 --- /dev/null +++ b/59_QuaternionTests/app_resources/common.hlsl @@ -0,0 +1,74 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_EXAMPLES_TESTS_59_QUATERNION_COMMON_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_59_QUATERNION_COMMON_INCLUDED_ + +// because DXC doesn't properly support `_Static_assert` +// TODO: add a message, and move to macros.h or cpp_compat +#define STATIC_ASSERT(...) { nbl::hlsl::conditional<__VA_ARGS__, int, void>::type a = 0; } + +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include + +#include + + +#include +#include +#include + +#include +#include + +#include + +using namespace nbl::hlsl; +struct QuaternionInputTestValues +{ + math::quaternion quat0; + math::quaternion quat1; + float32_t3 axis; + float angle; + float pitch; + float yaw; + float roll; + float32_t3x3 rotationMat; +}; + +struct QuaternionTestValues +{ + math::quaternion quatFromAngleAxis; + math::quaternion quatFromEulerAngles; + float32_t3x3 rotationMat; + math::quaternion quatMult; +}; + +struct QuaternionTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(QuaternionInputTestValues) input, NBL_REF_ARG(QuaternionTestValues) output) + { + output.quatFromAngleAxis = math::quaternion::create(input.axis, input.angle); + output.quatFromEulerAngles = math::quaternion::create(input.pitch, input.yaw, input.roll); + output.rotationMat = input.quat0.constructMatrix(); + output.quatMult = input.quat0 * input.quat1; + } +}; + +#endif diff --git a/59_QuaternionTests/app_resources/quaternionTest.comp.hlsl b/59_QuaternionTests/app_resources/quaternionTest.comp.hlsl new file mode 100644 index 000000000..5d3e6577a --- /dev/null +++ b/59_QuaternionTests/app_resources/quaternionTest.comp.hlsl @@ -0,0 +1,19 @@ +//// Copyright (C) 2023-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) + +#include "common.hlsl" +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(256, 1, 1)] +[shader("compute")] +void main() +{ + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + QuaternionTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +} \ No newline at end of file diff --git a/59_QuaternionTests/main.cpp b/59_QuaternionTests/main.cpp new file mode 100644 index 000000000..866356d4e --- /dev/null +++ b/59_QuaternionTests/main.cpp @@ -0,0 +1,783 @@ +// Copyright (C) 2018-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +#include "app_resources/common.hlsl" + +#include "CQuaternionTester.h" + +#include +#include +#include + + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +//using namespace glm; + +void cpu_tests(); + +struct S +{ + float32_t3 f; +}; + +struct T +{ + float32_t a; + float32_t3 b; + S c; + float32_t2x3 d; + float32_t2x3 e; + int f[3]; + float32_t2 g[2]; + float32_t4 h; +}; + +class QuaternionTest final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication +{ + using device_base_t = application_templates::MonoDeviceApplication; + using asset_base_t = BuiltinResourcesApplication; +public: + QuaternionTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + { + CQuaternionTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + pplnSetupData.shaderKey = nbl::this_example::builtin::build::get_spirv_key<"quaternionTest">(m_device.get()); + + CQuaternionTester quaternionTester(8); + quaternionTester.setupPipeline(pplnSetupData); + quaternionTester.performTestsAndVerifyResults("QuaternionTestLog.txt"); + } + + //m_queue = m_device->getQueue(0, 0); + //m_commandPool = m_device->createCommandPool(m_queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + //m_commandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &m_cmdbuf,1 }, smart_refctd_ptr(m_logger)); + + //smart_refctd_ptr shader; + //{ + // IAssetLoader::SAssetLoadParams lp = {}; + // lp.logger = m_logger.get(); + // lp.workingDirectory = "app_resources"; // virtual root + // auto key = nbl::this_example::builtin::build::get_spirv_key<"intrinsicsTest">(m_device.get()); + // auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + // const auto assets = assetBundle.getContents(); + // if (assets.empty()) + // return logFail("Could not load shader!"); + + // auto source = IAsset::castDown(assets[0]); + // // The down-cast should not fail! + // assert(source); + + // // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple + // shader = m_device->compileShader({ source.get() }); + // if (!shader) + // return logFail("Creation of a GPU Shader to from CPU Shader source failed!"); + //} + + //const uint32_t bindingCount = 4u; + //IGPUDescriptorSetLayout::SBinding bindings[bindingCount] = {}; + //bindings[0].type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE; + //bindings[1].type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE; + //bindings[2].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; + //bindings[3].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; + // + // for(int i = 0; i < bindingCount; ++i) + // { + // bindings[i].stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; + // bindings[i].count = 1; + // bindings[i].binding = i; + // } + //m_descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + // { + // SPushConstantRange pcRange = {}; + // pcRange.stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; + // pcRange.offset = 0u; + // pcRange.size = 2 * sizeof(uint32_t); + // auto layout = m_device->createPipelineLayout({ &pcRange,1 }, smart_refctd_ptr(m_descriptorSetLayout)); + // IGPUComputePipeline::SCreationParams params = {}; + // params.layout = layout.get(); + // params.shader.shader = shader.get(); + // params.shader.entryPoint = "main"; + // if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) + // return logFail("Failed to create compute pipeline!\n"); + // } + + + // for (int i = 0; i < 2; ++i) + // { + // m_images[i] = m_device->createImage(IGPUImage::SCreationParams { + // { + // .type = IGPUImage::E_TYPE::ET_2D, + // .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, + // .format = E_FORMAT::EF_R32G32B32A32_SFLOAT, + // .extent = { 1920,1080,1 }, + // .mipLevels = 1, + // .arrayLayers = 1, + // .usage = IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT + // | IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT + // | IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT, + // }, {}, IGPUImage::TILING::LINEAR, + // }); + + // auto reqs = m_images[i]->getMemoryReqs(); + // reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + // m_device->allocate(reqs, m_images[i].get()); + + // m_imageViews[i] = m_device->createImageView(IGPUImageView::SCreationParams { + // .image = m_images[i], + // .viewType = IGPUImageView::E_TYPE::ET_2D, + // .format = E_FORMAT::EF_R32G32B32A32_SFLOAT, + // // .subresourceRange = { IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, 0, 1, 0, 1 }, + // }); + + // m_buffers[i] = m_device->createBuffer(IGPUBuffer::SCreationParams { + // {.size = reqs.size, .usage = + // IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT | + // IGPUBuffer::E_USAGE_FLAGS::EUF_STORAGE_BUFFER_BIT, + // } + // }); + + // reqs = m_buffers[i]->getMemoryReqs(); + // reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getHostVisibleMemoryTypeBits(); + // m_device->allocate(reqs, m_buffers[i].get()); + + // m_readbackBuffers[i] = m_device->createBuffer(IGPUBuffer::SCreationParams { + // {.size = reqs.size, .usage = IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT } + // }); + + // reqs = m_readbackBuffers[i]->getMemoryReqs(); + // reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getHostVisibleMemoryTypeBits(); + // m_device->allocate(reqs, m_readbackBuffers[i].get()); + // } + + // smart_refctd_ptr descriptorPool = nullptr; + // { + // IDescriptorPool::SCreateInfo createInfo = {}; + // createInfo.maxSets = 1; + // createInfo.maxDescriptorCount[static_cast(IDescriptor::E_TYPE::ET_STORAGE_IMAGE)] = 2; + // createInfo.maxDescriptorCount[static_cast(IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = 2; + // descriptorPool = m_device->createDescriptorPool(std::move(createInfo)); + // } + + // m_descriptorSet = descriptorPool->createDescriptorSet(smart_refctd_ptr(m_descriptorSetLayout)); + + + // IGPUDescriptorSet::SDescriptorInfo descriptorInfos[bindingCount] = {}; + // IGPUDescriptorSet::SWriteDescriptorSet writeDescriptorSets[bindingCount] = {}; + // + // for(int i = 0; i < bindingCount; ++i) + // { + // writeDescriptorSets[i].info = &descriptorInfos[i]; + // writeDescriptorSets[i].dstSet = m_descriptorSet.get(); + // writeDescriptorSets[i].binding = i; + // writeDescriptorSets[i].count = bindings[i].count; + + // if(i<2) + // { + // descriptorInfos[i].desc = m_imageViews[i]; + // descriptorInfos[i].info.image.imageLayout = IImage::LAYOUT::GENERAL; + // } + // else + // { + // descriptorInfos[i].desc = m_buffers[i-2]; + // descriptorInfos[i].info.buffer.size = ~0ull; + // } + // } + + // m_device->updateDescriptorSets(bindingCount, writeDescriptorSets, 0u, nullptr); + + // In contrast to fences, we just need one semaphore to rule all dispatches + return true; + } + + void onAppTerminated_impl() override + { + m_device->waitIdle(); + } + + void workLoopBody() override + { + cpu_tests(); + + //constexpr auto StartedValue = 0; + + //smart_refctd_ptr progress = m_device->createSemaphore(StartedValue); + + //m_cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + //m_cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + + //IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t layoutTransBarriers[2] = { { + // .barrier = { + // .dep = { + // .srcStageMask = PIPELINE_STAGE_FLAGS::HOST_BIT, + // .srcAccessMask = ACCESS_FLAGS::HOST_WRITE_BIT, + // .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + // .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + // } + // }, + // .image = m_images[0].get(), + // .subresourceRange = { + // .aspectMask = IImage::EAF_COLOR_BIT, + // .baseMipLevel = 0u, + // .levelCount = 1u, + // .baseArrayLayer = 0u, + // .layerCount = 1u, + // }, + // .oldLayout = IImage::LAYOUT::UNDEFINED, + // .newLayout = IImage::LAYOUT::GENERAL + //} }; + //layoutTransBarriers[1] = layoutTransBarriers[0]; + //layoutTransBarriers[1].image = m_images[1].get(); + + //const IGPUCommandBuffer::SPipelineBarrierDependencyInfo depInfo = { .imgBarriers = layoutTransBarriers }; + //m_cmdbuf->pipelineBarrier(EDF_NONE, depInfo); + // + + //const uint32_t pushConstants[2] = { 1920, 1080 }; + //const IGPUDescriptorSet* set = m_descriptorSet.get(); + //m_cmdbuf->bindComputePipeline(m_pipeline.get()); + //m_cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_pipeline->getLayout(), 0u, 1u, &set); + //m_cmdbuf->dispatch(240, 135, 1u); + //for (int i = 0; i < 2; ++i) + //{ + // layoutTransBarriers[i].barrier.dep = layoutTransBarriers[i].barrier.dep.nextBarrier(PIPELINE_STAGE_FLAGS::COPY_BIT,ACCESS_FLAGS::TRANSFER_READ_BIT); + // layoutTransBarriers[i].oldLayout = layoutTransBarriers[i].newLayout; + // layoutTransBarriers[i].newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL; + //} + //m_cmdbuf->pipelineBarrier(EDF_NONE,depInfo); + + ////{ + //// constexpr auto FinishedValue1 = 42; + //// IQueue::SSubmitInfo submitInfos[1] = {}; + //// const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; + //// submitInfos[0].commandBuffers = cmdbufs; + //// const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = progress.get(),.value = FinishedValue1,.stageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; + //// submitInfos[0].signalSemaphores = signals; + //// m_api->startCapture(); + //// m_queue->submit(submitInfos); //Command buffer is NOT IN THE EXECUTABLE STATE + //// m_api->endCapture(); + //// const ISemaphore::SWaitInfo waitInfos[] = { { + //// .semaphore = progress.get(), + //// .value = FinishedValue1 + //// } }; + //// m_device->blockForSemaphores(waitInfos); + + ////} + //IImage::SBufferCopy copy = { + // .imageSubresource = { + // .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + // .mipLevel = 0, + // .baseArrayLayer = 0, + // .layerCount = 1, + // }, + // .imageExtent = {1920, 1080, 1}, + //}; + // + //bool succ = m_cmdbuf->copyImageToBuffer(m_images[0].get(), IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, m_readbackBuffers[0].get(), 1, ©); + //succ &= m_cmdbuf->copyImageToBuffer(m_images[1].get(), IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, m_readbackBuffers[1].get(), 1, ©); + //assert(succ); + //m_cmdbuf->end(); + + //{ + // constexpr auto FinishedValue = 69; + // IQueue::SSubmitInfo submitInfos[1] = {}; + // const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; + // submitInfos[0].commandBuffers = cmdbufs; + // const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = progress.get(),.value = FinishedValue,.stageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; + // submitInfos[0].signalSemaphores = signals; + // m_api->startCapture(); + // m_queue->submit(submitInfos); + // m_api->endCapture(); + // const ISemaphore::SWaitInfo waitInfos[] = { { + // .semaphore = progress.get(), + // .value = FinishedValue + // } }; + // m_device->blockForSemaphores(waitInfos); + //} + + //using res = std::array, 1080>, 1920>; + //res* ptrs[4] = {}; + // + //static_assert(sizeof(res) == sizeof(float) * 4 * 1920 * 1080); + + //for (int i = 0; i < 4; ++i) + //{ + // auto mem = (i < 2 ? m_buffers[i] : m_readbackBuffers[i-2])->getBoundMemory(); + // assert(mem.memory->isMappable()); + // auto* ptr = mem.memory->map({ .offset = 0, .length = mem.memory->getAllocationSize() }); + // ptrs[i] = (res*)ptr; + //} + //res& buf = *ptrs[1]; + //res& img = *ptrs[3]; + + //std::cout << buf[0][0][0] << " " + // << buf[0][0][1] << " " + // << buf[0][0][2] << " " + // << buf[0][0][3] << " " + // << "\n"; + // + //const std::ios::fmtflags f(std::cout.flags()); + //std::cout << std::hex + // << std::bit_cast(buf[0][0][0]) << " " + // << std::bit_cast(buf[0][0][1]) << " " + // << std::bit_cast(buf[0][0][2]) << " " + // << std::bit_cast(buf[0][0][3]) << " " + // << "\n"; + //std::cout.flags(f); + + //if(buf[0][0][0] != -1.f) + //{ + // std::cout << "Shader tests failed\n"; + //} + + m_keepRunning = false; + } + + bool keepRunning() override + { + return m_keepRunning; + } + + +private: + smart_refctd_ptr m_pipeline = nullptr; + smart_refctd_ptr m_descriptorSetLayout; + smart_refctd_ptr m_descriptorSet; + + smart_refctd_ptr m_images[2]; + smart_refctd_ptr m_buffers[2]; + smart_refctd_ptr m_readbackBuffers[2]; + smart_refctd_ptr m_imageViews[2]; + smart_refctd_ptr m_cmdbuf = nullptr; + IQueue* m_queue; + smart_refctd_ptr m_commandPool; + uint64_t m_iteration = 0; + constexpr static inline uint64_t MaxIterations = 200; + + bool m_keepRunning = true; +}; + +template +constexpr bool val(T a) +{ + return std::is_const_v; +} + +template +bool equal(T l, U r) +{ + static_assert(sizeof(T) == sizeof(U)); + return 0==memcmp(&l, &r, sizeof(T)); +} + + +bool almost_equal(float l, float r) +{ + return fabs(l - r) < std::numeric_limits::epsilon() * 1000; +} + +template +constexpr auto limits_var(T obj) +{ + if constexpr (std::is_function_v>) + return obj(); + else + return obj; +} + +template +T random(T lo, T hi) +{ + return (hi-lo)/RAND_MAX * rand() + lo; +} + +NBL_MAIN_FUNC(QuaternionTest) + +void cpu_tests() +{ + float32_t3 a = float32_t3(1.0f, 2.0f, 3.0f); + float32_t3 b = float32_t3(2.0f, 3.0f, 4.0f); + b = a * 3.0f; + bool3 asdf = bool3(true, false, true); + pow(a, b); + + // TODO: later this whole test should be templated so we can check all `T` not just `float`, but for this we need `type_traits` + + // DO NOT EVER THINK TO CHANGE `using type1 = vector` to `using type1 = type` EVER! + static_assert(!std::is_same_v); + static_assert(!std::is_same_v); + static_assert(!std::is_same_v); + static_assert(!std::is_same_v); + //static_assert(!std::is_same_v,T>); + + // checking matrix memory layout + { + float32_t4x3 a; + float32_t3x4 b; + float32_t3 v; + float32_t4 u; + mul(a, b); + mul(b, a); + mul(a, v); + mul(v, b); + mul(u, a); + mul(b, u); + + float32_t4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + a - a; + b + b; + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + + } + + // making sure linear operators returns the correct type + + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + + // checking scalar packing + static_assert(offsetof(T, a) == 0); + static_assert(offsetof(T, b) == offsetof(T, a) + sizeof(T::a)); + static_assert(offsetof(T, c) == offsetof(T, b) + sizeof(T::b)); + static_assert(offsetof(T, d) == offsetof(T, c) + sizeof(T::c)); + static_assert(offsetof(T, e) == offsetof(T, d) + sizeof(T::d)); + static_assert(offsetof(T, f) == offsetof(T, e) + sizeof(T::e)); + static_assert(offsetof(T, g) == offsetof(T, f) + sizeof(T::f)); + static_assert(offsetof(T, h) == offsetof(T, g) + sizeof(T::g)); + + // use some functions + float32_t3 x; + float32_t2x3 y; + float32_t3x3 z; + //barycentric::reconstructBarycentrics(x, y); + //barycentric::reconstructBarycentrics(x, z); + + // color matrix tests: + //testColorMatrices(); + + // promote.hlsl tests: + // promote scalar to vector + //float32_t3 v0 = nbl::hlsl::promote(2.0f); + // promote scalar to matrix + //float32_t3x3 m0 = nbl::hlsl::promote(2.0f); + + // TODO?: promote vector to matrix + //glm::mat3 m1 = nbl::hlsl::promote(glm::vec3(1.0f, 2.0f, 3.0f)); + + // test vector comparison operators + { + /*float32_t3 a = float32_t3(1.0f, 2.0f, 3.0f); + float32_t3 b = float32_t3(0.5f, 0.5f, 0.5f); + assert(glm::all(a > b)); + assert(glm::all(b < a)); + + b = float32_t3(0.5f, 2.0f, 0.5f); + assert(glm::all(a >= b)); + assert(glm::all(b <= a));*/ + } + + // TODO[Przemek]: tests function output + float32_t3 ZERO_VEC = float32_t3(0.0f, 0.0f, 0.0f); + float32_t3 ONE_VEC = float32_t3(1.0f, 1.0f, 1.0f); + + // test functions from EOTF.hlsl + //assert(areVectorsEqual(colorspace::eotf::identity(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::eotf::impl_shared_2_4(ZERO_VEC, 0.5f), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::eotf::sRGB(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::eotf::Display_P3(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::eotf::DCI_P3_XYZ(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::eotf::SMPTE_170M(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::eotf::SMPTE_ST2084(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::eotf::HDR10_HLG(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::eotf::AdobeRGB(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::eotf::Gamma_2_2(ZERO_VEC), ZERO_VEC)); + ////assert(areVectorsEqual(colorspace::eotf::ACEScc(ZERO_VEC), ZERO_VEC)); + ////assert(areVectorsEqual(colorspace::eotf::ACEScct(ZERO_VEC), ZERO_VEC)); + + //assert(areVectorsEqual(colorspace::eotf::identity(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::eotf::impl_shared_2_4(ONE_VEC, 0.5f), ONE_VEC)); + //assert(areVectorsEqual(colorspace::eotf::sRGB(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::eotf::Display_P3(ONE_VEC), ONE_VEC)); + ////assert(areVectorsEqual(colorspace::eotf::DCI_P3_XYZ(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::eotf::SMPTE_170M(ONE_VEC), ONE_VEC)); + ////assert(areVectorsEqual(colorspace::eotf::SMPTE_ST2084(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::eotf::HDR10_HLG(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::eotf::AdobeRGB(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::eotf::Gamma_2_2(ONE_VEC), ONE_VEC)); + ////assert(areVectorsEqual(colorspace::eotf::ACEScc(ONE_VEC), ONE_VEC)); + ////assert(areVectorsEqual(colorspace::eotf::ACEScct(ONE_VEC), ONE_VEC)); + + //// test functions from OETF.hlsl + //assert(areVectorsEqual(colorspace::oetf::identity(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::oetf::impl_shared_2_4(ZERO_VEC, 0.5f), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::oetf::sRGB(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::oetf::Display_P3(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::oetf::DCI_P3_XYZ(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::oetf::SMPTE_170M(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::oetf::SMPTE_ST2084(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::oetf::HDR10_HLG(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::oetf::AdobeRGB(ZERO_VEC), ZERO_VEC)); + //assert(areVectorsEqual(colorspace::oetf::Gamma_2_2(ZERO_VEC), ZERO_VEC)); + ////assert(areVectorsEqual(colorspace::oetf::ACEScc(ZERO_VEC), ZERO_VEC)); + ////assert(areVectorsEqual(colorspace::oetf::ACEScct(ZERO_VEC), ZERO_VEC)); + + //assert(areVectorsEqual(colorspace::oetf::identity(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::oetf::impl_shared_2_4(ONE_VEC, 0.5f), ONE_VEC)); + //assert(areVectorsEqual(colorspace::oetf::sRGB(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::oetf::Display_P3(ONE_VEC), ONE_VEC)); + ////assert(areVectorsEqual(colorspace::oetf::DCI_P3_XYZ(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::oetf::SMPTE_170M(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::oetf::SMPTE_ST2084(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::oetf::HDR10_HLG(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::oetf::AdobeRGB(ONE_VEC), ONE_VEC)); + //assert(areVectorsEqual(colorspace::oetf::Gamma_2_2(ONE_VEC), ONE_VEC)); + ////assert(areVectorsEqual(colorspace::oetf::ACEScc(ONE_VEC), ONE_VEC)); + ////assert(areVectorsEqual(colorspace::oetf::ACEScct(ONE_VEC), ONE_VEC)); + + // xoroshiro64 tests + //constexpr uint32_t2 state = uint32_t2(12u, 34u); + //Xoroshiro64Star xoroshiro64Star = Xoroshiro64Star::construct(state); + //xoroshiro64Star(); + //Xoroshiro64StarStar xoroshiro64StarStar = Xoroshiro64StarStar::construct(state); + //xoroshiro64StarStar(); + + auto zero = cross(x,x); + auto lenX2 = dot(x,x); + //auto z_inv = inverse(z); //busted return type conversion + auto mid = nbl::hlsl::mix(x,x,float32_t3(0.5f)); + //auto w = transpose(y); //also busted + + + // half test + { + + float16_t MIN = 6.103515e-05F; + float16_t MAX = 65504.0F; + float16_t DENORM_MIN = 5.96046448e-08F; + uint16_t QUIET_NAN = 0x7FFF; + uint16_t SIGNALING_NAN = 0x7DFF; + +// TODO: reenable after port to OpenEXR 3.0 +// TODO: This whole test is wrong, the uint constants should be reinterpret casted into `float16_t` not static-casted +#if 0 // disabling test, because Imath 2.4.0 doesn't provide constexpr limits, which makes the specialization of `nbl::hlsl::numeric_limits` impossible + if(!equal((float16_t)nbl::hlsl::impl::numeric_limits::min, nbl::hlsl::numeric_limits::min())) + { + std::cout << "numeric_limits::min does not match\n"; + } + if(!equal((float16_t)nbl::hlsl::impl::numeric_limits::max, nbl::hlsl::numeric_limits::max())) + { + std::cout << "numeric_limits::max does not match\n"; + } + if(!equal((float16_t)nbl::hlsl::impl::numeric_limits::denorm_min, nbl::hlsl::numeric_limits::denorm_min())) + { + std::cout << "numeric_limits::denorm_min does not match\n"; + } + if(!equal(nbl::hlsl::impl::numeric_limits::quiet_NaN, nbl::hlsl::numeric_limits::quiet_NaN())) + { + std::cout << "numeric_limits::quiet_NaN does not match\n"; + } + if(!equal(nbl::hlsl::impl::numeric_limits::signaling_NaN, nbl::hlsl::numeric_limits::signaling_NaN())) + { + std::cout << "numeric_limits::signaling_NaN does not match\n"; + } +#endif + } + + auto test_type_limits = []() + { + using L = std::numeric_limits; + using R = nbl::hlsl::impl::numeric_limits; + + #define TEST_AND_LOG(var) \ + { \ + auto rhs = limits_var(R::var); \ + auto lhs = limits_var(L::var); \ + if(!equal(lhs, rhs)) \ + { \ + std::cout << typeid(T).name() << " " << #var << " does not match : " << double(lhs) << " - " << double(rhs) << "\n"; \ + } \ + } + + TEST_AND_LOG(is_specialized); + TEST_AND_LOG(is_signed); + TEST_AND_LOG(is_integer); + TEST_AND_LOG(is_exact); + TEST_AND_LOG(has_infinity); + TEST_AND_LOG(has_quiet_NaN); + TEST_AND_LOG(has_signaling_NaN); + TEST_AND_LOG(has_denorm); + TEST_AND_LOG(has_denorm_loss); + TEST_AND_LOG(round_style); + TEST_AND_LOG(is_iec559); + TEST_AND_LOG(is_bounded); + TEST_AND_LOG(is_modulo); + TEST_AND_LOG(digits); + TEST_AND_LOG(digits10); + TEST_AND_LOG(max_digits10); + TEST_AND_LOG(radix); + TEST_AND_LOG(min_exponent); + TEST_AND_LOG(min_exponent10); + TEST_AND_LOG(max_exponent); + TEST_AND_LOG(max_exponent10); + TEST_AND_LOG(traps); + TEST_AND_LOG(tinyness_before); + TEST_AND_LOG(min); + TEST_AND_LOG(max); + TEST_AND_LOG(lowest); + TEST_AND_LOG(epsilon); + TEST_AND_LOG(round_error); + TEST_AND_LOG(infinity); + TEST_AND_LOG(quiet_NaN); + TEST_AND_LOG(signaling_NaN); + TEST_AND_LOG(denorm_min); + #undef TEST_AND_LOG + }; + + test_type_limits.template operator()(); + test_type_limits.template operator()(); + test_type_limits.template operator()(); + test_type_limits.template operator()(); + test_type_limits.template operator()(); + test_type_limits.template operator()(); + test_type_limits.template operator()(); + test_type_limits.template operator()(); + test_type_limits.template operator()(); + test_type_limits.template operator()(); + test_type_limits.template operator()(); + + // countl_zero test + mpl::countl_zero::value; + // TODO: fix warning about nodiscard + std::countl_zero(5u); + nbl::hlsl::countl_zero(5u); + + // bit.hlsl test + /*nbl::hlsl::rotl(1u, 1u); + nbl::hlsl::rotr(1u, 1u);*/ + + + // cmath + + +#define PASS_VARS1 x0 +#define PASS_VARS2 x0,x1 +#define PASS_VARS3 x0,x1,x2 +#define PASS_VARS(count) PASS_VARS##count + + +#define ASSERT_EQ(fn) \ + if (!almost_equal(lhs, rhs)) \ + std::cout << #fn << " does not match " << lhs << " vs " << rhs << '\n'; + +#define INIT_VARS(T) \ + T x0 = random(T(-10000), T(10000)); \ + T x1 = random(T(1), T(1000)); \ + T x2 = random(T(1), T(1000)); \ + +#define TEST_CMATH(fn, varcount, T) \ + { INIT_VARS(T)\ + auto lhs = nbl::hlsl::fn(PASS_VARS(varcount)); \ + auto rhs = std::fn(PASS_VARS(varcount)); \ + ASSERT_EQ(fn); \ + } + +#define TEST_CMATHT(fn, out_type, varcount, T) \ + { INIT_VARS(T) \ + out_type o0, o1; \ + auto lhs = nbl::hlsl::fn(PASS_VARS(varcount),o0); \ + auto rhs = std::fn(PASS_VARS(varcount),&o1); \ + ASSERT_EQ(fn); \ + assert(almost_equal(o0,o1)); \ + } + +#ifndef DISABLE_TGMATH_TESTS +#define TEST_CMATH_FOR_TYPE(type) \ + TEST_CMATH(cos, 1, type) \ + TEST_CMATH(sin, 1, type) \ + TEST_CMATH(tan, 1, type) \ + TEST_CMATH(acos, 1, type) \ + TEST_CMATH(asin, 1, type) \ + TEST_CMATH(atan, 1, type) \ + TEST_CMATH(atan2, 2, type) \ + TEST_CMATH(cosh, 1, type) \ + TEST_CMATH(sinh, 1, type) \ + TEST_CMATH(tanh, 1, type) \ + TEST_CMATH(acosh, 1, type) \ + TEST_CMATH(asinh, 1, type) \ + TEST_CMATH(atanh, 1, type) \ + TEST_CMATH(exp, 1, type) \ + TEST_CMATHT(frexp, int, 1, type) \ + TEST_CMATH(ldexp, 2, type) \ + TEST_CMATH(log,1,type) \ + TEST_CMATH(log10,1,type) \ + TEST_CMATHT(modf, type, 1, type) \ + TEST_CMATH(exp2, 1, type) \ + TEST_CMATH(log2, 1, type) \ + TEST_CMATH(logb, 1, type) \ + TEST_CMATH(expm1, 1, type) \ + TEST_CMATH(log1p, 1, type) \ + TEST_CMATH(ilogb, 1, type) \ + TEST_CMATH(scalbn, 2, type) \ + TEST_CMATH(pow, 2, type) \ + TEST_CMATH(sqrt, 1, type) \ + TEST_CMATH(cbrt, 1, type) \ + TEST_CMATH(hypot, 2, type) \ + TEST_CMATH(copysign, 2, type) \ + TEST_CMATH(erf, 1, type) \ + TEST_CMATH(erfc, 1, type) \ + TEST_CMATH(tgamma, 1, type) \ + TEST_CMATH(lgamma, 1, type) \ + TEST_CMATH(ceil, 1, type) \ + TEST_CMATH(floor, 1, type) \ + TEST_CMATH(fmod, 2, type) \ + TEST_CMATH(trunc, 1, type) \ + TEST_CMATH(round, 1, type) \ + TEST_CMATH(rint, 1, type) \ + TEST_CMATH(nearbyint, 1, type) \ + TEST_CMATHT(remquo, int, 2, type) \ + TEST_CMATH(remainder, 2, type) \ + TEST_CMATH(abs, 1, type) \ + TEST_CMATH(fabs, 1, type) \ + TEST_CMATH(fma, 3, type) \ + TEST_CMATH(fmax, 2, type) \ + TEST_CMATH(fmin, 2, type) \ + TEST_CMATH(fdim, 2, type) \ + + + //TEST_CMATH_FOR_TYPE(float32_t) + //TEST_CMATH_FOR_TYPE(float64_t) +#endif + std::cout << "cpu tests done\n"; +} diff --git a/CMakeLists.txt b/CMakeLists.txt index 66b82f37f..98e5154de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,6 +84,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(47_DerivMapTest EXCLUDE_FROM_ALL) add_subdirectory(54_Transformations EXCLUDE_FROM_ALL) add_subdirectory(55_RGB18E7S3 EXCLUDE_FROM_ALL) + add_subdirectory(59_QuaternionTests) add_subdirectory(61_UI) add_subdirectory(62_CAD EXCLUDE_FROM_ALL) # TODO: Erfan, Przemek, Francisco and co. need to resurrect this add_subdirectory(62_SchusslerTest EXCLUDE_FROM_ALL) From efedbf600b78d70ac7ce5789800908c0af964f99 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 6 Jan 2026 16:56:14 +0700 Subject: [PATCH 143/219] add lerp test --- 59_QuaternionTests/CQuaternionTester.h | 23 +++++++++++++------- 59_QuaternionTests/app_resources/common.hlsl | 3 +++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index cb6fecb2e..c6a4707c6 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -33,19 +33,20 @@ class CQuaternionTester final : public ITester::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); - testInput.quat0 = hlsl::normalize(testInput.quat0); testInput.quat1 = math::quaternion::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); - testInput.quat1 = hlsl::normalize(testInput.quat1); testInput.pitch = realDistribution(getRandomEngine()); testInput.yaw = realDistribution(getRandomEngine()); testInput.roll = realDistribution(getRandomEngine()); + testInput.factor = realDistribution(getRandomEngine()); return testInput; } QuaternionTestValues determineExpectedResults(const QuaternionInputTestValues& testInput) override { - // use std library functions to determine expected test values, the output of functions from tgmath.hlsl will be verified against these values + const auto glmquat0 = glm::quat(testInput.quat0.data.w, testInput.quat0.data.x, testInput.quat0.data.y, testInput.quat0.data.z); + const auto glmquat1 = glm::quat(testInput.quat1.data.w, testInput.quat1.data.x, testInput.quat1.data.y, testInput.quat1.data.z); + QuaternionTestValues expected; { const auto glmquat = glm::angleAxis(testInput.angle, testInput.axis); @@ -63,21 +64,25 @@ class CQuaternionTester final : public ITester quatFromEulerAngles; float32_t3x3 rotationMat; math::quaternion quatMult; + math::quaternion quatLerp; }; struct QuaternionTestExecutor @@ -68,6 +70,7 @@ struct QuaternionTestExecutor output.quatFromEulerAngles = math::quaternion::create(input.pitch, input.yaw, input.roll); output.rotationMat = input.quat0.constructMatrix(); output.quatMult = input.quat0 * input.quat1; + output.quatLerp = math::quaternion::unnormLerp(input.quat0, input.quat1, input.factor); } }; From 15e4d5d044d0b682279fcce5486a841e1f3d3541 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 7 Jan 2026 00:20:02 +0300 Subject: [PATCH 144/219] added benchmark code for sampling, visualization of rays in 3D view, added NSC compile rules (for benchmark only for now) --- 73_SolidAngleVisualizer/CMakeLists.txt | 78 ++- .../app_resources/hlsl/Drawing.hlsl | 327 ++++++----- .../app_resources/hlsl/RayVis.frag.hlsl | 221 ++++++++ .../app_resources/hlsl/Sampling.hlsl | 193 +++++-- .../hlsl/SolidAngleVis.frag.hlsl | 465 +++------------- .../hlsl/benchmark/benchmark.comp.hlsl | 45 ++ .../app_resources/hlsl/benchmark/common.hlsl | 23 + .../app_resources/hlsl/common.hlsl | 29 +- .../app_resources/hlsl/gpu_common.hlsl | 168 ++++++ .../app_resources/hlsl/silhouette.hlsl | 164 ++++++ .../app_resources/hlsl/utils.hlsl | 19 + 73_SolidAngleVisualizer/main.cpp | 527 +++++++++++++++--- 12 files changed, 1603 insertions(+), 656 deletions(-) create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/RayVis.frag.hlsl create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl diff --git a/73_SolidAngleVisualizer/CMakeLists.txt b/73_SolidAngleVisualizer/CMakeLists.txt index 5d0021f61..f1701829f 100644 --- a/73_SolidAngleVisualizer/CMakeLists.txt +++ b/73_SolidAngleVisualizer/CMakeLists.txt @@ -7,14 +7,88 @@ if(NBL_BUILD_IMGUI) "${CMAKE_CURRENT_SOURCE_DIR}/include" ) - list(APPEND NBL_LIBRARIES + list(APPEND NBL_LIBRARIES imtestengine imguizmo "${NBL_EXT_IMGUI_UI_LIB}" ) - + + if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) + endif() + # TODO; Arek I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here, doesn't this macro have 4 arguments anyway !? nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") + # TODO: Arek temporarily disabled cause I haven't figured out how to make this target yet # LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) + set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + set(DEPENDS + app_resources/hlsl/common.hlsl + app_resources/hlsl/gpu_common.hlsl + app_resources/hlsl/Drawing.hlsl + app_resources/hlsl/Sampling.hlsl + app_resources/hlsl/Sampling.hlsl + app_resources/hlsl/silhouette.hlsl + app_resources/hlsl/utils.hlsl + + # app_resources/hlsl/test.comp.hlsl + app_resources/hlsl/benchmark/benchmark.comp.hlsl + app_resources/hlsl/benchmark/common.hlsl + ) + target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) + set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + + set(SM 6_8) + set(JSON [=[ + [ + + { + "INPUT": "app_resources/hlsl/benchmark/benchmark.comp.hlsl", + "KEY": "benchmark", + }, + ] + ]=]) + string(CONFIGURE "${JSON}" JSON) + + set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} + ) + + NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} + ) + + NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + ) endif() \ No newline at end of file diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl index 89dfd4ae6..1a2962c78 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl @@ -1,72 +1,86 @@ #ifndef _DEBUG_HLSL_ #define _DEBUG_HLSL_ + #include "common.hlsl" +#include "gpu_common.hlsl" -float2 sphereToCircle(float3 spherePoint) +#if DEBUG_DATA +// Check if a face on the hemisphere is visible from camera at origin +bool isFaceVisible(float32_t3 faceCenter, float32_t3 faceNormal) +{ + float32_t3 viewVec = normalize(-faceCenter); // Vector from camera to face + return dot(faceNormal, viewVec) > 0.0f; +} +#endif // DEBUG_DATA + +#if VISUALIZE_SAMPLES + +// doesn't change Z coordinate +float32_t3 sphereToCircle(float32_t3 spherePoint) { if (spherePoint.z >= 0.0f) { - return spherePoint.xy * CIRCLE_RADIUS; + return float32_t3(spherePoint.xy * CIRCLE_RADIUS, spherePoint.z); } else { - float r2 = (1.0f - spherePoint.z) / (1.0f + spherePoint.z); - float uv2Plus1 = r2 + 1.0f; - return (spherePoint.xy * uv2Plus1 / 2.0f) * CIRCLE_RADIUS; + float32_t r2 = (1.0f - spherePoint.z) / (1.0f + spherePoint.z); + float32_t uv2Plus1 = r2 + 1.0f; + return float32_t3((spherePoint.xy * uv2Plus1 / 2.0f) * CIRCLE_RADIUS, spherePoint.z); } } -float drawGreatCircleArc(float3 fragPos, float3 points[2], float aaWidth, float width = 0.01f) +float32_t drawGreatCircleArc(float32_t3 fragPos, float32_t3 points[2], float32_t aaWidth, float32_t width = 0.01f) { - float3 v0 = normalize(points[0]); - float3 v1 = normalize(points[1]); - float3 p = normalize(fragPos); + float32_t3 v0 = normalize(points[0]); + float32_t3 v1 = normalize(points[1]); + float32_t3 ndc = normalize(fragPos); - float3 arcNormal = normalize(cross(v0, v1)); - float dist = abs(dot(p, arcNormal)); + float32_t3 arcNormal = normalize(cross(v0, v1)); + float32_t dist = abs(dot(ndc, arcNormal)); - float dotMid = dot(v0, v1); - bool onArc = (dot(p, v0) >= dotMid) && (dot(p, v1) >= dotMid); + float32_t dotMid = dot(v0, v1); + bool onArc = (dot(ndc, v0) >= dotMid) && (dot(ndc, v1) >= dotMid); if (!onArc) return 0.0f; - float avgDepth = (length(points[0]) + length(points[1])) * 0.5f; - float depthScale = 3.0f / avgDepth; + float32_t avgDepth = (length(points[0]) + length(points[1])) * 0.5f; + float32_t depthScale = 3.0f / avgDepth; width = min(width * depthScale, 0.02f); - float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); + float32_t alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); return alpha; } -float drawCross2D(float2 fragPos, float2 center, float size, float thickness) +float32_t drawCross2D(float32_t2 fragPos, float32_t2 center, float32_t size, float32_t thickness) { - float2 p = abs(fragPos - center); + float32_t2 ndc = abs(fragPos - center); // Check if point is inside the cross (horizontal or vertical bar) - bool inHorizontal = (p.x <= size && p.y <= thickness); - bool inVertical = (p.y <= size && p.x <= thickness); + bool inHorizontal = (ndc.x <= size && ndc.y <= thickness); + bool inVertical = (ndc.y <= size && ndc.x <= thickness); return (inHorizontal || inVertical) ? 1.0f : 0.0f; } -float4 drawHiddenEdges(float3 spherePos, uint32_t silEdgeMask, float aaWidth) +float32_t4 drawHiddenEdges(float32_t3x4 modelMatrix, float32_t3 spherePos, uint32_t silEdgeMask, float32_t aaWidth) { - float4 color = 0; - float3 hiddenEdgeColor = float3(0.1, 0.1, 0.1); + float32_t4 color = 0; + float32_t3 hiddenEdgeColor = float32_t3(0.1, 0.1, 0.1); NBL_UNROLL - for (int32_t i = 0; i < 12; i++) + for (uint32_t i = 0; i < 12; i++) { // skip silhouette edges if (silEdgeMask & (1u << i)) continue; - int2 edge = allEdges[i]; + uint32_t2 edge = allEdges[i]; - float3 v0 = normalize(getVertex(edge.x)); - float3 v1 = normalize(getVertex(edge.y)); + float32_t3 v0 = normalize(getVertex(modelMatrix, edge.x)); + float32_t3 v1 = normalize(getVertex(modelMatrix, edge.y)); bool neg0 = v0.z < 0.0f; bool neg1 = v1.z < 0.0f; @@ -75,107 +89,163 @@ float4 drawHiddenEdges(float3 spherePos, uint32_t silEdgeMask, float aaWidth) if (neg0 && neg1) continue; - float3 p0 = v0; - float3 p1 = v1; + float32_t3 p0 = v0; + float32_t3 p1 = v1; // clip if needed if (neg0 ^ neg1) { - float t = v0.z / (v0.z - v1.z); - float3 clip = normalize(lerp(v0, v1, t)); + float32_t t = v0.z / (v0.z - v1.z); + float32_t3 clip = normalize(lerp(v0, v1, t)); p0 = neg0 ? clip : v0; p1 = neg1 ? clip : v1; } - float3 pts[2] = {p0, p1}; - float4 c = drawGreatCircleArc(spherePos, pts, aaWidth, 0.005f); - color += float4(hiddenEdgeColor * c.a, c.a); + float32_t3 pts[2] = {p0, p1}; + float32_t4 c = drawGreatCircleArc(spherePos, pts, aaWidth, 0.005f); + color += float32_t4(hiddenEdgeColor * c.a, c.a); } return color; } -float4 drawCorners(float2 p, float aaWidth) +float32_t4 drawCorner(float32_t3 cornerNDCPos, float32_t2 ndc, float32_t aaWidth, float32_t dotSize, float32_t innerDotSize, float32_t3 dotColor) { - float4 color = 0; - - float dotSize = 0.02f; - float innerDotSize = dotSize * 0.5f; - - for (int32_t i = 0; i < 8; i++) - { - float3 corner3D = normalize(getVertex(i)); - float2 cornerPos = sphereToCircle(corner3D); + float32_t4 color = float32_t4(0, 0, 0, 0); + float32_t dist = length(ndc - cornerNDCPos.xy); - float dist = length(p - cornerPos); - - // outer dot - float outerAlpha = 1.0f - smoothstep(dotSize - aaWidth, + // outer dot + float32_t outerAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); - if (outerAlpha <= 0.0f) - continue; + if (outerAlpha <= 0.0f) + return color; - float3 dotColor = colorLUT[i]; - color += float4(dotColor * outerAlpha, outerAlpha); + color += float32_t4(dotColor * outerAlpha, outerAlpha); - // ------------------------------------------------- - // inner black dot for hidden corners - // ------------------------------------------------- - if (corner3D.z < 0.0f) - { - float innerAlpha = 1.0f - smoothstep(innerDotSize - aaWidth, + // ------------------------------------------------- + // inner black dot for hidden corners + // ------------------------------------------------- + if (cornerNDCPos.z < 0.0f) + { + float32_t innerAlpha = 1.0f - smoothstep(innerDotSize - aaWidth, innerDotSize + aaWidth, dist); - // ensure it stays inside the outer dot - innerAlpha *= outerAlpha; + // ensure it stays inside the outer dot + innerAlpha *= outerAlpha; - float3 innerColor = float3(0.0, 0.0, 0.0); - color -= float4(innerAlpha.xxx, 0.0f); - } + color -= float32_t4(innerAlpha.xxx, 0.0f); } return color; } -float4 drawClippedSilhouetteVertices(float2 p, ClippedSilhouette silhouette, float aaWidth) +// Draw a line segment in NDC space +float32_t lineSegment(float32_t2 ndc, float32_t2 a, float32_t2 b, float32_t thickness) { - float4 color = 0; - float dotSize = 0.03f; + float32_t2 pa = ndc - a; + float32_t2 ba = b - a; + float32_t h = saturate(dot(pa, ba) / dot(ba, ba)); + float32_t dist = length(pa - ba * h); + return smoothstep(thickness, thickness * 0.5, dist); +} - for (uint i = 0; i < silhouette.count; i++) +// Draw an arrow head (triangle) in NDC space +float32_t arrowHead(float32_t2 ndc, float32_t2 tip, float32_t2 direction, float32_t size) +{ + // Create perpendicular vector + float32_t2 perp = float32_t2(-direction.y, direction.x); + + // Three points of the arrow head triangle + float32_t2 p1 = tip; + float32_t2 p2 = tip - direction * size + perp * size * 0.5; + float32_t2 p3 = tip - direction * size - perp * size * 0.5; + + // Check if point is inside triangle using barycentric coordinates + float32_t2 v0 = p3 - p1; + float32_t2 v1 = p2 - p1; + float32_t2 v2 = ndc - p1; + + float32_t dot00 = dot(v0, v0); + float32_t dot01 = dot(v0, v1); + float32_t dot02 = dot(v0, v2); + float32_t dot11 = dot(v1, v1); + float32_t dot12 = dot(v1, v2); + + float32_t invDenom = 1.0 / (dot00 * dot11 - dot01 * dot01); + float32_t u = (dot11 * dot02 - dot01 * dot12) * invDenom; + float32_t v = (dot00 * dot12 - dot01 * dot02) * invDenom; + + bool inside = (u >= 0.0) && (v >= 0.0) && (u + v <= 1.0); + + // Add some antialiasing + float32_t minDist = min(min( + length(ndc - p1), + length(ndc - p2)), + length(ndc - p3)); + + return inside ? 1.0 : smoothstep(0.02, 0.0, minDist); +} + +// Helper to draw an edge with proper color mapping +float32_t4 drawEdge(uint32_t originalEdgeIdx, float32_t3 pts[2], float32_t3 spherePos, float32_t aaWidth, float32_t width = 0.01f) +{ + float32_t4 edgeContribution = drawGreatCircleArc(spherePos, pts, aaWidth, width); + return float32_t4(colorLUT[originalEdgeIdx] * edgeContribution.a, edgeContribution.a); +}; + +float32_t4 drawCorners(float32_t3x4 modelMatrix, float32_t2 ndc, float32_t aaWidth) +{ + float32_t4 color = float32_t4(0, 0, 0, 0); + + float32_t dotSize = 0.02f; + float32_t innerDotSize = dotSize * 0.5f; + + for (uint32_t i = 0; i < 8; i++) { - float3 corner3D = normalize(silhouette.vertices[i]); - float2 cornerPos = sphereToCircle(corner3D); + float32_t3 cornerCirclePos = sphereToCircle(normalize(getVertex(modelMatrix, i))); + color += drawCorner(cornerCirclePos, ndc, aaWidth, dotSize, innerDotSize, colorLUT[i]); + } - float dist = length(p - cornerPos); + return color; +} + +float32_t4 drawClippedSilhouetteVertices(float32_t2 ndc, ClippedSilhouette silhouette, float32_t aaWidth) +{ + float32_t4 color = 0; + float32_t dotSize = 0.03f; + + for (uint i = 0; i < silhouette.count; i++) + { + float32_t3 cornerCirclePos = sphereToCircle(normalize(silhouette.vertices[i])); + float32_t dist = length(ndc - cornerCirclePos.xy); // Smooth circle for the vertex - float alpha = 1.0f - smoothstep(dotSize * 0.8f, dotSize, dist); + float32_t alpha = 1.0f - smoothstep(dotSize * 0.8f, dotSize, dist); if (alpha > 0.0f) { // Color gradient: Red (index 0) to Cyan (last index) // This helps verify the CCW winding order visually - float t = float(i) / float(max(1u, silhouette.count - 1)); - float3 vertexColor = lerp(float3(1, 0, 0), float3(0, 1, 1), t); + float32_t t = float32_t(i) / float32_t(max(1u, silhouette.count - 1)); + float32_t3 vertexColor = lerp(float32_t3(1, 0, 0), float32_t3(0, 1, 1), t); - color += float4(vertexColor * alpha, alpha); + color += float32_t4(vertexColor * alpha, alpha); } } return color; } -float4 drawRing(float2 p, float aaWidth) +float32_t4 drawRing(float32_t2 ndc, float32_t aaWidth) { - float positionLength = length(p); - float ringWidth = 0.003f; - float ringDistance = abs(positionLength - CIRCLE_RADIUS); - float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); - return ringAlpha * float4(1, 1, 1, 1); + float32_t positionLength = length(ndc); + float32_t ringWidth = 0.003f; + float32_t ringDistance = abs(positionLength - CIRCLE_RADIUS); + float32_t ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); + return ringAlpha * float32_t4(1, 1, 1, 1); } // Returns the number of visible faces and populates the faceIndices array @@ -204,78 +274,72 @@ uint getVisibleFaces(int3 region, out uint faceIndices[3]) return count; } -float4 drawVisibleFaceOverlay(float3 spherePos, int3 region, float aaWidth) +float32_t4 drawVisibleFaceOverlay(float32_t3x4 modelMatrix, float32_t3 spherePos, int3 region, float32_t aaWidth) { uint faceIndices[3]; uint count = getVisibleFaces(region, faceIndices); - float4 color = 0; + + float32_t4 color = 0; for (uint i = 0; i < count; i++) { uint fIdx = faceIndices[i]; - float3 n = localNormals[fIdx]; + float32_t3 n = localNormals[fIdx]; // Transform normal to world space (using the same logic as your corners) - float3 worldNormal = -normalize(mul((float3x3)pc.modelMatrix, n)); + float32_t3 worldNormal = -normalize(mul((float3x3)modelMatrix, n)); worldNormal.z = -worldNormal.z; // Invert Z for correct orientation // Very basic visualization: highlight if the sphere position // is generally pointing towards that face's normal - float alignment = dot(spherePos, worldNormal); + float32_t alignment = dot(spherePos, worldNormal); if (alignment > 0.95f) { // Use different colors for different face indices - color += float4(colorLUT[fIdx % 24], 0.5f); + color += float32_t4(colorLUT[fIdx % 24], 0.5f); } } return color; } -// Check if a face on the hemisphere is visible from camera at origin -bool isFaceVisible(float3 faceCenter, float3 faceNormal) +float32_t4 drawFaces(float32_t3x4 modelMatrix, float32_t3 spherePos, float32_t aaWidth) { - float3 viewVec = normalize(-faceCenter); // Vector from camera to face - return dot(faceNormal, viewVec) > 0.0f; -} + float32_t4 color = 0.0f; + float32_t3 ndc = normalize(spherePos); -float4 drawFaces(float3 spherePos, float aaWidth) -{ - float4 color = 0.0f; - float3 p = normalize(spherePos); - - float3x3 rotMatrix = (float3x3)pc.modelMatrix; + float3x3 rotMatrix = (float3x3)modelMatrix; // Check each of the 6 faces - for (int32_t faceIdx = 0; faceIdx < 6; faceIdx++) + for (uint32_t faceIdx = 0; faceIdx < 6; faceIdx++) { - float3 n_world = mul(rotMatrix, localNormals[faceIdx]); + float32_t3 n_world = mul(rotMatrix, localNormals[faceIdx]); // Check if face is visible if (!isFaceVisible(faceCenters[faceIdx], n_world)) continue; // Get the 4 corners of this face - float3 faceVerts[4]; - for (int32_t i = 0; i < 4; i++) + float32_t3 faceVerts[4]; + for (uint32_t i = 0; i < 4; i++) { - int32_t cornerIdx = faceToCorners[faceIdx][i]; - faceVerts[i] = normalize(getVertex(cornerIdx)); + uint32_t cornerIdx = faceToCorners[faceIdx][i]; + faceVerts[i] = normalize(getVertex(modelMatrix, cornerIdx)); } // Compute face center for winding - float3 faceCenter = float3(0, 0, 0); - for (int32_t i = 0; i < 4; i++) + float32_t3 faceCenter = float32_t3(0, 0, 0); + for (uint32_t i = 0; i < 4; i++) faceCenter += faceVerts[i]; faceCenter = normalize(faceCenter); // Check if point is inside this face bool isInside = true; - float minDist = 1e10; + float32_t minDist = 1e10; - for (int32_t i = 0; i < 4; i++) + for (uint32_t i = 0; i < 4; i++) { - float3 v0 = faceVerts[i]; - float3 v1 = faceVerts[(i + 1) % 4]; + float32_t3 v0 = faceVerts[i]; + float32_t3 v1 = faceVerts[(i + 1) % 4]; // Skip edges behind camera if (v0.z < 0.0f && v1.z < 0.0f) @@ -285,13 +349,13 @@ float4 drawFaces(float3 spherePos, float aaWidth) } // Great circle normal - float3 edgeNormal = normalize(cross(v0, v1)); + float32_t3 edgeNormal = normalize(cross(v0, v1)); // Ensure normal points inward if (dot(edgeNormal, faceCenter) < 0.0f) edgeNormal = -edgeNormal; - float d = dot(p, edgeNormal); + float32_t d = dot(ndc, edgeNormal); if (d < -1e-6f) { @@ -304,25 +368,29 @@ float4 drawFaces(float3 spherePos, float aaWidth) if (isInside) { - float alpha = smoothstep(0.0f, aaWidth * 2.0f, minDist); + float32_t alpha = smoothstep(0.0f, aaWidth * 2.0f, minDist); // Use colorLUT based on face index (0-5) - float3 faceColor = colorLUT[faceIdx]; + float32_t3 faceColor = colorLUT[faceIdx]; - float shading = saturate(p.z * 0.8f + 0.2f); - color += float4(faceColor * shading * alpha, alpha); + float32_t shading = saturate(ndc.z * 0.8f + 0.2f); + color += float32_t4(faceColor * shading * alpha, alpha); } } return color; } -int32_t getEdgeVisibility(int32_t edgeIdx) +#endif // VISUALIZE_SAMPLES + +#if DEBUG_DATA + +uint32_t getEdgeVisibility(float32_t3x4 modelMatrix, uint32_t edgeIdx) { // Adjacency of edges to faces // Corrected Adjacency of edges to faces - static const int2 edgeToFaces[12] = { + static const uint32_t2 edgeToFaces[12] = { // Edge Index: | allEdges[i] | Shared Faces: /* 0 (0-1) */ {4, 0}, // Y- (4) and Z- (0) @@ -341,12 +409,12 @@ int32_t getEdgeVisibility(int32_t edgeIdx) /* 11 (3-7) */ {3, 5} // X+ (3) and Y+ (5) }; - int2 faces = edgeToFaces[edgeIdx]; + uint32_t2 faces = edgeToFaces[edgeIdx]; // Transform normals to world space - float3x3 rotMatrix = (float3x3)pc.modelMatrix; - float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); - float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); + float3x3 rotMatrix = (float3x3)modelMatrix; + float32_t3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); + float32_t3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); bool visible1 = isFaceVisible(faceCenters[faces.x], n_world_f1); bool visible2 = isFaceVisible(faceCenters[faces.y], n_world_f2); @@ -363,15 +431,14 @@ int32_t getEdgeVisibility(int32_t edgeIdx) return 0; } -#if DEBUG_DATA -uint32_t computeGroundTruthEdgeMask() +uint32_t computeGroundTruthEdgeMask(float32_t3x4 modelMatrix) { uint32_t mask = 0u; NBL_UNROLL - for (int32_t j = 0; j < 12; j++) + for (uint32_t j = 0; j < 12; j++) { // getEdgeVisibility returns 1 for a silhouette edge based on 3D geometry - if (getEdgeVisibility(j) == 1) + if (getEdgeVisibility(modelMatrix, j) == 1) { mask |= (1u << j); } @@ -379,12 +446,12 @@ uint32_t computeGroundTruthEdgeMask() return mask; } -void validateEdgeVisibility(uint32_t sil, int32_t vertexCount, uint32_t generatedSilMask) +void validateEdgeVisibility(float32_t3x4 modelMatrix, uint32_t sil, uint32_t vertexCount, uint32_t generatedSilMask) { uint32_t mismatchAccumulator = 0; // The Ground Truth now represents the full 3D silhouette, clipped or not. - uint32_t groundTruthMask = computeGroundTruthEdgeMask(); + uint32_t groundTruthMask = computeGroundTruthEdgeMask(modelMatrix); // The comparison checks if the generated mask perfectly matches the full 3D ground truth. uint32_t mismatchMask = groundTruthMask ^ generatedSilMask; @@ -392,11 +459,11 @@ void validateEdgeVisibility(uint32_t sil, int32_t vertexCount, uint32_t generate if (mismatchMask != 0) { NBL_UNROLL - for (int32_t j = 0; j < 12; j++) + for (uint32_t j = 0; j < 12; j++) { if ((mismatchMask >> j) & 1u) { - int2 edge = allEdges[j]; + uint32_t2 edge = allEdges[j]; // Accumulate vertex indices where error occurred mismatchAccumulator |= (1u << edge.x) | (1u << edge.y); } @@ -406,6 +473,6 @@ void validateEdgeVisibility(uint32_t sil, int32_t vertexCount, uint32_t generate // Simple Write (assuming all fragments calculate the same result) InterlockedOr(DebugDataBuffer[0].edgeVisibilityMismatch, mismatchAccumulator); } -#endif +#endif // DEBUG_DATA #endif // _DEBUG_HLSL_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/RayVis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/RayVis.frag.hlsl new file mode 100644 index 000000000..2b4d7e3ef --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/RayVis.frag.hlsl @@ -0,0 +1,221 @@ +#pragma wave shader_stage(fragment) + +#include "common.hlsl" +#include +#include "utils.hlsl" + +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +[[vk::push_constant]] struct PushConstantRayVis pc; +[[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; +#define VISUALIZE_SAMPLES 1 +#include "Drawing.hlsl" + +// Ray-AABB intersection in world space +// Returns the distance to the nearest intersection point, or -1 if no hit +float32_t rayAABBIntersection(float32_t3 rayOrigin, float32_t3 rayDir, float32_t3 aabbMin, float32_t3 aabbMax) +{ + float32_t3 invDir = 1.0 / rayDir; + float32_t3 t0 = (aabbMin - rayOrigin) * invDir; + float32_t3 t1 = (aabbMax - rayOrigin) * invDir; + + float32_t3 tmin = min(t0, t1); + float32_t3 tmax = max(t0, t1); + + float32_t tNear = max(max(tmin.x, tmin.y), tmin.z); + float32_t tFar = min(min(tmax.x, tmax.y), tmax.z); + + // Check if ray intersects AABB + if (tNear > tFar || tFar < 0.0) + return -1.0; + + // Return the nearest positive intersection + return tNear >= 0.0 ? tNear : tFar; +} + +// Project 3D point to NDC space +float32_t2 projectToNDC(float32_t3 worldPos, float32_t4x4 viewProj, float32_t aspect) +{ + float32_t4 clipPos = mul(viewProj, float32_t4(worldPos, 1.0)); + clipPos /= clipPos.w; + + // Apply aspect ratio correction + clipPos.x *= aspect; + + return clipPos.xy; +} + +// Visualizes a ray as an arrow from origin in NDC space +// Returns color (rgb), intensity (a), and depth (in extra component) +struct ArrowResult +{ + float32_t4 color : SV_Target0; + float32_t depth : SV_Depth; +}; + +ArrowResult visualizeRayAsArrow(float32_t3 rayOrigin, float32_t4 directionAndPdf, float32_t arrowLength, float32_t2 ndcPos, float32_t aspect) +{ + ArrowResult result; + result.color = float32_t4(0, 0, 0, 0); + result.depth = 0.0; + + float32_t3 rayDir = normalize(directionAndPdf.xyz); + float32_t pdf = directionAndPdf.w; + + float32_t3 rayEnd = rayOrigin + rayDir * arrowLength; + + // Project start and end points to NDC space + float32_t2 ndcStart = projectToNDC(rayOrigin, pc.viewProjMatrix, aspect); + float32_t2 ndcEnd = projectToNDC(rayEnd, pc.viewProjMatrix, aspect); + + // Get clip space positions + float32_t4 clipStart = mul(pc.viewProjMatrix, float32_t4(rayOrigin, 1.0)); + float32_t4 clipEnd = mul(pc.viewProjMatrix, float32_t4(rayEnd, 1.0)); + + // Calculate arrow properties in NDC space + float32_t arrowNDCLength = length(ndcEnd - ndcStart); + + // Skip if arrow is too small on screen (in NDC units) + if (arrowNDCLength < 0.01) + return result; + + // Calculate the parametric position along the arrow shaft IN NDC + float32_t2 pa = ndcPos - ndcStart; + float32_t2 ba = ndcEnd - ndcStart; + float32_t t_ndc = saturate(dot(pa, ba) / dot(ba, ba)); + + // Draw line shaft + float32_t lineThickness = 0.002; + float32_t lineIntensity = lineSegment(ndcPos, ndcStart, ndcEnd, lineThickness); + + // Calculate depth at this pixel's position along the arrow + if (lineIntensity > 0.0) + { + // Interpolate in CLIP space for perspective-correct depth + float32_t4 clipPos = lerp(clipStart, clipEnd, t_ndc); + float32_t depthNDC = clipPos.z / clipPos.w; + + // Convert to reversed depth [0,1] -> [1,0] + result.depth = 1.0 - depthNDC; + + // Clip against depth range (like hardware would) + // In reversed depth: near=1.0, far=0.0 + if (result.depth < 0.0 || result.depth > 1.0) + { + lineIntensity = 0.0; // Outside depth range, clip it + } + } + + // Modulate by PDF + float32_t pdfIntensity = saturate(pdf * 0.5); + + float32_t3 finalColor = pdfIntensity; + + result.color = float32_t4(finalColor, lineIntensity); + return result; +} + +// Transform a point by inverse of model matrix (world to local space) +float32_t3 worldToLocal(float32_t3 worldPos, float32_t3x4 modelMatrix) +{ + // Manually construct 4x4 from 3x4 + float32_t4x4 model4x4 = float32_t4x4( + modelMatrix[0], + modelMatrix[1], + modelMatrix[2], + float32_t4(0.0, 0.0, 0.0, 1.0)); + float32_t4x4 invModel = inverse(model4x4); + return mul(invModel, float32_t4(worldPos, 1.0)).xyz; +} + +// Transform a direction by inverse of model matrix (no translation) +float32_t3 worldToLocalDir(float32_t3 worldDir, float32_t3x4 modelMatrix) +{ + // Manually construct 4x4 from 3x4 + float32_t4x4 model4x4 = float32_t4x4( + modelMatrix[0], + modelMatrix[1], + modelMatrix[2], + float32_t4(0.0, 0.0, 0.0, 1.0)); + float32_t4x4 invModel = inverse(model4x4); + return mul(invModel, float32_t4(worldDir, 0.0)).xyz; +} +[[vk::location(0)]] ArrowResult main(SVertexAttributes vx) +{ + ArrowResult output; + output.color = float32_t4(0.0, 0.0, 0.0, 0.0); + output.depth = 0.0; // Default to far plane in reversed depth + float32_t maxDepth = 0.0; // Track the closest depth (maximum in reversed depth) + + // Convert to NDC space with aspect ratio correction + float32_t2 ndcPos = vx.uv * 2.0f - 1.0f; + float32_t aspect = pc.viewport.z / pc.viewport.w; + ndcPos.x *= aspect; + + // Draw clipped silhouett vertices using drawCorners() + for (uint32_t v = 0; v < DebugDataBuffer[0].clippedSilhouetteVertexCount; v++) + { + float32_t4 clipPos = mul(pc.viewProjMatrix, float32_t4(DebugDataBuffer[0].clippedSilhouetteVertices[v], 1.0)); + float32_t3 ndcPosVertex = clipPos.xyz / clipPos.w; // Perspective divide to get NDC + + float32_t4 intensity = drawCorner(ndcPosVertex, ndcPos, 0.005, 0.01, 0.01, float32_t3(1.0, 0.0, 0.0)); + + output.color += intensity; + output.depth = intensity > 0.0 ? 1.0 : output.depth; // Update depth + maxDepth = max(maxDepth, output.depth); + } + + int sampleCount = DebugDataBuffer[0].sampleCount; + + for (int i = 0; i < sampleCount; i++) + { + float32_t3 rayOrigin = float32_t3(0, 0, 0); + float32_t4 directionAndPdf = DebugDataBuffer[0].rayData[i]; + float32_t3 rayDir = normalize(directionAndPdf.xyz); + + // Define cube bounds in local space (unit cube from -0.5 to 0.5, adjust as needed) + float32_t3 cubeLocalMin = float32_t3(-0.5, -0.5, -0.5); + float32_t3 cubeLocalMax = float32_t3(0.5, 0.5, 0.5); + + // Transform ray to local space of the cube + float32_t3 localRayOrigin = worldToLocal(rayOrigin, pc.modelMatrix); + float32_t3 localRayDir = normalize(worldToLocalDir(rayDir, pc.modelMatrix)); + + // Perform intersection test in local space + float32_t hitDistance = rayAABBIntersection(localRayOrigin, localRayDir, cubeLocalMin, cubeLocalMax); + + float32_t arrowLength; + if (hitDistance > 0.0) + { + // Calculate world space hit distance + // We need to account for the scaling in the model matrix + float32_t3 localHitPoint = localRayOrigin + localRayDir * hitDistance; + float32_t3 worldHitPoint = mul(pc.modelMatrix, float32_t4(localHitPoint, 1.0)).xyz; + arrowLength = length(worldHitPoint - rayOrigin); + } + else + { + // No intersection, use fallback (e.g., fixed length or distance to cube center) + float32_t3 cubeCenter = mul(pc.modelMatrix, float32_t4(0, 0, 0, 1)).xyz; + arrowLength = length(cubeCenter - rayOrigin) + 2.0; + } + + ArrowResult arrow = visualizeRayAsArrow(rayOrigin, directionAndPdf, arrowLength, ndcPos, aspect); + maxDepth = max(maxDepth, arrow.depth); + + // Additive blending + output.color.rgb += hitDistance > 0.0 ? arrow.color.rgb : float32_t3(1.0, 0.0, 0.0); + output.color.a = max(output.color.a, arrow.color.a); + } + + // Clamp to prevent overflow + output.color = saturate(output.color); + output.color.a = 1.0; + + // Write the closest depth (maximum in reversed depth) + // ONLY write depth if we actually drew something + output.depth = output.color.a > 0.0 ? maxDepth : 0.0; + + return output; +} \ No newline at end of file diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl index d213d8b94..9caf83246 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl @@ -2,8 +2,10 @@ #define _SAMPLING_HLSL_ // Include the spherical triangle utilities +#include #include #include +#include #include "nbl/builtin/hlsl/random/pcg.hlsl" #include "nbl/builtin/hlsl/random/xoroshiro.hlsl" @@ -13,16 +15,19 @@ using namespace nbl::hlsl; #define SAMPLING_MODE_PROJECTED_SOLID_ANGLE 1 // Maximum number of triangles we can have after clipping -// Without clipping, max 3 faces can be visible at once -// With clipping, can be more. 7 - 2 = 5 max triangles because fanning from one vertex +// Without clipping, max 3 faces can be visible at once so 3 faces * 2 triangles = 6 edges, forming max 4 triangles +// With clipping, one more edge. 7 - 2 = 5 max triangles because fanning from one vertex #define MAX_TRIANGLES 5 +// Minimal cached sampling data - only what's needed for selection struct SamplingData { - float32_t triangleWeights[MAX_TRIANGLES]; - uint32_t triangleIndices[MAX_TRIANGLES]; // Store the 'i' value for each valid triangle - uint32_t count; - float32_t totalWeight; + uint32_t count; // Number of valid triangles + uint32_t samplingMode; // Mode used during build + float32_t totalWeight; // Sum of all triangle weights + float32_t3 faceNormal; // Face normal (only used for projected mode) + float32_t triangleSolidAngles[MAX_TRIANGLES]; // Weight per triangle (for selection) + uint32_t triangleIndices[MAX_TRIANGLES]; // Vertex index i (forms triangle with v0, vi, vi+1) }; float32_t2 nextRandomUnorm2(inout nbl::hlsl::Xoroshiro64StarStar rnd) @@ -69,23 +74,35 @@ float32_t computeProjectedSolidAngleFallback(float32_t3 v0, float32_t3 v1, float // 4. Compute projected solid angle float32_t Gamma = 0.5f * (a * dot(n0, N) + b * dot(n1, N) + c * dot(n2, N)); - // Return the absolute value of the total (to handle CW/CCW triangles) + // Return the absolute value of the total return abs(Gamma); } -// Build sampling data - store weights and vertex indices -SamplingData buildSamplingDataFromSilhouette(ClippedSilhouette silhouette, int32_t samplingMode) +// Build sampling data once - cache only weights for triangle selection +SamplingData buildSamplingDataFromSilhouette(ClippedSilhouette silhouette, uint32_t samplingMode) { SamplingData data; data.count = 0; - data.totalWeight = 0; + data.totalWeight = 0.0f; + data.samplingMode = samplingMode; + data.faceNormal = float32_t3(0, 0, 0); if (silhouette.count < 3) return data; - float32_t3 v0 = silhouette.vertices[0]; - float32_t3 origin = float32_t3(0, 0, 0); + const float32_t3 v0 = silhouette.vertices[0]; + const float32_t3 origin = float32_t3(0, 0, 0); + // Compute face normal ONCE before the loop - silhouette is planar! + if (samplingMode == SAMPLING_MODE_PROJECTED_SOLID_ANGLE) + { + float32_t3 v1 = silhouette.vertices[1]; + float32_t3 v2 = silhouette.vertices[2]; + data.faceNormal = normalize(cross(v1 - v0, v2 - v0)); + } + + // Build fan triangulation from v0 + NBL_UNROLL for (uint32_t i = 1; i < silhouette.count - 1; i++) { float32_t3 v1 = silhouette.vertices[i]; @@ -93,60 +110,84 @@ SamplingData buildSamplingDataFromSilhouette(ClippedSilhouette silhouette, int32 shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin); + // Skip degenerate triangles if (shapeTri.pyramidAngles()) continue; - float32_t weight; + // Calculate triangle solid angle + float32_t solidAngle; if (samplingMode == SAMPLING_MODE_PROJECTED_SOLID_ANGLE) { - float32_t3 faceNormal = normalize(cross(v1 - v0, v2 - v0)); // TODO: precompute? - weight = computeProjectedSolidAngleFallback(normalize(v0), normalize(v1), normalize(v2), faceNormal); + // scalar_type projectedSolidAngleOfTriangle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) + float32_t3 cos_vertices = clamp( + (shapeTri.cos_sides - shapeTri.cos_sides.yzx * shapeTri.cos_sides.zxy) * + shapeTri.csc_sides.yzx * shapeTri.csc_sides.zxy, + float32_t3(-1.0f, -1.0f, -1.0f), + float32_t3(1.0f, 1.0f, 1.0f)); + solidAngle = shapeTri.projectedSolidAngleOfTriangle(data.faceNormal, shapeTri.cos_sides, shapeTri.csc_sides, cos_vertices); } else { - weight = shapeTri.solidAngleOfTriangle(); + solidAngle = shapeTri.solidAngleOfTriangle(); } - if (weight <= 0.0f) + if (solidAngle <= 0.0f) continue; - data.triangleWeights[data.count] = weight; - data.triangleIndices[data.count] = i; // Store the original vertex index, we need to account for skipped degenerate triangles. - data.totalWeight += weight; + // Store only what's needed for weighted selection + data.triangleSolidAngles[data.count] = solidAngle; + data.triangleIndices[data.count] = i; + data.totalWeight += solidAngle; data.count++; } #ifdef DEBUG_DATA - // Assert no edge has both vertices antipodal (lune case) + // Validate no antipodal edges exist (would create spherical lune) for (uint32_t i = 0; i < silhouette.count; i++) { uint32_t j = (i + 1) % silhouette.count; float32_t3 n1 = normalize(silhouette.vertices[i]); float32_t3 n2 = normalize(silhouette.vertices[j]); - // Check if vertices are antipodal - bool antipodal = dot(n1, n2) < -0.99f; + if (dot(n1, n2) < -0.99f) + { + DebugDataBuffer[0].sphericalLuneDetected = 1; + assert(false && "Spherical lune detected: antipodal silhouette edge"); + } + } + DebugDataBuffer[0].maxTrianglesExceeded = (data.count > MAX_TRIANGLES); + + DebugDataBuffer[0].clippedSilhouetteVertexCount = silhouette.count; + for (uint32_t v = 0; v < silhouette.count; v++) + { + DebugDataBuffer[0].clippedSilhouetteVertices[v] = silhouette.vertices[v]; + } - assert(false && "Spherical lune detected: antipodal silhouette edge"); + DebugDataBuffer[0].triangleCount = data.count; + DebugDataBuffer[0].totalSolidAngles = data.totalWeight; + for (uint32_t tri = 0; tri < data.count; tri++) + { + DebugDataBuffer[0].solidAngles[tri] = data.triangleSolidAngles[tri]; } #endif - DebugDataBuffer[0].maxTrianglesExcceded = data.count > MAX_TRIANGLES; return data; } +// Sample using cached selection weights, but recompute geometry on-demand float32_t3 sampleFromData(SamplingData data, ClippedSilhouette silhouette, float32_t2 xi, out float32_t pdf, out uint32_t selectedIdx) { + selectedIdx = 0; + + // Handle empty or invalid data if (data.count == 0 || data.totalWeight <= 0.0f) { - pdf = 0; - selectedIdx = 0; + pdf = 0.0f; return float32_t3(0, 0, 1); } - // Select triangle using uniform random sampling weighted by importance - float32_t toFind = xi.x * data.totalWeight; - uint32_t triIdx = 0; + // Select triangle using cached weighted random selection + float32_t targetWeight = xi.x * data.totalWeight; float32_t cumulativeWeight = 0.0f; float32_t prevCumulativeWeight = 0.0f; @@ -154,57 +195,104 @@ float32_t3 sampleFromData(SamplingData data, ClippedSilhouette silhouette, float for (uint32_t i = 0; i < data.count; i++) { prevCumulativeWeight = cumulativeWeight; - cumulativeWeight += data.triangleWeights[i]; - if (toFind <= cumulativeWeight) + cumulativeWeight += data.triangleSolidAngles[i]; + + if (targetWeight <= cumulativeWeight) { - triIdx = i; + selectedIdx = i; break; } } - selectedIdx = triIdx; - - // Remap xi.x to [0,1] within the selected triangle's weight range - float32_t triMin = prevCumulativeWeight; - float32_t triMax = cumulativeWeight; - float32_t triWeight = triMax - triMin; - float32_t u = (toFind - triMin) / max(triWeight, 1e-7f); + // Remap xi.x to [0,1] within selected triangle's solidAngle interval + float32_t triSolidAngle = data.triangleSolidAngles[selectedIdx]; + float32_t u = (targetWeight - prevCumulativeWeight) / max(triSolidAngle, 1e-7f); - // Reconstruct the triangle using the stored vertex index - uint32_t vertexIdx = data.triangleIndices[triIdx]; // We need to account for skipped degenerate triangles. + // Reconstruct the selected triangle geometry + uint32_t vertexIdx = data.triangleIndices[selectedIdx]; float32_t3 v0 = silhouette.vertices[0]; float32_t3 v1 = silhouette.vertices[vertexIdx]; float32_t3 v2 = silhouette.vertices[vertexIdx + 1]; + + float32_t3 faceNormal = normalize(cross(v1 - v0, v2 - v0)); + float32_t3 origin = float32_t3(0, 0, 0); shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin); - sampling::SphericalTriangle samplingTri = sampling::SphericalTriangle::create(shapeTri); - // Sample from the selected triangle using remapped u and original xi.y + // Compute vertex angles once + float32_t3 cos_vertices = clamp( + (shapeTri.cos_sides - shapeTri.cos_sides.yzx * shapeTri.cos_sides.zxy) * + shapeTri.csc_sides.yzx * shapeTri.csc_sides.zxy, + float32_t3(-1.0f, -1.0f, -1.0f), + float32_t3(1.0f, 1.0f, 1.0f)); + float32_t3 sin_vertices = sqrt(float32_t3(1.0f, 1.0f, 1.0f) - cos_vertices * cos_vertices); + + // Sample based on mode + float32_t3 direction; float32_t rcpPdf; - float32_t3 direction = samplingTri.generate(rcpPdf, float32_t2(u, xi.y)); - float32_t trianglePdf = 1.0f / rcpPdf; - pdf = trianglePdf * (data.triangleWeights[triIdx] / data.totalWeight); + if (data.samplingMode == SAMPLING_MODE_PROJECTED_SOLID_ANGLE) + { + sampling::ProjectedSphericalTriangle samplingTri = + sampling::ProjectedSphericalTriangle::create(shapeTri); + + direction = samplingTri.generate( + rcpPdf, + triSolidAngle, + cos_vertices, + sin_vertices, + shapeTri.cos_sides[0], + shapeTri.cos_sides[2], + shapeTri.csc_sides[1], + shapeTri.csc_sides[2], + faceNormal, + false, + float32_t2(u, xi.y)); + triSolidAngle = rcpPdf; // projected solid angle returned as rcpPdf + } + else + { + sampling::SphericalTriangle samplingTri = + sampling::SphericalTriangle::create(shapeTri); + + direction = samplingTri.generate( + triSolidAngle, + cos_vertices, + sin_vertices, + shapeTri.cos_sides[0], + shapeTri.cos_sides[2], + shapeTri.csc_sides[1], + shapeTri.csc_sides[2], + float32_t2(u, xi.y)); + } + + // Calculate PDF + float32_t trianglePdf = 1.0f / triSolidAngle; + float32_t selectionProb = triSolidAngle / data.totalWeight; + pdf = trianglePdf * selectionProb; return normalize(direction); } +#if VISUALIZE_SAMPLES + float32_t4 visualizeSamples(float32_t2 screenUV, float32_t3 spherePos, ClippedSilhouette silhouette, - int32_t samplingMode, SamplingData samplingData, int32_t numSamples) + uint32_t samplingMode, uint32_t frameIndex, SamplingData samplingData, uint32_t numSamples, inout RWStructuredBuffer DebugDataBuffer) { float32_t4 accumColor = 0; - if (samplingData.count == 0) + if (silhouette.count == 0) return 0; float32_t2 pssSize = float32_t2(0.3, 0.3); // 30% of screen float32_t2 pssPos = float32_t2(0.01, 0.01); // Offset from corner bool isInsidePSS = all(and(screenUV >= pssPos, screenUV <= (pssPos + pssSize))); - for (int32_t i = 0; i < numSamples; i++) + DebugDataBuffer[0].sampleCount = numSamples; + for (uint32_t i = 0; i < numSamples; i++) { - nbl::hlsl::random::PCG32 seedGen = nbl::hlsl::random::PCG32::construct(pc.frameIndex * 65536u + i); + nbl::hlsl::random::PCG32 seedGen = nbl::hlsl::random::PCG32::construct(frameIndex * 65536u + i); const uint32_t seed1 = seedGen(); const uint32_t seed2 = seedGen(); nbl::hlsl::Xoroshiro64StarStar rnd = nbl::hlsl::Xoroshiro64StarStar::construct(uint32_t2(seed1, seed2)); @@ -214,6 +302,8 @@ float32_t4 visualizeSamples(float32_t2 screenUV, float32_t3 spherePos, ClippedSi uint32_t triIdx; float32_t3 sampleDir = sampleFromData(samplingData, silhouette, xi, pdf, triIdx); + DebugDataBuffer[0].rayData[i] = float32_t4(sampleDir, pdf); + float32_t dist3D = distance(sampleDir, normalize(spherePos)); float32_t alpha3D = 1.0f - smoothstep(0.0f, 0.02f, dist3D); @@ -245,3 +335,4 @@ float32_t4 visualizeSamples(float32_t2 screenUV, float32_t3 spherePos, ClippedSi return accumColor; } #endif +#endif diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index 31cbe577a..79791af57 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -2,189 +2,27 @@ #include "common.hlsl" #include -#include "utils.hlsl" using namespace nbl::hlsl; using namespace ext::FullScreenTriangle; -[[vk::push_constant]] struct PushConstants pc; -[[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; - -static const float CIRCLE_RADIUS = 0.5f; - -// --- Geometry Utils --- -struct ClippedSilhouette -{ - float32_t3 vertices[7]; - uint32_t count; -}; - -static const float32_t3 constCorners[8] = { - float32_t3(-1, -1, -1), float32_t3(1, -1, -1), float32_t3(-1, 1, -1), float32_t3(1, 1, -1), - float32_t3(-1, -1, 1), float32_t3(1, -1, 1), float32_t3(-1, 1, 1), float32_t3(1, 1, 1)}; - -static const int32_t2 allEdges[12] = { - {0, 1}, - {2, 3}, - {4, 5}, - {6, 7}, // X axis - {0, 2}, - {1, 3}, - {4, 6}, - {5, 7}, // Y axis - {0, 4}, - {1, 5}, - {2, 6}, - {3, 7}, // Z axis -}; - -// Maps face index (0-5) to its 4 corner indices in CCW order -static const uint32_t faceToCorners[6][4] = { - {0, 2, 3, 1}, // Face 0: Z- - {4, 5, 7, 6}, // Face 1: Z+ - {0, 4, 6, 2}, // Face 2: X- - {1, 3, 7, 5}, // Face 3: X+ - {0, 1, 5, 4}, // Face 4: Y- - {2, 6, 7, 3} // Face 5: Y+ -}; - -static float32_t3 corners[8]; -static float32_t3 faceCenters[6] = { - float32_t3(0, 0, 0), float32_t3(0, 0, 0), float32_t3(0, 0, 0), - float32_t3(0, 0, 0), float32_t3(0, 0, 0), float32_t3(0, 0, 0)}; - -static const float32_t3 localNormals[6] = { - float32_t3(0, 0, -1), // Face 0 (Z-) - float32_t3(0, 0, 1), // Face 1 (Z+) - float32_t3(-1, 0, 0), // Face 2 (X-) - float32_t3(1, 0, 0), // Face 3 (X+) - float32_t3(0, -1, 0), // Face 4 (Y-) - float32_t3(0, 1, 0) // Face 5 (Y+) -}; - -// TODO: unused, remove later -// Vertices are ordered CCW relative to the camera view. -static const int32_t silhouettes[27][7] = { - {6, 1, 3, 2, 6, 4, 5}, // 0: Black - {6, 2, 6, 4, 5, 7, 3}, // 1: White - {6, 0, 4, 5, 7, 3, 2}, // 2: Gray - {6, 1, 3, 7, 6, 4, 5}, // 3: Red - {4, 4, 5, 7, 6, -1, -1}, // 4: Green - {6, 0, 4, 5, 7, 6, 2}, // 5: Blue - {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow - {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta - {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan - {6, 1, 3, 2, 6, 7, 5}, // 9: Orange - {4, 2, 6, 7, 3, -1, -1}, // 10: Light Orange - {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange - {4, 1, 3, 7, 5, -1, -1}, // 12: Pink - {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink - {4, 0, 4, 6, 2, -1, -1}, // 14: Deep Rose - {6, 0, 1, 3, 7, 5, 4}, // 15: Purple - {4, 0, 1, 5, 4, -1, -1}, // 16: Light Purple - {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo - {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green - {6, 0, 2, 6, 7, 3, 1}, // 19: Lime - {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green - {6, 0, 2, 3, 7, 5, 1}, // 21: Navy - {4, 0, 2, 3, 1, -1, -1}, // 22: Sky Blue - {6, 0, 4, 6, 2, 3, 1}, // 23: Teal - {6, 0, 2, 3, 7, 5, 4}, // 24: Brown - {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige - {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown -}; - -// Binary packed silhouettes -static const uint32_t binSilhouettes[27] = { - 0b11000000000000101100110010011001, - 0b11000000000000011111101100110010, - 0b11000000000000010011111101100000, - 0b11000000000000101100110111011001, - 0b10000000000000000000110111101100, - 0b11000000000000010110111101100000, - 0b11000000000000100110111011001000, - 0b11000000000000100110111101001000, - 0b11000000000000010110111101001000, - 0b11000000000000101111110010011001, - 0b10000000000000000000011111110010, - 0b11000000000000010011111110100000, - 0b10000000000000000000101111011001, - 0b11000000000000010011111110100000, - 0b10000000000000000000010110100000, - 0b11000000000000100101111011001000, - 0b10000000000000000000100101001000, - 0b11000000000000010110100101001000, - 0b11000000000000001101111110010000, - 0b11000000000000001011111110010000, - 0b11000000000000001011111110100000, - 0b11000000000000001101111011010000, - 0b10000000000000000000001011010000, - 0b11000000000000001011010110100000, - 0b11000000000000100101111011010000, - 0b11000000000000100101001011010000, - 0b11000000000000011010110100101001, -}; - -int32_t getSilhouetteVertex(uint32_t packedSil, int32_t index) -{ - return (packedSil >> (3 * index)) & 0x7; -} - -// Get silhouette size -int32_t getSilhouetteSize(uint32_t sil) -{ - return (sil >> 29) & 0x7; -} - -// Check if vertex has negative z -bool getVertexZNeg(int32_t vertexIdx) -{ -#if FAST - float32_t3 localPos = float32_t3( - (vertexIdx & 1) ? 1.0f : -1.0f, - (vertexIdx & 2) ? 1.0f : -1.0f, - (vertexIdx & 4) ? 1.0f : -1.0f); - - float transformedZ = dot(pc.modelMatrix[2].xyz, localPos) + pc.modelMatrix[2].w; - return transformedZ < 0.0f; -#else - return corners[vertexIdx].z < 0.0f; -#endif -} - -// Get world position of cube vertex -float32_t3 getVertex(int32_t vertexIdx) -{ -#if FAST - // Reconstruct local cube corner from index bits - float sx = (vertexIdx & 1) ? 1.0f : -1.0f; - float sy = (vertexIdx & 2) ? 1.0f : -1.0f; - float sz = (vertexIdx & 4) ? 1.0f : -1.0f; - - float32_t4x3 model = transpose(pc.modelMatrix); +[[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; // TODO: move below other includes - // Transform to world - // Full position, not just Z like getVertexZNeg - return model[0].xyz * sx + - model[1].xyz * sy + - model[2].xyz * sz + - model[3].xyz; - // return mul(pc.modelMatrix, float32_t4(sx, sy, sz, 1.0f)); -#else - return corners[vertexIdx]; -#endif -} +#define VISUALIZE_SAMPLES 1 +#include "utils.hlsl" #include "Drawing.hlsl" #include "Sampling.hlsl" +#include "silhouette.hlsl" +[[vk::push_constant]] struct PushConstants pc; -void setDebugData(uint32_t sil, int32_t3 region, int32_t configIndex) +void setDebugData(uint32_t sil, uint32_t3 region, uint32_t configIndex) { #if DEBUG_DATA DebugDataBuffer[0].region = uint32_t3(region); DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); - for (int32_t i = 0; i < 6; i++) + for (uint32_t i = 0; i < 6; i++) { DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); } @@ -192,274 +30,131 @@ void setDebugData(uint32_t sil, int32_t3 region, int32_t configIndex) #endif } -float32_t2 toCircleSpace(float32_t2 uv) -{ - float32_t2 p = uv * 2.0f - 1.0f; - float aspect = pc.viewport.z / pc.viewport.w; - p.x *= aspect; - return p; -} - -uint32_t packSilhouette(const int32_t s[7]) -{ - uint32_t packed = 0; - int32_t size = s[0] & 0x7; // 3 bits for size - - // Pack vertices LSB-first (vertex1 in lowest 3 bits above size) - for (int32_t i = 1; i <= 6; ++i) - { - int32_t v = s[i]; - if (v < 0) - v = 0; // replace unused vertices with 0 - packed |= (v & 0x7) << (3 * (i - 1)); // vertex i-1 shifted by 3*(i-1) - } - - // Put size in the MSB (bits 29-31 for a 32-bit uint32_t, leaving 29 bits for vertices) - packed |= (size & 0x7) << 29; - - return packed; -} - void computeCubeGeo() { - for (int32_t i = 0; i < 8; i++) + for (uint32_t i = 0; i < 8; i++) corners[i] = mul(pc.modelMatrix, float32_t4(constCorners[i], 1.0f)).xyz; - for (int32_t f = 0; f < 6; f++) + for (uint32_t f = 0; f < 6; f++) { faceCenters[f] = float32_t3(0, 0, 0); - for (int32_t v = 0; v < 4; v++) + for (uint32_t v = 0; v < 4; v++) faceCenters[f] += corners[faceToCorners[f][v]]; faceCenters[f] /= 4.0f; } } -// Helper to draw an edge with proper color mapping -float32_t4 drawEdge(int32_t originalEdgeIdx, float32_t3 pts[2], float32_t3 spherePos, float aaWidth, float width = 0.01f) -{ - float32_t4 edgeContribution = drawGreatCircleArc(spherePos, pts, aaWidth, width); - return float32_t4(colorLUT[originalEdgeIdx] * edgeContribution.a, edgeContribution.a); -}; - -float32_t4 computeSilhouette(uint32_t vertexCount, uint32_t sil, float32_t3 spherePos, float aaWidth, out ClippedSilhouette silhouette) +void validateSilhouetteEdges(uint32_t sil, uint32_t vertexCount, inout uint32_t silEdgeMask) { - float32_t4 color = float32_t4(0, 0, 0, 0); - silhouette.count = 0; - - // Build clip mask (z < 0) - int32_t clipMask = 0u; - NBL_UNROLL - for (int32_t i = 0; i < 4; i++) - clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; - - if (vertexCount == 6) - { - NBL_UNROLL - for (int32_t i = 4; i < 6; i++) - clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; - } - - int32_t clipCount = countbits(clipMask); - -#if 0 - // Early exit if fully clipped - if (clipCount == vertexCount) - return color; - - // No clipping needed - fast path - if (clipCount == 0) +#if DEBUG_DATA { - for (int32_t i = 0; i < vertexCount; i++) + for (uint32_t i = 0; i < vertexCount; i++) { - int32_t i0 = i; - int32_t i1 = (i + 1) % vertexCount; + uint32_t vIdx = i % vertexCount; + uint32_t v1Idx = (i + 1) % vertexCount; - float32_t3 v0 = getVertex(getSilhouetteVertex(sil, i0)); - float32_t3 v1 = getVertex(getSilhouetteVertex(sil, i1)); - float32_t3 pts[2] = {v0, v1}; - - color += drawEdge(i1, pts, spherePos, aaWidth); + uint32_t v0Corner = getSilhouetteVertex(sil, vIdx); + uint32_t v1Corner = getSilhouetteVertex(sil, v1Idx); + // Mark edge as part of silhouette + for (uint32_t e = 0; e < 12; e++) + { + uint32_t2 edge = allEdges[e]; + if ((edge.x == v0Corner && edge.y == v1Corner) || + (edge.x == v1Corner && edge.y == v0Corner)) + { + silEdgeMask |= (1u << e); + } + } } - return color; + validateEdgeVisibility(pc.modelMatrix, sil, vertexCount, silEdgeMask); } #endif +} - // Rotate clip mask so positives come first - uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); - bool wrapAround = ((clipMask & 1u) != 0u) && - ((clipMask & (1u << (vertexCount - 1))) != 0u); - int32_t rotateAmount = wrapAround - ? firstbitlow(invertedMask) // -> First POSITIVE - : firstbithigh(clipMask) + 1; // -> First vertex AFTER last negative - - uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); - uint32_t rotatedSil = rotr(sil, rotateAmount * 3, vertexCount * 3); - - int32_t positiveCount = vertexCount - clipCount; - - // ALWAYS compute both clip points - int32_t lastPosIdx = positiveCount - 1; - int32_t firstNegIdx = positiveCount; - float32_t3 vLastPos = getVertex(getSilhouetteVertex(rotatedSil, lastPosIdx)); - float32_t3 vFirstNeg = getVertex(getSilhouetteVertex(rotatedSil, firstNegIdx)); - float t = vLastPos.z / (vLastPos.z - vFirstNeg.z); - float32_t3 clipA = lerp(vLastPos, vFirstNeg, t); +void computeSpherePos(SVertexAttributes vx, out float32_t2 ndc, out float32_t3 spherePos) +{ + ndc = vx.uv * 2.0f - 1.0f; + float32_t aspect = pc.viewport.z / pc.viewport.w; + ndc.x *= aspect; - float32_t3 vLastNeg = getVertex(getSilhouetteVertex(rotatedSil, vertexCount - 1)); - float32_t3 vFirstPos = getVertex(getSilhouetteVertex(rotatedSil, 0)); - t = vLastNeg.z / (vLastNeg.z - vFirstPos.z); - float32_t3 clipB = lerp(vLastNeg, vFirstPos, t); + float32_t2 normalized = ndc / CIRCLE_RADIUS; + float32_t r2 = dot(normalized, normalized); - // Draw positive edges - NBL_UNROLL - for (int32_t i = 0; i < positiveCount; i++) + if (r2 <= 1.0f) { - float32_t3 v0 = getVertex(getSilhouetteVertex(rotatedSil, i)); - - // ONLY use clipA if we are at the end of the positive run AND there's a clip - bool isLastPositive = (i == positiveCount - 1); - bool useClipA = (clipCount > 0) && isLastPositive; - - // If not using clipA, wrap around to the next vertex - float32_t3 v1 = useClipA ? clipA : getVertex(getSilhouetteVertex(rotatedSil, (i + 1) % vertexCount)); - - float32_t3 pts[2] = {v0, v1}; - color += drawEdge((i + 1) % vertexCount, pts, spherePos, aaWidth); - - silhouette.vertices[silhouette.count++] = v0; + spherePos = float32_t3(normalized.x, normalized.y, sqrt(1.0f - r2)); } - - if (clipCount > 0 && clipCount < vertexCount) + else { - // NP edge - float32_t3 vFirst = getVertex(getSilhouetteVertex(rotatedSil, 0)); - float32_t3 npPts[2] = {clipB, vFirst}; - color += drawEdge(0, npPts, spherePos, aaWidth); - - // Horizon arc - float32_t3 arcPts[2] = {clipA, clipB}; - color += drawEdge(23, arcPts, spherePos, aaWidth, 0.6f); - - silhouette.vertices[silhouette.count++] = clipA; - silhouette.vertices[silhouette.count++] = clipB; + float32_t uv2Plus1 = r2 + 1.0f; + spherePos = float32_t3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; } - -#if DEBUG_DATA - DebugDataBuffer[0].clipMask = clipMask; - DebugDataBuffer[0].clipCount = clipCount; - DebugDataBuffer[0].rotatedClipMask = rotatedClipMask; - DebugDataBuffer[0].rotateAmount = rotateAmount; - DebugDataBuffer[0].positiveVertCount = positiveCount; - DebugDataBuffer[0].wrapAround = (uint32_t)wrapAround; - DebugDataBuffer[0].rotatedSil = rotatedSil; - -#endif - return color; + spherePos = normalize(spherePos); } [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { float32_t4 color = float32_t4(0, 0, 0, 0); - for (int32_t i = 0; i < 1; i++) + for (uint32_t i = 0; i < 1; i++) { - float aaWidth = length(float32_t2(ddx(vx.uv.x), ddy(vx.uv.y))); - float32_t2 p = toCircleSpace(vx.uv); - - float32_t2 normalized = p / CIRCLE_RADIUS; - float r2 = dot(normalized, normalized); - + float32_t aaWidth = length(float32_t2(ddx(vx.uv.x), ddy(vx.uv.y))); float32_t3 spherePos; - if (r2 <= 1.0f) - { - spherePos = float32_t3(normalized.x, normalized.y, sqrt(1.0f - r2)); - } - else - { - float uv2Plus1 = r2 + 1.0f; - spherePos = float32_t3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; - } - spherePos = normalize(spherePos); - + float32_t2 ndc; + computeSpherePos(vx, ndc, spherePos); +#if !FAST || DEBUG_DATA computeCubeGeo(); - - float32_t4x3 columnModel = transpose(pc.modelMatrix); - float32_t3 obbCenter = columnModel[3].xyz; - float32_t3x3 upper3x3 = (float32_t3x3)columnModel; - float32_t3 rcpSqScales = rcp(float32_t3( - dot(upper3x3[0], upper3x3[0]), - dot(upper3x3[1], upper3x3[1]), - dot(upper3x3[2], upper3x3[2]))); - float32_t3 normalizedProj = mul(upper3x3, obbCenter) * rcpSqScales; - - int32_t3 region = int32_t3( - normalizedProj.x < -1.0f ? 0 : (normalizedProj.x > 1.0f ? 2 : 1), - normalizedProj.y < -1.0f ? 0 : (normalizedProj.y > 1.0f ? 2 : 1), - normalizedProj.z < -1.0f ? 0 : (normalizedProj.z > 1.0f ? 2 : 1)); - - int32_t configIndex = region.x + region.y * 3 + region.z * 9; - - // uint32_t sil = packSilhouette(silhouettes[configIndex]); - uint32_t sil = binSilhouettes[configIndex]; - - int32_t vertexCount = getSilhouetteSize(sil); +#endif + uint32_t3 region; + uint32_t configIndex; + uint32_t vertexCount; + uint32_t sil = computeRegionAndConfig(pc.modelMatrix, region, configIndex, vertexCount); uint32_t silEdgeMask = 0; // TODO: take from 'fast' computeSilhouette() #if DEBUG_DATA - { - for (int32_t i = 0; i < vertexCount; i++) - { - int32_t vIdx = i % vertexCount; - int32_t v1Idx = (i + 1) % vertexCount; - - int32_t v0Corner = getSilhouetteVertex(sil, vIdx); - int32_t v1Corner = getSilhouetteVertex(sil, v1Idx); - // Mark edge as part of silhouette - for (int32_t e = 0; e < 12; e++) - { - int32_t2 edge = allEdges[e]; - if ((edge.x == v0Corner && edge.y == v1Corner) || - (edge.x == v1Corner && edge.y == v0Corner)) - { - silEdgeMask |= (1u << e); - } - } - } - validateEdgeVisibility(sil, vertexCount, silEdgeMask); - } + validateSilhouetteEdges(sil, vertexCount, silEdgeMask); #endif - - uint32_t positiveCount = 0; - ClippedSilhouette silhouette; - color += computeSilhouette(vertexCount, sil, spherePos, aaWidth, silhouette); + +#if VISUALIZE_SAMPLES + color += computeSilhouette(pc.modelMatrix, vertexCount, sil, spherePos, aaWidth, silhouette); +#else + computeSilhouette(pc.modelMatrix, vertexCount, sil, silhouette); +#endif // Draw clipped silhouette vertices - // color += drawClippedSilhouetteVertices(p, silhouette, aaWidth); + // color += drawClippedSilhouetteVertices(ndc, silhouette, aaWidth); SamplingData samplingData = buildSamplingDataFromSilhouette(silhouette, pc.samplingMode); - - uint32_t faceIndices[3]; - uint32_t visibleFaceCount = getVisibleFaces(region, faceIndices); +#if VISUALIZE_SAMPLES // For debugging: Draw a small indicator of which faces are found - // color += drawVisibleFaceOverlay(spherePos, region, aaWidth); + // color += drawVisibleFaceOverlay(pc.modelMatrix, spherePos, region, aaWidth); - // color += drawFaces(spherePos, aaWidth); + // color += drawFaces(pc.modelMatrix, spherePos, aaWidth); // Draw samples on sphere - color += visualizeSamples(vx.uv, spherePos, silhouette, pc.samplingMode, samplingData, 64); + color += visualizeSamples(vx.uv, spherePos, silhouette, pc.samplingMode, pc.frameIndex, samplingData, 64, DebugDataBuffer); - // Or draw 2D sample space (in a separate viewport) - // color += visualizePrimarySampleSpace(vx.uv, pc.samplingMode, 64, aaWidth); - - setDebugData(sil, region, configIndex); - // color += drawHiddenEdges(spherePos, silEdgeMask, aaWidth); - color += drawCorners(p, aaWidth); - color += drawRing(p, aaWidth); + color += drawHiddenEdges(pc.modelMatrix, spherePos, silEdgeMask, aaWidth); + color += drawCorners(pc.modelMatrix, ndc, aaWidth); + color += drawRing(ndc, aaWidth); if (all(vx.uv >= float32_t2(0.49f, 0.49f)) && all(vx.uv <= float32_t2(0.51f, 0.51f))) { return float32_t4(colorLUT[configIndex], 1.0f); } +#else + nbl::hlsl::random::PCG32 seedGen = nbl::hlsl::random::PCG32::construct(65536u + i); + const uint32_t2 seeds = uint32_t2(seedGen(), seedGen()); + nbl::hlsl::Xoroshiro64StarStar rnd = nbl::hlsl::Xoroshiro64StarStar::construct(seeds); + float32_t2 xi = nextRandomUnorm2(rnd); + + float32_t pdf; + uint32_t triIdx; + float32_t3 sampleDir = sampleFromData(samplingData, silhouette, xi, pdf, triIdx); + + color += float4(sampleDir * 0.02f / pdf, 1.0f); +#endif // VISUALIZE_SAMPLES + setDebugData(sil, region, configIndex); } return color; diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl new file mode 100644 index 000000000..6d04538a5 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl @@ -0,0 +1,45 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) +#define DEBUG_DATA 0 +#include "app_resources/hlsl/benchmark/common.hlsl" +#include "app_resources/hlsl/silhouette.hlsl" +#include "app_resources/hlsl/Sampling.hlsl" + +using namespace nbl::hlsl; + +[[vk::binding(0, 0)]] RWByteAddressBuffer outputBuffer; +[[vk::push_constant]] BenchmarkPushConstants pc; + +[numthreads(BENCHMARK_WORKGROUP_DIMENSION_SIZE_X, 1, 1)] + [shader("compute")] void + main(uint3 invocationID : SV_DispatchThreadID) +{ + uint32_t3 region; + uint32_t configIndex; + uint32_t vertexCount; + uint32_t sil = computeRegionAndConfig(pc.modelMatrix, region, configIndex, vertexCount); + + ClippedSilhouette silhouette; + computeSilhouette(pc.modelMatrix, vertexCount, sil, silhouette); + + SamplingData samplingData; + samplingData = buildSamplingDataFromSilhouette(silhouette, pc.samplingMode); + + nbl::hlsl::random::PCG32 seedGen = nbl::hlsl::random::PCG32::construct(65536u + invocationID.x); + const uint32_t2 seeds = uint32_t2(seedGen(), seedGen()); + + float32_t pdf; + uint32_t triIdx; + float32_t3 sampleDir = float32_t3(0.0, 0.0, 0.0); + for (uint32_t i = 0; i < 64; i++) + { + nbl::hlsl::Xoroshiro64StarStar rnd = nbl::hlsl::Xoroshiro64StarStar::construct(seeds); + float32_t2 xi = nextRandomUnorm2(rnd); + sampleDir += sampleFromData(samplingData, silhouette, xi, pdf, triIdx); + } + + const uint32_t offset = sizeof(uint32_t) * invocationID.x; + outputBuffer.Store(offset, pdf + triIdx + asuint(sampleDir.x) + asuint(sampleDir.y) + asuint(sampleDir.z)); +} diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl new file mode 100644 index 000000000..d54ee8a36 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl @@ -0,0 +1,23 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h + +#include + +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_X = 64u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_COUNT = 1920u * 1080u / BENCHMARK_WORKGROUP_DIMENSION_SIZE_X; + +enum SAMPLING_BENCHMARK_MODE +{ + TRIANGLE_SOLID_ANGLE, + TRIANGLE_PROJECTED_SOLID_ANGLE, +}; + +struct BenchmarkPushConstants +{ + float32_t3x4 modelMatrix; + uint32_t samplingMode; + SAMPLING_BENCHMARK_MODE benchmarkMode; +}; \ No newline at end of file diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl index dd0ab2d99..db2f328b5 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -2,6 +2,10 @@ #define _SOLID_ANGLE_VIS_COMMON_HLSL_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +// Sampling mode enum +#define SAMPLING_MODE_SOLID_ANGLE 0 +#define SAMPLING_MODE_PROJECTED_SOLID_ANGLE 1 + #define DEBUG_DATA 1 #define FAST 1 @@ -27,9 +31,21 @@ namespace nbl uint32_t rotatedClipMask; uint32_t rotateAmount; - uint32_t maxTrianglesExcceded; + uint32_t maxTrianglesExceeded; + uint32_t sphericalLuneDetected; uint32_t vertices[6]; + + uint32_t clippedSilhouetteVertexCount; + float32_t3 clippedSilhouetteVertices[7]; + + uint32_t triangleCount; + float32_t solidAngles[5]; + float32_t totalSolidAngles; + + // Sampling ray visualization data + uint32_t sampleCount; + float32_t4 rayData[64]; // xyz = direction, w = PDF }; struct PushConstants @@ -39,9 +55,14 @@ namespace nbl uint32_t samplingMode; uint32_t frameIndex; }; - // Sampling mode enum -#define SAMPLING_MODE_SOLID_ANGLE 0 -#define SAMPLING_MODE_PROJECTED_SOLID_ANGLE 1 + + struct PushConstantRayVis + { + float32_t4x4 viewProjMatrix; + float32_t3x4 modelMatrix; + float32_t4 viewport; + uint32_t frameIndex; + }; static const float32_t3 colorLUT[27] = { float32_t3(0, 0, 0), float32_t3(0.5, 0.5, 0.5), diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl new file mode 100644 index 000000000..d4ef71d07 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl @@ -0,0 +1,168 @@ +#ifndef GPU_COMMON_HLSL +#define GPU_COMMON_HLSL + +static const float32_t CIRCLE_RADIUS = 0.5f; + +// --- Geometry Utils --- +struct ClippedSilhouette +{ + float32_t3 vertices[7]; // Max 7 vertices after clipping, unnormalized + uint32_t count; +}; + +static const float32_t3 constCorners[8] = { + float32_t3(-0.5f, -0.5f, -0.5f), float32_t3(0.5f, -0.5f, -0.5f), float32_t3(-0.5f, 0.5f, -0.5f), float32_t3(0.5f, 0.5f, -0.5f), + float32_t3(-0.5f, -0.5f, 0.5f), float32_t3(0.5f, -0.5f, 0.5f), float32_t3(-0.5f, 0.5f, 0.5f), float32_t3(0.5f, 0.5f, 0.5f)}; + +static const uint32_t2 allEdges[12] = { + {0, 1}, + {2, 3}, + {4, 5}, + {6, 7}, // X axis + {0, 2}, + {1, 3}, + {4, 6}, + {5, 7}, // Y axis + {0, 4}, + {1, 5}, + {2, 6}, + {3, 7}, // Z axis +}; + +// Maps face index (0-5) to its 4 corner indices in CCW order +static const uint32_t faceToCorners[6][4] = { + {0, 2, 3, 1}, // Face 0: Z- + {4, 5, 7, 6}, // Face 1: Z+ + {0, 4, 6, 2}, // Face 2: X- + {1, 3, 7, 5}, // Face 3: X+ + {0, 1, 5, 4}, // Face 4: Y- + {2, 6, 7, 3} // Face 5: Y+ +}; + +static float32_t3 corners[8]; +static float32_t3 faceCenters[6] = { + float32_t3(0, 0, 0), float32_t3(0, 0, 0), float32_t3(0, 0, 0), + float32_t3(0, 0, 0), float32_t3(0, 0, 0), float32_t3(0, 0, 0)}; + +static const float32_t3 localNormals[6] = { + float32_t3(0, 0, -1), // Face 0 (Z-) + float32_t3(0, 0, 1), // Face 1 (Z+) + float32_t3(-1, 0, 0), // Face 2 (X-) + float32_t3(1, 0, 0), // Face 3 (X+) + float32_t3(0, -1, 0), // Face 4 (Y-) + float32_t3(0, 1, 0) // Face 5 (Y+) +}; + +// TODO: unused, remove later +// Vertices are ordered CCW relative to the camera view. +static const uint32_t silhouettes[27][7] = { + {6, 1, 3, 2, 6, 4, 5}, // 0: Black + {6, 2, 6, 4, 5, 7, 3}, // 1: White + {6, 0, 4, 5, 7, 3, 2}, // 2: Gray + {6, 1, 3, 7, 6, 4, 5}, // 3: Red + {4, 4, 5, 7, 6, 0, 0}, // 4: Green + {6, 0, 4, 5, 7, 6, 2}, // 5: Blue + {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow + {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta + {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan + {6, 1, 3, 2, 6, 7, 5}, // 9: Orange + {4, 2, 6, 7, 3, 0, 0}, // 10: Light Orange + {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange + {4, 1, 3, 7, 5, 0, 0}, // 12: Pink + {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink + {4, 0, 4, 6, 2, 0, 0}, // 14: Deep Rose + {6, 0, 1, 3, 7, 5, 4}, // 15: Purple + {4, 0, 1, 5, 4, 0, 0}, // 16: Light Purple + {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo + {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green + {6, 0, 2, 6, 7, 3, 1}, // 19: Lime + {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green + {6, 0, 2, 3, 7, 5, 1}, // 21: Navy + {4, 0, 2, 3, 1, 0, 0}, // 22: Sky Blue + {6, 0, 4, 6, 2, 3, 1}, // 23: Teal + {6, 0, 2, 3, 7, 5, 4}, // 24: Brown + {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige + {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown +}; + +// Binary packed silhouettes +static const uint32_t binSilhouettes[27] = { + 0b11000000000000101100110010011001, + 0b11000000000000011111101100110010, + 0b11000000000000010011111101100000, + 0b11000000000000101100110111011001, + 0b10000000000000000000110111101100, + 0b11000000000000010110111101100000, + 0b11000000000000100110111011001000, + 0b11000000000000100110111101001000, + 0b11000000000000010110111101001000, + 0b11000000000000101111110010011001, + 0b10000000000000000000011111110010, + 0b11000000000000010011111110100000, + 0b10000000000000000000101111011001, + 0b11000000000000010011111110100000, + 0b10000000000000000000010110100000, + 0b11000000000000100101111011001000, + 0b10000000000000000000100101001000, + 0b11000000000000010110100101001000, + 0b11000000000000001101111110010000, + 0b11000000000000001011111110010000, + 0b11000000000000001011111110100000, + 0b11000000000000001101111011010000, + 0b10000000000000000000001011010000, + 0b11000000000000001011010110100000, + 0b11000000000000100101111011010000, + 0b11000000000000100101001011010000, + 0b11000000000000011010110100101001, +}; + +uint32_t getSilhouetteVertex(uint32_t packedSil, uint32_t index) +{ + return (packedSil >> (3u * index)) & 0x7u; +} + +// Get silhouette size +uint32_t getSilhouetteSize(uint32_t sil) +{ + return (sil >> 29u) & 0x7u; +} + +// Check if vertex has negative z +bool getVertexZNeg(float32_t3x4 modelMatrix, uint32_t vertexIdx) +{ +#if FAST + float32_t3 localPos = float32_t3( + (vertexIdx & 1) ? 0.5f : -0.5f, + (vertexIdx & 2) ? 0.5f : -0.5f, + (vertexIdx & 4) ? 0.5f : -0.5f); + + float32_t transformedZ = dot(modelMatrix[2].xyz, localPos) + modelMatrix[2].w; + return transformedZ < 0.0f; +#else + return corners[vertexIdx].z < 0.0f; +#endif +} + +// Get world position of cube vertex +float32_t3 getVertex(float32_t3x4 modelMatrix, uint32_t vertexIdx) +{ +#if FAST + // Reconstruct local cube corner from index bits + float32_t sx = (vertexIdx & 1) ? 0.5f : -0.5f; + float32_t sy = (vertexIdx & 2) ? 0.5f : -0.5f; + float32_t sz = (vertexIdx & 4) ? 0.5f : -0.5f; + + float32_t4x3 model = transpose(modelMatrix); + + // Transform to world + // Full position, not just Z like getVertexZNeg + return model[0].xyz * sx + + model[1].xyz * sy + + model[2].xyz * sz + + model[3].xyz; + // return mul(modelMatrix, float32_t4(sx, sy, sz, 1.0f)); +#else + return corners[vertexIdx]; +#endif +} +#endif // GPU_COMMON_HLSL diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl new file mode 100644 index 000000000..05d913e01 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl @@ -0,0 +1,164 @@ +#ifndef _SILHOUETTE_HLSL_ +#define _SILHOUETTE_HLSL_ +#include "gpu_common.hlsl" + +#include "utils.hlsl" + +// Compute region and configuration index from model matrix +uint32_t computeRegionAndConfig(float32_t3x4 modelMatrix, out uint32_t3 region, out uint32_t configIndex, out uint32_t vertexCount) +{ + float32_t4x3 columnModel = transpose(modelMatrix); + float32_t3 obbCenter = columnModel[3].xyz; + float32_t3x3 upper3x3 = (float32_t3x3)columnModel; + float32_t3 rcpSqScales = rcp(float32_t3( + dot(upper3x3[0], upper3x3[0]), + dot(upper3x3[1], upper3x3[1]), + dot(upper3x3[2], upper3x3[2]))); + float32_t3 normalizedProj = mul(upper3x3, obbCenter) * rcpSqScales; + + region = uint32_t3( + normalizedProj.x < -0.5f ? 0 : (normalizedProj.x > 0.5f ? 2 : 1), + normalizedProj.y < -0.5f ? 0 : (normalizedProj.y > 0.5f ? 2 : 1), + normalizedProj.z < -0.5f ? 0 : (normalizedProj.z > 0.5f ? 2 : 1)); + + configIndex = region.x + region.y * 3u + region.z * 9u; + + uint32_t sil = packSilhouette(silhouettes[configIndex]); + // uint32_t sil = binSilhouettes[configIndex]; + vertexCount = getSilhouetteSize(sil); + return sil; +} + +#if VISUALIZE_SAMPLES +float32_t4 +#else +void +#endif +computeSilhouette(float32_t3x4 modelMatrix, uint32_t vertexCount, uint32_t sil +#if VISUALIZE_SAMPLES + , + float32_t3 spherePos, float32_t aaWidth +#endif + , + NBL_REF_ARG(ClippedSilhouette) silhouette) +{ +#if VISUALIZE_SAMPLES + float32_t4 color = float32_t4(0, 0, 0, 0); +#endif + silhouette.count = 0; + + // Build clip mask (z < 0) + uint32_t clipMask = 0u; + NBL_UNROLL + for (uint32_t i = 0; i < 4; i++) + clipMask |= (getVertexZNeg(modelMatrix, getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; + + if (vertexCount == 6) + { + NBL_UNROLL + for (uint32_t i = 4; i < 6; i++) + clipMask |= (getVertexZNeg(modelMatrix, getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; + } + + uint32_t clipCount = countbits(clipMask); + +#if 0 + // Early exit if fully clipped + if (clipCount == vertexCount) + return color; + + // No clipping needed - fast path + if (clipCount == 0) + { + for (uint32_t i = 0; i < vertexCount; i++) + { + uint32_t i0 = i; + uint32_t i1 = (i + 1) % vertexCount; + + float32_t3 v0 = getVertex(modelMatrix, getSilhouetteVertex(sil, i0)); + silhouette.vertices[silhouette.count++] = v0; +#if VISUALIZE_SAMPLES + float32_t3 v1 = getVertex(modelMatrix, getSilhouetteVertex(sil, i1)); + float32_t3 pts[2] = {v0, v1}; + color += drawEdge(i1, pts, spherePos, aaWidth); +#endif + } + return color; + } +#endif + + // Rotate clip mask so positives come first + uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); + bool wrapAround = ((clipMask & 1u) != 0u) && + ((clipMask & (1u << (vertexCount - 1))) != 0u); + uint32_t rotateAmount = wrapAround + ? firstbitlow(invertedMask) // -> First POSITIVE + : firstbithigh(clipMask) + 1; // -> First vertex AFTER last negative + + uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); + uint32_t rotatedSil = rotr(sil, rotateAmount * 3, vertexCount * 3); + + uint32_t positiveCount = vertexCount - clipCount; + + // ALWAYS compute both clip points + uint32_t lastPosIdx = positiveCount - 1; + uint32_t firstNegIdx = positiveCount; + float32_t3 vLastPos = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, lastPosIdx)); + float32_t3 vFirstNeg = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, firstNegIdx)); + float32_t t = vLastPos.z / (vLastPos.z - vFirstNeg.z); + float32_t3 clipA = lerp(vLastPos, vFirstNeg, t); + + float32_t3 vLastNeg = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, vertexCount - 1)); + float32_t3 vFirstPos = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, 0)); + t = vLastNeg.z / (vLastNeg.z - vFirstPos.z); + float32_t3 clipB = lerp(vLastNeg, vFirstPos, t); + + NBL_UNROLL + for (uint32_t i = 0; i < positiveCount; i++) + { + // Get raw vertex + float32_t3 v0 = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, i)); + + bool isLastPositive = (i == positiveCount - 1); + bool useClipA = (clipCount > 0) && isLastPositive; + +#if VISUALIZE_SAMPLES + float32_t3 v1 = useClipA ? clipA + : getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, (i + 1) % vertexCount)); + + float32_t3 pts[2] = {normalize(v0), normalize(v1)}; + color += drawEdge((i + 1) % vertexCount, pts, spherePos, aaWidth); +#endif + silhouette.vertices[silhouette.count++] = v0; + } + + if (clipCount > 0 && clipCount < vertexCount) + { + float32_t3 vFirst = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, 0)); + +#if VISUALIZE_SAMPLES + float32_t3 npPts[2] = {normalize(clipB), normalize(vFirst)}; + color += drawEdge(0, npPts, spherePos, aaWidth); + + float32_t3 arcPts[2] = {normalize(clipA), normalize(clipB)}; + color += drawEdge(23, arcPts, spherePos, aaWidth, 0.6f); +#endif + silhouette.vertices[silhouette.count++] = clipA; + silhouette.vertices[silhouette.count++] = clipB; + } + +#if DEBUG_DATA + DebugDataBuffer[0].clipMask = clipMask; + DebugDataBuffer[0].clipCount = clipCount; + DebugDataBuffer[0].rotatedClipMask = rotatedClipMask; + DebugDataBuffer[0].rotateAmount = rotateAmount; + DebugDataBuffer[0].positiveVertCount = positiveCount; + DebugDataBuffer[0].wrapAround = (uint32_t)wrapAround; + DebugDataBuffer[0].rotatedSil = rotatedSil; +#endif +#if VISUALIZE_SAMPLES + return color; +#endif +} + +#endif // _SILHOUETTE_HLSL_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl index 4031e048f..f01667bf0 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl @@ -19,5 +19,24 @@ uint32_t rotr(uint32_t value, uint32_t bits, uint32_t width) return ((value >> bits) | (value << (width - bits))) & mask; } +uint32_t packSilhouette(const uint32_t s[7]) +{ + uint32_t packed = 0; + uint32_t size = s[0] & 0x7; // 3 bits for size + + // Pack vertices LSB-first (vertex1 in lowest 3 bits above size) + for (uint32_t i = 1; i <= 6; ++i) + { + uint32_t v = s[i]; + if (v < 0) + v = 0; // replace unused vertices with 0 + packed |= (v & 0x7) << (3 * (i - 1)); // vertex i-1 shifted by 3*(i-1) + } + + // Put size in the MSB (bits 29-31 for a 32-bit uint32_t, leaving 29 bits for vertices) + packed |= (size & 0x7) << 29; + + return packed; +} #endif // _UTILS_HLSL_ diff --git a/73_SolidAngleVisualizer/main.cpp b/73_SolidAngleVisualizer/main.cpp index 401ab71b3..4c32069ff 100644 --- a/73_SolidAngleVisualizer/main.cpp +++ b/73_SolidAngleVisualizer/main.cpp @@ -1,10 +1,11 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "common.hpp" #include "app_resources/hlsl/common.hlsl" +#include "app_resources/hlsl/benchmark/common.hlsl" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" /* @@ -18,6 +19,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR using asset_base_t = BuiltinResourcesApplication; inline static std::string SolidAngleVisShaderPath = "app_resources/hlsl/SolidAngleVis.frag.hlsl"; + inline static std::string RayVisShaderPath = "app_resources/hlsl/RayVis.frag.hlsl"; public: inline SolidAngleVisualizer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), @@ -31,6 +33,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; + interface.m_visualizer = this; + m_semaphore = m_device->createSemaphore(m_realFrameIx); if (!m_semaphore) return logFail("Failed to Create a Semaphore!"); @@ -162,7 +166,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // Create graphics pipeline { - auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "") -> smart_refctd_ptr + auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, IShader::E_SHADER_STAGE stage, const std::string& defineMacro = "") -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.workingDirectory = localInputCWD; @@ -180,7 +184,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); CHLSLCompiler::SOptions options = {}; - options.stage = IShader::E_SHADER_STAGE::ESS_FRAGMENT; + options.stage = stage; options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; options.spirvOptimizer = nullptr; #ifndef _NBL_DEBUG @@ -216,21 +220,24 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); // Load Fragment Shader - auto fragmentShader = loadAndCompileHLSLShader(SolidAngleVisShaderPath); - if (!fragmentShader) - return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); + auto solidAngleVisFragShader = loadAndCompileHLSLShader(SolidAngleVisShaderPath, ESS_FRAGMENT); + if (!solidAngleVisFragShader) + return logFail("Failed to Load and Compile Fragment Shader: SolidAngleVis!"); - const IGPUPipelineBase::SShaderSpecInfo fragSpec = { - .shader = fragmentShader.get(), + const IGPUPipelineBase::SShaderSpecInfo solidAngleFragSpec = { + .shader = solidAngleVisFragShader.get(), .entryPoint = "main" }; - const asset::SPushConstantRange ranges[] = { { - .stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, - .offset = 0, - .size = sizeof(PushConstants) - } }; + auto rayVisFragShader = loadAndCompileHLSLShader(RayVisShaderPath, ESS_FRAGMENT); + if (!rayVisFragShader) + return logFail("Failed to Load and Compile Fragment Shader: rayVis!"); + const IGPUPipelineBase::SShaderSpecInfo RayFragSpec = { + .shader = rayVisFragShader.get(), + .entryPoint = "main" + }; + smart_refctd_ptr solidAngleVisLayout, rayVisLayout; nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { { .binding = 0, @@ -241,21 +248,39 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } }; smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); + + const asset::SPushConstantRange saRanges[] = { { + .stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(PushConstants) + } }; + const asset::SPushConstantRange rayRanges[] = { { + .stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(PushConstantRayVis) + } }; + if (!dsLayout) logFail("Failed to create a Descriptor Layout!\n"); + solidAngleVisLayout = m_device->createPipelineLayout(saRanges, dsLayout); - auto visualizationLayout = m_device->createPipelineLayout(ranges -#if DEBUG_DATA - , dsLayout -#endif - ); - m_visualizationPipeline = fsTriProtoPPln.createPipeline(fragSpec, visualizationLayout.get(), m_solidAngleRenderpass.get()); - if (!m_visualizationPipeline) - return logFail("Could not create Graphics Pipeline!"); + rayVisLayout = m_device->createPipelineLayout(rayRanges, dsLayout); + + { + m_solidAngleVisPipeline = fsTriProtoPPln.createPipeline(solidAngleFragSpec, solidAngleVisLayout.get(), m_solidAngleRenderpass.get()); + if (!m_solidAngleVisPipeline) + return logFail("Could not create Graphics Pipeline!"); + + asset::SRasterizationParams rasterParams = ext::FullScreenTriangle::ProtoPipeline::DefaultRasterParams; + rasterParams.depthWriteEnable = true; + rasterParams.depthCompareOp = asset::E_COMPARE_OP::ECO_GREATER; + m_rayVisualizationPipeline = fsTriProtoPPln.createPipeline(RayFragSpec, rayVisLayout.get(), m_mainRenderpass.get(), 0, {}, rasterParams); + if (!m_rayVisualizationPipeline) + return logFail("Could not create Graphics Pipeline!"); + } // Allocate the memory -#if DEBUG_DATA { constexpr size_t BufferSize = sizeof(ResultData); @@ -297,7 +322,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR const ILogicalDevice::MappedMemoryRange memoryRange(m_allocation.memory.get(), 0ull, m_allocation.memory->getAllocationSize()); if (!m_allocation.memory->getMemoryPropertyFlags().hasFlags(IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) m_device->invalidateMappedMemoryRanges(1, &memoryRange); -#endif } // Create ImGUI @@ -391,7 +415,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; if (m_solidAngleViewFramebuffer) { -#if DEBUG_DATA asset::SBufferRange range { .offset = 0, @@ -399,40 +422,43 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .buffer = m_outputStorageBuffer }; cb->fillBuffer(range, 0u); -#endif - auto creationParams = m_solidAngleViewFramebuffer->getCreationParameters(); - cb->beginDebugMarker("Draw Circle View Frame"); { - const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; - const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + + const auto& creationParams = m_solidAngleViewFramebuffer->getCreationParameters(); + cb->beginDebugMarker("Draw Circle View Frame"); { - .framebuffer = m_solidAngleViewFramebuffer.get(), - .colorClearValues = &clearValue, - .depthStencilClearValues = &farValue, - .renderArea = { - .offset = {0,0}, - .extent = {creationParams.width, creationParams.height} - } - }; - beginRenderpass(cb, renderpassInfo); - } - // draw scene - { - PushConstants pc{ - .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), - .viewport = { 0.f,0.f,static_cast(creationParams.width),static_cast(creationParams.height) }, - .samplingMode = m_samplingMode, - .frameIndex = m_frameSeeding ? static_cast(m_realFrameIx) : 0u - }; - auto pipeline = m_visualizationPipeline; - cb->bindGraphicsPipeline(pipeline.get()); - cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); - cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 0, 1, &m_ds.get()); - ext::FullScreenTriangle::recordDrawCall(cb); + const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = m_solidAngleViewFramebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0,0}, + .extent = {creationParams.width, creationParams.height} + } + }; + beginRenderpass(cb, renderpassInfo); + } + // draw scene + { + static uint32_t lastFrameSeed = 0u; + lastFrameSeed = m_frameSeeding ? static_cast(m_realFrameIx) : lastFrameSeed; + PushConstants pc{ + .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), + .viewport = { 0.f,0.f,static_cast(creationParams.width),static_cast(creationParams.height) }, + .samplingMode = m_samplingMode, + .frameIndex = lastFrameSeed + }; + auto pipeline = m_solidAngleVisPipeline; + cb->bindGraphicsPipeline(pipeline.get()); + cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc); + cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 0, 1, &m_ds.get()); + ext::FullScreenTriangle::recordDrawCall(cb); + } + cb->endRenderPass(); + cb->endDebugMarker(); } - cb->endRenderPass(); - cb->endDebugMarker(); - #if DEBUG_DATA m_device->waitIdle(); std::memcpy(&m_GPUOutResulData, static_cast(m_allocation.memory->getMappedPointer()), sizeof(ResultData)); @@ -442,11 +468,11 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // draw main view if (m_mainViewFramebuffer) { - cb->beginDebugMarker("Main Scene Frame"); { auto creationParams = m_mainViewFramebuffer->getCreationParameters(); const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { .framebuffer = m_mainViewFramebuffer.get(), .colorClearValues = &clearValue, @@ -457,9 +483,33 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } }; beginRenderpass(cb, renderpassInfo); + + } + { // draw rays visualization + auto creationParams = m_mainViewFramebuffer->getCreationParameters(); + + cb->beginDebugMarker("Draw Rays visualization"); + // draw scene + { + float32_t4x4 viewProj = *reinterpret_cast(&interface.camera.getConcatenatedMatrix()); + PushConstantRayVis pc{ + .viewProjMatrix = viewProj, + .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), + .viewport = { 0.f,0.f,static_cast(creationParams.width),static_cast(creationParams.height) }, + .frameIndex = m_frameSeeding ? static_cast(m_realFrameIx) : 0u + }; + auto pipeline = m_rayVisualizationPipeline; + cb->bindGraphicsPipeline(pipeline.get()); + cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc); + cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 0, 1, &m_ds.get()); + ext::FullScreenTriangle::recordDrawCall(cb); + } + cb->endDebugMarker(); } // draw scene { + cb->beginDebugMarker("Main Scene Frame"); + float32_t3x4 viewMatrix; float32_t4x4 viewProjMatrix; // TODO: get rid of legacy matrices @@ -472,8 +522,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // tear down scene every frame auto& instance = m_renderer->m_instances[0]; - auto transposed = hlsl::transpose(interface.m_OBBModelMatrix); - memcpy(&instance.world, &transposed, sizeof(instance.world)); + instance.world = float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)); instance.packedGeo = m_renderer->getGeometries().data(); // cube // +interface.gcIndex; m_renderer->render(cb, viewParams); // draw the cube/OBB @@ -481,9 +530,11 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR instance.packedGeo = m_renderer->getGeometries().data() + 2; // disk m_renderer->render(cb, viewParams); } - cb->endRenderPass(); + cb->endDebugMarker(); + cb->endRenderPass(); } + { cb->beginDebugMarker("SolidAngleVisualizer IMGUI Frame"); { @@ -781,12 +832,10 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR cb->setViewport(0u, 1u, &viewport); } -#if DEBUG_DATA ~SolidAngleVisualizer() override { m_allocation.memory->unmap(); } -#endif // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers constexpr static inline uint32_t MaxFramesInFlight = 3u; @@ -806,7 +855,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR smart_refctd_ptr m_renderer; smart_refctd_ptr m_solidAngleViewFramebuffer; smart_refctd_ptr m_mainViewFramebuffer; - smart_refctd_ptr m_visualizationPipeline; + smart_refctd_ptr m_solidAngleVisPipeline; + smart_refctd_ptr m_rayVisualizationPipeline; // nbl::video::IDeviceMemoryAllocator::SAllocation m_allocation = {}; smart_refctd_ptr m_outputStorageBuffer; @@ -859,6 +909,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); ImGui::Begin("Editor"); + ImGui::Text("Benchmarking Solid Angle Visualizer"); + + if (ImGui::Button("Run Benchmark")) + { + SolidAngleVisualizer::SamplingBenchmark benchmark(*m_visualizer); + benchmark.run(); + } + ImGui::Separator(); + ImGui::Text("Sampling Mode: "); ImGui::SameLine(); @@ -1119,7 +1178,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::Text("silhouette Vertex Count: %u", m_GPUOutResulData.silhouetteVertexCount); ImGui::Text("silhouette Positive VertexCount: %u", m_GPUOutResulData.positiveVertCount); ImGui::Text("Silhouette Mismatch: %s", m_GPUOutResulData.edgeVisibilityMismatch ? "true" : "false"); - ImGui::Text("More Than Two Bit Transitions: %s", m_GPUOutResulData.maxTrianglesExcceded ? "true" : "false"); + ImGui::Text("Max triangles exceeded: %s", m_GPUOutResulData.maxTrianglesExceeded ? "true" : "false"); + ImGui::Text("spherical lune detected: %s", m_GPUOutResulData.sphericalLuneDetected ? "true" : "false"); { float32_t3 xAxis = m_OBBModelMatrix[0].xyz; @@ -1138,23 +1198,27 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } static bool modalShown = false; + static bool modalDismissed = false; static uint32_t lastSilhouetteIndex = ~0u; - // Reset modal flag if silhouette configuration changed + // Reset modal flags if silhouette configuration changed if (m_GPUOutResulData.silhouetteIndex != lastSilhouetteIndex) { modalShown = false; + modalDismissed = false; // Allow modal to show again for new configuration lastSilhouetteIndex = m_GPUOutResulData.silhouetteIndex; } - if (!m_GPUOutResulData.edgeVisibilityMismatch || !m_GPUOutResulData.maxTrianglesExcceded) + // Reset flags when mismatch is cleared + if (!m_GPUOutResulData.edgeVisibilityMismatch && !m_GPUOutResulData.maxTrianglesExceeded && !m_GPUOutResulData.sphericalLuneDetected) { - // Reset flag when mismatch is cleared modalShown = false; + modalDismissed = false; } - if ((m_GPUOutResulData.edgeVisibilityMismatch || m_GPUOutResulData.maxTrianglesExcceded) && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown) // 13 means we're inside the cube, so don't care + + // Open modal only if not already shown/dismissed + if ((m_GPUOutResulData.edgeVisibilityMismatch || m_GPUOutResulData.maxTrianglesExceeded || m_GPUOutResulData.sphericalLuneDetected) && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown && !modalDismissed) // Don't reopen if user dismissed it { - // Open modal popup only once per configuration ImGui::OpenPopup("Edge Visibility Mismatch Warning"); modalShown = true; } @@ -1164,19 +1228,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { ImGui::TextColored(ImVec4(1.0f, 0.5f, 0.0f, 1.0f), "Warning: Edge Visibility Mismatch Detected!"); ImGui::Separator(); - ImGui::Text("The silhouette lookup table (LUT) does not match the computed edge visibility."); ImGui::Text("This indicates the pre-computed silhouette data may be incorrect."); ImGui::Spacing(); - - // Show configuration info ImGui::TextWrapped("Configuration Index: %u", m_GPUOutResulData.silhouetteIndex); ImGui::TextWrapped("Region: (%u, %u, %u)", m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); ImGui::Spacing(); - ImGui::Text("Mismatched Vertices (bitmask): 0x%08X", m_GPUOutResulData.edgeVisibilityMismatch); - - // Show which specific vertices are mismatched ImGui::Text("Vertices involved in mismatched edges:"); ImGui::Indent(); for (int i = 0; i < 8; i++) @@ -1188,12 +1246,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } ImGui::Unindent(); ImGui::Spacing(); - if (ImGui::Button("OK", ImVec2(120, 0))) { ImGui::CloseCurrentPopup(); + modalShown = false; + modalDismissed = true; // Mark as dismissed to prevent reopening } - ImGui::EndPopup(); } @@ -1203,6 +1261,25 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::Text("region: (%u, %u, %u)", m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); + // print solidAngles for each triangle + { + ImGui::Text("Solid Angles per Triangle:"); + ImGui::BeginTable("SolidAnglesTable", 2); + ImGui::TableSetupColumn("Triangle Index"); + ImGui::TableSetupColumn("Solid Angle"); + ImGui::TableHeadersRow(); + for (uint32_t i = 0; i < m_GPUOutResulData.triangleCount; ++i) + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::Text("%u", i); + ImGui::TableSetColumnIndex(1); + ImGui::Text("%.6f", m_GPUOutResulData.solidAngles[i]); + } + ImGui::Text("Total: %.6f", m_GPUOutResulData.totalSolidAngles); + ImGui::EndTable(); + } + ImGui::Separator(); // Silhouette mask printed in binary @@ -1255,14 +1332,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::Separator(); }; - static RandomSampler rng(69); // Initialize RNG with seed + static RandomSampler rng(0x45); // Initialize RNG with seed // Helper function to check if cube intersects unit sphere at origin - auto isCubeOutsideUnitSphere = [](const float32_t3& translation, const float32_t3& scale) -> bool { - float cubeRadius = glm::length(scale) * 0.5f; - float distanceToCenter = glm::length(translation); - return (distanceToCenter - cubeRadius) > 1.0f; - }; + auto isCubeOutsideUnitSphere = [](const float32_t3& translation, const float32_t3& scale) -> bool + { + float cubeRadius = glm::length(scale) * 0.5f; + float distanceToCenter = glm::length(translation); + return (distanceToCenter - cubeRadius) > 1.0f; + }; static TRS lastTRS = {}; if (ImGui::Button("Randomize Translation")) @@ -1404,7 +1482,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // mutables struct TRS // Source of truth { - float32_t3 translation{ 0.0f, 0.0f, 3.0f }; + float32_t3 translation{ 0.0f, 0.0f, 1.5f }; float32_t3 rotation{ 0.0f }; // MUST stay orthonormal float32_t3 scale{ 1.0f }; } m_TRS; @@ -1415,7 +1493,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR TransformReturnInfo mainViewTransformReturnInfo; TransformReturnInfo solidAngleViewTransformReturnInfo; - const static inline core::vectorSIMDf cameraIntialPosition{ -3.0f, 6.0f, 3.0f }; const static inline core::vectorSIMDf cameraInitialTarget{ 0.f, 0.0f, 3.f }; const static inline core::vectorSIMDf cameraInitialUp{ 0.f, 0.f, 1.f }; @@ -1425,7 +1502,289 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR //uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed bool isPerspective = true, isLH = true, flipGizmoY = true, move = true; bool firstFrame = true; + + SolidAngleVisualizer* m_visualizer; } interface; + + class SamplingBenchmark final + { + public: + SamplingBenchmark(SolidAngleVisualizer& base) + : m_api(base.m_api), m_device(base.m_device), m_logger(base.m_logger), m_visualizer(&base) + { + + // setting up pipeline in the constructor + m_queueFamily = base.getComputeQueue()->getFamilyIndex(); + m_cmdpool = base.m_device->createCommandPool(m_queueFamily, IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + //core::smart_refctd_ptr* cmdBuffs[] = { &m_cmdbuf, &m_timestampBeforeCmdBuff, &m_timestampAfterCmdBuff }; + if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) + base.logFail("Failed to create Command Buffers!\n"); + if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_timestampBeforeCmdBuff)) + base.logFail("Failed to create Command Buffers!\n"); + if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_timestampAfterCmdBuff)) + base.logFail("Failed to create Command Buffers!\n"); + + // Load shaders, set up pipeline + { + smart_refctd_ptr shader; + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = base.m_logger.get(); + lp.workingDirectory = "app_resources"; // virtual root + // this time we load a shader directly from a file + auto key = nbl::this_example::builtin::build::get_spirv_key<"benchmark">(m_device.get()); + auto assetBundle = base.m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + base.logFail("Could not load shader!"); + assert(0); + } + + // It would be super weird if loading a shader from a file produced more than 1 asset + assert(assets.size() == 1); + shader = IAsset::castDown(assets[0]); + } + + if (!shader) + base.logFail("Failed to load precompiled \"benchmark\" shader!\n"); + + nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { + { + .binding = 0, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1 + } + }; + smart_refctd_ptr dsLayout = base.m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + base.logFail("Failed to create a Descriptor Layout!\n"); + + SPushConstantRange pushConstantRanges[] = { + { + .stageFlags = ShaderStage::ESS_COMPUTE, + .offset = 0, + .size = sizeof(BenchmarkPushConstants) + } + }; + m_pplnLayout = base.m_device->createPipelineLayout(pushConstantRanges, smart_refctd_ptr(dsLayout)); + if (!m_pplnLayout) + base.logFail("Failed to create a Pipeline Layout!\n"); + + { + IGPUComputePipeline::SCreationParams params = {}; + params.layout = m_pplnLayout.get(); + params.shader.entryPoint = "main"; + params.shader.shader = shader.get(); + if (!base.m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) + base.logFail("Failed to create pipelines (compile & link shaders)!\n"); + } + + // Allocate the memory + { + constexpr size_t BufferSize = BENCHMARK_WORKGROUP_COUNT * BENCHMARK_WORKGROUP_DIMENSION_SIZE_X * + BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y * BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z * sizeof(uint32_t); + + nbl::video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + smart_refctd_ptr dummyBuff = base.m_device->createBuffer(std::move(params)); + if (!dummyBuff) + base.logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + dummyBuff->setObjectDebugName("benchmark buffer"); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = dummyBuff->getMemoryReqs(); + + m_allocation = base.m_device->allocate(reqs, dummyBuff.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_allocation.isValid()) + base.logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(dummyBuff->getBoundMemory().memory == m_allocation.memory.get()); + smart_refctd_ptr pool = base.m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + + m_ds = pool->createDescriptorSet(std::move(dsLayout)); + { + IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = smart_refctd_ptr(dummyBuff); + info[0].info.buffer = { .offset = 0,.size = BufferSize }; + IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} + }; + base.m_device->updateDescriptorSets(writes, {}); + } + } + } + + IQueryPool::SCreationParams queryPoolCreationParams{}; + queryPoolCreationParams.queryType = IQueryPool::TYPE::TIMESTAMP; + queryPoolCreationParams.queryCount = 2; + queryPoolCreationParams.pipelineStatisticsFlags = IQueryPool::PIPELINE_STATISTICS_FLAGS::NONE; + m_queryPool = m_device->createQueryPool(queryPoolCreationParams); + + m_computeQueue = m_device->getQueue(m_queueFamily, 0); + } + + void run() + { + m_logger->log("\n\nsampling benchmark result:", ILogger::ELL_PERFORMANCE); + m_logger->log("sampling benchmark, triangle solid angle result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_BENCHMARK_MODE::TRIANGLE_SOLID_ANGLE, SAMPLING_MODE_SOLID_ANGLE); + + m_logger->log("sampling benchmark, triangle projected solid angle result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_BENCHMARK_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE, SAMPLING_MODE_PROJECTED_SOLID_ANGLE); + } + + private: + void performBenchmark(SAMPLING_BENCHMARK_MODE mode, uint32_t solidAngleMode) + { + m_device->waitIdle(); + + recordTimestampQueryCmdBuffers(); + + uint64_t semaphoreCounter = 0; + smart_refctd_ptr semaphore = m_device->createSemaphore(semaphoreCounter); + + IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; + IQueue::SSubmitInfo::SSemaphoreInfo waits[] = { {.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT } }; + + IQueue::SSubmitInfo beforeTimestapSubmitInfo[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufsBegin[] = { {.cmdbuf = m_timestampBeforeCmdBuff.get()} }; + beforeTimestapSubmitInfo[0].commandBuffers = cmdbufsBegin; + beforeTimestapSubmitInfo[0].signalSemaphores = signals; + beforeTimestapSubmitInfo[0].waitSemaphores = waits; + + IQueue::SSubmitInfo afterTimestapSubmitInfo[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufsEnd[] = { {.cmdbuf = m_timestampAfterCmdBuff.get()} }; + afterTimestapSubmitInfo[0].commandBuffers = cmdbufsEnd; + afterTimestapSubmitInfo[0].signalSemaphores = signals; + afterTimestapSubmitInfo[0].waitSemaphores = waits; + + IQueue::SSubmitInfo benchmarkSubmitInfos[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; + benchmarkSubmitInfos[0].commandBuffers = cmdbufs; + benchmarkSubmitInfos[0].signalSemaphores = signals; + benchmarkSubmitInfos[0].waitSemaphores = waits; + + + m_pushConstants.benchmarkMode = mode; + m_pushConstants.samplingMode = solidAngleMode; + m_pushConstants.modelMatrix = float32_t3x4(transpose(m_visualizer->interface.m_OBBModelMatrix)); + recordCmdBuff(); + + // warmup runs + for (int i = 0; i < WarmupIterations; ++i) + { + if (i == 0) + m_api->startCapture(); + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(benchmarkSubmitInfos); + if (i == 0) + m_api->endCapture(); + } + + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(beforeTimestapSubmitInfo); + + // actual benchmark runs + for (int i = 0; i < Iterations; ++i) + { + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(benchmarkSubmitInfos); + } + + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(afterTimestapSubmitInfo); + + m_device->waitIdle(); + + const uint64_t nativeBenchmarkTimeElapsedNanoseconds = calcTimeElapsed(); + const float nativeBenchmarkTimeElapsedSeconds = double(nativeBenchmarkTimeElapsedNanoseconds) / 1000000000.0; + + m_logger->log("%llu ns, %f s", ILogger::ELL_PERFORMANCE, nativeBenchmarkTimeElapsedNanoseconds, nativeBenchmarkTimeElapsedSeconds); + } + + void recordCmdBuff() + { + m_cmdbuf->begin(IGPUCommandBuffer::USAGE::SIMULTANEOUS_USE_BIT); + m_cmdbuf->beginDebugMarker("sampling compute dispatch", vectorSIMDf(0, 1, 0, 1)); + m_cmdbuf->bindComputePipeline(m_pipeline.get()); + m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); + m_cmdbuf->pushConstants(m_pplnLayout.get(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(BenchmarkPushConstants), &m_pushConstants); + m_cmdbuf->dispatch(BENCHMARK_WORKGROUP_COUNT, 1, 1); + m_cmdbuf->endDebugMarker(); + m_cmdbuf->end(); + } + + void recordTimestampQueryCmdBuffers() + { + static bool firstInvocation = true; + + if (!firstInvocation) + { + m_timestampBeforeCmdBuff->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + m_timestampBeforeCmdBuff->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + } + + m_timestampBeforeCmdBuff->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + m_timestampBeforeCmdBuff->resetQueryPool(m_queryPool.get(), 0, 2); + m_timestampBeforeCmdBuff->writeTimestamp(PIPELINE_STAGE_FLAGS::NONE, m_queryPool.get(), 0); + m_timestampBeforeCmdBuff->end(); + + m_timestampAfterCmdBuff->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + m_timestampAfterCmdBuff->writeTimestamp(PIPELINE_STAGE_FLAGS::NONE, m_queryPool.get(), 1); + m_timestampAfterCmdBuff->end(); + + firstInvocation = false; + } + + uint64_t calcTimeElapsed() + { + uint64_t timestamps[2]; + const core::bitflag flags = core::bitflag(IQueryPool::RESULTS_FLAGS::_64_BIT) | core::bitflag(IQueryPool::RESULTS_FLAGS::WAIT_BIT); + m_device->getQueryPoolResults(m_queryPool.get(), 0, 2, ×tamps, sizeof(uint64_t), flags); + return timestamps[1] - timestamps[0]; + } + + private: + core::smart_refctd_ptr m_api; + smart_refctd_ptr m_device; + smart_refctd_ptr m_logger; + SolidAngleVisualizer* m_visualizer; + + nbl::video::IDeviceMemoryAllocator::SAllocation m_allocation = {}; + smart_refctd_ptr m_cmdpool = nullptr; + smart_refctd_ptr m_cmdbuf = nullptr; + smart_refctd_ptr m_ds = nullptr; + smart_refctd_ptr m_pplnLayout = nullptr; + BenchmarkPushConstants m_pushConstants; + smart_refctd_ptr m_pipeline; + + smart_refctd_ptr m_timestampBeforeCmdBuff = nullptr; + smart_refctd_ptr m_timestampAfterCmdBuff = nullptr; + smart_refctd_ptr m_queryPool = nullptr; + + uint32_t m_queueFamily; + IQueue* m_computeQueue; + static constexpr int WarmupIterations = 50; + static constexpr int Iterations = 1; + }; + + template + inline bool logFail(const char* msg, Args&&... args) + { + m_logger->log(msg, ILogger::ELL_ERROR, std::forward(args)...); + return false; + } + + std::ofstream m_logFile; }; + NBL_MAIN_FUNC(SolidAngleVisualizer) \ No newline at end of file From 18a3a07667c226bdc7c6b5df68b651c634fd81ac Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 7 Jan 2026 12:07:31 +0700 Subject: [PATCH 145/219] slerp test --- 59_QuaternionTests/CQuaternionTester.h | 14 ++++++++------ 59_QuaternionTests/app_resources/common.hlsl | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index c6a4707c6..db39b8936 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -33,7 +33,9 @@ class CQuaternionTester final : public ITester::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); + //testInput.quat0 = hlsl::normalize(testInput.quat0); testInput.quat1 = math::quaternion::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); + //testInput.quat1 = hlsl::normalize(testInput.quat1); testInput.pitch = realDistribution(getRandomEngine()); testInput.yaw = realDistribution(getRandomEngine()); testInput.roll = realDistribution(getRandomEngine()); @@ -77,11 +79,11 @@ class CQuaternionTester final : public ITester quatFromEulerAngles; float32_t3x3 rotationMat; math::quaternion quatMult; - math::quaternion quatLerp; + math::quaternion quatSlerp; }; struct QuaternionTestExecutor @@ -70,7 +70,7 @@ struct QuaternionTestExecutor output.quatFromEulerAngles = math::quaternion::create(input.pitch, input.yaw, input.roll); output.rotationMat = input.quat0.constructMatrix(); output.quatMult = input.quat0 * input.quat1; - output.quatLerp = math::quaternion::unnormLerp(input.quat0, input.quat1, input.factor); + output.quatSlerp = math::quaternion::slerp(input.quat0, input.quat1, input.factor); } }; From c670bc3d0a7175de636027ffbb916dfa9b9d7247 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 7 Jan 2026 15:26:43 +0700 Subject: [PATCH 146/219] added create from rot mat test --- 59_QuaternionTests/CQuaternionTester.h | 21 +++++++++++++++++--- 59_QuaternionTests/app_resources/common.hlsl | 5 +++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index db39b8936..5f245c979 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -6,6 +6,7 @@ #include #define GLM_ENABLE_EXPERIMENTAL #include +#include #include "nbl/examples/examples.hpp" #include "app_resources/common.hlsl" @@ -33,13 +34,13 @@ class CQuaternionTester final : public ITester::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); - //testInput.quat0 = hlsl::normalize(testInput.quat0); testInput.quat1 = math::quaternion::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); - //testInput.quat1 = hlsl::normalize(testInput.quat1); testInput.pitch = realDistribution(getRandomEngine()); testInput.yaw = realDistribution(getRandomEngine()); testInput.roll = realDistribution(getRandomEngine()); + testInput.rotationMat = float32_t3x3(glm::rotate(realDistribution(getRandomEngine()), float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())))); testInput.factor = realDistribution(getRandomEngine()); + testInput.someVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); return testInput; } @@ -65,6 +66,17 @@ class CQuaternionTester final : public ITester quatFromAngleAxis; math::quaternion quatFromEulerAngles; + math::quaternion quatFromMat; float32_t3x3 rotationMat; math::quaternion quatMult; math::quaternion quatSlerp; + float32_t3 transformedVec; }; struct QuaternionTestExecutor @@ -68,9 +71,11 @@ struct QuaternionTestExecutor { output.quatFromAngleAxis = math::quaternion::create(input.axis, input.angle); output.quatFromEulerAngles = math::quaternion::create(input.pitch, input.yaw, input.roll); + output.quatFromMat = math::quaternion::create(input.rotationMat); output.rotationMat = input.quat0.constructMatrix(); output.quatMult = input.quat0 * input.quat1; output.quatSlerp = math::quaternion::slerp(input.quat0, input.quat1, input.factor); + output.transformedVec = input.quat0.transformVector(input.someVec, true); } }; From 8c96b6cc9e882ed1af69bbad2d595d70f2d64f63 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 7 Jan 2026 16:19:42 +0700 Subject: [PATCH 147/219] fix test value ranges --- 59_QuaternionTests/CQuaternionTester.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index 5f245c979..2d1d300d5 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -1,7 +1,7 @@ #ifndef _NBL_EXAMPLES_TESTS_59_QUATERNION_TESTER_INCLUDED_ #define _NBL_EXAMPLES_TESTS_59_QUATERNION_TESTER_INCLUDED_ - +#define GLM_FORCE_RADIANS #include #include #define GLM_ENABLE_EXPERIMENTAL @@ -26,20 +26,19 @@ class CQuaternionTester final : public ITester realDistribution(-100.0f, 100.0f); - std::uniform_real_distribution realDistributionSmall(1.0f, 4.0f); - std::uniform_int_distribution intDistribution(-100, 100); - std::uniform_int_distribution coinFlipDistribution(0, 1); + std::uniform_real_distribution realDistribution01(0.0f, 1.0f); + std::uniform_real_distribution realDistributionRad(-numbers::pi, numbers::pi); QuaternionInputTestValues testInput; - testInput.axis = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); - testInput.angle = realDistribution(getRandomEngine()); + testInput.axis = hlsl::normalize(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()))); + testInput.angle = realDistributionRad(getRandomEngine()); testInput.quat0 = math::quaternion::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); testInput.quat1 = math::quaternion::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); - testInput.pitch = realDistribution(getRandomEngine()); - testInput.yaw = realDistribution(getRandomEngine()); - testInput.roll = realDistribution(getRandomEngine()); - testInput.rotationMat = float32_t3x3(glm::rotate(realDistribution(getRandomEngine()), float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())))); - testInput.factor = realDistribution(getRandomEngine()); + testInput.pitch = realDistributionRad(getRandomEngine()); + testInput.yaw = realDistributionRad(getRandomEngine()); + testInput.roll = realDistributionRad(getRandomEngine()); + testInput.rotationMat = float32_t3x3(glm::rotate(realDistributionRad(getRandomEngine()), hlsl::normalize(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()))))); + testInput.factor = realDistribution01(getRandomEngine()); testInput.someVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); return testInput; From e78ae9e2d8ff440ae5af40d47503535add2afb07 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 7 Jan 2026 17:14:43 +0700 Subject: [PATCH 148/219] normalize quaternion test values --- 59_QuaternionTests/CQuaternionTester.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index 2d1d300d5..dc836adef 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -25,7 +25,7 @@ class CQuaternionTester final : public ITester realDistribution(-100.0f, 100.0f); + std::uniform_real_distribution realDistribution(-1.0f, 1.0f); std::uniform_real_distribution realDistribution01(0.0f, 1.0f); std::uniform_real_distribution realDistributionRad(-numbers::pi, numbers::pi); @@ -33,7 +33,9 @@ class CQuaternionTester final : public ITester::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); + testInput.quat0 = hlsl::normalize(testInput.quat0); testInput.quat1 = math::quaternion::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); + testInput.quat1 = hlsl::normalize(testInput.quat1); testInput.pitch = realDistributionRad(getRandomEngine()); testInput.yaw = realDistributionRad(getRandomEngine()); testInput.roll = realDistributionRad(getRandomEngine()); From 194df8aa43f5d753da25fecdff75f72af10d2d96 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 7 Jan 2026 15:08:59 +0100 Subject: [PATCH 149/219] Created ZIP archive loader test --- 73_ZipArchiveLoaderTest/CMakeLists.txt | 24 ++++++++ 73_ZipArchiveLoaderTest/config.json.template | 28 +++++++++ 73_ZipArchiveLoaderTest/main.cpp | 62 ++++++++++++++++++++ CMakeLists.txt | 1 + 4 files changed, 115 insertions(+) create mode 100644 73_ZipArchiveLoaderTest/CMakeLists.txt create mode 100644 73_ZipArchiveLoaderTest/config.json.template create mode 100644 73_ZipArchiveLoaderTest/main.cpp diff --git a/73_ZipArchiveLoaderTest/CMakeLists.txt b/73_ZipArchiveLoaderTest/CMakeLists.txt new file mode 100644 index 000000000..b7e52875d --- /dev/null +++ b/73_ZipArchiveLoaderTest/CMakeLists.txt @@ -0,0 +1,24 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() \ No newline at end of file diff --git a/73_ZipArchiveLoaderTest/config.json.template b/73_ZipArchiveLoaderTest/config.json.template new file mode 100644 index 000000000..24adf54fb --- /dev/null +++ b/73_ZipArchiveLoaderTest/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} diff --git a/73_ZipArchiveLoaderTest/main.cpp b/73_ZipArchiveLoaderTest/main.cpp new file mode 100644 index 000000000..a15ba99f2 --- /dev/null +++ b/73_ZipArchiveLoaderTest/main.cpp @@ -0,0 +1,62 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::examples; + +class ZipArchiveLoaderTest final : public BuiltinResourcesApplication +{ + using asset_base_t = BuiltinResourcesApplication; + +public: + ZipArchiveLoaderTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { + } + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + const std::filesystem::path zipPath = sharedInputCWD / "mitsuba/bedroom.zip"; + auto archive = m_system->openFileArchive(zipPath); + + auto archiveFiles = IFileArchive::SFileList::span_t(archive->listAssets()); + + std::stringstream ss; + for (const auto& file : archiveFiles) + { + ss << "ID: " << file.ID; + ss << " offset: " << file.offset; + ss << " path relative od archive: " << file.pathRelativeToArchive; + ss << " size: " << file.size << '\n'; + } + + m_logger->log(ss.str().c_str(), ILogger::ELL_PERFORMANCE); + + // TODO: test GZIP files and ZIP files with AES encryption + + return true; + } + + void onAppTerminated_impl() override + { + } + + void workLoopBody() override + { + } + + bool keepRunning() override + { + return false; + } +}; + +NBL_MAIN_FUNC(ZipArchiveLoaderTest) \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 66b82f37f..d7b7ba055 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,6 +97,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) add_subdirectory(72_CooperativeBinarySearch) + add_subdirectory(73_ZipArchiveLoaderTest) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From 6df2ffb548b43a8c69702543cc4a949efe5bc09a Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 7 Jan 2026 16:23:46 +0100 Subject: [PATCH 150/219] Changed number of the ZIP archive loader test example --- .../CMakeLists.txt | 0 .../config.json.template | 0 .../main.cpp | 26 ++++++++++--------- CMakeLists.txt | 2 +- 4 files changed, 15 insertions(+), 13 deletions(-) rename {73_ZipArchiveLoaderTest => 16_ZipArchiveLoaderTest}/CMakeLists.txt (100%) rename {73_ZipArchiveLoaderTest => 16_ZipArchiveLoaderTest}/config.json.template (100%) rename {73_ZipArchiveLoaderTest => 16_ZipArchiveLoaderTest}/main.cpp (66%) diff --git a/73_ZipArchiveLoaderTest/CMakeLists.txt b/16_ZipArchiveLoaderTest/CMakeLists.txt similarity index 100% rename from 73_ZipArchiveLoaderTest/CMakeLists.txt rename to 16_ZipArchiveLoaderTest/CMakeLists.txt diff --git a/73_ZipArchiveLoaderTest/config.json.template b/16_ZipArchiveLoaderTest/config.json.template similarity index 100% rename from 73_ZipArchiveLoaderTest/config.json.template rename to 16_ZipArchiveLoaderTest/config.json.template diff --git a/73_ZipArchiveLoaderTest/main.cpp b/16_ZipArchiveLoaderTest/main.cpp similarity index 66% rename from 73_ZipArchiveLoaderTest/main.cpp rename to 16_ZipArchiveLoaderTest/main.cpp index a15ba99f2..ba5ed5b9c 100644 --- a/73_ZipArchiveLoaderTest/main.cpp +++ b/16_ZipArchiveLoaderTest/main.cpp @@ -24,21 +24,23 @@ class ZipArchiveLoaderTest final : public BuiltinResourcesApplication if (!asset_base_t::onAppInitialized(std::move(system))) return false; - const std::filesystem::path zipPath = sharedInputCWD / "mitsuba/bedroom.zip"; - auto archive = m_system->openFileArchive(zipPath); + { + const std::filesystem::path zipPath = sharedInputCWD / "mitsuba/bedroom.zip"; + auto archive = m_system->openFileArchive(zipPath); - auto archiveFiles = IFileArchive::SFileList::span_t(archive->listAssets()); + auto archiveFiles = IFileArchive::SFileList::span_t(archive->listAssets()); - std::stringstream ss; - for (const auto& file : archiveFiles) - { - ss << "ID: " << file.ID; - ss << " offset: " << file.offset; - ss << " path relative od archive: " << file.pathRelativeToArchive; - ss << " size: " << file.size << '\n'; - } + std::stringstream ss; + for (const auto& file : archiveFiles) + { + ss << "ID: " << file.ID; + ss << " offset: " << file.offset; + ss << " path relative od archive: " << file.pathRelativeToArchive; + ss << " size: " << file.size << '\n'; + } - m_logger->log(ss.str().c_str(), ILogger::ELL_PERFORMANCE); + m_logger->log(ss.str().c_str(), ILogger::ELL_PERFORMANCE); + } // TODO: test GZIP files and ZIP files with AES encryption diff --git a/CMakeLists.txt b/CMakeLists.txt index d7b7ba055..755cec5b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,7 @@ if(NBL_BUILD_EXAMPLES) if (NBL_BUILD_MITSUBA_LOADER) add_subdirectory(15_MitsubaLoader) endif() + add_subdirectory(16_ZipArchiveLoaderTest) # Waiting for a refactor @@ -97,7 +98,6 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) add_subdirectory(72_CooperativeBinarySearch) - add_subdirectory(73_ZipArchiveLoaderTest) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From 465c77aa7bd65717874627c816d83fe2cf546da0 Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 7 Jan 2026 22:22:05 +0100 Subject: [PATCH 151/219] get the sensors loading more or less --- 40_PathTracer/include/io/CSceneLoader.h | 138 ++++++++++--- 40_PathTracer/src/io/CSceneLoader.cpp | 246 +++++++++++++++++++++++- 2 files changed, 346 insertions(+), 38 deletions(-) diff --git a/40_PathTracer/include/io/CSceneLoader.h b/40_PathTracer/include/io/CSceneLoader.h index 3d036ceaf..d8ba42f82 100644 --- a/40_PathTracer/include/io/CSceneLoader.h +++ b/40_PathTracer/include/io/CSceneLoader.h @@ -33,38 +33,65 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov }; static core::smart_refctd_ptr create(SCreationParams&& params); + // When outputFilePath isn't set in Film Element in Mitsuba, use this to find the extension string. + static inline std::string_view fileExtensionFromFormat(ext::MitsubaLoader::CElementFilm::FileFormat format) + { + using FileFormat = ext::MitsubaLoader::CElementFilm::FileFormat; + switch (format) + { + case FileFormat::PNG: + return ".png"; + case FileFormat::OPENEXR: + return ".exr"; + case FileFormat::JPEG: + return ".jpg"; + default: + break; + } + return ""; + } + struct SLoadResult { struct SSensor { using type_e = ext::MitsubaLoader::CElementSensor::Type; - struct SConstants + inline SSensor() = default; + inline SSensor(const SSensor&) = default; + inline SSensor(SSensor&&) = default; + inline SSensor& operator=(const SSensor&) = default; + inline SSensor& operator=(SSensor&&) = default; + + inline operator bool() const { - struct DenoiserArgs - { - // where the FFT bloom kernel is - system::path bloomFilePath = {}; - float bloomScale = 0.0f; - float bloomIntensity = 0.0f; - std::string tonemapperArgs = ""; - }; + return bool(constants) && mutableDefaults.valid(constants) && bool(dynamicDefaults); + } + struct SConstants + { constexpr static inline uint32_t MaxWidth = 0x1u<<(sizeof(uint16_t)*8-2); constexpr static inline uint32_t MaxHeight = MaxWidth; constexpr static inline uint32_t MaxCascadeCount = 15; - system::path outputFilePath = {}; - DenoiserArgs denoiserInfo = {}; + inline operator bool() const + { + if (width <= 0 || width >= MaxWidth) + return false; + if (height <= 0 || height >= MaxHeight) + return false; + if (type != type_e::INVALID) + return false; + if (cascadeCount <= 0 || cascadeCount >= MaxCascadeCount) + return false; + return true; + } + + // where the FFT bloom kernel is + system::path bloomFilePath = {}; // uint32_t width = 0u; uint32_t height = 0u; - // do we need to keep the crops? - int32_t cropWidth = 0u; - int32_t cropHeight = 0u; - // could the offsets be dynamic ? - int32_t cropOffsetX = 0u; - int32_t cropOffsetY = 0u; // type_e type = type_e::INVALID; // @@ -76,7 +103,7 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov { constexpr static inline uint8_t MaxClipPlanes = 6; - inline uint8_t getClipPlaneCount() + inline uint8_t getClipPlaneCount() const { using namespace nbl::hlsl; for (uint8_t i=0; i clipPlanes = {}; + // denoiser and bloom require rendering with a "skirt" this controls the skirt size + int32_t cropWidth = 0u; + int32_t cropHeight = 0u; + int32_t cropOffsetX = 0u; + int32_t cropOffsetY = 0u; + // + float nearClip; + float farClip; // float cascadeLuminanceBase = core::nan(); float cascadeLuminanceStart = core::nan(); @@ -99,31 +148,55 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov // these can change without having to reset accumulations, etc. struct SDynamic { + // For a legacy `smgr->addCameraSceneNodeModifiedMaya(nullptr, -1.0f * mainSensorData.rotateSpeed, 50.0f, mainSensorData.moveSpeed, -1, 2.0f, defaultZoomSpeedMultiplier, false, true)` constexpr static inline float DefaultRotateSpeed = 300.0f; constexpr static inline float DefaultZoomSpeed = 1.0f; constexpr static inline float DefaultMoveSpeed = 100.0f; - constexpr static inline float DefaultSceneDiagonal = 50.0f; // reference for default zoom and move speed; + constexpr static inline float DefaultSceneSize = 50.0f; // reference for default zoom and move speed; + // no constexpr std::pow + //constexpr static inline float DefaultZoomSpeedMultiplier = std::pow(DefaultSceneSize,DefaultZoomSpeed/DefaultSceneSize); - // - union Raygen - { - hlsl::float32_t4x4 linearProj = {}; - } raygen; - union + struct SPostProcess { - struct SOrientable // spherical can't move - { - hlsl::float32_t3 up = {}; - float speed = core::nan(); - } orientable = {}; + float bloomScale = 0.0f; + float bloomIntensity = 0.0f; + std::string tonemapperArgs = ""; }; + + // + inline operator bool() const + { + // TODO more checks + return !hlsl::isnan(moveSpeed); + } + + // members + system::path outputFilePath = {}; + SPostProcess postProc = {}; + // even though spherical can't rotate, the preview camera can + hlsl::float32_t3 up = {}; + float rotateSpeed = core::nan(); union { + /* + float linearStepZoomSpeed = sensorData.stepZoomSpeed; + if(core::isnan(sensorData.stepZoomSpeed)) + { + linearStepZoomSpeed = sceneDiagonal * (DefaultZoomSpeed / DefaultSceneDiagonal); + } + + // Set Zoom Multiplier + { + float logarithmicZoomSpeed = std::pow(sceneDiagonal, linearStepZoomSpeed / sceneDiagonal); + sensorData.stepZoomSpeed = logarithmicZoomSpeed; + sensorData.getInteractiveCameraAnimator()->setStepZoomMultiplier(logarithmicZoomSpeed); + */ struct SZoomable // spherical can't zoom { float speed = core::nan(); } zoomable = {}; }; + // float moveSpeed = core::nan(); // uint32_t samplesNeeded = 0u; @@ -165,4 +238,9 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov }; } + +#ifndef _NBL_THIS_EXAMPLE_C_SCENE_LOADER_CPP_ +extern template struct nbl::system::impl::to_string_helper; +#endif + #endif diff --git a/40_PathTracer/src/io/CSceneLoader.cpp b/40_PathTracer/src/io/CSceneLoader.cpp index 8189de6ca..ef86ebc5c 100644 --- a/40_PathTracer/src/io/CSceneLoader.cpp +++ b/40_PathTracer/src/io/CSceneLoader.cpp @@ -1,11 +1,61 @@ // Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#define _NBL_THIS_EXAMPLE_C_SCENE_LOADER_CPP_ #include "io/CSceneLoader.h" #include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" +#include "nlohmann/json.hpp" + + +// +namespace nbl::system::impl +{ +template<> +struct to_string_helper +{ + public: + static inline std::string __call(const nbl::this_example::CSceneLoader::SLoadResult::SSensor& value) + { + nlohmann::json j; + j["valid"] = bool(value); + + auto& constants = j["constants"]; + { + auto& mutableDefaults = j["mutableDefaults"]; + const auto& _mutableDefaults = value.mutableDefaults; + { + auto& clipPlanes = mutableDefaults["clipPlanes"]; + for (uint8_t i=0,count=_mutableDefaults.getClipPlaneCount(); i SLoadResult { IAssetLoader::SAssetLoadParams params = {}; params.workingDirectory = _params.workingDirectory; - params.logger = m_params.logger.get().get(); + auto logger = params.logger = m_params.logger.get().get(); auto* const assMan = m_params.assMan.get(); // handle archive stuff @@ -73,13 +123,13 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult auto asset = assMan->getAsset(relPathStr,params); if (asset.getContents().empty()) { - m_params.logger.log( + logger.log( "Failed to Load Mitsuba scene from \"%s\" with working directory \"%s\"", ILogger::ELL_ERROR,relPathStr.c_str(),_params.workingDirectory.lexically_normal().string().c_str() ); return {}; } - m_params.logger.log("Loaded %s",ILogger::ELL_INFO,relPathStr.c_str()); + logger.log("Loaded %s",ILogger::ELL_INFO,relPathStr.c_str()); // now unmount the archives for (; !archiveStack.empty(); archiveStack.pop()) @@ -88,31 +138,211 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult const auto type = asset.getAssetType(); if (type!=IAsset::E_TYPE::ET_SCENE) { - m_params.logger.log("But did not load an `ICPUScene` type is %S",ILogger::ELL_ERROR,system::to_string(type)); + logger.log("But did not load an `ICPUScene` type is %S",ILogger::ELL_ERROR,system::to_string(type)); return {}; } const auto* const untypedMeta = asset.getMetadata(); if (!untypedMeta || strcmpi(untypedMeta->getLoaderName(),CMitsubaMetadata::LoaderName)!=0) { - m_params.logger.log("Loaded an ICPUScene but without `CMistubaMetadata`",ILogger::ELL_ERROR); + logger.log("Loaded an ICPUScene but without `CMistubaMetadata`",ILogger::ELL_ERROR); return {}; } const auto* const meta = static_cast(untypedMeta); + // TODO: compute/get this from minumum extent of scene + float sceneSize = 50.f; + // core::vector sensors; auto& _sensors = meta->m_global.m_sensors; if (_sensors.empty()) { - m_params.logger.log("The `CMistubaMetadata` contains no sensors",ILogger::ELL_ERROR); + logger.log("The `CMistubaMetadata` contains no sensors",ILogger::ELL_ERROR); return {}; } else { sensors.resize(_sensors.size()); - m_params.logger.log("Total number of Sensors = %d",ILogger::ELL_INFO,sensors.size()); - //for () // TODO: load the stuff + logger.log("Total number of Sensors = %d",ILogger::ELL_INFO,sensors.size()); + const bool shouldHaveSensorIdxInFileName = sensors.size()>1; + const auto mainFileName = relPath.filename(); + for (auto i=0; i::min) + { + logger.log("Sensor %s (%d-th in XML) has non invertible singular transformation!",ILogger::ELL_ERROR,id,i); + constants = {}; + continue; + } + constants.rightHandedCamera = det>0.f; + } + constants.cascadeCount = hlsl::max(film.cascadeCount,1); +#if 0 // move to raygen compute + const float aspectRatio = float(constants.width) / float(constants.height); + auto convertFromXFoV = [=](float xfov) -> float + { + float aspectX = tan(radians(xfov)*0.5f); + return degrees(atan(aspectX/aspectRatio)*2.f); + }; +#endif + } + { + auto& mutableDefaults = sensors[i].mutableDefaults; + mutableDefaults.absoluteTransform = absoluteTransform; + auto outClipPlane = mutableDefaults.clipPlanes.begin(); + for (auto i=0; i(0.f); + if (any(glsl::notEqual(plane,rhs))) + *(outClipPlane++) = plane; + } + // ignore crops for spherical cameras + if (!isSpherical) + { + mutableDefaults.cropWidth = film.cropWidth; + mutableDefaults.cropHeight = film.cropHeight; + mutableDefaults.cropOffsetX = film.cropOffsetX; + mutableDefaults.cropOffsetY = film.cropOffsetY; + } + // + mutableDefaults.nearClip = base.nearClip; + mutableDefaults.farClip = base.farClip; + // + mutableDefaults.cascadeLuminanceBase = film.cascadeLuminanceBase; + mutableDefaults.cascadeLuminanceStart = film.cascadeLuminanceStart; + } + { + using dyn_t = SLoadResult::SSensor::SDynamic; + dyn_t& dynamicDefaults = sensors[i].dynamicDefaults; + // output file settings + { + std::filesystem::path outputFilePath = film.outputFilePath; + // handle missing output path + if (outputFilePath.empty()) + { + const auto extensionStr = fileExtensionFromFormat(film.fileFormat); + core::string filename = "Render_" + mainFileName.stem().string(); + if(shouldHaveSensorIdxInFileName) + filename += "_Sensor_" + system::to_string(i) + extensionStr.data(); + else + filename += extensionStr; + logger.log("Sensor %s (%d-th in XML) has no output path, deduced to \"%s\"",ILogger::ELL_WARNING,id,i,filename.c_str()); + outputFilePath = filename; + } + std::string_view extension = ""; + bool invalid = false; + if (auto ext=outputFilePath.extension().string(); ext.size()>2) + { + extension = {ext.begin()+1,ext.end()}; + using format_e = ext::MitsubaLoader::CElementFilm::FileFormat; + switch (film.fileFormat) + { + case format_e::PNG: + invalid = strcmpi(extension.data(),"png")!=0; + break; + case format_e::OPENEXR: + invalid = strcmpi(extension.data(),"exr")!=0; + break; + case format_e::JPEG: + invalid = strcmpi(extension.data(),"jpg")!=0 && strcmpi(extension.data(),"jpe")!=0 && strcmpi(extension.data(),"jpeg")!=0 && + strcmpi(extension.data(),"jif")!=0 && strcmpi(extension.data(),"jfif")!=0 && strcmpi(extension.data(),"jfi")!=0; + break; + default: + break; + } + } + if (invalid) + { + logger.log("Sensor %s (%d-th in XML) has invalid format %d or extension \"%s\"",ILogger::ELL_ERROR,id,i,system::to_string(film.fileFormat),extension.data()); + dynamicDefaults = {}; + continue; + } + dynamicDefaults.outputFilePath = std::move(outputFilePath); +#if 0 // not part of the loader, do somewhere else + // + if (outputFilePath.is_relative()) + { + logger.log("Film output path is relative: \"%s\"",ILogger::ELL_INFO,outputFilePath.c_str()); + // output relative to output dir + // or the XML if so wished (walk backward and determine which directories are read only) + } +#endif + } + // post process + { + dynamicDefaults.postProc.bloomScale = film.denoiserBloomScale; + dynamicDefaults.postProc.bloomIntensity = film.denoiserBloomIntensity; + dynamicDefaults.postProc.tonemapperArgs = std::string(film.denoiserTonemapperArgs); + } + // rotate + { + dynamicDefaults.rotateSpeed = hlsl::isnan(base.rotateSpeed) ? base.rotateSpeed:dyn_t::DefaultRotateSpeed; + } + // move speed + { + if (hlsl::isnan(base.moveSpeed)) + { + dynamicDefaults.moveSpeed = sceneSize*(dyn_t::DefaultMoveSpeed/dyn_t::DefaultSceneSize); + logger.log("Sensor %s (%d-th in XML) Move Spped is NaN, deducing default from Scene Bounds",ILogger::ELL_WARNING,id,i); + } + else + dynamicDefaults.moveSpeed = base.moveSpeed; + logger.log("Sensor %s (%d-th in XML) move speed is %f",ILogger::ELL_INFO,id,i,dynamicDefaults.moveSpeed); + } + // ignore zoom for spherical cameras + if (!isSpherical) + { +#if 0 // TODO + // Deduce Move and Zoom Speeds if it is nan + { + float linearStepZoomSpeed = base.zoomSpeed; + if (hlsl::isnan(linearStepZoomSpeed)) + linearStepZoomSpeed = sceneSize * (dyn_t::DefaultZoomSpeed / dyn_t::DefaultSceneSize); + } + dynamicDefaults.zoomable.speed = ; +#endif + } + else if (!hlsl::isnan(base.zoomSpeed)) + logger.log("Sensor %s (%d-th in XML) is SPHERICAL, zoom speed gets ignored!",ILogger::ELL_WARNING,id,i); + dynamicDefaults.samplesNeeded = _sensor.sampler.sampleCount; + dynamicDefaults.kappa = constants.cascadeCount<2 ? 0.f:film.rfilter.kappa; + dynamicDefaults.Emin = film.rfilter.Emin; + if (film.envmapRegularizationFactor>0.f) + logger.log("Sensor %s (%d-th in XML) `envmapRegularizationFactor=%f` is deprecated and ignored, we do MIS now",ILogger::ELL_WARNING,id,i,film.envmapRegularizationFactor); + } + } + // log + for (auto i=0; i Date: Thu, 8 Jan 2026 12:07:17 +0700 Subject: [PATCH 152/219] Fix example to remove CDrawAABB::getTransformFromOBB --- 12_MeshLoaders/main.cpp | 12 +++++------- 73_GeometryInspector/main.cpp | 12 +++++------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index a04ede3f4..57a2a50b6 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -499,17 +499,15 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc auto& obbInst = m_obbInstances[i]; const auto& cpuGeom = geometries[i].get(); - const auto obb = CPolygonGeometryManipulator::calculateOBB({ - .fetch = [geo = cpuGeom, &world4x4](size_t vertex_i) { + const auto obb = CPolygonGeometryManipulator::calculateOBB( + cpuGeom->getPositionView().getElementCount(), + [geo = cpuGeom, &world4x4](size_t vertex_i) { hlsl::float32_t3 pt; geo->getPositionView().decodeElement(vertex_i, pt); return pt; - }, - .size = cpuGeom->getPositionView().getElementCount(), - }); + }); obbInst.color = { 0, 0, 1, 1 }; - const auto obbTransform = ext::debug_draw::DrawAABB::getTransformFromOBB(obb); - obbInst.transform = math::linalg::promoted_mul(world4x4, obbTransform); + obbInst.transform = math::linalg::promoted_mul(world4x4, obb.transform); #endif } diff --git a/73_GeometryInspector/main.cpp b/73_GeometryInspector/main.cpp index 0ffcb6fa7..8a487d707 100644 --- a/73_GeometryInspector/main.cpp +++ b/73_GeometryInspector/main.cpp @@ -414,8 +414,7 @@ class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinR } if (m_shouldDrawOBB) { - const auto obbTransform = ext::debug_draw::DrawAABB::getTransformFromOBB(meshInstance.obb); - debugDrawInstances.push_back(ext::debug_draw::InstanceData{ .transform = math::linalg::promoted_mul(world4x4, obbTransform), .color = float32_t4(0, 0, 1, 1)}); + debugDrawInstances.push_back(ext::debug_draw::InstanceData{ .transform = math::linalg::promoted_mul(world4x4, meshInstance.obb.transform), .color = float32_t4(0, 0, 1, 1)}); } m_bbRenderer->render({ cb, viewProjMatrix }, drawFinished, debugDrawInstances); @@ -672,14 +671,13 @@ class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinR bound = hlsl::shapes::util::union_(transformed,bound); const auto& cpuGeom = geometries[i].get(); - const auto obb = CPolygonGeometryManipulator::calculateOBB({ - .fetch = [geo = cpuGeom](size_t vertex_i) { + const auto obb = CPolygonGeometryManipulator::calculateOBB( + cpuGeom->getPositionView().getElementCount(), + [geo = cpuGeom](size_t vertex_i) { hlsl::float32_t3 pt; geo->getPositionView().decodeElement(vertex_i, pt); return pt; - }, - .size = cpuGeom->getPositionView().getElementCount(), - }); + }); m_meshInstances.push_back({ .name = std::format("Mesh {}", i), .aabb = aabb, .obb = obb }); } From f387455d56c6ef75dd8700cc935477339eac0adf Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 8 Jan 2026 14:17:41 +0100 Subject: [PATCH 153/219] flesh out raygen for all sensor types TODOs: - make camera orientation always right handed? - ortho camera raygen - zoom speed --- 40_PathTracer/include/io/CSceneLoader.h | 48 +++++- 40_PathTracer/src/io/CSceneLoader.cpp | 209 ++++++++++++++++++++---- 2 files changed, 222 insertions(+), 35 deletions(-) diff --git a/40_PathTracer/include/io/CSceneLoader.h b/40_PathTracer/include/io/CSceneLoader.h index d8ba42f82..6e55bc66e 100644 --- a/40_PathTracer/include/io/CSceneLoader.h +++ b/40_PathTracer/include/io/CSceneLoader.h @@ -95,7 +95,6 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov // type_e type = type_e::INVALID; // - uint8_t rightHandedCamera : 1 = true; uint8_t cascadeCount : 4 = 1; } constants = {}; // these could theoretically change without recreating session resources @@ -120,16 +119,55 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov inline bool valid(const SConstants& cst) const { // TODO more checks - return getClipPlaneCount()0.f) + return Type::Persp; + if (encoded[0][0]<0.f) + return Type::Ortho; + return Type::Env; + } + + // for a raygen shader to transform the [0,1]^2 NDC coord into a ray (without tMin/tStart) + // PERSP `dir = normalize(float3(pseudo_mul(mat,ndc),rightHanded ? 1:(-1))); origin = dir*nearClip/abs(dir.z);` + // ORTHO `origin = float3(pseudo_mul(mat,ndc),rightHanded ? nearClip:(-nearClip)); dir = float32_t(0,0,rightHanded ? 1:(-1))` + inline explicit operator hlsl::float32_t2x3() const + { + auto retval = encoded; + for (auto c=0; c<2; c++) + { + const float flipCol = hlsl::sign(encoded[c][c]); + for (auto r=0; r<2; r++) + retval[r][c] *= flipCol; + } + return retval; + } + + // Whether Z+ or Z- is forward for the camera + inline bool isRightHanded() const {return encoded[1][1]>0.f;} + + private: + friend class CSceneLoader; + + hlsl::float32_t2x3 encoded = {}; } raygen; // std::array clipPlanes = {}; diff --git a/40_PathTracer/src/io/CSceneLoader.cpp b/40_PathTracer/src/io/CSceneLoader.cpp index ef86ebc5c..37c8e6364 100644 --- a/40_PathTracer/src/io/CSceneLoader.cpp +++ b/40_PathTracer/src/io/CSceneLoader.cpp @@ -4,6 +4,8 @@ #define _NBL_THIS_EXAMPLE_C_SCENE_LOADER_CPP_ #include "io/CSceneLoader.h" +#include "nbl/builtin/hlsl/testing/relative_approx_compare.hlsl" + #include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" @@ -26,16 +28,26 @@ struct to_string_helper { auto& mutableDefaults = j["mutableDefaults"]; const auto& _mutableDefaults = value.mutableDefaults; + mutableDefaults["absoluteTransform"] = system::to_string(_mutableDefaults.absoluteTransform); { - auto& clipPlanes = mutableDefaults["clipPlanes"]; - for (uint8_t i=0,count=_mutableDefaults.getClipPlaneCount(); i SLoadResult using mts_sensor_t = ext::MitsubaLoader::CElementSensor; const auto& _sensor = _sensors[i]; const char* id = _sensor.id.c_str(); - auto absoluteTransform = float32_t3x4(_sensor.transform.matrix); const bool isSpherical = _sensor.type==mts_sensor_t::SPHERICAL; const auto& base = _sensor.base; const auto& film = _sensor.film; @@ -191,36 +202,157 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult constants.width = film.width; constants.height = film.height; } + constants.cascadeCount = hlsl::max(film.cascadeCount,1); + } + float32_t3x3 orientationT; + { + auto& mutableDefaults = sensors[i].mutableDefaults; + // absolute transform + float32_t3 scaleRcp; + bool leftHanded = false; { - const float det = determinant(float32_t3x3(absoluteTransform)); - if (hlsl::abs(det)::min) + auto absoluteTransform = float32_t3x4(_sensor.transform.matrix); { - logger.log("Sensor %s (%d-th in XML) has non invertible singular transformation!",ILogger::ELL_ERROR,id,i); - constants = {}; - continue; + orientationT = transpose(float32_t3x3(absoluteTransform)); + // check orthogonality + constexpr float DiffThresh = 0.00001f; + if (!testing::relativeApproxCompare(dot(orientationT[0],orientationT[1]),0.f,DiffThresh) || + !testing::relativeApproxCompare(dot(orientationT[0],orientationT[2]),0.f,DiffThresh) || + !testing::relativeApproxCompare(dot(orientationT[1],orientationT[2]),0.f,DiffThresh)) + { + logger.log("Sensor %s (%d-th in XML) has a transformation involving skew!",ILogger::ELL_ERROR,id,i); + constants = {}; + continue; + } + // check invertibility + const float det = determinant(orientationT); + if (hlsl::abs(det)::min) + { + logger.log("Sensor %s (%d-th in XML) has non invertible singular transformation!",ILogger::ELL_ERROR,id,i); + constants = {}; + continue; + } + leftHanded = det<0.f; + // extract and remove scale, also make the transform right-handed + { + scaleRcp = rsqrt({ + dot(orientationT[0],orientationT[0]), + dot(orientationT[1],orientationT[1]), + dot(orientationT[2],orientationT[2]) + }); + // + for (auto r=0; r<3; r++) + { + orientationT[r] *= scaleRcp[r]; + absoluteTransform[r].xyz *= scaleRcp; + } + } } - constants.rightHandedCamera = det>0.f; + mutableDefaults.absoluteTransform = absoluteTransform; } - constants.cascadeCount = hlsl::max(film.cascadeCount,1); -#if 0 // move to raygen compute - const float aspectRatio = float(constants.width) / float(constants.height); - auto convertFromXFoV = [=](float xfov) -> float + // raygen + auto& ndc = mutableDefaults.raygen.encoded; + switch (_sensor.type) { - float aspectX = tan(radians(xfov)*0.5f); - return degrees(atan(aspectX/aspectRatio)*2.f); - }; -#endif - } - { - auto& mutableDefaults = sensors[i].mutableDefaults; - mutableDefaults.absoluteTransform = absoluteTransform; + case mts_sensor_t::Type::THINLENS: + logger.log("Sensor %s (%d-th in XML) is THINLENS, Depth of Field not implemented yet, demoting to PERSPECTIVE!",ILogger::ELL_WARNING,id,i); + [[fallthrough]]; + case mts_sensor_t::Type::PERSPECTIVE: + { + const auto& persp = _sensor.perspective; + // calculations for the projection plane behind the aperture (or in-front if thinking virtual) + const float halfFoVRad = hlsl::radians(persp.fov)*0.5f; + const auto halfSize = hlsl::tan(halfFoVRad); + // by default FoV is y-axis + float halfHeight = halfSize; + float halfWidth = halfSize; + // + const float aspectRatio = float(constants.width)/float(constants.height); + using fov_axis_e = mts_sensor_t::PerspectivePinhole::FOVAxis; + switch (persp.fovAxis) + { + case fov_axis_e::X: + halfHeight /= aspectRatio; + break; + case fov_axis_e::Y: + halfWidth *= aspectRatio; + break; + case fov_axis_e::DIAGONAL: + { + // halfSize^2 == halfWidth^2+halfHeight^2 == (1+aspectRatio^2)*halfHeight^2 + halfHeight /= hlsl::sqrt(1.f+aspectRatio*aspectRatio); + halfWidth = halfHeight*aspectRatio; + } + break; + case fov_axis_e::SMALLER: + if (aspectRatio<1.f) + halfHeight /= aspectRatio; + else + halfWidth *= aspectRatio; + break; + case fov_axis_e::LARGER: + if (aspectRatio<1.f) + halfWidth *= aspectRatio; + else + halfHeight /= aspectRatio; + break; + default: + break; + } + // max 1/4 circle + if (!(halfWidth>0.f && halfHeight>0.f)) + { + ndc[0][0] = core::nan(); + logger.log("Sensor %s (%d-th in XML) had a Field of View of %f degrees!",ILogger::ELL_ERROR,id,i,persp.fov); + break; + } + // + ndc[0] = float32_t3(1.f,0.f,persp.shiftX)*halfWidth; + // column gets negated because in Vulkan NDC.y runs downwards + ndc[1] = -float32_t3(0.f,1.f,persp.shiftY)*halfHeight; + } + break; + case mts_sensor_t::Type::TELECENTRIC: + logger.log("Sensor %s (%d-th in XML) is TELECENTRIC, Depth of Field not implemented yet, demoting to ORTHOGRAPHIC!",ILogger::ELL_WARNING,id,i); + [[fallthrough]]; + case mts_sensor_t::Type::ORTHOGRAPHIC: + { + const auto& ortho = _sensor.orthographic; + // extract and negate the scale from the + } + break; + case mts_sensor_t::Type::SPHERICAL: + // irrelevant for spherical cameras, we send rays everywhere + ndc[0] = promote(0); + ndc[1] = promote(0); + break; + default: + ndc[0][0] = core::nan(); + break; + } + if (hlsl::isnan(ndc[0][0])) + { + logger.log("Sensor %s (%d-th in XML) has invalid projection, had type %s!",ILogger::ELL_ERROR,id,i,system::to_string(_sensor.type).c_str()); + constants = {}; + continue; + } + if (leftHanded) + ndc[1][1] *= -1.f; + // clip planes auto outClipPlane = mutableDefaults.clipPlanes.begin(); - for (auto i=0; i(0.f); if (any(glsl::notEqual(plane,rhs))) + { + if (outClipPlane>mutableDefaults.clipPlanes.end()) + { + logger.log("Sensor %s (%d-th in XML) has more than %d clip planes, ignoreing the rest!",ILogger::ELL_ERROR,id,i); + break; + } *(outClipPlane++) = plane; + } } // ignore crops for spherical cameras if (!isSpherical) @@ -300,10 +432,27 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult dynamicDefaults.postProc.bloomIntensity = film.denoiserBloomIntensity; dynamicDefaults.postProc.tonemapperArgs = std::string(film.denoiserTonemapperArgs); } - // rotate + // up vector { - dynamicDefaults.rotateSpeed = hlsl::isnan(base.rotateSpeed) ? base.rotateSpeed:dyn_t::DefaultRotateSpeed; + // true forward may be Z+ or Z- + const auto viewSpaceZ = orientationT[2]; + // our "right" will only be X+ if forward is Z- + const auto reconstructedRight = cross(base.up,viewSpaceZ); + const auto actualRight = cross(orientationT[1],viewSpaceZ); + // but it doesn't matter here for this check (both will be flipped, dot product identical) + const float dp = dot(reconstructedRight,actualRight); + const float pb = dot(base.up,viewSpaceZ); + // special formulation avoiding multiple sqrt and inversesqrt to preserve precision + const auto reconstructedLen = hlsl::length(reconstructedRight); + logger.log("Camera Reconstructed Up Vector match score = %f",system::ILogger::ELL_INFO,dp/reconstructedLen); + const float64_t threshold = 0.9996*hlsl::length(base.up); + if (testing::relativeApproxCompare(dp,reconstructedLen,0.03f) && hlsl::abs(pb) Date: Thu, 8 Jan 2026 16:33:58 +0100 Subject: [PATCH 154/219] Fixed example 22 --- 22_CppCompat/main.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index 6a8e51cf2..aa840607d 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -97,7 +97,7 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); lp.workingDirectory = "app_resources"; // virtual root - auto key = nbl::this_example::builtin::build::get_spirv_key<"intrinsicsTest">(m_device.get()); + auto key = nbl::this_example::builtin::build::get_spirv_key<"test">(m_device.get()); auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) @@ -141,7 +141,6 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat return logFail("Failed to create compute pipeline!\n"); } - for (int i = 0; i < 2; ++i) { m_images[i] = m_device->createImage(IGPUImage::SCreationParams { From 6fa05211a58f2cbb61510e37e068a121890126d8 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 13 Jan 2026 15:08:57 +0700 Subject: [PATCH 155/219] refactor changes to quaternion usage --- 59_QuaternionTests/app_resources/common.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/59_QuaternionTests/app_resources/common.hlsl b/59_QuaternionTests/app_resources/common.hlsl index 6ec740207..db46b9ac9 100644 --- a/59_QuaternionTests/app_resources/common.hlsl +++ b/59_QuaternionTests/app_resources/common.hlsl @@ -72,7 +72,7 @@ struct QuaternionTestExecutor output.quatFromAngleAxis = math::quaternion::create(input.axis, input.angle); output.quatFromEulerAngles = math::quaternion::create(input.pitch, input.yaw, input.roll); output.quatFromMat = math::quaternion::create(input.rotationMat); - output.rotationMat = input.quat0.constructMatrix(); + output.rotationMat = _static_cast(input.quat0); output.quatMult = input.quat0 * input.quat1; output.quatSlerp = math::quaternion::slerp(input.quat0, input.quat1, input.factor); output.transformedVec = input.quat0.transformVector(input.someVec, true); From 55afb97d734ac99a95c92da526244cd9990cd905 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 13 Jan 2026 21:43:07 +0100 Subject: [PATCH 156/219] improve 16_ZipArchiveLoaderTest, make CI friendly --- 16_ZipArchiveLoaderTest/CMakeLists.txt | 10 +- 16_ZipArchiveLoaderTest/main.cpp | 226 +++++++++++++++++++++++-- 2 files changed, 223 insertions(+), 13 deletions(-) diff --git a/16_ZipArchiveLoaderTest/CMakeLists.txt b/16_ZipArchiveLoaderTest/CMakeLists.txt index b7e52875d..f60757aad 100644 --- a/16_ZipArchiveLoaderTest/CMakeLists.txt +++ b/16_ZipArchiveLoaderTest/CMakeLists.txt @@ -21,4 +21,12 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +enable_testing() + +add_test(NAME NBL_ZIP_ARCHIVE_LOADER_TEST + COMMAND "$" + WORKING_DIRECTORY "$" + COMMAND_EXPAND_LISTS +) diff --git a/16_ZipArchiveLoaderTest/main.cpp b/16_ZipArchiveLoaderTest/main.cpp index ba5ed5b9c..ccdef7a06 100644 --- a/16_ZipArchiveLoaderTest/main.cpp +++ b/16_ZipArchiveLoaderTest/main.cpp @@ -4,6 +4,13 @@ #include +#include +#include +#include +#include +#include +#include + using namespace nbl; using namespace nbl::core; using namespace nbl::system; @@ -24,25 +31,220 @@ class ZipArchiveLoaderTest final : public BuiltinResourcesApplication if (!asset_base_t::onAppInitialized(std::move(system))) return false; + const path zipPath = sharedInputCWD / "mitsuba/bedroom.zip"; + auto archive = m_system->openFileArchive(zipPath); + if (!archive) + { + m_logger->log("Failed to open zip archive: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + auto archiveFiles = IFileArchive::SFileList::span_t(archive->listAssets()); + if (archiveFiles.empty()) + { + m_logger->log("Zip archive is empty: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + const path scenePath = "scene.xml"; + auto sceneIt = std::find_if(archiveFiles.begin(), archiveFiles.end(), [&scenePath](const auto& entry) + { + return entry.pathRelativeToArchive == scenePath; + }); + if (sceneIt == archiveFiles.end()) + { + m_logger->log("Zip archive missing scene.xml: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + auto sceneFile = archive->getFile(scenePath, IFileBase::ECF_READ, ""); + if (!sceneFile) + { + m_logger->log("Failed to open scene.xml from zip: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + if (sceneIt->size == 0 || sceneFile->getSize() != sceneIt->size) { - const std::filesystem::path zipPath = sharedInputCWD / "mitsuba/bedroom.zip"; - auto archive = m_system->openFileArchive(zipPath); + m_logger->log("scene.xml size mismatch in zip: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } - auto archiveFiles = IFileArchive::SFileList::span_t(archive->listAssets()); + const size_t probeSize = std::min(sceneIt->size, 64u); + std::array probe{}; + IFile::success_t probeRead; + sceneFile->read(probeRead, probe.data(), 0, probeSize); + if (!probeRead) + { + m_logger->log("Failed to read scene.xml from zip: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } - std::stringstream ss; - for (const auto& file : archiveFiles) + const std::string_view probeView(probe.data(), probeSize); + if (probeView.find("log("scene.xml header is unexpected in zip: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + const size_t linesToPrint = 6u; + const char* mapped = static_cast(sceneFile->getMappedPointer()); + if (mapped) + { + std::vector headLines; + headLines.reserve(linesToPrint); + std::deque tailLines; + + size_t lineStart = 0; + for (size_t i = 0; i < sceneIt->size; ++i) { - ss << "ID: " << file.ID; - ss << " offset: " << file.offset; - ss << " path relative od archive: " << file.pathRelativeToArchive; - ss << " size: " << file.size << '\n'; + if (mapped[i] != '\n') + continue; + + size_t lineLen = i - lineStart; + if (lineLen && mapped[i - 1] == '\r') + --lineLen; + + const std::string_view line(mapped + lineStart, lineLen); + if (headLines.size() < linesToPrint) + headLines.push_back(line); + if (tailLines.size() == linesToPrint) + tailLines.pop_front(); + tailLines.push_back(line); + lineStart = i + 1; + } + if (lineStart < sceneIt->size) + { + size_t lineLen = sceneIt->size - lineStart; + if (lineLen && mapped[sceneIt->size - 1] == '\r') + --lineLen; + const std::string_view line(mapped + lineStart, lineLen); + if (headLines.size() < linesToPrint) + headLines.push_back(line); + if (tailLines.size() == linesToPrint) + tailLines.pop_front(); + tailLines.push_back(line); + } + + std::string head; + for (const auto& line : headLines) + { + head.append(line); + head.push_back('\n'); + } + std::string tail; + for (const auto& line : tailLines) + { + tail.append(line); + tail.push_back('\n'); } - m_logger->log(ss.str().c_str(), ILogger::ELL_PERFORMANCE); + m_logger->log("scene.xml head (%u lines):\n%s", ILogger::ELL_INFO, static_cast(headLines.size()), head.c_str()); + m_logger->log("scene.xml tail (%u lines):\n%s", ILogger::ELL_INFO, static_cast(tailLines.size()), tail.c_str()); + } + else + { + std::vector headLines; + headLines.reserve(linesToPrint); + std::deque tailLines; + std::string carry; + const size_t chunkSize = 64u * 1024u; + std::string buffer(chunkSize, '\0'); + size_t offset = 0; + while (offset < sceneIt->size) + { + const size_t toRead = std::min(chunkSize, sceneIt->size - offset); + IFile::success_t chunkRead; + sceneFile->read(chunkRead, buffer.data(), offset, toRead); + if (!chunkRead) + { + m_logger->log("Failed to read scene.xml from zip: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + size_t lineStart = 0; + for (size_t i = 0; i < toRead; ++i) + { + if (buffer[i] != '\n') + continue; + + size_t lineEnd = i; + if (lineEnd > lineStart && buffer[lineEnd - 1] == '\r') + --lineEnd; + + std::string line; + if (!carry.empty()) + { + line = carry; + if (lineEnd > lineStart) + line.append(buffer.data() + lineStart, lineEnd - lineStart); + if (!line.empty() && line.back() == '\r') + line.pop_back(); + carry.clear(); + } + else + { + line.assign(buffer.data() + lineStart, lineEnd - lineStart); + } + + if (headLines.size() < linesToPrint) + headLines.push_back(line); + if (tailLines.size() == linesToPrint) + tailLines.pop_front(); + tailLines.push_back(std::move(line)); + lineStart = i + 1; + } + + if (lineStart < toRead) + { + const size_t tailSize = toRead - lineStart; + if (carry.empty()) + carry.assign(buffer.data() + lineStart, tailSize); + else + carry.append(buffer.data() + lineStart, tailSize); + } + + offset += toRead; + } + if (!carry.empty()) + { + if (!carry.empty() && carry.back() == '\r') + carry.pop_back(); + if (headLines.size() < linesToPrint) + headLines.push_back(carry); + if (tailLines.size() == linesToPrint) + tailLines.pop_front(); + tailLines.push_back(carry); + carry.clear(); + } + + std::string head; + for (const auto& line : headLines) + { + head.append(line); + head.push_back('\n'); + } + std::string tail; + for (const auto& line : tailLines) + { + tail.append(line); + tail.push_back('\n'); + } + + m_logger->log("scene.xml head (%u lines):\n%s", ILogger::ELL_INFO, static_cast(headLines.size()), head.c_str()); + m_logger->log("scene.xml tail (%u lines):\n%s", ILogger::ELL_INFO, static_cast(tailLines.size()), tail.c_str()); + } + + std::stringstream ss; + for (const auto& file : archiveFiles) + { + ss << "ID: " << file.ID; + ss << " offset: " << file.offset; + ss << " path relative od archive: " << file.pathRelativeToArchive; + ss << " size: " << file.size << '\n'; } - // TODO: test GZIP files and ZIP files with AES encryption + m_logger->log(ss.str().c_str(), ILogger::ELL_PERFORMANCE); return true; } @@ -61,4 +263,4 @@ class ZipArchiveLoaderTest final : public BuiltinResourcesApplication } }; -NBL_MAIN_FUNC(ZipArchiveLoaderTest) \ No newline at end of file +NBL_MAIN_FUNC(ZipArchiveLoaderTest) From a11816dc212510c18fcdd1d4a1a41e1f17edb849 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 14 Jan 2026 10:59:49 +0700 Subject: [PATCH 157/219] removed commented out and obsolete code --- 59_QuaternionTests/main.cpp | 716 +----------------------------------- 1 file changed, 2 insertions(+), 714 deletions(-) diff --git a/59_QuaternionTests/main.cpp b/59_QuaternionTests/main.cpp index 866356d4e..07f44b9d6 100644 --- a/59_QuaternionTests/main.cpp +++ b/59_QuaternionTests/main.cpp @@ -23,25 +23,6 @@ using namespace nbl::examples; //using namespace glm; -void cpu_tests(); - -struct S -{ - float32_t3 f; -}; - -struct T -{ - float32_t a; - float32_t3 b; - S c; - float32_t2x3 d; - float32_t2x3 e; - int f[3]; - float32_t2 g[2]; - float32_t4 h; -}; - class QuaternionTest final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication { using device_base_t = application_templates::MonoDeviceApplication; @@ -72,143 +53,6 @@ class QuaternionTest final : public application_templates::MonoDeviceApplication quaternionTester.setupPipeline(pplnSetupData); quaternionTester.performTestsAndVerifyResults("QuaternionTestLog.txt"); } - - //m_queue = m_device->getQueue(0, 0); - //m_commandPool = m_device->createCommandPool(m_queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - //m_commandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &m_cmdbuf,1 }, smart_refctd_ptr(m_logger)); - - //smart_refctd_ptr shader; - //{ - // IAssetLoader::SAssetLoadParams lp = {}; - // lp.logger = m_logger.get(); - // lp.workingDirectory = "app_resources"; // virtual root - // auto key = nbl::this_example::builtin::build::get_spirv_key<"intrinsicsTest">(m_device.get()); - // auto assetBundle = m_assetMgr->getAsset(key.data(), lp); - // const auto assets = assetBundle.getContents(); - // if (assets.empty()) - // return logFail("Could not load shader!"); - - // auto source = IAsset::castDown(assets[0]); - // // The down-cast should not fail! - // assert(source); - - // // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple - // shader = m_device->compileShader({ source.get() }); - // if (!shader) - // return logFail("Creation of a GPU Shader to from CPU Shader source failed!"); - //} - - //const uint32_t bindingCount = 4u; - //IGPUDescriptorSetLayout::SBinding bindings[bindingCount] = {}; - //bindings[0].type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE; - //bindings[1].type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE; - //bindings[2].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; - //bindings[3].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; - // - // for(int i = 0; i < bindingCount; ++i) - // { - // bindings[i].stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; - // bindings[i].count = 1; - // bindings[i].binding = i; - // } - //m_descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); - // { - // SPushConstantRange pcRange = {}; - // pcRange.stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; - // pcRange.offset = 0u; - // pcRange.size = 2 * sizeof(uint32_t); - // auto layout = m_device->createPipelineLayout({ &pcRange,1 }, smart_refctd_ptr(m_descriptorSetLayout)); - // IGPUComputePipeline::SCreationParams params = {}; - // params.layout = layout.get(); - // params.shader.shader = shader.get(); - // params.shader.entryPoint = "main"; - // if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) - // return logFail("Failed to create compute pipeline!\n"); - // } - - - // for (int i = 0; i < 2; ++i) - // { - // m_images[i] = m_device->createImage(IGPUImage::SCreationParams { - // { - // .type = IGPUImage::E_TYPE::ET_2D, - // .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, - // .format = E_FORMAT::EF_R32G32B32A32_SFLOAT, - // .extent = { 1920,1080,1 }, - // .mipLevels = 1, - // .arrayLayers = 1, - // .usage = IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT - // | IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT - // | IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT, - // }, {}, IGPUImage::TILING::LINEAR, - // }); - - // auto reqs = m_images[i]->getMemoryReqs(); - // reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - // m_device->allocate(reqs, m_images[i].get()); - - // m_imageViews[i] = m_device->createImageView(IGPUImageView::SCreationParams { - // .image = m_images[i], - // .viewType = IGPUImageView::E_TYPE::ET_2D, - // .format = E_FORMAT::EF_R32G32B32A32_SFLOAT, - // // .subresourceRange = { IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, 0, 1, 0, 1 }, - // }); - - // m_buffers[i] = m_device->createBuffer(IGPUBuffer::SCreationParams { - // {.size = reqs.size, .usage = - // IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT | - // IGPUBuffer::E_USAGE_FLAGS::EUF_STORAGE_BUFFER_BIT, - // } - // }); - - // reqs = m_buffers[i]->getMemoryReqs(); - // reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getHostVisibleMemoryTypeBits(); - // m_device->allocate(reqs, m_buffers[i].get()); - - // m_readbackBuffers[i] = m_device->createBuffer(IGPUBuffer::SCreationParams { - // {.size = reqs.size, .usage = IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT } - // }); - - // reqs = m_readbackBuffers[i]->getMemoryReqs(); - // reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getHostVisibleMemoryTypeBits(); - // m_device->allocate(reqs, m_readbackBuffers[i].get()); - // } - - // smart_refctd_ptr descriptorPool = nullptr; - // { - // IDescriptorPool::SCreateInfo createInfo = {}; - // createInfo.maxSets = 1; - // createInfo.maxDescriptorCount[static_cast(IDescriptor::E_TYPE::ET_STORAGE_IMAGE)] = 2; - // createInfo.maxDescriptorCount[static_cast(IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = 2; - // descriptorPool = m_device->createDescriptorPool(std::move(createInfo)); - // } - - // m_descriptorSet = descriptorPool->createDescriptorSet(smart_refctd_ptr(m_descriptorSetLayout)); - - - // IGPUDescriptorSet::SDescriptorInfo descriptorInfos[bindingCount] = {}; - // IGPUDescriptorSet::SWriteDescriptorSet writeDescriptorSets[bindingCount] = {}; - // - // for(int i = 0; i < bindingCount; ++i) - // { - // writeDescriptorSets[i].info = &descriptorInfos[i]; - // writeDescriptorSets[i].dstSet = m_descriptorSet.get(); - // writeDescriptorSets[i].binding = i; - // writeDescriptorSets[i].count = bindings[i].count; - - // if(i<2) - // { - // descriptorInfos[i].desc = m_imageViews[i]; - // descriptorInfos[i].info.image.imageLayout = IImage::LAYOUT::GENERAL; - // } - // else - // { - // descriptorInfos[i].desc = m_buffers[i-2]; - // descriptorInfos[i].info.buffer.size = ~0ull; - // } - // } - - // m_device->updateDescriptorSets(bindingCount, writeDescriptorSets, 0u, nullptr); // In contrast to fences, we just need one semaphore to rule all dispatches return true; @@ -219,565 +63,9 @@ class QuaternionTest final : public application_templates::MonoDeviceApplication m_device->waitIdle(); } - void workLoopBody() override - { - cpu_tests(); - - //constexpr auto StartedValue = 0; - - //smart_refctd_ptr progress = m_device->createSemaphore(StartedValue); - - //m_cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - //m_cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + void workLoopBody() override {} - - //IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t layoutTransBarriers[2] = { { - // .barrier = { - // .dep = { - // .srcStageMask = PIPELINE_STAGE_FLAGS::HOST_BIT, - // .srcAccessMask = ACCESS_FLAGS::HOST_WRITE_BIT, - // .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - // .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - // } - // }, - // .image = m_images[0].get(), - // .subresourceRange = { - // .aspectMask = IImage::EAF_COLOR_BIT, - // .baseMipLevel = 0u, - // .levelCount = 1u, - // .baseArrayLayer = 0u, - // .layerCount = 1u, - // }, - // .oldLayout = IImage::LAYOUT::UNDEFINED, - // .newLayout = IImage::LAYOUT::GENERAL - //} }; - //layoutTransBarriers[1] = layoutTransBarriers[0]; - //layoutTransBarriers[1].image = m_images[1].get(); - - //const IGPUCommandBuffer::SPipelineBarrierDependencyInfo depInfo = { .imgBarriers = layoutTransBarriers }; - //m_cmdbuf->pipelineBarrier(EDF_NONE, depInfo); - // - - //const uint32_t pushConstants[2] = { 1920, 1080 }; - //const IGPUDescriptorSet* set = m_descriptorSet.get(); - //m_cmdbuf->bindComputePipeline(m_pipeline.get()); - //m_cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_pipeline->getLayout(), 0u, 1u, &set); - //m_cmdbuf->dispatch(240, 135, 1u); - //for (int i = 0; i < 2; ++i) - //{ - // layoutTransBarriers[i].barrier.dep = layoutTransBarriers[i].barrier.dep.nextBarrier(PIPELINE_STAGE_FLAGS::COPY_BIT,ACCESS_FLAGS::TRANSFER_READ_BIT); - // layoutTransBarriers[i].oldLayout = layoutTransBarriers[i].newLayout; - // layoutTransBarriers[i].newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL; - //} - //m_cmdbuf->pipelineBarrier(EDF_NONE,depInfo); - - ////{ - //// constexpr auto FinishedValue1 = 42; - //// IQueue::SSubmitInfo submitInfos[1] = {}; - //// const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; - //// submitInfos[0].commandBuffers = cmdbufs; - //// const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = progress.get(),.value = FinishedValue1,.stageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; - //// submitInfos[0].signalSemaphores = signals; - //// m_api->startCapture(); - //// m_queue->submit(submitInfos); //Command buffer is NOT IN THE EXECUTABLE STATE - //// m_api->endCapture(); - //// const ISemaphore::SWaitInfo waitInfos[] = { { - //// .semaphore = progress.get(), - //// .value = FinishedValue1 - //// } }; - //// m_device->blockForSemaphores(waitInfos); - - ////} - //IImage::SBufferCopy copy = { - // .imageSubresource = { - // .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - // .mipLevel = 0, - // .baseArrayLayer = 0, - // .layerCount = 1, - // }, - // .imageExtent = {1920, 1080, 1}, - //}; - // - //bool succ = m_cmdbuf->copyImageToBuffer(m_images[0].get(), IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, m_readbackBuffers[0].get(), 1, ©); - //succ &= m_cmdbuf->copyImageToBuffer(m_images[1].get(), IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, m_readbackBuffers[1].get(), 1, ©); - //assert(succ); - //m_cmdbuf->end(); - - //{ - // constexpr auto FinishedValue = 69; - // IQueue::SSubmitInfo submitInfos[1] = {}; - // const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; - // submitInfos[0].commandBuffers = cmdbufs; - // const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = progress.get(),.value = FinishedValue,.stageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; - // submitInfos[0].signalSemaphores = signals; - // m_api->startCapture(); - // m_queue->submit(submitInfos); - // m_api->endCapture(); - // const ISemaphore::SWaitInfo waitInfos[] = { { - // .semaphore = progress.get(), - // .value = FinishedValue - // } }; - // m_device->blockForSemaphores(waitInfos); - //} - - //using res = std::array, 1080>, 1920>; - //res* ptrs[4] = {}; - // - //static_assert(sizeof(res) == sizeof(float) * 4 * 1920 * 1080); - - //for (int i = 0; i < 4; ++i) - //{ - // auto mem = (i < 2 ? m_buffers[i] : m_readbackBuffers[i-2])->getBoundMemory(); - // assert(mem.memory->isMappable()); - // auto* ptr = mem.memory->map({ .offset = 0, .length = mem.memory->getAllocationSize() }); - // ptrs[i] = (res*)ptr; - //} - //res& buf = *ptrs[1]; - //res& img = *ptrs[3]; - - //std::cout << buf[0][0][0] << " " - // << buf[0][0][1] << " " - // << buf[0][0][2] << " " - // << buf[0][0][3] << " " - // << "\n"; - // - //const std::ios::fmtflags f(std::cout.flags()); - //std::cout << std::hex - // << std::bit_cast(buf[0][0][0]) << " " - // << std::bit_cast(buf[0][0][1]) << " " - // << std::bit_cast(buf[0][0][2]) << " " - // << std::bit_cast(buf[0][0][3]) << " " - // << "\n"; - //std::cout.flags(f); - - //if(buf[0][0][0] != -1.f) - //{ - // std::cout << "Shader tests failed\n"; - //} - - m_keepRunning = false; - } - - bool keepRunning() override - { - return m_keepRunning; - } - - -private: - smart_refctd_ptr m_pipeline = nullptr; - smart_refctd_ptr m_descriptorSetLayout; - smart_refctd_ptr m_descriptorSet; - - smart_refctd_ptr m_images[2]; - smart_refctd_ptr m_buffers[2]; - smart_refctd_ptr m_readbackBuffers[2]; - smart_refctd_ptr m_imageViews[2]; - smart_refctd_ptr m_cmdbuf = nullptr; - IQueue* m_queue; - smart_refctd_ptr m_commandPool; - uint64_t m_iteration = 0; - constexpr static inline uint64_t MaxIterations = 200; - - bool m_keepRunning = true; + bool keepRunning() override { return false; } }; -template -constexpr bool val(T a) -{ - return std::is_const_v; -} - -template -bool equal(T l, U r) -{ - static_assert(sizeof(T) == sizeof(U)); - return 0==memcmp(&l, &r, sizeof(T)); -} - - -bool almost_equal(float l, float r) -{ - return fabs(l - r) < std::numeric_limits::epsilon() * 1000; -} - -template -constexpr auto limits_var(T obj) -{ - if constexpr (std::is_function_v>) - return obj(); - else - return obj; -} - -template -T random(T lo, T hi) -{ - return (hi-lo)/RAND_MAX * rand() + lo; -} - NBL_MAIN_FUNC(QuaternionTest) - -void cpu_tests() -{ - float32_t3 a = float32_t3(1.0f, 2.0f, 3.0f); - float32_t3 b = float32_t3(2.0f, 3.0f, 4.0f); - b = a * 3.0f; - bool3 asdf = bool3(true, false, true); - pow(a, b); - - // TODO: later this whole test should be templated so we can check all `T` not just `float`, but for this we need `type_traits` - - // DO NOT EVER THINK TO CHANGE `using type1 = vector` to `using type1 = type` EVER! - static_assert(!std::is_same_v); - static_assert(!std::is_same_v); - static_assert(!std::is_same_v); - static_assert(!std::is_same_v); - //static_assert(!std::is_same_v,T>); - - // checking matrix memory layout - { - float32_t4x3 a; - float32_t3x4 b; - float32_t3 v; - float32_t4 u; - mul(a, b); - mul(b, a); - mul(a, v); - mul(v, b); - mul(u, a); - mul(b, u); - - float32_t4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - a - a; - b + b; - static_assert(std::is_same_v); - static_assert(std::is_same_v); - static_assert(std::is_same_v); - static_assert(std::is_same_v); - static_assert(std::is_same_v); - static_assert(std::is_same_v); - - } - - // making sure linear operators returns the correct type - - static_assert(std::is_same_v>); - static_assert(std::is_same_v>); - static_assert(std::is_same_v>); - static_assert(std::is_same_v>); - - // checking scalar packing - static_assert(offsetof(T, a) == 0); - static_assert(offsetof(T, b) == offsetof(T, a) + sizeof(T::a)); - static_assert(offsetof(T, c) == offsetof(T, b) + sizeof(T::b)); - static_assert(offsetof(T, d) == offsetof(T, c) + sizeof(T::c)); - static_assert(offsetof(T, e) == offsetof(T, d) + sizeof(T::d)); - static_assert(offsetof(T, f) == offsetof(T, e) + sizeof(T::e)); - static_assert(offsetof(T, g) == offsetof(T, f) + sizeof(T::f)); - static_assert(offsetof(T, h) == offsetof(T, g) + sizeof(T::g)); - - // use some functions - float32_t3 x; - float32_t2x3 y; - float32_t3x3 z; - //barycentric::reconstructBarycentrics(x, y); - //barycentric::reconstructBarycentrics(x, z); - - // color matrix tests: - //testColorMatrices(); - - // promote.hlsl tests: - // promote scalar to vector - //float32_t3 v0 = nbl::hlsl::promote(2.0f); - // promote scalar to matrix - //float32_t3x3 m0 = nbl::hlsl::promote(2.0f); - - // TODO?: promote vector to matrix - //glm::mat3 m1 = nbl::hlsl::promote(glm::vec3(1.0f, 2.0f, 3.0f)); - - // test vector comparison operators - { - /*float32_t3 a = float32_t3(1.0f, 2.0f, 3.0f); - float32_t3 b = float32_t3(0.5f, 0.5f, 0.5f); - assert(glm::all(a > b)); - assert(glm::all(b < a)); - - b = float32_t3(0.5f, 2.0f, 0.5f); - assert(glm::all(a >= b)); - assert(glm::all(b <= a));*/ - } - - // TODO[Przemek]: tests function output - float32_t3 ZERO_VEC = float32_t3(0.0f, 0.0f, 0.0f); - float32_t3 ONE_VEC = float32_t3(1.0f, 1.0f, 1.0f); - - // test functions from EOTF.hlsl - //assert(areVectorsEqual(colorspace::eotf::identity(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::eotf::impl_shared_2_4(ZERO_VEC, 0.5f), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::eotf::sRGB(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::eotf::Display_P3(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::eotf::DCI_P3_XYZ(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::eotf::SMPTE_170M(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::eotf::SMPTE_ST2084(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::eotf::HDR10_HLG(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::eotf::AdobeRGB(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::eotf::Gamma_2_2(ZERO_VEC), ZERO_VEC)); - ////assert(areVectorsEqual(colorspace::eotf::ACEScc(ZERO_VEC), ZERO_VEC)); - ////assert(areVectorsEqual(colorspace::eotf::ACEScct(ZERO_VEC), ZERO_VEC)); - - //assert(areVectorsEqual(colorspace::eotf::identity(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::eotf::impl_shared_2_4(ONE_VEC, 0.5f), ONE_VEC)); - //assert(areVectorsEqual(colorspace::eotf::sRGB(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::eotf::Display_P3(ONE_VEC), ONE_VEC)); - ////assert(areVectorsEqual(colorspace::eotf::DCI_P3_XYZ(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::eotf::SMPTE_170M(ONE_VEC), ONE_VEC)); - ////assert(areVectorsEqual(colorspace::eotf::SMPTE_ST2084(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::eotf::HDR10_HLG(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::eotf::AdobeRGB(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::eotf::Gamma_2_2(ONE_VEC), ONE_VEC)); - ////assert(areVectorsEqual(colorspace::eotf::ACEScc(ONE_VEC), ONE_VEC)); - ////assert(areVectorsEqual(colorspace::eotf::ACEScct(ONE_VEC), ONE_VEC)); - - //// test functions from OETF.hlsl - //assert(areVectorsEqual(colorspace::oetf::identity(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::oetf::impl_shared_2_4(ZERO_VEC, 0.5f), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::oetf::sRGB(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::oetf::Display_P3(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::oetf::DCI_P3_XYZ(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::oetf::SMPTE_170M(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::oetf::SMPTE_ST2084(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::oetf::HDR10_HLG(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::oetf::AdobeRGB(ZERO_VEC), ZERO_VEC)); - //assert(areVectorsEqual(colorspace::oetf::Gamma_2_2(ZERO_VEC), ZERO_VEC)); - ////assert(areVectorsEqual(colorspace::oetf::ACEScc(ZERO_VEC), ZERO_VEC)); - ////assert(areVectorsEqual(colorspace::oetf::ACEScct(ZERO_VEC), ZERO_VEC)); - - //assert(areVectorsEqual(colorspace::oetf::identity(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::oetf::impl_shared_2_4(ONE_VEC, 0.5f), ONE_VEC)); - //assert(areVectorsEqual(colorspace::oetf::sRGB(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::oetf::Display_P3(ONE_VEC), ONE_VEC)); - ////assert(areVectorsEqual(colorspace::oetf::DCI_P3_XYZ(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::oetf::SMPTE_170M(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::oetf::SMPTE_ST2084(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::oetf::HDR10_HLG(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::oetf::AdobeRGB(ONE_VEC), ONE_VEC)); - //assert(areVectorsEqual(colorspace::oetf::Gamma_2_2(ONE_VEC), ONE_VEC)); - ////assert(areVectorsEqual(colorspace::oetf::ACEScc(ONE_VEC), ONE_VEC)); - ////assert(areVectorsEqual(colorspace::oetf::ACEScct(ONE_VEC), ONE_VEC)); - - // xoroshiro64 tests - //constexpr uint32_t2 state = uint32_t2(12u, 34u); - //Xoroshiro64Star xoroshiro64Star = Xoroshiro64Star::construct(state); - //xoroshiro64Star(); - //Xoroshiro64StarStar xoroshiro64StarStar = Xoroshiro64StarStar::construct(state); - //xoroshiro64StarStar(); - - auto zero = cross(x,x); - auto lenX2 = dot(x,x); - //auto z_inv = inverse(z); //busted return type conversion - auto mid = nbl::hlsl::mix(x,x,float32_t3(0.5f)); - //auto w = transpose(y); //also busted - - - // half test - { - - float16_t MIN = 6.103515e-05F; - float16_t MAX = 65504.0F; - float16_t DENORM_MIN = 5.96046448e-08F; - uint16_t QUIET_NAN = 0x7FFF; - uint16_t SIGNALING_NAN = 0x7DFF; - -// TODO: reenable after port to OpenEXR 3.0 -// TODO: This whole test is wrong, the uint constants should be reinterpret casted into `float16_t` not static-casted -#if 0 // disabling test, because Imath 2.4.0 doesn't provide constexpr limits, which makes the specialization of `nbl::hlsl::numeric_limits` impossible - if(!equal((float16_t)nbl::hlsl::impl::numeric_limits::min, nbl::hlsl::numeric_limits::min())) - { - std::cout << "numeric_limits::min does not match\n"; - } - if(!equal((float16_t)nbl::hlsl::impl::numeric_limits::max, nbl::hlsl::numeric_limits::max())) - { - std::cout << "numeric_limits::max does not match\n"; - } - if(!equal((float16_t)nbl::hlsl::impl::numeric_limits::denorm_min, nbl::hlsl::numeric_limits::denorm_min())) - { - std::cout << "numeric_limits::denorm_min does not match\n"; - } - if(!equal(nbl::hlsl::impl::numeric_limits::quiet_NaN, nbl::hlsl::numeric_limits::quiet_NaN())) - { - std::cout << "numeric_limits::quiet_NaN does not match\n"; - } - if(!equal(nbl::hlsl::impl::numeric_limits::signaling_NaN, nbl::hlsl::numeric_limits::signaling_NaN())) - { - std::cout << "numeric_limits::signaling_NaN does not match\n"; - } -#endif - } - - auto test_type_limits = []() - { - using L = std::numeric_limits; - using R = nbl::hlsl::impl::numeric_limits; - - #define TEST_AND_LOG(var) \ - { \ - auto rhs = limits_var(R::var); \ - auto lhs = limits_var(L::var); \ - if(!equal(lhs, rhs)) \ - { \ - std::cout << typeid(T).name() << " " << #var << " does not match : " << double(lhs) << " - " << double(rhs) << "\n"; \ - } \ - } - - TEST_AND_LOG(is_specialized); - TEST_AND_LOG(is_signed); - TEST_AND_LOG(is_integer); - TEST_AND_LOG(is_exact); - TEST_AND_LOG(has_infinity); - TEST_AND_LOG(has_quiet_NaN); - TEST_AND_LOG(has_signaling_NaN); - TEST_AND_LOG(has_denorm); - TEST_AND_LOG(has_denorm_loss); - TEST_AND_LOG(round_style); - TEST_AND_LOG(is_iec559); - TEST_AND_LOG(is_bounded); - TEST_AND_LOG(is_modulo); - TEST_AND_LOG(digits); - TEST_AND_LOG(digits10); - TEST_AND_LOG(max_digits10); - TEST_AND_LOG(radix); - TEST_AND_LOG(min_exponent); - TEST_AND_LOG(min_exponent10); - TEST_AND_LOG(max_exponent); - TEST_AND_LOG(max_exponent10); - TEST_AND_LOG(traps); - TEST_AND_LOG(tinyness_before); - TEST_AND_LOG(min); - TEST_AND_LOG(max); - TEST_AND_LOG(lowest); - TEST_AND_LOG(epsilon); - TEST_AND_LOG(round_error); - TEST_AND_LOG(infinity); - TEST_AND_LOG(quiet_NaN); - TEST_AND_LOG(signaling_NaN); - TEST_AND_LOG(denorm_min); - #undef TEST_AND_LOG - }; - - test_type_limits.template operator()(); - test_type_limits.template operator()(); - test_type_limits.template operator()(); - test_type_limits.template operator()(); - test_type_limits.template operator()(); - test_type_limits.template operator()(); - test_type_limits.template operator()(); - test_type_limits.template operator()(); - test_type_limits.template operator()(); - test_type_limits.template operator()(); - test_type_limits.template operator()(); - - // countl_zero test - mpl::countl_zero::value; - // TODO: fix warning about nodiscard - std::countl_zero(5u); - nbl::hlsl::countl_zero(5u); - - // bit.hlsl test - /*nbl::hlsl::rotl(1u, 1u); - nbl::hlsl::rotr(1u, 1u);*/ - - - // cmath - - -#define PASS_VARS1 x0 -#define PASS_VARS2 x0,x1 -#define PASS_VARS3 x0,x1,x2 -#define PASS_VARS(count) PASS_VARS##count - - -#define ASSERT_EQ(fn) \ - if (!almost_equal(lhs, rhs)) \ - std::cout << #fn << " does not match " << lhs << " vs " << rhs << '\n'; - -#define INIT_VARS(T) \ - T x0 = random(T(-10000), T(10000)); \ - T x1 = random(T(1), T(1000)); \ - T x2 = random(T(1), T(1000)); \ - -#define TEST_CMATH(fn, varcount, T) \ - { INIT_VARS(T)\ - auto lhs = nbl::hlsl::fn(PASS_VARS(varcount)); \ - auto rhs = std::fn(PASS_VARS(varcount)); \ - ASSERT_EQ(fn); \ - } - -#define TEST_CMATHT(fn, out_type, varcount, T) \ - { INIT_VARS(T) \ - out_type o0, o1; \ - auto lhs = nbl::hlsl::fn(PASS_VARS(varcount),o0); \ - auto rhs = std::fn(PASS_VARS(varcount),&o1); \ - ASSERT_EQ(fn); \ - assert(almost_equal(o0,o1)); \ - } - -#ifndef DISABLE_TGMATH_TESTS -#define TEST_CMATH_FOR_TYPE(type) \ - TEST_CMATH(cos, 1, type) \ - TEST_CMATH(sin, 1, type) \ - TEST_CMATH(tan, 1, type) \ - TEST_CMATH(acos, 1, type) \ - TEST_CMATH(asin, 1, type) \ - TEST_CMATH(atan, 1, type) \ - TEST_CMATH(atan2, 2, type) \ - TEST_CMATH(cosh, 1, type) \ - TEST_CMATH(sinh, 1, type) \ - TEST_CMATH(tanh, 1, type) \ - TEST_CMATH(acosh, 1, type) \ - TEST_CMATH(asinh, 1, type) \ - TEST_CMATH(atanh, 1, type) \ - TEST_CMATH(exp, 1, type) \ - TEST_CMATHT(frexp, int, 1, type) \ - TEST_CMATH(ldexp, 2, type) \ - TEST_CMATH(log,1,type) \ - TEST_CMATH(log10,1,type) \ - TEST_CMATHT(modf, type, 1, type) \ - TEST_CMATH(exp2, 1, type) \ - TEST_CMATH(log2, 1, type) \ - TEST_CMATH(logb, 1, type) \ - TEST_CMATH(expm1, 1, type) \ - TEST_CMATH(log1p, 1, type) \ - TEST_CMATH(ilogb, 1, type) \ - TEST_CMATH(scalbn, 2, type) \ - TEST_CMATH(pow, 2, type) \ - TEST_CMATH(sqrt, 1, type) \ - TEST_CMATH(cbrt, 1, type) \ - TEST_CMATH(hypot, 2, type) \ - TEST_CMATH(copysign, 2, type) \ - TEST_CMATH(erf, 1, type) \ - TEST_CMATH(erfc, 1, type) \ - TEST_CMATH(tgamma, 1, type) \ - TEST_CMATH(lgamma, 1, type) \ - TEST_CMATH(ceil, 1, type) \ - TEST_CMATH(floor, 1, type) \ - TEST_CMATH(fmod, 2, type) \ - TEST_CMATH(trunc, 1, type) \ - TEST_CMATH(round, 1, type) \ - TEST_CMATH(rint, 1, type) \ - TEST_CMATH(nearbyint, 1, type) \ - TEST_CMATHT(remquo, int, 2, type) \ - TEST_CMATH(remainder, 2, type) \ - TEST_CMATH(abs, 1, type) \ - TEST_CMATH(fabs, 1, type) \ - TEST_CMATH(fma, 3, type) \ - TEST_CMATH(fmax, 2, type) \ - TEST_CMATH(fmin, 2, type) \ - TEST_CMATH(fdim, 2, type) \ - - - //TEST_CMATH_FOR_TYPE(float32_t) - //TEST_CMATH_FOR_TYPE(float64_t) -#endif - std::cout << "cpu tests done\n"; -} From bf95ca41a9c0436a91bc90641f39f0aee62ee5c9 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 14 Jan 2026 12:33:20 +0700 Subject: [PATCH 158/219] added more tests for scaled quaternions --- 59_QuaternionTests/CQuaternionTester.h | 50 ++++++++++++++++++- 59_QuaternionTests/app_resources/common.hlsl | 51 +++++++------------- 2 files changed, 65 insertions(+), 36 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index dc836adef..fe19415e3 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -36,11 +36,22 @@ class CQuaternionTester final : public ITester::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); testInput.quat1 = hlsl::normalize(testInput.quat1); + testInput.quat2 = testInput.quat0 * realDistribution(getRandomEngine()) * 1000.f; + testInput.quat3 = testInput.quat1 * realDistribution(getRandomEngine()) * 1000.f; testInput.pitch = realDistributionRad(getRandomEngine()); testInput.yaw = realDistributionRad(getRandomEngine()); testInput.roll = realDistributionRad(getRandomEngine()); testInput.rotationMat = float32_t3x3(glm::rotate(realDistributionRad(getRandomEngine()), hlsl::normalize(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()))))); - testInput.factor = realDistribution01(getRandomEngine()); + testInput.scaleFactor = realDistribution01(getRandomEngine()) * 1000.f; + + glm::mat4 scaleRotationMat = glm::mat4(1); + scaleRotationMat[0] = float32_t4(testInput.rotationMat[0], 0); + scaleRotationMat[1] = float32_t4(testInput.rotationMat[1], 0); + scaleRotationMat[2] = float32_t4(testInput.rotationMat[2], 0); + scaleRotationMat = scaleRotationMat * glm::scale(float32_t3(testInput.scaleFactor)); + testInput.scaleRotationMat = float32_t3x3(scaleRotationMat); + + testInput.interpolationFactor = realDistribution01(getRandomEngine()); testInput.someVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); return testInput; @@ -50,6 +61,8 @@ class CQuaternionTester final : public ITester::type a = 0; } - -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include - -#include -#include - -#include - - -#include -#include -#include - -#include -#include - #include using namespace nbl::hlsl; @@ -44,13 +12,17 @@ struct QuaternionInputTestValues { math::quaternion quat0; math::quaternion quat1; + math::quaternion quat2; + math::quaternion quat3; float32_t3 axis; float angle; float pitch; float yaw; float roll; float32_t3x3 rotationMat; - float factor; + float scaleFactor; + float32_t3x3 scaleRotationMat; + float interpolationFactor; float32_t3 someVec; }; @@ -59,9 +31,13 @@ struct QuaternionTestValues math::quaternion quatFromAngleAxis; math::quaternion quatFromEulerAngles; math::quaternion quatFromMat; + math::quaternion quatFromScaledMat; float32_t3x3 rotationMat; + float32_t3x3 scaleRotationMat; math::quaternion quatMult; math::quaternion quatSlerp; + math::quaternion quatFlerp; + math::quaternion quatScaledMult; float32_t3 transformedVec; }; @@ -72,10 +48,17 @@ struct QuaternionTestExecutor output.quatFromAngleAxis = math::quaternion::create(input.axis, input.angle); output.quatFromEulerAngles = math::quaternion::create(input.pitch, input.yaw, input.roll); output.quatFromMat = math::quaternion::create(input.rotationMat); + output.quatFromScaledMat = math::quaternion::create(input.scaleRotationMat); + output.rotationMat = _static_cast(input.quat0); + output.scaleRotationMat = _static_cast(input.quat2); + output.quatMult = input.quat0 * input.quat1; - output.quatSlerp = math::quaternion::slerp(input.quat0, input.quat1, input.factor); + output.quatSlerp = math::quaternion::slerp(input.quat0, input.quat1, input.interpolationFactor); + output.quatFlerp = math::quaternion::flerp(input.quat0, input.quat1, input.interpolationFactor); output.transformedVec = input.quat0.transformVector(input.someVec, true); + + output.quatScaledMult = input.quat2 * input.quat3; } }; From 72e2105de0c5b707ee92f4255f0cb280641de399 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 14 Jan 2026 12:35:14 +0700 Subject: [PATCH 159/219] removed redundant lines --- 59_QuaternionTests/CQuaternionTester.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index fe19415e3..4424d7609 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -90,17 +90,6 @@ class CQuaternionTester final : public ITester Date: Wed, 14 Jan 2026 19:34:23 +0100 Subject: [PATCH 160/219] link mitsuba to 73 --- 73_GeometryInspector/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/73_GeometryInspector/CMakeLists.txt b/73_GeometryInspector/CMakeLists.txt index 57e32dd63..8eed20a70 100644 --- a/73_GeometryInspector/CMakeLists.txt +++ b/73_GeometryInspector/CMakeLists.txt @@ -6,12 +6,14 @@ endif() if(NBL_BUILD_IMGUI AND NBL_BUILD_DEBUG_DRAW) set(NBL_INCLUDE_SERACH_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${NBL_EXT_MITSUBA_LOADER_INCLUDE_DIRS}" ) list(APPEND NBL_LIBRARIES imtestengine imguizmo "${NBL_EXT_IMGUI_UI_LIB}" + "${NBL_EXT_MITSUBA_LOADER_LIB}" ) nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") From 9795b5d3a35621126247076333f9ab17406e3ff3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 14 Jan 2026 19:35:47 +0100 Subject: [PATCH 161/219] one more correction --- CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 204b1b1df..5825f80eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,7 +98,10 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) add_subdirectory(72_CooperativeBinarySearch) - add_subdirectory(73_GeometryInspector) + + if (NBL_BUILD_MITSUBA_LOADER) + add_subdirectory(73_GeometryInspector) + endif() # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From b475d2056e77a73fd3d46154186052b2ecee8d87 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 15 Jan 2026 11:32:48 +0700 Subject: [PATCH 162/219] fix how quat from scaled mat is created --- 59_QuaternionTests/CQuaternionTester.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index 4424d7609..39f71f623 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -85,13 +85,23 @@ class CQuaternionTester final : public ITester Date: Thu, 15 Jan 2026 14:34:23 +0700 Subject: [PATCH 163/219] fixes rotation-scale matrix calc on glm because quat_cast only takes rotation mat --- 59_QuaternionTests/CQuaternionTester.h | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index 39f71f623..5ea0f7e29 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -44,12 +44,8 @@ class CQuaternionTester final : public ITester Date: Thu, 15 Jan 2026 17:26:33 +0700 Subject: [PATCH 164/219] test slerp and flerp quaternion values by orientation --- 59_QuaternionTests/CQuaternionTester.h | 22 ++++++++++++++++++-- common/include/nbl/examples/Tester/ITester.h | 8 +++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index 5ea0f7e29..d667759f3 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -140,12 +140,30 @@ class CQuaternionTester final : public ITester + void verifyVectorTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, + const size_t testIteration, const uint32_t seed, const TestType testType, const float64_t maxAllowedDifference, const bool testOrientation) + { + if (compareVectorTestValues(expectedVal, testVal, maxAllowedDifference, testOrientation)) + return; + + printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType); + } + + template requires concepts::FloatingPointLikeVectorial + bool compareVectorTestValues(const T& lhs, const T& rhs, const float64_t maxAllowedDifference, const bool testOrientation) + { + if (testOrientation) + return nbl::hlsl::testing::orientationCompare(lhs, rhs, maxAllowedDifference); + return nbl::hlsl::testing::relativeApproxCompare(lhs, rhs, maxAllowedDifference); + } }; #endif diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 16572acdf..6a5438bdf 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -5,6 +5,7 @@ #include #include #include +#include using namespace nbl; @@ -313,6 +314,13 @@ class ITester if (compareTestValues(expectedVal, testVal, maxAllowedDifference)) return; + printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType); + } + + template + void printTestFail(const std::string& memberName, const T& expectedVal, const T& testVal, + const size_t testIteration, const uint32_t seed, const TestType testType) + { std::stringstream ss; switch (testType) { From bafe60ab566867cae4096c3f34ccaa2443ae927b Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 16 Jan 2026 15:10:26 +0700 Subject: [PATCH 165/219] use orientation comparator for all quaternions, except scaled --- 59_QuaternionTests/CQuaternionTester.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index d667759f3..739d1c83b 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -131,15 +131,15 @@ class CQuaternionTester final : public ITester Date: Fri, 16 Jan 2026 09:48:35 +0100 Subject: [PATCH 166/219] only supply RH orientation matrix to a future camera, handle Left-handedness and orthogonal projection in loaded sensors --- 22_RaytracedAO/main.cpp | 408 +----------------------- 40_PathTracer/include/io/CSceneLoader.h | 24 +- 40_PathTracer/src/io/CSceneLoader.cpp | 30 +- 3 files changed, 46 insertions(+), 416 deletions(-) diff --git a/22_RaytracedAO/main.cpp b/22_RaytracedAO/main.cpp index bead404f7..2f80bfae8 100644 --- a/22_RaytracedAO/main.cpp +++ b/22_RaytracedAO/main.cpp @@ -169,7 +169,6 @@ int main(int argc, char** argv) } bool takeScreenShots = true; - std::string mainFileName; // std::filesystem::path(filePath).filename().string(); // DEVICE CREATION EMITTED @@ -300,36 +299,7 @@ int main(int argc, char** argv) struct SensorData { -int32_t width = 0u; -int32_t height = 0u; -int32_t cropWidth = 0u; -int32_t cropHeight = 0u; -int32_t cropOffsetX = 0u; -int32_t cropOffsetY = 0u; -bool rightHandedCamera = true; -uint32_t samplesNeeded = 0u; -float moveSpeed = core::nan(); -float stepZoomSpeed = core::nan(); -float rotateSpeed = core::nan(); - scene::ICameraSceneNode * staticCamera; - scene::ICameraSceneNode * interactiveCamera; -std::filesystem::path outputFilePath; -ext::MitsubaLoader::CElementSensor::Type type; - ext::MitsubaLoader::CElementFilm::FileFormat fileFormat; -Renderer::DenoiserArgs denoiserInfo = {}; -int32_t cascadeCount = 1; -float cascadeLuminanceBase = core::nan(); -float cascadeLuminanceStart = core::nan(); -float kappa = 0.f; -float Emin = 0.05f; - bool envmap = false; - float envmapRegFactor = 0.0f; -core::vector clipPlanes; - - scene::CSceneNodeAnimatorCameraModifiedMaya* getInteractiveCameraAnimator() - { - return reinterpret_cast(interactiveCamera->getAnimators()[0]); - } +// ... void resetInteractiveCamera() { @@ -347,242 +317,22 @@ core::vector clipPlanes; modifiedMayaAnim->setZoomAndRotationBasedOnTargetAndPosition(cameraPos, cameraTarget); } }; - -struct CubemapRender -{ - uint32_t sensorIdx = 0u; - uint32_t getSensorsBeginIdx() const { return sensorIdx; } - uint32_t getSensorsEndIdx() const { return sensorIdx + 5; } -}; - - auto smgr = device->getSceneManager(); - - // When outputFilePath isn't set in Film Element in Mitsuba, use this to find the extension string. - auto getFileExtensionFromFormat= [](ext::MitsubaLoader::CElementFilm::FileFormat format) -> std::string - { - std::string ret = ""; - using FileFormat = ext::MitsubaLoader::CElementFilm::FileFormat; - switch (format) - { - case FileFormat::PNG: - ret = ".png"; - break; - case FileFormat::OPENEXR: - ret = ".exr"; - break; - case FileFormat::JPEG: - ret = ".jpg"; - break; - default: // TODO? - break; - } - return ret; - }; - - auto isFileExtensionCompatibleWithFormat = [](std::string extension, ext::MitsubaLoader::CElementFilm::FileFormat format) -> bool - { - if(extension.empty()) - return false; - if(extension[0] == '.') - extension = extension.substr(1, extension.size()); - // TODO: get the supported extensions from loaders(?) - using FileFormat = ext::MitsubaLoader::CElementFilm::FileFormat; - switch (format) - { - case FileFormat::PNG: - return extension == "png"; - case FileFormat::OPENEXR: - return extension == "exr"; - case FileFormat::JPEG: - return extension == "jpg" || extension == "jpeg" || extension == "jpe" || extension == "jif" || extension == "jfif" || extension == "jfi"; - default: - return false; - } - }; - - const bool shouldHaveSensorIdxInFileName = globalMeta->m_global.m_sensors.size() > 1; - std::vector sensors; - std::vector cubemapRenders; +// ... auto extractAndAddToSensorData = [&](const ext::MitsubaLoader::CElementSensor& sensor, uint32_t idx) -> bool { SensorData mainSensorData = {}; - const auto& film = sensor.film; - mainSensorData.denoiserInfo.bloomFilePath = std::filesystem::path(film.denoiserBloomFilePath); - mainSensorData.denoiserInfo.bloomScale = film.denoiserBloomScale; - mainSensorData.denoiserInfo.bloomIntensity = film.denoiserBloomIntensity; - mainSensorData.denoiserInfo.tonemapperArgs = std::string(film.denoiserTonemapperArgs); - mainSensorData.fileFormat = film.fileFormat; - mainSensorData.cascadeCount = film.cascadeCount; - mainSensorData.cascadeLuminanceBase = film.cascadeLuminanceBase; - mainSensorData.cascadeLuminanceStart = film.cascadeLuminanceStart; - mainSensorData.kappa = mainSensorData.cascadeCount<2 ? 0.f:film.rfilter.kappa; - mainSensorData.Emin = film.rfilter.Emin; - mainSensorData.envmapRegFactor = core::clamp(film.envmapRegularizationFactor, 0.0f, 0.8f); - mainSensorData.outputFilePath = std::filesystem::path(film.outputFilePath); - // handle missing output path - if (mainSensorData.outputFilePath.empty()) - { - auto extensionStr = getFileExtensionFromFormat(mainSensorData.fileFormat); - if(shouldHaveSensorIdxInFileName) - mainSensorData.outputFilePath = std::filesystem::path("Render_" + mainFileName + "_Sensor_" + std::to_string(idx) + extensionStr); - else - mainSensorData.outputFilePath = std::filesystem::path("Render_" + mainFileName + extensionStr); - } - if(!isFileExtensionCompatibleWithFormat(mainSensorData.outputFilePath.extension().string(), mainSensorData.fileFormat)) - std::cout << "[ERROR] film.outputFilePath's extension is not compatible with film.fileFormat" << std::endl; - mainSensorData.samplesNeeded = sensor.sampler.sampleCount; - std::cout << "\t SamplesPerPixelNeeded = " << mainSensorData.samplesNeeded << std::endl; +// ... - const ext::MitsubaLoader::CElementSensor::PerspectivePinhole* persp = nullptr; - const ext::MitsubaLoader::CElementSensor::Orthographic* ortho = nullptr; - const ext::MitsubaLoader::CElementSensor::CameraBase* cameraBase = nullptr; - switch (sensor.type) - { - case ext::MitsubaLoader::CElementSensor::Type::PERSPECTIVE: - persp = &sensor.perspective; - cameraBase = persp; - std::cout << "\t Type = PERSPECTIVE" << std::endl; - break; - case ext::MitsubaLoader::CElementSensor::Type::THINLENS: - persp = &sensor.thinlens; - cameraBase = persp; - std::cout << "\t Type = THINLENS" << std::endl; - break; - case ext::MitsubaLoader::CElementSensor::Type::ORTHOGRAPHIC: - ortho = &sensor.orthographic; - cameraBase = ortho; - std::cout << "\t Type = ORTHOGRAPHIC" << std::endl; - break; - case ext::MitsubaLoader::CElementSensor::Type::TELECENTRIC: - ortho = &sensor.telecentric; - cameraBase = ortho; - std::cout << "\t Type = TELECENTRIC" << std::endl; - break; - case ext::MitsubaLoader::CElementSensor::Type::SPHERICAL: - cameraBase = &sensor.spherical; - std::cout << "\t Type = SPHERICAL" << std::endl; - break; - default: - std::cout << "\tSensor Type is not valid" << std::endl; - return false; - } - mainSensorData.type = sensor.type; - - for (auto i=0; iclipPlanes[i]; - if ((plane!=core::vectorSIMDf()).any()) - { - mainSensorData.clipPlanes.push_back(plane); - printf("Found Clip Plane %f,%f,%f,%f\n",plane[0],plane[1],plane[2],plane[3]); - } - } - mainSensorData.rotateSpeed = cameraBase->rotateSpeed; - mainSensorData.stepZoomSpeed = cameraBase->zoomSpeed; - mainSensorData.moveSpeed = cameraBase->moveSpeed; - - if(core::isnan(mainSensorData.rotateSpeed)) - { - mainSensorData.rotateSpeed = DefaultRotateSpeed; - std::cout << "\t Camera Rotate Speed = " << mainSensorData.rotateSpeed << " = [Default Value]" << std::endl; - } - else - std::cout << "\t Camera Rotate Speed = " << mainSensorData.rotateSpeed << std::endl; - - if(core::isnan(mainSensorData.stepZoomSpeed)) - std::cout << "\t Camera Step Zoom Speed [Linear] = " << "[Value will be deduced from Scene Bounds] " << std::endl; - else - std::cout << "\t Camera Step Zoom Speed [Linear] = " << mainSensorData.stepZoomSpeed << std::endl; - - if(core::isnan(mainSensorData.moveSpeed)) - std::cout << "\t Camera Move Speed = " << "[Value will be deduced from Scene Bounds] " << std::endl; - else - std::cout << "\t Camera Move Speed = " << mainSensorData.moveSpeed << std::endl; - - float defaultZoomSpeedMultiplier = std::pow(DefaultSceneDiagonal, DefaultZoomSpeed / DefaultSceneDiagonal); - mainSensorData.interactiveCamera = smgr->addCameraSceneNodeModifiedMaya(nullptr, -1.0f * mainSensorData.rotateSpeed, 50.0f, mainSensorData.moveSpeed, -1, 2.0f, defaultZoomSpeedMultiplier, false, true); - - nbl::core::vectorSIMDf mainCamPos; - nbl::core::vectorSIMDf mainCamUp; - nbl::core::vectorSIMDf mainCamView; - // need to extract individual components from matrix to camera - { - auto relativeTransform = sensor.transform.matrix.extractSub3x4(); - if (applicationState.isInteractiveMode && (idx == applicationState.startSensorID) && (core::abs(applicationState.interactiveCameraViewMatrix.getPseudoDeterminant().x) > 1e-6f)) - { - if (!applicationState.interactiveCameraViewMatrix.getInverse(relativeTransform)) - printf("[ERROR]: Previously saved interactive camera's view matrix is not invertible.\n"); - - if (applicationState.isInteractiveViewMatrixLH) - { - // invert signs in the first col only - relativeTransform.rows[0].x *= -1.f; - relativeTransform.rows[1].x *= -1.f; - relativeTransform.rows[2].x *= -1.f; - } - else - { - // invert signs both in the first and third cols - relativeTransform.rows[0].x *= -1.f; - relativeTransform.rows[1].x *= -1.f; - relativeTransform.rows[2].x *= -1.f; - - relativeTransform.rows[0].z *= -1.f; - relativeTransform.rows[1].z *= -1.f; - relativeTransform.rows[2].z *= -1.f; - } - } - - if (relativeTransform.getPseudoDeterminant().x < 0.f) - mainSensorData.rightHandedCamera = false; - else - mainSensorData.rightHandedCamera = true; - - std::cout << "\t IsRightHanded=" << ((mainSensorData.rightHandedCamera) ? "TRUE" : "FALSE") << std::endl; - - mainCamPos = relativeTransform.getTranslation(); - - std::cout << "\t Camera Position = <" << mainCamPos.x << "," << mainCamPos.y << "," << mainCamPos.z << ">" << std::endl; - - auto tpose = core::transpose(core::matrix4SIMD(relativeTransform)); - mainCamUp = tpose.rows[1]; - mainCamView = tpose.rows[2]; - - std::cout << "\t Camera Reconstructed UpVector = <" << mainCamUp.x << "," << mainCamUp.y << "," << mainCamUp.z << ">" << std::endl; - std::cout << "\t Camera Reconstructed Forward = <" << mainCamView.x << "," << mainCamView.y << "," << mainCamView.z << ">" << std::endl; - } - - float realFoVDegrees; - auto width = film.cropWidth; - auto height = film.cropHeight; - - float aspectRatio = float(width) / float(height); - auto convertFromXFoV = [=](float fov) -> float - { - float aspectX = tan(core::radians(fov)*0.5f); - return core::degrees(atan(aspectX/aspectRatio)*2.f); - }; - - float nearClip = cameraBase->nearClip; - float farClip = cameraBase->farClip; - if(farClip > nearClip * 10'000.0f) - std::cout << "[WARN] Depth Range is too big: nearClip = " << nearClip << ", farClip = " << farClip << std::endl; if (mainSensorData.type == ext::MitsubaLoader::CElementSensor::Type::SPHERICAL) { - mainSensorData.width = film.width; - mainSensorData.height = film.height; - mainSensorData.cropWidth = film.cropWidth; - mainSensorData.cropHeight = film.cropHeight; - mainSensorData.cropOffsetX = film.cropOffsetX; - mainSensorData.cropOffsetY = film.cropOffsetY; - +#ifdef 0 // camera setup cubemap nbl::core::vectorSIMDf camViews[6] = { nbl::core::vectorSIMDf(-1, 0, 0, 0), // -X @@ -592,7 +342,7 @@ struct CubemapRender nbl::core::vectorSIMDf(0, 0, -1, 0), // -Z nbl::core::vectorSIMDf(0, 0, +1, 0), // +Z }; - + const nbl::core::vectorSIMDf upVectors[6] = { nbl::core::vectorSIMDf(0, +1, 0, 0), // +Y @@ -609,15 +359,6 @@ struct CubemapRender for(uint32_t i = 0; i < 6; ++i) { - SensorData cubemapFaceSensorData = mainSensorData; - cubemapFaceSensorData.envmap = true; - - if (mainSensorData.cropWidth != mainSensorData.cropHeight) - { - std::cout << "[ERROR] Cannot generate cubemap faces where film.cropWidth and film.cropHeight are not equal. (Aspect Ratio must be 1)" << std::endl; - assert(false); - } - // FIXME: suffix added after extension cubemapFaceSensorData.outputFilePath.replace_extension(); constexpr const char* suffixes[6] = @@ -631,128 +372,24 @@ struct CubemapRender }; cubemapFaceSensorData.outputFilePath += suffixes[i]; - cubemapFaceSensorData.staticCamera = smgr->addCameraSceneNode(nullptr); - auto& staticCamera = cubemapFaceSensorData.staticCamera; - - const auto& camView = camViews[i]; - const auto& upVector = upVectors[i]; - - staticCamera->setPosition(mainCamPos.getAsVector3df()); - staticCamera->setTarget((mainCamPos + camView).getAsVector3df()); - staticCamera->setUpVector(upVector); + staticCamera->setTarget((mainCamPos + camViews[i]).getAsVector3df()); + staticCamera->setUpVector(upVectors[i]); const float w = float(cubemapFaceSensorData.width)/float(cubemapFaceSensorData.cropWidth); const float h = float(cubemapFaceSensorData.height)/float(cubemapFaceSensorData.cropHeight); - const auto fov = atanf(h)*2.f; - const auto aspectRatio = h/w; + const auto fov = 45 degree nondiag; + const auto aspectRatio = 1.f; if (mainSensorData.rightHandedCamera) staticCamera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(fov, aspectRatio, nearClip, farClip)); else staticCamera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(fov, aspectRatio, nearClip, farClip)); - - cubemapFaceSensorData.interactiveCamera = smgr->addCameraSceneNodeModifiedMaya(nullptr, -1.0f * mainSensorData.rotateSpeed, 50.0f, mainSensorData.moveSpeed, -1, 2.0f, defaultZoomSpeedMultiplier, false, true); - cubemapFaceSensorData.resetInteractiveCamera(); - sensors.push_back(cubemapFaceSensorData); } +#endif } else { - mainSensorData.width = film.cropWidth; - mainSensorData.height = film.cropHeight; - - if(film.cropOffsetX != 0 || film.cropOffsetY != 0) - { - std::cout << "[WARN] CropOffsets are non-zero. cropping is not supported for non cubemap renders." << std::endl; - } - -#if 0 // camera setup non spherical - mainSensorData.staticCamera = smgr->addCameraSceneNode(nullptr); - auto& staticCamera = mainSensorData.staticCamera; - - staticCamera->setPosition(mainCamPos.getAsVector3df()); - - { - auto target = mainCamView+mainCamPos; - std::cout << "\t Camera Target = <" << target.x << "," << target.y << "," << target.z << ">" << std::endl; - staticCamera->setTarget(target.getAsVector3df()); - } - - { - auto declaredUp = cameraBase->up; - auto reconstructedRight = core::cross(declaredUp,mainCamView); - auto actualRight = core::cross(mainCamUp,mainCamView); - // special formulation avoiding multiple sqrt and inversesqrt to preserve precision - const float dp = core::dot(reconstructedRight,actualRight).x/core::sqrt((core::dot(reconstructedRight,reconstructedRight)*core::dot(actualRight,actualRight)).x); - const float pb = core::dot(declaredUp,mainCamView).x/core::sqrt((core::dot(declaredUp,declaredUp)*core::dot(mainCamView,mainCamView)).x); - std::cout << "\t Camera Reconstructed UpVector match score = "<< dp << std::endl; - if (dp>0.97f && dp<1.03f && abs(pb)<0.9996f) - staticCamera->setUpVector(declaredUp); - else - staticCamera->setUpVector(mainCamUp); - } - - // - if (ortho) - { - const auto scale = sensor.transform.matrix.extractSub3x4().getScale(); - const float volumeX = 2.f*scale.x; - const float volumeY = (2.f/aspectRatio)*scale.y; - if (mainSensorData.rightHandedCamera) - staticCamera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixOrthoRH(volumeX, volumeY, nearClip, farClip)); - else - staticCamera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixOrthoLH(volumeX, volumeY, nearClip, farClip)); - } - else if (persp) - { - switch (persp->fovAxis) - { - case ext::MitsubaLoader::CElementSensor::PerspectivePinhole::FOVAxis::X: - realFoVDegrees = convertFromXFoV(persp->fov); - break; - case ext::MitsubaLoader::CElementSensor::PerspectivePinhole::FOVAxis::Y: - realFoVDegrees = persp->fov; - break; - case ext::MitsubaLoader::CElementSensor::PerspectivePinhole::FOVAxis::DIAGONAL: - { - float aspectDiag = tan(core::radians(persp->fov)*0.5f); - float aspectY = aspectDiag/core::sqrt(1.f+aspectRatio*aspectRatio); - realFoVDegrees = core::degrees(atan(aspectY)*2.f); - } - break; - case ext::MitsubaLoader::CElementSensor::PerspectivePinhole::FOVAxis::SMALLER: - if (width < height) - realFoVDegrees = convertFromXFoV(persp->fov); - else - realFoVDegrees = persp->fov; - break; - case ext::MitsubaLoader::CElementSensor::PerspectivePinhole::FOVAxis::LARGER: - if (width < height) - realFoVDegrees = persp->fov; - else - realFoVDegrees = convertFromXFoV(persp->fov); - break; - default: - realFoVDegrees = NAN; - assert(false); - break; - } - core::matrix4SIMD projMat; - projMat.setTranslation(core::vectorSIMDf(persp->shiftX,-persp->shiftY,0.f,1.f)); - if (mainSensorData.rightHandedCamera) - projMat = core::concatenateBFollowedByA(projMat,core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(realFoVDegrees), aspectRatio, nearClip, farClip)); - else - projMat = core::concatenateBFollowedByA(projMat,core::matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(realFoVDegrees), aspectRatio, nearClip, farClip)); - staticCamera->setProjectionMatrix(projMat); - } - else - { - assert(false); - } -#endif - - mainSensorData.resetInteractiveCamera(); - sensors.push_back(mainSensorData); +// camera setup non spherical } return true; @@ -790,17 +427,7 @@ device->getAssetManager()->clearAllGPUObjects(); printf("[INFO] Sensor[%d] Camera Step Zoom Speed deduced from scene bounds = %f [Linear], %f [Logarithmic] \n", s, linearStepZoomSpeed, logarithmicZoomSpeed); } - if(core::isnan(sensorData.moveSpeed)) - { - float newMoveSpeed = DefaultMoveSpeed * (sceneDiagonal / DefaultSceneDiagonal); - sensorData.moveSpeed = newMoveSpeed; - sensorData.getInteractiveCameraAnimator()->setMoveSpeed(newMoveSpeed); - printf("[INFO] Sensor[%d] Camera Move Speed deduced from scene bounds = %f\n", s, newMoveSpeed); - } - assert(!core::isnan(sensorData.getInteractiveCameraAnimator()->getRotateSpeed())); - //assert(!core::isnan(sensorData.getInteractiveCameraAnimator()->getStepZoomSpeed())); - assert(!core::isnan(sensorData.getInteractiveCameraAnimator()->getMoveSpeed())); } core::SRange nonInteractiveSensors = { nullptr, nullptr }; @@ -817,18 +444,7 @@ device->getAssetManager()->clearAllGPUObjects(); } assert(nonInteractiveSensors.size() <= sensors.size()); - auto reloadApplication = [argv]() - { - printf("[INFO]: Reloading..\n"); - - // Set up the special reload condition. - const char* cmdLineParams = "-SCENE="; - HINSTANCE result = ShellExecuteA(NULL, "open", argv[0], cmdLineParams, NULL, SW_SHOWNORMAL); - if ((uint64_t)result <= 32) - printf("[ERROR]: Failed to reload.\n"); - else - exit(0); - }; +// ... // Render To file int32_t prevWidth = 0; diff --git a/40_PathTracer/include/io/CSceneLoader.h b/40_PathTracer/include/io/CSceneLoader.h index 6e55bc66e..d8dfc4430 100644 --- a/40_PathTracer/include/io/CSceneLoader.h +++ b/40_PathTracer/include/io/CSceneLoader.h @@ -138,31 +138,33 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov // inline Type getType() const { - // note that actual matrix always requires columns to have X+ and Y- directions - if (encoded[0][0]>0.f) + // note that actual matrix always requires columns to have Y- directions + if (encoded[1][1]<0.f) return Type::Persp; - if (encoded[0][0]<0.f) + if (encoded[1][1]>0.f) return Type::Ortho; return Type::Env; } // for a raygen shader to transform the [0,1]^2 NDC coord into a ray (without tMin/tStart) - // PERSP `dir = normalize(float3(pseudo_mul(mat,ndc),rightHanded ? 1:(-1))); origin = dir*nearClip/abs(dir.z);` - // ORTHO `origin = float3(pseudo_mul(mat,ndc),rightHanded ? nearClip:(-nearClip)); dir = float32_t(0,0,rightHanded ? 1:(-1))` + // PERSP `dir = normalize(float3(pseudo_mul(mat,ndc),-1)); + // origin = -float32_t3(dir.xy/dir.z,nearClip);` + // ORTHO `origin = float32_t3(pseudo_mul(mat,ndc),-nearClip); + // dir = float32_t(0,0,-1)` inline explicit operator hlsl::float32_t2x3() const { auto retval = encoded; - for (auto c=0; c<2; c++) + // y-axis column shall always be negative + if (encoded[1][1]>0.f) { - const float flipCol = hlsl::sign(encoded[c][c]); - for (auto r=0; r<2; r++) - retval[r][c] *= flipCol; + retval[0][1] = -encoded[0][1]; + retval[1][1] = -encoded[1][1]; } return retval; } - // Whether Z+ or Z- is forward for the camera - inline bool isRightHanded() const {return encoded[1][1]>0.f;} + // Whether Z+ or Z- is forward,and X- or X+ is right for the camera + inline bool isRightHanded() const {return encoded[0][0]>0.f;} private: friend class CSceneLoader; diff --git a/40_PathTracer/src/io/CSceneLoader.cpp b/40_PathTracer/src/io/CSceneLoader.cpp index 37c8e6364..ae79b7e0a 100644 --- a/40_PathTracer/src/io/CSceneLoader.cpp +++ b/40_PathTracer/src/io/CSceneLoader.cpp @@ -209,7 +209,6 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult auto& mutableDefaults = sensors[i].mutableDefaults; // absolute transform float32_t3 scaleRcp; - bool leftHanded = false; { auto absoluteTransform = float32_t3x4(_sensor.transform.matrix); { @@ -232,7 +231,6 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult constants = {}; continue; } - leftHanded = det<0.f; // extract and remove scale, also make the transform right-handed { scaleRcp = rsqrt({ @@ -240,7 +238,21 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult dot(orientationT[1],orientationT[1]), dot(orientationT[2],orientationT[2]) }); - // + // unflip X if left handed + if (det<0.f) + scaleRcp.x = -scaleRcp.x; + // Old Code View Matrix: + // LH X+ = Left, Y+ = Up, Z+ = Backward + // RH X+ = Right, Y+ = Up, Z+ = Forward + // Basically RH view matrix used to make the Forward direction Z-, so LH projection matrix flupped it to have Z+ and W+ (cancel out) + // The only thing that stayed was the flipping of the X direction. + // ------------------------------------------ + // If we're using our animators, then we can't have negative scales on odd number of axes + // the animators will re-create the camera from forward and up axes with right handed matrix + // New Sensor code should take a look at inverse Projection Matrix to determine the dNDC/dView directions + // nearPlaneCenter = mul(invProj,float(0,0,0,1)) = invProj.column[3] + // ndcXDir = normalize(invProj.column[0].xyz*nearPlaneCenter.w-nearPlaneCenter.xyz*invProj.column[3].w) = if regular matrix = normalize(invProj.column[0].xyz) + // ndcYDir = normalize(invProj.column[1].xyz*nearPlaneCenter.w-nearPlaneCenter.xyz*invProj.column[3].w) = if regular matrix = normalize(invProj.column[1].xyz) for (auto r=0; r<3; r++) { orientationT[r] *= scaleRcp[r]; @@ -302,14 +314,14 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult // max 1/4 circle if (!(halfWidth>0.f && halfHeight>0.f)) { - ndc[0][0] = core::nan(); + ndc[1][1] = core::nan(); logger.log("Sensor %s (%d-th in XML) had a Field of View of %f degrees!",ILogger::ELL_ERROR,id,i,persp.fov); break; } - // - ndc[0] = float32_t3(1.f,0.f,persp.shiftX)*halfWidth; + // elongating camera along Z will shrink the effective FOV + ndc[0] = float32_t3(scaleRcp.z/scaleRcp.x,0.f,hlsl::sign(scaleRcp.x)*persp.shiftX); // column gets negated because in Vulkan NDC.y runs downwards - ndc[1] = -float32_t3(0.f,1.f,persp.shiftY)*halfHeight; + ndc[1] = -float32_t3(0.f,scaleRcp.z/scaleRcp.y,persp.shiftY)*halfHeight; } break; case mts_sensor_t::Type::TELECENTRIC: @@ -319,6 +331,8 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult { const auto& ortho = _sensor.orthographic; // extract and negate the scale from the + ndc[0] = float32_t3(scaleRcp.x,0.f,0.f); + ndc[1] = float32_t3(0.f,scaleRcp.y*float(constants.height)/float(constants.width),0.f); } break; case mts_sensor_t::Type::SPHERICAL: @@ -336,8 +350,6 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult constants = {}; continue; } - if (leftHanded) - ndc[1][1] *= -1.f; // clip planes auto outClipPlane = mutableDefaults.clipPlanes.begin(); for (auto i=0; i Date: Fri, 16 Jan 2026 09:52:46 +0100 Subject: [PATCH 167/219] deduce zoom speed --- 22_RaytracedAO/main.cpp | 25 ------------------------- 40_PathTracer/src/io/CSceneLoader.cpp | 17 ++++++++--------- 2 files changed, 8 insertions(+), 34 deletions(-) diff --git a/22_RaytracedAO/main.cpp b/22_RaytracedAO/main.cpp index 2f80bfae8..45db9f715 100644 --- a/22_RaytracedAO/main.cpp +++ b/22_RaytracedAO/main.cpp @@ -405,31 +405,6 @@ meshes = {}; device->getAssetManager()->clearAllGPUObjects(); -// Deduce Move and Zoom Speeds if it is nan - auto sceneBoundsExtent = renderer->getSceneBound().getExtent(); - auto sceneDiagonal = sceneBoundsExtent.getLength(); - - for(uint32_t s = 0u; s < sensors.size(); ++s) - { - auto& sensorData = sensors[s]; - - float linearStepZoomSpeed = sensorData.stepZoomSpeed; - if(core::isnan(sensorData.stepZoomSpeed)) - { - linearStepZoomSpeed = sceneDiagonal * (DefaultZoomSpeed / DefaultSceneDiagonal); - } - - // Set Zoom Multiplier - { - float logarithmicZoomSpeed = std::pow(sceneDiagonal, linearStepZoomSpeed / sceneDiagonal); - sensorData.stepZoomSpeed = logarithmicZoomSpeed; - sensorData.getInteractiveCameraAnimator()->setStepZoomMultiplier(logarithmicZoomSpeed); - printf("[INFO] Sensor[%d] Camera Step Zoom Speed deduced from scene bounds = %f [Linear], %f [Logarithmic] \n", s, linearStepZoomSpeed, logarithmicZoomSpeed); - } - - - } - core::SRange nonInteractiveSensors = { nullptr, nullptr }; if (!applicationState.isInteractiveMode) { diff --git a/40_PathTracer/src/io/CSceneLoader.cpp b/40_PathTracer/src/io/CSceneLoader.cpp index ae79b7e0a..5f82356d0 100644 --- a/40_PathTracer/src/io/CSceneLoader.cpp +++ b/40_PathTracer/src/io/CSceneLoader.cpp @@ -479,15 +479,14 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult // ignore zoom for spherical cameras if (!isSpherical) { -#if 0 // TODO - // Deduce Move and Zoom Speeds if it is nan - { - float linearStepZoomSpeed = base.zoomSpeed; - if (hlsl::isnan(linearStepZoomSpeed)) - linearStepZoomSpeed = sceneSize * (dyn_t::DefaultZoomSpeed / dyn_t::DefaultSceneSize); - } - dynamicDefaults.zoomable.speed = ; -#endif + // deduce the Zoom Speed if it is nan + float linearStepZoomSpeed = base.zoomSpeed/sceneSize; + if (hlsl::isnan(linearStepZoomSpeed)) + linearStepZoomSpeed = dyn_t::DefaultZoomSpeed/dyn_t::DefaultSceneSize; + // set Zoom Multiplier + const float logarithmicZoomSpeed = hlsl::pow(sceneSize,linearStepZoomSpeed); + dynamicDefaults.zoomable.speed = logarithmicZoomSpeed; + // .getInteractiveCameraAnimator()->setStepZoomMultiplier(logarithmicZoomSpeed); } else if (!hlsl::isnan(base.zoomSpeed)) logger.log("Sensor %s (%d-th in XML) is SPHERICAL, zoom speed gets ignored!",ILogger::ELL_WARNING,id,i); From 065674617c8cda860d00cbaf9066eb7079f7c770 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 16 Jan 2026 20:30:59 +0700 Subject: [PATCH 168/219] scaled quats test use compare length as well --- 59_QuaternionTests/CQuaternionTester.h | 17 +++++++++++++++-- common/include/nbl/examples/Tester/ITester.h | 1 - 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index 739d1c83b..71546d783 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -11,6 +11,8 @@ #include "nbl/examples/examples.hpp" #include "app_resources/common.hlsl" #include "nbl/examples/Tester/ITester.h" +#include +#include using namespace nbl; @@ -134,7 +136,7 @@ class CQuaternionTester final : public ITester + void verifyScaledVectorTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, + const size_t testIteration, const uint32_t seed, const TestType testType, const float64_t maxAbsoluteDifference, const float64_t maxRelativeDifference) + { + if (nbl::hlsl::testing::orientationCompare(expectedVal, testVal, maxRelativeDifference) && + nbl::hlsl::testing::vectorLengthCompare(expectedVal, testVal, maxAbsoluteDifference, maxRelativeDifference)) + return; + + printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType); } template diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 6a5438bdf..5482aea63 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -5,7 +5,6 @@ #include #include #include -#include using namespace nbl; From 0f8ca47bef97adef1d93c821fd7ab1cce849f5c3 Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 16 Jan 2026 22:17:47 +0100 Subject: [PATCH 169/219] Session init and deinit done, but without Scramble Key Texture filling --- 22_RaytracedAO/Renderer.cpp | 1128 +------------------- 22_RaytracedAO/Renderer.h | 25 +- 22_RaytracedAO/raytraceCommon.h | 74 -- 40_PathTracer/CMakeLists.txt | 2 + 40_PathTracer/include/io/CSceneLoader.h | 20 +- 40_PathTracer/include/renderer/CRenderer.h | 153 +-- 40_PathTracer/include/renderer/CScene.h | 108 +- 40_PathTracer/include/renderer/CSession.h | 161 +++ 40_PathTracer/main.cpp | 25 +- 40_PathTracer/src/io/CSceneLoader.cpp | 21 + 40_PathTracer/src/renderer/CRenderer.cpp | 240 ++++- 40_PathTracer/src/renderer/CScene.cpp | 70 ++ 40_PathTracer/src/renderer/CSession.cpp | 239 +++++ 13 files changed, 916 insertions(+), 1350 deletions(-) create mode 100644 40_PathTracer/include/renderer/CSession.h create mode 100644 40_PathTracer/src/renderer/CScene.cpp create mode 100644 40_PathTracer/src/renderer/CSession.cpp diff --git a/22_RaytracedAO/Renderer.cpp b/22_RaytracedAO/Renderer.cpp index 265f4986d..589baba74 100644 --- a/22_RaytracedAO/Renderer.cpp +++ b/22_RaytracedAO/Renderer.cpp @@ -21,18 +21,6 @@ using namespace nbl::video; constexpr uint32_t kOptiXPixelSize = sizeof(uint16_t)*3u; -core::smart_refctd_ptr specializedShaderFromFile(IAssetManager* assetManager, const char* path) -{ - auto bundle = assetManager->getAsset(path, {}); - return core::smart_refctd_ptr_static_cast(*bundle.getContents().begin()); -} -core::smart_refctd_ptr gpuSpecializedShaderFromFile(IAssetManager* assetManager, IVideoDriver* driver, const char* path) -{ - auto shader = specializedShaderFromFile(assetManager,path); - // TODO: @Crisspl find a way to stop the user from such insanity as moving from the bundle's dynamic array - //return std::move(driver->getGPUObjectsFromAssets(&shader,&shader+1u)->operator[](0)); - return driver->getGPUObjectsFromAssets(&shader,&shader+1u)->operator[](0); -} // TODO: make these util function in `IDescriptorSetLayout` -> Assign: @Vib auto fillIotaDescriptorBindingDeclarations = [](auto* outBindings, uint32_t accessFlags, uint32_t count, asset::E_DESCRIPTOR_TYPE descType=asset::EDT_INVALID, uint32_t startIndex=0u) -> void { @@ -53,7 +41,7 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I m_framesDispatched(0u), m_rcpPixelSize{0.f,0.f}, m_staticViewData{ {0u,0u},0u,0u,0u,0u,false,core::infinity(),{}}, m_raytraceCommonData{0.f,0u,0u,0u,core::matrix3x4SIMD()}, m_indirectDrawBuffers{nullptr},m_cullPushConstants{core::matrix4SIMD(),1.f,0u,0u,0u},m_cullWorkGroups(0u), - m_raygenWorkGroups{0u,0u},m_visibilityBuffer(nullptr),m_colorBuffer(nullptr), + m_raygenWorkGroups{0u,0u},m_colorBuffer(nullptr), m_envMapImportanceSampling(_driver) { // TODO: reimplement @@ -1495,8 +1483,6 @@ void Renderer::initScreenSizedResources( } // set up m_raygenDS - core::smart_refctd_ptr visibilityBuffer = createScreenSizedTexture(EF_R32G32B32A32_UINT); - { { constexpr auto ScrambleStateChannels = 2u; auto tmpBuff = m_driver->createCPUSideGPUVisibleGPUBufferOnDedMem(sizeof(uint32_t)*ScrambleStateChannels*renderPixelCount); @@ -1522,7 +1508,6 @@ void Renderer::initScreenSizedResources( m_driver->copyBufferToImage(tmpBuff.get(),scrambleKeys->getCreationParameters().image.get(),1u,®ion); setImageInfo(infos+0,asset::EIL_SHADER_READ_ONLY_OPTIMAL,std::move(scrambleKeys)); } - setImageInfo(infos+1,asset::EIL_SHADER_READ_ONLY_OPTIMAL,core::smart_refctd_ptr(visibilityBuffer)); setImageInfo(infos+2,asset::EIL_GENERAL,core::smart_refctd_ptr(m_tonemapOutput)); setDstSetAndDescTypesOnWrites(m_raygenDS.get(),writes,infos,{ @@ -1530,8 +1515,7 @@ void Renderer::initScreenSizedResources( EDT_COMBINED_IMAGE_SAMPLER, EDT_STORAGE_IMAGE }); - } - m_driver->updateDescriptorSets(3u,writes,0u,nullptr); + m_driver->updateDescriptorSets(3u,writes,0u,nullptr); // set up m_closestHitDS for (auto i=0u; i<2u; i++) @@ -1580,9 +1564,6 @@ void Renderer::initScreenSizedResources( } m_driver->updateDescriptorSets(8u,writes,0u,nullptr); - m_visibilityBuffer = m_driver->addFrameBuffer(); - m_visibilityBuffer->attach(EFAP_DEPTH_ATTACHMENT,createScreenSizedTexture(EF_D32_SFLOAT)); - m_visibilityBuffer->attach(EFAP_COLOR_ATTACHMENT0,std::move(visibilityBuffer)); m_colorBuffer = m_driver->addFrameBuffer(); m_colorBuffer->attach(EFAP_COLOR_ATTACHMENT0, core::smart_refctd_ptr(m_tonemapOutput)); @@ -1599,81 +1580,6 @@ void Renderer::initScreenSizedResources( std::cout << std::endl; } -void Renderer::deinitScreenSizedResources() -{ - auto commandQueue = m_rrManager->getCLCommandQueue(); - ocl::COpenCLHandler::ocl.pclFinish(commandQueue); - - glFinish(); - - // make sure descriptor sets dont dangle - //m_driver->bindDescriptorSets(video::EPBP_COMPUTE,nullptr,0u,4u,nullptr); - m_closestHitDS[0] = m_closestHitDS[1] = nullptr; - m_commonRaytracingDS[0] = m_commonRaytracingDS[1] = nullptr; - - // unset the framebuffer (dangling smartpointer in state cache can prevent the framebuffer from being dropped until the next framebuffer set) - m_driver->setRenderTarget(nullptr,false); - if (m_visibilityBuffer) - { - m_driver->removeFrameBuffer(m_visibilityBuffer); - m_visibilityBuffer = nullptr; - } - if (m_colorBuffer) - { - m_driver->removeFrameBuffer(m_colorBuffer); - m_colorBuffer = nullptr; - } - m_accumulation = m_tonemapOutput = nullptr; - m_albedoAcc = m_albedoRslv = nullptr; - m_normalAcc = m_normalRslv = nullptr; - m_maskAcc = nullptr; - - glFinish(); - - // wait for OpenCL to finish - ocl::COpenCLHandler::ocl.pclFlush(commandQueue); - ocl::COpenCLHandler::ocl.pclFinish(commandQueue); - for (auto i=0; i<2u; i++) - { - auto deleteInteropBuffer = [&](InteropBuffer& buffer) -> void - { - m_rrManager->unlinkBuffer(std::move(buffer.asRRBuffer)); - buffer = {}; - }; - deleteInteropBuffer(m_intersectionBuffer[i]); - deleteInteropBuffer(m_rayBuffer[i]); - } - - m_raygenWorkGroups[0] = m_raygenWorkGroups[1] = 0u; - - m_cullPipeline = nullptr; - m_raygenPipeline = nullptr; - m_closestHitPipeline = nullptr; - m_resolvePipeline = nullptr; - - m_staticViewData.imageDimensions[0] = 0u; - m_staticViewData.imageDimensions[1] = 0u; - m_staticViewData.maxPathDepth = DefaultPathDepth; - m_staticViewData.noRussianRouletteDepth = 5u; - m_staticViewData.samplesPerPixelPerDispatch = 1u; - m_staticViewData.hideEnvmap = false; - m_staticViewData.envMapPDFNormalizationFactor = core::infinity(); - m_staticViewData.cascadeParams = {}; - m_totalRaysCast = 0ull; - m_rcpPixelSize = {0.f,0.f}; - m_framesDispatched = 0u; - std::fill_n(m_prevView.pointer(),12u,0.f); - m_prevCamTform = nbl::core::matrix4x3(); -} - -void Renderer::resetSampleAndFrameCounters() -{ - m_totalRaysCast = 0ull; - m_framesDispatched = 0u; - std::fill_n(m_prevView.pointer(),12u,0.f); - m_prevCamTform = nbl::core::matrix4x3(); -} - void Renderer::takeAndSaveScreenShot(const std::filesystem::path& screenshotFilePath, bool denoise, const DenoiserArgs& denoiserArgs) { auto commandQueue = m_rrManager->getCLCommandQueue(); @@ -2160,1032 +2066,4 @@ bool Renderer::traceBounce(uint32_t& raycount) } return true; -} - -const float Renderer::AntiAliasingSequence[Renderer::AntiAliasingSequenceLength][2] = -{ -{0.229027962000000, 0.100901043000000}, -{0.934988661250000, 0.900492937500000}, -{0.693936740750000, 0.477888665000000}, -{0.396013875250000, 0.867381653000000}, -{0.151208663250000, 0.331649132250000}, -{0.919338615000000, 0.306386117750000}, -{0.454737456500000, 0.597940860250000}, -{0.911951413000000, 0.584874565000000}, -{0.471331207500000, 0.117509299250000}, -{0.724981748000000, 0.988645892000000}, -{0.227727943750000, 0.553082892250000}, -{0.927148254750000, 0.059077206250000}, -{0.170420940250000, 0.853803466500000}, -{0.369496963250000, 0.372492160250000}, -{0.709055501500000, 0.719526612750000}, -{0.708593019750000, 0.236308825250000}, -{0.053515783250000, 0.244794542562500}, -{0.759417624125000, 0.846532545187500}, -{0.572365454937500, 0.341559262437500}, -{0.269128942562500, 0.962581831375000}, -{0.246508261687500, 0.286661635812500}, -{0.819542439062500, 0.459099133812500}, -{0.411348913687500, 0.737420359250000}, -{0.896647944437500, 0.717554343125000}, -{0.358057598000000, 0.050206801437500}, -{0.605871046250000, 0.779868041500000}, -{0.036816445812500, 0.506511135625000}, -{0.806931985937500, 0.138270723062500}, -{0.045020470000000, 0.818334270875000}, -{0.433264399500000, 0.254739200375000}, -{0.556258709500000, 0.559776624000000}, -{0.611048395312500, 0.162518625750000}, -{0.028918631812500, 0.053438072375000}, -{0.856252533125000, 0.916712681500000}, -{0.580344816187500, 0.463534157062500}, -{0.291334488000000, 0.774756179000000}, -{0.157847279187500, 0.464948199125000}, -{0.775478249937500, 0.320623736250000}, -{0.306258709500000, 0.653526624000000}, -{0.798533046937500, 0.552896543187500}, -{0.349953270437500, 0.123764825500000}, -{0.534027961437500, 0.969931745937500}, -{0.122488661312500, 0.681742937625000}, -{0.849003468812500, 0.216845413250000}, -{0.145343900750000, 0.962506045625000}, -{0.395929912437500, 0.488477370312500}, -{0.675219736437500, 0.601237158875000}, -{0.728921568625000, 0.053308823500000}, -{0.153721825125000, 0.145597505062500}, -{0.852763510375000, 0.797682223125000}, -{0.644595719312500, 0.367380713687500}, -{0.475934665312500, 0.787623234375000}, -{0.037670496437500, 0.386130180750000}, -{0.916111850937500, 0.403604173437500}, -{0.307256453062500, 0.518207928812500}, -{0.836158139312500, 0.677526975812500}, -{0.291525812500000, 0.197831715312500}, -{0.632543215125000, 0.896220934750000}, -{0.039235045687500, 0.629605464812500}, -{0.927263875375000, 0.179881653187500}, -{0.036335975187500, 0.990626511375000}, -{0.458406617875000, 0.372877193062500}, -{0.545614665812500, 0.676662283062500}, -{0.606815968812500, 0.044970413250000}, -{0.031533697125000, 0.184836288625000}, -{0.943869562500000, 0.830155934062500}, -{0.607026984312500, 0.286243495000000}, -{0.385468447812500, 0.923477959062500}, -{0.211591778000000, 0.432717372437500}, -{0.959561740812500, 0.477888665062500}, -{0.340921091062500, 0.599871303750000}, -{0.770926812125000, 0.740443845937500}, -{0.492972183312500, 0.243769330562500}, -{0.520086204062500, 0.865883539250000}, -{0.194132187625000, 0.711586172812500}, -{0.867832801875000, 0.029377324812500}, -{0.018898352500000, 0.755166315812500}, -{0.294110519250000, 0.340476317312500}, -{0.645436781125000, 0.669120978187500}, -{0.537010584750000, 0.070669853500000}, -{0.161951413000000, 0.209874565062500}, -{0.786335975187500, 0.990626511375000}, -{0.525681985937500, 0.419520723062500}, -{0.287619562500000, 0.834550465312500}, -{0.100299557750000, 0.367542953000000}, -{0.787670496437500, 0.386130180750000}, -{0.425010132750000, 0.666850725937500}, -{0.959417841312500, 0.712724761625000}, -{0.259027114250000, 0.027505482375000}, -{0.706747124500000, 0.863983912687500}, -{0.118758709500000, 0.559776624000000}, -{0.979834653750000, 0.076596529437500}, -{0.076814113250000, 0.879551982187500}, -{0.458038607062500, 0.495297691687500}, -{0.676899749875000, 0.533654791000000}, -{0.739509651750000, 0.162886922875000}, -{0.130635833000000, 0.032884578937500}, -{0.995486845875000, 0.879726983937500}, -{0.681683761437500, 0.415213866187500}, -{0.471888733500000, 0.975077322375000}, -{0.002080578437500, 0.292317740812500}, -{0.982026984312500, 0.286243495000000}, -{0.291525812500000, 0.713456715312500}, -{0.803515783250000, 0.619794542562500}, -{0.363736251000000, 0.241491573500000}, -{0.581375603187500, 0.850024182625000}, -{0.126134788437500, 0.739345154625000}, -{0.807256990625000, 0.025225260812500}, -{0.214063133312500, 0.979178170312500}, -{0.279120068187500, 0.455460706437500}, -{0.521614411125000, 0.748128257250000}, -{0.541375661500000, 0.191916865812500}, -{0.092374240812500, 0.093123040062500}, -{0.819780017000000, 0.863865176562500}, -{0.723535390937500, 0.290673655562500}, -{0.333626471625000, 0.991508772375000}, -{0.180081879937500, 0.273337083437500}, -{0.884853249937500, 0.353826861250000}, -{0.486489450437500, 0.649922456187500}, -{0.970355124125000, 0.588720045187500}, -{0.411054041562500, 0.190728892687500}, -{0.670557598000000, 0.782628676437500}, -{0.176686781125000, 0.590995978187500}, -{0.923484185187500, 0.119472166250000}, -{0.229834653750000, 0.826596529437500}, -{0.402229645500000, 0.427815757250000}, -{0.614887300500000, 0.582390020187500}, -{0.721331207625000, 0.117509299250000}, -{0.221780261562500, 0.160787322875000}, -{0.980871046250000, 0.779868041500000}, -{0.521614411125000, 0.498128257250000}, -{0.462698109750000, 0.855009158437500}, -{0.102148981812500, 0.485181351500000}, -{0.790505847500000, 0.272359588500000}, -{0.357263913000000, 0.553624565062500}, -{0.852875617687500, 0.518271589687500}, -{0.412788910312500, 0.072860258937500}, -{0.739509651750000, 0.912886922875000}, -{0.244715387500000, 0.610882883562500}, -{0.931245437000000, 0.247161473000000}, -{0.118495619500000, 0.827404835625000}, -{0.356241537562500, 0.307793951312500}, -{0.739954645312500, 0.601971750750000}, -{0.652229645500000, 0.240315757250000}, -{0.085230272750000, 0.149967825937500}, -{0.790487853250000, 0.802468641250000}, -{0.742972183312500, 0.431269330562500}, -{0.338023546687500, 0.864140358375000}, -{0.161359195250000, 0.386030244500000}, -{0.979622565375000, 0.415764143437500}, -{0.344324410875000, 0.743490102812500}, -{0.850000234687500, 0.588036936812500}, -{0.478921568625000, 0.053308823500000}, -{0.575878945812500, 0.904948635625000}, -{0.066809364125000, 0.711985215062500}, -{0.842374240812500, 0.093123040062500}, -{0.072833622000000, 0.943689044750000}, -{0.473982478062500, 0.309619342562500}, -{0.643468702500000, 0.727011596187500}, -{0.661784804062500, 0.096504548000000}, -{0.075593410125000, 0.020665263437500}, -{0.846367111937500, 0.980869489750000}, -{0.584417841312500, 0.402177886625000}, -{0.419264650000000, 0.807665176000000}, -{0.108911798812500, 0.274823750687500}, -{0.842214949125000, 0.395649388625000}, -{0.424460011250000, 0.541515061937500}, -{0.914875915625000, 0.525088448500000}, -{0.276815978250000, 0.138406141250000}, -{0.682946765937500, 0.941192325375000}, -{0.243922631125000, 0.674414353500000}, -{0.983747165312500, 0.225123234375000}, -{0.209039534812500, 0.919381743937500}, -{0.317979460312500, 0.396931190375000}, -{0.595789254625000, 0.645833852812500}, -{0.589063133312500, 0.229178170312500}, -{0.201732996437500, 0.034567680750000}, -{0.911951413000000, 0.959874565062500}, -{0.669465614812500, 0.307270670687500}, -{0.442773254937500, 0.918452206312500}, -{0.228659227750000, 0.498372525687500}, -{0.864786425062500, 0.258916160937500}, -{0.366015783250000, 0.682294542562500}, -{0.832368054687500, 0.749523853312500}, -{0.475148582250000, 0.180782790250000}, -{0.543804934062500, 0.806559289687500}, -{0.041831345187500, 0.574164114062500}, -{0.981787063687500, 0.014769301562500}, -{0.167325380812500, 0.796656456375000}, -{0.305883847687500, 0.260168413875000}, -{0.736593646187500, 0.544303438875000}, -{0.595631980687500, 0.113338942562500}, -{0.233747165312500, 0.225123234375000}, -{0.881420496437500, 0.854880180750000}, -{0.514120916562500, 0.361726452125000}, -{0.262088408375000, 0.897305108937500}, -{0.040764352812500, 0.448613484812500}, -{0.882527922875000, 0.453355770312500}, -{0.486593646187500, 0.544303438875000}, -{0.944193581437500, 0.650074503000000}, -{0.403764392000000, 0.003513614062500}, -{0.647805652187500, 0.839498260375000}, -{0.004346402437500, 0.700568695812500}, -{0.863684364125000, 0.149485215062500}, -{0.075593410125000, 0.770665263437500}, -{0.260573230937500, 0.378374937125000}, -{0.606947383687500, 0.518907935937500}, -{0.522543332062500, 0.131538315687500}, -{0.115674527437500, 0.213752289562500}, -{0.978861550187500, 0.943531534250000}, -{0.716222608750000, 0.357993041500000}, -{0.396123640562500, 0.988911053812500}, -{0.116417439062500, 0.427849133812500}, -{0.960375986437500, 0.355143408875000}, -{0.396123640562500, 0.613911053812500}, -{0.771872079375000, 0.679610301562500}, -{0.407651999187500, 0.129979780250000}, -{0.610967390562500, 0.957538983500000}, -{0.099030910125000, 0.622227763437500}, -{0.792605235062500, 0.213450866625000}, -{0.231787063687500, 0.764769301562500}, -{0.345102996500000, 0.451097146437500}, -{0.537639116937500, 0.609792796562500}, -{0.670557598000000, 0.032628676437500}, -{0.161148929812500, 0.091845178375000}, -{0.915446201000000, 0.774126136937500}, -{0.542495165812500, 0.275647345250000}, -{0.316935423562500, 0.923529210750000}, -{0.209068937250000, 0.340693571625000}, -{0.770926812125000, 0.490443845937500}, -{0.462732614000000, 0.712224844000000}, -{0.887121046250000, 0.643149291500000}, -{0.302373640562500, 0.082661053812500}, -{0.728921568625000, 0.803308823500000}, -{0.181258709500000, 0.653526624000000}, -{0.977987853250000, 0.146218641250000}, -{0.016702341062500, 0.927996303750000}, -{0.467374240812500, 0.431013665062500}, -{0.706158315437500, 0.659556033875000}, -{0.669301523312500, 0.186625825562500}, -{0.039461819812500, 0.116237049750000}, -{0.798533046937500, 0.927896543187500}, -{0.636245165812500, 0.463147345250000}, -{0.358057598000000, 0.800206801437500}, -{0.057953992187500, 0.323742603312500}, -{0.838196808062500, 0.318240323625000}, -{0.288441098000000, 0.563378403500000}, -{0.981947383687500, 0.518907935937500}, -{0.348181288187500, 0.183212688250000}, -{0.506420496437500, 0.917380180750000}, -{0.164875915625000, 0.525088448500000}, -{0.787802165812500, 0.082912283062500}, -{0.134409779187500, 0.902448199125000}, -{0.408376747562500, 0.326245734125000}, -{0.584561740812500, 0.727888665062500}, -{0.534266910125000, 0.009900787187500}, -{0.192731703031250, 0.122610961484375}, -{0.886403666453125, 0.919165570765625}, -{0.740362459437500, 0.479082682703125}, -{0.381143881328125, 0.825372076343750}, -{0.177058600328125, 0.355660703968750}, -{0.880411986109375, 0.304388585781250}, -{0.489954645359375, 0.601971750750000}, -{0.931245437015625, 0.622161472968750}, -{0.495691442609375, 0.089534956375000}, -{0.748617226093750, 0.964032599359375}, -{0.200716102781250, 0.538594564312500}, -{0.902762098828125, 0.040629656437500}, -{0.167366403703125, 0.826817667671875}, -{0.326986691484375, 0.360298081343750}, -{0.725793624375000, 0.689620323765625}, -{0.725003755000000, 0.197653489734375}, -{0.019203528312500, 0.219887995546875}, -{0.789461819796875, 0.866237049781250}, -{0.602987853250000, 0.364968641265625}, -{0.286530910140625, 0.997227763453125}, -{0.197691088203125, 0.299653371203125}, -{0.864509651765625, 0.475386922921875}, -{0.409509265921875, 0.695098575390625}, -{0.924966150343750, 0.731175805390625}, -{0.366549151562500, 0.016182260046875}, -{0.620446765921875, 0.753692325390625}, -{0.004126035234375, 0.512365679515625}, -{0.759204111453125, 0.126627783906250}, -{0.039461819796875, 0.866237049781250}, -{0.385324830531250, 0.282537197156250}, -{0.529500805046875, 0.539659192578125}, -{0.620486845921875, 0.129726983984375}, -{0.040287232453125, 0.022961294593750}, -{0.871648411546875, 0.886075859718750}, -{0.567731703031250, 0.497610961484375}, -{0.271219649968750, 0.785940731265625}, -{0.149813081953125, 0.495059302156250}, -{0.752354406031250, 0.336633136296875}, -{0.267950605953125, 0.630717656421875}, -{0.763064970921875, 0.526211358984375}, -{0.350302165859375, 0.082912283093750}, -{0.541375661546875, 0.941916865828125}, -{0.067888875390625, 0.648631653203125}, -{0.817282235640625, 0.240645457843750}, -{0.176686781125000, 0.965995978171875}, -{0.414407018781250, 0.458335411421875}, -{0.635381359671875, 0.622395865796875}, -{0.696580598671875, 0.010305563015625}, -{0.146140435203125, 0.181972166265625}, -{0.853197227578125, 0.768215064734375}, -{0.631158461921875, 0.330667103468750}, -{0.443098689046875, 0.770526325000000}, -{0.008860189078125, 0.404241883828125}, -{0.920499240812500, 0.436873040078125}, -{0.274931749687500, 0.517395920968750}, -{0.872488661328125, 0.681742937687500}, -{0.273658139312500, 0.240026975812500}, -{0.686178846875000, 0.902720720890625}, -{0.022328994562500, 0.659601535312500}, -{0.889064677375000, 0.139944156000000}, -{0.041831345203125, 0.949164114093750}, -{0.443262299140625, 0.313206182765625}, -{0.553107937421875, 0.637161480234375}, -{0.576361266343750, 0.010049207671875}, -{0.024757727531250, 0.155556940859375}, -{0.954885609765625, 0.864774783453125}, -{0.576988498046875, 0.268435650828125}, -{0.378272153750000, 0.889096529468750}, -{0.243922631171875, 0.424414353515625}, -{0.993449504812500, 0.487462829328125}, -{0.315047772046875, 0.590538342781250}, -{0.757436479140625, 0.715431613031250}, -{0.454737456671875, 0.222940860375000}, -{0.506538910328125, 0.822860258968750}, -{0.223798019234375, 0.699851317375000}, -{0.839514399500000, 0.012551700359375}, -{0.013378945812500, 0.811198635640625}, -{0.259404890625000, 0.333637616328125}, -{0.674460011265625, 0.635265061968750}, -{0.552179912484375, 0.113477370312500}, -{0.133506990359375, 0.242482936484375}, -{0.792605235078125, 0.963450866640625}, -{0.556245437015625, 0.434661472968750}, -{0.302640369765625, 0.866357903265625}, -{0.104025812484375, 0.322831715312500}, -{0.788292439093750, 0.420036633796875}, -{0.383947288453125, 0.645595154640625}, -{0.987679025703125, 0.720586141078125}, -{0.310166960328125, 0.049274940406250}, -{0.692634651765625, 0.826949422921875}, -{0.066739180750000, 0.551367061812500}, -{0.954885609765625, 0.114774783453125}, -{0.106252533187500, 0.916712681484375}, -{0.490362459437500, 0.479082682703125}, -{0.646028602781250, 0.509297689312500}, -{0.696508847734375, 0.182043413890625}, -{0.167639399500000, 0.008157169109375}, -{0.942731703031250, 0.935110961484375}, -{0.682010510203125, 0.383364961312500}, -{0.444750986453125, 0.993815283906250}, -{0.012293182515625, 0.265019109265625}, -{0.943520700468750, 0.285664643703125}, -{0.256436740812500, 0.727888665078125}, -{0.792605235078125, 0.588450866640625}, -{0.323828669343750, 0.228345414000000}, -{0.589727949703125, 0.818705937671875}, -{0.146647944500000, 0.717554343109375}, -{0.763378945812500, 0.061198635640625}, -{0.245217761562500, 0.944967010375000}, -{0.302009651765625, 0.475386922921875}, -{0.508157018781250, 0.708335411421875}, -{0.552058600328125, 0.230660703968750}, -{0.076470961921875, 0.065042103468750}, -{0.839060384390625, 0.826948487828125}, -{0.743383847734375, 0.260168413890625}, -{0.361729406031250, 0.961633136296875}, -{0.130411986109375, 0.304388585781250}, -{0.913057411375000, 0.372578939312500}, -{0.450791960328125, 0.635700721656250}, -{0.994715387546875, 0.610882883562500}, -{0.396123640625000, 0.238911053828125}, -{0.635171568625000, 0.803308823531250}, -{0.134436274500000, 0.588235294109375}, -{0.893091363734375, 0.085389815609375}, -{0.204885609765625, 0.864774783453125}, -{0.419763913046875, 0.397374565093750}, -{0.589063133281250, 0.604178170375000}, -{0.692634651765625, 0.076949422921875}, -{0.192731703031250, 0.185110961484375}, -{0.951814247656250, 0.756306315203125}, -{0.506689186515625, 0.439765218421875}, -{0.456461826015625, 0.821001137000000}, -{0.083707248625000, 0.461775383828125}, -{0.764249240812500, 0.280623040078125}, -{0.323579912671875, 0.557221730578125}, -{0.818079645359375, 0.508221750750000}, -{0.435674328421875, 0.095052115796875}, -{0.725148582281250, 0.930782790234375}, -{0.211591777984375, 0.620217372437500}, -{0.901467761562500, 0.194967010375000}, -{0.114509651765625, 0.873824422921875}, -{0.350302165859375, 0.270412283093750}, -{0.713196808109375, 0.568240323640625}, -{0.631158461921875, 0.205667103468750}, -{0.121648411546875, 0.136075859718750}, -{0.807256990687500, 0.775225260828125}, -{0.748441255000000, 0.385153489734375}, -{0.322881990687500, 0.822100260828125}, -{0.170499240812500, 0.436873040078125}, -{0.976735045687500, 0.379605464812500}, -{0.326988498046875, 0.705935650828125}, -{0.849030910140625, 0.622227763453125}, -{0.456628942609375, 0.025081831375000}, -{0.603718978906250, 0.881272112125000}, -{0.087671365468750, 0.733711546609375}, -{0.858316099875000, 0.063684800093750}, -{0.079233855890625, 0.980882302687500}, -{0.498831558375000, 0.275019330593750}, -{0.683006746078125, 0.696560873750000}, -{0.634421685203125, 0.070155760015625}, -{0.100941098000000, 0.000878403515625}, -{0.868983666328125, 0.946905808593750}, -{0.622823333015625, 0.407884578921875}, -{0.380701413046875, 0.772374565093750}, -{0.070966777984375, 0.276467372437500}, -{0.869730392156250, 0.411764705875000}, -{0.390973727828125, 0.533716984406250}, -{0.934919978109375, 0.561328326953125}, -{0.267725152796875, 0.170350753109375}, -{0.650556467859375, 0.939171186375000}, -{0.208092013671875, 0.656130963328125}, -{0.939854406031250, 0.211633136296875}, -{0.227987853250000, 0.896218641265625}, -{0.362037827187500, 0.411778179156250}, -{0.575564970921875, 0.682461358984375}, -{0.603861550250000, 0.193531534234375}, -{0.245936791328125, 0.056280808593750}, -{0.931245437015625, 0.997161472968750}, -{0.674341363734375, 0.272889815609375}, -{0.475148582281250, 0.930782790234375}, -{0.196690920375000, 0.490305748390625}, -{0.823073230953125, 0.290484312156250}, -{0.349801672015625, 0.643614082703125}, -{0.816809364156250, 0.711985215093750}, -{0.442773254953125, 0.168452206343750}, -{0.559175124515625, 0.768645847968750}, -{0.012608312281250, 0.564660480046875}, -{0.951732996484375, 0.034567680765625}, -{0.130635833015625, 0.782884578921875}, -{0.295859197828125, 0.295320202140625}, -{0.712431749687500, 0.517395920968750}, -{0.572626461875000, 0.068089897125000}, -{0.211591777984375, 0.245217372437500}, -{0.901756746078125, 0.821560873750000}, -{0.512364601359375, 0.315328221531250}, -{0.275838593406250, 0.932598880093750}, -{0.007956102718750, 0.451497525703125}, -{0.924966150343750, 0.481175805390625}, -{0.454495282953125, 0.559257955593750}, -{0.978187903312500, 0.673257136171875}, -{0.416103249937500, 0.041326861265625}, -{0.664447403859375, 0.864416693968750}, -{0.033521988046875, 0.696631179015625}, -{0.852837228421875, 0.184355089812500}, -{0.090934062750000, 0.810372893375000}, -{0.275746963312500, 0.411554660312500}, -{0.588037245453125, 0.558795337875000}, -{0.554922654015625, 0.160357524531250}, -{0.072833622000000, 0.193689044750000}, -{0.964063133281250, 0.979178170375000}, -{0.708517234312500, 0.319548392906250}, -{0.432256990687500, 0.962725260828125}, -{0.068079645359375, 0.414471750750000}, -{0.963190877593750, 0.324420555781250}, -{0.411502470921875, 0.573086358984375}, -{0.800162124781250, 0.669611615750000}, -{0.387554934109375, 0.150309289718750}, -{0.579945004250000, 0.965966294140625}, -{0.065522102093750, 0.599326277234375}, -{0.761255117500000, 0.204583567718750}, -{0.196950541453125, 0.770728070765625}, -{0.344324410937500, 0.493490102843750}, -{0.510111550250000, 0.568531534234375}, -{0.636389399500000, 0.008157169109375}, -{0.128530229140625, 0.090431613031250}, -{0.883566727531250, 0.752475196796875}, -{0.552206093281250, 0.309003302484375}, -{0.348181288187500, 0.933212688265625}, -{0.227987853250000, 0.364968641265625}, -{0.771924527437500, 0.448127289609375}, -{0.489679912484375, 0.745313307812500}, -{0.927148254953125, 0.684077206343750}, -{0.264066255000000, 0.103903489734375}, -{0.740057568187500, 0.764054456484375}, -{0.148947313656250, 0.630208463203125}, -{0.974161986109375, 0.179388585781250}, -{0.010457836296875, 0.893541028515625}, -{0.498441255000000, 0.385153489734375}, -{0.744468904421875, 0.637380397421875}, -{0.678301531546875, 0.136423458078125}, -{0.010191088203125, 0.112153371203125}, -{0.774757727531250, 0.905556940859375}, -{0.674229406031250, 0.446008136296875}, -{0.319922419343750, 0.784986039000000}, -{0.011042923812500, 0.349437248625000}, -{0.821379264859375, 0.354136628500000}, -{0.257237357453125, 0.579287821171875}, -{0.948151308765625, 0.522112716656250}, -{0.318520700468750, 0.160664643703125}, -{0.543804934109375, 0.900309289718750}, -{0.130607996843750, 0.519919121093750}, -{0.811627065421875, 0.071665408953125}, -{0.160867175625000, 0.931752899046875}, -{0.428297719390625, 0.362355138953125}, -{0.609505036140625, 0.690144858781250}, -{0.504804041609375, 0.003228892687500}, -{0.216196606265625, 0.064729040234375}, -{0.901736845921875, 0.879726983984375}, -{0.708719649968750, 0.453909481265625}, -{0.415337952218750, 0.849024716109375}, -{0.134853249937500, 0.353826861265625}, -{0.903787097500000, 0.267080047484375}, -{0.479025812484375, 0.572831715312500}, -{0.876605124109375, 0.604345045187500}, -{0.456461826015625, 0.071001137000000}, -{0.709494562484375, 0.955644215312500}, -{0.231947383703125, 0.518907935984375}, -{0.932230392156250, 0.013327205875000}, -{0.145086204046875, 0.865883539265625}, -{0.350930456281250, 0.348899376265625}, -{0.725034838203125, 0.739106496203125}, -{0.739954645359375, 0.226971750750000}, -{0.042605235078125, 0.213450866640625}, -{0.811627065421875, 0.821665408953125}, -{0.587735274500000, 0.312719600875000}, -{0.307230392156250, 0.950827205875000}, -{0.217487057734375, 0.273792953031250}, -{0.854401308765625, 0.440081466656250}, -{0.384747995484375, 0.706561433531250}, -{0.899206688062500, 0.699456330843750}, -{0.334068937265625, 0.028193571656250}, -{0.576864665859375, 0.801662283093750}, -{0.041360180171875, 0.539240991515625}, -{0.780622165328125, 0.170435734406250}, -{0.025074889437500, 0.841885738250000}, -{0.412686740812500, 0.274763665078125}, -{0.551686781125000, 0.512870978171875}, -{0.574633261734375, 0.138224135796875}, -{0.058436791328125, 0.056280808593750}, -{0.822110274500000, 0.890844600875000}, -{0.618449504812500, 0.487462829328125}, -{0.264066255000000, 0.807028489734375}, -{0.132527922859375, 0.453355770328125}, -{0.807953992203125, 0.323742603312500}, -{0.302148254953125, 0.684077206343750}, -{0.794171695281250, 0.522748994546875}, -{0.372686140625000, 0.110004803828125}, -{0.507741981953125, 0.951245734125000}, -{0.107097304093750, 0.651192048015625}, -{0.822833622000000, 0.193689044750000}, -{0.181245437015625, 0.997161472968750}, -{0.384747995484375, 0.456561433531250}, -{0.662226998781250, 0.569583683906250}, -{0.727197083078125, 0.021632137984375}, -{0.184988661328125, 0.150492937687500}, -{0.873243045828125, 0.810942332640625}, -{0.684963615078125, 0.357045297593750}, -{0.461525611203125, 0.759528012437500}, -{0.025718904421875, 0.410817897421875}, -{0.897009311359375, 0.420948834468750}, -{0.263037063734375, 0.546019301578125}, -{0.857097304093750, 0.651192048015625}, -{0.252866199843750, 0.205957512640625}, -{0.665602115484375, 0.895166775859375}, -{0.056996963312500, 0.684992160312500}, -{0.918804934109375, 0.150309289718750}, -{0.019203528312500, 0.969887995546875}, -{0.485853633703125, 0.339220435984375}, -{0.509352115484375, 0.676416775859375}, -{0.564952190359375, 0.039350341546875}, -{0.061178846875000, 0.152720720890625}, -{0.979027962734375, 0.850901043359375}, -{0.618986693593750, 0.251067502968750}, -{0.416788412578125, 0.890801763843750}, -{0.191935423609375, 0.392279210781250}, -{0.959424527437500, 0.448127289609375}, -{0.360967390625000, 0.582538983515625}, -{0.802390435203125, 0.744472166265625}, -{0.498617226093750, 0.214032599359375}, -{0.542495165859375, 0.838147345281250}, -{0.211457644609375, 0.742513028953125}, -{0.837488317609375, 0.030941206375000}, -{0.038430456281250, 0.786399376265625}, -{0.290324504812500, 0.370275329328125}, -{0.678301531546875, 0.667673458078125}, -{0.510613942796875, 0.106955928328125}, -{0.170736691484375, 0.235298081343750}, -{0.759083993796875, 0.997656627843750}, -{0.539572313656250, 0.380208463203125}, -{0.255388875390625, 0.867381653203125}, -{0.071379264859375, 0.354136628500000}, -{0.758860189078125, 0.404241883828125}, -{0.420321786390625, 0.636298924375000}, -{0.978659227718750, 0.748372525703125}, -{0.287503755000000, 0.010153489734375}, -{0.739679912484375, 0.870313307812500}, -{0.089315978250000, 0.513406141265625}, -{0.943869562484375, 0.080155934062500}, -{0.076564677375000, 0.913381656000000}, -{0.444542439093750, 0.459099133796875}, -{0.633162999796875, 0.540307445062500}, -{0.709818581500000, 0.157887002984375}, -{0.167325380828125, 0.046656456390625}, -{0.977987853250000, 0.896218641265625}, -{0.652229645515625, 0.427815757218750}, -{0.492972183375000, 0.993769330593750}, -{0.040505847500000, 0.272359588500000}, -{0.962978249937500, 0.260076861265625}, -{0.302640369765625, 0.741357903265625}, -{0.768208405500000, 0.610922261187500}, -{0.318273390046875, 0.193320190000000}, -{0.619905641343750, 0.853941035859375}, -{0.181258709546875, 0.747276624031250}, -{0.757229657953125, 0.013359518093750}, -{0.244715387546875, 0.985882883562500}, -{0.259008847734375, 0.494543413890625}, -{0.554055652187500, 0.714498260375000}, -{0.534027961468750, 0.219931745984375}, -{0.069780017046875, 0.113865176609375}, -{0.848982478109375, 0.872119342578125}, -{0.713196808109375, 0.271365323640625}, -{0.363736251062500, 0.991491573531250}, -{0.150433761421875, 0.282401366203125}, -{0.901208663437500, 0.331649132359375}, -{0.463675308375000, 0.681269330593750}, -{0.949633261734375, 0.606974135796875}, -{0.384851754687500, 0.208333852843750}, -{0.668614601359375, 0.752828221531250}, -{0.181245437015625, 0.622161472968750}, -{0.895086204046875, 0.115883539265625}, -{0.193869562484375, 0.830155934062500}, -{0.387335340921875, 0.396347453890625}, -{0.564854406031250, 0.586633136296875}, -{0.745691442609375, 0.089534956375000}, -{0.245486845921875, 0.129726983984375}, -{0.982811359250000, 0.811790368250000}, -{0.536503468843750, 0.474657913296875}, -{0.489679912484375, 0.870313307812500}, -{0.067888875390625, 0.492381653203125}, -{0.751398662484375, 0.307813307812500}, -{0.352987853250000, 0.521218641265625}, -{0.869468904421875, 0.543630397421875}, -{0.400000938734375, 0.102147747421875}, -{0.716287097500000, 0.892080047484375}, -{0.204945004250000, 0.590966294140625}, -{0.883506990359375, 0.242482936484375}, -{0.065143307734375, 0.844593734281250}, -{0.327001686515625, 0.299140218421875}, -{0.748446786390625, 0.573798924375000}, -{0.665686274500000, 0.213235294109375}, -{0.115683153500000, 0.178056211000000}, -{0.757883424281250, 0.796209072156250}, -{0.713222390562500, 0.397222857359375}, -{0.362679025703125, 0.845586141078125}, -{0.147009311359375, 0.420948834468750}, -{0.954472218843750, 0.404345413296875}, -{0.363836204046875, 0.740883539265625}, -{0.821904890625000, 0.583637616328125}, -{0.492832801906250, 0.029377324812500}, -{0.599161986109375, 0.929388585781250}, -{0.096331207625000, 0.695634299281250}, -{0.848982478109375, 0.122119342578125}, -{0.099003468843750, 0.966845413296875}, -{0.462431749687500, 0.267395920968750}, -{0.677354460328125, 0.725544471656250}, -{0.650008709546875, 0.122276624031250}, -{0.123243045828125, 0.060942332640625}, -{0.822833622000000, 0.943689044750000}, -{0.586591777984375, 0.432717372437500}, -{0.408903485687500, 0.776738982078125}, -{0.080479406031250, 0.305383136296875}, -{0.818079645359375, 0.414471750750000}, -{0.431931985968750, 0.513270723078125}, -{0.903169756421875, 0.555146535265625}, -{0.285517059468750, 0.166546514046875}, -{0.675219736453125, 0.976237158906250}, -{0.238942076937500, 0.643155269500000}, -{0.964063133281250, 0.229178170375000}, -{0.192731703031250, 0.935110961484375}, -{0.348181288187500, 0.401962688265625}, -{0.618003220203125, 0.658636770343750}, -{0.588190877593750, 0.199420555781250}, -{0.216958200468750, 0.007344331203125}, -{0.883506990359375, 0.992482936484375}, -{0.636042923812500, 0.294749748625000}, -{0.481304934109375, 0.900309289718750}, -{0.237679025703125, 0.470586141078125}, -{0.831750421625000, 0.262285054609375}, -{0.341264392046875, 0.675388614109375}, -{0.864509651765625, 0.725386922921875}, -{0.481304934109375, 0.150309289718750}, -{0.507180456281250, 0.786399376265625}, -{0.033602444796875, 0.600612049781250}, -{0.962250867203125, 0.054211353312500}, -{0.151703992203125, 0.761242603312500}, -{0.261464799453125, 0.261330050531250}, -{0.740667841375000, 0.511552886640625}, -{0.615093996609375, 0.088785852218750}, -{0.228861550250000, 0.193531534234375}, -{0.920420940359375, 0.853803466546875}, -{0.541111850968750, 0.345010423484375}, -{0.305472183375000, 0.900019330593750}, -{0.044319650703125, 0.478886922328125}, -{0.892725152796875, 0.482850753109375}, -{0.490667841375000, 0.511552886640625}, -{0.971780261562500, 0.629537322875000}, -{0.387554934109375, 0.056559289718750}, -{0.662939186515625, 0.814765218421875}, -{0.052390435203125, 0.744472166265625}, -{0.826564677375000, 0.163381656000000}, -{0.103197227578125, 0.768215064734375}, -{0.306265020015625, 0.415613958984375}, -{0.575564970921875, 0.526211358984375}, -{0.505411986109375, 0.183294835781250}, -{0.079233855890625, 0.230882302687500}, -{0.939854406031250, 0.961633136296875}, -{0.747488661328125, 0.369242937687500}, -{0.399926992500000, 0.954583567718750}, -{0.122446606265625, 0.392854040234375}, -{0.987719736453125, 0.351237158906250}, -{0.425219736453125, 0.601237158906250}, -{0.774082801906250, 0.642658574812500}, -{0.416299322109375, 0.161398788656250}, -{0.602987853250000, 0.989968641265625}, -{0.089514399500000, 0.575051700359375}, -{0.786335975187500, 0.240626511406250}, -{0.231815968843750, 0.794970413296875}, -{0.318750014656250, 0.443002308546875}, -{0.505566445812500, 0.623698635640625}, -{0.637067640531250, 0.039603148984375}, -{0.180883847734375, 0.072668413890625}, -{0.917325380828125, 0.796656456390625}, -{0.526756746078125, 0.259060873750000}, -{0.334878599875000, 0.893762925093750}, -{0.225425998046875, 0.315310650828125}, -{0.794319650703125, 0.478886922328125}, -{0.445957801906250, 0.724689824812500}, -{0.915440720703125, 0.656963966125000}, -{0.302640369765625, 0.116357903265625}, -{0.698976641187500, 0.774455388406250}, -{0.177148254953125, 0.684077206343750}, -{0.949633261734375, 0.138224135796875}, -{0.045314677375000, 0.913381656000000}, -{0.458038607125000, 0.401547691687500}, -{0.713980484156250, 0.625879926296875}, -{0.643255797593750, 0.155068738921875}, -{0.025074889437500, 0.091885738250000}, -{0.766702341031250, 0.927996303765625}, -{0.664447403859375, 0.489416693968750}, -{0.352787232453125, 0.772961294593750}, -{0.025478249937500, 0.320623736265625}, -{0.854074830531250, 0.352849697156250}, -{0.286530910140625, 0.622227763453125}, -{0.977727943875000, 0.553082892328125}, -{0.350685461906250, 0.153596186453125}, -{0.557230392156250, 0.880514705875000}, -{0.153169756421875, 0.555146535265625}, -{0.789461819796875, 0.116237049781250}, -{0.168804934109375, 0.900309289718750}, -{0.377355421296875, 0.330038940000000}, -{0.612679025703125, 0.720586141078125}, -{0.508506990359375, 0.054982936484375}, -{0.196917624109375, 0.096532545187500}, -{0.910867175625000, 0.931752899046875}, -{0.728171685203125, 0.443690916265625}, -{0.435674328421875, 0.845052115796875}, -{0.182230392156250, 0.325827205875000}, -{0.884087952218750, 0.286524716109375}, -{0.448834987281250, 0.579381097156250}, -{0.884436274500000, 0.588235294109375}, -{0.439752211921875, 0.123635853468750}, -{0.688559795171875, 0.981591765453125}, -{0.198151308765625, 0.522112716656250}, -{0.901703992203125, 0.011242603312500}, -{0.128530229140625, 0.840431613031250}, -{0.317826892046875, 0.321872989109375}, -{0.696508847734375, 0.744543413890625}, -{0.688847218843750, 0.216845413296875}, -{0.004724588125000, 0.188791578953125}, -{0.787802165859375, 0.832912283093750}, -{0.567052101359375, 0.371480565281250}, -{0.302148254953125, 0.996577206343750}, -{0.243986693593750, 0.251067502968750}, -{0.825795788375000, 0.474201552750000}, -{0.430456688062500, 0.699456330843750}, -{0.931258709546875, 0.747276624031250}, -{0.325564970921875, 0.057461358984375}, -{0.575724486109375, 0.777044835781250}, -{0.058436791328125, 0.525030808593750}, -{0.759808761421875, 0.157401366203125}, -{0.010191088203125, 0.862153371203125}, -{0.411476654468750, 0.296344298265625}, -{0.505607996843750, 0.519919121093750}, -{0.563898662484375, 0.182813307812500}, -{0.007229657953125, 0.013359518093750}, -{0.826564677375000, 0.913381656000000}, -{0.615744562484375, 0.440019215312500}, -{0.273049196593750, 0.752824731078125}, -{0.126134788453125, 0.489345154640625}, -{0.776528750687500, 0.346344691359375}, -{0.271093019843750, 0.673808825390625}, -{0.766504140937500, 0.549462718109375}, -{0.321690920375000, 0.115305748390625}, -{0.552058600328125, 0.980660703968750}, -{0.086158139312500, 0.677526975812500}, -{0.868983666328125, 0.196905808593750}, -{0.133506990359375, 0.992482936484375}, -{0.428389216390625, 0.479768192062500}, -{0.642183234343750, 0.594837245046875}, -{0.693414534828125, 0.060006743953125}, -{0.177263875390625, 0.179881653203125}, -{0.822881453125000, 0.799457928828125}, -{0.657602996515625, 0.342698708937500}, -{0.447833622000000, 0.795251544750000}, -{0.049908315421875, 0.432993533953125}, -{0.884744019140625, 0.384203634359375}, -{0.303495121031250, 0.531469981562500}, -{0.829156508796875, 0.635903919875000}, -{0.279003945812500, 0.225261135640625}, -{0.626154293906250, 0.912625724750000}, -{0.009546365468750, 0.639961546609375}, -{0.886403666453125, 0.169165570765625}, -{0.052259883703125, 0.979845435984375}, -{0.459494562484375, 0.330644215312500}, -{0.524813081953125, 0.651309302156250}, -{0.606787063734375, 0.014769301578125}, -{0.010457836296875, 0.143541028515625}, -{0.947626461875000, 0.818089897125000}, -{0.571917624109375, 0.284032545187500}, -{0.416299322109375, 0.911398788656250}, -{0.236033046906250, 0.396646543203125}, -{0.990797719390625, 0.456105138953125}, -{0.333626471656250, 0.616508772390625}, -{0.787209695250000, 0.716482502812500}, -{0.439756778562500, 0.194376370593750}, -{0.552179912484375, 0.863477370312500}, -{0.243449504812500, 0.737462829328125}, -{0.822881453125000, 0.049457928828125}, -{0.058436791328125, 0.806280808593750}, -{0.267237456671875, 0.347940860375000}, -{0.659100916609375, 0.628228892687500}, -{0.525109796625000, 0.082597554609375}, -{0.135951233515625, 0.201639822421875}, -{0.761255117500000, 0.954583567718750}, -{0.556490320328125, 0.399823343937500}, -{0.258466777984375, 0.838967372437500}, -{0.104074830531250, 0.352849697156250}, -{0.803389216390625, 0.386018192062500}, -{0.392068237890625, 0.666186056234375}, -{0.984505036140625, 0.690144858781250}, -{0.277932351500000, 0.052908284062500}, -{0.713196808109375, 0.833865323640625}, -{0.102915496515625, 0.537034646437500}, -{0.979027962734375, 0.100901043359375}, -{0.106115002812500, 0.885378765484375}, -{0.473302101359375, 0.455953221531250}, -{0.685199429843750, 0.551526284734375}, -{0.706158315421875, 0.128306033953125}, -{0.147265783328125, 0.057294542578125}, -{0.959039534828125, 0.919381743953125}, -{0.646013875390625, 0.398631653203125}, -{0.458517234312500, 0.944548392906250}, -{0.056758487734375, 0.295253452109375}, -{0.988836204046875, 0.303383539265625}, -{0.254309364156250, 0.711985215093750}, -{0.775799306890625, 0.567053766171875}, -{0.340921091031250, 0.224871303765625}, -{0.620506746078125, 0.821560873750000}, -{0.160012464359375, 0.690720392781250}, -{0.788430456281250, 0.036399376265625}, -{0.220355124109375, 0.963720045187500}, -{0.287648582281250, 0.493282790234375}, -{0.536503468843750, 0.724657913296875}, -{0.552148254953125, 0.246577206343750}, -{0.116843560203125, 0.107753416265625}, -{0.842374240812500, 0.843123040078125}, -{0.696912145562500, 0.310820697562500}, -{0.330881907437500, 0.958779769828125}, -{0.169338615078125, 0.306386117906250}, -{0.927058600328125, 0.355660703968750}, -{0.477915496515625, 0.630784646437500}, -{0.947107614062500, 0.571599844062500}, -{0.411298019234375, 0.231101317375000}, -{0.634196201015625, 0.774126137000000}, -{0.133506990359375, 0.617482936484375}, -{0.911148929828125, 0.091845178421875}, -{0.229027962734375, 0.850901043359375}, -{0.433436791328125, 0.431280808593750}, -{0.570900611203125, 0.618903012437500}, -{0.691994851500000, 0.099783284062500}, -{0.212500058671875, 0.147009234203125}, -{0.951732996484375, 0.784567680765625}, -{0.538479240078125, 0.442006235093750}, -{0.495691442609375, 0.839534956375000}, -{0.092295940359375, 0.445600341546875}, -{0.806758487734375, 0.295253452109375}, -{0.374367956281250, 0.505149376265625}, -{0.822584307093750, 0.538276272453125}, -{0.381143881328125, 0.075372076343750}, -{0.746648411546875, 0.886075859718750}, -{0.225690524703125, 0.572657414093750}, -{0.908135803781250, 0.224055233687500}, -{0.100557411375000, 0.856953939312500}, -{0.326749240812500, 0.257185540078125}, -{0.703382877203125, 0.594522854968750}, -{0.635381359671875, 0.247395865796875}, -{0.091503945812500, 0.170573635640625}, -{0.773093560203125, 0.773280760015625}, -{0.699864601359375, 0.424703221531250}, -{0.337978249937500, 0.822576861265625}, -{0.177278602781250, 0.415547689312500}, -{0.950716102781250, 0.382344564312500}, -{0.345102996515625, 0.701097146437500}, -{0.850941098000000, 0.563378403515625}, -{0.447833622000000, 0.045251544750000}, -{0.584417841375000, 0.933427886640625}, -{0.117581880000000, 0.710837083484375}, -{0.864601654468750, 0.093219298265625}, -{0.115674527437500, 0.963752289609375}, -{0.438901999203125, 0.285253217765625}, -{0.641702341031250, 0.693621303765625}, -{0.662939186515625, 0.064765218421875}, -{0.102763510390625, 0.047682223171875}, -{0.821904890625000, 0.958637616328125}, -{0.617697313656250, 0.380208463203125}, -{0.387554934109375, 0.806559289718750}, -{0.096212938656250, 0.263020963203125}, -{0.867422654015625, 0.379107524531250}, -{0.383386910937500, 0.555990102843750}, -{0.880607996843750, 0.519919121093750}, -{0.252252211921875, 0.162698353468750}, -{0.642183234343750, 0.969837245046875}, -{0.196583993796875, 0.685156627843750}, -{0.963190877593750, 0.199420555781250}, -{0.199633261734375, 0.888224135796875}, -{0.350685461906250, 0.434846186453125}, -{0.614399749906250, 0.627404791062500}, -{0.608747165328125, 0.225123234406250}, -{0.244958663437500, 0.019149132359375}, -{0.880021551015625, 0.966470884812500}, -{0.686627065421875, 0.259165408953125}, -{0.463246963312500, 0.880304660312500}, -{0.209424527437500, 0.448127289609375}, -{0.850148582281250, 0.305782790234375}, -{0.321802423796875, 0.647870625703125}, -{0.853921568625000, 0.709558823531250}, -{0.496648411546875, 0.136075859718750}, -{0.534266910125000, 0.759900787234375}, -{0.018208405500000, 0.610922261187500}, -{0.981815968843750, 0.044970413296875}, -{0.147265783328125, 0.807294542578125}, -{0.278764594718750, 0.293912076453125}, -{0.697168203437500, 0.508447093109375}, -{0.569936479140625, 0.090431613031250}, -{0.199633261734375, 0.231974135796875}, -{0.878530229140625, 0.840431613031250}, -{0.530111691484375, 0.313423081343750}, -{0.287503755000000, 0.877340989734375}, -{0.020926812156250, 0.490443845953125}, -{0.925118529859375, 0.463551966546875}, -{0.460281853234375, 0.514086682671875}, -{0.946583993796875, 0.685156627843750}, -{0.432953992203125, 0.011242603312500}, -{0.681996963312500, 0.841242160312500}, -{0.035440756328125, 0.741967535328125}, -{0.826814113265625, 0.129551982203125}, -{0.102763510390625, 0.797682223171875}, -{0.257250986453125, 0.431315283906250}, -{0.601735045687500, 0.535855464812500}, -{0.523947313656250, 0.161458463203125}, -{0.089514399500000, 0.200051700359375}, -{0.998871711468750, 0.969931745984375}, -{0.725557411375000, 0.325703939312500}, -{0.411054041609375, 0.940728892687500}, -{0.092214949156250, 0.395649388656250}, -{0.947571808109375, 0.318240323640625}, -{0.385150528859375, 0.598791693968750}, -{0.760340045031250, 0.666060247875000}, -{0.385468447859375, 0.173477959078125}, -{0.619715387546875, 0.985882883562500}, -{0.110693313734375, 0.604613051578125}, -{0.759083993796875, 0.247656627843750}, -{0.198151308765625, 0.803362716656250}, -{0.368295322046875, 0.475490672062500}, -{0.522431987421875, 0.579676484406250}, -{0.668614601359375, 0.002828221531250}, -{0.159061291453125, 0.115786836312500}, -{0.885343915375000, 0.797979216453125}, -{0.502049350968750, 0.290322923484375}, -{0.361960011265625, 0.916515061968750}, -{0.212257727531250, 0.374306940859375}, -{0.808006746078125, 0.446560873750000}, -{0.489786425109375, 0.696416160921875}, -{0.914775229140625, 0.625807223171875}, -{0.272394756234375, 0.082105300000000}, -{0.697833622000000, 0.795251544750000}, -{0.146614411140625, 0.654378257218750}, -{0.959039534828125, 0.169381743953125}, -{0.056931985968750, 0.888270723078125}, -{0.493765020015625, 0.415613958984375}, -{0.727875617718750, 0.674521589734375}, -{0.646232996843750, 0.172262871093750}, -{0.052267234312500, 0.085173392906250}, -{0.794171695281250, 0.897748994546875}, -{0.664407018781250, 0.458335411421875}, -{0.318273390046875, 0.755820190000000}, -{0.046247165328125, 0.350123234406250}, -{0.865799202015625, 0.333466330906250}, -{0.285498339406250, 0.583855022421875}, -{0.956147102093750, 0.505576277234375}, -{0.318949826718750, 0.141763441546875}, -{0.510429611953125, 0.887119489765625}, -{0.184919978109375, 0.561328326953125}, -{0.759417624109375, 0.096532545187500}, -{0.130403602781250, 0.937032064312500}, -{0.394595719296875, 0.367380713734375}, -{0.567731703031250, 0.747610961484375}, -{0.538716806515625, 0.039836274843750} -}; \ No newline at end of file +} \ No newline at end of file diff --git a/22_RaytracedAO/Renderer.h b/22_RaytracedAO/Renderer.h index f3f7e43c7..81e38ac7f 100644 --- a/22_RaytracedAO/Renderer.h +++ b/22_RaytracedAO/Renderer.h @@ -46,8 +46,6 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac const nbl::core::vector& clipPlanes={} ); - void deinitScreenSizedResources(); - void resetSampleAndFrameCounters(); void takeAndSaveScreenShot(const std::filesystem::path& screenshotFilePath, bool denoise, const DenoiserArgs& denoiserArgs = {}); @@ -160,7 +158,6 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr m_perCameraRasterDSLayout; nbl::core::smart_refctd_ptr m_rasterInstanceDataDSLayout,m_additionalGlobalDSLayout,m_commonRaytracingDSLayout; nbl::core::smart_refctd_ptr m_raygenDSLayout,m_closestHitDSLayout,m_resolveDSLayout; - nbl::core::smart_refctd_ptr m_visibilityBufferFillPipeline; nbl::core::smart_refctd_ptr m_cullPipelineLayout; nbl::core::smart_refctd_ptr m_raygenPipelineLayout; @@ -195,14 +192,6 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac private: nbl::core::smart_refctd_ptr bufferView; } sampleSequence; - uint16_t maxPathDepth; - uint16_t noRussianRouletteDepth : 15; - uint16_t hideEnvironment : 1; - uint32_t maxSensorSamples; - - // scene specific data - nbl::core::vector<::RadeonRays::Shape*> rrShapes; - nbl::core::vector<::RadeonRays::Shape*> rrInstances; nbl::core::matrix3x4SIMD m_prevView; nbl::core::matrix4x3 m_prevCamTform; @@ -232,20 +221,8 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr m_commonRaytracingDS[2]; nbl::core::smart_refctd_ptr m_rasterInstanceDataDS,m_raygenDS,m_resolveDS; nbl::core::smart_refctd_ptr m_closestHitDS[2]; - uint32_t m_raygenWorkGroups[2]; - struct InteropBuffer - { - nbl::core::smart_refctd_ptr buffer; - std::pair<::RadeonRays::Buffer*, cl_mem> asRRBuffer = { nullptr,0u }; - }; - InteropBuffer m_rayBuffer[2]; - InteropBuffer m_intersectionBuffer[2]; - nbl::core::smart_refctd_ptr m_accumulation,m_tonemapOutput; - nbl::core::smart_refctd_ptr m_albedoAcc,m_albedoRslv; - nbl::core::smart_refctd_ptr m_normalAcc,m_normalRslv; - nbl::core::smart_refctd_ptr m_maskAcc; - nbl::video::IFrameBuffer* m_visibilityBuffer,* m_colorBuffer; + nbl::video::IFrameBuffer* m_colorBuffer; // Resources used for envmap sampling nbl::core::smart_refctd_ptr m_finalEnvmap; diff --git a/22_RaytracedAO/raytraceCommon.h b/22_RaytracedAO/raytraceCommon.h index 595fc7198..40397e84f 100644 --- a/22_RaytracedAO/raytraceCommon.h +++ b/22_RaytracedAO/raytraceCommon.h @@ -83,79 +83,5 @@ struct SLight **/ }; - - -// -#include -#ifdef __cplusplus -struct alignas(16) StaticViewData_t -#else -struct StaticViewData_t -#endif -{ -#ifdef __cplusplus - uint16_t imageDimensions[2]; - uint8_t maxPathDepth; - uint8_t noRussianRouletteDepth; - uint16_t samplesPerPixelPerDispatch; - uint32_t sampleSequenceStride : 31; - uint32_t hideEnvmap : 1; -#else - uint imageDimensions; - uint maxPathDepth_noRussianRouletteDepth_samplesPerPixelPerDispatch; - uint sampleSequenceStride_hideEnvmap; -#endif - float envMapPDFNormalizationFactor; - nbl_glsl_RWMC_CascadeParameters cascadeParams; -}; -#ifndef __cplusplus -uvec2 getImageDimensions(in StaticViewData_t data) -{ - return uvec2( - bitfieldExtract(data.imageDimensions, 0,16), - bitfieldExtract(data.imageDimensions,16,16) - ); -} -#endif - - -struct RaytraceShaderCommonData_t -{ - float rcpFramesDispatched; - uint frameLowDiscrepancySequenceShift; - uint pathDepth_rayCountWriteIx; // depth=0 if path tracing disabled - float textureFootprintFactor; - // need to be at the end because of some PC -> OpenGL Uniform mapping bug - // PERSPECTIVE - // mat3(viewDirReconFactors)*vec3(uv,1) or hitPoint-viewDirReconFactors[3] - // ORTHO - // viewDirReconFactors[2]=V - mat4x3 viewDirReconFactors; - -#ifdef __cplusplus - uint32_t getPathDepth() const - { - return nbl::core::bitfieldExtract(pathDepth_rayCountWriteIx,0,RAYCOUNT_SHIFT); - } - void setPathDepth(const uint32_t depth) - { - pathDepth_rayCountWriteIx = nbl::core::bitfieldInsert(pathDepth_rayCountWriteIx,depth,0,RAYCOUNT_SHIFT); - } - - uint32_t getReadIndex() const - { - const uint32_t index = nbl::core::bitfieldExtract(pathDepth_rayCountWriteIx,RAYCOUNT_SHIFT,RAYCOUNT_N_BUFFERING_LOG2); - if (index) - return index-1; - return RAYCOUNT_N_BUFFERING-1; - } - void advanceWriteIndex() - { - const uint32_t writeIx = nbl::core::bitfieldExtract(pathDepth_rayCountWriteIx,RAYCOUNT_SHIFT,RAYCOUNT_N_BUFFERING_LOG2); - pathDepth_rayCountWriteIx = nbl::core::bitfieldInsert(pathDepth_rayCountWriteIx,writeIx+1,RAYCOUNT_SHIFT,RAYCOUNT_N_BUFFERING_LOG2); - } -#endif -}; - #include #endif \ No newline at end of file diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index aa11ee33e..a7ea46a85 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -16,6 +16,8 @@ list(APPEND NBL_LIBRARIES ) list(APPEND NBL_EXAMPLE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/io/CSceneLoader.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CSession.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CScene.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CRenderer.cpp" ) list(APPEND NBL_ diff --git a/40_PathTracer/include/io/CSceneLoader.h b/40_PathTracer/include/io/CSceneLoader.h index d8dfc4430..24e6ca490 100644 --- a/40_PathTracer/include/io/CSceneLoader.h +++ b/40_PathTracer/include/io/CSceneLoader.h @@ -184,6 +184,10 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov // float cascadeLuminanceBase = core::nan(); float cascadeLuminanceStart = core::nan(); + // + uint16_t hideEnvironment : 1 = false; + uint16_t russianRouletteDepth : 15 = 0x7fffu; + uint16_t maxPathDepth = 0; } mutableDefaults = {}; // these can change without having to reset accumulations, etc. struct SDynamic @@ -198,6 +202,7 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov struct SPostProcess { + std::filesystem::path bloomFilePath; float bloomScale = 0.0f; float bloomIntensity = 0.0f; std::string tonemapperArgs = ""; @@ -218,19 +223,6 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov float rotateSpeed = core::nan(); union { - /* - float linearStepZoomSpeed = sensorData.stepZoomSpeed; - if(core::isnan(sensorData.stepZoomSpeed)) - { - linearStepZoomSpeed = sceneDiagonal * (DefaultZoomSpeed / DefaultSceneDiagonal); - } - - // Set Zoom Multiplier - { - float logarithmicZoomSpeed = std::pow(sceneDiagonal, linearStepZoomSpeed / sceneDiagonal); - sensorData.stepZoomSpeed = logarithmicZoomSpeed; - sensorData.getInteractiveCameraAnimator()->setStepZoomMultiplier(logarithmicZoomSpeed); - */ struct SZoomable // spherical can't zoom { float speed = core::nan(); @@ -248,7 +240,7 @@ class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmov inline operator bool() const { - if (!scene || !sensors.empty()) + if (!scene || sensors.empty()) return false; return true; } diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h index e86c56aa5..9e4089e12 100644 --- a/40_PathTracer/include/renderer/CRenderer.h +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -14,6 +14,32 @@ #include +// TODO: move to HLSL file +namespace nbl::this_example +{ + +struct SPrevisPushConstants : SSensorDynamics +{ +}; + +// We do it so weirdly because https://github.com/microsoft/DirectXShaderCompiler/issues/7131 +#define MAX_SPP_PER_DISPATCH_LOG2 5 +struct SBeautyPushConstants : SSensorDynamics +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSppPerDispatchLog2 = MAX_SPP_PER_DISPATCH_LOG2; + + uint32_t maxSppPerDispatch : MAX_SPP_PER_DISPATCH_LOG2; + uint32_t unused : 27; +}; +#undef MAX_SPP_PER_DISPATCH_LOG2 + +struct SDebugPushConstants : SSensorDynamics +{ +}; + +} + + namespace nbl::this_example { @@ -23,18 +49,12 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl enum class RenderMode : uint8_t { Previs, - Beauty//, + Beauty, //Albedo, //Normal, - //Motion - }; - // TODO: move this somewhere else - struct DenoiserArgs - { - std::filesystem::path bloomFilePath; - float bloomScale = 0.0f; - float bloomIntensity = 0.0f; - std::string tonemapperArgs = ""; + //Motion, + DebugIDs, + Count }; // @@ -85,43 +105,76 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl video::IQueue* uploadQueue = nullptr; // core::smart_refctd_ptr utilities = nullptr; + // can be null + system::logger_opt_smart_ptr logger = nullptr; }; struct SCreationParams : SCachedCreationParams { + system::path sampleSequenceCache; }; static core::smart_refctd_ptr create(SCreationParams&& _params); // - inline video::ILogicalDevice* getDevice() { return m_params.utilities->getLogicalDevice(); } + inline const SCachedCreationParams& getCreationParams() const { return m_creation; } + + // + inline video::ILogicalDevice* getDevice() const {return m_creation.utilities->getLogicalDevice();} // core::smart_refctd_ptr createScene(CScene::SCreationParams&& _params); - - // session object - class CSession final : public core::IReferenceCounted, public core::InterfaceUnmovable + + struct SCachedConstructionParams { - // sensor data - struct STransients - { -// core::smart_refctd_ptr<>; - } transients = {}; - - public: - // init - // deinit - }; + constexpr static inline uint8_t FramesInFlight = 3; - protected: - struct SConstructorParams : SCachedCreationParams - { - // per pipeline UBO, with fast updates + // per pipeline UBO for other pipelines core::smart_refctd_ptr uboDSLayout; // descriptor set for a scene shall contain sampled textures and compiled materials core::smart_refctd_ptr sceneDSLayout; + // descriptor set for sensors + core::smart_refctd_ptr sensorDSLayout; - // rendering pipelines - core::smart_refctd_ptr preVis; - core::smart_refctd_ptr pathTracing; + // TODO + std::array,uint8_t(RenderMode::Count)> renderingPipelines; + + // + core::smart_refctd_ptr commandBuffers[FramesInFlight]; + }; + // + inline const SCachedConstructionParams& getConstructionParams() const {return m_construction;} + + protected: + struct SConstructorParams : SCachedCreationParams, SCachedConstructionParams + { + + // Each Atom of the sample sequence provides 3N dimensions (3 for BxDF, 3 for NEE, etc.) + // Then Atoms are ordered by sampleID, then dimension (cache will be fully trashed by tracing TLASes until next bounce) +#if 0 + // semi persistent data + struct SampleSequence + { + public: + static inline constexpr auto QuantizedDimensionsBytesize = sizeof(uint64_t); + SampleSequence() : bufferView() {} + + // one less because first path vertex uses a different sequence + static inline uint32_t computeQuantizedDimensions(uint32_t maxPathDepth) {return (maxPathDepth-1)*SAMPLING_STRATEGY_COUNT;} + nbl::core::smart_refctd_ptr createCPUBuffer(uint32_t quantizedDimensions, uint32_t sampleCount); + + // from cache + void createBufferView(nbl::video::IVideoDriver* driver, nbl::core::smart_refctd_ptr&& buff); + // regenerate + nbl::core::smart_refctd_ptr createBufferView(nbl::video::IVideoDriver* driver, uint32_t quantizedDimensions, uint32_t sampleCount); + + auto getBufferView() const {return bufferView;} + + private: + nbl::core::smart_refctd_ptr bufferView; + } sampleSequence; + + // Resources used for envmap sampling + nbl::ext::EnvmapImportanceSampling::EnvmapImportanceSampling m_envMapImportanceSampling; +#endif // rwmc resolve, autoexposure first pass core::smart_refctd_ptr rwmcResolveAndLumaMeasure; // TODO: autoexposure, and first axis of FFT @@ -137,41 +190,15 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl core::smart_refctd_ptr secondAxisFFTTonemap; // TODO // Present - core::smart_refctd_ptr presentRenderpass; - core::smart_refctd_ptr regularPresent; + core::smart_refctd_ptr presentRenderpass; // TODO + core::smart_refctd_ptr regularPresent; // TODO core::smart_refctd_ptr cubemapPresent; // TODO }; - inline CRenderer(SConstructorParams&& _params) : m_params(std::move(_params)) {} + inline CRenderer(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)) {} virtual inline ~CRenderer() {} - SConstructorParams m_params; -#if 0 - // semi persistent data - nbl::io::path sampleSequenceCachePath; - struct SampleSequence - { - public: - static inline constexpr auto QuantizedDimensionsBytesize = sizeof(uint64_t); - SampleSequence() : bufferView() {} - - // one less because first path vertex uses a different sequence - static inline uint32_t computeQuantizedDimensions(uint32_t maxPathDepth) {return (maxPathDepth-1)*SAMPLING_STRATEGY_COUNT;} - nbl::core::smart_refctd_ptr createCPUBuffer(uint32_t quantizedDimensions, uint32_t sampleCount); - - // from cache - void createBufferView(nbl::video::IVideoDriver* driver, nbl::core::smart_refctd_ptr&& buff); - // regenerate - nbl::core::smart_refctd_ptr createBufferView(nbl::video::IVideoDriver* driver, uint32_t quantizedDimensions, uint32_t sampleCount); - - auto getBufferView() const {return bufferView;} - - private: - nbl::core::smart_refctd_ptr bufferView; - } sampleSequence; - - // Resources used for envmap sampling - nbl::ext::EnvmapImportanceSampling::EnvmapImportanceSampling m_envMapImportanceSampling; -#endif + SCachedCreationParams m_creation; + SCachedConstructionParams m_construction; }; } @@ -194,6 +221,8 @@ struct to_string_helper return "Beauty"; case enum_t::Previs: return "Previs"; + case enum_t::DebugIDs: + return "DebugIDs"; default: break; } diff --git a/40_PathTracer/include/renderer/CScene.h b/40_PathTracer/include/renderer/CScene.h index e2ae0718b..25f8c100c 100644 --- a/40_PathTracer/include/renderer/CScene.h +++ b/40_PathTracer/include/renderer/CScene.h @@ -6,10 +6,55 @@ #include "io/CSceneLoader.h" +#include "renderer/CSession.h" +// TODO: move to HLSL file +namespace nbl::this_example +{ +struct SSceneUniforms +{ + struct SIndirectInit + { + // +// bda_t pQuantizedSequence; + // because the PDF is rescaled to log2(luma)/log2(Max)*255 + // and you get it out as `exp2(texValue)*factor` + hlsl::float32_t envmapPDFNormalizationFactor; + hlsl::float16_t envmapScale; + uint16_t unused; + } indirect; +}; + +struct SceneDSBindings +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t UBO = 0; + // RGB9E5 post multiplied by a max value + NBL_CONSTEXPR_STATIC_INLINE uint32_t Envmap = 1; + NBL_CONSTEXPR_STATIC_INLINE uint32_t TLASes = 2; + NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 3; + NBL_CONSTEXPR_STATIC_INLINE uint32_t SampledImages = 4; + // UINT8 log2(luma) meant for stochastic descent or querying the PDF of the Warp Map + NBL_CONSTEXPR_STATIC_INLINE uint32_t EnvmapPDF = 5; + // R16G16_UNORM or R32G32_SFLOAT (depending on envmap resolution) meant for skipping stochastic descent + NBL_CONSTEXPR_STATIC_INLINE uint32_t EnvmapWarpMap = 6; +}; + +struct SceneDSBindingCounts +{ + // Mostly held back by Intel ARC, important to not have more than this many light geometries, can increase to + // https://vulkan.gpuinfo.org/displayextensionproperty.php?extensionname=VK_KHR_acceleration_structure&extensionproperty=maxDescriptorSetUpdateAfterBindAccelerationStructures&platform=all + // https://vulkan.gpuinfo.org/displayextensionproperty.php?extensionname=VK_KHR_acceleration_structure&extensionproperty=maxPerStageDescriptorUpdateAfterBindAccelerationStructures&platform=all + NBL_CONSTEXPR_STATIC_INLINE uint32_t TLASes = 65535; + // Reasonable combo (esp if we implement a cache over the DS) + NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 128; + // Spec mandated minimum + NBL_CONSTEXPR_STATIC_INLINE uint32_t SampledImages = 500000; +}; +} namespace nbl::this_example { +class CRenderer; class CScene : public core::IReferenceCounted, public core::InterfaceUnmovable { @@ -31,49 +76,44 @@ class CScene : public core::IReferenceCounted, public core::InterfaceUnmovable } }; - // TODO: figure out whats constant, and whats state that can be passed around - inline std::span getSensors() const {return m_params.sensors;} + // + inline CRenderer* getRenderer() const {return m_construction.renderer.get();} - // TODO: function to initialize per-sensor stuff + using sensor_t = CSceneLoader::SLoadResult::SSensor; + // + inline std::span getSensors() const {return m_construction.sensors;} + + // + core::smart_refctd_ptr createSession(const sensor_t& sensor); protected: friend class CRenderer; - struct SConstructorParams : SCachedCreationParams + struct SCachedConstructorParams { + // + hlsl::shapes::AABB<> sceneBound; + // + core::vector sensors; + // backward link for reference counting + core::smart_refctd_ptr renderer; // descriptor set for a scene shall contain sampled textures and compiled materials - core::smart_refctd_ptr sceneDS; - - core::vector sensors; -#if 0 - nbl::core::aabbox3df m_sceneBound; - float m_maxAreaLightLuma; - StaticViewData_t m_staticViewData; - RaytraceShaderCommonData_t m_raytraceCommonData; - // Resources used for envmap sampling - nbl::core::smart_refctd_ptr m_finalEnvmap; -#endif + core::smart_refctd_ptr sceneDS; + // main TLAS + core::smart_refctd_ptr TLAS; }; - inline CScene(SConstructorParams&& _params) : m_params(std::move(_params)) {} + struct SConstructorParams : SCachedCreationParams, SCachedConstructorParams + { + // sensor list can be empty, we can just make one up as we go along + inline operator bool() const + { + return renderer && sceneDS; + } + }; + inline CScene(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)) {} virtual inline ~CScene() {} - SConstructorParams m_params; -#if 0 - // TODO: sensor stuff - uint16_t hideEnvironment : 1; - uint32_t maxSensorSamples; - - uint32_t m_framesDispatched; - vec2 m_rcpPixelSize; - uint64_t m_totalRaysCast; - StaticViewData_t m_staticViewData; - RaytraceShaderCommonData_t m_raytraceCommonData; - - nbl::core::smart_refctd_ptr m_accumulation,m_tonemapOutput; - nbl::core::smart_refctd_ptr m_albedoAcc,m_albedoRslv; - nbl::core::smart_refctd_ptr m_normalAcc,m_normalRslv; - nbl::core::smart_refctd_ptr m_maskAcc; - -#endif + SCachedCreationParams m_creation; + SCachedConstructorParams m_construction; }; } diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h new file mode 100644 index 000000000..27353e8d2 --- /dev/null +++ b/40_PathTracer/include/renderer/CSession.h @@ -0,0 +1,161 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_C_SESSION_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_C_SESSION_H_INCLUDED_ + + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl" +#include "nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl" + +#include "io/CSceneLoader.h" + + +// TODO: move to HLSL file +namespace nbl::this_example +{ +#define MAX_SPP_LOG2 15 +NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxSPPLog2 = MAX_SPP_LOG2; +// need to be able to count (represent) both 0 and Max +NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSPP = (0x1u << MaxSPPLog2) - 1; + +// We do it so weirdly because https://github.com/microsoft/DirectXShaderCompiler/issues/7131 +#define MAX_CASCADE_COUNT_LOG2 3 +struct SSensorUniforms +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t ScrambleKeyTextureSize = 512; + +#define MAX_PATH_DEPTH_LOG2 7 + NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxCascadeCountLog2 = MAX_CASCADE_COUNT_LOG2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxPathDepthLog2 = MAX_PATH_DEPTH_LOG2; + + hlsl::float32_t2 rcpPixelSize; + hlsl::rwmc::SplattingParameters splatting; + hlsl::uint16_t2 renderSize; + // bitfield + uint16_t lastCascadeIndex : MAX_CASCADE_COUNT_LOG2; + uint16_t unused0 : 13; + // bitfield + uint16_t unused1 : 1; + uint16_t hideEnvironment : 1; + uint16_t lastPathDepth : MAX_PATH_DEPTH_LOG2; + uint16_t lastNoRussianRouletteDepth : MAX_PATH_DEPTH_LOG2; +}; +#undef MAX_PATH_DEPTH_LOG2 + +// no uint16_t to be used because its going to be a push constant +struct SSensorDynamics +{ + // assuming input will be ndc = [-1,1]^2 x {-1} + hlsl::float32_t3x4 ndcToRay; + hlsl::float32_t tMax; + // we can adaptively sample per-pixel, but + uint32_t minSPP : MAX_SPP_LOG2; + uint32_t maxSPP : MAX_SPP_LOG2; + uint32_t unused : 2; +}; + +// no uint16_t to be used because its going to be a push constant +struct SResolveConstants +{ + struct SProtoRWMC + { + hlsl::float32_t initialEmin; + hlsl::float32_t reciprocalBase; + hlsl::float32_t reciprocalKappa; + hlsl::float32_t colorReliabilityFactor; + } rwmc; + uint32_t cascadeCount : BOOST_PP_ADD(MAX_CASCADE_COUNT_LOG2,1); + uint32_t unused : 28; +}; +#undef MAX_CASCADE_COUNT_LOG2 + + + +struct SensorDSBindings +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t UBO = 0; + // R32G32_UINT storage texture (can get animated/rearranged) + NBL_CONSTEXPR_STATIC_INLINE uint32_t ScrambleKey = 1; + // R16_UINT Per Pixel Sample Count (so don't need to read all RWMC cascades) + NBL_CONSTEXPR_STATIC_INLINE uint32_t SampleCount = 2; + // R64_UINT with packing RGB14E6 or RGB14E7 and using rest for spp in the cascade + NBL_CONSTEXPR_STATIC_INLINE uint32_t RWMCCascades = 3; + // R10G10B10_UNORM + NBL_CONSTEXPR_STATIC_INLINE uint32_t Albedo = 4; + // R10G10B10_SNORM + NBL_CONSTEXPR_STATIC_INLINE uint32_t Normal = 5; + // R10G10B10_SNORM + NBL_CONSTEXPR_STATIC_INLINE uint32_t Motion = 6; + // R16_UNORM + NBL_CONSTEXPR_STATIC_INLINE uint32_t Mask = 7; +}; +} + + +namespace nbl::this_example +{ +class CScene; + +class CSession final : public core::IReferenceCounted, public core::InterfaceUnmovable +{ + public: + using sensor_t = CSceneLoader::SLoadResult::SSensor; + using sensor_type_e = sensor_t::SMutable::Raygen::Type; + + // + bool init(video::IGPUCommandBuffer* cb); + + // + bool reset(const SSensorDynamics& newVal, video::IGPUCommandBuffer* cb); + + // + inline void deinit() {m_active = {};} + + private: + friend class CScene; + + struct SConstructionParams + { + core::string name = "TODO from `sensor`"; + core::smart_refctd_ptr scene; + SSensorUniforms uniforms; + SSensorDynamics initDynamics; + SResolveConstants initResolveConstants; + sensor_type_e type; + }; + inline CSession(SConstructionParams&& _params) : m_params(std::move(_params)) {} + + const SConstructionParams m_params; + // heavy VRAM data and data only needed during an active session + struct SActiveResources + { + struct SImageWithViews + { + inline operator bool() const + { + return image && !views.empty() && views.begin()->second; + } + + core::smart_refctd_ptr image = {}; + core::unordered_map> views = {}; + }; + struct SImmutables + { + inline operator bool() const + { + return bool(scrambleKey) && sampleCount && rwmcCascades && albedo && normal && motion && mask && ds; + } + + SImageWithViews scrambleKey = {}, sampleCount = {}, rwmcCascades = {}, albedo = {}, normal = {}, motion = {}, mask = {}; + // stores all the sensor data required + core::smart_refctd_ptr ds = {}; + }; + SImmutables immutables = {}; + SSensorDynamics prevSensorState = {}; + } m_active = {}; +}; + +} +#endif diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index dfc57b8e1..45c0e3ef2 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -161,9 +161,12 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes }}); // TODO: tmp code - auto scene_daily_pt = m_sceneLoader->load({ - .relPath = sharedInputCWD/"mitsuba/daily_pt.xml", - .workingDirectory = localOutputCWD + auto scene_daily_pt = m_renderer->createScene({ + .load = m_sceneLoader->load({ + .relPath = sharedInputCWD/"mitsuba/daily_pt.xml", + .workingDirectory = localOutputCWD + }), + .converter = nullptr }); // the UI would have you load the zip first, then present a dropdown of what to load // but still need to support archive mount for cmdline load @@ -174,6 +177,18 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes }); #endif + auto session = scene_daily_pt->createSession(scene_daily_pt->getSensors().front()); + + // temporary test + { + auto cb = m_renderer->getConstructionParams().commandBuffers[0].get(); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + session->init(cb); + cb->end(); + } + session->deinit(); + + // Load Custom Shader auto loadPrecompiledShader = [&]() -> smart_refctd_ptr { @@ -261,8 +276,6 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes auto pool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - m_converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); - for (auto i = 0u; i < MaxFramesInFlight; i++) { if (!pool) @@ -1582,8 +1595,6 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes smart_refctd_ptr m_presentDsPool; smart_refctd_ptr m_presentPipeline; - smart_refctd_ptr m_converter; - core::matrix4SIMD m_cachedModelViewProjectionMatrix; bool m_useIndirectCommand = false; diff --git a/40_PathTracer/src/io/CSceneLoader.cpp b/40_PathTracer/src/io/CSceneLoader.cpp index 5f82356d0..4507c64dd 100644 --- a/40_PathTracer/src/io/CSceneLoader.cpp +++ b/40_PathTracer/src/io/CSceneLoader.cpp @@ -48,6 +48,7 @@ struct to_string_helper mutableDefaults["farClip"] = _mutableDefaults.farClip; mutableDefaults["cascadeLuminanceBase"] = _mutableDefaults.cascadeLuminanceBase; mutableDefaults["cascadeLuminanceStart"] = _mutableDefaults.cascadeLuminanceStart; + mutableDefaults["hideEnvironment"] = _mutableDefaults.hideEnvironment; } { auto& dynamicDefaults = j["dynamicDefaults"]; @@ -162,6 +163,9 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult } const auto* const meta = static_cast(untypedMeta); + // + auto& integrator = meta->m_global.m_integrator; + // TODO: compute/get this from minumum extent of scene float sceneSize = 50.f; @@ -380,6 +384,22 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult // mutableDefaults.cascadeLuminanceBase = film.cascadeLuminanceBase; mutableDefaults.cascadeLuminanceStart = film.cascadeLuminanceStart; + // + integrator.visit([&mutableDefaults](auto& var)->void + { + if constexpr (std::is_base_of_v>) + mutableDefaults.hideEnvironment = var.hideEnvironment; + } + ); + integrator.visit([&mutableDefaults](auto& var)->void + { + if constexpr (std::is_base_of_v>) + { + mutableDefaults.maxPathDepth = var.maxPathDepth; + mutableDefaults.russianRouletteDepth = var.russianRouletteDepth; + } + } + ); } { using dyn_t = SLoadResult::SSensor::SDynamic; @@ -440,6 +460,7 @@ auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult } // post process { + dynamicDefaults.postProc.bloomFilePath = film.denoiserBloomFilePath; dynamicDefaults.postProc.bloomScale = film.denoiserBloomScale; dynamicDefaults.postProc.bloomIntensity = film.denoiserBloomIntensity; dynamicDefaults.postProc.tonemapperArgs = std::string(film.denoiserTonemapperArgs); diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index de9c9a5c4..0d68e958d 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -8,34 +8,188 @@ #include "nbl/this_example/builtin/build/spirv/keys.hpp" +#include + namespace nbl::this_example { +using namespace nbl::core; using namespace nbl::asset; +using namespace nbl::system; using namespace nbl::video; // -core::smart_refctd_ptr CRenderer::create(SCreationParams&& _params) +smart_refctd_ptr CRenderer::create(SCreationParams&& _params) { if (!_params) return nullptr; SConstructorParams params = {std::move(_params)}; + // + if (!params.logger.get()) + params.logger = smart_refctd_ptr(params.utilities->getLogger()); + auto checkNullObject = [¶ms](auto& obj, const std::string_view debugName)->bool + { + if (!obj) + { + params.logger.log("Failed to Create %s Object!",ILogger::ELL_ERROR,debugName.data()); + return true; + } + obj->setObjectDebugName(debugName.data()); + return false; + }; + // ILogicalDevice* device = params.utilities->getLogicalDevice(); + // limits + // basic samplers + const auto samplerDefaultRepeat = device->createSampler({}); // create the layouts + smart_refctd_ptr renderingLayouts[uint8_t(RenderMode::Count)]; { - // one descriptor layout to rule them all + constexpr auto RTStages = hlsl::ShaderStage::ESS_ALL_RAY_TRACING | hlsl::ShaderStage::ESS_COMPUTE; + // descriptor { - // bindless textures - // bindless storage images - // bindless buffer views - // bindless buffer storage views + using binding_create_flags_t = IDescriptorSetLayoutBase::SBindingBase::E_CREATE_FLAGS; + constexpr IGPUDescriptorSetLayout::SBinding UBOBinding = { + .binding = SensorDSBindings::UBO, + .type = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER, + .createFlags = binding_create_flags_t::ECF_NONE, + .stageFlags = hlsl::ShaderStage::ESS_ALL_OR_LIBRARY, + .count = 1 + }; + // the generic single-UBO + { + params.uboDSLayout = device->createDescriptorSetLayout({&UBOBinding,1}); + if (checkNullObject(params.uboDSLayout,"Generic Single UBO Layout")) + return nullptr; + } + constexpr auto DescriptorIndexingFlags = binding_create_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_create_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | binding_create_flags_t::ECF_PARTIALLY_BOUND_BIT; + // + auto singleStorageImage = [](const uint32_t binding)->IGPUDescriptorSetLayout::SBinding + { + return { + .binding = binding, + .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = binding_create_flags_t::ECF_NONE, + .stageFlags = RTStages, + .count = 1 + }; + }; + // TODO: provide these two samplers from Envmap Importance sampling extension + const auto samplerNearestRepeat = device->createSampler({ + { + .MinFilter = ISampler::E_TEXTURE_FILTER::ETF_NEAREST, + .MaxFilter = ISampler::E_TEXTURE_FILTER::ETF_NEAREST, + .MipmapMode = ISampler::E_SAMPLER_MIPMAP_MODE::ESMM_NEAREST, + .AnisotropicFilter = 0, + }, + 0.f, + 0.f, + 0.f + }); + // bindless everything + { + // TODO: provide these two samplers from Envmap Importance sampling extension + const auto samplerEnvmapPDF = samplerNearestRepeat; + const auto samplerEnvmapWarpmap = device->createSampler({ + { + .MinFilter = ISampler::E_TEXTURE_FILTER::ETF_LINEAR, + .MaxFilter = ISampler::E_TEXTURE_FILTER::ETF_LINEAR, + .MipmapMode = ISampler::E_SAMPLER_MIPMAP_MODE::ESMM_NEAREST, + .AnisotropicFilter = 0, + }, + 0.f, + 0.f, + 0.f + }); + std::initializer_list bindings = { + UBOBinding, + { + .binding = SceneDSBindings::Envmap, + .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = binding_create_flags_t::ECF_NONE, + .stageFlags = RTStages, + .count = 1, + .immutableSamplers = &samplerDefaultRepeat + }, + { + .binding = SceneDSBindings::TLASes, + .type = IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, + .createFlags = DescriptorIndexingFlags, + .stageFlags = RTStages, + .count = SceneDSBindingCounts::TLASes + }, + { + .binding = SceneDSBindings::Samplers, + .type = IDescriptor::E_TYPE::ET_SAMPLER, + .createFlags = DescriptorIndexingFlags, + .stageFlags = RTStages, + .count = SceneDSBindingCounts::Samplers + }, + { + .binding = SceneDSBindings::SampledImages, + .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = DescriptorIndexingFlags, + .stageFlags = RTStages, + .count = SceneDSBindingCounts::SampledImages + }, + { + .binding = SceneDSBindings::EnvmapPDF, + .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = DescriptorIndexingFlags, + .stageFlags = RTStages, + .count = 1, + .immutableSamplers = &samplerEnvmapPDF + }, + { + .binding = SceneDSBindings::EnvmapWarpMap, + .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = DescriptorIndexingFlags, + .stageFlags = RTStages, + .count = 1, + .immutableSamplers = &samplerEnvmapWarpmap + } + }; + params.sceneDSLayout = device->createDescriptorSetLayout(bindings); + if (checkNullObject(params.sceneDSLayout,"Scene Descriptor Layout")) + return nullptr; + } + // the sensor layout + { + std::initializer_list bindings = { + UBOBinding, + singleStorageImage(SensorDSBindings::ScrambleKey), + singleStorageImage(SensorDSBindings::SampleCount), + singleStorageImage(SensorDSBindings::RWMCCascades), + singleStorageImage(SensorDSBindings::Albedo), + singleStorageImage(SensorDSBindings::Normal), + singleStorageImage(SensorDSBindings::Motion), + singleStorageImage(SensorDSBindings::Mask) + }; + params.sensorDSLayout = device->createDescriptorSetLayout(bindings); + if (checkNullObject(params.sensorDSLayout,"Sensor Descriptor Layout")) + return nullptr; + } } // but many push constant ranges - // and first descriptor set layout for 1 UBO to put image indices and BDA (fast swap at will) + SPushConstantRange pcRanges[uint8_t(RenderMode::Count)]; + auto setPCRange = [&pcRanges](const RenderMode mode)->void + { + pcRanges[uint8_t(mode)] = {.stageFlags=RTStages,.offset=0,.size=sizeof(T)}; + }; + setPCRange.operator()(RenderMode::Previs); + setPCRange.operator()(RenderMode::Beauty); + setPCRange.operator()(RenderMode::DebugIDs); + for (uint8_t t=0; tcreatePipelineLayout({pcRanges+t,1},params.sceneDSLayout,params.sensorDSLayout); + string debugName = to_string(static_cast(t))+"Rendering Pipeline Layout"; + if (checkNullObject(renderingLayouts[t],debugName)) + return nullptr; + } } // create the pipelines @@ -53,6 +207,16 @@ core::smart_refctd_ptr CRenderer::create(SCreationParams&& _params) // TODO } + // command buffers + for (uint8_t i=0; icreateCommandPool(params.graphicsQueue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::NONE); + if (pool) + pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,1,params.commandBuffers+i,smart_refctd_ptr(params.logger.get())); + if (checkNullObject(params.commandBuffers[i],"Graphics Command Buffer "+to_string(i))) + return nullptr; + } + return core::smart_refctd_ptr(new CRenderer(std::move(params)),core::dont_grab); } @@ -61,20 +225,76 @@ core::smart_refctd_ptr CRenderer::createScene(CScene::SCreationParams&& { if (!_params) return nullptr; + + auto* const device = getDevice(); auto converter = core::smart_refctd_ptr(_params.converter); CScene::SConstructorParams params = {std::move(_params)}; + params.sensors = std::move(_params.load.sensors); + params.renderer = smart_refctd_ptr(this); + { + auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT,{&m_construction.sceneDSLayout.get(),1}); + auto ds = pool->createDescriptorSet(smart_refctd_ptr(m_construction.sceneDSLayout)); + if (!ds) + { + m_creation.logger.log("Failed to create a scene - failed descriptor set allocation!",ILogger::ELL_ERROR); + return nullptr; + } + params.sceneDS = make_smart_refctd_ptr(std::move(ds)); + } // new cache if none provided if (!converter) - converter = CAssetConverter::create({.device=getDevice(),.optimizer={}}); - + converter = CAssetConverter::create({.device=device,.optimizer={}}); + + // +// converter->reserve(); // build the BLAS and TLAS { + // TODO + } + core::smart_refctd_ptr ubo; + + // write into DS + { + vector infos; + vector writes; + auto* const ds = params.sceneDS->getDescriptorSet(); + auto addWrite = [&](const uint32_t binding, IGPUDescriptorSet::SDescriptorInfo&& info)->void + { + writes.emplace_back() = { + .dstSet = ds, + .binding = binding, + .arrayElement = 0, + .count = 1, + .info = reinterpret_cast(infos.size()) + }; + infos.push_back(std::move(info)); + }; + addWrite(SceneDSBindings::UBO,SBufferRange{.offset=0,.size=sizeof(SSceneUniforms),.buffer=ubo}); + // TODO: Envmap + // TODO: TLASes + // TODO: Samplers + // TODO: Sampled Images + // TODO: Envmap PDF + // TODO: Envmap Warp Map + for (auto& write : writes) + write.info = infos.data()+reinterpret_cast(write.info); +// device->updateDescriptorSets(writes,{}); } - // fill out the render classes but don't init yet +#if 0 + float m_maxAreaLightLuma; + // Resources used for envmap sampling + nbl::core::smart_refctd_ptr m_finalEnvmap; +#endif + // + if (!params) + { + m_creation.logger.log("Failed to create a scene!",ILogger::ELL_ERROR); + return nullptr; + } return core::smart_refctd_ptr(new CScene(std::move(params)),core::dont_grab); } diff --git a/40_PathTracer/src/renderer/CScene.cpp b/40_PathTracer/src/renderer/CScene.cpp new file mode 100644 index 000000000..cb2f8dc0d --- /dev/null +++ b/40_PathTracer/src/renderer/CScene.cpp @@ -0,0 +1,70 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/builtin/hlsl/limits.hlsl" + +#include "renderer/CRenderer.h" + +namespace nbl::this_example +{ +using namespace nbl::core; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::hlsl; +using namespace nbl::video; + +// +smart_refctd_ptr CScene::createSession(const sensor_t& sensor) +{ + const auto& constants = sensor.constants; + const auto& dynDefaults = sensor.dynamicDefaults; + const auto& mutDefaults = sensor.mutableDefaults; + const auto& raygen = mutDefaults.raygen; + + CSession::SConstructionParams params = { + .scene = smart_refctd_ptr(this), + .type = raygen.getType() + }; + + // fill uniforms + { + const uint16_t2 renderSize(constants.width,constants.height); + const uint16_t maxPathDepth = hlsl::clamp(mutDefaults.maxPathDepth,1,0x1u<(mutDefaults.russianRouletteDepth,1,maxPathDepth); + params.uniforms = { + .rcpPixelSize = promote(1.f)/float32_t2(renderSize), + .splatting = {}, // TODO + .renderSize = renderSize, + .lastCascadeIndex = static_cast(constants.cascadeCount-1), + .hideEnvironment = mutDefaults.hideEnvironment, + .lastPathDepth = static_cast(maxPathDepth-1), + .lastNoRussianRouletteDepth = static_cast(russianRouletteDepth-1) + }; + } + + // + params.initDynamics = { + .ndcToRay = {}, // TODO + .tMax = mutDefaults.farClip, + .minSPP = core::min(dynDefaults.samplesNeeded,16), // for later enhancement + .maxSPP = dynDefaults.samplesNeeded + }; + + // + { + const auto reciprocalKappa = 1.f/dynDefaults.kappa; + params.initResolveConstants = { + .rwmc = { + .initialEmin = dynDefaults.Emin, + .reciprocalBase = 1.f/mutDefaults.cascadeLuminanceBase, + .reciprocalKappa = reciprocalKappa, + .colorReliabilityFactor = hlsl::mix(mutDefaults.cascadeLuminanceBase,1.f,reciprocalKappa) + }, + .cascadeCount = constants.cascadeCount + }; + } + + return smart_refctd_ptr(new CSession(std::move(params)),dont_grab); +} + +} \ No newline at end of file diff --git a/40_PathTracer/src/renderer/CSession.cpp b/40_PathTracer/src/renderer/CSession.cpp new file mode 100644 index 000000000..43c91e69a --- /dev/null +++ b/40_PathTracer/src/renderer/CSession.cpp @@ -0,0 +1,239 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "renderer/CRenderer.h" + +namespace nbl::this_example +{ +using namespace nbl::core; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::hlsl; +using namespace nbl::video; + +// +bool CSession::init(video::IGPUCommandBuffer* cb) +{ + auto renderer = m_params.scene->getRenderer(); + auto& logger = renderer->getCreationParams().logger; + auto device = renderer->getDevice(); + + auto& immutables = m_active.immutables; + + // create the descriptors + core::vector infos; + core::vector writes; + { + auto addWrite = [&](const uint32_t binding, IGPUDescriptorSet::SDescriptorInfo&& info)->void + { + writes.emplace_back() = { + .binding = binding, + .arrayElement = 0, + .count = 1, + .info = reinterpret_cast(infos.size()) + }; + infos.push_back(std::move(info)); + }; + + // + auto dedicatedAllocate = [&](IDeviceMemoryBacked* memBacked, const std::string_view debugName)->bool + { + if (!memBacked) + { + logger.log("Failed to create Sensor \"%s\"'s \"%s\" in CSession::init()",ILogger::ELL_ERROR,m_params.name.c_str(),debugName.data()); + return false; + } + memBacked->setObjectDebugName(debugName.data()); + + auto mreqs = memBacked->getMemoryReqs(); + mreqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + if (!device->allocate(mreqs,memBacked,IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE).isValid()) + { + logger.log("Could not allocate memory for Sensor \"%s\"'s \"%s\" in CSession::init()",ILogger::ELL_ERROR,m_params.name.c_str(),debugName.data()); + return false; + } + return true; + }; + + // create UBO + { + IGPUBuffer::SCreationParams params = {}; + params.size = sizeof(m_params.uniforms); + params.usage = IGPUBuffer::E_USAGE_FLAGS::EUF_UNIFORM_BUFFER_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_INLINE_UPDATE_VIA_CMDBUF; + auto ubo = device->createBuffer(std::move(params)); + if (!dedicatedAllocate(ubo.get(),"Sensor UBO")) + return false; + addWrite(SensorDSBindings::UBO,SBufferRange{.offset=0,.size=sizeof(m_params.uniforms),.buffer=ubo}); + } + + auto createImage = [&]( + const std::string_view debugName, const E_FORMAT format, const uint16_t2 resolution, const uint16_t layers, std::bitset viewFormats={}, + const IGPUImage::E_USAGE_FLAGS extraUsages=IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT + ) -> SActiveResources::SImageWithViews + { + SActiveResources::SImageWithViews retval = {}; + { + IGPUImage::SCreationParams params = {}; + params.type = IGPUImage::E_TYPE::ET_2D; + params.samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + params.format = format; + params.extent.width = resolution[0]; + params.extent.height = resolution[1]; + params.extent.depth = 1; + params.mipLevels = 1; + params.arrayLayers = layers; + using image_usage_e = IGPUImage::E_USAGE_FLAGS; + params.usage = image_usage_e::EUF_STORAGE_BIT|image_usage_e::EUF_TRANSFER_DST_BIT|extraUsages; + if (m_params.type==sensor_type_e::Env) + { + params.arrayLayers *= 6; + params.flags |= IGPUImage::E_CREATE_FLAGS::ECF_CUBE_COMPATIBLE_BIT; + } + viewFormats.set(format); + if (viewFormats.count()>1) + { + params.flags |= IGPUImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; + params.flags |= IGPUImage::E_CREATE_FLAGS::ECF_EXTENDED_USAGE_BIT; + } + params.viewFormats = viewFormats; + auto image = device->createImage(std::move(params)); + if (!dedicatedAllocate(image.get(),debugName)) + return retval; + for (uint8_t f=0; f(f); + auto view = device->createImageView({ + .image = image, + .viewType = IGPUImageView::E_TYPE::ET_2D_ARRAY, + .format = viewFormat + }); + string viewDebugName = string(debugName)+" "+to_string(viewFormat)+" View"; + if (!view) + { + logger.log("Failed to create Sensor \"%s\"'s \"%s\" in CSession::init()",ILogger::ELL_ERROR,m_params.name.c_str(),viewDebugName.c_str()); + return {}; + } + view->setObjectDebugName(viewDebugName.c_str()); + retval.views[viewFormat] = std::move(view); + } + retval.image = std::move(image); + } + return retval; + }; + auto addImageWrite = [&](const uint32_t binding, const smart_refctd_ptr& view)->void + { + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = view; + info.info.image.imageLayout = IGPUImage::LAYOUT::GENERAL; + addWrite(binding,std::move(info)); + }; + immutables.scrambleKey = createImage("Scramble Key",E_FORMAT::EF_R32G32_UINT,promote(SSensorUniforms::ScrambleKeyTextureSize),1); + addImageWrite(SensorDSBindings::ScrambleKey,immutables.scrambleKey.views[E_FORMAT::EF_R32G32_UINT]); + + // create the render-sized images + auto createScreenSizedImage = [&](const std::string_view debugName, const E_FORMAT format, Args&&... args)->SActiveResources::SImageWithViews + { + return createImage(debugName,format,m_params.uniforms.renderSize,std::forward(args)...); + }; + immutables.sampleCount = createScreenSizedImage("Current Sample Count",E_FORMAT::EF_R16_UINT,1); + addImageWrite(SensorDSBindings::SampleCount,immutables.sampleCount.views[E_FORMAT::EF_R16_UINT]); + immutables.rwmcCascades = createScreenSizedImage("RWMC Cascades",E_FORMAT::EF_R32G32_UINT,m_params.uniforms.lastCascadeIndex+1); + addImageWrite(SensorDSBindings::RWMCCascades,immutables.rwmcCascades.views[E_FORMAT::EF_R32G32_UINT]); + immutables.albedo = createScreenSizedImage("Albedo",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); + addImageWrite(SensorDSBindings::Albedo,immutables.albedo.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]); + // Normal and Albedo should have used `EF_A2B10G10R10_SNORM_PACK32` but Nvidia doesn't support + immutables.normal = createScreenSizedImage("Normal",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); + addImageWrite(SensorDSBindings::Normal,immutables.normal.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]); + immutables.motion = createScreenSizedImage("Motion",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); + addImageWrite(SensorDSBindings::Motion,immutables.motion.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]); + immutables.mask = createScreenSizedImage("Mask",E_FORMAT::EF_R16_UNORM,1); + addImageWrite(SensorDSBindings::Mask,immutables.mask.views[E_FORMAT::EF_R16_UNORM]); + } + + // create descriptor set + { + auto layout = renderer->getConstructionParams().sensorDSLayout; + auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT,{&layout.get(),1}); + immutables.ds = pool->createDescriptorSet(std::move(layout)); + const char* DebugName = "Sensor Descriptor Set"; + if (!immutables.ds) + { + logger.log("Failed to create Sensor \"%s\"'s \"%s\" in CSession::init()",ILogger::ELL_ERROR,m_params.name.c_str(),DebugName); + return false; + } + immutables.ds->setObjectDebugName(DebugName); + for (auto& write : writes) + { + write.dstSet = immutables.ds.get(); + write.info = infos.data()+reinterpret_cast(write.info); + } + if (!device->updateDescriptorSets(writes,{})) + { + logger.log("Failed to write Sensor \"%s\"'s \"%s\" in CSession::init()",ILogger::ELL_ERROR,m_params.name.c_str(),DebugName); + return false; + } + } + + if (!immutables || !reset(m_params.initDynamics,cb)) + { + logger.log("Could not Init Session for sensor \"%s\" failed to reset!",ILogger::ELL_ERROR,m_params.name.c_str()); + deinit(); + return false; + } + +// TODO: fill scramble Key with noise + + return true; +} + +bool CSession::reset(const SSensorDynamics& newVal, IGPUCommandBuffer* cb) +{ + auto* const renderer = m_params.scene->getRenderer(); + auto* const device = renderer->getDevice(); + const auto& immutables = m_active.immutables; + + // slam the barriers as big as possible, it wont happen frequently + bool success = true; + const SMemoryBarrier before[] = { + { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .srcAccessMask = ACCESS_FLAGS::NONE, // because we don't care about reading previously written values + .dstStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT, + .dstAccessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS + } + }; + success = success && cb->pipelineBarrier(asset::EDF_NONE,{.memBarriers=before}); + auto clearImage = [cb,&success](const SActiveResources::SImageWithViews& img)->void + { + const IGPUImage::SSubresourceRange subresRng = { + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .levelCount = 1, + .layerCount = img.image->getCreationParameters().arrayLayers + }; + IGPUCommandBuffer::SClearColorValue color; + memset(&color,0,sizeof(color)); + success = success && cb->clearColorImage(img.image.get(),IGPUImage::LAYOUT::GENERAL,&color,1,&subresRng); + }; + clearImage(immutables.sampleCount); + clearImage(immutables.rwmcCascades); + clearImage(immutables.albedo); + clearImage(immutables.normal); + clearImage(immutables.motion); + clearImage(immutables.mask); + const SMemoryBarrier after[] = { + { + .srcStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT, + .srcAccessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS + } + }; + success = success && cb->pipelineBarrier(asset::EDF_NONE,{.memBarriers=after}); + + if (success) + m_active.prevSensorState = newVal; + return success; +} + +} \ No newline at end of file From 4b65e7086e3e2f532c6d5fc258fc1c21eb983a7f Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 17 Jan 2026 16:11:36 +0100 Subject: [PATCH 170/219] Corrected quaternion tests --- 59_QuaternionTests/CQuaternionTester.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index 71546d783..d9a33253b 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -92,13 +92,13 @@ class CQuaternionTester final : public ITester Date: Sat, 17 Jan 2026 20:17:13 +0100 Subject: [PATCH 171/219] Fixed examples 30 and 71 --- 30_ComputeShaderPathTracer/main.cpp | 13 ++++---- 71_RayTracingPipeline/include/common.hpp | 2 +- 71_RayTracingPipeline/main.cpp | 38 ++++++++++++------------ 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/30_ComputeShaderPathTracer/main.cpp b/30_ComputeShaderPathTracer/main.cpp index 54bc64495..82ab9fb91 100644 --- a/30_ComputeShaderPathTracer/main.cpp +++ b/30_ComputeShaderPathTracer/main.cpp @@ -7,6 +7,7 @@ #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/surface_transform.h" +#include #include "nbl/this_example/common.hpp" @@ -22,7 +23,7 @@ using namespace nbl::examples; // TODO: share push constants struct PTPushConstant { - matrix4SIMD invMVP; + hlsl::float32_t4x4 invMVP; int sampleCount; int depth; }; @@ -841,9 +842,9 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B m_camera.setProjectionMatrix([&]() { - static matrix4SIMD projection; + static hlsl::float32_t4x4 projection; - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); return projection; }()); @@ -878,9 +879,9 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B // Set Camera { core::vectorSIMDf cameraPosition(0, 5, -10); - matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + hlsl::float32_t4x4 proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix( core::radians(60.0f), - WindowDimensions.x / WindowDimensions.y, + float(WindowDimensions.x / WindowDimensions.y), 0.01f, 500.0f ); @@ -955,7 +956,7 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B // disregard surface/swapchain transformation for now const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); PTPushConstant pc; - viewProjectionMatrix.getInverseTransform(pc.invMVP); + pc.invMVP = hlsl::inverse(viewProjectionMatrix); pc.sampleCount = spp; pc.depth = depth; diff --git a/71_RayTracingPipeline/include/common.hpp b/71_RayTracingPipeline/include/common.hpp index 6727c879c..e6b538618 100644 --- a/71_RayTracingPipeline/include/common.hpp +++ b/71_RayTracingPipeline/include/common.hpp @@ -26,7 +26,7 @@ struct ReferenceObjectCpu { core::smart_refctd_ptr data; Material material; - core::matrix3x4SIMD transform; + hlsl::float32_t3x4 transform; }; diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index ecaf53b7f..307fd3e99 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -9,7 +9,10 @@ #include "nbl/builtin/hlsl/indirect_commands.hlsl" #include "nbl/examples/common/BuiltinResourcesApplication.hpp" - +#include +#include +#include +#include class RaytracingPipelineApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { @@ -476,9 +479,9 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui m_camera.setProjectionMatrix([&]() { - static matrix4SIMD projection; + static hlsl::float32_t4x4 projection; - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix( core::radians(m_cameraSetting.fov), io.DisplaySize.x / io.DisplaySize.y, m_cameraSetting.zNear, @@ -542,9 +545,9 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui // Set Camera { core::vectorSIMDf cameraPosition(0, 5, -10); - matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + hlsl::float32_t4x4 proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix( core::radians(60.0f), - WIN_W / WIN_H, + float(WIN_W / WIN_H), 0.01f, 500.0f ); @@ -620,18 +623,15 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto projectionMatrix = m_camera.getProjectionMatrix(); const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - modelMatrix.setRotation(quaternion(0, 0, 0)); + //hlsl::float32_t3x4 modelMatrix; - core::matrix4SIMD modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); + hlsl::float32_t4x4 modelViewProjectionMatrix = viewProjectionMatrix; if (m_cachedModelViewProjectionMatrix != modelViewProjectionMatrix) { m_frameAccumulationCounter = 0; m_cachedModelViewProjectionMatrix = modelViewProjectionMatrix; } - core::matrix4SIMD invModelViewProjectionMatrix; - modelViewProjectionMatrix.getInverseTransform(invModelViewProjectionMatrix); + hlsl::float32_t4x4 invModelViewProjectionMatrix = hlsl::inverse(modelViewProjectionMatrix); { IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; @@ -665,7 +665,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui pc.frameCounter = m_frameAccumulationCounter; const core::vector3df camPos = m_camera.getPosition().getAsVector3df(); pc.camPos = { camPos.X, camPos.Y, camPos.Z }; - memcpy(&pc.invMVP, invModelViewProjectionMatrix.pointer(), sizeof(pc.invMVP)); + pc.invMVP = invModelViewProjectionMatrix; cmdbuf->bindRayTracingPipeline(m_rayTracingPipeline.get()); cmdbuf->setRayTracingPipelineStackSize(m_rayTracingStackSize); @@ -1071,13 +1071,13 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui auto getTranslationMatrix = [](float32_t x, float32_t y, float32_t z) { - core::matrix3x4SIMD transform; - transform.setTranslation(nbl::core::vectorSIMDf(x, y, z, 0)); + hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); + hlsl::math::linalg::setTranslation(transform, float32_t3(x, y, z)); + return transform; }; - core::matrix3x4SIMD planeTransform; - planeTransform.setRotation(quaternion::fromAngleAxis(core::radians(-90.0f), vector3df_SIMD{ 1, 0, 0 })); + hlsl::float32_t3x4 planeTransform = hlsl::math::linalg::promote_affine<3,4,3,3>(hlsl::math::linalg::rotation_mat(core::radians(-90.0f), { 1,0,0 })); // triangles geometries auto geometryCreator = make_smart_refctd_ptr(); @@ -1228,7 +1228,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui inst.base.instanceCustomIndex = i; inst.base.instanceShaderBindingTableRecordOffset = isProceduralInstance ? 2 : 0; inst.base.mask = 0xFF; - inst.transform = isProceduralInstance ? matrix3x4SIMD() : cpuObjects[i].transform; + inst.transform = isProceduralInstance ? hlsl::float32_t3x4() : cpuObjects[i].transform; instance->instance = inst; } @@ -1467,7 +1467,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui float camXAngle = 32.f / 180.f * 3.14159f; } m_cameraSetting; - Camera m_camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera m_camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); Light m_light = { .direction = {-1.0f, -1.0f, -0.4f}, @@ -1519,7 +1519,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui smart_refctd_ptr m_converter; - core::matrix4SIMD m_cachedModelViewProjectionMatrix; + hlsl::float32_t4x4 m_cachedModelViewProjectionMatrix; bool m_useIndirectCommand = false; }; From 6576208acfa689f979538fe95b4694d793fe972c Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 19 Jan 2026 11:21:12 +0100 Subject: [PATCH 172/219] Fixed quaternion tests --- 22_CppCompat/main.cpp | 3 +++ 59_QuaternionTests/CQuaternionTester.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index aa840607d..f69c58dbc 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -455,12 +455,15 @@ void cpu_tests() float32_t3x4 b; float32_t3 v; float32_t4 u; + float32_t x; mul(a, b); mul(b, a); mul(a, v); mul(v, b); mul(u, a); mul(b, u); + mul(a, x); + mul(b, x); float32_t4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); a - a; diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index d9a33253b..245a0ec13 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -83,7 +83,7 @@ class CQuaternionTester final : public ITester Date: Mon, 19 Jan 2026 12:06:13 +0100 Subject: [PATCH 173/219] Removed scalar matrix multiplication test --- 22_CppCompat/main.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index f69c58dbc..aa840607d 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -455,15 +455,12 @@ void cpu_tests() float32_t3x4 b; float32_t3 v; float32_t4 u; - float32_t x; mul(a, b); mul(b, a); mul(a, v); mul(v, b); mul(u, a); mul(b, u); - mul(a, x); - mul(b, x); float32_t4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); a - a; From 1ca5358d553484fad0892b28542064678c053f49 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 19 Jan 2026 13:39:37 +0100 Subject: [PATCH 174/219] Fixed example 34 --- 34_DebugDraw/main.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp index 6f699f091..f2dd6210d 100644 --- a/34_DebugDraw/main.cpp +++ b/34_DebugDraw/main.cpp @@ -3,6 +3,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" +#include class DebugDrawSampleApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { @@ -56,7 +57,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti constexpr float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; core::vectorSIMDf cameraPosition(14, 8, 12); core::vectorSIMDf cameraTarget(0, 0, 0); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), float(WIN_W) / WIN_H, zNear, zFar); + hlsl::float32_t4x4 projectionMatrix = hlsl::math::thin_lens::rhPerspectiveFovMatrix(core::radians(fov), float(WIN_W) / WIN_H, zNear, zFar); camera = Camera(cameraPosition, cameraTarget, projectionMatrix, moveSpeed, rotateSpeed); } @@ -195,9 +196,6 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti camera.endInputProcessing(nextPresentationTimestamp); } - float32_t4x4 viewProjectionMatrix; - memcpy(&viewProjectionMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjectionMatrix)); // TODO: get rid of legacy transform - auto* queue = getGraphicsQueue(); asset::SViewport viewport; @@ -238,7 +236,7 @@ class DebugDrawSampleApp final : public SimpleWindowedApplication, public Builti ext::debug_draw::DrawAABB::DrawParameters drawParams; drawParams.commandBuffer = cmdbuf; - drawParams.cameraMat = viewProjectionMatrix; + drawParams.cameraMat = camera.getConcatenatedMatrix(); if (!drawAABB->renderSingle(drawParams, testAABB, float32_t4{ 1, 0, 0, 1 })) m_logger->log("Unable to draw AABB with single draw pipeline!", ILogger::ELL_ERROR); From 587cbff28b1d0b42f2f704c3ba9b247ad0276590 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 19 Jan 2026 18:09:40 +0100 Subject: [PATCH 175/219] Fixed example 73 --- 73_GeometryInspector/main.cpp | 58 ++++++++--------------------------- 1 file changed, 12 insertions(+), 46 deletions(-) diff --git a/73_GeometryInspector/main.cpp b/73_GeometryInspector/main.cpp index 8a487d707..570ce52d2 100644 --- a/73_GeometryInspector/main.cpp +++ b/73_GeometryInspector/main.cpp @@ -4,6 +4,7 @@ #include "common.hpp" #include "../3rdparty/portable-file-dialogs/portable-file-dialogs.h" +#include #ifdef NBL_BUILD_MITSUBA_LOADER #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" @@ -26,35 +27,6 @@ class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinR }; public: - static float32_t4x4 intofloat32_t4x4(const matrix4SIMD& mat) - { - return float32_t4x4{ - mat.rows[0].x, mat.rows[0].y, mat.rows[0].z, mat.rows[0].w, - mat.rows[1].x, mat.rows[1].y, mat.rows[1].z, mat.rows[1].w, - mat.rows[2].x, mat.rows[2].y, mat.rows[2].z, mat.rows[2].w, - mat.rows[3].x, mat.rows[3].y, mat.rows[3].z, mat.rows[3].w, - }; - } - - static float32_t4x4 intofloat32_t4x4(const matrix3x4SIMD& mat) - { - return float32_t4x4{ - mat.rows[0].x, mat.rows[0].y, mat.rows[0].z, mat.rows[0].w, - mat.rows[1].x, mat.rows[1].y, mat.rows[1].z, mat.rows[1].w, - mat.rows[2].x, mat.rows[2].y, mat.rows[2].z, mat.rows[2].w, - 0.0f, 0.0f, 0.0f, 1.0f, - }; - } - - static float32_t4x4 intofloat32_t4x4(const float32_t3x4& mat) - { - return float32_t4x4{ - mat[0], - mat[1], - mat[2], - float32_t4(0.0f, 0.0f, 0.0f, 1.0f), - }; - } inline GeometryInspectorApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), device_base_t({1280,720}, EF_D32_SFLOAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} @@ -159,9 +131,9 @@ class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinR m_camera.setProjectionMatrix([&]() { - static matrix4SIMD projection; + static hlsl::float32_t4x4 projection; - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix( core::radians(m_cameraSetting.fov), io.DisplaySize.x / io.DisplaySize.y, m_cameraSetting.zNear, @@ -225,10 +197,10 @@ class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinR auto& selectedInstance = m_renderer->getInstance(m_selectedMesh); - imguizmoM16InOut.view = hlsl::transpose(intofloat32_t4x4(m_camera.getViewMatrix())); - imguizmoM16InOut.projection = hlsl::transpose(intofloat32_t4x4(m_camera.getProjectionMatrix())); + imguizmoM16InOut.view = hlsl::transpose(hlsl::math::linalg::promote_affine<4, 4, 3, 4>(m_camera.getViewMatrix())); + imguizmoM16InOut.projection = hlsl::transpose(m_camera.getProjectionMatrix()); imguizmoM16InOut.projection[1][1] *= -1.f; // Flip y coordinates. https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ - imguizmoM16InOut.model = hlsl::transpose(intofloat32_t4x4(selectedInstance.world)); + imguizmoM16InOut.model = hlsl::transpose(hlsl::math::linalg::promote_affine<4, 4, 3, 4>(selectedInstance.world)); { m_transformParams.enableViewManipulate = true; EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], m_transformParams); @@ -385,16 +357,10 @@ class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinR } // draw scene - float32_t3x4 viewMatrix; - float32_t4x4 viewProjMatrix; - { - // TODO: get rid of legacy matrices - { - memcpy(&viewMatrix,m_camera.getViewMatrix().pointer(),sizeof(viewMatrix)); - memcpy(&viewProjMatrix,m_camera.getConcatenatedMatrix().pointer(),sizeof(viewProjMatrix)); - } - m_renderer->render(cb,CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix)); - } + float32_t3x4 viewMatrix = m_camera.getViewMatrix(); + float32_t4x4 viewProjMatrix = m_camera.getConcatenatedMatrix(); + + m_renderer->render(cb,CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix)); const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; const auto& renderInstance = m_renderer->getInstance(m_selectedMesh); @@ -700,7 +666,7 @@ class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinR { const auto measure = hlsl::length(diagonal); const auto aspectRatio = float(m_window->getWidth())/float(m_window->getHeight()); - m_camera.setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(1.2f,aspectRatio,distance*measure*0.1,measure*4.0)); + m_camera.setProjectionMatrix(hlsl::math::thin_lens::rhPerspectiveFovMatrix(1.2f,aspectRatio,distance*measure*0.1f,measure*4.0f)); m_camera.setMoveSpeed(measure*0.04); } const auto pos = bound.maxVx+diagonal*distance; @@ -748,7 +714,7 @@ class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinR float camXAngle = 32.f / 180.f * 3.14159f; } m_cameraSetting; - Camera m_camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera m_camera = Camera(core::vectorSIMDf(0,0,0), core::vectorSIMDf(0,0,0), hlsl::float32_t4x4()); // mutables std::string m_modelPath; From edd1e0b6d5b016448199003539833252fa7d8a37 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 19 Jan 2026 21:31:29 +0100 Subject: [PATCH 176/219] add beauty target --- 40_PathTracer/include/renderer/CRenderer.h | 55 ++++---------------- 40_PathTracer/include/renderer/CSession.h | 55 ++++++++++++++++++-- 40_PathTracer/main.cpp | 3 +- 40_PathTracer/src/renderer/CRenderer.cpp | 18 ++++--- 40_PathTracer/src/renderer/CSession.cpp | 60 ++++++++++++---------- 5 files changed, 105 insertions(+), 86 deletions(-) diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h index 9e4089e12..384e6387c 100644 --- a/40_PathTracer/include/renderer/CRenderer.h +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -6,6 +6,7 @@ #include "renderer/CScene.h" +#include "renderer/CSession.h" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" @@ -46,17 +47,6 @@ namespace nbl::this_example class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovable { public: - enum class RenderMode : uint8_t - { - Previs, - Beauty, - //Albedo, - //Normal, - //Motion, - DebugIDs, - Count - }; - // constexpr static video::SPhysicalDeviceFeatures RequiredDeviceFeatures() { @@ -135,7 +125,7 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl core::smart_refctd_ptr sensorDSLayout; // TODO - std::array,uint8_t(RenderMode::Count)> renderingPipelines; + std::array,uint8_t(CSession::RenderMode::Count)> renderingPipelines; // core::smart_refctd_ptr commandBuffers[FramesInFlight]; @@ -176,20 +166,21 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl nbl::ext::EnvmapImportanceSampling::EnvmapImportanceSampling m_envMapImportanceSampling; #endif - // rwmc resolve, autoexposure first pass - core::smart_refctd_ptr rwmcResolveAndLumaMeasure; // TODO: autoexposure, and first axis of FFT +// Denoiser + // TODO: autoexposure + core::smart_refctd_ptr lumaMeasure; // TODO: motion vector stuff - // compute and apply exposure, interleave into OptiX input formats, etc. - core::smart_refctd_ptr preOptiXDenoise; // TODO + // rwmc resolve, apply exposure, interleave into OptiX input formats + core::smart_refctd_ptr rwmcResolve; // TODO: OIDN denoise // deinterlave from OptiX output format, perform first axis of FFT - core::smart_refctd_ptr postOptiXDenoise; // TODO + core::smart_refctd_ptr postDenoise; // TODO // second axis FFT, spectrum multiply and iFFT core::smart_refctd_ptr secondAxisBloom; // TODO // first axis iFFT, tonemap, encode into final EXR format core::smart_refctd_ptr secondAxisFFTTonemap; // TODO - // Present +// Presenter (invokes denoiser) core::smart_refctd_ptr presentRenderpass; // TODO core::smart_refctd_ptr regularPresent; // TODO core::smart_refctd_ptr cubemapPresent; // TODO @@ -201,33 +192,5 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl SCachedConstructionParams m_construction; }; -} - -// -namespace nbl::system::impl -{ -template<> -struct to_string_helper -{ - private: - using enum_t = nbl::this_example::CRenderer::RenderMode; - - public: - static inline std::string __call(const enum_t value) - { - switch (value) - { - case enum_t::Beauty: - return "Beauty"; - case enum_t::Previs: - return "Previs"; - case enum_t::DebugIDs: - return "DebugIDs"; - default: - break; - } - return ""; - } -}; } #endif diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h index 27353e8d2..66ce4a7e5 100644 --- a/40_PathTracer/include/renderer/CSession.h +++ b/40_PathTracer/include/renderer/CSession.h @@ -82,14 +82,16 @@ struct SensorDSBindings NBL_CONSTEXPR_STATIC_INLINE uint32_t SampleCount = 2; // R64_UINT with packing RGB14E6 or RGB14E7 and using rest for spp in the cascade NBL_CONSTEXPR_STATIC_INLINE uint32_t RWMCCascades = 3; + // RGB5E9 + NBL_CONSTEXPR_STATIC_INLINE uint32_t Beauty = 4; // R10G10B10_UNORM - NBL_CONSTEXPR_STATIC_INLINE uint32_t Albedo = 4; + NBL_CONSTEXPR_STATIC_INLINE uint32_t Albedo = 5; // R10G10B10_SNORM - NBL_CONSTEXPR_STATIC_INLINE uint32_t Normal = 5; + NBL_CONSTEXPR_STATIC_INLINE uint32_t Normal = 6; // R10G10B10_SNORM - NBL_CONSTEXPR_STATIC_INLINE uint32_t Motion = 6; + NBL_CONSTEXPR_STATIC_INLINE uint32_t Motion = 7; // R16_UNORM - NBL_CONSTEXPR_STATIC_INLINE uint32_t Mask = 7; + NBL_CONSTEXPR_STATIC_INLINE uint32_t Mask = 8; }; } @@ -101,6 +103,16 @@ class CScene; class CSession final : public core::IReferenceCounted, public core::InterfaceUnmovable { public: + enum class RenderMode : uint8_t + { + Previs, + Beauty, + //Albedo, + //Normal, + //Motion, + DebugIDs, + Count + }; using sensor_t = CSceneLoader::SLoadResult::SSensor; using sensor_type_e = sensor_t::SMutable::Raygen::Type; @@ -148,14 +160,47 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm return bool(scrambleKey) && sampleCount && rwmcCascades && albedo && normal && motion && mask && ds; } - SImageWithViews scrambleKey = {}, sampleCount = {}, rwmcCascades = {}, albedo = {}, normal = {}, motion = {}, mask = {}; + // QUESTION: No idea how to marry RWMC with Temporal Denoise, do we denoise separately per cascade? + // ANSWER: RWMC relies on many spp, can use denoised/reprojected to confidence measures from other cascades. + // Shouldn't touch the previous frame, denoiser needs to know what was on screen last frame, only touch current. + // QUESTION: with temporal denoise do we turn the `sampleCount` into a `sequenceOffset` texutre? + SImageWithViews scrambleKey = {}, sampleCount = {}, beauty = {}, rwmcCascades = {}, albedo = {}, normal = {}, motion = {}, mask = {}; // stores all the sensor data required core::smart_refctd_ptr ds = {}; + // }; SImmutables immutables = {}; SSensorDynamics prevSensorState = {}; } m_active = {}; }; +} + +// +namespace nbl::system::impl +{ +template<> +struct to_string_helper +{ + private: + using enum_t = nbl::this_example::CSession::RenderMode; + + public: + static inline std::string __call(const enum_t value) + { + switch (value) + { + case enum_t::Beauty: + return "Beauty"; + case enum_t::Previs: + return "Previs"; + case enum_t::DebugIDs: + return "DebugIDs"; + default: + break; + } + return ""; + } +}; } #endif diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 45c0e3ef2..6e3ece46c 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -62,7 +62,7 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes m_logger->log("Build Info:\n%s",ILogger::ELL_INFO,j.dump(4).c_str()); } - // TODO: remove +// TODO: remove constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; // TODO: remove constexpr static inline uint32_t MaxFramesInFlight = 3u; constexpr static inline uint8_t MaxUITextureCount = 1u; // TODO: remove @@ -187,6 +187,7 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes cb->end(); } session->deinit(); + scene_daily_pt = nullptr; // Load Custom Shader diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index 0d68e958d..4accf9e0c 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -46,7 +46,7 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) const auto samplerDefaultRepeat = device->createSampler({}); // create the layouts - smart_refctd_ptr renderingLayouts[uint8_t(RenderMode::Count)]; + smart_refctd_ptr renderingLayouts[uint8_t(CSession::RenderMode::Count)]; { constexpr auto RTStages = hlsl::ShaderStage::ESS_ALL_RAY_TRACING | hlsl::ShaderStage::ESS_COMPUTE; // descriptor @@ -162,6 +162,7 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) UBOBinding, singleStorageImage(SensorDSBindings::ScrambleKey), singleStorageImage(SensorDSBindings::SampleCount), + singleStorageImage(SensorDSBindings::Beauty), singleStorageImage(SensorDSBindings::RWMCCascades), singleStorageImage(SensorDSBindings::Albedo), singleStorageImage(SensorDSBindings::Normal), @@ -175,18 +176,19 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) } // but many push constant ranges - SPushConstantRange pcRanges[uint8_t(RenderMode::Count)]; - auto setPCRange = [&pcRanges](const RenderMode mode)->void + using render_mode_e = CSession::RenderMode; + SPushConstantRange pcRanges[uint8_t(render_mode_e::Count)]; + auto setPCRange = [&pcRanges](const render_mode_e mode)->void { pcRanges[uint8_t(mode)] = {.stageFlags=RTStages,.offset=0,.size=sizeof(T)}; }; - setPCRange.operator()(RenderMode::Previs); - setPCRange.operator()(RenderMode::Beauty); - setPCRange.operator()(RenderMode::DebugIDs); - for (uint8_t t=0; t(render_mode_e::Previs); + setPCRange.operator()(render_mode_e::Beauty); + setPCRange.operator()(render_mode_e::DebugIDs); + for (uint8_t t=0; tcreatePipelineLayout({pcRanges+t,1},params.sceneDSLayout,params.sensorDSLayout); - string debugName = to_string(static_cast(t))+"Rendering Pipeline Layout"; + string debugName = to_string(static_cast(t))+"Rendering Pipeline Layout"; if (checkNullObject(renderingLayouts[t],debugName)) return nullptr; } diff --git a/40_PathTracer/src/renderer/CSession.cpp b/40_PathTracer/src/renderer/CSession.cpp index 43c91e69a..fef8663ee 100644 --- a/40_PathTracer/src/renderer/CSession.cpp +++ b/40_PathTracer/src/renderer/CSession.cpp @@ -66,6 +66,7 @@ bool CSession::init(video::IGPUCommandBuffer* cb) addWrite(SensorDSBindings::UBO,SBufferRange{.offset=0,.size=sizeof(m_params.uniforms),.buffer=ubo}); } + const auto allowedFormatUsages = device->getPhysicalDevice()->getImageFormatUsagesOptimalTiling(); auto createImage = [&]( const std::string_view debugName, const E_FORMAT format, const uint16_t2 resolution, const uint16_t layers, std::bitset viewFormats={}, const IGPUImage::E_USAGE_FLAGS extraUsages=IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT @@ -73,38 +74,43 @@ bool CSession::init(video::IGPUCommandBuffer* cb) { SActiveResources::SImageWithViews retval = {}; { - IGPUImage::SCreationParams params = {}; - params.type = IGPUImage::E_TYPE::ET_2D; - params.samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; - params.format = format; - params.extent.width = resolution[0]; - params.extent.height = resolution[1]; - params.extent.depth = 1; - params.mipLevels = 1; - params.arrayLayers = layers; - using image_usage_e = IGPUImage::E_USAGE_FLAGS; - params.usage = image_usage_e::EUF_STORAGE_BIT|image_usage_e::EUF_TRANSFER_DST_BIT|extraUsages; - if (m_params.type==sensor_type_e::Env) { - params.arrayLayers *= 6; - params.flags |= IGPUImage::E_CREATE_FLAGS::ECF_CUBE_COMPATIBLE_BIT; - } - viewFormats.set(format); - if (viewFormats.count()>1) - { - params.flags |= IGPUImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; - params.flags |= IGPUImage::E_CREATE_FLAGS::ECF_EXTENDED_USAGE_BIT; + IGPUImage::SCreationParams params = {}; + params.type = IGPUImage::E_TYPE::ET_2D; + params.samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + params.format = format; + params.extent.width = resolution[0]; + params.extent.height = resolution[1]; + params.extent.depth = 1; + params.mipLevels = 1; + params.arrayLayers = layers; + using image_usage_e = IGPUImage::E_USAGE_FLAGS; + params.usage = image_usage_e::EUF_TRANSFER_DST_BIT|extraUsages; + if (m_params.type==sensor_type_e::Env) + { + params.arrayLayers *= 6; + params.flags |= IGPUImage::E_CREATE_FLAGS::ECF_CUBE_COMPATIBLE_BIT; + } + viewFormats.set(format); + if (viewFormats.count()>1) + { + params.flags |= IGPUImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; + params.flags |= IGPUImage::E_CREATE_FLAGS::ECF_EXTENDED_USAGE_BIT; + } + params.viewFormats = viewFormats; + retval.image = device->createImage(std::move(params)); + if (!dedicatedAllocate(retval.image.get(),debugName)) + return {}; } - params.viewFormats = viewFormats; - auto image = device->createImage(std::move(params)); - if (!dedicatedAllocate(image.get(),debugName)) - return retval; + const auto& params = retval.image->getCreationParameters(); for (uint8_t f=0; f(f); + const auto thisFormatUsages = static_cast>(allowedFormatUsages[viewFormat]); auto view = device->createImageView({ - .image = image, + .subUsages = retval.image->getCreationParameters().usage, + .image = retval.image, .viewType = IGPUImageView::E_TYPE::ET_2D_ARRAY, .format = viewFormat }); @@ -117,7 +123,6 @@ bool CSession::init(video::IGPUCommandBuffer* cb) view->setObjectDebugName(viewDebugName.c_str()); retval.views[viewFormat] = std::move(view); } - retval.image = std::move(image); } return retval; }; @@ -138,6 +143,8 @@ bool CSession::init(video::IGPUCommandBuffer* cb) }; immutables.sampleCount = createScreenSizedImage("Current Sample Count",E_FORMAT::EF_R16_UINT,1); addImageWrite(SensorDSBindings::SampleCount,immutables.sampleCount.views[E_FORMAT::EF_R16_UINT]); + immutables.beauty = createScreenSizedImage("Beauty",E_FORMAT::EF_E5B9G9R9_UFLOAT_PACK32,1,std::bitset().set(E_FORMAT::EF_R32_UINT)); + addImageWrite(SensorDSBindings::Beauty,immutables.beauty.views[E_FORMAT::EF_R32_UINT]); immutables.rwmcCascades = createScreenSizedImage("RWMC Cascades",E_FORMAT::EF_R32G32_UINT,m_params.uniforms.lastCascadeIndex+1); addImageWrite(SensorDSBindings::RWMCCascades,immutables.rwmcCascades.views[E_FORMAT::EF_R32G32_UINT]); immutables.albedo = createScreenSizedImage("Albedo",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); @@ -216,6 +223,7 @@ bool CSession::reset(const SSensorDynamics& newVal, IGPUCommandBuffer* cb) success = success && cb->clearColorImage(img.image.get(),IGPUImage::LAYOUT::GENERAL,&color,1,&subresRng); }; clearImage(immutables.sampleCount); + clearImage(immutables.beauty); clearImage(immutables.rwmcCascades); clearImage(immutables.albedo); clearImage(immutables.normal); From 966ec2064e7f4fe17075f2a4717956071e63fd1d Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 19 Jan 2026 21:32:40 +0100 Subject: [PATCH 177/219] correct last commit --- 40_PathTracer/src/renderer/CSession.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/40_PathTracer/src/renderer/CSession.cpp b/40_PathTracer/src/renderer/CSession.cpp index fef8663ee..5868388fa 100644 --- a/40_PathTracer/src/renderer/CSession.cpp +++ b/40_PathTracer/src/renderer/CSession.cpp @@ -109,7 +109,7 @@ bool CSession::init(video::IGPUCommandBuffer* cb) const auto viewFormat = static_cast(f); const auto thisFormatUsages = static_cast>(allowedFormatUsages[viewFormat]); auto view = device->createImageView({ - .subUsages = retval.image->getCreationParameters().usage, + .subUsages = retval.image->getCreationParameters().usage & thisFormatUsages, .image = retval.image, .viewType = IGPUImageView::E_TYPE::ET_2D_ARRAY, .format = viewFormat From 9536512373153fd4b68b83d537fa718604b5ed5d Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 19 Jan 2026 21:58:47 +0100 Subject: [PATCH 178/219] move common stuff to HLSL files --- 40_PathTracer/include/renderer/CScene.h | 44 +--------- 40_PathTracer/include/renderer/CSession.h | 88 +------------------ .../include/renderer/shaders/common.hlsl | 9 ++ .../include/renderer/shaders/rwmc.hlsl | 30 +++++++ .../include/renderer/shaders/scene.hlsl | 51 +++++++++++ .../include/renderer/shaders/session.hlsl | 73 +++++++++++++++ 40_PathTracer/main.cpp | 2 + 7 files changed, 167 insertions(+), 130 deletions(-) create mode 100644 40_PathTracer/include/renderer/shaders/common.hlsl create mode 100644 40_PathTracer/include/renderer/shaders/rwmc.hlsl create mode 100644 40_PathTracer/include/renderer/shaders/scene.hlsl create mode 100644 40_PathTracer/include/renderer/shaders/session.hlsl diff --git a/40_PathTracer/include/renderer/CScene.h b/40_PathTracer/include/renderer/CScene.h index 25f8c100c..0ebb36425 100644 --- a/40_PathTracer/include/renderer/CScene.h +++ b/40_PathTracer/include/renderer/CScene.h @@ -7,50 +7,8 @@ #include "io/CSceneLoader.h" #include "renderer/CSession.h" +#include "renderer/shaders/scene.hlsl" -// TODO: move to HLSL file -namespace nbl::this_example -{ -struct SSceneUniforms -{ - struct SIndirectInit - { - // -// bda_t pQuantizedSequence; - // because the PDF is rescaled to log2(luma)/log2(Max)*255 - // and you get it out as `exp2(texValue)*factor` - hlsl::float32_t envmapPDFNormalizationFactor; - hlsl::float16_t envmapScale; - uint16_t unused; - } indirect; -}; - -struct SceneDSBindings -{ - NBL_CONSTEXPR_STATIC_INLINE uint32_t UBO = 0; - // RGB9E5 post multiplied by a max value - NBL_CONSTEXPR_STATIC_INLINE uint32_t Envmap = 1; - NBL_CONSTEXPR_STATIC_INLINE uint32_t TLASes = 2; - NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 3; - NBL_CONSTEXPR_STATIC_INLINE uint32_t SampledImages = 4; - // UINT8 log2(luma) meant for stochastic descent or querying the PDF of the Warp Map - NBL_CONSTEXPR_STATIC_INLINE uint32_t EnvmapPDF = 5; - // R16G16_UNORM or R32G32_SFLOAT (depending on envmap resolution) meant for skipping stochastic descent - NBL_CONSTEXPR_STATIC_INLINE uint32_t EnvmapWarpMap = 6; -}; - -struct SceneDSBindingCounts -{ - // Mostly held back by Intel ARC, important to not have more than this many light geometries, can increase to - // https://vulkan.gpuinfo.org/displayextensionproperty.php?extensionname=VK_KHR_acceleration_structure&extensionproperty=maxDescriptorSetUpdateAfterBindAccelerationStructures&platform=all - // https://vulkan.gpuinfo.org/displayextensionproperty.php?extensionname=VK_KHR_acceleration_structure&extensionproperty=maxPerStageDescriptorUpdateAfterBindAccelerationStructures&platform=all - NBL_CONSTEXPR_STATIC_INLINE uint32_t TLASes = 65535; - // Reasonable combo (esp if we implement a cache over the DS) - NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 128; - // Spec mandated minimum - NBL_CONSTEXPR_STATIC_INLINE uint32_t SampledImages = 500000; -}; -} namespace nbl::this_example { diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h index 66ce4a7e5..01776a774 100644 --- a/40_PathTracer/include/renderer/CSession.h +++ b/40_PathTracer/include/renderer/CSession.h @@ -5,95 +5,9 @@ #define _NBL_THIS_EXAMPLE_C_SESSION_H_INCLUDED_ -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl" -#include "nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl" - #include "io/CSceneLoader.h" - -// TODO: move to HLSL file -namespace nbl::this_example -{ -#define MAX_SPP_LOG2 15 -NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxSPPLog2 = MAX_SPP_LOG2; -// need to be able to count (represent) both 0 and Max -NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSPP = (0x1u << MaxSPPLog2) - 1; - -// We do it so weirdly because https://github.com/microsoft/DirectXShaderCompiler/issues/7131 -#define MAX_CASCADE_COUNT_LOG2 3 -struct SSensorUniforms -{ - NBL_CONSTEXPR_STATIC_INLINE uint16_t ScrambleKeyTextureSize = 512; - -#define MAX_PATH_DEPTH_LOG2 7 - NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxCascadeCountLog2 = MAX_CASCADE_COUNT_LOG2; - NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxPathDepthLog2 = MAX_PATH_DEPTH_LOG2; - - hlsl::float32_t2 rcpPixelSize; - hlsl::rwmc::SplattingParameters splatting; - hlsl::uint16_t2 renderSize; - // bitfield - uint16_t lastCascadeIndex : MAX_CASCADE_COUNT_LOG2; - uint16_t unused0 : 13; - // bitfield - uint16_t unused1 : 1; - uint16_t hideEnvironment : 1; - uint16_t lastPathDepth : MAX_PATH_DEPTH_LOG2; - uint16_t lastNoRussianRouletteDepth : MAX_PATH_DEPTH_LOG2; -}; -#undef MAX_PATH_DEPTH_LOG2 - -// no uint16_t to be used because its going to be a push constant -struct SSensorDynamics -{ - // assuming input will be ndc = [-1,1]^2 x {-1} - hlsl::float32_t3x4 ndcToRay; - hlsl::float32_t tMax; - // we can adaptively sample per-pixel, but - uint32_t minSPP : MAX_SPP_LOG2; - uint32_t maxSPP : MAX_SPP_LOG2; - uint32_t unused : 2; -}; - -// no uint16_t to be used because its going to be a push constant -struct SResolveConstants -{ - struct SProtoRWMC - { - hlsl::float32_t initialEmin; - hlsl::float32_t reciprocalBase; - hlsl::float32_t reciprocalKappa; - hlsl::float32_t colorReliabilityFactor; - } rwmc; - uint32_t cascadeCount : BOOST_PP_ADD(MAX_CASCADE_COUNT_LOG2,1); - uint32_t unused : 28; -}; -#undef MAX_CASCADE_COUNT_LOG2 - - - -struct SensorDSBindings -{ - NBL_CONSTEXPR_STATIC_INLINE uint32_t UBO = 0; - // R32G32_UINT storage texture (can get animated/rearranged) - NBL_CONSTEXPR_STATIC_INLINE uint32_t ScrambleKey = 1; - // R16_UINT Per Pixel Sample Count (so don't need to read all RWMC cascades) - NBL_CONSTEXPR_STATIC_INLINE uint32_t SampleCount = 2; - // R64_UINT with packing RGB14E6 or RGB14E7 and using rest for spp in the cascade - NBL_CONSTEXPR_STATIC_INLINE uint32_t RWMCCascades = 3; - // RGB5E9 - NBL_CONSTEXPR_STATIC_INLINE uint32_t Beauty = 4; - // R10G10B10_UNORM - NBL_CONSTEXPR_STATIC_INLINE uint32_t Albedo = 5; - // R10G10B10_SNORM - NBL_CONSTEXPR_STATIC_INLINE uint32_t Normal = 6; - // R10G10B10_SNORM - NBL_CONSTEXPR_STATIC_INLINE uint32_t Motion = 7; - // R16_UNORM - NBL_CONSTEXPR_STATIC_INLINE uint32_t Mask = 8; -}; -} +#include "renderer/shaders/session.hlsl" namespace nbl::this_example diff --git a/40_PathTracer/include/renderer/shaders/common.hlsl b/40_PathTracer/include/renderer/shaders/common.hlsl new file mode 100644 index 000000000..178159e62 --- /dev/null +++ b/40_PathTracer/include/renderer/shaders/common.hlsl @@ -0,0 +1,9 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ + + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + + + +#endif // _NBL_THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ diff --git a/40_PathTracer/include/renderer/shaders/rwmc.hlsl b/40_PathTracer/include/renderer/shaders/rwmc.hlsl new file mode 100644 index 000000000..2db12ddb0 --- /dev/null +++ b/40_PathTracer/include/renderer/shaders/rwmc.hlsl @@ -0,0 +1,30 @@ +#ifndef _NBL_THIS_EXAMPLE_RWMC_HLSL_INCLUDED_ +#define _NBL_THIS_EXAMPLE_RWMC_HLSL_INCLUDED_ + + +#include "renderer/shaders/common.hlsl" +#include "nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl" +#include "nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl" + + +namespace nbl::this_example +{ +// We do it so weirdly because https://github.com/microsoft/DirectXShaderCompiler/issues/7131 +#define MAX_CASCADE_COUNT_LOG2 3 + +// no uint16_t to be used because its going to be a push constant +struct SResolveConstants // TODO: move somewhere +{ + struct SProtoRWMC + { + hlsl::float32_t initialEmin; + hlsl::float32_t reciprocalBase; + hlsl::float32_t reciprocalKappa; + hlsl::float32_t colorReliabilityFactor; + } rwmc; + uint32_t cascadeCount : BOOST_PP_ADD(MAX_CASCADE_COUNT_LOG2,1); + uint32_t unused : 28; +}; +} + +#endif // _NBL_THIS_EXAMPLE_RWMC_HLSL_INCLUDED_ diff --git a/40_PathTracer/include/renderer/shaders/scene.hlsl b/40_PathTracer/include/renderer/shaders/scene.hlsl new file mode 100644 index 000000000..f4a30d3b9 --- /dev/null +++ b/40_PathTracer/include/renderer/shaders/scene.hlsl @@ -0,0 +1,51 @@ +#ifndef _NBL_THIS_EXAMPLE_SCENE_HLSL_INCLUDED_ +#define _NBL_THIS_EXAMPLE_SCENE_HLSL_INCLUDED_ + + +#include "renderer/shaders/common.hlsl" + + +namespace nbl::this_example +{ +struct SSceneUniforms +{ + struct SIndirectInit + { + // +// bda_t pQuantizedSequence; + // because the PDF is rescaled to log2(luma)/log2(Max)*255 + // and you get it out as `exp2(texValue)*factor` + hlsl::float32_t envmapPDFNormalizationFactor; + hlsl::float16_t envmapScale; + uint16_t unused; + } indirect; +}; + +struct SceneDSBindings +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t UBO = 0; + // RGB9E5 post multiplied by a max value + NBL_CONSTEXPR_STATIC_INLINE uint32_t Envmap = 1; + NBL_CONSTEXPR_STATIC_INLINE uint32_t TLASes = 2; + NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 3; + NBL_CONSTEXPR_STATIC_INLINE uint32_t SampledImages = 4; + // UINT8 log2(luma) meant for stochastic descent or querying the PDF of the Warp Map + NBL_CONSTEXPR_STATIC_INLINE uint32_t EnvmapPDF = 5; + // R16G16_UNORM or R32G32_SFLOAT (depending on envmap resolution) meant for skipping stochastic descent + NBL_CONSTEXPR_STATIC_INLINE uint32_t EnvmapWarpMap = 6; +}; + +struct SceneDSBindingCounts +{ + // Mostly held back by Intel ARC, important to not have more than this many light geometries, can increase to + // https://vulkan.gpuinfo.org/displayextensionproperty.php?extensionname=VK_KHR_acceleration_structure&extensionproperty=maxDescriptorSetUpdateAfterBindAccelerationStructures&platform=all + // https://vulkan.gpuinfo.org/displayextensionproperty.php?extensionname=VK_KHR_acceleration_structure&extensionproperty=maxPerStageDescriptorUpdateAfterBindAccelerationStructures&platform=all + NBL_CONSTEXPR_STATIC_INLINE uint32_t TLASes = 65535; + // Reasonable combo (esp if we implement a cache over the DS) + NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 128; + // Spec mandated minimum + NBL_CONSTEXPR_STATIC_INLINE uint32_t SampledImages = 500000; +}; +} + +#endif // _NBL_THIS_EXAMPLE_SCENE_HLSL_INCLUDED_ diff --git a/40_PathTracer/include/renderer/shaders/session.hlsl b/40_PathTracer/include/renderer/shaders/session.hlsl new file mode 100644 index 000000000..175f94cb2 --- /dev/null +++ b/40_PathTracer/include/renderer/shaders/session.hlsl @@ -0,0 +1,73 @@ +#ifndef _NBL_THIS_EXAMPLE_SESSION_HLSL_INCLUDED_ +#define _NBL_THIS_EXAMPLE_SESSION_HLSL_INCLUDED_ + + +#include "renderer/shaders/rwmc.hlsl" + + +namespace nbl::this_example +{ +#define MAX_SPP_LOG2 15 +NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxSPPLog2 = MAX_SPP_LOG2; +// need to be able to count (represent) both 0 and Max +NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSPP = (0x1u << MaxSPPLog2) - 1; + +struct SSensorUniforms +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t ScrambleKeyTextureSize = 512; + +#define MAX_PATH_DEPTH_LOG2 7 + NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxCascadeCountLog2 = MAX_CASCADE_COUNT_LOG2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxPathDepthLog2 = MAX_PATH_DEPTH_LOG2; + + hlsl::float32_t2 rcpPixelSize; + hlsl::rwmc::SplattingParameters splatting; + hlsl::uint16_t2 renderSize; + // bitfield + uint16_t lastCascadeIndex : MAX_CASCADE_COUNT_LOG2; + uint16_t unused0 : BOOST_PP_SUB(16,MAX_CASCADE_COUNT_LOG2); + // bitfield + uint16_t unused1 : 1; + uint16_t hideEnvironment : 1; + uint16_t lastPathDepth : MAX_PATH_DEPTH_LOG2; + uint16_t lastNoRussianRouletteDepth : MAX_PATH_DEPTH_LOG2; +}; +#undef MAX_PATH_DEPTH_LOG2 + +// no uint16_t to be used because its going to be a push constant +struct SSensorDynamics +{ + // assuming input will be ndc = [-1,1]^2 x {-1} + hlsl::float32_t3x4 ndcToRay; + hlsl::float32_t tMax; + // we can adaptively sample per-pixel, but + uint32_t minSPP : MAX_SPP_LOG2; + uint32_t maxSPP : MAX_SPP_LOG2; + uint32_t unused : BOOST_PP_SUB(32,BOOST_PP_MUL(MAX_SPP_LOG2,2)); +}; +#undef MAX_SPP_LOG2 + + +struct SensorDSBindings +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t UBO = 0; + // R32G32_UINT storage texture (can get animated/rearranged) + NBL_CONSTEXPR_STATIC_INLINE uint32_t ScrambleKey = 1; + // R16_UINT Per Pixel Sample Count (so don't need to read all RWMC cascades) + NBL_CONSTEXPR_STATIC_INLINE uint32_t SampleCount = 2; + // R64_UINT with packing RGB14E6 or RGB14E7 and using rest for spp in the cascade + NBL_CONSTEXPR_STATIC_INLINE uint32_t RWMCCascades = 3; + // RGB5E9 + NBL_CONSTEXPR_STATIC_INLINE uint32_t Beauty = 4; + // R10G10B10_UNORM + NBL_CONSTEXPR_STATIC_INLINE uint32_t Albedo = 5; + // R10G10B10_SNORM + NBL_CONSTEXPR_STATIC_INLINE uint32_t Normal = 6; + // R10G10B10_SNORM + NBL_CONSTEXPR_STATIC_INLINE uint32_t Motion = 7; + // R16_UNORM + NBL_CONSTEXPR_STATIC_INLINE uint32_t Mask = 8; +}; +} + +#endif // _NBL_THIS_EXAMPLE_SESSION_HLSL_INCLUDED_ diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 6e3ece46c..067b78897 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -185,6 +185,8 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); session->init(cb); cb->end(); + + // TODO: stuff } session->deinit(); scene_daily_pt = nullptr; From e83e34c074b3fa03d359a2c2cc9a31221b79d676 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 20 Jan 2026 00:18:03 +0100 Subject: [PATCH 179/219] precompile my own shaders! --- 40_PathTracer/CMakeLists.txt | 28 ++++------- .../app_resources/pathtrace/debugIDs.hlsl | 14 ++++++ .../app_resources/present/default.hlsl | 22 +++++++++ 40_PathTracer/include/renderer/CRenderer.h | 34 ++++---------- 40_PathTracer/include/renderer/CSession.h | 1 + .../renderer/shaders/pathtrace/common.hlsl | 23 +++++++++ .../shaders/pathtrace/push_constants.hlsl | 47 +++++++++++++++++++ .../include/renderer/shaders/rwmc.hlsl | 11 +++-- .../include/renderer/shaders/scene.hlsl | 23 +++++++-- .../include/renderer/shaders/session.hlsl | 44 ++++++++++------- 40_PathTracer/main.cpp | 8 ++++ 11 files changed, 190 insertions(+), 65 deletions(-) create mode 100644 40_PathTracer/app_resources/pathtrace/debugIDs.hlsl create mode 100644 40_PathTracer/app_resources/present/default.hlsl create mode 100644 40_PathTracer/include/renderer/shaders/pathtrace/common.hlsl create mode 100644 40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index a7ea46a85..638e5a837 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -24,24 +24,6 @@ list(APPEND NBL_ ) nbl_create_executable_project("${NBL_EXAMPLE_SOURCES}" "${}" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") -if(NBL_EMBED_BUILTIN_RESOURCES) - set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) - set(RESOURCE_DIR "app_resources") - - get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) - - file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") - foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") - endforeach() - - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") - - LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() - set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") # TODO: why not use GLOB_RECURSE from above ? set(DEPENDS @@ -65,6 +47,14 @@ set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ [ + { + "INPUT": "app_resources/pathtrace/debugIDs.hlsl", + "KEY": "pathtrace_debugIDs", + }, + { + "INPUT": "app_resources/present/default.hlsl", + "KEY": "present_default", + }, { "INPUT": "app_resources/raytrace.rgen.hlsl", "KEY": "raytrace_rgen", @@ -119,6 +109,8 @@ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" +# -I "${CMAKE_CURRENT_SOURCE_DIR}/../include" # TODO: Arek for some reason, the `-I` are relative to each `.hlsl` file getting compiled, and not this CMAke file + -I "D:\\work\\Nabla-master\\examples_tests\\40_PathTracer\\include" -T lib_${SM} ) diff --git a/40_PathTracer/app_resources/pathtrace/debugIDs.hlsl b/40_PathTracer/app_resources/pathtrace/debugIDs.hlsl new file mode 100644 index 000000000..cb171833d --- /dev/null +++ b/40_PathTracer/app_resources/pathtrace/debugIDs.hlsl @@ -0,0 +1,14 @@ +#include "renderer/shaders/pathtrace/common.hlsl" +using namespace nbl::hlsl; +using namespace nbl::this_example; + +[[vk::push_constant]] SDebugPushConstants pc; + +[shader("raygeneration")] +void pathtrace_debugIDs() +{ + const uint32_t3 launchID = spirv::LaunchIdKHR; + const uint32_t3 launchSize = spirv::LaunchSizeKHR; + + gAlbedo[launchID] = float32_t4(float32_t3(launchID)/float32_t3(launchSize),1.f); +} diff --git a/40_PathTracer/app_resources/present/default.hlsl b/40_PathTracer/app_resources/present/default.hlsl new file mode 100644 index 000000000..97cfb7c0e --- /dev/null +++ b/40_PathTracer/app_resources/present/default.hlsl @@ -0,0 +1,22 @@ +// Copyright (C) 2024-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + + +#include "renderer/shaders/present/push_constants.hlsl" +// vertex shader is provided by the fullScreenTriangle extension +#include +using namespace nbl::hlsl; +using namespace nbl::this_example; +using namespace ext::FullScreenTriangle; + +[[vk::binding(0)]] Texture2DArray images[DefaultResolvePushConstants::ImageCount]; +[[vk::binding(1)]] SamplerState samplerState; + +[[vk::push_constant]] DefaultResolvePushConstants pc; + +[shader("pixel")] +float32_t4 present_default(SVertexAttributes vxAttr) : SV_Target0 +{ + return float32_t4(images[pc.imageIndex].SampleLevel(samplerState,float32_t3(vxAttr.uv,0.f),0.f).rgb,1.0f); +} diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h index 384e6387c..15e430191 100644 --- a/40_PathTracer/include/renderer/CRenderer.h +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -14,31 +14,7 @@ #include #include - -// TODO: move to HLSL file -namespace nbl::this_example -{ - -struct SPrevisPushConstants : SSensorDynamics -{ -}; - -// We do it so weirdly because https://github.com/microsoft/DirectXShaderCompiler/issues/7131 -#define MAX_SPP_PER_DISPATCH_LOG2 5 -struct SBeautyPushConstants : SSensorDynamics -{ - NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSppPerDispatchLog2 = MAX_SPP_PER_DISPATCH_LOG2; - - uint32_t maxSppPerDispatch : MAX_SPP_PER_DISPATCH_LOG2; - uint32_t unused : 27; -}; -#undef MAX_SPP_PER_DISPATCH_LOG2 - -struct SDebugPushConstants : SSensorDynamics -{ -}; - -} +#include "renderer/shaders/pathtrace/push_constants.hlsl" namespace nbl::this_example @@ -62,6 +38,14 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl retval.accelerationStructureHostCommands = true; return retval; } +#if 0 // see TODO in main.cpp + constexpr static video::SPhysicalDeviceLimits RequiredDeviceLimits() + { + video::SPhysicalDeviceLimits retval = {}; + retval.shaderStorageImageReadWithoutFormat = true; + return retval; + } +#endif struct SCachedCreationParams { diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h index 01776a774..44fd443b3 100644 --- a/40_PathTracer/include/renderer/CSession.h +++ b/40_PathTracer/include/renderer/CSession.h @@ -8,6 +8,7 @@ #include "io/CSceneLoader.h" #include "renderer/shaders/session.hlsl" +#include "renderer/shaders/pathtrace/push_constants.hlsl" namespace nbl::this_example diff --git a/40_PathTracer/include/renderer/shaders/pathtrace/common.hlsl b/40_PathTracer/include/renderer/shaders/pathtrace/common.hlsl new file mode 100644 index 000000000..1a94419e4 --- /dev/null +++ b/40_PathTracer/include/renderer/shaders/pathtrace/common.hlsl @@ -0,0 +1,23 @@ +#ifndef _NBL_THIS_EXAMPLE_PATHTRACE_COMMON_HLSL_INCLUDED_ +#define _NBL_THIS_EXAMPLE_PATHTRACE_COMMON_HLSL_INCLUDED_ + + +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" + +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" + +namespace nbl +{ +namespace this_example +{ +NBL_CONSTEXPR uint32_t SceneDSIndex = 0; +NBL_CONSTEXPR uint32_t SessionDSIndex = 1; +} +} +#include "renderer/shaders/scene.hlsl" +#include "renderer/shaders/session.hlsl" +#include "renderer/shaders/pathtrace/push_constants.hlsl" + + +#endif // _NBL_THIS_EXAMPLE_PATHTRACE_COMMON_HLSL_INCLUDED_ diff --git a/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl b/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl new file mode 100644 index 000000000..f0fbe07e8 --- /dev/null +++ b/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl @@ -0,0 +1,47 @@ +#ifndef _NBL_THIS_EXAMPLE_PATHTRACE_PUSH_CONSTANTS_HLSL_INCLUDED_ +#define _NBL_THIS_EXAMPLE_PATHTRACE_PUSH_CONSTANTS_HLSL_INCLUDED_ + + +#include "renderer/shaders/session.hlsl" + +#include + + +// no uint16_t to be used because its going to be a push constant +namespace nbl +{ +namespace this_example +{ +struct SSensorDynamics +{ + // assuming input will be ndc = [-1,1]^2 x {-1} + hlsl::float32_t3x4 ndcToRay; + hlsl::float32_t tMax; + // we can adaptively sample per-pixel, but + uint32_t minSPP : MAX_SPP_LOG2; + uint32_t maxSPP : MAX_SPP_LOG2; + uint32_t unused : BOOST_PP_SUB(32,BOOST_PP_MUL(MAX_SPP_LOG2,2)); +}; + +struct SPrevisPushConstants : SSensorDynamics +{ +}; + +// We do it so weirdly because https://github.com/microsoft/DirectXShaderCompiler/issues/7131 +#define MAX_SPP_PER_DISPATCH_LOG2 5 +struct SBeautyPushConstants : SSensorDynamics +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSppPerDispatchLog2 = MAX_SPP_PER_DISPATCH_LOG2; + + uint32_t maxSppPerDispatch : MAX_SPP_PER_DISPATCH_LOG2; + uint32_t unused : 27; +}; +#undef MAX_SPP_PER_DISPATCH_LOG2 + +struct SDebugPushConstants : SSensorDynamics +{ +}; + +} +} +#endif // _NBL_THIS_EXAMPLE_PATHTRACE_PUSH_CONSTANTS_HLSL_INCLUDED_ diff --git a/40_PathTracer/include/renderer/shaders/rwmc.hlsl b/40_PathTracer/include/renderer/shaders/rwmc.hlsl index 2db12ddb0..cf9c29c60 100644 --- a/40_PathTracer/include/renderer/shaders/rwmc.hlsl +++ b/40_PathTracer/include/renderer/shaders/rwmc.hlsl @@ -6,8 +6,12 @@ #include "nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl" #include "nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl" +#include +#include -namespace nbl::this_example +namespace nbl +{ +namespace this_example { // We do it so weirdly because https://github.com/microsoft/DirectXShaderCompiler/issues/7131 #define MAX_CASCADE_COUNT_LOG2 3 @@ -23,8 +27,9 @@ struct SResolveConstants // TODO: move somewhere hlsl::float32_t colorReliabilityFactor; } rwmc; uint32_t cascadeCount : BOOST_PP_ADD(MAX_CASCADE_COUNT_LOG2,1); - uint32_t unused : 28; + uint32_t unused : BOOST_PP_SUB(31,MAX_CASCADE_COUNT_LOG2); }; -} +} +} #endif // _NBL_THIS_EXAMPLE_RWMC_HLSL_INCLUDED_ diff --git a/40_PathTracer/include/renderer/shaders/scene.hlsl b/40_PathTracer/include/renderer/shaders/scene.hlsl index f4a30d3b9..d55bb9cb1 100644 --- a/40_PathTracer/include/renderer/shaders/scene.hlsl +++ b/40_PathTracer/include/renderer/shaders/scene.hlsl @@ -4,8 +4,9 @@ #include "renderer/shaders/common.hlsl" - -namespace nbl::this_example +namespace nbl +{ +namespace this_example { struct SSceneUniforms { @@ -46,6 +47,22 @@ struct SceneDSBindingCounts // Spec mandated minimum NBL_CONSTEXPR_STATIC_INLINE uint32_t SampledImages = 500000; }; -} +#ifdef __HLSL_VERSION +[[vk::binding(SceneDSBindings::UBO,SceneDSIndex)]] ConstantBuffer gScene; +// could be float32_t3 +[[vk::binding(SceneDSBindings::Envmap,SceneDSIndex)]] [[vk::combinedImageSampler]] Texture2D gEnvmap; +[[vk::binding(SceneDSBindings::Envmap,SceneDSIndex)]] [[vk::combinedImageSampler]] SamplerState gEnvmapSampler; +[[vk::binding(SceneDSBindings::TLASes,SceneDSIndex)]] RaytracingAccelerationStructure gTLASes[SceneDSBindingCounts::TLASes]; +[[vk::binding(SceneDSBindings::Samplers,SceneDSIndex)]] SamplerState gSamplers[SceneDSBindingCounts::Samplers]; +[[vk::binding(SceneDSBindings::SampledImages,SceneDSIndex)]] Texture2DArray gSampledImages[SceneDSBindingCounts::SampledImages]; +// could be float32_t +[[vk::binding(SceneDSBindings::EnvmapPDF,SceneDSIndex)]] [[vk::combinedImageSampler]] Texture2D gEnvmapPDF; +[[vk::binding(SceneDSBindings::EnvmapPDF,SceneDSIndex)]] [[vk::combinedImageSampler]] SamplerState gEnvmapPDFSampler; +// could be float32_t2 +[[vk::binding(SceneDSBindings::EnvmapWarpMap,SceneDSIndex)]] [[vk::combinedImageSampler]] Texture2D gEnvmapWarpMap; +[[vk::binding(SceneDSBindings::EnvmapWarpMap,SceneDSIndex)]] [[vk::combinedImageSampler]] SamplerState gEnvmapWarpMapSampler; +#endif +} +} #endif // _NBL_THIS_EXAMPLE_SCENE_HLSL_INCLUDED_ diff --git a/40_PathTracer/include/renderer/shaders/session.hlsl b/40_PathTracer/include/renderer/shaders/session.hlsl index 175f94cb2..f28d3faeb 100644 --- a/40_PathTracer/include/renderer/shaders/session.hlsl +++ b/40_PathTracer/include/renderer/shaders/session.hlsl @@ -5,7 +5,9 @@ #include "renderer/shaders/rwmc.hlsl" -namespace nbl::this_example +namespace nbl +{ +namespace this_example { #define MAX_SPP_LOG2 15 NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxSPPLog2 = MAX_SPP_LOG2; @@ -34,18 +36,6 @@ struct SSensorUniforms }; #undef MAX_PATH_DEPTH_LOG2 -// no uint16_t to be used because its going to be a push constant -struct SSensorDynamics -{ - // assuming input will be ndc = [-1,1]^2 x {-1} - hlsl::float32_t3x4 ndcToRay; - hlsl::float32_t tMax; - // we can adaptively sample per-pixel, but - uint32_t minSPP : MAX_SPP_LOG2; - uint32_t maxSPP : MAX_SPP_LOG2; - uint32_t unused : BOOST_PP_SUB(32,BOOST_PP_MUL(MAX_SPP_LOG2,2)); -}; -#undef MAX_SPP_LOG2 struct SensorDSBindings @@ -61,13 +51,35 @@ struct SensorDSBindings NBL_CONSTEXPR_STATIC_INLINE uint32_t Beauty = 4; // R10G10B10_UNORM NBL_CONSTEXPR_STATIC_INLINE uint32_t Albedo = 5; - // R10G10B10_SNORM + // modified R10G10B10_UNORM NBL_CONSTEXPR_STATIC_INLINE uint32_t Normal = 6; - // R10G10B10_SNORM + // modified R10G10B10_UNORM NBL_CONSTEXPR_STATIC_INLINE uint32_t Motion = 7; // R16_UNORM NBL_CONSTEXPR_STATIC_INLINE uint32_t Mask = 8; }; -} + +#ifdef __HLSL_VERSION +[[vk::binding(SensorDSBindings::UBO,SessionDSIndex)]] ConstantBuffer gSensor; +// could be uint32_t2 +[[vk::binding(SensorDSBindings::ScrambleKey,SessionDSIndex)]] RWTexture2DArray gScrambleKey; +// could be uint32_t or even uint16_t +[[vk::binding(SensorDSBindings::SampleCount,SessionDSIndex)]] RWTexture2DArray gSampleCount; +// could be uint32_t2 +[[vk::binding(SensorDSBindings::RWMCCascades,SessionDSIndex)]] RWTexture2DArray gRWMCCascades; +// could be uint32_t +[[vk::binding(SensorDSBindings::Beauty,SessionDSIndex)]] RWTexture2DArray gBeauty; +[[vk::binding(SensorDSBindings::Albedo,SessionDSIndex)]] RWTexture2DArray gAlbedo; +// thse two are snorm but stored as unorm, care needs to be taken to map: +// [-1,1] <-> [0,1] but with 0 being exactly representable, so really [-1,1] <-> [1/1023,1] +// Requires x*1022.f/2046.f+1024.f/2046.f shift/adjust for accumulation and storage +// Then to decode back into [-1,1] need max(y*2046.f/1022.f-1024.f/1022.f,-1) = x +[[vk::binding(SensorDSBindings::Normal,SessionDSIndex)]] RWTexture2DArray gNormal; +[[vk::binding(SensorDSBindings::Motion,SessionDSIndex)]] RWTexture2DArray gMotion; +// could be float32_t +[[vk::binding(SensorDSBindings::Mask,SessionDSIndex)]] RWTexture2DArray gMask; +#endif +} +} #endif // _NBL_THIS_EXAMPLE_SESSION_HLSL_INCLUDED_ diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 067b78897..32034efd2 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -102,6 +102,14 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes return retval.unionWith(CRenderer::PreferredDeviceFeatures()); } + inline SPhysicalDeviceLimits getRequiredDeviceLimits() const override + { + auto retval = device_base_t::getRequiredDeviceLimits(); + // TODO: need union/superset + retval.shaderStorageImageReadWithoutFormat = true; + return retval; + } + inline core::vector getSurfaces() const override { if (!m_surface) From 1b9b0808e21a03b0695e40069812e707402935e5 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 20 Jan 2026 00:37:06 +0100 Subject: [PATCH 180/219] ok figure out we'll only have 3 path tracing pipelines --- 40_PathTracer/CMakeLists.txt | 12 +++++++-- .../app_resources/pathtrace/beauty.hlsl | 14 +++++++++++ .../pathtrace/{debugIDs.hlsl => debug.hlsl} | 2 +- .../app_resources/pathtrace/previs.hlsl | 14 +++++++++++ 40_PathTracer/include/renderer/CSession.h | 9 +++---- .../shaders/pathtrace/push_constants.hlsl | 1 + .../shaders/present/push_constants.hlsl | 25 +++++++++++++++++++ 40_PathTracer/src/renderer/CRenderer.cpp | 2 +- 40_PathTracer/src/renderer/CSession.cpp | 5 +++- 9 files changed, 73 insertions(+), 11 deletions(-) create mode 100644 40_PathTracer/app_resources/pathtrace/beauty.hlsl rename 40_PathTracer/app_resources/pathtrace/{debugIDs.hlsl => debug.hlsl} (93%) create mode 100644 40_PathTracer/app_resources/pathtrace/previs.hlsl create mode 100644 40_PathTracer/include/renderer/shaders/present/push_constants.hlsl diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index 638e5a837..6df276c2f 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -48,8 +48,16 @@ set(SM 6_8) set(JSON [=[ [ { - "INPUT": "app_resources/pathtrace/debugIDs.hlsl", - "KEY": "pathtrace_debugIDs", + "INPUT": "app_resources/pathtrace/previs.hlsl", + "KEY": "pathtrace_previs", + }, + { + "INPUT": "app_resources/pathtrace/beauty.hlsl", + "KEY": "pathtrace_beauty", + }, + { + "INPUT": "app_resources/pathtrace/debug.hlsl", + "KEY": "pathtrace_debug", }, { "INPUT": "app_resources/present/default.hlsl", diff --git a/40_PathTracer/app_resources/pathtrace/beauty.hlsl b/40_PathTracer/app_resources/pathtrace/beauty.hlsl new file mode 100644 index 000000000..4cab1d768 --- /dev/null +++ b/40_PathTracer/app_resources/pathtrace/beauty.hlsl @@ -0,0 +1,14 @@ +#include "renderer/shaders/pathtrace/common.hlsl" +using namespace nbl::hlsl; +using namespace nbl::this_example; + +[[vk::push_constant]] SBeautyPushConstants pc; + +[shader("raygeneration")] +void pathtrace_beauty() +{ + const uint32_t3 launchID = spirv::LaunchIdKHR; + const uint32_t3 launchSize = spirv::LaunchSizeKHR; + + gAlbedo[launchID] = float32_t4(float32_t3(launchID)/float32_t3(launchSize),1.f); +} diff --git a/40_PathTracer/app_resources/pathtrace/debugIDs.hlsl b/40_PathTracer/app_resources/pathtrace/debug.hlsl similarity index 93% rename from 40_PathTracer/app_resources/pathtrace/debugIDs.hlsl rename to 40_PathTracer/app_resources/pathtrace/debug.hlsl index cb171833d..08cd38237 100644 --- a/40_PathTracer/app_resources/pathtrace/debugIDs.hlsl +++ b/40_PathTracer/app_resources/pathtrace/debug.hlsl @@ -5,7 +5,7 @@ using namespace nbl::this_example; [[vk::push_constant]] SDebugPushConstants pc; [shader("raygeneration")] -void pathtrace_debugIDs() +void pathtrace_debug() { const uint32_t3 launchID = spirv::LaunchIdKHR; const uint32_t3 launchSize = spirv::LaunchSizeKHR; diff --git a/40_PathTracer/app_resources/pathtrace/previs.hlsl b/40_PathTracer/app_resources/pathtrace/previs.hlsl new file mode 100644 index 000000000..55245a2d3 --- /dev/null +++ b/40_PathTracer/app_resources/pathtrace/previs.hlsl @@ -0,0 +1,14 @@ +#include "renderer/shaders/pathtrace/common.hlsl" +using namespace nbl::hlsl; +using namespace nbl::this_example; + +[[vk::push_constant]] SPrevisPushConstants pc; + +[shader("raygeneration")] +void pathtrace_previs() +{ + const uint32_t3 launchID = spirv::LaunchIdKHR; + const uint32_t3 launchSize = spirv::LaunchSizeKHR; + + gAlbedo[launchID] = float32_t4(float32_t3(launchID)/float32_t3(launchSize),1.f); +} diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h index 44fd443b3..0ec27296e 100644 --- a/40_PathTracer/include/renderer/CSession.h +++ b/40_PathTracer/include/renderer/CSession.h @@ -22,10 +22,7 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm { Previs, Beauty, - //Albedo, - //Normal, - //Motion, - DebugIDs, + Debug, Count }; using sensor_t = CSceneLoader::SLoadResult::SSensor; @@ -109,8 +106,8 @@ struct to_string_helper return "Beauty"; case enum_t::Previs: return "Previs"; - case enum_t::DebugIDs: - return "DebugIDs"; + case enum_t::Debug: + return "Debug"; default: break; } diff --git a/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl b/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl index f0fbe07e8..ae12d441f 100644 --- a/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl +++ b/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl @@ -40,6 +40,7 @@ struct SBeautyPushConstants : SSensorDynamics struct SDebugPushConstants : SSensorDynamics { + // some enum/choice of what to debug }; } diff --git a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl new file mode 100644 index 000000000..c966188a3 --- /dev/null +++ b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl @@ -0,0 +1,25 @@ +#ifndef _NBL_THIS_EXAMPLE_PRESENT_PUSH_CONSTANTS_HLSL_INCLUDED_ +#define _NBL_THIS_EXAMPLE_PRESENT_PUSH_CONSTANTS_HLSL_INCLUDED_ + + +#include "renderer/shaders/rwmc.hlsl" + + +// no uint16_t to be used because its going to be a push constant +namespace nbl +{ +namespace this_example +{ + +struct DefaultResolvePushConstants +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t ImageCount = 16; + + // 3 bits for cube layer + uint32_t layer : BOOST_PP_ADD(MAX_CASCADE_COUNT_LOG2,3); + uint32_t imageIndex : BOOST_PP_SUB(29,MAX_CASCADE_COUNT_LOG2); +}; + +} +} +#endif // _NBL_THIS_EXAMPLE_PRESENT_PUSH_CONSTANTS_HLSL_INCLUDED_ diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index 4accf9e0c..5e1f9cc50 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -184,7 +184,7 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) }; setPCRange.operator()(render_mode_e::Previs); setPCRange.operator()(render_mode_e::Beauty); - setPCRange.operator()(render_mode_e::DebugIDs); + setPCRange.operator()(render_mode_e::Debug); for (uint8_t t=0; tcreatePipelineLayout({pcRanges+t,1},params.sceneDSLayout,params.sensorDSLayout); diff --git a/40_PathTracer/src/renderer/CSession.cpp b/40_PathTracer/src/renderer/CSession.cpp index 5868388fa..b00073e9a 100644 --- a/40_PathTracer/src/renderer/CSession.cpp +++ b/40_PathTracer/src/renderer/CSession.cpp @@ -59,10 +59,13 @@ bool CSession::init(video::IGPUCommandBuffer* cb) { IGPUBuffer::SCreationParams params = {}; params.size = sizeof(m_params.uniforms); - params.usage = IGPUBuffer::E_USAGE_FLAGS::EUF_UNIFORM_BUFFER_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_INLINE_UPDATE_VIA_CMDBUF; + using usage_flags_e = IGPUBuffer::E_USAGE_FLAGS; + params.usage = usage_flags_e::EUF_UNIFORM_BUFFER_BIT |usage_flags_e::EUF_TRANSFER_DST_BIT | usage_flags_e::EUF_INLINE_UPDATE_VIA_CMDBUF; auto ubo = device->createBuffer(std::move(params)); if (!dedicatedAllocate(ubo.get(),"Sensor UBO")) return false; + // pipeline barrier in `reset` will take care of sync for this + cb->updateBuffer({.buffer=ubo},&m_params.uniforms); addWrite(SensorDSBindings::UBO,SBufferRange{.offset=0,.size=sizeof(m_params.uniforms),.buffer=ubo}); } From adcf8603188b9a1ee6e6a4e0d91ca799f8a74661 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 20 Jan 2026 02:31:33 +0100 Subject: [PATCH 181/219] the training wheels come off, lets make a path tracer! --- 40_PathTracer/CMakeLists.txt | 66 -- .../app_resources/pathtrace/beauty.hlsl | 14 +- .../app_resources/pathtrace/debug.hlsl | 14 +- .../app_resources/pathtrace/previs.hlsl | 13 +- 40_PathTracer/include/renderer/CRenderer.h | 22 +- 40_PathTracer/include/renderer/CSession.h | 1 - 40_PathTracer/main.cpp | 830 +----------------- 40_PathTracer/src/renderer/CRenderer.cpp | 68 +- 40_PathTracer/src/renderer/CSession.cpp | 2 +- 9 files changed, 134 insertions(+), 896 deletions(-) diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index 6df276c2f..fb9c597f8 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -25,24 +25,6 @@ list(APPEND NBL_ nbl_create_executable_project("${NBL_EXAMPLE_SOURCES}" "${}" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -# TODO: why not use GLOB_RECURSE from above ? -set(DEPENDS - app_resources/common.hlsl - app_resources/light_directional.rcall.hlsl - app_resources/light_point.rcall.hlsl - app_resources/light_spot.rcall.hlsl - app_resources/present.frag.hlsl - app_resources/raytrace.rahit.hlsl - app_resources/raytrace.rchit.hlsl - app_resources/raytrace.rgen.hlsl - app_resources/raytrace.rint.hlsl - app_resources/raytrace.rmiss.hlsl - app_resources/raytrace_procedural.rchit.hlsl - app_resources/raytrace_shadow.rahit.hlsl - app_resources/raytrace_shadow.rmiss.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -62,54 +44,6 @@ set(JSON [=[ { "INPUT": "app_resources/present/default.hlsl", "KEY": "present_default", - }, - { - "INPUT": "app_resources/raytrace.rgen.hlsl", - "KEY": "raytrace_rgen", - }, - { - "INPUT": "app_resources/raytrace.rchit.hlsl", - "KEY": "raytrace_rchit", - }, - { - "INPUT": "app_resources/raytrace_procedural.rchit.hlsl", - "KEY": "raytrace_procedural_rchit", - }, - { - "INPUT": "app_resources/raytrace.rint.hlsl", - "KEY": "raytrace_rint", - }, - { - "INPUT": "app_resources/raytrace.rahit.hlsl", - "KEY": "raytrace_rahit", - }, - { - "INPUT": "app_resources/raytrace_shadow.rahit.hlsl", - "KEY": "raytrace_shadow_rahit", - }, - { - "INPUT": "app_resources/raytrace.rmiss.hlsl", - "KEY": "raytrace_rmiss", - }, - { - "INPUT": "app_resources/raytrace_shadow.rmiss.hlsl", - "KEY": "raytrace_shadow_rmiss", - }, - { - "INPUT": "app_resources/light_directional.rcall.hlsl", - "KEY": "light_directional_rcall", - }, - { - "INPUT": "app_resources/light_point.rcall.hlsl", - "KEY": "light_point_rcall", - }, - { - "INPUT": "app_resources/light_spot.rcall.hlsl", - "KEY": "light_spot_rcall", - }, - { - "INPUT": "app_resources/present.frag.hlsl", - "KEY": "present_frag", } ] ]=]) diff --git a/40_PathTracer/app_resources/pathtrace/beauty.hlsl b/40_PathTracer/app_resources/pathtrace/beauty.hlsl index 4cab1d768..2027e6d4d 100644 --- a/40_PathTracer/app_resources/pathtrace/beauty.hlsl +++ b/40_PathTracer/app_resources/pathtrace/beauty.hlsl @@ -4,11 +4,23 @@ using namespace nbl::this_example; [[vk::push_constant]] SBeautyPushConstants pc; + +struct[raypayload] BeautyPayload +{ + uint32_t instanceID : read(caller):write(closesthit); +// float16_t3 normal : read(caller):write(closesthit); +}; + [shader("raygeneration")] -void pathtrace_beauty() +void raygen() { const uint32_t3 launchID = spirv::LaunchIdKHR; const uint32_t3 launchSize = spirv::LaunchSizeKHR; gAlbedo[launchID] = float32_t4(float32_t3(launchID)/float32_t3(launchSize),1.f); } + +[shader("miss")] +void miss(inout BeautyPayload payload) +{ +} \ No newline at end of file diff --git a/40_PathTracer/app_resources/pathtrace/debug.hlsl b/40_PathTracer/app_resources/pathtrace/debug.hlsl index 08cd38237..25e9d2664 100644 --- a/40_PathTracer/app_resources/pathtrace/debug.hlsl +++ b/40_PathTracer/app_resources/pathtrace/debug.hlsl @@ -4,11 +4,23 @@ using namespace nbl::this_example; [[vk::push_constant]] SDebugPushConstants pc; + +struct[raypayload] DebugPayload +{ + uint32_t instanceID : read(caller):write(closesthit); + uint32_t primitiveID : read(caller):write(closesthit); +}; + [shader("raygeneration")] -void pathtrace_debug() +void raygen() { const uint32_t3 launchID = spirv::LaunchIdKHR; const uint32_t3 launchSize = spirv::LaunchSizeKHR; gAlbedo[launchID] = float32_t4(float32_t3(launchID)/float32_t3(launchSize),1.f); } + +[shader("miss")] +void miss(inout DebugPayload payload) +{ +} \ No newline at end of file diff --git a/40_PathTracer/app_resources/pathtrace/previs.hlsl b/40_PathTracer/app_resources/pathtrace/previs.hlsl index 55245a2d3..035088068 100644 --- a/40_PathTracer/app_resources/pathtrace/previs.hlsl +++ b/40_PathTracer/app_resources/pathtrace/previs.hlsl @@ -4,11 +4,22 @@ using namespace nbl::this_example; [[vk::push_constant]] SPrevisPushConstants pc; + +struct[raypayload] PrevisPayload +{ + uint16_t materialID : read(caller):write(closesthit); +}; + [shader("raygeneration")] -void pathtrace_previs() +void raygen() { const uint32_t3 launchID = spirv::LaunchIdKHR; const uint32_t3 launchSize = spirv::LaunchSizeKHR; gAlbedo[launchID] = float32_t4(float32_t3(launchID)/float32_t3(launchSize),1.f); } + +[shader("miss")] +void miss(inout PrevisPayload payload) +{ +} diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h index 15e430191..57d81446c 100644 --- a/40_PathTracer/include/renderer/CRenderer.h +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -10,11 +10,8 @@ #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" -#include -#include -#include - #include "renderer/shaders/pathtrace/push_constants.hlsl" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" namespace nbl::this_example @@ -85,6 +82,7 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl struct SCreationParams : SCachedCreationParams { system::path sampleSequenceCache; + asset::IAssetManager* assMan; }; static core::smart_refctd_ptr create(SCreationParams&& _params); @@ -100,6 +98,7 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl struct SCachedConstructionParams { constexpr static inline uint8_t FramesInFlight = 3; + core::smart_refctd_ptr semaphore; // per pipeline UBO for other pipelines core::smart_refctd_ptr uboDSLayout; @@ -117,6 +116,15 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl // inline const SCachedConstructionParams& getConstructionParams() const {return m_construction;} + // + template + static inline core::smart_refctd_ptr loadPrecompiledShader( + asset::IAssetManager* assMan, video::ILogicalDevice* device, system::logger_opt_ptr logger={} + ) + { + return loadPrecompiledShader_impl(assMan,builtin::build::get_spirv_key(device),logger); + } + protected: struct SConstructorParams : SCachedCreationParams, SCachedConstructionParams { @@ -169,11 +177,15 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl core::smart_refctd_ptr regularPresent; // TODO core::smart_refctd_ptr cubemapPresent; // TODO }; - inline CRenderer(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)) {} + inline CRenderer(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)), + m_frameIx(m_construction.semaphore->getCounterValue()) {} virtual inline ~CRenderer() {} + static core::smart_refctd_ptr loadPrecompiledShader_impl(asset::IAssetManager* assMan, const core::string& key, system::logger_opt_ptr logger); + SCachedCreationParams m_creation; SCachedConstructionParams m_construction; + uint64_t m_frameIx; }; } diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h index 0ec27296e..b02e0dfa8 100644 --- a/40_PathTracer/include/renderer/CSession.h +++ b/40_PathTracer/include/renderer/CSession.h @@ -79,7 +79,6 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm SImageWithViews scrambleKey = {}, sampleCount = {}, beauty = {}, rwmcCascades = {}, albedo = {}, normal = {}, motion = {}, mask = {}; // stores all the sensor data required core::smart_refctd_ptr ds = {}; - // }; SImmutables immutables = {}; SSensorDynamics prevSensorState = {}; diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 32034efd2..4a03b9371 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -14,7 +14,6 @@ // TODO remove #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" -#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "common.hpp" #include "nbl/builtin/hlsl/indirect_commands.hlsl" @@ -161,12 +160,16 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes }}); // - m_renderer = CRenderer::create({{ - .graphicsQueue = getGraphicsQueue(), - .computeQueue = getComputeQueue(), - .uploadQueue = getTransferUpQueue(), - .utilities = smart_refctd_ptr(m_utils) - }}); + m_renderer = CRenderer::create({ + { + .graphicsQueue = getGraphicsQueue(), + .computeQueue = getComputeQueue(), + .uploadQueue = getTransferUpQueue(), + .utilities = smart_refctd_ptr(m_utils) + }, + "TODO Sample sequence cache", + m_assetMgr.get() + }); // TODO: tmp code auto scene_daily_pt = m_renderer->createScene({ @@ -200,48 +203,7 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes scene_daily_pt = nullptr; - // Load Custom Shader - auto loadPrecompiledShader = [&]() -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = "app_resources"; // virtual root - auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); - auto assetBundle = m_assetMgr->getAsset(key.data(), lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto shader = IAsset::castDown(assets[0]); - if (!shader) - { - m_logger->log("Failed to load a precompiled shader.", ILogger::ELL_ERROR); - return nullptr; - } - return shader; - }; - - // load shaders - const auto raygenShader = loadPrecompiledShader.operator()<"raytrace_rgen">(); // "app_resources/raytrace.rgen.hlsl" - const auto closestHitShader = loadPrecompiledShader.operator()<"raytrace_rchit">(); // "app_resources/raytrace.rchit.hlsl" - const auto proceduralClosestHitShader = loadPrecompiledShader.operator()<"raytrace_procedural_rchit">(); // "app_resources/raytrace_procedural.rchit.hlsl" - const auto intersectionHitShader = loadPrecompiledShader.operator()<"raytrace_rint">(); // "app_resources/raytrace.rint.hlsl" - const auto anyHitShaderColorPayload = loadPrecompiledShader.operator()<"raytrace_rahit">(); // "app_resources/raytrace.rahit.hlsl" - const auto anyHitShaderShadowPayload = loadPrecompiledShader.operator()<"raytrace_shadow_rahit">(); // "app_resources/raytrace_shadow.rahit.hlsl" - const auto missShader = loadPrecompiledShader.operator()<"raytrace_rmiss">(); // "app_resources/raytrace.rmiss.hlsl" - const auto missShadowShader = loadPrecompiledShader.operator()<"raytrace_shadow_rmiss">(); // "app_resources/raytrace_shadow.rmiss.hlsl" - const auto directionalLightCallShader = loadPrecompiledShader.operator()<"light_directional_rcall">(); // "app_resources/light_directional.rcall.hlsl" - const auto pointLightCallShader = loadPrecompiledShader.operator()<"light_point_rcall">(); // "app_resources/light_point.rcall.hlsl" - const auto spotLightCallShader = loadPrecompiledShader.operator()<"light_spot_rcall">(); // "app_resources/light_spot.rcall.hlsl" - const auto fragmentShader = loadPrecompiledShader.operator()<"present_frag">(); // "app_resources/present.frag.hlsl" - - m_semaphore = m_device->createSemaphore(m_realFrameIx); - if (!m_semaphore) - return logFail("Failed to Create a Semaphore!"); - - auto gQueue = getGraphicsQueue(); // Create renderpass and init surface nbl::video::IGPURenderpass* renderpass; @@ -281,169 +243,18 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes if (!renderpass) return logFail("Failed to create Renderpass!"); - if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + if (!m_surface || !m_surface->init(getGraphicsQueue(), std::move(scResources), swapchainParams.sharedParams)) return logFail("Could not create Window & Surface or initialize the Surface!"); } - auto pool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - for (auto i = 0u; i < MaxFramesInFlight; i++) - { - if (!pool) - return logFail("Couldn't create Command Pool!"); - if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) - return logFail("Couldn't create Command Buffer!"); - } m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); m_surface->recreateSwapchain(); - // create output images - m_hdrImage = m_device->createImage({ - { - .type = IGPUImage::ET_2D, - .samples = ICPUImage::ESCF_1_BIT, - .format = EF_R16G16B16A16_SFLOAT, - .extent = {WIN_W, WIN_H, 1}, - .mipLevels = 1, - .arrayLayers = 1, - .flags = IImage::ECF_NONE, - .usage = bitflag(IImage::EUF_STORAGE_BIT) | IImage::EUF_TRANSFER_SRC_BIT | IImage::EUF_SAMPLED_BIT - } - }); - - if (!m_hdrImage || !m_device->allocate(m_hdrImage->getMemoryReqs(), m_hdrImage.get()).isValid()) - return logFail("Could not create HDR Image"); - - m_hdrImageView = m_device->createImageView({ - .flags = IGPUImageView::ECF_NONE, - .subUsages = IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT | IGPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT, - .image = m_hdrImage, - .viewType = IGPUImageView::E_TYPE::ET_2D, - .format = asset::EF_R16G16B16A16_SFLOAT - }); - - - - // ray trace pipeline and descriptor set layout setup - { - const auto bindings = std::array{ - ICPUDescriptorSetLayout::SBinding{ - .binding = 0, - .type = asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, - .count = 1, - }, - { - .binding = 1, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, - .count = 1, - } - }; - auto cpuDescriptorSetLayout = core::make_smart_refctd_ptr(bindings); - - const SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, - .offset = 0u, - .size = sizeof(SPushConstants), - }; - const auto cpuPipelineLayout = core::make_smart_refctd_ptr(std::span({ pcRange }), std::move(cpuDescriptorSetLayout), nullptr, nullptr, nullptr); - - const auto pipeline = ICPURayTracingPipeline::create(cpuPipelineLayout.get()); - pipeline->getCachedCreationParams() = { - .flags = IGPURayTracingPipeline::SCreationParams::FLAGS::NO_NULL_INTERSECTION_SHADERS, - .maxRecursionDepth = 1, - .dynamicStackSize = true, - }; - - pipeline->getSpecInfos(ESS_RAYGEN)[0] = { - .shader = raygenShader, - .entryPoint = "main", - }; - - pipeline->getSpecInfoVector(ESS_MISS)->resize(EMT_COUNT); - const auto missGroups = pipeline->getSpecInfos(ESS_MISS); - missGroups[EMT_PRIMARY] = { .shader = missShader, .entryPoint = "main" }; - missGroups[EMT_OCCLUSION] = { .shader = missShadowShader, .entryPoint = "main" }; - - auto getHitGroupIndex = [](E_GEOM_TYPE geomType, E_RAY_TYPE rayType) - { - return geomType * ERT_COUNT + rayType; - }; - - const auto hitGroupCount = ERT_COUNT * EGT_COUNT; - pipeline->getSpecInfoVector(ESS_CLOSEST_HIT)->resize(hitGroupCount); - pipeline->getSpecInfoVector(ESS_ANY_HIT)->resize(hitGroupCount); - pipeline->getSpecInfoVector(ESS_INTERSECTION)->resize(hitGroupCount); - - const auto closestHitSpecs = pipeline->getSpecInfos(ESS_CLOSEST_HIT); - const auto anyHitSpecs = pipeline->getSpecInfos(ESS_ANY_HIT); - const auto intersectionSpecs = pipeline->getSpecInfos(ESS_INTERSECTION); - - closestHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = { .shader = closestHitShader, .entryPoint = "main" }; - anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = {.shader = anyHitShaderColorPayload, .entryPoint = "main"}; - - anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_OCCLUSION)] = { .shader = anyHitShaderShadowPayload, .entryPoint = "main" }; - - closestHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = proceduralClosestHitShader, .entryPoint = "main" }; - anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = anyHitShaderColorPayload, .entryPoint = "main" }; - intersectionSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = intersectionHitShader, .entryPoint = "main" }; - - anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = {.shader = anyHitShaderShadowPayload, .entryPoint = "main" }; - intersectionSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = { .shader = intersectionHitShader, .entryPoint = "main" }; - - pipeline->getSpecInfoVector(ESS_CALLABLE)->resize(ELT_COUNT); - const auto callableGroups = pipeline->getSpecInfos(ESS_CALLABLE); - callableGroups[ELT_DIRECTIONAL] = { .shader = directionalLightCallShader, .entryPoint = "main" }; - callableGroups[ELT_POINT] = { .shader = pointLightCallShader, .entryPoint = "main" }; - callableGroups[ELT_SPOT] = { .shader = spotLightCallShader, .entryPoint = "main" }; - - smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); - CAssetConverter::SInputs inputs = {}; - inputs.logger = m_logger.get(); - - const std::array cpuPipelines = { pipeline.get() }; - std::get>(inputs.assets) = cpuPipelines; - - CAssetConverter::SConvertParams params = {}; - params.utilities = m_utils.get(); - - auto reservation = converter->reserve(inputs); - auto future = reservation.convert(params); - if (future.copy() != IQueue::RESULT::SUCCESS) - { - m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); - return false; - } - - // assign gpu objects to output - auto&& pipelines = reservation.getGPUObjects(); - m_rayTracingPipeline = pipelines[0].value; - const auto* gpuDsLayout = m_rayTracingPipeline->getLayout()->getDescriptorSetLayouts()[0]; - - const std::array dsLayoutPtrs = { gpuDsLayout }; - m_rayTracingDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, std::span(dsLayoutPtrs.begin(), dsLayoutPtrs.end())); - m_rayTracingDs = m_rayTracingDsPool->createDescriptorSet(core::smart_refctd_ptr(gpuDsLayout)); - - calculateRayTracingStackSize(m_rayTracingPipeline); - - if (!createShaderBindingTable(m_rayTracingPipeline)) - return logFail("Could not create shader binding table"); - - } - - auto assetManager = make_smart_refctd_ptr(smart_refctd_ptr(system)); - - if (!createIndirectBuffer()) - return logFail("Could not create indirect buffer"); - - if (!createAccelerationStructuresFromGeometry()) - return logFail("Could not create acceleration structures from geometry creator"); +#if 0 // presenter ISampler::SParams samplerParams = { .AnisotropicFilter = 0 }; @@ -628,24 +439,10 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes ImGui::End(); } ); - - // Set Camera - { - core::vectorSIMDf cameraPosition(0, 5, -10); - matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( - core::radians(60.0f), - WIN_W / WIN_H, - 0.01f, - 500.0f - ); - m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); - } - +#endif m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); m_surface->recreateSwapchain(); m_winMgr->show(m_window.get()); - m_oracle.reportBeginFrameRecord(); - m_camera.mapKeysToWASD(); return true; } @@ -673,6 +470,7 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes inline void workLoopBody() override { +#if 0 // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); // We block for semaphores for 2 reasons here: @@ -901,30 +699,17 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes } m_api->endCapture(); m_frameAccumulationCounter++; +#endif } inline void update() { - m_camera.setMoveSpeed(m_cameraSetting.moveSpeed); - m_camera.setRotateSpeed(m_cameraSetting.rotateSpeed); - static std::chrono::microseconds previousEventTimestamp{}; m_inputSystem->getDefaultMouse(&m_mouse); m_inputSystem->getDefaultKeyboard(&m_keyboard); - auto updatePresentationTimestamp = [&]() - { - m_currentImageAcquire = m_surface->acquireNextImage(); - - m_oracle.reportEndFrameRecord(); - const auto timestamp = m_oracle.getNextPresentationTimeStamp(); - m_oracle.reportBeginFrameRecord(); - - return timestamp; - }; - - const auto nextPresentationTimestamp = updatePresentationTimestamp(); + m_currentImageAcquire = m_surface->acquireNextImage(); struct { @@ -932,14 +717,10 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes std::vector keyboard{}; } capturedEvents; - m_camera.beginInputProcessing(nextPresentationTimestamp); { const auto& io = ImGui::GetIO(); m_mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { - if (!io.WantCaptureMouse) - m_camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl - for (const auto& e : events) // here capture { if (e.timeStamp < previousEventTimestamp) @@ -953,9 +734,6 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes m_keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { - if (!io.WantCaptureKeyboard) - m_camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl - for (const auto& e : events) // here capture { if (e.timeStamp < previousEventTimestamp) @@ -967,7 +745,6 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes }, m_logger.get()); } - m_camera.endInputProcessing(nextPresentationTimestamp); const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); @@ -999,573 +776,16 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes } private: - uint32_t getWorkgroupCount(uint32_t dim, uint32_t size) - { - return (dim + size - 1) / size; - } - - bool createIndirectBuffer() - { - const auto getBufferRangeAddress = [](const SBufferRange& range) - { - return range.buffer->getDeviceAddress() + range.offset; - }; - const auto command = TraceRaysIndirectCommand_t{ - .raygenShaderRecordAddress = getBufferRangeAddress(m_shaderBindingTable.raygenGroupRange), - .raygenShaderRecordSize = m_shaderBindingTable.raygenGroupRange.size, - .missShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.missGroupsRange), - .missShaderBindingTableSize = m_shaderBindingTable.missGroupsRange.size, - .missShaderBindingTableStride = m_shaderBindingTable.missGroupsStride, - .hitShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.hitGroupsRange), - .hitShaderBindingTableSize = m_shaderBindingTable.hitGroupsRange.size, - .hitShaderBindingTableStride = m_shaderBindingTable.hitGroupsStride, - .callableShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.callableGroupsRange), - .callableShaderBindingTableSize = m_shaderBindingTable.callableGroupsRange.size, - .callableShaderBindingTableStride = m_shaderBindingTable.callableGroupsStride, - .width = WIN_W, - .height = WIN_H, - .depth = 1, - }; - IGPUBuffer::SCreationParams params; - params.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDIRECT_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - params.size = sizeof(TraceRaysIndirectCommand_t); - m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, std::move(params), &command).move_into(m_indirectBuffer); - return true; - } - - void calculateRayTracingStackSize(const smart_refctd_ptr& pipeline) - { - const auto raygenStackSize = pipeline->getRaygenStackSize(); - auto getMaxSize = [&](auto ranges, auto valProj) -> uint16_t - { - auto maxValue = 0; - for (const auto& val : ranges) - { - maxValue = std::max(maxValue, std::invoke(valProj, val)); - } - return maxValue; - }; - - const auto closestHitStackMax = getMaxSize(pipeline->getHitStackSizes(), &IGPURayTracingPipeline::SHitGroupStackSize::closestHit); - const auto anyHitStackMax = getMaxSize(pipeline->getHitStackSizes(), &IGPURayTracingPipeline::SHitGroupStackSize::anyHit); - const auto intersectionStackMax = getMaxSize(pipeline->getHitStackSizes(), &IGPURayTracingPipeline::SHitGroupStackSize::intersection); - const auto missStackMax = getMaxSize(pipeline->getMissStackSizes(), std::identity{}); - const auto callableStackMax = getMaxSize(pipeline->getCallableStackSizes(), std::identity{}); - auto firstDepthStackSizeMax = std::max(closestHitStackMax, missStackMax); - firstDepthStackSizeMax = std::max(firstDepthStackSizeMax, intersectionStackMax + anyHitStackMax); - m_rayTracingStackSize = raygenStackSize + std::max(firstDepthStackSizeMax, callableStackMax); - } - - bool createShaderBindingTable(const smart_refctd_ptr& pipeline) - { - const auto& limits = m_device->getPhysicalDevice()->getLimits(); - const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; - const auto handleSizeAligned = nbl::core::alignUp(handleSize, limits.shaderGroupHandleAlignment); - - auto& raygenRange = m_shaderBindingTable.raygenGroupRange; - - auto& hitRange = m_shaderBindingTable.hitGroupsRange; - const auto hitHandles = pipeline->getHitHandles(); - - auto& missRange = m_shaderBindingTable.missGroupsRange; - const auto missHandles = pipeline->getMissHandles(); - - auto& callableRange = m_shaderBindingTable.callableGroupsRange; - const auto callableHandles = pipeline->getCallableHandles(); - - raygenRange = { - .offset = 0, - .size = core::alignUp(handleSizeAligned, limits.shaderGroupBaseAlignment) - }; - - missRange = { - .offset = raygenRange.size, - .size = core::alignUp(missHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), - }; - m_shaderBindingTable.missGroupsStride = handleSizeAligned; - - hitRange = { - .offset = missRange.offset + missRange.size, - .size = core::alignUp(hitHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), - }; - m_shaderBindingTable.hitGroupsStride = handleSizeAligned; - - callableRange = { - .offset = hitRange.offset + hitRange.size, - .size = core::alignUp(callableHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), - }; - m_shaderBindingTable.callableGroupsStride = handleSizeAligned; - - const auto bufferSize = raygenRange.size + missRange.size + hitRange.size + callableRange.size; - - ICPUBuffer::SCreationParams cpuBufferParams; - cpuBufferParams.size = bufferSize; - auto cpuBuffer = ICPUBuffer::create(std::move(cpuBufferParams)); - uint8_t* pData = reinterpret_cast(cpuBuffer->getPointer()); - - // copy raygen region - memcpy(pData, &pipeline->getRaygen(), handleSize); - - // copy miss region - uint8_t* pMissData = pData + missRange.offset; - for (const auto& handle : missHandles) - { - memcpy(pMissData, &handle, handleSize); - pMissData += m_shaderBindingTable.missGroupsStride; - } - - // copy hit region - uint8_t* pHitData = pData + hitRange.offset; - for (const auto& handle : hitHandles) - { - memcpy(pHitData, &handle, handleSize); - pHitData += m_shaderBindingTable.hitGroupsStride; - } - - // copy callable region - uint8_t* pCallableData = pData + callableRange.offset; - for (const auto& handle : callableHandles) - { - memcpy(pCallableData, &handle, handleSize); - pCallableData += m_shaderBindingTable.callableGroupsStride; - } - - { - IGPUBuffer::SCreationParams params; - params.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT; - params.size = bufferSize; - m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, std::move(params), pData).move_into(raygenRange.buffer); - missRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); - hitRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); - callableRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); - } - - return true; - } - - bool createAccelerationStructuresFromGeometry() - { - auto queue = getGraphicsQueue(); - // get geometries into ICPUBuffers - auto pool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!pool) - return logFail("Couldn't create Command Pool for geometry creation!"); - - const auto defaultMaterial = Material{ - .ambient = {0.2, 0.1, 0.1}, - .diffuse = {0.8, 0.3, 0.3}, - .specular = {0.8, 0.8, 0.8}, - .shininess = 1.0f, - .alpha = 1.0f, - }; - - auto getTranslationMatrix = [](float32_t x, float32_t y, float32_t z) - { - core::matrix3x4SIMD transform; - transform.setTranslation(nbl::core::vectorSIMDf(x, y, z, 0)); - return transform; - }; - - core::matrix3x4SIMD planeTransform; - planeTransform.setRotation(quaternion::fromAngleAxis(core::radians(-90.0f), vector3df_SIMD{ 1, 0, 0 })); - - // triangles geometries - auto geometryCreator = make_smart_refctd_ptr(); - - const auto cpuObjects = std::array{ - scene::ReferenceObjectCpu { - .data = geometryCreator->createRectangle({10, 10}), - .material = defaultMaterial, - .transform = planeTransform, - }, - scene::ReferenceObjectCpu { - .data = geometryCreator->createCube({1, 1, 1}), - .material = defaultMaterial, - .transform = getTranslationMatrix(0, 0.5f, 0), - }, - scene::ReferenceObjectCpu { - .data = geometryCreator->createCube({1.5, 1.5, 1.5}), - .material = Material{ - .ambient = {0.1, 0.1, 0.2}, - .diffuse = {0.2, 0.2, 0.8}, - .specular = {0.8, 0.8, 0.8}, - .shininess = 1.0f, - .alpha = 1.0f, - }, - .transform = getTranslationMatrix(-5.0f, 1.0f, 0), - }, - scene::ReferenceObjectCpu { - .data = geometryCreator->createCube({1.5, 1.5, 1.5}), - .material = Material{ - .ambient = {0.1, 0.2, 0.1}, - .diffuse = {0.2, 0.8, 0.2}, - .specular = {0.8, 0.8, 0.8}, - .shininess = 1.0f, - .alpha = 0.2, - }, - .transform = getTranslationMatrix(5.0f, 1.0f, 0), - }, - }; - - // procedural geometries - using Aabb = IGPUBottomLevelAccelerationStructure::AABB_t; - - smart_refctd_ptr cpuProcBuffer; - { - ICPUBuffer::SCreationParams params; - params.size = NumberOfProceduralGeometries * sizeof(Aabb); - cpuProcBuffer = ICPUBuffer::create(std::move(params)); - } - - core::vector proceduralGeoms; - proceduralGeoms.reserve(NumberOfProceduralGeometries); - auto proceduralGeometries = reinterpret_cast(cpuProcBuffer->getPointer()); - for (int32_t i = 0; i < NumberOfProceduralGeometries; i++) - { - const auto middle_i = NumberOfProceduralGeometries / 2.0; - SProceduralGeomInfo sphere = { - .material = hlsl::_static_cast(Material{ - .ambient = {0.1, 0.05 * i, 0.1}, - .diffuse = {0.3, 0.2 * i, 0.3}, - .specular = {0.8, 0.8, 0.8}, - .shininess = 1.0f, - }), - .center = float32_t3((i - middle_i) * 4.0, 2, 5.0), - .radius = 1, - }; - - proceduralGeoms.push_back(sphere); - const auto sphereMin = sphere.center - sphere.radius; - const auto sphereMax = sphere.center + sphere.radius; - proceduralGeometries[i] = { - vector3d(sphereMin.x, sphereMin.y, sphereMin.z), - vector3d(sphereMax.x, sphereMax.y, sphereMax.z) - }; - } - - { - IGPUBuffer::SCreationParams params; - params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - params.size = proceduralGeoms.size() * sizeof(SProceduralGeomInfo); - m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = queue }, std::move(params), proceduralGeoms.data()).move_into(m_proceduralGeomInfoBuffer); - } - - // get ICPUBuffers into ICPUBLAS - // TODO use one BLAS and multiple triangles/aabbs in one - const auto blasCount = std::size(cpuObjects) + 1; - const auto proceduralBlasIdx = std::size(cpuObjects); - - std::array, std::size(cpuObjects)+1u> cpuBlasList; - for (uint32_t i = 0; i < blasCount; i++) - { - auto& blas = cpuBlasList[i]; - blas = make_smart_refctd_ptr(); - - if (i == proceduralBlasIdx) - { - auto aabbs = make_refctd_dynamic_array>>(1u); - auto primitiveCounts = make_refctd_dynamic_array>(1u); - - auto& aabb = aabbs->front(); - auto& primCount = primitiveCounts->front(); - - primCount = NumberOfProceduralGeometries; - aabb.data = { .offset = 0, .buffer = cpuProcBuffer }; - aabb.stride = sizeof(IGPUBottomLevelAccelerationStructure::AABB_t); - aabb.geometryFlags = IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::OPAQUE_BIT; // only allow opaque for now - - blas->setGeometries(std::move(aabbs), std::move(primitiveCounts)); - } - else - { - auto triangles = make_refctd_dynamic_array>>(1u); - auto primitiveCounts = make_refctd_dynamic_array>(1u); - - auto& tri = triangles->front(); - - auto& primCount = primitiveCounts->front(); - primCount = cpuObjects[i].data->getPrimitiveCount(); - - tri = cpuObjects[i].data->exportForBLAS(); - tri.geometryFlags = cpuObjects[i].material.isTransparent() ? - IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::NO_DUPLICATE_ANY_HIT_INVOCATION_BIT : - IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::OPAQUE_BIT; - - blas->setGeometries(std::move(triangles), std::move(primitiveCounts)); - } - - auto blasFlags = bitflag(IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT) | IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::ALLOW_COMPACTION_BIT; - if (i == proceduralBlasIdx) - blasFlags |= IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT; - - blas->setBuildFlags(blasFlags); - blas->setContentHash(blas->computeContentHash()); - } - - auto geomInfoBuffer = ICPUBuffer::create({ std::size(cpuObjects) * sizeof(STriangleGeomInfo) }); - STriangleGeomInfo* geomInfos = reinterpret_cast(geomInfoBuffer->getPointer()); - - // get ICPUBLAS into ICPUTLAS - auto geomInstances = make_refctd_dynamic_array>(blasCount); - { - uint32_t i = 0; - for (auto instance = geomInstances->begin(); instance != geomInstances->end(); instance++, i++) - { - const auto isProceduralInstance = i == proceduralBlasIdx; - ICPUTopLevelAccelerationStructure::StaticInstance inst; - inst.base.blas = cpuBlasList[i]; - inst.base.flags = static_cast(IGPUTopLevelAccelerationStructure::INSTANCE_FLAGS::TRIANGLE_FACING_CULL_DISABLE_BIT); - inst.base.instanceCustomIndex = i; - inst.base.instanceShaderBindingTableRecordOffset = isProceduralInstance ? 2 : 0; - inst.base.mask = 0xFF; - inst.transform = isProceduralInstance ? matrix3x4SIMD() : cpuObjects[i].transform; - - instance->instance = inst; - } - } - - auto cpuTlas = make_smart_refctd_ptr(); - cpuTlas->setInstances(std::move(geomInstances)); - cpuTlas->setBuildFlags(IGPUTopLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT); - - // convert with asset converter - smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); - struct MyInputs : CAssetConverter::SInputs - { - // For the GPU Buffers to be directly writeable and so that we don't need a Transfer Queue submit at all - inline uint32_t constrainMemoryTypeBits(const size_t groupCopyID, const IAsset* canonicalAsset, const blake3_hash_t& contentHash, const IDeviceMemoryBacked* memoryBacked) const override - { - assert(memoryBacked); - return memoryBacked->getObjectType() != IDeviceMemoryBacked::EOT_BUFFER ? (~0u) : rebarMemoryTypes; - } - - uint32_t rebarMemoryTypes; - } inputs = {}; - inputs.logger = m_logger.get(); - inputs.rebarMemoryTypes = m_physicalDevice->getDirectVRAMAccessMemoryTypeBits(); - // the allocator needs to be overriden to hand out memory ranges which have already been mapped so that the ReBAR fast-path can kick in - // (multiple buffers can be bound to same memory, but memory can only be mapped once at one place, so Asset Converter can't do it) - struct MyAllocator final : public IDeviceMemoryAllocator - { - ILogicalDevice* getDeviceForAllocations() const override { return device; } - - SAllocation allocate(const SAllocateInfo& info) override - { - auto retval = device->allocate(info); - // map what is mappable by default so ReBAR checks succeed - if (retval.isValid() && retval.memory->isMappable()) - retval.memory->map({ .offset = 0,.length = info.size }); - return retval; - } - - ILogicalDevice* device; - } myalloc; - myalloc.device = m_device.get(); - inputs.allocator = &myalloc; - - std::array tmpTlas; - std::array tmpBuffers; - std::array tmpGeometries; - std::array, std::size(cpuObjects)> tmpGeometryPatches; - { - tmpTlas[0] = cpuTlas.get(); - tmpBuffers[0] = cpuProcBuffer.get(); - for (uint32_t i = 0; i < cpuObjects.size(); i++) - { - tmpGeometries[i] = cpuObjects[i].data.get(); - tmpGeometryPatches[i].indexBufferUsages= IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; - } - - std::get>(inputs.assets) = tmpTlas; - std::get>(inputs.assets) = tmpBuffers; - std::get>(inputs.assets) = tmpGeometries; - std::get>(inputs.patches) = tmpGeometryPatches; - } - - auto reservation = converter->reserve(inputs); - { - auto prepass = [&](const auto & references) -> bool - { - auto objects = reservation.getGPUObjects(); - uint32_t counter = {}; - for (auto& object : objects) - { - auto gpu = object.value; - auto* reference = references[counter]; - - if (reference) - { - if (!gpu) - { - m_logger->log("Failed to convert a CPU object to GPU!", ILogger::ELL_ERROR); - return false; - } - } - counter++; - } - return true; - }; - - prepass.template operator() < ICPUTopLevelAccelerationStructure > (tmpTlas); - prepass.template operator() < ICPUBuffer > (tmpBuffers); - prepass.template operator() < ICPUPolygonGeometry > (tmpGeometries); - } - - constexpr auto CompBufferCount = 2; - std::array, CompBufferCount> compBufs = {}; - std::array compBufInfos = {}; - { - auto pool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT | IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); - pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, compBufs); - compBufs.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - for (auto i = 0; i < CompBufferCount; i++) - compBufInfos[i].cmdbuf = compBufs[i].get(); - } - auto compSema = m_device->createSemaphore(0u); - SIntendedSubmitInfo compute = {}; - compute.queue = queue; - compute.scratchCommandBuffers = compBufInfos; - compute.scratchSemaphore = { - .semaphore = compSema.get(), - .value = 0u, - .stageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT | PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT - }; - // convert - { - smart_refctd_ptr scratchAlloc; - { - constexpr auto MaxAlignment = 256; - constexpr auto MinAllocationSize = 1024; - const auto scratchSize = core::alignUp(reservation.getMaxASBuildScratchSize(false), MaxAlignment); - - - IGPUBuffer::SCreationParams creationParams = {}; - creationParams.size = scratchSize; - creationParams.usage = IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - auto scratchBuffer = m_device->createBuffer(std::move(creationParams)); - - auto reqs = scratchBuffer->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getDirectVRAMAccessMemoryTypeBits(); - - auto allocation = m_device->allocate(reqs, scratchBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - allocation.memory->map({ .offset = 0,.length = reqs.size }); - - scratchAlloc = make_smart_refctd_ptr( - SBufferRange{0ull, scratchSize, std::move(scratchBuffer)}, - core::allocator(), MaxAlignment, MinAllocationSize - ); - } - - struct MyParams final : CAssetConverter::SConvertParams - { - inline uint32_t getFinalOwnerQueueFamily(const IGPUBuffer* buffer, const core::blake3_hash_t& createdFrom) override - { - return finalUser; - } - inline uint32_t getFinalOwnerQueueFamily(const IGPUAccelerationStructure* image, const core::blake3_hash_t& createdFrom) override - { - return finalUser; - } - - uint8_t finalUser; - } params = {}; - params.utilities = m_utils.get(); - params.compute = &compute; - params.scratchForDeviceASBuild = scratchAlloc.get(); - params.finalUser = queue->getFamilyIndex(); - - auto future = reservation.convert(params); - if (future.copy() != IQueue::RESULT::SUCCESS) - { - m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); - return false; - } - // 2 submits, BLAS build, TLAS build, DO NOT ADD COMPACTIONS IN THIS EXAMPLE! - if (compute.getFutureScratchSemaphore().value>3) - m_logger->log("Overflow submitted on Compute Queue despite using ReBAR (no transfer submits or usage of staging buffer) and providing a AS Build Scratch Buffer of correctly queried max size!",system::ILogger::ELL_ERROR); - - // assign gpu objects to output - auto&& tlases = reservation.getGPUObjects(); - m_gpuTlas = tlases[0].value; - auto&& buffers = reservation.getGPUObjects(); - m_proceduralAabbBuffer = buffers[0].value; - - auto&& gpuPolygonGeometries = reservation.getGPUObjects(); - m_gpuPolygons.resize(gpuPolygonGeometries.size()); - - for (uint32_t i = 0; i < gpuPolygonGeometries.size(); i++) - { - const auto& cpuObject = cpuObjects[i]; - const auto& gpuPolygon = gpuPolygonGeometries[i].value; - const auto gpuTriangles = gpuPolygon->exportForBLAS(); - - const auto& vertexBufferBinding = gpuTriangles.vertexData[0]; - const uint64_t vertexBufferAddress = vertexBufferBinding.buffer->getDeviceAddress() + vertexBufferBinding.offset; - - const auto& normalView = gpuPolygon->getNormalView(); - const uint64_t normalBufferAddress = normalView ? normalView.src.buffer->getDeviceAddress() + normalView.src.offset : 0; - auto normalType = NT_R32G32B32_SFLOAT; - if (normalView && normalView.composed.format == EF_R8G8B8A8_SNORM) - normalType = NT_R8G8B8A8_SNORM; - - const auto& indexBufferBinding = gpuTriangles.indexData; - auto& geomInfo = geomInfos[i]; - geomInfo = { - .material = hlsl::_static_cast(cpuObject.material), - .vertexBufferAddress = vertexBufferAddress, - .indexBufferAddress = indexBufferBinding.buffer ? indexBufferBinding.buffer->getDeviceAddress() + indexBufferBinding.offset : vertexBufferAddress, - .normalBufferAddress = normalBufferAddress, - .normalType = normalType, - .indexType = gpuTriangles.indexType, - }; - - m_gpuPolygons[i] = gpuPolygon; - } - } - - { - IGPUBuffer::SCreationParams params; - params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - params.size = geomInfoBuffer->getSize(); - m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = queue }, std::move(params), geomInfos).move_into(m_triangleGeomInfoBuffer); - } - - return true; - } - smart_refctd_ptr m_window; smart_refctd_ptr> m_surface; - smart_refctd_ptr m_semaphore; uint64_t m_realFrameIx = 0; uint32_t m_frameAccumulationCounter = 0; - std::array, MaxFramesInFlight> m_cmdBufs; ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; core::smart_refctd_ptr m_inputSystem; InputSystem::ChannelReader m_mouse; InputSystem::ChannelReader m_keyboard; - struct CameraSetting - { - float fov = 60.f; - float zNear = 0.1f; - float zFar = 10000.f; - float moveSpeed = 1.f; - float rotateSpeed = 1.f; - float viewWidth = 10.f; - float camYAngle = 165.f / 180.f * 3.14159f; - float camXAngle = 32.f / 180.f * 3.14159f; - - } m_cameraSetting; - Camera m_camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); - - Light m_light = { - .direction = {-1.0f, -1.0f, -0.4f}, - .position = {10.0f, 15.0f, 8.0f}, - .outerCutoff = 0.866025404f, // {cos(radians(30.0f))}, - .type = ELT_DIRECTIONAL - }; - video::CDumbPresentationOracle m_oracle; struct C_UI @@ -1581,24 +801,6 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes } m_ui; core::smart_refctd_ptr m_guiDescriptorSetPool; - core::vector m_gpuIntersectionSpheres; - uint32_t m_intersectionHitGroupIdx; - - core::vector> m_gpuPolygons; - smart_refctd_ptr m_gpuTlas; - smart_refctd_ptr m_instanceBuffer; - - smart_refctd_ptr m_triangleGeomInfoBuffer; - smart_refctd_ptr m_proceduralGeomInfoBuffer; - smart_refctd_ptr m_proceduralAabbBuffer; - smart_refctd_ptr m_indirectBuffer; - - smart_refctd_ptr m_hdrImage; - smart_refctd_ptr m_hdrImageView; - - smart_refctd_ptr m_rayTracingDsPool; - smart_refctd_ptr m_rayTracingDs; - smart_refctd_ptr m_rayTracingPipeline; uint64_t m_rayTracingStackSize; ShaderBindingTable m_shaderBindingTable; diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index 5e1f9cc50..58f7d3387 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -6,9 +6,11 @@ #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" -#include "nbl/this_example/builtin/build/spirv/keys.hpp" - #include +#include +#include +#include + namespace nbl::this_example { @@ -17,6 +19,22 @@ using namespace nbl::asset; using namespace nbl::system; using namespace nbl::video; + +smart_refctd_ptr CRenderer::loadPrecompiledShader_impl(IAssetManager* assMan, const core::string& key, logger_opt_ptr logger) +{ + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = logger; + lp.workingDirectory = "app_resources"; // virtual root + auto assetBundle = assMan->getAsset(key,lp); + const auto assets = assetBundle.getContents(); + if (!assets.empty()) + if (auto shader = IAsset::castDown(*assets.begin()); shader) + return shader; + + logger.log("Failed to load precompiled shader %s", ILogger::ELL_ERROR, key.c_str()); + return nullptr; +} + // smart_refctd_ptr CRenderer::create(SCreationParams&& _params) { @@ -27,11 +45,14 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) // if (!params.logger.get()) params.logger = smart_refctd_ptr(params.utilities->getLogger()); - auto checkNullObject = [¶ms](auto& obj, const std::string_view debugName)->bool + logger_opt_ptr logger = params.logger.get().get(); + + // + auto checkNullObject = [¶ms,logger](auto& obj, const std::string_view debugName)->bool { if (!obj) { - params.logger.log("Failed to Create %s Object!",ILogger::ELL_ERROR,debugName.data()); + logger.log("Failed to Create %s Object!",ILogger::ELL_ERROR,debugName.data()); return true; } obj->setObjectDebugName(debugName.data()); @@ -42,9 +63,15 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) ILogicalDevice* device = params.utilities->getLogicalDevice(); // limits + // + params.semaphore = device->createSemaphore(0); + if (checkNullObject(params.semaphore,"CRenderer Semaphore")) + return nullptr; + // basic samplers const auto samplerDefaultRepeat = device->createSampler({}); + using render_mode_e = CSession::RenderMode; // create the layouts smart_refctd_ptr renderingLayouts[uint8_t(CSession::RenderMode::Count)]; { @@ -176,7 +203,6 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) } // but many push constant ranges - using render_mode_e = CSession::RenderMode; SPushConstantRange pcRanges[uint8_t(render_mode_e::Count)]; auto setPCRange = [&pcRanges](const render_mode_e mode)->void { @@ -196,9 +222,39 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) // create the pipelines { - // TODO + + IGPURayTracingPipeline::SCreationParams creationParams[uint8_t(render_mode_e::Count)] = {}; + using creation_flags_e = IGPURayTracingPipeline::SCreationParams::FLAGS; + auto flags = creation_flags_e::NO_NULL_MISS_SHADERS; + { + smart_refctd_ptr raygenShaders[uint8_t(render_mode_e::Count)] = {}; + raygenShaders[uint8_t(render_mode_e::Previs)] = loadPrecompiledShader<"pathtrace_previs">(_params.assMan,device,logger); + raygenShaders[uint8_t(render_mode_e::Beauty)] = loadPrecompiledShader<"pathtrace_beauty">(_params.assMan,device,logger); + raygenShaders[uint8_t(render_mode_e::Debug)] = loadPrecompiledShader<"pathtrace_debug">(_params.assMan,device,logger); + IGPURayTracingPipeline::SShaderSpecInfo missShaders[uint8_t(render_mode_e::Count)] = {}; + for (uint8_t m=0; mcreateRayTracingPipelines(nullptr,creationParams,params.renderingPipelines.data())) + { + logger.log("Failed to create Path Tracing Pipelines",ILogger::ELL_ERROR); + return nullptr; + } } +// TODO: move to CBasicPresenter // the renderpass: custom dependencies, but everything else fixed from outside (format, and number of subpasses) { // params.presentRenderpass = device->createRenderpass(); diff --git a/40_PathTracer/src/renderer/CSession.cpp b/40_PathTracer/src/renderer/CSession.cpp index b00073e9a..52f3465a9 100644 --- a/40_PathTracer/src/renderer/CSession.cpp +++ b/40_PathTracer/src/renderer/CSession.cpp @@ -65,7 +65,7 @@ bool CSession::init(video::IGPUCommandBuffer* cb) if (!dedicatedAllocate(ubo.get(),"Sensor UBO")) return false; // pipeline barrier in `reset` will take care of sync for this - cb->updateBuffer({.buffer=ubo},&m_params.uniforms); + cb->updateBuffer({.size=sizeof(m_params.uniforms),.buffer=ubo},&m_params.uniforms); addWrite(SensorDSBindings::UBO,SBufferRange{.offset=0,.size=sizeof(m_params.uniforms),.buffer=ubo}); } From 3e39f036cda70bc7a8e4dccdfe99d59a60b0a263 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 21 Jan 2026 04:55:25 +0300 Subject: [PATCH 182/219] Projected Parallelogram sampling --- 73_SolidAngleVisualizer/CMakeLists.txt | 2 +- .../app_resources/hlsl/Drawing.hlsl | 19 +- .../app_resources/hlsl/RayVis.frag.hlsl | 273 ++++++--- .../app_resources/hlsl/Sampling.hlsl | 87 +-- .../hlsl/SolidAngleVis.frag.hlsl | 72 ++- .../hlsl/benchmark/benchmark.comp.hlsl | 70 ++- .../app_resources/hlsl/benchmark/common.hlsl | 14 +- .../app_resources/hlsl/common.hlsl | 42 +- .../app_resources/hlsl/gpu_common.hlsl | 5 +- .../hlsl/parallelogram_sampling.hlsl | 535 ++++++++++++++++++ .../app_resources/hlsl/silhouette.hlsl | 55 +- .../app_resources/hlsl/utils.hlsl | 7 + 73_SolidAngleVisualizer/main.cpp | 524 ++++++++--------- 13 files changed, 1215 insertions(+), 490 deletions(-) create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl diff --git a/73_SolidAngleVisualizer/CMakeLists.txt b/73_SolidAngleVisualizer/CMakeLists.txt index f1701829f..6438c8e06 100644 --- a/73_SolidAngleVisualizer/CMakeLists.txt +++ b/73_SolidAngleVisualizer/CMakeLists.txt @@ -43,9 +43,9 @@ if(NBL_BUILD_IMGUI) app_resources/hlsl/gpu_common.hlsl app_resources/hlsl/Drawing.hlsl app_resources/hlsl/Sampling.hlsl - app_resources/hlsl/Sampling.hlsl app_resources/hlsl/silhouette.hlsl app_resources/hlsl/utils.hlsl + app_resources/hlsl/parallelogram_sampling.hlsl # app_resources/hlsl/test.comp.hlsl app_resources/hlsl/benchmark/benchmark.comp.hlsl diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl index 1a2962c78..fa2a93b45 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl @@ -4,16 +4,12 @@ #include "common.hlsl" #include "gpu_common.hlsl" -#if DEBUG_DATA // Check if a face on the hemisphere is visible from camera at origin bool isFaceVisible(float32_t3 faceCenter, float32_t3 faceNormal) { float32_t3 viewVec = normalize(-faceCenter); // Vector from camera to face return dot(faceNormal, viewVec) > 0.0f; } -#endif // DEBUG_DATA - -#if VISUALIZE_SAMPLES // doesn't change Z coordinate float32_t3 sphereToCircle(float32_t3 spherePoint) @@ -30,6 +26,8 @@ float32_t3 sphereToCircle(float32_t3 spherePoint) } } +#if VISUALIZE_SAMPLES + float32_t drawGreatCircleArc(float32_t3 fragPos, float32_t3 points[2], float32_t aaWidth, float32_t width = 0.01f) { float32_t3 v0 = normalize(points[0]); @@ -103,8 +101,8 @@ float32_t4 drawHiddenEdges(float32_t3x4 modelMatrix, float32_t3 spherePos, uint3 } float32_t3 pts[2] = {p0, p1}; - float32_t4 c = drawGreatCircleArc(spherePos, pts, aaWidth, 0.005f); - color += float32_t4(hiddenEdgeColor * c.a, c.a); + float32_t c = drawGreatCircleArc(spherePos, pts, aaWidth, 0.003f); + color += float32_t4(hiddenEdgeColor * c, c); } return color; @@ -128,7 +126,7 @@ float32_t4 drawCorner(float32_t3 cornerNDCPos, float32_t2 ndc, float32_t aaWidth // ------------------------------------------------- // inner black dot for hidden corners // ------------------------------------------------- - if (cornerNDCPos.z < 0.0f) + if (cornerNDCPos.z < 0.0f && innerDotSize > 0.0) { float32_t innerAlpha = 1.0f - smoothstep(innerDotSize - aaWidth, innerDotSize + aaWidth, @@ -191,23 +189,22 @@ float32_t arrowHead(float32_t2 ndc, float32_t2 tip, float32_t2 direction, float3 } // Helper to draw an edge with proper color mapping -float32_t4 drawEdge(uint32_t originalEdgeIdx, float32_t3 pts[2], float32_t3 spherePos, float32_t aaWidth, float32_t width = 0.01f) +float32_t4 drawEdge(uint32_t originalEdgeIdx, float32_t3 pts[2], float32_t3 spherePos, float32_t aaWidth, float32_t width = 0.003f) { float32_t4 edgeContribution = drawGreatCircleArc(spherePos, pts, aaWidth, width); return float32_t4(colorLUT[originalEdgeIdx] * edgeContribution.a, edgeContribution.a); }; -float32_t4 drawCorners(float32_t3x4 modelMatrix, float32_t2 ndc, float32_t aaWidth) +float32_t4 drawCorners(float32_t3x4 modelMatrix, float32_t2 ndc, float32_t aaWidth, float32_t dotSize) { float32_t4 color = float32_t4(0, 0, 0, 0); - float32_t dotSize = 0.02f; float32_t innerDotSize = dotSize * 0.5f; for (uint32_t i = 0; i < 8; i++) { float32_t3 cornerCirclePos = sphereToCircle(normalize(getVertex(modelMatrix, i))); - color += drawCorner(cornerCirclePos, ndc, aaWidth, dotSize, innerDotSize, colorLUT[i]); + color += drawCorner(cornerCirclePos, ndc, aaWidth, dotSize, 0.0, colorLUT[i]); } return color; diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/RayVis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/RayVis.frag.hlsl index 2b4d7e3ef..a8a1ff52d 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/RayVis.frag.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/RayVis.frag.hlsl @@ -7,9 +7,20 @@ using namespace nbl::hlsl; using namespace ext::FullScreenTriangle; +// Visualizes a ray as an arrow from origin in NDC space +// Returns color (rgb), intensity (a), and depth (in extra component) +struct ArrowResult +{ + float32_t4 color : SV_Target0; + float32_t depth : SV_Depth; +}; + [[vk::push_constant]] struct PushConstantRayVis pc; +// #if DEBUG_DATA [[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; -#define VISUALIZE_SAMPLES 1 +// #endif + +#if VISUALIZE_SAMPLES #include "Drawing.hlsl" // Ray-AABB intersection in world space @@ -46,71 +57,101 @@ float32_t2 projectToNDC(float32_t3 worldPos, float32_t4x4 viewProj, float32_t as return clipPos.xy; } -// Visualizes a ray as an arrow from origin in NDC space -// Returns color (rgb), intensity (a), and depth (in extra component) -struct ArrowResult -{ - float32_t4 color : SV_Target0; - float32_t depth : SV_Depth; -}; - ArrowResult visualizeRayAsArrow(float32_t3 rayOrigin, float32_t4 directionAndPdf, float32_t arrowLength, float32_t2 ndcPos, float32_t aspect) { ArrowResult result; result.color = float32_t4(0, 0, 0, 0); - result.depth = 0.0; + result.depth = 1.0; // Far plane in reversed-Z float32_t3 rayDir = normalize(directionAndPdf.xyz); float32_t pdf = directionAndPdf.w; - float32_t3 rayEnd = rayOrigin + rayDir * arrowLength; + // Define the 3D line segment + float32_t3 worldStart = rayOrigin; + float32_t3 worldEnd = rayOrigin + rayDir * arrowLength; + + // Transform to view space (camera space) for clipping + float32_t4x4 viewMatrix = pc.viewProjMatrix; // If you have view matrix separately, use that + // For now, we'll work in clip space and check w values + + float32_t4 clipStart = mul(pc.viewProjMatrix, float32_t4(worldStart, 1.0)); + float32_t4 clipEnd = mul(pc.viewProjMatrix, float32_t4(worldEnd, 1.0)); + + // Clip against near plane (w = 0 plane in clip space) + // If both points are behind camera, reject + if (clipStart.w <= 0.001 && clipEnd.w <= 0.001) + return result; + + // If line crosses the near plane, clip it + float32_t t0 = 0.0; + float32_t t1 = 1.0; + + if (clipStart.w <= 0.001) + { + // Start is behind camera, clip to near plane + float32_t t = (0.001 - clipStart.w) / (clipEnd.w - clipStart.w); + t0 = saturate(t); + clipStart = lerp(clipStart, clipEnd, t0); + worldStart = lerp(worldStart, worldEnd, t0); + } + + if (clipEnd.w <= 0.001) + { + // End is behind camera, clip to near plane + float32_t t = (0.001 - clipStart.w) / (clipEnd.w - clipStart.w); + t1 = saturate(t); + clipEnd = lerp(clipStart, clipEnd, t1); + worldEnd = lerp(worldStart, worldEnd, t1); + } + + // Now check if the clipped segment is valid + if (t0 >= t1) + return result; - // Project start and end points to NDC space - float32_t2 ndcStart = projectToNDC(rayOrigin, pc.viewProjMatrix, aspect); - float32_t2 ndcEnd = projectToNDC(rayEnd, pc.viewProjMatrix, aspect); + // Perspective divide to NDC + float32_t2 ndcStart = clipStart.xy / clipStart.w; + float32_t2 ndcEnd = clipEnd.xy / clipEnd.w; - // Get clip space positions - float32_t4 clipStart = mul(pc.viewProjMatrix, float32_t4(rayOrigin, 1.0)); - float32_t4 clipEnd = mul(pc.viewProjMatrix, float32_t4(rayEnd, 1.0)); + // Apply aspect ratio correction + ndcStart.x *= aspect; + ndcEnd.x *= aspect; - // Calculate arrow properties in NDC space - float32_t arrowNDCLength = length(ndcEnd - ndcStart); + // Calculate arrow direction in NDC + float32_t2 arrowVec = ndcEnd - ndcStart; + float32_t arrowNDCLength = length(arrowVec); - // Skip if arrow is too small on screen (in NDC units) - if (arrowNDCLength < 0.01) + // Skip if arrow is too small on screen + if (arrowNDCLength < 0.005) return result; - // Calculate the parametric position along the arrow shaft IN NDC - float32_t2 pa = ndcPos - ndcStart; - float32_t2 ba = ndcEnd - ndcStart; - float32_t t_ndc = saturate(dot(pa, ba) / dot(ba, ba)); + // Calculate perpendicular distance to line segment in NDC space + float32_t2 toPixel = ndcPos - ndcStart; + float32_t t_ndc = saturate(dot(toPixel, arrowVec) / dot(arrowVec, arrowVec)); // Draw line shaft float32_t lineThickness = 0.002; float32_t lineIntensity = lineSegment(ndcPos, ndcStart, ndcEnd, lineThickness); - // Calculate depth at this pixel's position along the arrow + // Calculate perspective-correct depth if (lineIntensity > 0.0) { - // Interpolate in CLIP space for perspective-correct depth + // Interpolate in clip space float32_t4 clipPos = lerp(clipStart, clipEnd, t_ndc); - float32_t depthNDC = clipPos.z / clipPos.w; - // Convert to reversed depth [0,1] -> [1,0] - result.depth = 1.0 - depthNDC; + // Compute NDC depth for reversed-Z + float32_t depthNDC = clipPos.z / clipPos.w; + result.depth = depthNDC; - // Clip against depth range (like hardware would) - // In reversed depth: near=1.0, far=0.0 + // Clip against valid depth range if (result.depth < 0.0 || result.depth > 1.0) { - lineIntensity = 0.0; // Outside depth range, clip it + lineIntensity = 0.0; } } // Modulate by PDF float32_t pdfIntensity = saturate(pdf * 0.5); - - float32_t3 finalColor = pdfIntensity; + float32_t3 finalColor = float32_t3(pdfIntensity, pdfIntensity, pdfIntensity); result.color = float32_t4(finalColor, lineIntensity); return result; @@ -141,81 +182,137 @@ float32_t3 worldToLocalDir(float32_t3 worldDir, float32_t3x4 modelMatrix) float32_t4x4 invModel = inverse(model4x4); return mul(invModel, float32_t4(worldDir, 0.0)).xyz; } + +// Returns both tMin (entry) and tMax (exit) for ray-AABB intersection +struct AABBIntersection +{ + float32_t tMin; // Distance to front face (entry point) + float32_t tMax; // Distance to back face (exit point) + bool hit; // Whether ray intersects the AABB at all +}; + +AABBIntersection rayAABBIntersectionFull(float32_t3 origin, float32_t3 dir, float32_t3 boxMin, float32_t3 boxMax) +{ + AABBIntersection result; + result.hit = false; + result.tMin = 0.0f; + result.tMax = 0.0f; + + float32_t3 invDir = 1.0f / dir; + float32_t3 t0 = (boxMin - origin) * invDir; + float32_t3 t1 = (boxMax - origin) * invDir; + + float32_t3 tmin = min(t0, t1); + float32_t3 tmax = max(t0, t1); + + result.tMin = max(max(tmin.x, tmin.y), tmin.z); + result.tMax = min(min(tmax.x, tmax.y), tmax.z); + + // Ray intersects if tMax >= tMin and tMax > 0 + result.hit = (result.tMax >= result.tMin) && (result.tMax > 0.0f); + + // If we're inside the box, tMin will be negative + // In that case, we want to use tMax (exit point) + if (result.tMin < 0.0f) + result.tMin = 0.0f; + + return result; +} +#endif // VISUALIZE_SAMPLES + [[vk::location(0)]] ArrowResult main(SVertexAttributes vx) { ArrowResult output; +#if VISUALIZE_SAMPLES output.color = float32_t4(0.0, 0.0, 0.0, 0.0); - output.depth = 0.0; // Default to far plane in reversed depth - float32_t maxDepth = 0.0; // Track the closest depth (maximum in reversed depth) + output.depth = 0.0; // Far plane in reversed-Z (near=0, far=1) + float32_t maxDepth = 0.0; // Track closest depth (minimum in reversed-Z) + float32_t aaWidth = length(float32_t2(ddx(vx.uv.x), ddy(vx.uv.y))); // Convert to NDC space with aspect ratio correction float32_t2 ndcPos = vx.uv * 2.0f - 1.0f; float32_t aspect = pc.viewport.z / pc.viewport.w; ndcPos.x *= aspect; - // Draw clipped silhouett vertices using drawCorners() for (uint32_t v = 0; v < DebugDataBuffer[0].clippedSilhouetteVertexCount; v++) { float32_t4 clipPos = mul(pc.viewProjMatrix, float32_t4(DebugDataBuffer[0].clippedSilhouetteVertices[v], 1.0)); - float32_t3 ndcPosVertex = clipPos.xyz / clipPos.w; // Perspective divide to get NDC - - float32_t4 intensity = drawCorner(ndcPosVertex, ndcPos, 0.005, 0.01, 0.01, float32_t3(1.0, 0.0, 0.0)); - - output.color += intensity; - output.depth = intensity > 0.0 ? 1.0 : output.depth; // Update depth - maxDepth = max(maxDepth, output.depth); - } - - int sampleCount = DebugDataBuffer[0].sampleCount; - - for (int i = 0; i < sampleCount; i++) - { - float32_t3 rayOrigin = float32_t3(0, 0, 0); - float32_t4 directionAndPdf = DebugDataBuffer[0].rayData[i]; - float32_t3 rayDir = normalize(directionAndPdf.xyz); + float32_t3 ndcPosVertex = clipPos.xyz / clipPos.w; + if (ndcPosVertex.z < maxDepth) + continue; - // Define cube bounds in local space (unit cube from -0.5 to 0.5, adjust as needed) - float32_t3 cubeLocalMin = float32_t3(-0.5, -0.5, -0.5); - float32_t3 cubeLocalMax = float32_t3(0.5, 0.5, 0.5); + float32_t4 intensity = drawCorner(ndcPosVertex, ndcPos, aaWidth, 0.03, 0.0, colorLUT[DebugDataBuffer[0].clippedSilhouetteVerticesIndices[v]]); - // Transform ray to local space of the cube - float32_t3 localRayOrigin = worldToLocal(rayOrigin, pc.modelMatrix); - float32_t3 localRayDir = normalize(worldToLocalDir(rayDir, pc.modelMatrix)); - - // Perform intersection test in local space - float32_t hitDistance = rayAABBIntersection(localRayOrigin, localRayDir, cubeLocalMin, cubeLocalMax); - - float32_t arrowLength; - if (hitDistance > 0.0) - { - // Calculate world space hit distance - // We need to account for the scaling in the model matrix - float32_t3 localHitPoint = localRayOrigin + localRayDir * hitDistance; - float32_t3 worldHitPoint = mul(pc.modelMatrix, float32_t4(localHitPoint, 1.0)).xyz; - arrowLength = length(worldHitPoint - rayOrigin); - } - else + // Update depth only where we drew something + if (any(intensity.rgb > 0.0)) { - // No intersection, use fallback (e.g., fixed length or distance to cube center) - float32_t3 cubeCenter = mul(pc.modelMatrix, float32_t4(0, 0, 0, 1)).xyz; - arrowLength = length(cubeCenter - rayOrigin) + 2.0; + output.color.rgb += intensity.rgb; + maxDepth = max(maxDepth, 1.0f - ndcPosVertex.z); } - - ArrowResult arrow = visualizeRayAsArrow(rayOrigin, directionAndPdf, arrowLength, ndcPos, aspect); - maxDepth = max(maxDepth, arrow.depth); - - // Additive blending - output.color.rgb += hitDistance > 0.0 ? arrow.color.rgb : float32_t3(1.0, 0.0, 0.0); - output.color.a = max(output.color.a, arrow.color.a); } + uint32_t sampleCount = DebugDataBuffer[0].sampleCount; + + // for (uint32_t i = 0; i < sampleCount; i++) + // { + // float32_t3 rayOrigin = float32_t3(0, 0, 0); + // float32_t4 directionAndPdf = DebugDataBuffer[0].rayData[i]; + // float32_t3 rayDir = normalize(directionAndPdf.xyz); + + // // Define cube bounds in local space + // float32_t3 cubeLocalMin = float32_t3(-0.5, -0.5, -0.5); + // float32_t3 cubeLocalMax = float32_t3(0.5, 0.5, 0.5); + + // // Transform ray to local space of the cube + // float32_t3 localRayOrigin = worldToLocal(rayOrigin, pc.modelMatrix); + // float32_t3 localRayDir = normalize(worldToLocalDir(rayDir, pc.modelMatrix)); + + // // Get both entry and exit distances + // AABBIntersection intersection = rayAABBIntersectionFull( + // localRayOrigin, + // localRayDir, + // cubeLocalMin, + // cubeLocalMax); + + // float32_t arrowLength; + // float32_t3 arrowColor; + + // if (intersection.hit) + // { + // // Use tMax (exit point at back face) instead of tMin (entry point at front face) + // float32_t3 localExitPoint = localRayOrigin + localRayDir * intersection.tMax; + // float32_t3 worldExitPoint = mul(pc.modelMatrix, float32_t4(localExitPoint, 1.0)).xyz; + // arrowLength = length(worldExitPoint - rayOrigin); + // arrowColor = float32_t3(0.0, 1.0, 0.0); // Green for valid samples + // } + // else + // { + // // Ray doesn't intersect - THIS SHOULD NEVER HAPPEN with correct sampling! + // float32_t3 cubeCenter = mul(pc.modelMatrix, float32_t4(0, 0, 0, 1)).xyz; + // arrowLength = length(cubeCenter - rayOrigin) + 2.0; + // arrowColor = float32_t3(1.0, 0.0, 0.0); // Red for BROKEN samples + // } + + // ArrowResult arrow = visualizeRayAsArrow(rayOrigin, directionAndPdf, arrowLength, ndcPos, aspect); + + // // Only update depth if arrow was actually drawn + // if (arrow.color.a > 0.0) + // { + // maxDepth = max(maxDepth, arrow.depth); + // } + + // // Modulate arrow color by its alpha (only add where arrow is visible) + // output.color.rgb += arrowColor * arrow.color.a; + // output.color.a = max(output.color.a, arrow.color.a); + // } + // Clamp to prevent overflow output.color = saturate(output.color); output.color.a = 1.0; - // Write the closest depth (maximum in reversed depth) - // ONLY write depth if we actually drew something - output.depth = output.color.a > 0.0 ? maxDepth : 0.0; + // Write the closest depth (minimum in reversed-Z) + output.depth = maxDepth; +#endif return output; -} \ No newline at end of file +} diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl index 9caf83246..cefa65267 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl @@ -2,17 +2,15 @@ #define _SAMPLING_HLSL_ // Include the spherical triangle utilities -#include +#include "gpu_common.hlsl" +#include "parallelogram_sampling.hlsl" #include #include #include -#include "nbl/builtin/hlsl/random/pcg.hlsl" -#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" +#include +#include using namespace nbl::hlsl; -// Sampling mode enum -#define SAMPLING_MODE_SOLID_ANGLE 0 -#define SAMPLING_MODE_PROJECTED_SOLID_ANGLE 1 // Maximum number of triangles we can have after clipping // Without clipping, max 3 faces can be visible at once so 3 faces * 2 triangles = 6 edges, forming max 4 triangles @@ -59,9 +57,9 @@ float32_t computeProjectedSolidAngleFallback(float32_t3 v0, float32_t3 v1, float n2 /= l2; // 3. Get arc lengths (angles in radians) - float32_t a = asin(clamp(l0, -1.0, 1.0)); // side v0-v1 - float32_t b = asin(clamp(l1, -1.0, 1.0)); // side v1-v2 - float32_t c = asin(clamp(l2, -1.0, 1.0)); // side v2-v0 + float32_t a = asin(clamp(l0, -1.0f, 1.0f)); // side v0-v1 + float32_t b = asin(clamp(l1, -1.0f, 1.0f)); // side v1-v2 + float32_t c = asin(clamp(l2, -1.0f, 1.0f)); // side v2-v0 // Handle acos/asin quadrant if dot product is negative if (dot(v0, v1) < 0) @@ -94,7 +92,7 @@ SamplingData buildSamplingDataFromSilhouette(ClippedSilhouette silhouette, uint3 const float32_t3 origin = float32_t3(0, 0, 0); // Compute face normal ONCE before the loop - silhouette is planar! - if (samplingMode == SAMPLING_MODE_PROJECTED_SOLID_ANGLE) + if (samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) { float32_t3 v1 = silhouette.vertices[1]; float32_t3 v2 = silhouette.vertices[2]; @@ -116,7 +114,7 @@ SamplingData buildSamplingDataFromSilhouette(ClippedSilhouette silhouette, uint3 // Calculate triangle solid angle float32_t solidAngle; - if (samplingMode == SAMPLING_MODE_PROJECTED_SOLID_ANGLE) + if (samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) { // scalar_type projectedSolidAngleOfTriangle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) float32_t3 cos_vertices = clamp( @@ -141,7 +139,7 @@ SamplingData buildSamplingDataFromSilhouette(ClippedSilhouette silhouette, uint3 data.count++; } -#ifdef DEBUG_DATA +#if DEBUG_DATA // Validate no antipodal edges exist (would create spherical lune) for (uint32_t i = 0; i < silhouette.count; i++) { @@ -156,13 +154,6 @@ SamplingData buildSamplingDataFromSilhouette(ClippedSilhouette silhouette, uint3 } } DebugDataBuffer[0].maxTrianglesExceeded = (data.count > MAX_TRIANGLES); - - DebugDataBuffer[0].clippedSilhouetteVertexCount = silhouette.count; - for (uint32_t v = 0; v < silhouette.count; v++) - { - DebugDataBuffer[0].clippedSilhouetteVertices[v] = silhouette.vertices[v]; - } - DebugDataBuffer[0].triangleCount = data.count; DebugDataBuffer[0].totalSolidAngles = data.totalWeight; for (uint32_t tri = 0; tri < data.count; tri++) @@ -214,7 +205,7 @@ float32_t3 sampleFromData(SamplingData data, ClippedSilhouette silhouette, float float32_t3 v1 = silhouette.vertices[vertexIdx]; float32_t3 v2 = silhouette.vertices[vertexIdx + 1]; - float32_t3 faceNormal = normalize(cross(v1 - v0, v2 - v0)); + float32_t3 faceNormal = normalize(cross(v1 - v0, v2 - v0)); float32_t3 origin = float32_t3(0, 0, 0); @@ -232,7 +223,7 @@ float32_t3 sampleFromData(SamplingData data, ClippedSilhouette silhouette, float float32_t3 direction; float32_t rcpPdf; - if (data.samplingMode == SAMPLING_MODE_PROJECTED_SOLID_ANGLE) + if (data.samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) { sampling::ProjectedSphericalTriangle samplingTri = sampling::ProjectedSphericalTriangle::create(shapeTri); @@ -277,8 +268,12 @@ float32_t3 sampleFromData(SamplingData data, ClippedSilhouette silhouette, float #if VISUALIZE_SAMPLES -float32_t4 visualizeSamples(float32_t2 screenUV, float32_t3 spherePos, ClippedSilhouette silhouette, - uint32_t samplingMode, uint32_t frameIndex, SamplingData samplingData, uint32_t numSamples, inout RWStructuredBuffer DebugDataBuffer) +float32_t4 visualizeSamples(float32_t2 screenUV, float32_t3 spherePos, float32_t2 ndc, float32_t aaWidth, ClippedSilhouette silhouette, SAMPLING_MODE samplingMode, uint32_t frameIndex, SamplingData samplingData, uint32_t numSamples +#if DEBUG_DATA + , + inout RWStructuredBuffer DebugDataBuffer +#endif +) { float32_t4 accumColor = 0; @@ -289,27 +284,49 @@ float32_t4 visualizeSamples(float32_t2 screenUV, float32_t3 spherePos, ClippedSi float32_t2 pssPos = float32_t2(0.01, 0.01); // Offset from corner bool isInsidePSS = all(and(screenUV >= pssPos, screenUV <= (pssPos + pssSize))); + ParallelogramSilhouette paraSilhouette = buildParallelogram(silhouette, ndc, spherePos, aaWidth, accumColor); + +#if DEBUG_DATA DebugDataBuffer[0].sampleCount = numSamples; +#endif for (uint32_t i = 0; i < numSamples; i++) { - nbl::hlsl::random::PCG32 seedGen = nbl::hlsl::random::PCG32::construct(frameIndex * 65536u + i); - const uint32_t seed1 = seedGen(); - const uint32_t seed2 = seedGen(); - nbl::hlsl::Xoroshiro64StarStar rnd = nbl::hlsl::Xoroshiro64StarStar::construct(uint32_t2(seed1, seed2)); - float32_t2 xi = nextRandomUnorm2(rnd); - float32_t pdf; - uint32_t triIdx; - float32_t3 sampleDir = sampleFromData(samplingData, silhouette, xi, pdf, triIdx); + // Hash the invocation to offset the grid + uint32_t offset = i * 747796405u + 2891336453u; + uint32_t idx = (offset) & 63u; // Keep within 64 samples + float32_t2 xi = float32_t2( + (float32_t(idx & 7u) + 0.5) / 8.0f, + (float32_t(idx >> 3u) + 0.5) / 8.0f); + float32_t pdf; + uint32_t index = 0; + float32_t3 sampleDir; + if (samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || + samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + { + sampleDir = sampleFromData(samplingData, silhouette, xi, pdf, index); + } + else if (samplingMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) + { + bool valid; + sampleDir = sampleFromParallelogram(paraSilhouette, xi, pdf, valid); + if (!valid) + { + pdf = 0.0f; + sampleDir = float32_t3(0, 0, 1); + } + } +#if DEBUG_DATA DebugDataBuffer[0].rayData[i] = float32_t4(sampleDir, pdf); +#endif float32_t dist3D = distance(sampleDir, normalize(spherePos)); float32_t alpha3D = 1.0f - smoothstep(0.0f, 0.02f, dist3D); if (alpha3D > 0.0f && !isInsidePSS) { - float32_t3 sampleColor = colorLUT[triIdx].rgb; + float32_t3 sampleColor = colorLUT[index].rgb; accumColor += float32_t4(sampleColor * alpha3D, alpha3D); } @@ -322,7 +339,7 @@ float32_t4 visualizeSamples(float32_t2 screenUV, float32_t3 spherePos, ClippedSi float32_t alpha2D = drawCross2D(screenUV, xiPixelPos, 0.005f, 0.001f); if (alpha2D > 0.0f) { - float32_t3 sampleColor = colorLUT[triIdx].rgb; + float32_t3 sampleColor = colorLUT[index].rgb; accumColor += float32_t4(sampleColor * alpha2D, alpha2D); } } @@ -334,5 +351,5 @@ float32_t4 visualizeSamples(float32_t2 screenUV, float32_t3 spherePos, ClippedSi return accumColor; } -#endif -#endif +#endif // VISUALIZE_SAMPLES +#endif // _SAMPLING_HLSL_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index 79791af57..bd9312733 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -6,9 +6,9 @@ using namespace nbl::hlsl; using namespace ext::FullScreenTriangle; +#if DEBUG_DATA [[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; // TODO: move below other includes - -#define VISUALIZE_SAMPLES 1 +#endif #include "utils.hlsl" #include "Drawing.hlsl" @@ -120,10 +120,25 @@ void computeSpherePos(SVertexAttributes vx, out float32_t2 ndc, out float32_t3 s #else computeSilhouette(pc.modelMatrix, vertexCount, sil, silhouette); #endif - // Draw clipped silhouette vertices - // color += drawClippedSilhouetteVertices(ndc, silhouette, aaWidth); - SamplingData samplingData = buildSamplingDataFromSilhouette(silhouette, pc.samplingMode); + SamplingData samplingData; + ParallelogramSilhouette paraSilhouette; + if (pc.samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || + pc.samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + { + samplingData = buildSamplingDataFromSilhouette(silhouette, pc.samplingMode); + } + else + { + + paraSilhouette = buildParallelogram(silhouette +#if VISUALIZE_SAMPLES + , + ndc, spherePos, aaWidth, color +#endif + ); + } + #if VISUALIZE_SAMPLES // For debugging: Draw a small indicator of which faces are found @@ -131,27 +146,50 @@ void computeSpherePos(SVertexAttributes vx, out float32_t2 ndc, out float32_t3 s // color += drawFaces(pc.modelMatrix, spherePos, aaWidth); - // Draw samples on sphere - color += visualizeSamples(vx.uv, spherePos, silhouette, pc.samplingMode, pc.frameIndex, samplingData, 64, DebugDataBuffer); - + // Draw clipped silhouette vertices + // color += drawClippedSilhouetteVertices(ndc, silhouette, aaWidth); color += drawHiddenEdges(pc.modelMatrix, spherePos, silEdgeMask, aaWidth); - color += drawCorners(pc.modelMatrix, ndc, aaWidth); + // color += drawCorners(pc.modelMatrix, ndc, aaWidth, 0.05f); color += drawRing(ndc, aaWidth); - if (all(vx.uv >= float32_t2(0.49f, 0.49f)) && all(vx.uv <= float32_t2(0.51f, 0.51f))) + // Draw samples on sphere + color += visualizeSamples(vx.uv, spherePos, ndc, aaWidth, silhouette, pc.samplingMode, pc.frameIndex, samplingData, pc.sampleCount +#if DEBUG_DATA + , + DebugDataBuffer +#endif + ); + + if (all(vx.uv >= float32_t2(0.f, 0.97f)) && all(vx.uv <= float32_t2(0.03f, 1.0f))) { return float32_t4(colorLUT[configIndex], 1.0f); } #else - nbl::hlsl::random::PCG32 seedGen = nbl::hlsl::random::PCG32::construct(65536u + i); - const uint32_t2 seeds = uint32_t2(seedGen(), seedGen()); - nbl::hlsl::Xoroshiro64StarStar rnd = nbl::hlsl::Xoroshiro64StarStar::construct(seeds); - float32_t2 xi = nextRandomUnorm2(rnd); + // Hash the invocation to offset the grid + uint32_t offset = 747796405u + 2891336453u; + uint32_t idx = (offset) & 63u; // Keep within 64 samples + float32_t2 xi = float32_t2( + (float32_t(idx & 7u) + 0.5) / 8.0f, + (float32_t(idx >> 3u) + 0.5) / 8.0f); float32_t pdf; - uint32_t triIdx; - float32_t3 sampleDir = sampleFromData(samplingData, silhouette, xi, pdf, triIdx); - + uint32_t index = 0; + float32_t3 sampleDir; + if (pc.samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || + pc.samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + { + sampleDir = sampleFromData(samplingData, silhouette, xi, pdf, index); + } + else if (pc.samplingMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) + { + bool valid; + sampleDir = sampleFromParallelogram(paraSilhouette, xi, pdf, valid); + if (!valid) + { + pdf = 0.0f; + sampleDir = float32_t3(0, 0, 1); + } + } color += float4(sampleDir * 0.02f / pdf, 1.0f); #endif // VISUALIZE_SAMPLES setDebugData(sil, region, configIndex); diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl index 6d04538a5..0ea7c2afb 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl @@ -2,10 +2,30 @@ //// This file is part of the "Nabla Engine". //// For conditions of distribution and use, see copyright notice in nabla.h #pragma shader_stage(compute) -#define DEBUG_DATA 0 + +#include "app_resources/hlsl/common.hlsl" +// doesn't change Z coordinate +float32_t3 sphereToCircle(float32_t3 spherePoint) +{ + if (spherePoint.z >= 0.0f) + { + return float32_t3(spherePoint.xy, spherePoint.z); + } + else + { + float32_t r2 = (1.0f - spherePoint.z) / (1.0f + spherePoint.z); + float32_t uv2Plus1 = r2 + 1.0f; + return float32_t3((spherePoint.xy * uv2Plus1 / 2.0f), spherePoint.z); + } +} + +#undef DEBUG_DATA // Avoid conflict with DebugDataBuffer in this file +#undef VISUALIZE_SAMPLES + #include "app_resources/hlsl/benchmark/common.hlsl" #include "app_resources/hlsl/silhouette.hlsl" #include "app_resources/hlsl/Sampling.hlsl" +#include "app_resources/hlsl/parallelogram_sampling.hlsl" using namespace nbl::hlsl; @@ -14,30 +34,50 @@ using namespace nbl::hlsl; [numthreads(BENCHMARK_WORKGROUP_DIMENSION_SIZE_X, 1, 1)] [shader("compute")] void - main(uint3 invocationID : SV_DispatchThreadID) + main(uint32_t3 invocationID : SV_DispatchThreadID) { + // Perturb model matrix slightly per sample group + float32_t3x4 perturbedMatrix = pc.modelMatrix; + perturbedMatrix[0][3] += float32_t(invocationID.x) * 1e-6f; + uint32_t3 region; uint32_t configIndex; uint32_t vertexCount; - uint32_t sil = computeRegionAndConfig(pc.modelMatrix, region, configIndex, vertexCount); + uint32_t sil = computeRegionAndConfig(perturbedMatrix, region, configIndex, vertexCount); ClippedSilhouette silhouette; - computeSilhouette(pc.modelMatrix, vertexCount, sil, silhouette); - - SamplingData samplingData; - samplingData = buildSamplingDataFromSilhouette(silhouette, pc.samplingMode); - - nbl::hlsl::random::PCG32 seedGen = nbl::hlsl::random::PCG32::construct(65536u + invocationID.x); - const uint32_t2 seeds = uint32_t2(seedGen(), seedGen()); - + computeSilhouette(perturbedMatrix, vertexCount, sil, silhouette); float32_t pdf; uint32_t triIdx; float32_t3 sampleDir = float32_t3(0.0, 0.0, 0.0); - for (uint32_t i = 0; i < 64; i++) + if (pc.benchmarkMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || + pc.benchmarkMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) { - nbl::hlsl::Xoroshiro64StarStar rnd = nbl::hlsl::Xoroshiro64StarStar::construct(seeds); - float32_t2 xi = nextRandomUnorm2(rnd); - sampleDir += sampleFromData(samplingData, silhouette, xi, pdf, triIdx); + SamplingData samplingData; + samplingData = buildSamplingDataFromSilhouette(silhouette, pc.benchmarkMode); + + for (uint32_t i = 0; i < 64; i++) + { + float32_t2 xi = float32_t2( + (float32_t(i & 7u) + 0.5f) / 8.0f, + (float32_t(i >> 3u) + 0.5f) / 8.0f); + + sampleDir += sampleFromData(samplingData, silhouette, xi, pdf, triIdx); + } + } + else if (pc.benchmarkMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) + { + // Precompute parallelogram for sampling + ParallelogramSilhouette paraSilhouette = buildParallelogram(silhouette); + for (uint32_t i = 0; i < 64; i++) + { + float32_t2 xi = float32_t2( + (float32_t(i & 7u) + 0.5f) / 8.0f, + (float32_t(i >> 3u) + 0.5f) / 8.0f); + + bool valid; + sampleDir += sampleFromParallelogram(paraSilhouette, xi, pdf, valid); + } } const uint32_t offset = sizeof(uint32_t) * invocationID.x; diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl index d54ee8a36..3091bc793 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl @@ -7,17 +7,5 @@ NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_X = 64u; NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y = 1u; NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z = 1u; -NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_COUNT = 1920u * 1080u / BENCHMARK_WORKGROUP_DIMENSION_SIZE_X; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_COUNT = 1000000u; -enum SAMPLING_BENCHMARK_MODE -{ - TRIANGLE_SOLID_ANGLE, - TRIANGLE_PROJECTED_SOLID_ANGLE, -}; - -struct BenchmarkPushConstants -{ - float32_t3x4 modelMatrix; - uint32_t samplingMode; - SAMPLING_BENCHMARK_MODE benchmarkMode; -}; \ No newline at end of file diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl index db2f328b5..9e4954ebc 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -1,21 +1,40 @@ #ifndef _SOLID_ANGLE_VIS_COMMON_HLSL_ #define _SOLID_ANGLE_VIS_COMMON_HLSL_ -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -// Sampling mode enum -#define SAMPLING_MODE_SOLID_ANGLE 0 -#define SAMPLING_MODE_PROJECTED_SOLID_ANGLE 1 +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#define DEBUG_DATA 01 +#define VISUALIZE_SAMPLES 01 -#define DEBUG_DATA 1 #define FAST 1 namespace nbl { namespace hlsl { + // Sampling mode enum + enum SAMPLING_MODE : uint32_t + { + TRIANGLE_SOLID_ANGLE, + TRIANGLE_PROJECTED_SOLID_ANGLE, + PROJECTED_PARALLELOGRAM_SOLID_ANGLE + }; struct ResultData { + uint32_t parallelogramDoesNotBound; + float32_t parallelogramArea; + uint32_t failedVertexIndex; + uint32_t edgeIsConvex[4]; + + uint32_t parallelogramVerticesInside; + uint32_t parallelogramEdgesInside; + uint32_t failedEdgeIndex; + float32_t2 failedVertexUV; + float32_t3 failedPoint; + uint32_t failedEdgeSample; + float32_t2 failedEdgeUV; + float32_t2 parallelogramCorners[4]; + uint32_t3 region; uint32_t silhouetteIndex; @@ -38,11 +57,14 @@ namespace nbl uint32_t clippedSilhouetteVertexCount; float32_t3 clippedSilhouetteVertices[7]; + uint32_t clippedSilhouetteVerticesIndices[7]; uint32_t triangleCount; float32_t solidAngles[5]; float32_t totalSolidAngles; + uint32_t sampleOutsideSilhouette; + // Sampling ray visualization data uint32_t sampleCount; float32_t4 rayData[64]; // xyz = direction, w = PDF @@ -52,18 +74,26 @@ namespace nbl { float32_t3x4 modelMatrix; float32_t4 viewport; - uint32_t samplingMode; + SAMPLING_MODE samplingMode; + uint32_t sampleCount; uint32_t frameIndex; }; struct PushConstantRayVis { float32_t4x4 viewProjMatrix; + float32_t3x4 viewMatrix; float32_t3x4 modelMatrix; float32_t4 viewport; uint32_t frameIndex; }; + struct BenchmarkPushConstants + { + float32_t3x4 modelMatrix; + SAMPLING_MODE benchmarkMode; + }; + static const float32_t3 colorLUT[27] = { float32_t3(0, 0, 0), float32_t3(0.5, 0.5, 0.5), float32_t3(1, 0, 0), float32_t3(0, 1, 0), float32_t3(0, 0, 1), diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl index d4ef71d07..040883956 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl @@ -1,7 +1,8 @@ #ifndef GPU_COMMON_HLSL #define GPU_COMMON_HLSL -static const float32_t CIRCLE_RADIUS = 0.5f; +static const float32_t CIRCLE_RADIUS = 1.0f; +static const float32_t INV_CIRCLE_RADIUS = 1.0f / CIRCLE_RADIUS; // --- Geometry Utils --- struct ClippedSilhouette @@ -136,7 +137,7 @@ bool getVertexZNeg(float32_t3x4 modelMatrix, uint32_t vertexIdx) (vertexIdx & 2) ? 0.5f : -0.5f, (vertexIdx & 4) ? 0.5f : -0.5f); - float32_t transformedZ = dot(modelMatrix[2].xyz, localPos) + modelMatrix[2].w; + float32_t transformedZ = nbl::hlsl::dot(modelMatrix[2].xyz, localPos) + modelMatrix[2].w; return transformedZ < 0.0f; #else return corners[vertexIdx].z < 0.0f; diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl new file mode 100644 index 000000000..ea9bebcb3 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl @@ -0,0 +1,535 @@ +#ifndef _PARALLELOGRAM_SAMPLING_HLSL_ +#define _PARALLELOGRAM_SAMPLING_HLSL_ + +#include +#include + +#define MAX_SILHOUETTE_VERTICES 7 +#define MAX_CURVE_APEXES 2 +#define GET_PROJ_VERT(i) vertices[i].xy *CIRCLE_RADIUS + +// ============================================================================ +// Core structures +// ============================================================================ + +struct Parallelogram +{ + float16_t2 corner; + float16_t2 axisDir; + float16_t width; + float16_t height; +}; + +struct PrecomputedSilhouette +{ + float16_t3 edgeNormals[MAX_SILHOUETTE_VERTICES]; // 10.5 floats instead of 21 + uint32_t count; +}; + +struct ParallelogramSilhouette +{ + Parallelogram para; + PrecomputedSilhouette silhouette; +}; + +// ============================================================================ +// Silhouette helpers +// ============================================================================ + +PrecomputedSilhouette precomputeSilhouette(NBL_CONST_REF_ARG(ClippedSilhouette) sil) +{ + PrecomputedSilhouette result; + result.count = sil.count; + + float32_t3 v0 = sil.vertices[0]; + float32_t3 v1 = sil.vertices[1]; + float32_t3 v2 = sil.vertices[2]; + + result.edgeNormals[0] = float16_t3(cross(v0, v1)); + result.edgeNormals[1] = float16_t3(cross(v1, v2)); + + if (sil.count > 3) + { + float32_t3 v3 = sil.vertices[3]; + result.edgeNormals[2] = float16_t3(cross(v2, v3)); + + if (sil.count > 4) + { + float32_t3 v4 = sil.vertices[4]; + result.edgeNormals[3] = float16_t3(cross(v3, v4)); + + if (sil.count > 5) + { + float32_t3 v5 = sil.vertices[5]; + result.edgeNormals[4] = float16_t3(cross(v4, v5)); + + if (sil.count > 6) + { + float32_t3 v6 = sil.vertices[6]; + result.edgeNormals[5] = float16_t3(cross(v5, v6)); + result.edgeNormals[6] = float16_t3(cross(v6, v0)); + } + else + { + result.edgeNormals[5] = float16_t3(cross(v5, v0)); + result.edgeNormals[6] = float16_t3(0.0f, 0.0f, 0.0f); + } + } + else + { + result.edgeNormals[4] = float16_t3(cross(v4, v0)); + result.edgeNormals[5] = float16_t3(0.0f, 0.0f, 0.0f); + result.edgeNormals[6] = float16_t3(0.0f, 0.0f, 0.0f); + } + } + else + { + result.edgeNormals[3] = float16_t3(cross(v3, v0)); + result.edgeNormals[4] = float16_t3(0.0f, 0.0f, 0.0f); + result.edgeNormals[5] = float16_t3(0.0f, 0.0f, 0.0f); + result.edgeNormals[6] = float16_t3(0.0f, 0.0f, 0.0f); + } + } + else + { + result.edgeNormals[2] = float16_t3(cross(v2, v0)); + result.edgeNormals[3] = float16_t3(0.0f, 0.0f, 0.0f); + result.edgeNormals[4] = float16_t3(0.0f, 0.0f, 0.0f); + result.edgeNormals[5] = float16_t3(0.0f, 0.0f, 0.0f); + result.edgeNormals[6] = float16_t3(0.0f, 0.0f, 0.0f); + } + + return result; +} + +bool isInsideSilhouetteFast(float32_t3 dir, NBL_CONST_REF_ARG(PrecomputedSilhouette) sil) +{ + float16_t3 d = float16_t3(dir); + half maxDot = dot(d, sil.edgeNormals[0]); + maxDot = max(maxDot, dot(d, sil.edgeNormals[1])); + maxDot = max(maxDot, dot(d, sil.edgeNormals[2])); + maxDot = max(maxDot, dot(d, sil.edgeNormals[3])); + maxDot = max(maxDot, dot(d, sil.edgeNormals[4])); + maxDot = max(maxDot, dot(d, sil.edgeNormals[5])); + maxDot = max(maxDot, dot(d, sil.edgeNormals[6])); + return maxDot <= half(0.0f); +} +float32_t3 circleToSphere(float32_t2 circlePoint) +{ + float32_t2 xy = circlePoint / CIRCLE_RADIUS; + float32_t xy_len_sq = dot(xy, xy); + + // if (xy_len_sq >= 1.0f) + // return float32_t3(0, 0, 0); + + return float32_t3(xy, sqrt(1.0f - xy_len_sq)); +} + +bool isEdgeConvex(float32_t3 S, float32_t3 E) +{ + return nbl::hlsl::cross2D(S.xy, E.xy) < -1e-6f; +} + +// ============================================================================ +// Curve evaluation helpers +// ============================================================================ + +// Evaluate curve point at t using rsqrt +float32_t2 evalCurvePoint(float32_t3 S, float32_t3 E, float32_t t) +{ + float32_t3 v = S + t * (E - S); + float32_t invLen = rsqrt(dot(v, v)); + return v.xy * (invLen * CIRCLE_RADIUS); +} + +// Evaluate tangent at arbitrary t +float32_t2 evalCurveTangent(float32_t3 S, float32_t3 E, float32_t t) +{ + float32_t3 v = S + t * (E - S); + float32_t vLenSq = dot(v, v); + + if (vLenSq < 1e-12f) + return normalize(E.xy - S.xy); + + float32_t3 p = v * rsqrt(vLenSq); + float32_t3 vPrime = E - S; + float32_t2 tangent2D = (vPrime - p * dot(p, vPrime)).xy; + + float32_t len = length(tangent2D); + return (len > 1e-7f) ? tangent2D / len : normalize(E.xy - S.xy); +} + +// Get both endpoint tangents efficiently (shares SdotE computation) +void getProjectedTangents(float32_t3 S, float32_t3 E, out float32_t2 t0, out float32_t2 t1) +{ + float32_t SdotE = dot(S, E); + + float32_t2 tangent0_2D = (E - S * SdotE).xy; + float32_t2 tangent1_2D = (E * SdotE - S).xy; + + float32_t len0Sq = dot(tangent0_2D, tangent0_2D); + float32_t len1Sq = dot(tangent1_2D, tangent1_2D); + + const float32_t eps = 1e-14f; + + if (len0Sq > eps && len1Sq > eps) + { + t0 = tangent0_2D * rsqrt(len0Sq); + t1 = tangent1_2D * rsqrt(len1Sq); + return; + } + + // Rare fallback path + float32_t2 diff = E.xy - S.xy; + float32_t diffLenSq = dot(diff, diff); + float32_t2 fallback = diffLenSq > eps ? diff * rsqrt(diffLenSq) : float32_t2(1.0f, 0.0f); + + t0 = len0Sq > eps ? tangent0_2D * rsqrt(len0Sq) : fallback; + t1 = len1Sq > eps ? tangent1_2D * rsqrt(len1Sq) : fallback; +} + +// Compute apex with clamping to prevent apex explosion +void computeApexClamped(float32_t2 p0, float32_t2 p1, float32_t2 t0, float32_t2 t1, out float32_t2 apex) +{ + float32_t denom = t0.x * t1.y - t0.y * t1.x; + float32_t2 center = (p0 + p1) * 0.5f; + + if (abs(denom) < 1e-6f) + { + apex = center; + return; + } + + float32_t2 dp = p1 - p0; + float32_t s = (dp.x * t1.y - dp.y * t1.x) / denom; + apex = p0 + s * t0; + + float32_t2 toApex = apex - center; + float32_t distSq = dot(toApex, toApex); + float32_t maxDistSq = CIRCLE_RADIUS * CIRCLE_RADIUS * 4.0f; + + if (distSq > maxDistSq) + { + apex = center + toApex * (CIRCLE_RADIUS * 2.0f * rsqrt(distSq)); + } +} + +void testPoint(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, float32_t2 pt, float32_t2 axisDir, float32_t2 perpDir) +{ + float32_t projAlong = dot(pt, axisDir); + float32_t projPerp = dot(pt, perpDir); + + minAlong = min(minAlong, projAlong); + maxAlong = max(maxAlong, projAlong); + minPerp = min(minPerp, projPerp); + maxPerp = max(maxPerp, projPerp); +} + +template +void testEdgeForAxisFast(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, + uint32_t count, uint32_t n3Mask, float32_t2 axisDir, float32_t2 perpDir, + const float32_t3 vertices[MAX_SILHOUETTE_VERTICES]) +{ + const uint32_t nextIdx = (I + 1 < count) ? I + 1 : 0; + + testPoint(minAlong, maxAlong, minPerp, maxPerp, GET_PROJ_VERT(I), axisDir, perpDir); + + if (n3Mask & (1u << I)) + { + float32_t2 midPoint = evalCurvePoint(vertices[I], vertices[nextIdx], 0.5f); + testPoint(minAlong, maxAlong, minPerp, maxPerp, midPoint, axisDir, perpDir); + } +} + +float32_t computeBoundingBoxAreaForAxisFast(NBL_CONST_REF_ARG(float32_t3) vertices[MAX_SILHOUETTE_VERTICES], uint32_t n3Mask, uint32_t count, float32_t2 axisDir) +{ + float32_t2 perpDir = float32_t2(-axisDir.y, axisDir.x); + + float32_t minAlong = 1e10f; + float32_t maxAlong = -1e10f; + float32_t minPerp = 1e10f; + float32_t maxPerp = -1e10f; + + testEdgeForAxisFast<0>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); + testEdgeForAxisFast<1>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); + testEdgeForAxisFast<2>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); + if (count > 3) + { + testEdgeForAxisFast<3>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); + if (count > 4) + { + testEdgeForAxisFast<4>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); + if (count > 5) + { + testEdgeForAxisFast<5>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); + if (count > 6) + { + testEdgeForAxisFast<6>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); + } + } + } + } + + return (maxAlong - minAlong) * (maxPerp - minPerp); +} + +void tryCaliperDir(inout float32_t bestArea, inout float32_t2 bestDir, const float32_t2 dir, const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], uint32_t n3Mask, uint32_t count) +{ + float32_t area = computeBoundingBoxAreaForAxisFast(vertices, n3Mask, count, dir); + + if (area < bestArea) + { + bestArea = area; + bestDir = dir; + } +} + +template +inline void processEdge(inout float32_t bestArea, inout float32_t2 bestDir, inout uint32_t convexMask, inout uint32_t n3Mask, uint32_t count, const float32_t3 vertices[MAX_SILHOUETTE_VERTICES]) +{ + const uint32_t nextIdx = (I + 1 < count) ? I + 1 : 0; + float32_t3 S = vertices[I]; + float32_t3 E = vertices[nextIdx]; + + float32_t2 t0, t1; + getProjectedTangents(S, E, t0, t1); + + tryCaliperDir(bestArea, bestDir, t0, vertices, n3Mask, count); + + if (isEdgeConvex(S, E)) + { + convexMask |= (1u << I); + tryCaliperDir(bestArea, bestDir, t1, vertices, n3Mask, count); + + if (dot(t0, t1) < 0.5f) + { + n3Mask |= (1u << I); + float32_t2 tangentAtMid = evalCurveTangent(S, E, 0.5f); + tryCaliperDir(bestArea, bestDir, tangentAtMid, vertices, n3Mask, count); + } + } +} + +template +inline void testEdgeForAxisAccurate(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, uint32_t count, uint32_t convexMask, uint32_t n3Mask, + float32_t2 axisDir, float32_t2 perpDir, const float32_t3 vertices[MAX_SILHOUETTE_VERTICES]) +{ + const uint32_t nextIdx = (I + 1 < count) ? I + 1 : 0; + float32_t2 projectedVertex = vertices[I].xy * CIRCLE_RADIUS; + + testPoint(minAlong, maxAlong, minPerp, maxPerp, projectedVertex, axisDir, perpDir); + + bool isN3 = (n3Mask & (1u << I)) != 0; + bool isConvex = (convexMask & (1u << I)) != 0; + + if (!isN3 && !isConvex) + return; + + float32_t3 S = vertices[I]; + float32_t3 E = vertices[nextIdx]; + float32_t2 midPoint = evalCurvePoint(S, E, 0.5f); + + if (isN3) + { + testPoint(minAlong, maxAlong, minPerp, maxPerp, midPoint, axisDir, perpDir); + } + + if (isConvex) + { + float32_t2 t0, endTangent; + getProjectedTangents(S, E, t0, endTangent); + + if (dot(t0, perpDir) > 0.0f) + { + float32_t2 apex0; + if (isN3) + { + float32_t2 tangentAtMid = evalCurveTangent(S, E, 0.5f); + computeApexClamped(projectedVertex, midPoint, t0, tangentAtMid, apex0); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex0, axisDir, perpDir); + + if (dot(tangentAtMid, perpDir) > 0.0f) + { + float32_t2 apex1; + computeApexClamped(midPoint, E.xy * CIRCLE_RADIUS, tangentAtMid, endTangent, apex1); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex1, axisDir, perpDir); + } + } + else + { + computeApexClamped(projectedVertex, E.xy * CIRCLE_RADIUS, t0, endTangent, apex0); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex0, axisDir, perpDir); + } + } + } +} + +Parallelogram buildParallelogramForAxisAccurate(const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], uint32_t convexMask, uint32_t n3Mask, uint32_t count, float32_t2 axisDir) +{ + float32_t2 perpDir = float32_t2(-axisDir.y, axisDir.x); + + float32_t minAlong = 1e10f; + float32_t maxAlong = -1e10f; + float32_t minPerp = 1e10f; + float32_t maxPerp = -1e10f; + + testEdgeForAxisAccurate<0>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); + testEdgeForAxisAccurate<1>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); + testEdgeForAxisAccurate<2>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); + if (count > 3) + { + testEdgeForAxisAccurate<3>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); + if (count > 4) + { + testEdgeForAxisAccurate<4>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); + if (count > 5) + { + testEdgeForAxisAccurate<5>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); + if (count > 6) + { + testEdgeForAxisAccurate<6>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); + } + } + } + } + + Parallelogram result; + result.width = float16_t(maxAlong - minAlong); + result.height = float16_t(maxPerp - minPerp); + result.axisDir = float16_t2(axisDir); + result.corner = float16_t2(minAlong * axisDir + minPerp * float16_t2(-axisDir.y, axisDir.x)); + + return result; +} + +Parallelogram findMinimumBoundingBoxCurved(const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], uint32_t count +#if VISUALIZE_SAMPLES + , + float32_t2 ndc, float32_t3 spherePos, float32_t aaWidth, + inout float32_t4 color +#endif +) +{ + uint32_t convexMask = 0; + uint32_t n3Mask = 0; + float32_t bestArea = 1e10f; + float32_t2 bestDir = float32_t2(1.0f, 0.0f); + + processEdge<0>(bestArea, bestDir, convexMask, n3Mask, count, vertices); + processEdge<1>(bestArea, bestDir, convexMask, n3Mask, count, vertices); + processEdge<2>(bestArea, bestDir, convexMask, n3Mask, count, vertices); + if (count > 3) + { + processEdge<3>(bestArea, bestDir, convexMask, n3Mask, count, vertices); + if (count > 4) + { + processEdge<4>(bestArea, bestDir, convexMask, n3Mask, count, vertices); + if (count > 5) + { + processEdge<5>(bestArea, bestDir, convexMask, n3Mask, count, vertices); + if (count > 6) + { + processEdge<6>(bestArea, bestDir, convexMask, n3Mask, count, vertices); + } + } + } + } + + tryCaliperDir(bestArea, bestDir, float32_t2(1.0f, 0.0f), vertices, n3Mask, count); + tryCaliperDir(bestArea, bestDir, float32_t2(0.0f, 1.0f), vertices, n3Mask, count); + + Parallelogram best = buildParallelogramForAxisAccurate(vertices, convexMask, n3Mask, count, bestDir); + +#if VISUALIZE_SAMPLES + for (uint32_t i = 0; i < count; i++) + { + if (convexMask & (1u << i)) + { + uint32_t nextIdx = (i + 1) % count; + float32_t2 p0 = vertices[i].xy * CIRCLE_RADIUS; + float32_t2 p1 = vertices[nextIdx].xy * CIRCLE_RADIUS; + + float32_t2 t0, endTangent; + getProjectedTangents(vertices[i], vertices[nextIdx], t0, endTangent); + + if (n3Mask & (1u << i)) + { + float32_t2 tangentAtMid = evalCurveTangent(vertices[i], vertices[nextIdx], 0.5f); + float32_t2 midPoint = evalCurvePoint(vertices[i], vertices[nextIdx], 0.5f); + + float32_t2 apex0, apex1; + computeApexClamped(p0, midPoint, t0, tangentAtMid, apex0); + computeApexClamped(midPoint, p1, tangentAtMid, endTangent, apex1); + + color += drawCorner(float32_t3(apex0, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0, 1)); + color += drawCorner(float32_t3(midPoint, 0.0f), ndc, aaWidth, 0.02, 0.0f, float32_t3(0, 1, 0)); + color += drawCorner(float32_t3(apex1, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0.5, 0)); + } + else + { + float32_t2 apex; + computeApexClamped(p0, p1, t0, endTangent, apex); + color += drawCorner(float32_t3(apex, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0, 1)); + } + } + } +#endif + + return best; +} +// ============================================================================ +// Main entry points +// ============================================================================ + +ParallelogramSilhouette buildParallelogram(NBL_CONST_REF_ARG(ClippedSilhouette) silhouette +#if VISUALIZE_SAMPLES + , + float32_t2 ndc, float32_t3 spherePos, float32_t aaWidth, + inout float32_t4 color +#endif +) +{ + ParallelogramSilhouette result; + + // if (silhouette.count < 3) + // { + // result.para.corner = float32_t2(0, 0); + // result.para.edge0 = float32_t2(1, 0); + // result.para.edge1 = float32_t2(0, 1); + // result.para.area = 1.0f; + // return result; + // } + + result.para = findMinimumBoundingBoxCurved(silhouette.vertices, silhouette.count +#if VISUALIZE_SAMPLES + , + ndc, spherePos, aaWidth, color +#endif + ); + +#if DEBUG_DATA + DebugDataBuffer[0].parallelogramArea = result.para.width * result.para.height; +#endif + result.silhouette = precomputeSilhouette(silhouette); + + return result; +} + +float32_t3 sampleFromParallelogram(NBL_CONST_REF_ARG(ParallelogramSilhouette) paraSilhouette, float32_t2 xi, out float32_t pdf, out bool valid) +{ + float16_t2 axisDir = paraSilhouette.para.axisDir; + float16_t2 perpDir = float16_t2(-axisDir.y, axisDir.x); + + float16_t2 circleXY = paraSilhouette.para.corner + + float16_t(xi.x) * paraSilhouette.para.width * axisDir + + float16_t(xi.y) * paraSilhouette.para.height * perpDir; + + float32_t3 direction = circleToSphere(circleXY); + + valid = (direction.z > 0.0f) && isInsideSilhouetteFast(direction, paraSilhouette.silhouette); + pdf = valid ? (1.0f / (paraSilhouette.para.width * paraSilhouette.para.height)) : 0.0f; + + return direction; +} + +#endif // _PARALLELOGRAM_SAMPLING_HLSL_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl index 05d913e01..504db2db9 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl @@ -1,19 +1,25 @@ #ifndef _SILHOUETTE_HLSL_ #define _SILHOUETTE_HLSL_ -#include "gpu_common.hlsl" +#include "gpu_common.hlsl" #include "utils.hlsl" +// Special index values for clip points +static const uint32_t CLIP_POINT_A = 23; // Clip point between last positive and first negative +static const uint32_t CLIP_POINT_B = 24; // Clip point between last negative and first positive + // Compute region and configuration index from model matrix uint32_t computeRegionAndConfig(float32_t3x4 modelMatrix, out uint32_t3 region, out uint32_t configIndex, out uint32_t vertexCount) { float32_t4x3 columnModel = transpose(modelMatrix); float32_t3 obbCenter = columnModel[3].xyz; float32_t3x3 upper3x3 = (float32_t3x3)columnModel; + float32_t3 rcpSqScales = rcp(float32_t3( dot(upper3x3[0], upper3x3[0]), dot(upper3x3[1], upper3x3[1]), dot(upper3x3[2], upper3x3[2]))); + float32_t3 normalizedProj = mul(upper3x3, obbCenter) * rcpSqScales; region = uint32_t3( @@ -23,9 +29,10 @@ uint32_t computeRegionAndConfig(float32_t3x4 modelMatrix, out uint32_t3 region, configIndex = region.x + region.y * 3u + region.z * 9u; - uint32_t sil = packSilhouette(silhouettes[configIndex]); - // uint32_t sil = binSilhouettes[configIndex]; + // uint32_t sil = packSilhouette(silhouettes[configIndex]); + uint32_t sil = binSilhouettes[configIndex]; vertexCount = getSilhouetteSize(sil); + return sil; } @@ -45,6 +52,7 @@ computeSilhouette(float32_t3x4 modelMatrix, uint32_t vertexCount, uint32_t sil #if VISUALIZE_SAMPLES float32_t4 color = float32_t4(0, 0, 0, 0); #endif + silhouette.count = 0; // Build clip mask (z < 0) @@ -74,9 +82,10 @@ computeSilhouette(float32_t3x4 modelMatrix, uint32_t vertexCount, uint32_t sil { uint32_t i0 = i; uint32_t i1 = (i + 1) % vertexCount; - float32_t3 v0 = getVertex(modelMatrix, getSilhouetteVertex(sil, i0)); - silhouette.vertices[silhouette.count++] = v0; + silhouette.vertices[silhouette.count] = v0; + silhouette.indices[silhouette.count++] = i0; // Original index (no rotation) + #if VISUALIZE_SAMPLES float32_t3 v1 = getVertex(modelMatrix, getSilhouetteVertex(sil, i1)); float32_t3 pts[2] = {v0, v1}; @@ -89,20 +98,19 @@ computeSilhouette(float32_t3x4 modelMatrix, uint32_t vertexCount, uint32_t sil // Rotate clip mask so positives come first uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); - bool wrapAround = ((clipMask & 1u) != 0u) && - ((clipMask & (1u << (vertexCount - 1))) != 0u); + bool wrapAround = ((clipMask & 1u) != 0u) && ((clipMask & (1u << (vertexCount - 1))) != 0u); uint32_t rotateAmount = wrapAround ? firstbitlow(invertedMask) // -> First POSITIVE : firstbithigh(clipMask) + 1; // -> First vertex AFTER last negative uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); uint32_t rotatedSil = rotr(sil, rotateAmount * 3, vertexCount * 3); - uint32_t positiveCount = vertexCount - clipCount; // ALWAYS compute both clip points uint32_t lastPosIdx = positiveCount - 1; uint32_t firstNegIdx = positiveCount; + float32_t3 vLastPos = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, lastPosIdx)); float32_t3 vFirstNeg = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, firstNegIdx)); float32_t t = vLastPos.z / (vLastPos.z - vFirstNeg.z); @@ -118,18 +126,23 @@ computeSilhouette(float32_t3x4 modelMatrix, uint32_t vertexCount, uint32_t sil { // Get raw vertex float32_t3 v0 = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, i)); - bool isLastPositive = (i == positiveCount - 1); bool useClipA = (clipCount > 0) && isLastPositive; -#if VISUALIZE_SAMPLES - float32_t3 v1 = useClipA ? clipA - : getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, (i + 1) % vertexCount)); + // Compute original index before rotation + uint32_t originalIndex = (i + rotateAmount) % vertexCount; +#if VISUALIZE_SAMPLES + float32_t3 v1 = useClipA ? clipA : getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, (i + 1) % vertexCount)); float32_t3 pts[2] = {normalize(v0), normalize(v1)}; color += drawEdge((i + 1) % vertexCount, pts, spherePos, aaWidth); #endif - silhouette.vertices[silhouette.count++] = v0; + +#if DEBUG_DATA + DebugDataBuffer[0].clippedSilhouetteVertices[silhouette.count] = v0; + DebugDataBuffer[0].clippedSilhouetteVerticesIndices[silhouette.count] = originalIndex; +#endif + silhouette.vertices[silhouette.count++] = normalize(v0); } if (clipCount > 0 && clipCount < vertexCount) @@ -143,11 +156,22 @@ computeSilhouette(float32_t3x4 modelMatrix, uint32_t vertexCount, uint32_t sil float32_t3 arcPts[2] = {normalize(clipA), normalize(clipB)}; color += drawEdge(23, arcPts, spherePos, aaWidth, 0.6f); #endif - silhouette.vertices[silhouette.count++] = clipA; - silhouette.vertices[silhouette.count++] = clipB; + +#if DEBUG_DATA + DebugDataBuffer[0].clippedSilhouetteVertices[silhouette.count] = clipA; + DebugDataBuffer[0].clippedSilhouetteVerticesIndices[silhouette.count] = CLIP_POINT_A; +#endif + silhouette.vertices[silhouette.count++] = normalize(clipA); + +#if DEBUG_DATA + DebugDataBuffer[0].clippedSilhouetteVertices[silhouette.count] = clipB; + DebugDataBuffer[0].clippedSilhouetteVerticesIndices[silhouette.count] = CLIP_POINT_B; +#endif + silhouette.vertices[silhouette.count++] = normalize(clipB); } #if DEBUG_DATA + DebugDataBuffer[0].clippedSilhouetteVertexCount = silhouette.count; DebugDataBuffer[0].clipMask = clipMask; DebugDataBuffer[0].clipCount = clipCount; DebugDataBuffer[0].rotatedClipMask = rotatedClipMask; @@ -156,6 +180,7 @@ computeSilhouette(float32_t3x4 modelMatrix, uint32_t vertexCount, uint32_t sil DebugDataBuffer[0].wrapAround = (uint32_t)wrapAround; DebugDataBuffer[0].rotatedSil = rotatedSil; #endif + #if VISUALIZE_SAMPLES return color; #endif diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl index f01667bf0..e4bf804cb 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl @@ -39,4 +39,11 @@ uint32_t packSilhouette(const uint32_t s[7]) return packed; } +float32_t2 hammersleySample(uint32_t i, uint32_t numSamples) +{ + return float32_t2( + float32_t(i) / float32_t(numSamples), + float32_t(reversebits(i)) / 4294967295.0f); +} + #endif // _UTILS_HLSL_ diff --git a/73_SolidAngleVisualizer/main.cpp b/73_SolidAngleVisualizer/main.cpp index 4c32069ff..9d9941da3 100644 --- a/73_SolidAngleVisualizer/main.cpp +++ b/73_SolidAngleVisualizer/main.cpp @@ -20,10 +20,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR inline static std::string SolidAngleVisShaderPath = "app_resources/hlsl/SolidAngleVis.frag.hlsl"; inline static std::string RayVisShaderPath = "app_resources/hlsl/RayVis.frag.hlsl"; + public: inline SolidAngleVisualizer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), - device_base_t({ 2048,1024 }, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { + device_base_t({ 2048, 1024 }, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) + { } inline bool onAppInitialized(smart_refctd_ptr&& system) override @@ -44,60 +46,48 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { if (!pool) return logFail("Couldn't create Command Pool!"); - if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i,1 })) + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) return logFail("Couldn't create Command Buffer!"); } const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; m_scene = CGeometryCreatorScene::create( - { - .transferQueue = getTransferUpQueue(), - .utilities = m_utils.get(), - .logger = m_logger.get(), - .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies - }, - CSimpleDebugRenderer::DefaultPolygonGeometryPatch - ); + { .transferQueue = getTransferUpQueue(), + .utilities = m_utils.get(), + .logger = m_logger.get(), + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies }, + CSimpleDebugRenderer::DefaultPolygonGeometryPatch); // for the scene drawing pass { IGPURenderpass::SCreationParams params = {}; const IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { - {{ - { - .format = sceneRenderDepthFormat, - .samples = IGPUImage::ESCF_1_BIT, - .mayAlias = false - }, - /*.loadOp =*/ {IGPURenderpass::LOAD_OP::CLEAR}, - /*.storeOp =*/ {IGPURenderpass::STORE_OP::STORE}, - /*.initialLayout =*/ {IGPUImage::LAYOUT::UNDEFINED}, - /*.finalLayout =*/ {IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} - }}, - IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd - }; + {{{.format = sceneRenderDepthFormat, + .samples = IGPUImage::ESCF_1_BIT, + .mayAlias = false}, + /*.loadOp =*/{IGPURenderpass::LOAD_OP::CLEAR}, + /*.storeOp =*/{IGPURenderpass::STORE_OP::STORE}, + /*.initialLayout =*/{IGPUImage::LAYOUT::UNDEFINED}, + /*.finalLayout =*/{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}, + IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd }; params.depthStencilAttachments = depthAttachments; const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = { {{ - { - .format = finalSceneRenderFormat, - .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, - .mayAlias = false - }, - /*.loadOp =*/ IGPURenderpass::LOAD_OP::CLEAR, - /*.storeOp =*/ IGPURenderpass::STORE_OP::STORE, - /*.initialLayout =*/ IGPUImage::LAYOUT::UNDEFINED, - /*.finalLayout =*/ IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read - }}, - IGPURenderpass::SCreationParams::ColorAttachmentsEnd - }; + {.format = finalSceneRenderFormat, + .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, + .mayAlias = false}, + /*.loadOp =*/IGPURenderpass::LOAD_OP::CLEAR, + /*.storeOp =*/IGPURenderpass::STORE_OP::STORE, + /*.initialLayout =*/IGPUImage::LAYOUT::UNDEFINED, + /*.finalLayout =*/IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read + }}, + IGPURenderpass::SCreationParams::ColorAttachmentsEnd }; params.colorAttachments = colorAttachments; IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { {}, - IGPURenderpass::SCreationParams::SubpassesEnd - }; - subpasses[0].depthStencilAttachment = { {.render = {.attachmentIndex = 0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}} }; - subpasses[0].colorAttachments[0] = { .render = {.attachmentIndex = 0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} }; + IGPURenderpass::SCreationParams::SubpassesEnd }; + subpasses[0].depthStencilAttachment = { {.render = {.attachmentIndex = 0, .layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}} }; + subpasses[0].colorAttachments[0] = { .render = {.attachmentIndex = 0, .layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} }; params.subpasses = subpasses; const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { @@ -115,27 +105,21 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, // because depth and color get cleared first no read mask - .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - // leave view offsets and flags default - }, - { - .srcSubpass = 0, - .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = { - // last place where the color can get modified, depth is implicitly earlier - .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // only write ops, reads can't be made available, also won't be using depth so don't care about it being visible to anyone else - .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT, - // the ImGUI will sample the color, then next frame we overwrite both attachments - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT | PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, - // but we only care about the availability-visibility chain between renderpass and imgui - .dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT - } - // leave view offsets and flags default - }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT} + // leave view offsets and flags default + }, + { + .srcSubpass = 0, .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, .memoryBarrier = {// last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available, also won't be using depth so don't care about it being visible to anyone else + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT, + // the ImGUI will sample the color, then next frame we overwrite both attachments + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT | PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, + // but we only care about the availability-visibility chain between renderpass and imgui + .dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT} + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd }; params.dependencies = dependencies; auto solidAngleRenderpassParams = params; m_mainRenderpass = m_device->createRenderpass(std::move(params)); @@ -145,11 +129,10 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR m_solidAngleRenderpass = m_device->createRenderpass(std::move(solidAngleRenderpassParams)); if (!m_solidAngleRenderpass) return logFail("Failed to create Solid Angle Renderpass!"); - } const auto& geometries = m_scene->getInitParams().geometries; - m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), m_solidAngleRenderpass.get(), 0, { &geometries.front().get(),geometries.size() }); + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), m_solidAngleRenderpass.get(), 0, { &geometries.front().get(), geometries.size() }); // special case { const auto& pipelines = m_renderer->getInitParams().pipelines; @@ -192,7 +175,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); options.spirvOptimizer = opt.get(); #endif - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT;// | IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_FILE_BIT | IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); options.preprocessorOptions.logger = m_logger.get(); options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); @@ -226,39 +209,30 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR const IGPUPipelineBase::SShaderSpecInfo solidAngleFragSpec = { .shader = solidAngleVisFragShader.get(), - .entryPoint = "main" - }; + .entryPoint = "main" }; auto rayVisFragShader = loadAndCompileHLSLShader(RayVisShaderPath, ESS_FRAGMENT); if (!rayVisFragShader) return logFail("Failed to Load and Compile Fragment Shader: rayVis!"); const IGPUPipelineBase::SShaderSpecInfo RayFragSpec = { .shader = rayVisFragShader.get(), - .entryPoint = "main" - }; + .entryPoint = "main" }; smart_refctd_ptr solidAngleVisLayout, rayVisLayout; nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { - { - .binding = 0, - .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_FRAGMENT, - .count = 1 - } - }; + {.binding = 0, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_FRAGMENT, + .count = 1} }; smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); - const asset::SPushConstantRange saRanges[] = { { - .stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, - .offset = 0, - .size = sizeof(PushConstants) - } }; - const asset::SPushConstantRange rayRanges[] = { { - .stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, - .offset = 0, - .size = sizeof(PushConstantRayVis) - } }; + const asset::SPushConstantRange saRanges[] = { {.stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(PushConstants)} }; + const asset::SPushConstantRange rayRanges[] = { {.stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(PushConstantRayVis)} }; if (!dsLayout) logFail("Failed to create a Descriptor Layout!\n"); @@ -301,21 +275,20 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); assert(m_outputStorageBuffer->getBoundMemory().memory == m_allocation.memory.get()); - smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &dsLayout.get(), 1 }); m_ds = pool->createDescriptorSet(std::move(dsLayout)); { IGPUDescriptorSet::SDescriptorInfo info[1]; info[0].desc = smart_refctd_ptr(m_outputStorageBuffer); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; + info[0].info.buffer = { .offset = 0, .size = BufferSize }; IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} - }; + {.dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = info} }; m_device->updateDescriptorSets(writes, {}); } } - if (!m_allocation.memory->map({ 0ull,m_allocation.memory->getAllocationSize() }, IDeviceMemoryAllocation::EMCAF_READ)) + if (!m_allocation.memory->map({ 0ull, m_allocation.memory->getAllocationSize() }, IDeviceMemoryAllocation::EMCAF_READ)) logFail("Failed to map the Device Memory!\n"); // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches @@ -328,8 +301,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { auto scRes = static_cast(m_surface->getSwapchainResources()); ext::imgui::UI::SCreationParameters params = {}; - params.resources.texturesInfo = { .setIx = 0u,.bindingIx = TexturesImGUIBindingIndex }; - params.resources.samplersInfo = { .setIx = 0u,.bindingIx = 1u }; + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = TexturesImGUIBindingIndex }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; params.utilities = m_utils; params.transfer = getTransferUpQueue(); params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxImGUITextures); @@ -349,7 +322,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { // note that we use default layout provided by our extension, but you are free to create your own by filling ext::imgui::UI::S_CREATION_PARAMETERS::resources const auto* layout = interface.imGUI->getPipeline()->getLayout()->getDescriptorSetLayout(0u); - auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, { &layout,1 }); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, { &layout, 1 }); auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); interface.subAllocDS = make_smart_refctd_ptr(std::move(ds)); if (!interface.subAllocDS) @@ -369,12 +342,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .binding = TexturesImGUIBindingIndex, .arrayElement = ext::imgui::UI::FontAtlasTexId, .count = 1, - .info = &info - }; - if (!m_device->updateDescriptorSets({ &write,1 }, {})) + .info = &info }; + if (!m_device->updateDescriptorSets({ &write, 1 }, {})) return logFail("Failed to write the descriptor set"); } - imgui->registerListener([this]() {interface(); }); + imgui->registerListener([this]() + { interface(); }); } interface.camera.mapKeysToWASD(); @@ -411,16 +384,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR auto* const cb = m_cmdBufs.data()[resourceIx].get(); cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - // clear to black for both things - const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; + if (m_solidAngleViewFramebuffer) { - asset::SBufferRange range - { + asset::SBufferRange range{ .offset = 0, .size = m_outputStorageBuffer->getSize(), - .buffer = m_outputStorageBuffer - }; + .buffer = m_outputStorageBuffer }; cb->fillBuffer(range, 0u); { @@ -428,16 +398,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR cb->beginDebugMarker("Draw Circle View Frame"); { const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f, 0.f, 0.f, 1.f} }; const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = { .framebuffer = m_solidAngleViewFramebuffer.get(), .colorClearValues = &clearValue, .depthStencilClearValues = &farValue, .renderArea = { - .offset = {0,0}, - .extent = {creationParams.width, creationParams.height} - } - }; + .offset = {0, 0}, + .extent = {creationParams.width, creationParams.height}} }; beginRenderpass(cb, renderpassInfo); } // draw scene @@ -446,10 +415,10 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR lastFrameSeed = m_frameSeeding ? static_cast(m_realFrameIx) : lastFrameSeed; PushConstants pc{ .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), - .viewport = { 0.f,0.f,static_cast(creationParams.width),static_cast(creationParams.height) }, + .viewport = {0.f, 0.f, static_cast(creationParams.width), static_cast(creationParams.height)}, .samplingMode = m_samplingMode, - .frameIndex = lastFrameSeed - }; + .sampleCount = static_cast(m_SampleCount), + .frameIndex = lastFrameSeed }; auto pipeline = m_solidAngleVisPipeline; cb->bindGraphicsPipeline(pipeline.get()); cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc); @@ -471,19 +440,16 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { auto creationParams = m_mainViewFramebuffer->getCreationParameters(); const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.1f, 0.1f, 0.1f, 1.f} }; const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = - { .framebuffer = m_mainViewFramebuffer.get(), .colorClearValues = &clearValue, .depthStencilClearValues = &farValue, .renderArea = { - .offset = {0,0}, - .extent = {creationParams.width, creationParams.height} - } - }; + .offset = {0, 0}, + .extent = {creationParams.width, creationParams.height}} }; beginRenderpass(cb, renderpassInfo); - } { // draw rays visualization auto creationParams = m_mainViewFramebuffer->getCreationParameters(); @@ -492,12 +458,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // draw scene { float32_t4x4 viewProj = *reinterpret_cast(&interface.camera.getConcatenatedMatrix()); + float32_t3x4 view = *reinterpret_cast(&interface.camera.getViewMatrix()); PushConstantRayVis pc{ .viewProjMatrix = viewProj, + .viewMatrix = view, .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), - .viewport = { 0.f,0.f,static_cast(creationParams.width),static_cast(creationParams.height) }, - .frameIndex = m_frameSeeding ? static_cast(m_realFrameIx) : 0u - }; + .viewport = {0.f, 0.f, static_cast(creationParams.width), static_cast(creationParams.height)}, + .frameIndex = m_frameSeeding ? static_cast(m_realFrameIx) : 0u }; auto pipeline = m_rayVisualizationPipeline; cb->bindGraphicsPipeline(pipeline.get()); cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc); @@ -524,7 +491,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR auto& instance = m_renderer->m_instances[0]; instance.world = float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)); instance.packedGeo = m_renderer->getGeometries().data(); // cube // +interface.gcIndex; - m_renderer->render(cb, viewParams); // draw the cube/OBB + m_renderer->render(cb, viewParams); // draw the cube/OBB instance.world = float32_t3x4(1.0f); instance.packedGeo = m_renderer->getGeometries().data() + 2; // disk @@ -539,16 +506,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR cb->beginDebugMarker("SolidAngleVisualizer IMGUI Frame"); { auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f, 0.f, 0.f, 1.f} }; const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = { .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), .colorClearValues = &clearValue, .depthStencilClearValues = nullptr, .renderArea = { - .offset = {0,0}, - .extent = {m_window->getWidth(),m_window->getHeight()} - } - }; + .offset = {0, 0}, + .extent = {m_window->getWidth(), m_window->getHeight()}} }; beginRenderpass(cb, renderpassInfo); } // draw ImGUI @@ -560,7 +526,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR const auto* ds = interface.subAllocDS->getDescriptorSet(); cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); // a timepoint in the future to release streaming resources for geometry - const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; if (!imgui->render(cb, drawFinished)) { m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); @@ -576,27 +542,19 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { .semaphore = m_semaphore.get(), .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS - }; + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS }; const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = { - {.cmdbuf = cb } - }; + {.cmdbuf = cb} }; const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { - { - .semaphore = device_base_t::getCurrentAcquire().semaphore, - .value = device_base_t::getCurrentAcquire().acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; + {.semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE} }; const IQueue::SSubmitInfo infos[] = { - { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = {&retval,1} - } - }; + {.waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval, 1}} }; if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) { @@ -604,7 +562,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR m_realFrameIx--; } - m_window->setCaption("[Nabla Engine] UI App Test Demo"); return retval; } @@ -619,19 +576,16 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, .dstSubpass = 0, .memoryBarrier = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway .srcAccessMask = ACCESS_FLAGS::NONE, // layout transition needs to finish before the color write .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - // leave view offsets and flags default - }, + .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT} + // leave view offsets and flags default + }, // want layout transition to begin after all color output is done { - .srcSubpass = 0, - .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = { + .srcSubpass = 0, .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, .memoryBarrier = { // last place where the color can get modified, depth is implicitly earlier .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, // only write ops, reads can't be made available @@ -640,8 +594,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } // leave view offsets and flags default }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; + IGPURenderpass::SCreationParams::DependenciesEnd }; return dependencies; } @@ -667,7 +620,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // I think begin/end should always be called on camera, just events shouldn't be fed, why? // If you stop begin/end, whatever keys were up/down get their up/down values frozen leading to // `perActionDt` becoming obnoxiously large the first time the even processing resumes due to - // `timeDiff` being computed since `lastVirtualUpTimeStamp` + // `timeDiff` being computed since `lastVirtualUpTimeStamp` camera.beginInputProcessing(nextPresentationTimestamp); { mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void @@ -690,10 +643,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // interface.gcIndex += int16_t(core::sign(e.scrollEvent.verticalScroll)); // interface.gcIndex = core::clamp(interface.gcIndex, 0ull, m_renderer->getGeometries().size() - 1); //} - } - }, - m_logger.get() - ); + } }, + m_logger.get()); keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { if (interface.move) @@ -706,10 +657,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR previousEventTimestamp = e.timeStamp; uiEvents.keyboard.emplace_back(e); - } - }, - m_logger.get() - ); + } }, + m_logger.get()); } camera.endInputProcessing(nextPresentationTimestamp); @@ -717,37 +666,33 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ext::imgui::UI::SUpdateParameters params = { - .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), - .displaySize = {m_window->getWidth(),m_window->getHeight()}, + .mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()), + .displaySize = {m_window->getWidth(), m_window->getHeight()}, .mouseEvents = uiEvents.mouse, - .keyboardEvents = uiEvents.keyboard - }; + .keyboardEvents = uiEvents.keyboard }; - //interface.objectName = m_scene->getInitParams().geometryNames[interface.gcIndex]; + // interface.objectName = m_scene->getInitParams().geometryNames[interface.gcIndex]; interface.imGUI->update(params); } void recreateFramebuffers() { - auto createImageAndView = [&](const uint16_t2 resolution, E_FORMAT format)->smart_refctd_ptr + auto createImageAndView = [&](const uint16_t2 resolution, E_FORMAT format) -> smart_refctd_ptr { - auto image = m_device->createImage({ { - .type = IGPUImage::ET_2D, - .samples = IGPUImage::ESCF_1_BIT, - .format = format, - .extent = {resolution.x,resolution.y,1}, - .mipLevels = 1, - .arrayLayers = 1, - .usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT - } }); + auto image = m_device->createImage({ {.type = IGPUImage::ET_2D, + .samples = IGPUImage::ESCF_1_BIT, + .format = format, + .extent = {resolution.x, resolution.y, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT} }); if (!m_device->allocate(image->getMemoryReqs(), image.get()).isValid()) return nullptr; IGPUImageView::SCreationParams params = { .image = std::move(image), .viewType = IGPUImageView::ET_2D, - .format = format - }; + .format = format }; params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? IGPUImage::EAF_DEPTH_BIT : IGPUImage::EAF_COLOR_BIT; return m_device->createImageView(std::move(params)); }; @@ -763,23 +708,19 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { solidAngleView = createImageAndView(solidAngleViewRes, finalSceneRenderFormat); auto solidAngleDepthView = createImageAndView(solidAngleViewRes, sceneRenderDepthFormat); - m_solidAngleViewFramebuffer = m_device->createFramebuffer({ { - .renderpass = m_solidAngleRenderpass, - .depthStencilAttachments = &solidAngleDepthView.get(), - .colorAttachments = &solidAngleView.get(), - .width = solidAngleViewRes.x, - .height = solidAngleViewRes.y - } }); + m_solidAngleViewFramebuffer = m_device->createFramebuffer({ {.renderpass = m_solidAngleRenderpass, + .depthStencilAttachments = &solidAngleDepthView.get(), + .colorAttachments = &solidAngleView.get(), + .width = solidAngleViewRes.x, + .height = solidAngleViewRes.y} }); mainView = createImageAndView(mainViewRes, finalSceneRenderFormat); auto mainDepthView = createImageAndView(mainViewRes, sceneRenderDepthFormat); - m_mainViewFramebuffer = m_device->createFramebuffer({ { - .renderpass = m_mainRenderpass, - .depthStencilAttachments = &mainDepthView.get(), - .colorAttachments = &mainView.get(), - .width = mainViewRes.x, - .height = mainViewRes.y - } }); + m_mainViewFramebuffer = m_device->createFramebuffer({ {.renderpass = m_mainRenderpass, + .depthStencilAttachments = &mainDepthView.get(), + .colorAttachments = &mainView.get(), + .width = mainViewRes.x, + .height = mainViewRes.y} }); } else { @@ -788,7 +729,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } // release previous slot and its image - interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices, { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1 }); + interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices, { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1 }); // if (solidAngleView && mainView) { @@ -801,19 +742,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR infos[1].info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; const IGPUDescriptorSet::SWriteDescriptorSet write[static_cast(CInterface::Count)] = { {.dstSet = interface.subAllocDS->getDescriptorSet(), - .binding = TexturesImGUIBindingIndex, - .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_MAIN_VIEW)], - .count = 1, - .info = &infos[static_cast(CInterface::ERV_MAIN_VIEW)] - }, - { - .dstSet = interface.subAllocDS->getDescriptorSet(), - .binding = TexturesImGUIBindingIndex, - .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)], - .count = 1, - .info = &infos[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)] - } - }; + .binding = TexturesImGUIBindingIndex, + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_MAIN_VIEW)], + .count = 1, + .info = &infos[static_cast(CInterface::ERV_MAIN_VIEW)]}, + {.dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)], + .count = 1, + .info = &infos[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)]} }; m_device->updateDescriptorSets({ write, static_cast(CInterface::Count) }, {}); } interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndices[CInterface::ERV_MAIN_VIEW]; @@ -827,8 +764,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .x = 0, .y = 0, .width = static_cast(info.renderArea.extent.width), - .height = static_cast(info.renderArea.extent.height) - }; + .height = static_cast(info.renderArea.extent.height) }; cb->setViewport(0u, 1u, &viewport); } @@ -845,7 +781,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes constexpr static inline auto MaxImGUITextures = 2u + MaxFramesInFlight; - static inline uint32_t m_samplingMode = SAMPLING_MODE_SOLID_ANGLE; + static inline SAMPLING_MODE m_samplingMode = SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE; + static inline int m_SampleCount = 64; static inline bool m_frameSeeding = true; static inline ResultData m_GPUOutResulData; // @@ -895,8 +832,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar); } - return projection; - }()); + return projection; }()); ImGuizmo::SetOrthographic(!isPerspective); ImGuizmo::BeginFrame(); @@ -918,19 +854,29 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } ImGui::Separator(); - ImGui::Text("Sampling Mode: "); + ImGui::Text("Sampling Mode:"); ImGui::SameLine(); - if (ImGui::RadioButton("Solid Angle", m_samplingMode == 0)) - m_samplingMode = SAMPLING_MODE_SOLID_ANGLE; + const char* samplingModes[] = + { + "Triangle Solid Angle", + "Triangle Projected Solid Angle", + "Parallelogram Projected Solid Angle" + }; + + int currentMode = static_cast(m_samplingMode); + + if (ImGui::Combo("##SamplingMode", ¤tMode, samplingModes, IM_ARRAYSIZE(samplingModes))) + { + m_samplingMode = static_cast(currentMode); + } - ImGui::SameLine(); - if (ImGui::RadioButton("Projected Solid Angle", m_samplingMode == 1)) - m_samplingMode = SAMPLING_MODE_PROJECTED_SOLID_ANGLE; ImGui::Checkbox("Frame seeding", &m_frameSeeding); + ImGui::SliderInt("Sample Count", &m_SampleCount, 0, 512); + ImGui::Separator(); ImGui::Text("Camera"); @@ -952,7 +898,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR isPerspective = false; ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate); - //ImGui::Checkbox("Enable camera movement", &move); + // ImGui::Checkbox("Enable camera movement", &move); ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); @@ -966,7 +912,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); - if (firstFrame) { camera.setPosition(cameraIntialPosition); @@ -1057,16 +1002,16 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGuizmo::SetID(0u); - // TODO: camera will return hlsl::float32_tMxN + // TODO: camera will return hlsl::float32_tMxN auto view = *reinterpret_cast(camera.getViewMatrix().pointer()); imguizmoM16InOut.view = hlsl::transpose(getMatrix3x4As4x4(view)); - // TODO: camera will return hlsl::float32_tMxN + // TODO: camera will return hlsl::float32_tMxN imguizmoM16InOut.projection = hlsl::transpose(*reinterpret_cast(camera.getProjectionMatrix().pointer())); ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &imguizmoM16InOut.model[0][0]); - if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates - imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ + if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates + imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ transformParams.editTransformDecomposition = true; mainViewTransformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); @@ -1121,8 +1066,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR fieldName, ImVec4(c.r, c.g, c.b, 1.0f), 0, - ImVec2(20, 20) - ); + ImVec2(20, 20)); ImGui::SameLine(); ImGui::Text("%s", colorNames[index]); @@ -1140,9 +1084,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR drawColorField(":", m_GPUOutResulData.vertices[i]); ImGui::SameLine(); static const float32_t3 constCorners[8] = { - float32_t3(-1, -1, -1), float32_t3(1, -1, -1), float32_t3(-1, 1, -1), float32_t3(1, 1, -1), - float32_t3(-1, -1, 1), float32_t3(1, -1, 1), float32_t3(-1, 1, 1), float32_t3(1, 1, 1) - }; + float32_t3(-1, -1, -1), float32_t3(1, -1, -1), float32_t3(-1, 1, -1), float32_t3(1, 1, -1), + float32_t3(-1, -1, 1), float32_t3(1, -1, 1), float32_t3(-1, 1, 1), float32_t3(1, 1, 1) }; float32_t3 vertexLocation = constCorners[m_GPUOutResulData.vertices[i]]; ImGui::Text(" : (%.3f, %.3f, %.3f", vertexLocation.x, vertexLocation.y, vertexLocation.z); } @@ -1154,13 +1097,10 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR "", ImVec4(0.0f, 0.0f, 0.0f, 0.0f), 0, - ImVec2(20, 20) - ); + ImVec2(20, 20)); ImGui::SameLine(); ImGui::Text(""); - } - } } @@ -1178,8 +1118,24 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::Text("silhouette Vertex Count: %u", m_GPUOutResulData.silhouetteVertexCount); ImGui::Text("silhouette Positive VertexCount: %u", m_GPUOutResulData.positiveVertCount); ImGui::Text("Silhouette Mismatch: %s", m_GPUOutResulData.edgeVisibilityMismatch ? "true" : "false"); + ImGui::Separator(); ImGui::Text("Max triangles exceeded: %s", m_GPUOutResulData.maxTrianglesExceeded ? "true" : "false"); ImGui::Text("spherical lune detected: %s", m_GPUOutResulData.sphericalLuneDetected ? "true" : "false"); + ImGui::Separator(); + //ImGui::Text("Sampling outside the silhouette: %s", m_GPUOutResulData.sampleOutsideSilhouette ? "true" : "false"); + ImGui::Text("Parallelogram does not bound: %s", m_GPUOutResulData.parallelogramDoesNotBound ? "true" : "false"); + ImGui::Text("Parallelogram vertices inside: %s", m_GPUOutResulData.parallelogramVerticesInside ? "true" : "false"); + ImGui::Text("Parallelogram edges inside: %s", m_GPUOutResulData.parallelogramEdgesInside ? "true" : "false"); + ImGui::Text("Parallelogram area: %.3f", m_GPUOutResulData.parallelogramArea); + ImGui::Text("Failed vertex index: %u", m_GPUOutResulData.failedVertexIndex); + ImGui::Text("Failed vertex UV: (%.3f, %.3f)", m_GPUOutResulData.failedVertexUV.x, m_GPUOutResulData.failedVertexUV.y); + ImGui::Text("Failed edge index: %u", m_GPUOutResulData.failedEdgeIndex); + ImGui::Text("Failed edge sample: %u", m_GPUOutResulData.failedEdgeSample); + ImGui::Text("Failed edge UV: (%.3f, %.3f)", m_GPUOutResulData.failedEdgeUV.x, m_GPUOutResulData.failedEdgeUV.y); + ImGui::Text("Failed point 3D: (%.3f, %.3f, %.3f)", m_GPUOutResulData.failedPoint.x, m_GPUOutResulData.failedPoint.y, m_GPUOutResulData.failedPoint.z); + for (uint32_t i = 0; i < 8; i++) + ImGui::Text("edge is convex: %s", m_GPUOutResulData.edgeIsConvex[i] ? "true" : "false"); + ImGui::Separator(); { float32_t3 xAxis = m_OBBModelMatrix[0].xyz; @@ -1205,7 +1161,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR if (m_GPUOutResulData.silhouetteIndex != lastSilhouetteIndex) { modalShown = false; - modalDismissed = false; // Allow modal to show again for new configuration + modalDismissed = false; // Allow modal to show again for new configuration lastSilhouetteIndex = m_GPUOutResulData.silhouetteIndex; } @@ -1217,7 +1173,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } // Open modal only if not already shown/dismissed - if ((m_GPUOutResulData.edgeVisibilityMismatch || m_GPUOutResulData.maxTrianglesExceeded || m_GPUOutResulData.sphericalLuneDetected) && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown && !modalDismissed) // Don't reopen if user dismissed it + if ((m_GPUOutResulData.edgeVisibilityMismatch || m_GPUOutResulData.maxTrianglesExceeded || m_GPUOutResulData.sphericalLuneDetected) && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown && !modalDismissed) // Don't reopen if user dismissed it { ImGui::OpenPopup("Edge Visibility Mismatch Warning"); modalShown = true; @@ -1250,7 +1206,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { ImGui::CloseCurrentPopup(); modalShown = false; - modalDismissed = true; // Mark as dismissed to prevent reopening + modalDismissed = true; // Mark as dismissed to prevent reopening } ImGui::EndPopup(); } @@ -1284,7 +1240,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // Silhouette mask printed in binary - auto printBin = [](uint32_t bin, const char* name) { char buf[33]; @@ -1347,7 +1302,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { lastTRS = m_TRS; // Backup before randomizing int attempts = 0; - do { + do + { m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); attempts++; } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); @@ -1363,17 +1319,19 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { lastTRS = m_TRS; // Backup before randomizing int attempts = 0; - do { + do + { m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); attempts++; } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); } - //ImGui::SameLine(); + // ImGui::SameLine(); if (ImGui::Button("Randomize All")) { lastTRS = m_TRS; // Backup before randomizing int attempts = 0; - do { + do + { m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); @@ -1399,9 +1357,9 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { auto* streaminingBuffer = imGUI->getStreamingBuffer(); - const size_t total = streaminingBuffer->get_total_size(); // total memory range size for which allocation can be requested - const size_t freeSize = streaminingBuffer->getAddressAllocator().get_free_size(); // max total free bloock memory size we can still allocate from total memory available - const size_t consumedMemory = total - freeSize; // memory currently consumed by streaming buffer + const size_t total = streaminingBuffer->get_total_size(); // total memory range size for which allocation can be requested + const size_t freeSize = streaminingBuffer->getAddressAllocator().get_free_size(); // max total free bloock memory size we can still allocate from total memory available + const size_t consumedMemory = total - freeSize; // memory currently consumed by streaming buffer float freePercentage = 100.0f * (float)(freeSize) / (float)total; float allocatedPercentage = (float)(consumedMemory) / (float)total; @@ -1420,11 +1378,11 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::SetCursorPosX(windowPadding); if (freePercentage > 70.0f) - ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(0.0f, 1.0f, 0.0f, 0.4f)); // Green + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(0.0f, 1.0f, 0.0f, 0.4f)); // Green else if (freePercentage > 30.0f) - ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 1.0f, 0.0f, 0.4f)); // Yellow + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 1.0f, 0.0f, 0.4f)); // Yellow else - ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 0.0f, 0.0f, 0.4f)); // Red + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 0.0f, 0.0f, 0.4f)); // Red ImGui::ProgressBar(allocatedPercentage, barSize, ""); @@ -1440,19 +1398,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR snprintf(textBuffer, sizeof(textBuffer), text, freePercentage); ImVec2 textSize = ImGui::CalcTextSize(textBuffer); - ImVec2 textPos = ImVec2 - ( + ImVec2 textPos = ImVec2( progressBarPos.x + (progressBarSize.x - textSize.x) * 0.5f, - progressBarPos.y + (progressBarSize.y - textSize.y) * 0.5f - ); + progressBarPos.y + (progressBarSize.y - textSize.y) * 0.5f); ImVec4 bgColor = ImGui::GetStyleColorVec4(ImGuiCol_WindowBg); - drawList->AddRectFilled - ( + drawList->AddRectFilled( ImVec2(textPos.x - 5, textPos.y - 2), ImVec2(textPos.x + textSize.x + 5, textPos.y + textSize.y + 2), - ImGui::GetColorU32(bgColor) - ); + ImGui::GetColorU32(bgColor)); ImGui::SetCursorScreenPos(textPos); ImGui::Text("%s", textBuffer); @@ -1483,12 +1437,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR struct TRS // Source of truth { float32_t3 translation{ 0.0f, 0.0f, 1.5f }; - float32_t3 rotation{ 0.0f }; // MUST stay orthonormal + float32_t3 rotation{ 0.0f }; // MUST stay orthonormal float32_t3 scale{ 1.0f }; } m_TRS; float32_t4x4 m_OBBModelMatrix; // always overwritten from TRS - //std::string_view objectName; + // std::string_view objectName; TransformRequestParams transformParams; TransformReturnInfo mainViewTransformReturnInfo; TransformReturnInfo solidAngleViewTransformReturnInfo; @@ -1499,7 +1453,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR float fov = 90.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; float viewWidth = 10.f; - //uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed + // uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed bool isPerspective = true, isLH = true, flipGizmoY = true, move = true; bool firstFrame = true; @@ -1516,7 +1470,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // setting up pipeline in the constructor m_queueFamily = base.getComputeQueue()->getFamilyIndex(); m_cmdpool = base.m_device->createCommandPool(m_queueFamily, IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - //core::smart_refctd_ptr* cmdBuffs[] = { &m_cmdbuf, &m_timestampBeforeCmdBuff, &m_timestampAfterCmdBuff }; + // core::smart_refctd_ptr* cmdBuffs[] = { &m_cmdbuf, &m_timestampBeforeCmdBuff, &m_timestampAfterCmdBuff }; if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) base.logFail("Failed to create Command Buffers!\n"); if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_timestampBeforeCmdBuff)) @@ -1550,25 +1504,19 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR base.logFail("Failed to load precompiled \"benchmark\" shader!\n"); nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { - { - .binding = 0, - .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - } - }; + {.binding = 0, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1} }; smart_refctd_ptr dsLayout = base.m_device->createDescriptorSetLayout(bindings); if (!dsLayout) base.logFail("Failed to create a Descriptor Layout!\n"); SPushConstantRange pushConstantRanges[] = { - { - .stageFlags = ShaderStage::ESS_COMPUTE, - .offset = 0, - .size = sizeof(BenchmarkPushConstants) - } - }; + {.stageFlags = ShaderStage::ESS_COMPUTE, + .offset = 0, + .size = sizeof(BenchmarkPushConstants)} }; m_pplnLayout = base.m_device->createPipelineLayout(pushConstantRanges, smart_refctd_ptr(dsLayout)); if (!m_pplnLayout) base.logFail("Failed to create a Pipeline Layout!\n"); @@ -1578,7 +1526,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR params.layout = m_pplnLayout.get(); params.shader.entryPoint = "main"; params.shader.shader = shader.get(); - if (!base.m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) + if (!base.m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_pipeline)) base.logFail("Failed to create pipelines (compile & link shaders)!\n"); } @@ -1603,16 +1551,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR base.logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); assert(dummyBuff->getBoundMemory().memory == m_allocation.memory.get()); - smart_refctd_ptr pool = base.m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + smart_refctd_ptr pool = base.m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &dsLayout.get(), 1 }); m_ds = pool->createDescriptorSet(std::move(dsLayout)); { IGPUDescriptorSet::SDescriptorInfo info[1]; info[0].desc = smart_refctd_ptr(dummyBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; + info[0].info.buffer = { .offset = 0, .size = BufferSize }; IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} - }; + {.dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = info} }; base.m_device->updateDescriptorSets(writes, {}); } } @@ -1630,15 +1577,20 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR void run() { m_logger->log("\n\nsampling benchmark result:", ILogger::ELL_PERFORMANCE); + + m_logger->log("sampling benchmark, parallelogram projected solid angle result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE); + m_logger->log("sampling benchmark, triangle solid angle result:", ILogger::ELL_PERFORMANCE); - performBenchmark(SAMPLING_BENCHMARK_MODE::TRIANGLE_SOLID_ANGLE, SAMPLING_MODE_SOLID_ANGLE); + performBenchmark(SAMPLING_MODE::TRIANGLE_SOLID_ANGLE); + + //m_logger->log("sampling benchmark, triangle projected solid angle result:", ILogger::ELL_PERFORMANCE); + //performBenchmark(SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE); - m_logger->log("sampling benchmark, triangle projected solid angle result:", ILogger::ELL_PERFORMANCE); - performBenchmark(SAMPLING_BENCHMARK_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE, SAMPLING_MODE_PROJECTED_SOLID_ANGLE); } private: - void performBenchmark(SAMPLING_BENCHMARK_MODE mode, uint32_t solidAngleMode) + void performBenchmark(SAMPLING_MODE mode) { m_device->waitIdle(); @@ -1648,7 +1600,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR smart_refctd_ptr semaphore = m_device->createSemaphore(semaphoreCounter); IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; - IQueue::SSubmitInfo::SSemaphoreInfo waits[] = { {.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT } }; + IQueue::SSubmitInfo::SSemaphoreInfo waits[] = { {.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; IQueue::SSubmitInfo beforeTimestapSubmitInfo[1] = {}; const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufsBegin[] = { {.cmdbuf = m_timestampBeforeCmdBuff.get()} }; @@ -1668,15 +1620,14 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR benchmarkSubmitInfos[0].signalSemaphores = signals; benchmarkSubmitInfos[0].waitSemaphores = waits; - m_pushConstants.benchmarkMode = mode; - m_pushConstants.samplingMode = solidAngleMode; m_pushConstants.modelMatrix = float32_t3x4(transpose(m_visualizer->interface.m_OBBModelMatrix)); recordCmdBuff(); // warmup runs for (int i = 0; i < WarmupIterations; ++i) { + if (i == 0) m_api->startCapture(); waits[0].value = semaphoreCounter; @@ -1776,8 +1727,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR static constexpr int Iterations = 1; }; - template - inline bool logFail(const char* msg, Args&&... args) + template + inline bool logFail(const char* msg, Args &&...args) { m_logger->log(msg, ILogger::ELL_ERROR, std::forward(args)...); return false; @@ -1786,5 +1737,4 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR std::ofstream m_logFile; }; - NBL_MAIN_FUNC(SolidAngleVisualizer) \ No newline at end of file From 1f24efb90587dad1e6417bee5d1e10e14c7c520a Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 23 Jan 2026 09:28:36 +0100 Subject: [PATCH 183/219] prep for cropping in the presentation --- 40_PathTracer/include/renderer/CRenderer.h | 27 +++---------------- 40_PathTracer/include/renderer/CSession.h | 15 ++++++++--- .../shaders/present/push_constants.hlsl | 21 ++++++++++++--- .../renderer/shaders/{ => resolve}/rwmc.hlsl | 4 +-- .../include/renderer/shaders/session.hlsl | 2 +- 40_PathTracer/src/renderer/CScene.cpp | 8 +++++- 6 files changed, 42 insertions(+), 35 deletions(-) rename 40_PathTracer/include/renderer/shaders/{ => resolve}/rwmc.hlsl (87%) diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h index 57d81446c..af34c63bb 100644 --- a/40_PathTracer/include/renderer/CRenderer.h +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -8,8 +8,6 @@ #include "renderer/CScene.h" #include "renderer/CSession.h" -#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" - #include "renderer/shaders/pathtrace/push_constants.hlsl" #include "nbl/this_example/builtin/build/spirv/keys.hpp" @@ -71,9 +69,9 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl return true; } - video::IQueue* graphicsQueue = nullptr; - video::IQueue* computeQueue = nullptr; - video::IQueue* uploadQueue = nullptr; + video::CThreadSafeQueueAdapter* graphicsQueue = nullptr; + video::CThreadSafeQueueAdapter* computeQueue = nullptr; + video::CThreadSafeQueueAdapter* uploadQueue = nullptr; // core::smart_refctd_ptr utilities = nullptr; // can be null @@ -157,25 +155,6 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl // Resources used for envmap sampling nbl::ext::EnvmapImportanceSampling::EnvmapImportanceSampling m_envMapImportanceSampling; #endif - -// Denoiser - // TODO: autoexposure - core::smart_refctd_ptr lumaMeasure; - // TODO: motion vector stuff - // rwmc resolve, apply exposure, interleave into OptiX input formats - core::smart_refctd_ptr rwmcResolve; - // TODO: OIDN denoise - // deinterlave from OptiX output format, perform first axis of FFT - core::smart_refctd_ptr postDenoise; // TODO - // second axis FFT, spectrum multiply and iFFT - core::smart_refctd_ptr secondAxisBloom; // TODO - // first axis iFFT, tonemap, encode into final EXR format - core::smart_refctd_ptr secondAxisFFTTonemap; // TODO - -// Presenter (invokes denoiser) - core::smart_refctd_ptr presentRenderpass; // TODO - core::smart_refctd_ptr regularPresent; // TODO - core::smart_refctd_ptr cubemapPresent; // TODO }; inline CRenderer(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)), m_frameIx(m_construction.semaphore->getCounterValue()) {} diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h index b02e0dfa8..011b30bb0 100644 --- a/40_PathTracer/include/renderer/CSession.h +++ b/40_PathTracer/include/renderer/CSession.h @@ -31,24 +31,31 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm // bool init(video::IGPUCommandBuffer* cb); + // + inline bool isInitialized() const {return bool(m_active.immutables);} + // bool reset(const SSensorDynamics& newVal, video::IGPUCommandBuffer* cb); // inline void deinit() {m_active = {};} - private: - friend class CScene; - + // struct SConstructionParams { core::string name = "TODO from `sensor`"; core::smart_refctd_ptr scene; + SResolveConstants initResolveConstants; SSensorUniforms uniforms; SSensorDynamics initDynamics; - SResolveConstants initResolveConstants; + hlsl::uint16_t2 cropOffsets; + hlsl::uint16_t2 cropResolution; sensor_type_e type; }; + inline const SConstructionParams& getConstructionParams() const {return m_params;} + + private: + friend class CScene; inline CSession(SConstructionParams&& _params) : m_params(std::move(_params)) {} const SConstructionParams m_params; diff --git a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl index c966188a3..65b7c4e5f 100644 --- a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl +++ b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl @@ -2,7 +2,7 @@ #define _NBL_THIS_EXAMPLE_PRESENT_PUSH_CONSTANTS_HLSL_INCLUDED_ -#include "renderer/shaders/rwmc.hlsl" +#include "renderer/shaders/resolve/rwmc.hlsl" // no uint16_t to be used because its going to be a push constant @@ -15,9 +15,24 @@ struct DefaultResolvePushConstants { NBL_CONSTEXPR_STATIC_INLINE uint32_t ImageCount = 16; - // 3 bits for cube layer + struct Regular + { + float32_t2 cropOffset; + float32_t2 scale; + }; + struct Cubemap + { + // TODO + }; + union + { + Regular regular; + Cubemap cubemap; + }; + // 3 extra bits for cube layer + uint32_t isCubemap : 1; uint32_t layer : BOOST_PP_ADD(MAX_CASCADE_COUNT_LOG2,3); - uint32_t imageIndex : BOOST_PP_SUB(29,MAX_CASCADE_COUNT_LOG2); + uint32_t imageIndex : BOOST_PP_SUB(28,MAX_CASCADE_COUNT_LOG2); }; } diff --git a/40_PathTracer/include/renderer/shaders/rwmc.hlsl b/40_PathTracer/include/renderer/shaders/resolve/rwmc.hlsl similarity index 87% rename from 40_PathTracer/include/renderer/shaders/rwmc.hlsl rename to 40_PathTracer/include/renderer/shaders/resolve/rwmc.hlsl index cf9c29c60..692a6fb16 100644 --- a/40_PathTracer/include/renderer/shaders/rwmc.hlsl +++ b/40_PathTracer/include/renderer/shaders/resolve/rwmc.hlsl @@ -26,8 +26,8 @@ struct SResolveConstants // TODO: move somewhere hlsl::float32_t reciprocalKappa; hlsl::float32_t colorReliabilityFactor; } rwmc; - uint32_t cascadeCount : BOOST_PP_ADD(MAX_CASCADE_COUNT_LOG2,1); - uint32_t unused : BOOST_PP_SUB(31,MAX_CASCADE_COUNT_LOG2); + uint64_t cascadeCount : BOOST_PP_ADD(MAX_CASCADE_COUNT_LOG2,1); + uint64_t scratchBDA : BOOST_PP_SUB(63,MAX_CASCADE_COUNT_LOG2); }; } diff --git a/40_PathTracer/include/renderer/shaders/session.hlsl b/40_PathTracer/include/renderer/shaders/session.hlsl index f28d3faeb..45941405f 100644 --- a/40_PathTracer/include/renderer/shaders/session.hlsl +++ b/40_PathTracer/include/renderer/shaders/session.hlsl @@ -2,7 +2,7 @@ #define _NBL_THIS_EXAMPLE_SESSION_HLSL_INCLUDED_ -#include "renderer/shaders/rwmc.hlsl" +#include "renderer/shaders/resolve/rwmc.hlsl" namespace nbl diff --git a/40_PathTracer/src/renderer/CScene.cpp b/40_PathTracer/src/renderer/CScene.cpp index cb2f8dc0d..4a3bf9edf 100644 --- a/40_PathTracer/src/renderer/CScene.cpp +++ b/40_PathTracer/src/renderer/CScene.cpp @@ -23,12 +23,18 @@ smart_refctd_ptr CScene::createSession(const sensor_t& sensor) CSession::SConstructionParams params = { .scene = smart_refctd_ptr(this), + .cropOffsets = {mutDefaults.cropOffsetX,mutDefaults.cropOffsetY}, + .cropResolution = {mutDefaults.cropWidth,mutDefaults.cropHeight}, .type = raygen.getType() }; + + const uint16_t2 renderSize(constants.width,constants.height); + assert(all(params.cropOffsets(mutDefaults.maxPathDepth,1,0x1u<(mutDefaults.russianRouletteDepth,1,maxPathDepth); params.uniforms = { From 418e89bef82a3d956f7eed7437a35d37d3a5ffab Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 23 Jan 2026 15:00:50 +0100 Subject: [PATCH 184/219] work on the presenter and resolver --- 40_PathTracer/CMakeLists.txt | 2 + 40_PathTracer/include/renderer/CRenderer.h | 5 +- 40_PathTracer/include/renderer/CScene.h | 2 +- 40_PathTracer/include/renderer/CSession.h | 61 +- .../renderer/present/CWindowPresenter.h | 95 +++ .../include/renderer/present/IPresenter.h | 32 + .../renderer/resolve/CBasicRWMCResolver.h | 103 +++ .../include/renderer/resolve/IResolver.h | 41 ++ .../shaders/present/push_constants.hlsl | 30 +- 40_PathTracer/main.cpp | 656 +++++++----------- 40_PathTracer/src/renderer/CScene.cpp | 24 +- .../src/renderer/present/CWindowPresenter.cpp | 363 ++++++++++ .../renderer/resolve/CBasicRWMCResolver.cpp | 60 ++ 13 files changed, 1034 insertions(+), 440 deletions(-) create mode 100644 40_PathTracer/include/renderer/present/CWindowPresenter.h create mode 100644 40_PathTracer/include/renderer/present/IPresenter.h create mode 100644 40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h create mode 100644 40_PathTracer/include/renderer/resolve/IResolver.h create mode 100644 40_PathTracer/src/renderer/present/CWindowPresenter.cpp create mode 100644 40_PathTracer/src/renderer/resolve/CBasicRWMCResolver.cpp diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index fb9c597f8..022910c9b 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -19,6 +19,8 @@ list(APPEND NBL_EXAMPLE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CSession.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CScene.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CRenderer.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/resolve/CBasicRWMCResolver.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/present/CWindowPresenter.cpp" ) list(APPEND NBL_ ) diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h index af34c63bb..dd7d19eae 100644 --- a/40_PathTracer/include/renderer/CRenderer.h +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -86,7 +86,10 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl // inline const SCachedCreationParams& getCreationParams() const { return m_creation; } - + + // + inline system::logger_opt_ptr getLogger() const {return m_creation.logger.get().get();} + // inline video::ILogicalDevice* getDevice() const {return m_creation.utilities->getLogicalDevice();} diff --git a/40_PathTracer/include/renderer/CScene.h b/40_PathTracer/include/renderer/CScene.h index 0ebb36425..d11694cec 100644 --- a/40_PathTracer/include/renderer/CScene.h +++ b/40_PathTracer/include/renderer/CScene.h @@ -42,7 +42,7 @@ class CScene : public core::IReferenceCounted, public core::InterfaceUnmovable inline std::span getSensors() const {return m_construction.sensors;} // - core::smart_refctd_ptr createSession(const sensor_t& sensor); + core::smart_refctd_ptr createSession(const CSession::SCreationParams& sensor); protected: friend class CRenderer; diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h index 011b30bb0..6bf86b98b 100644 --- a/40_PathTracer/include/renderer/CSession.h +++ b/40_PathTracer/include/renderer/CSession.h @@ -18,6 +18,8 @@ class CScene; class CSession final : public core::IReferenceCounted, public core::InterfaceUnmovable { public: + using sensor_t = CSceneLoader::SLoadResult::SSensor; + using sensor_type_e = sensor_t::SMutable::Raygen::Type; enum class RenderMode : uint8_t { Previs, @@ -25,8 +27,16 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm Debug, Count }; - using sensor_t = CSceneLoader::SLoadResult::SSensor; - using sensor_type_e = sensor_t::SMutable::Raygen::Type; + struct SCachedCreationParams + { + RenderMode mode = RenderMode::Beauty; + }; + struct SCreationParams : SCachedCreationParams + { + inline operator bool() const {return sensor;} + + const sensor_t* sensor; + }; // bool init(video::IGPUCommandBuffer* cb); @@ -34,6 +44,42 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm // inline bool isInitialized() const {return bool(m_active.immutables);} + // + inline video::IGPUImageView* getBeauty(const asset::E_FORMAT format) const + { + return m_active.immutables.beauty.getView(format); + } + + // + inline video::IGPUImageView* getRWMCCascades() const + { + return m_active.immutables.rwmcCascades.getView(asset::E_FORMAT::EF_R32G32_UINT); + } + + // + inline video::IGPUImageView* getAlbedo() const + { + return m_active.immutables.albedo.getView(asset::E_FORMAT::EF_A2B10G10R10_UNORM_PACK32); + } + + // + inline video::IGPUImageView* getNormal() const + { + return m_active.immutables.normal.getView(asset::E_FORMAT::EF_A2B10G10R10_UNORM_PACK32); + } + + // + inline video::IGPUImageView* getMotion() const + { + return m_active.immutables.motion.getView(asset::E_FORMAT::EF_A2B10G10R10_UNORM_PACK32); + } + + // + inline video::IGPUImageView* getMask() const + { + return m_active.immutables.mask.getView(asset::E_FORMAT::EF_R16_UNORM); + } + // bool reset(const SSensorDynamics& newVal, video::IGPUCommandBuffer* cb); @@ -41,7 +87,7 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm inline void deinit() {m_active = {};} // - struct SConstructionParams + struct SConstructionParams : SCachedCreationParams { core::string name = "TODO from `sensor`"; core::smart_refctd_ptr scene; @@ -69,8 +115,15 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm return image && !views.empty() && views.begin()->second; } + inline video::IGPUImageView* getView(const asset::E_FORMAT format) const + { + if (const auto found=views.find(format); found!=views.end()) + return found->second.get(); + return nullptr; + } + core::smart_refctd_ptr image = {}; - core::unordered_map> views = {}; + core::unordered_map> views = {}; }; struct SImmutables { diff --git a/40_PathTracer/include/renderer/present/CWindowPresenter.h b/40_PathTracer/include/renderer/present/CWindowPresenter.h new file mode 100644 index 000000000..4526a1225 --- /dev/null +++ b/40_PathTracer/include/renderer/present/CWindowPresenter.h @@ -0,0 +1,95 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_C_WINDOW_PRESENTER_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_C_BASIC_RWMC_RESOLVER_H_INCLUDED_ + + +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + +#include "renderer/CRenderer.h" +#include "renderer/present/IPresenter.h" + +#include "renderer/shaders/present/push_constants.hlsl" + + +namespace nbl::this_example +{ + +class CWindowPresenter : public IPresenter +{ + public: + using swapchain_resources_t = video::CDefaultSwapchainFramebuffers; + static const video::IGPURenderpass::SCreationParams::SSubpassDependency dependencies[3]; + + struct SCachedCreationParams + { + core::smart_refctd_ptr winMgr = nullptr; + system::logger_opt_smart_ptr logger = nullptr; + // for the UI, 1080p with 50% scaling + hlsl::uint16_t2 minResolution = {1264,698}; + }; + struct SCreationParams : SCachedCreationParams + { + inline operator bool() const {return winMgr && api && callback;} + + core::smart_refctd_ptr api = {}; + core::smart_refctd_ptr callback = {}; + std::string_view initialWindowCaption = ""; + }; + static core::smart_refctd_ptr create(SCreationParams&& _params); + + // + inline const video::ISurface* getSurface() const {return m_construction.surface->getSurface();} + + // + bool init(CRenderer* renderer); + + // + inline const SCachedCreationParams& getCreationParams() const {return m_creation;} + + // + inline ui::ICursorControl* getCursorControl() const {return m_construction.cursorControl;} + + // + inline const video::IGPURenderpass* getRenderpass() const {return getSwapchainResources()->getRenderpass();} + + // + bool irrecoverable() const {return m_construction.surface->irrecoverable();} + + // returns expected presentation time for frame pacing + clock_t::time_point acquire(const video::ISwapchain::SAcquireInfo& info, CSession* session) override; + // + bool beginRenderpass(video::IGPUCommandBuffer* cb) override; + // + bool endRenderpassAndPresent(video::IGPUCommandBuffer* cb, video::ISemaphore* presentBeginSignal) override; + + protected: + using surface_t = video::CSimpleResizeSurface; + struct SCachedConstructionParams + { + core::smart_refctd_ptr surface; + ui::IWindow* window; + ui::ICursorControl* cursorControl; + hlsl::float64_t2 aspectRatioRange; + hlsl::uint16_t2 maxResolution; + }; + struct SConstructorParams : SCachedCreationParams, SCachedConstructionParams + { + }; + inline CWindowPresenter(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)), m_pushConstants({}) {} + + inline video::ISurface* getSurface() {return m_construction.surface->getSurface();} + + inline swapchain_resources_t* getSwapchainResources() {return static_cast(m_construction.surface->getSwapchainResources());} + inline const swapchain_resources_t* getSwapchainResources() const {return static_cast(m_construction.surface->getSwapchainResources());} + + SCachedCreationParams m_creation; + SCachedConstructionParams m_construction; + core::smart_refctd_ptr m_present; + video::ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + DefaultResolvePushConstants m_pushConstants; +}; + +} +#endif diff --git a/40_PathTracer/include/renderer/present/IPresenter.h b/40_PathTracer/include/renderer/present/IPresenter.h new file mode 100644 index 000000000..dcf3c8872 --- /dev/null +++ b/40_PathTracer/include/renderer/present/IPresenter.h @@ -0,0 +1,32 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_I_PRESENTER_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_I_PRESENTER_H_INCLUDED_ + + +#include "renderer/CScene.h" +#include "renderer/CSession.h" + +#include "renderer/shaders/pathtrace/push_constants.hlsl" + + +namespace nbl::this_example +{ + +class IPresenter : public core::IReferenceCounted, public core::InterfaceUnmovable +{ + public: + // + virtual bool irrecoverable() const {return false;} + // returns expected presentation time for frame pacing + using clock_t = std::chrono::steady_clock; + virtual clock_t::time_point acquire(const video::ISwapchain::SAcquireInfo& info, CSession* background) = 0; + // + virtual bool beginRenderpass(video::IGPUCommandBuffer* cb) = 0; + // + virtual bool endRenderpassAndPresent(video::IGPUCommandBuffer* cb, video::ISemaphore* presentBeginSignal) = 0; +}; + +} +#endif diff --git a/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h b/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h new file mode 100644 index 000000000..acc325963 --- /dev/null +++ b/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h @@ -0,0 +1,103 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_C_BASIC_RWMC_RESOLVER_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_C_BASIC_RWMC_RESOLVER_H_INCLUDED_ + + +#include "renderer/CRenderer.h" +#include "renderer/resolve/IResolver.h" +#include "renderer/shaders/resolve/rwmc.hlsl" + + +namespace nbl::this_example +{ + +class CBasicRWMCResolver : public IResolver +{ + public: + enum class AutoExposure : uint8_t + { + GeometricAverage, + Median, + Count + }; + enum class Tonemapping : uint8_t + { + Reinhard, + ACES, + Count + }; + + // + struct SCachedCreationParams + { + }; + struct SCreationParams : SCachedCreationParams + { + inline operator bool() const {return renderer;} + + CRenderer* renderer; + }; + static core::smart_refctd_ptr create(SCreationParams&& _params); + + // + inline const SCachedCreationParams& getCreationParams() const { return m_creation; } + + struct SCachedConstructionParams + { + // TODO: autoexposure + core::smart_refctd_ptr lumaMeasure; + // TODO: motion vector stuff + // rwmc resolve, apply exposure, interleave into OptiX input formats + core::smart_refctd_ptr rwmcResolve; + // TODO: OIDN denoise + // deinterlave from OptiX output format, perform first axis of FFT + core::smart_refctd_ptr postDenoise; // TODO + // second axis FFT, spectrum multiply and iFFT + core::smart_refctd_ptr secondAxisBloom; // TODO + // first axis iFFT, tonemap, encode into final EXR format + core::smart_refctd_ptr secondAxisFFTTonemap; // TODO + // + core::smart_refctd_ptr persistentExposureArgs; + // + core::smart_refctd_ptr bloomKernelSpectrum; + }; + // + inline const SCachedConstructionParams& getConstructionParams() const {return m_construction;} + + // + inline uint64_t computeScratchSize(const CSession* session) const override + { + if (!session) + return 0ull; + switch (session->getConstructionParams().mode) + { + case CSession::RenderMode::Previs: [[fallthrough]]; + case CSession::RenderMode::Debug: [[fallthrough]]; + return 0ull; + case CSession::RenderMode::Beauty: + return 0ull; // for now, as long as we blit + default: + break; + } + assert(false); // unimplemented + return ~0ull; + } + // + bool resolve(video::IGPUCommandBuffer* cb, video::IGPUBuffer* scratch) override; + + protected: + struct SConstructorParams : SCachedCreationParams, SCachedConstructionParams + { + }; + inline CBasicRWMCResolver(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)) {} + + bool changeSession_impl() override; + + SCachedCreationParams m_creation; + SCachedConstructionParams m_construction; +}; + +} +#endif diff --git a/40_PathTracer/include/renderer/resolve/IResolver.h b/40_PathTracer/include/renderer/resolve/IResolver.h new file mode 100644 index 000000000..34b447925 --- /dev/null +++ b/40_PathTracer/include/renderer/resolve/IResolver.h @@ -0,0 +1,41 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_I_RESOLVER_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_I_RESOLVER_H_INCLUDED_ + + +#include "renderer/CSession.h" + + +namespace nbl::this_example +{ + +class IResolver : public core::IReferenceCounted, public core::InterfaceUnmovable +{ + public: + // + inline bool changeSession(core::smart_refctd_ptr&& session) + { + m_activeSession = std::move(session); + if (!session || !session->isInitialized() || !changeSession_impl()) + { + m_activeSession = {}; + return false; + } + return true; + } + // + virtual uint64_t computeScratchSize(const CSession* session) const = 0; + inline uint64_t computeScratchSize() const {return computeScratchSize(m_activeSession.get());} + // + virtual bool resolve(video::IGPUCommandBuffer* cv, video::IGPUBuffer* scratch) = 0; + + protected: + virtual bool changeSession_impl() = 0; + + core::smart_refctd_ptr m_activeSession; +}; + +} +#endif diff --git a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl index 65b7c4e5f..9da535329 100644 --- a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl +++ b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl @@ -10,6 +10,7 @@ namespace nbl { namespace this_example { +using namespace nbl::hlsl; struct DefaultResolvePushConstants { @@ -17,22 +18,43 @@ struct DefaultResolvePushConstants struct Regular { - float32_t2 cropOffset; + // float32_t2 scale; + // post-scale addition to uv coordinate to get to beginning + float32_t2 crop; + // Because `scale*uv+cropOffsets!=1.0` where the image is supposed to end + float32_t2 limit; }; struct Cubemap { - // TODO + // theoretically we only need inverse of product of 3x3 view with very sparse 4x4 + float32_t4x4 invProjView; }; +#ifndef __HLSL_VERSION union { Regular regular; Cubemap cubemap; }; +#else + // note how this is a conversion to a copy, and not handing out of a reference + // Ergo, its not a true "union" + inline Regular regular() + { + Regular retval; + retval.scale = __union.invProjView[0].xy; + retval.crop = __union.invProjView[0].zw; + retval.limit = __union.invProjView[1].xy; + return retval; + } + inline Cubemap cubemap() {return __union;} + + Cubemap __union; +#endif // 3 extra bits for cube layer uint32_t isCubemap : 1; - uint32_t layer : BOOST_PP_ADD(MAX_CASCADE_COUNT_LOG2,3); - uint32_t imageIndex : BOOST_PP_SUB(28,MAX_CASCADE_COUNT_LOG2); + uint32_t layer : MAX_CASCADE_COUNT_LOG2; + uint32_t imageIndex : BOOST_PP_SUB(31,MAX_CASCADE_COUNT_LOG2); }; } diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 4a03b9371..8b19b9dfc 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -1,323 +1,220 @@ // Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - - #include "nbl/examples/common/BuiltinResourcesApplication.hpp" #include "nbl/examples/examples.hpp" #include "renderer/CRenderer.h" +#include "renderer/resolve/CBasicRWMCResolver.h" +#include "renderer/present/CWindowPresenter.h" #include "nlohmann/json.hpp" -// TODO remove -#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" -#include "common.hpp" -#include "nbl/builtin/hlsl/indirect_commands.hlsl" - - using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; using namespace nbl::application_templates; using namespace nbl::examples; using namespace nbl::this_example; -class PathTracingApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication +// TODO: move to argument parsing class +struct AppArguments { - using device_base_t = SimpleWindowedApplication; - using asset_base_t = BuiltinResourcesApplication; - - // TODO: move to Nabla proper - static inline void jsonizeGitInfo(nlohmann::json& target, const gtml::GitInfo& info) - { - target["isPopulated"] = info.isPopulated; - if (info.hasUncommittedChanges.has_value()) - target["hasUncommittedChanges"] = info.hasUncommittedChanges.value(); - else - target["hasUncommittedChanges"] = "UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"; - - target["commitAuthorName"] = info.commitAuthorName; - target["commitAuthorEmail"] = info.commitAuthorEmail; - target["commitHash"] = info.commitHash; - target["commitShortHash"] = info.commitShortHash; - target["commitDate"] = info.commitDate; - target["commitSubject"] = info.commitSubject; - target["commitBody"] = info.commitBody; - target["describe"] = info.describe; - target["branchName"] = info.branchName; - target["latestTag"] = info.latestTag; - target["latestTagName"] = info.latestTagName; - } - - inline void printGitInfos() const - { - nlohmann::json j; - - auto& modules = j["modules"]; - jsonizeGitInfo(modules["nabla"],gtml::nabla_git_info); - jsonizeGitInfo(modules["dxc"],gtml::dxc_git_info); - - m_logger->log("Build Info:\n%s",ILogger::ELL_INFO,j.dump(4).c_str()); - } - -// TODO: remove - constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; // TODO: remove - constexpr static inline uint32_t MaxFramesInFlight = 3u; - constexpr static inline uint8_t MaxUITextureCount = 1u; // TODO: remove - constexpr static inline uint32_t NumberOfProceduralGeometries = 5; // TODO: remove - - static constexpr const char* s_lightTypeNames[E_LIGHT_TYPE::ELT_COUNT] = { // TODO: remove - "Directional", - "Point", - "Spot" - }; - - struct ShaderBindingTable // TODO: remove - { - SBufferRange raygenGroupRange; - SBufferRange hitGroupsRange; - uint32_t hitGroupsStride; - SBufferRange missGroupsRange; - uint32_t missGroupsStride; - SBufferRange callableGroupsRange; - uint32_t callableGroupsStride; - }; - - -public: - inline PathTracingApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} - - inline SPhysicalDeviceFeatures getRequiredDeviceFeatures() const override - { - auto retval = device_base_t::getRequiredDeviceFeatures(); - return retval.unionWith(CRenderer::RequiredDeviceFeatures()); - } + bool headless = false; +}; - inline SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override - { - auto retval = device_base_t::getPreferredDeviceFeatures(); - return retval.unionWith(CRenderer::PreferredDeviceFeatures()); - } - inline SPhysicalDeviceLimits getRequiredDeviceLimits() const override - { - auto retval = device_base_t::getRequiredDeviceLimits(); - // TODO: need union/superset - retval.shaderStorageImageReadWithoutFormat = true; - return retval; - } +class PathTracingApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication +{ + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; - inline core::vector getSurfaces() const override - { - if (!m_surface) + // TODO: move to Nabla proper + static inline void jsonizeGitInfo(nlohmann::json& target, const nbl::gtml::GitInfo& info) { - { - auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem),smart_refctd_ptr(m_logger)); - IWindow::SCreationParams params = {}; - params.callback = core::make_smart_refctd_ptr(); - params.width = WIN_W; - params.height = WIN_H; - params.x = 32; - params.y = 32; - params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; - params.windowCaption = "RaytracingPipelineApp"; - params.callback = windowCallback; - const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); - } - - auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); - const_cast&>(m_surface) = CSimpleResizeSurface::create(std::move(surface)); + target["isPopulated"] = info.isPopulated; + if (info.hasUncommittedChanges.has_value()) + target["hasUncommittedChanges"] = info.hasUncommittedChanges.value(); + else + target["hasUncommittedChanges"] = "UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"; + + target["commitAuthorName"] = info.commitAuthorName; + target["commitAuthorEmail"] = info.commitAuthorEmail; + target["commitHash"] = info.commitHash; + target["commitShortHash"] = info.commitShortHash; + target["commitDate"] = info.commitDate; + target["commitSubject"] = info.commitSubject; + target["commitBody"] = info.commitBody; + target["describe"] = info.describe; + target["branchName"] = info.branchName; + target["latestTag"] = info.latestTag; + target["latestTagName"] = info.latestTagName; } - if (m_surface) - return { {m_surface->getSurface()/*,EQF_NONE*/} }; - - return {}; - } - - inline bool onAppInitialized(smart_refctd_ptr&& system) override - { - m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); - - if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - - if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; + inline void printGitInfos() const + { + nlohmann::json j; - printGitInfos(); - - // TODO: move new members - smart_refctd_ptr m_sceneLoader; - smart_refctd_ptr m_renderer; + auto& modules = j["modules"]; + jsonizeGitInfo(modules["nabla"],nbl::gtml::nabla_git_info); + jsonizeGitInfo(modules["dxc"],nbl::gtml::dxc_git_info); - // set up the scene loader - m_sceneLoader = CSceneLoader::create({{ - .assMan = smart_refctd_ptr(m_assetMgr), - .logger = smart_refctd_ptr(m_logger) - }}); + m_logger->log("Build Info:\n%s",ILogger::ELL_INFO,j.dump(4).c_str()); + } - // - m_renderer = CRenderer::create({ - { - .graphicsQueue = getGraphicsQueue(), - .computeQueue = getComputeQueue(), - .uploadQueue = getTransferUpQueue(), - .utilities = smart_refctd_ptr(m_utils) - }, - "TODO Sample sequence cache", - m_assetMgr.get() - }); - - // TODO: tmp code - auto scene_daily_pt = m_renderer->createScene({ - .load = m_sceneLoader->load({ - .relPath = sharedInputCWD/"mitsuba/daily_pt.xml", - .workingDirectory = localOutputCWD - }), - .converter = nullptr - }); - // the UI would have you load the zip first, then present a dropdown of what to load - // but still need to support archive mount for cmdline load -#if 0 // this particular zip goes down an unsupported path in our zip loader - auto scene_bedroom = m_sceneLoader->load({ - .relPath = sharedInputCWD/"mitsuba/bedroom.zip/scene.xml", - .workingDirectory = localOutputCWD - }); -#endif - auto session = scene_daily_pt->createSession(scene_daily_pt->getSensors().front()); + public: + inline PathTracingApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} - // temporary test + inline SPhysicalDeviceFeatures getRequiredDeviceFeatures() const override { - auto cb = m_renderer->getConstructionParams().commandBuffers[0].get(); - cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - session->init(cb); - cb->end(); - - // TODO: stuff + auto retval = device_base_t::getRequiredDeviceFeatures(); + return retval.unionWith(CRenderer::RequiredDeviceFeatures()); } - session->deinit(); - scene_daily_pt = nullptr; - + inline SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + return retval.unionWith(CRenderer::PreferredDeviceFeatures()); + } + inline SPhysicalDeviceLimits getRequiredDeviceLimits() const override + { + auto retval = device_base_t::getRequiredDeviceLimits(); + // TODO: need union/superset + retval.shaderStorageImageReadWithoutFormat = true; + return retval; + } - // Create renderpass and init surface - nbl::video::IGPURenderpass* renderpass; + inline nbl::core::vector getSurfaces() const override { - ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; - if (!swapchainParams.deduceFormat(m_physicalDevice)) - return logFail("Could not choose a Surface Format for the Swapchain!"); + if (m_args.headless) + return {}; - const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + if (!m_presenter) { - { - .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = - { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }, - { - .srcSubpass = 0, - .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = - { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; - - auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); - renderpass = scResources->getRenderpass(); + const_cast&>(m_presenter) = CWindowPresenter::create({ + { + .winMgr = m_winMgr, + .logger = smart_refctd_ptr(m_logger) + }, + m_api, + make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem),smart_refctd_ptr(m_logger)), + "Path Tracer" + }); + } - if (!renderpass) - return logFail("Failed to create Renderpass!"); + if (m_presenter) + { + const auto* presenter = m_presenter.get(); + return { {presenter->getSurface()/*,EQF_NONE*/} }; + } - if (!m_surface || !m_surface->init(getGraphicsQueue(), std::move(scResources), swapchainParams.sharedParams)) - return logFail("Could not create Window & Surface or initialize the Surface!"); + return {}; } + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + // TODO: parse the arguments + m_args = {}; + if (!m_args.headless) + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); - m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); - m_surface->recreateSwapchain(); + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + printGitInfos(); -#if 0 // presenter - ISampler::SParams samplerParams = { - .AnisotropicFilter = 0 - }; - auto defaultSampler = m_device->createSampler(samplerParams); + // + if (!m_args.headless && !m_presenter) + return logFail("Failed to create CWindowPresenter"); - { - const IGPUDescriptorSetLayout::SBinding bindings[] = { - { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - .immutableSamplers = &defaultSampler - } - }; - auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(bindings); - const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; - const uint32_t setCounts[] = { 1u }; - m_presentDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - m_presentDs = m_presentDsPool->createDescriptorSet(gpuPresentDescriptorSetLayout); - - auto scRes = static_cast(m_surface->getSwapchainResources()); - ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); - if (!fsTriProtoPPln) - return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); - - const IGPUPipelineBase::SShaderSpecInfo fragSpec = { - .shader = fragmentShader.get(), - .entryPoint = "main", - }; - - auto presentLayout = m_device->createPipelineLayout( + // + m_renderer = CRenderer::create({ + { + .graphicsQueue = getGraphicsQueue(), + .computeQueue = getComputeQueue(), + .uploadQueue = getTransferUpQueue(), + .utilities = smart_refctd_ptr(m_utils) + }, + "TODO Sample sequence cache", + m_assetMgr.get() + }); + if (!m_renderer) + return logFail("Failed to create CRenderer"); + + // + if (!m_args.headless && !m_presenter->init(m_renderer.get())) + return logFail("Failed to initialize CWindowPresenter"); + + // + m_resolver = CBasicRWMCResolver::create({ {}, - core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), - nullptr, - nullptr, - nullptr - ); - m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); - if (!m_presentPipeline) - return logFail("Could not create Graphics Pipeline!"); - } - - // write descriptors - IGPUDescriptorSet::SDescriptorInfo infos[3]; - infos[0].desc = m_gpuTlas; - - infos[1].desc = m_hdrImageView; - if (!infos[1].desc) - return logFail("Failed to create image view"); - infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + m_renderer.get() + }); + if (!m_resolver) + return logFail("Failed to create CBasicRWMCResolver"); - infos[2].desc = m_hdrImageView; - infos[2].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + // set up the scene loader + m_sceneLoader = CSceneLoader::create({ + { + .assMan = smart_refctd_ptr(m_assetMgr), + .logger = smart_refctd_ptr(m_logger) + } + }); + + // TODO: tmp code + auto scene_daily_pt = m_renderer->createScene({ + .load = m_sceneLoader->load({ + .relPath = sharedInputCWD/"mitsuba/daily_pt.xml", + .workingDirectory = localOutputCWD + }), + .converter = nullptr + }); + // the UI would have you load the zip first, then present a dropdown of what to load + // but still need to support archive mount for cmdline load + #if 0 // this particular zip goes down an unsupported path in our zip loader + auto scene_bedroom = m_sceneLoader->load({ + .relPath = sharedInputCWD/"mitsuba/bedroom.zip/scene.xml", + .workingDirectory = localOutputCWD + }); + #endif + + auto session = scene_daily_pt->createSession({ + {.mode=CSession::RenderMode::Debug}, + scene_daily_pt->getSensors().data() + }); + + // temporary test + m_presenter->acquire({},session.get()); + { + auto cb = m_renderer->getConstructionParams().commandBuffers[0].get(); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + session->init(cb); +// renderer->render(cb); + m_resolver->resolve(cb,nullptr); +// m_presenter->acquire({},session); +// m_presenter->beginRenderpass(cb); +// m_presenter->endRenderpassAndPresent(cb); + + // TODO: submit + } + session->deinit(); + scene_daily_pt = nullptr; - IGPUDescriptorSet::SWriteDescriptorSet writes[] = { - {.dstSet = m_rayTracingDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[0]}, - {.dstSet = m_rayTracingDs.get(), .binding = 1, .arrayElement = 0, .count = 1, .info = &infos[1]}, - {.dstSet = m_presentDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[2] }, - }; - m_device->updateDescriptorSets(std::span(writes), {}); + return true; +#if 0 // ui // gui descriptor setup { using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; @@ -440,13 +337,9 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes } ); #endif - m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); - m_surface->recreateSwapchain(); - m_winMgr->show(m_window.get()); - - return true; - } + } +#if 0 // gui bool updateGUIDescriptorSet() { // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout @@ -467,9 +360,10 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes return m_device->updateDescriptorSets(writes, {}); } +#endif - inline void workLoopBody() override - { + inline void workLoopBody() override + { #if 0 // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); @@ -502,7 +396,6 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - cmdbuf->beginDebugMarker("RaytracingPipelineApp Frame"); const auto viewMatrix = m_camera.getViewMatrix(); const auto projectionMatrix = m_camera.getProjectionMatrix(); @@ -603,59 +496,16 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); } - { - asset::SViewport viewport; - { - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = WIN_W; - viewport.height = WIN_H; - } - cmdbuf->setViewport(0u, 1u, &viewport); +// ... + const auto uiParams = m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cmdbuf->bindGraphicsPipeline(uiPipeline); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); + ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + m_ui.manager->render(cmdbuf, waitInfo); +// ... - VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; - cmdbuf->setScissor(defaultScisors); - - auto scRes = static_cast(m_surface->getSwapchainResources()); - const VkRect2D currentRenderArea = - { - .offset = {0,0}, - .extent = {m_window->getWidth(),m_window->getHeight()} - }; - const IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; - const IGPUCommandBuffer::SRenderpassBeginInfo info = - { - .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), - .colorClearValues = &clearColor, - .depthStencilClearValues = nullptr, - .renderArea = currentRenderArea - }; - nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; - - cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - - cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDs.get()); - ext::FullScreenTriangle::recordDrawCall(cmdbuf); - - const auto uiParams = m_ui.manager->getCreationParameters(); - auto* uiPipeline = m_ui.manager->getPipeline(); - cmdbuf->bindGraphicsPipeline(uiPipeline); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); - m_ui.manager->render(cmdbuf, waitInfo); - - cmdbuf->endRenderPass(); - - } - - cmdbuf->endDebugMarker(); - cmdbuf->end(); - - { - const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = { { .semaphore = m_semaphore.get(), @@ -665,60 +515,34 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes }; { { - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = - { - {.cmdbuf = cmdbuf } - }; - - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = - { - { - .semaphore = m_currentImageAcquire.semaphore, - .value = m_currentImageAcquire.acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - const IQueue::SSubmitInfo infos[] = - { - { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = rendered - } - }; updateGUIDescriptorSet(); - if (queue->submit(infos) != IQueue::RESULT::SUCCESS) - m_realFrameIx--; } } - - m_window->setCaption("[Nabla Engine] Ray Tracing Pipeline"); - m_surface->present(m_currentImageAcquire.imageIndex, rendered); + = } m_api->endCapture(); m_frameAccumulationCounter++; #endif - } - - inline void update() - { - static std::chrono::microseconds previousEventTimestamp{}; + } - m_inputSystem->getDefaultMouse(&m_mouse); - m_inputSystem->getDefaultKeyboard(&m_keyboard); + inline void handleInputs() + { + if (m_args.headless) + return; - m_currentImageAcquire = m_surface->acquireNextImage(); + m_inputSystem->getDefaultMouse(&m_mouse); + m_inputSystem->getDefaultKeyboard(&m_keyboard); - struct - { - std::vector mouse{}; - std::vector keyboard{}; - } capturedEvents; + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } capturedEvents; - { - const auto& io = ImGui::GetIO(); +// const auto& io = ImGui::GetIO(); + static std::chrono::microseconds previousEventTimestamp{}; m_mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { for (const auto& e : events) // here capture @@ -730,8 +554,8 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes capturedEvents.mouse.emplace_back(e); } - }, m_logger.get()); - + }, m_logger.get() + ); m_keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { for (const auto& e : events) // here capture @@ -742,52 +566,55 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes previousEventTimestamp = e.timeStamp; capturedEvents.keyboard.emplace_back(e); } - }, m_logger.get()); + }, m_logger.get() + ); +#if 0 // ui + const SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); + const nbl::ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), m_window->getHeight() }, + .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; + m_ui.manager->update(params); +#endif } - const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); - const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); - const auto cursorPosition = m_window->getCursorControl()->getPosition(); - const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); - - const ext::imgui::UI::SUpdateParameters params = + inline bool keepRunning() override { - .mousePosition = mousePosition, - .displaySize = { m_window->getWidth(), m_window->getHeight() }, - .mouseEvents = mouseEvents, - .keyboardEvents = keyboardEvents - }; - - m_ui.manager->update(params); - } - - inline bool keepRunning() override - { - if (m_surface->irrecoverable()) - return false; + if (m_args.headless) + return true; // TODO: till renders are complete + else + return !m_presenter->irrecoverable(); + } - return true; - } + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } - inline bool onAppTerminated() override - { - return device_base_t::onAppTerminated(); - } + private: + AppArguments m_args = {}; + // + smart_refctd_ptr m_inputSystem; + InputSystem::ChannelReader m_mouse; + InputSystem::ChannelReader m_keyboard; + // + smart_refctd_ptr m_presenter; + // + smart_refctd_ptr m_renderer; + smart_refctd_ptr m_resolver; + // + smart_refctd_ptr m_sceneLoader; -private: - smart_refctd_ptr m_window; - smart_refctd_ptr> m_surface; uint64_t m_realFrameIx = 0; uint32_t m_frameAccumulationCounter = 0; - ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; - - core::smart_refctd_ptr m_inputSystem; - InputSystem::ChannelReader m_mouse; - InputSystem::ChannelReader m_keyboard; - - video::CDumbPresentationOracle m_oracle; - +#if 0 // gui struct C_UI { nbl::core::smart_refctd_ptr manager; @@ -800,17 +627,8 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes core::smart_refctd_ptr descriptorSet; } m_ui; core::smart_refctd_ptr m_guiDescriptorSetPool; - +#endif uint64_t m_rayTracingStackSize; - ShaderBindingTable m_shaderBindingTable; - - smart_refctd_ptr m_presentDs; - smart_refctd_ptr m_presentDsPool; - smart_refctd_ptr m_presentPipeline; - - - core::matrix4SIMD m_cachedModelViewProjectionMatrix; - bool m_useIndirectCommand = false; }; NBL_MAIN_FUNC(PathTracingApp) \ No newline at end of file diff --git a/40_PathTracer/src/renderer/CScene.cpp b/40_PathTracer/src/renderer/CScene.cpp index 4a3bf9edf..631d5b9b4 100644 --- a/40_PathTracer/src/renderer/CScene.cpp +++ b/40_PathTracer/src/renderer/CScene.cpp @@ -14,24 +14,26 @@ using namespace nbl::hlsl; using namespace nbl::video; // -smart_refctd_ptr CScene::createSession(const sensor_t& sensor) +smart_refctd_ptr CScene::createSession(const CSession::SCreationParams& _params) { - const auto& constants = sensor.constants; - const auto& dynDefaults = sensor.dynamicDefaults; - const auto& mutDefaults = sensor.mutableDefaults; + if (!_params) + return nullptr; + + const auto& constants = _params.sensor->constants; + const auto& dynDefaults = _params.sensor->dynamicDefaults; + const auto& mutDefaults = _params.sensor->mutableDefaults; const auto& raygen = mutDefaults.raygen; - CSession::SConstructionParams params = { - .scene = smart_refctd_ptr(this), - .cropOffsets = {mutDefaults.cropOffsetX,mutDefaults.cropOffsetY}, - .cropResolution = {mutDefaults.cropWidth,mutDefaults.cropHeight}, - .type = raygen.getType() - }; + CSession::SConstructionParams params = {std::move(_params)}; + params.scene = smart_refctd_ptr(this); + params.cropOffsets = {mutDefaults.cropOffsetX,mutDefaults.cropOffsetY}; + params.cropResolution = {mutDefaults.cropWidth,mutDefaults.cropHeight}; + params.type = raygen.getType(); const uint16_t2 renderSize(constants.width,constants.height); assert(all(params.cropOffsets CWindowPresenter::create(SCreationParams&& _params) +{ + if (!_params) + { + _params.logger.log("`CWindowPresenter::SCreationParams` are invalidl!",ILogger::ELL_ERROR); + return nullptr; + } + CWindowPresenter::SConstructorParams params = {std::move(_params)}; + + { + const auto& primDpyInfo = params.winMgr->getPrimaryDisplayInfo(); + // subtract window border/decoration elements + params.maxResolution = hlsl::max(int32_t2(primDpyInfo.resX,primDpyInfo.resY)-int32_t2(32,16),int32_t2(0,0)); + // we add an additional constraint that any dimension of maxResolution cannot be less than any dimension of minResolution + // e.g. max resolution Height cannot be less than min resolution width + if (hlsl::any(hlsl::less()(params.maxResolution.xxyy,params.minResolution.xyxy))) + { + params.logger.log( + "`CWindowPresenter::create` desktop resolution must allow for at least a %d x %d window!", + ILogger::ELL_ERROR,params.minResolution.x,params.minResolution.y + ); + return nullptr; + } + params.aspectRatioRange[0] = float64_t(params.minResolution.x)/float64_t(params.maxResolution.y); + params.aspectRatioRange[1] = float64_t(params.maxResolution.x)/float64_t(params.minResolution.y); + } + + // create the window + smart_refctd_ptr window; + { + IWindow::SCreationParams winParams = {}; + winParams.width = 64; + winParams.height = 64; + winParams.x = 32; + winParams.y = 32; + winParams.flags = IWindow::ECF_HIDDEN|IWindow::ECF_BORDERLESS|IWindow::ECF_RESIZABLE; + winParams.windowCaption = _params.initialWindowCaption; + winParams.callback = std::move(_params.callback); + window = params.winMgr->createWindow(std::move(winParams)); + } + if (!window) + { + params.logger.log("`CWindowPresenter::create` failed to create a window!",ILogger::ELL_ERROR); + return nullptr; + } + params.window = window.get(); + params.cursorControl = window->getCursorControl(); + + // create surface + { + auto surface = CSurfaceVulkanWin32::create(std::move(_params.api),move_and_static_cast(window)); + params.surface = surface_t::create(std::move(surface)); + } + if (!params.surface) + { + params.logger.log("`CWindowPresenter::create` failed to create a surface!",ILogger::ELL_ERROR); + return nullptr; + } + + return smart_refctd_ptr(new CWindowPresenter(std::move(params)),dont_grab); +} + +bool CWindowPresenter::init(CRenderer* renderer) +{ + auto& logger = m_creation.logger; + auto* device = renderer->getDevice(); + + // create swapchain and its resources (renderpass, etc.) + { + ISurface* const tmp = getSurface(); + ISwapchain::SCreationParams swapchainParams = {.surface=smart_refctd_ptr(tmp)}; + if (!swapchainParams.deduceFormat(device->getPhysicalDevice())) + { + logger.log("Could not choose a Surface Format for the Swapchain!",ILogger::ELL_ERROR); + return false; + } + + auto scResources = std::make_unique(device,swapchainParams.surfaceFormat.format,dependencies,IGPURenderpass::LOAD_OP::DONT_CARE); + if (!scResources || !scResources->getRenderpass()) + { + logger.log("Failed to create Renderpass!",ILogger::ELL_ERROR); + return false; + } + + if (!m_construction.surface->init(renderer->getCreationParams().graphicsQueue,std::move(scResources),swapchainParams.sharedParams)) + { + logger.log("Could not create Window & Surface or initialize the Surface!",ILogger::ELL_ERROR); + return false; + } + } + + // present pipeline layout + { + } + + // present pipelines + { + } + + return true; +} + +auto CWindowPresenter::acquire(const ISwapchain::SAcquireInfo& info, CSession* session) -> clock_t::time_point +{ + auto expectedPresent = clock_t::time_point::min(); // invalid value + if (!session) + return expectedPresent; + const auto& sessionParams = session->getConstructionParams(); + m_pushConstants.isCubemap = sessionParams.type==CSession::sensor_type_e::Env; + + const auto maxResolution = m_construction.maxResolution; + uint16_t2 targetResolution = m_pushConstants.isCubemap ? sessionParams.cropResolution:maxResolution; + const auto aspectRatio = double(targetResolution.x)/double(targetResolution.y); + if (m_pushConstants.isCubemap) + { + // TODO: build default perspective projection matrix given aspect ratio and smaller axis (or diagonal) FOV of the viewer +// m_pushConstants.cubemap.invProjView = ; + } + else + { + m_pushConstants.regular.crop = float32_t2(sessionParams.cropOffsets)*sessionParams.uniforms.rcpPixelSize; + // this we shall adjust to take care of aspect ratio mismatch + m_pushConstants.regular.scale = float32_t2(sessionParams.cropResolution)*sessionParams.uniforms.rcpPixelSize; + m_pushConstants.regular.limit = m_pushConstants.regular.scale+m_pushConstants.regular.crop; + // prevent extreme window size + const auto clampedAspectRatio = hlsl::clamp(aspectRatio,m_construction.aspectRatioRange[0],m_construction.aspectRatioRange[1]); + const float64_t asConv = core::min(1.0/clampedAspectRatio,clampedAspectRatio); + const uint8_t largeDim = targetResolution.x1.0 makes us wider (adds width), <1.0 makes us narrower (adds height) + if (aspectChange>1.0) + m_pushConstants.regular.scale[0] *= aspectChange; + else + m_pushConstants.regular.scale[1] /= aspectChange; + } + // `CWindowPresenter::create` aspect ratio ranges and min/max relationships help us stay valid + assert(all(minResolution<=targetResolution)&&all(targetResolution<=maxResolution)); + } + + // handle session resolution change + auto& winMgr = m_creation.winMgr; + auto* const window = m_construction.window; + if (const uint16_t2 currentResolution={window->getWidth(),window->getHeight()}; currentResolution!=targetResolution) + { + if (!winMgr->setWindowSize(window,targetResolution.x,targetResolution.y)) + return expectedPresent; + m_construction.surface->recreateSwapchain(); + } + if (window->isHidden()) + winMgr->show(window); + + + m_pushConstants.layer = 0; // TODO: cubemaps and RWMC debug + m_pushConstants.imageIndex = 0; + + if (!(m_currentImageAcquire=m_construction.surface->acquireNextImage())) + { + return expectedPresent; + } + + // TODO: Do this properly with present timing extension and a better oracle + expectedPresent = clock_t::now() + std::chrono::microseconds(16666); + + return expectedPresent; +} + +bool CWindowPresenter::beginRenderpass(IGPUCommandBuffer* cb) +{ + auto* const scRes = getSwapchainResources(); + auto* const framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex); + const uint16_t2 resolution = { framebuffer->getCreationParameters().width,framebuffer->getCreationParameters().height}; + + bool success = cb->beginDebugMarker("Present"); + const SViewport viewport[] = {{ + .x = 0u, .y = 0u, + .width = static_cast(resolution.x), + .height = static_cast(resolution.y), + .minDepth = 1.f, .maxDepth = 0.f + }}; + success = success && cb->setViewport(viewport,0); + { + const VkRect2D defaultScisors[] = {{ + .offset = {static_cast(viewport->x), static_cast(viewport->y)}, + .extent = {resolution.x,resolution.y} + }}; + cb->setScissor(defaultScisors); + const VkRect2D currentRenderArea = {.offset = {0,0}, .extent = defaultScisors->extent}; + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = nullptr, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + success = success && cb->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + } + + success = success && cb->bindGraphicsPipeline(m_present.get()); + + const auto* layout = m_present->getLayout(); +// success = success && cb->bindDescriptorSets(EPBP_GRAPHICS,layout,0,1u,&m_presentDs.get()); + success = success && cb->pushConstants(layout,ShaderStage::ESS_FRAGMENT,0,sizeof(m_pushConstants),&m_pushConstants); + ext::FullScreenTriangle::recordDrawCall(cb); + + success = success && cb->endDebugMarker(); + return success; +} +bool CWindowPresenter::endRenderpassAndPresent(IGPUCommandBuffer* cb, ISemaphore* presentBeginSignal) +{ + bool success = cb->endRenderPass(); + success = success && cb->end(); +/* + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb} + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + m_realFrameIx--; +*/ + +// m_construction.surface->present(m_currentImageAcquire.imageIndex,rendered); + return false; +} + +} + +#if 0 + ISampler::SParams samplerParams = { + .AnisotropicFilter = 0 + }; + auto defaultSampler = m_device->createSampler(samplerParams); + + { + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + .immutableSamplers = &defaultSampler + } + }; + auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; + const uint32_t setCounts[] = { 1u }; + m_presentDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + m_presentDs = m_presentDsPool->createDescriptorSet(gpuPresentDescriptorSetLayout); + + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), + .entryPoint = "main", + }; + + auto presentLayout = m_device->createPipelineLayout( + {}, + core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + } + + // write descriptors + IGPUDescriptorSet::SDescriptorInfo infos[3]; + infos[0].desc = m_gpuTlas; + + infos[1].desc = m_hdrImageView; + if (!infos[1].desc) + return logFail("Failed to create image view"); + infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + + infos[2].desc = m_hdrImageView; + infos[2].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SWriteDescriptorSet writes[] = { + {.dstSet = m_rayTracingDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[0]}, + {.dstSet = m_rayTracingDs.get(), .binding = 1, .arrayElement = 0, .count = 1, .info = &infos[1]}, + {.dstSet = m_presentDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[2] }, + }; + m_device->updateDescriptorSets(std::span(writes), {}); +#endif \ No newline at end of file diff --git a/40_PathTracer/src/renderer/resolve/CBasicRWMCResolver.cpp b/40_PathTracer/src/renderer/resolve/CBasicRWMCResolver.cpp new file mode 100644 index 000000000..0608becc2 --- /dev/null +++ b/40_PathTracer/src/renderer/resolve/CBasicRWMCResolver.cpp @@ -0,0 +1,60 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "renderer/resolve/CBasicRWMCResolver.h" + +namespace nbl::this_example +{ +using namespace nbl::core; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::hlsl; +using namespace nbl::ui; +using namespace nbl::video; + +// +smart_refctd_ptr CBasicRWMCResolver::create(SCreationParams&& _params) +{ + if (!_params) + { + _params.renderer->getLogger().log("`CBasicRWMCResolver::SCreationParams` are invalidl!", ILogger::ELL_ERROR); + return nullptr; + } + CBasicRWMCResolver::SConstructorParams params = {std::move(_params)}; + + // TODO: all the pipelines! + + return smart_refctd_ptr(new CBasicRWMCResolver(std::move(params)),dont_grab); +} + +bool CBasicRWMCResolver::changeSession_impl() +{ + return true; +} + +bool CBasicRWMCResolver::resolve(video::IGPUCommandBuffer* cb, video::IGPUBuffer* scratch) +{ + if (!cb) + return false; + + switch (m_activeSession->getConstructionParams().mode) + { + case CSession::RenderMode::Previs: [[fallthrough]]; + case CSession::RenderMode::Debug: + return true; // do nothing + case CSession::RenderMode::Beauty: + break; + default: + return false; + } + + constexpr auto raytracingStages = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; + constexpr auto firstResolveStage = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + // TODO: pipeline barrier from raytracing pipeline to first resolve pass + + auto* const cascades = m_activeSession->getRWMCCascades(); + auto* const beauty = m_activeSession->getBeauty(E_FORMAT::EF_R32_UINT); + return false; // TODO: uimplemented yet +} + +} \ No newline at end of file From 4d8570562e47cab73ad3dc2360e6976d079154f0 Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 23 Jan 2026 16:42:32 +0100 Subject: [PATCH 185/219] add the image views as sample-able descriptors to the Session DS, also transition the session images --- 40_PathTracer/include/common.hpp | 3 - 40_PathTracer/include/renderer/CSession.h | 90 +++++--------- .../renderer/present/CWindowPresenter.h | 2 +- .../include/renderer/present/IPresenter.h | 2 +- .../renderer/resolve/CBasicRWMCResolver.h | 1 + .../include/renderer/resolve/IResolver.h | 12 +- .../include/renderer/shaders/session.hlsl | 30 ++++- 40_PathTracer/main.cpp | 79 +++++++----- 40_PathTracer/src/renderer/CRenderer.cpp | 33 ++++- 40_PathTracer/src/renderer/CSession.cpp | 115 ++++++++++++------ .../src/renderer/present/CWindowPresenter.cpp | 8 +- .../renderer/resolve/CBasicRWMCResolver.cpp | 30 +++-- 12 files changed, 249 insertions(+), 156 deletions(-) diff --git a/40_PathTracer/include/common.hpp b/40_PathTracer/include/common.hpp index b33e7abfa..f7a31513d 100644 --- a/40_PathTracer/include/common.hpp +++ b/40_PathTracer/include/common.hpp @@ -9,12 +9,9 @@ using namespace nbl::asset; using namespace nbl::ui; using namespace nbl::video; -#include "nbl/ui/ICursorControl.h" #include "nbl/ext/ImGui/ImGui.h" #include "imgui/imgui_internal.h" -#include "app_resources/common.hlsl" - namespace nbl::scene { diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h index 6bf86b98b..5e724b61e 100644 --- a/40_PathTracer/include/renderer/CSession.h +++ b/40_PathTracer/include/renderer/CSession.h @@ -45,40 +45,40 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm inline bool isInitialized() const {return bool(m_active.immutables);} // - inline video::IGPUImageView* getBeauty(const asset::E_FORMAT format) const + struct SImmutables { - return m_active.immutables.beauty.getView(format); - } - - // - inline video::IGPUImageView* getRWMCCascades() const - { - return m_active.immutables.rwmcCascades.getView(asset::E_FORMAT::EF_R32G32_UINT); - } + struct SImageWithViews + { + inline operator bool() const + { + return image && !views.empty() && views.begin()->second; + } - // - inline video::IGPUImageView* getAlbedo() const - { - return m_active.immutables.albedo.getView(asset::E_FORMAT::EF_A2B10G10R10_UNORM_PACK32); - } + inline video::IGPUImageView* getView(const asset::E_FORMAT format) const + { + if (const auto found=views.find(format); found!=views.end()) + return found->second.get(); + return nullptr; + } - // - inline video::IGPUImageView* getNormal() const - { - return m_active.immutables.normal.getView(asset::E_FORMAT::EF_A2B10G10R10_UNORM_PACK32); - } + core::smart_refctd_ptr image = {}; + core::unordered_map> views = {}; + }; - // - inline video::IGPUImageView* getMotion() const - { - return m_active.immutables.motion.getView(asset::E_FORMAT::EF_A2B10G10R10_UNORM_PACK32); - } + inline operator bool() const + { + return bool(scrambleKey) && sampleCount && rwmcCascades && albedo && normal && motion && mask && ds; + } - // - inline video::IGPUImageView* getMask() const - { - return m_active.immutables.mask.getView(asset::E_FORMAT::EF_R16_UNORM); - } + // QUESTION: No idea how to marry RWMC with Temporal Denoise, do we denoise separately per cascade? + // ANSWER: RWMC relies on many spp, can use denoised/reprojected to confidence measures from other cascades. + // Shouldn't touch the previous frame, denoiser needs to know what was on screen last frame, only touch current. + // QUESTION: with temporal denoise do we turn the `sampleCount` into a `sequenceOffset` texutre? + SImageWithViews scrambleKey = {}, sampleCount = {}, beauty = {}, rwmcCascades = {}, albedo = {}, normal = {}, motion = {}, mask = {}; + // stores all the sensor data required + core::smart_refctd_ptr ds = {}; + }; + inline const SImmutables& getImmutables() const {return m_active.immutables;} // bool reset(const SSensorDynamics& newVal, video::IGPUCommandBuffer* cb); @@ -108,38 +108,6 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm // heavy VRAM data and data only needed during an active session struct SActiveResources { - struct SImageWithViews - { - inline operator bool() const - { - return image && !views.empty() && views.begin()->second; - } - - inline video::IGPUImageView* getView(const asset::E_FORMAT format) const - { - if (const auto found=views.find(format); found!=views.end()) - return found->second.get(); - return nullptr; - } - - core::smart_refctd_ptr image = {}; - core::unordered_map> views = {}; - }; - struct SImmutables - { - inline operator bool() const - { - return bool(scrambleKey) && sampleCount && rwmcCascades && albedo && normal && motion && mask && ds; - } - - // QUESTION: No idea how to marry RWMC with Temporal Denoise, do we denoise separately per cascade? - // ANSWER: RWMC relies on many spp, can use denoised/reprojected to confidence measures from other cascades. - // Shouldn't touch the previous frame, denoiser needs to know what was on screen last frame, only touch current. - // QUESTION: with temporal denoise do we turn the `sampleCount` into a `sequenceOffset` texutre? - SImageWithViews scrambleKey = {}, sampleCount = {}, beauty = {}, rwmcCascades = {}, albedo = {}, normal = {}, motion = {}, mask = {}; - // stores all the sensor data required - core::smart_refctd_ptr ds = {}; - }; SImmutables immutables = {}; SSensorDynamics prevSensorState = {}; } m_active = {}; diff --git a/40_PathTracer/include/renderer/present/CWindowPresenter.h b/40_PathTracer/include/renderer/present/CWindowPresenter.h index 4526a1225..62141a597 100644 --- a/40_PathTracer/include/renderer/present/CWindowPresenter.h +++ b/40_PathTracer/include/renderer/present/CWindowPresenter.h @@ -58,7 +58,7 @@ class CWindowPresenter : public IPresenter bool irrecoverable() const {return m_construction.surface->irrecoverable();} // returns expected presentation time for frame pacing - clock_t::time_point acquire(const video::ISwapchain::SAcquireInfo& info, CSession* session) override; + clock_t::time_point acquire(const video::ISwapchain::SAcquireInfo& info, const CSession* session) override; // bool beginRenderpass(video::IGPUCommandBuffer* cb) override; // diff --git a/40_PathTracer/include/renderer/present/IPresenter.h b/40_PathTracer/include/renderer/present/IPresenter.h index dcf3c8872..838e358fd 100644 --- a/40_PathTracer/include/renderer/present/IPresenter.h +++ b/40_PathTracer/include/renderer/present/IPresenter.h @@ -21,7 +21,7 @@ class IPresenter : public core::IReferenceCounted, public core::InterfaceUnmovab virtual bool irrecoverable() const {return false;} // returns expected presentation time for frame pacing using clock_t = std::chrono::steady_clock; - virtual clock_t::time_point acquire(const video::ISwapchain::SAcquireInfo& info, CSession* background) = 0; + virtual clock_t::time_point acquire(const video::ISwapchain::SAcquireInfo& info, const CSession* background) = 0; // virtual bool beginRenderpass(video::IGPUCommandBuffer* cb) = 0; // diff --git a/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h b/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h index acc325963..634bd44e8 100644 --- a/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h +++ b/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h @@ -46,6 +46,7 @@ class CBasicRWMCResolver : public IResolver struct SCachedConstructionParams { + core::smart_refctd_ptr layout; // TODO: autoexposure core::smart_refctd_ptr lumaMeasure; // TODO: motion vector stuff diff --git a/40_PathTracer/include/renderer/resolve/IResolver.h b/40_PathTracer/include/renderer/resolve/IResolver.h index 34b447925..d113fbd6e 100644 --- a/40_PathTracer/include/renderer/resolve/IResolver.h +++ b/40_PathTracer/include/renderer/resolve/IResolver.h @@ -14,6 +14,14 @@ namespace nbl::this_example class IResolver : public core::IReferenceCounted, public core::InterfaceUnmovable { public: + // + inline CSession* getActiveSession() {return m_activeSession.get();} + inline const CSession* getActiveSession() const {return m_activeSession.get();} + + // + virtual uint64_t computeScratchSize(const CSession* session) const = 0; + inline uint64_t computeScratchSize() const {return computeScratchSize(m_activeSession.get());} + // inline bool changeSession(core::smart_refctd_ptr&& session) { @@ -25,9 +33,7 @@ class IResolver : public core::IReferenceCounted, public core::InterfaceUnmovabl } return true; } - // - virtual uint64_t computeScratchSize(const CSession* session) const = 0; - inline uint64_t computeScratchSize() const {return computeScratchSize(m_activeSession.get());} + // virtual bool resolve(video::IGPUCommandBuffer* cv, video::IGPUBuffer* scratch) = 0; diff --git a/40_PathTracer/include/renderer/shaders/session.hlsl b/40_PathTracer/include/renderer/shaders/session.hlsl index 45941405f..1b7f97ec1 100644 --- a/40_PathTracer/include/renderer/shaders/session.hlsl +++ b/40_PathTracer/include/renderer/shaders/session.hlsl @@ -36,8 +36,6 @@ struct SSensorUniforms }; #undef MAX_PATH_DEPTH_LOG2 - - struct SensorDSBindings { NBL_CONSTEXPR_STATIC_INLINE uint32_t UBO = 0; @@ -57,6 +55,30 @@ struct SensorDSBindings NBL_CONSTEXPR_STATIC_INLINE uint32_t Motion = 7; // R16_UNORM NBL_CONSTEXPR_STATIC_INLINE uint32_t Mask = 8; + // + NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 9; + // + NBL_CONSTEXPR_STATIC_INLINE uint32_t AsSampledImages = 10; + + enum class SampledImageIndex : uint16_t + { + ScrambleKey = ScrambleKey-ScrambleKey, + SampleCount = SampleCount-ScrambleKey, + RWMCCascades = RWMCCascades-ScrambleKey, + Beauty = Beauty-ScrambleKey, + Albedo = Albedo-ScrambleKey, + Normal = Normal-ScrambleKey, + Motion = Motion-ScrambleKey, + Mask = Mask-ScrambleKey, + Count + }; +}; + +struct SensorDSBindingCounts +{ + // + NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 1; + NBL_CONSTEXPR_STATIC_INLINE uint32_t AsSampledImages = hlsl::_static_cast(SensorDSBindings::SampledImageIndex::Count); }; @@ -79,6 +101,10 @@ struct SensorDSBindings [[vk::binding(SensorDSBindings::Motion,SessionDSIndex)]] RWTexture2DArray gMotion; // could be float32_t [[vk::binding(SensorDSBindings::Mask,SessionDSIndex)]] RWTexture2DArray gMask; +// +[[vk::binding(SensorDSBindings::Samplers,SessionDSIndex)]] SamplerState gSensorSamplers[SensorDSBindingCounts::Samplers]; +// +[[vk::binding(SensorDSBindings::AsSampledImages,SessionDSIndex)]] Texture2DArray gSensorTextures[SensorDSBindingCounts::AsSampledImages]; #endif } } diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 8b19b9dfc..4954027ed 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -174,43 +174,58 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes }); // TODO: tmp code - auto scene_daily_pt = m_renderer->createScene({ - .load = m_sceneLoader->load({ - .relPath = sharedInputCWD/"mitsuba/daily_pt.xml", - .workingDirectory = localOutputCWD - }), - .converter = nullptr - }); - // the UI would have you load the zip first, then present a dropdown of what to load - // but still need to support archive mount for cmdline load - #if 0 // this particular zip goes down an unsupported path in our zip loader - auto scene_bedroom = m_sceneLoader->load({ - .relPath = sharedInputCWD/"mitsuba/bedroom.zip/scene.xml", - .workingDirectory = localOutputCWD - }); - #endif + { + m_api->startCapture(); + auto scene_daily_pt = m_renderer->createScene({ + .load = m_sceneLoader->load({ + .relPath = sharedInputCWD/"mitsuba/daily_pt.xml", + .workingDirectory = localOutputCWD + }), + .converter = nullptr + }); + // the UI would have you load the zip first, then present a dropdown of what to load + // but still need to support archive mount for cmdline load + #if 0 // this particular zip goes down an unsupported path in our zip loader + auto scene_bedroom = m_sceneLoader->load({ + .relPath = sharedInputCWD/"mitsuba/bedroom.zip/scene.xml", + .workingDirectory = localOutputCWD + }); + #endif - auto session = scene_daily_pt->createSession({ - {.mode=CSession::RenderMode::Debug}, - scene_daily_pt->getSensors().data() - }); + auto session = scene_daily_pt->createSession({ + {.mode=CSession::RenderMode::Debug}, + scene_daily_pt->getSensors().data() + }); + // init + m_utils->autoSubmit({ .queue = getGraphicsQueue() }, [&session](SIntendedSubmitInfo& info)->bool + { + return session->init(info.getCommandBufferForRecording()->cmdbuf); + } + ); + m_resolver->changeSession(std::move(session)); + m_api->endCapture(); + } // temporary test - m_presenter->acquire({},session.get()); { - auto cb = m_renderer->getConstructionParams().commandBuffers[0].get(); - cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - session->init(cb); -// renderer->render(cb); - m_resolver->resolve(cb,nullptr); -// m_presenter->acquire({},session); -// m_presenter->beginRenderpass(cb); -// m_presenter->endRenderpassAndPresent(cb); - - // TODO: submit + m_api->startCapture(); + + const auto* const session = m_resolver->getActiveSession(); + m_presenter->acquire({},session); + { + auto cb = m_renderer->getConstructionParams().commandBuffers[0].get(); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // renderer->render(cb); + m_resolver->resolve(cb,nullptr); + // m_presenter->acquire({},session); + // m_presenter->beginRenderpass(cb); + // m_presenter->endRenderpassAndPresent(cb); + + // TODO: submit + } + m_api->endCapture(); } - session->deinit(); - scene_daily_pt = nullptr; + m_resolver->getActiveSession()->deinit(); return true; diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index 58f7d3387..1d9ff5653 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -76,6 +76,7 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) smart_refctd_ptr renderingLayouts[uint8_t(CSession::RenderMode::Count)]; { constexpr auto RTStages = hlsl::ShaderStage::ESS_ALL_RAY_TRACING | hlsl::ShaderStage::ESS_COMPUTE; + constexpr auto RenderingStages = RTStages | hlsl::ShaderStage::ESS_COMPUTE; // descriptor { using binding_create_flags_t = IDescriptorSetLayoutBase::SBindingBase::E_CREATE_FLAGS; @@ -83,7 +84,7 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) .binding = SensorDSBindings::UBO, .type = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER, .createFlags = binding_create_flags_t::ECF_NONE, - .stageFlags = hlsl::ShaderStage::ESS_ALL_OR_LIBRARY, + .stageFlags = RenderingStages, .count = 1 }; // the generic single-UBO @@ -99,8 +100,8 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) return { .binding = binding, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = binding_create_flags_t::ECF_NONE, - .stageFlags = RTStages, + .createFlags = binding_create_flags_t::ECF_PARTIALLY_BOUND_BIT, + .stageFlags = RenderingStages, .count = 1 }; }; @@ -185,6 +186,15 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) } // the sensor layout { + constexpr auto ResolveAndPresentStages = hlsl::ShaderStage::ESS_COMPUTE | hlsl::ShaderStage::ESS_FRAGMENT; + const auto defaultSampler = device->createSampler({ + { + .AnisotropicFilter = 0 + }, + 0.f, + 0.f, + 0.f + }); std::initializer_list bindings = { UBOBinding, singleStorageImage(SensorDSBindings::ScrambleKey), @@ -194,7 +204,22 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) singleStorageImage(SensorDSBindings::Albedo), singleStorageImage(SensorDSBindings::Normal), singleStorageImage(SensorDSBindings::Motion), - singleStorageImage(SensorDSBindings::Mask) + singleStorageImage(SensorDSBindings::Mask), + { + .binding = SensorDSBindings::Samplers, + .type = IDescriptor::E_TYPE::ET_SAMPLER, + .createFlags = binding_create_flags_t::ECF_NONE, + .stageFlags = ResolveAndPresentStages, + .count = SensorDSBindingCounts::Samplers, + .immutableSamplers = &defaultSampler + }, + { + .binding = SensorDSBindings::AsSampledImages, + .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = binding_create_flags_t::ECF_PARTIALLY_BOUND_BIT, + .stageFlags = ResolveAndPresentStages, + .count = SensorDSBindingCounts::AsSampledImages + } }; params.sensorDSLayout = device->createDescriptorSetLayout(bindings); if (checkNullObject(params.sensorDSLayout,"Sensor Descriptor Layout")) diff --git a/40_PathTracer/src/renderer/CSession.cpp b/40_PathTracer/src/renderer/CSession.cpp index 52f3465a9..4f850e6ea 100644 --- a/40_PathTracer/src/renderer/CSession.cpp +++ b/40_PathTracer/src/renderer/CSession.cpp @@ -72,10 +72,10 @@ bool CSession::init(video::IGPUCommandBuffer* cb) const auto allowedFormatUsages = device->getPhysicalDevice()->getImageFormatUsagesOptimalTiling(); auto createImage = [&]( const std::string_view debugName, const E_FORMAT format, const uint16_t2 resolution, const uint16_t layers, std::bitset viewFormats={}, - const IGPUImage::E_USAGE_FLAGS extraUsages=IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT - ) -> SActiveResources::SImageWithViews + const IGPUImage::E_USAGE_FLAGS extraUsages=IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT|IGPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT + ) -> SImmutables::SImageWithViews { - SActiveResources::SImageWithViews retval = {}; + SImmutables::SImageWithViews retval = {}; { { IGPUImage::SCreationParams params = {}; @@ -137,28 +137,52 @@ bool CSession::init(video::IGPUCommandBuffer* cb) addWrite(binding,std::move(info)); }; immutables.scrambleKey = createImage("Scramble Key",E_FORMAT::EF_R32G32_UINT,promote(SSensorUniforms::ScrambleKeyTextureSize),1); - addImageWrite(SensorDSBindings::ScrambleKey,immutables.scrambleKey.views[E_FORMAT::EF_R32G32_UINT]); + auto scrambleKeyView = immutables.scrambleKey.views[E_FORMAT::EF_R32G32_UINT]; + addImageWrite(SensorDSBindings::ScrambleKey,scrambleKeyView); // create the render-sized images - auto createScreenSizedImage = [&](const std::string_view debugName, const E_FORMAT format, Args&&... args)->SActiveResources::SImageWithViews + auto createScreenSizedImage = [&](const std::string_view debugName, const E_FORMAT format, Args&&... args)->SImmutables::SImageWithViews { return createImage(debugName,format,m_params.uniforms.renderSize,std::forward(args)...); }; immutables.sampleCount = createScreenSizedImage("Current Sample Count",E_FORMAT::EF_R16_UINT,1); - addImageWrite(SensorDSBindings::SampleCount,immutables.sampleCount.views[E_FORMAT::EF_R16_UINT]); + auto sampleCountView = immutables.sampleCount.views[E_FORMAT::EF_R16_UINT]; + addImageWrite(SensorDSBindings::SampleCount,sampleCountView); + immutables.rwmcCascades = createScreenSizedImage("RWMC Cascades",E_FORMAT::EF_R32G32_UINT,m_params.uniforms.lastCascadeIndex+1); + auto rwmcCascadesView = immutables.rwmcCascades.views[E_FORMAT::EF_R32G32_UINT]; + addImageWrite(SensorDSBindings::RWMCCascades,rwmcCascadesView); immutables.beauty = createScreenSizedImage("Beauty",E_FORMAT::EF_E5B9G9R9_UFLOAT_PACK32,1,std::bitset().set(E_FORMAT::EF_R32_UINT)); addImageWrite(SensorDSBindings::Beauty,immutables.beauty.views[E_FORMAT::EF_R32_UINT]); - immutables.rwmcCascades = createScreenSizedImage("RWMC Cascades",E_FORMAT::EF_R32G32_UINT,m_params.uniforms.lastCascadeIndex+1); - addImageWrite(SensorDSBindings::RWMCCascades,immutables.rwmcCascades.views[E_FORMAT::EF_R32G32_UINT]); immutables.albedo = createScreenSizedImage("Albedo",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); - addImageWrite(SensorDSBindings::Albedo,immutables.albedo.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]); + auto albedoView = immutables.albedo.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]; + addImageWrite(SensorDSBindings::Albedo,albedoView); // Normal and Albedo should have used `EF_A2B10G10R10_SNORM_PACK32` but Nvidia doesn't support immutables.normal = createScreenSizedImage("Normal",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); - addImageWrite(SensorDSBindings::Normal,immutables.normal.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]); + auto normalView = immutables.normal.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]; + addImageWrite(SensorDSBindings::Normal,normalView); immutables.motion = createScreenSizedImage("Motion",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); - addImageWrite(SensorDSBindings::Motion,immutables.motion.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]); + auto motionView = immutables.motion.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]; + addImageWrite(SensorDSBindings::Motion,motionView); immutables.mask = createScreenSizedImage("Mask",E_FORMAT::EF_R16_UNORM,1); - addImageWrite(SensorDSBindings::Mask,immutables.mask.views[E_FORMAT::EF_R16_UNORM]); + auto maskView = immutables.mask.views[E_FORMAT::EF_R16_UNORM]; + addImageWrite(SensorDSBindings::Mask,maskView); + // shorthand a little bit + addImageWrite(SensorDSBindings::AsSampledImages,scrambleKeyView); + writes.back().count = SensorDSBindingCounts::AsSampledImages; + { + const auto lastInfoIx = infos.size()-1; + infos.resize(lastInfoIx+SensorDSBindingCounts::AsSampledImages,infos.back()); + const auto viewInfos = infos.data()+lastInfoIx; + using index_e = SensorDSBindings::SampledImageIndex; + viewInfos[uint8_t(index_e::ScrambleKey)].desc = scrambleKeyView; + viewInfos[uint8_t(index_e::SampleCount)].desc = sampleCountView; + viewInfos[uint8_t(index_e::RWMCCascades)].desc = rwmcCascadesView; + viewInfos[uint8_t(index_e::Beauty)].desc = immutables.beauty.views[E_FORMAT::EF_E5B9G9R9_UFLOAT_PACK32]; + viewInfos[uint8_t(index_e::Albedo)].desc = albedoView; + viewInfos[uint8_t(index_e::Normal)].desc = normalView; + viewInfos[uint8_t(index_e::Motion)].desc = motionView; + viewInfos[uint8_t(index_e::Mask)].desc = maskView; + } } // create descriptor set @@ -203,35 +227,54 @@ bool CSession::reset(const SSensorDynamics& newVal, IGPUCommandBuffer* cb) auto* const device = renderer->getDevice(); const auto& immutables = m_active.immutables; - // slam the barriers as big as possible, it wont happen frequently bool success = true; - const SMemoryBarrier before[] = { - { - .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, - .srcAccessMask = ACCESS_FLAGS::NONE, // because we don't care about reading previously written values - .dstStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT, - .dstAccessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS - } - }; - success = success && cb->pipelineBarrier(asset::EDF_NONE,{.memBarriers=before}); - auto clearImage = [cb,&success](const SActiveResources::SImageWithViews& img)->void + // slam the barriers as big as possible, it wont happen frequently + using image_barrier_t = IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t; + core::vector before; { - const IGPUImage::SSubresourceRange subresRng = { - .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .levelCount = 1, - .layerCount = img.image->getCreationParameters().arrayLayers + constexpr image_barrier_t beforeBase = { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .srcAccessMask = ACCESS_FLAGS::NONE, // because we don't care about reading previously written values + .dstStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT, + .dstAccessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS + } + }, + .subresourceRange = {}, + .newLayout = IGPUImage::LAYOUT::GENERAL + }; + before.reserve(SensorDSBindingCounts::AsSampledImages); + + auto enqueueClear = [&before,beforeBase](const SImmutables::SImageWithViews& img)->void + { + auto& out = before.emplace_back(beforeBase); + out.image = img.image.get(); + out.subresourceRange = { + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .levelCount = 1, + .layerCount = out.image->getCreationParameters().arrayLayers + }; }; + enqueueClear(immutables.sampleCount); + enqueueClear(immutables.beauty); + enqueueClear(immutables.rwmcCascades); + enqueueClear(immutables.albedo); + enqueueClear(immutables.normal); + enqueueClear(immutables.motion); + enqueueClear(immutables.mask); + success = success && cb->pipelineBarrier(asset::EDF_NONE,{.imgBarriers=before}); + } + + { IGPUCommandBuffer::SClearColorValue color; memset(&color,0,sizeof(color)); - success = success && cb->clearColorImage(img.image.get(),IGPUImage::LAYOUT::GENERAL,&color,1,&subresRng); - }; - clearImage(immutables.sampleCount); - clearImage(immutables.beauty); - clearImage(immutables.rwmcCascades); - clearImage(immutables.albedo); - clearImage(immutables.normal); - clearImage(immutables.motion); - clearImage(immutables.mask); + for (const auto& entry : before) + { + success = success && cb->clearColorImage(const_cast(entry.image),IGPUImage::LAYOUT::GENERAL,&color,1,&entry.subresourceRange); + } + } + const SMemoryBarrier after[] = { { .srcStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT, diff --git a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp index 94be634b3..d841ad5c9 100644 --- a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp +++ b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp @@ -141,6 +141,7 @@ bool CWindowPresenter::init(CRenderer* renderer) // present pipeline layout { + // } // present pipelines @@ -150,7 +151,7 @@ bool CWindowPresenter::init(CRenderer* renderer) return true; } -auto CWindowPresenter::acquire(const ISwapchain::SAcquireInfo& info, CSession* session) -> clock_t::time_point +auto CWindowPresenter::acquire(const ISwapchain::SAcquireInfo& info, const CSession* session) -> clock_t::time_point { auto expectedPresent = clock_t::time_point::min(); // invalid value if (!session) @@ -298,11 +299,6 @@ bool CWindowPresenter::endRenderpassAndPresent(IGPUCommandBuffer* cb, ISemaphore } #if 0 - ISampler::SParams samplerParams = { - .AnisotropicFilter = 0 - }; - auto defaultSampler = m_device->createSampler(samplerParams); - { const IGPUDescriptorSetLayout::SBinding bindings[] = { { diff --git a/40_PathTracer/src/renderer/resolve/CBasicRWMCResolver.cpp b/40_PathTracer/src/renderer/resolve/CBasicRWMCResolver.cpp index 0608becc2..4646684b1 100644 --- a/40_PathTracer/src/renderer/resolve/CBasicRWMCResolver.cpp +++ b/40_PathTracer/src/renderer/resolve/CBasicRWMCResolver.cpp @@ -15,14 +15,27 @@ using namespace nbl::video; // smart_refctd_ptr CBasicRWMCResolver::create(SCreationParams&& _params) { + auto logger = _params.renderer->getLogger(); if (!_params) { - _params.renderer->getLogger().log("`CBasicRWMCResolver::SCreationParams` are invalidl!", ILogger::ELL_ERROR); + logger.log("`CBasicRWMCResolver::SCreationParams` are invalid!",ILogger::ELL_ERROR); return nullptr; } CBasicRWMCResolver::SConstructorParams params = {std::move(_params)}; - // TODO: all the pipelines! + auto* const device = _params.renderer->getDevice(); + { + const SPushConstantRange pcRange[] = { + {.stageFlags=ShaderStage::ESS_COMPUTE,.offset=0,.size=sizeof(SResolveConstants)} + }; + if (!(params.layout=device->createPipelineLayout(pcRange,_params.renderer->getConstructionParams().sensorDSLayout))) + { + logger.log("`CBasicRWMCResolver::create` failed to create Pipeline Layout!",ILogger::ELL_ERROR); + return nullptr; + } + } + + // TODO: create all the pipelines! return smart_refctd_ptr(new CBasicRWMCResolver(std::move(params)),dont_grab); } @@ -48,12 +61,15 @@ bool CBasicRWMCResolver::resolve(video::IGPUCommandBuffer* cb, video::IGPUBuffer return false; } - constexpr auto raytracingStages = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; - constexpr auto firstResolveStage = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; - // TODO: pipeline barrier from raytracing pipeline to first resolve pass + const auto* const layout = m_construction.layout.get(); + { + constexpr auto raytracingStages = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; + constexpr auto firstResolveStage = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + // TODO: pipeline barrier from raytracing pipeline to first resolve pass + } + + // compute passes - auto* const cascades = m_activeSession->getRWMCCascades(); - auto* const beauty = m_activeSession->getBeauty(E_FORMAT::EF_R32_UINT); return false; // TODO: uimplemented yet } From 51a63fea4fbf395df52dc491e6928b81bdcb00ea Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 23 Jan 2026 17:05:24 +0100 Subject: [PATCH 186/219] fix tiny bugs and create the presentation pipelines --- .../renderer/present/CWindowPresenter.h | 5 ++- .../include/renderer/resolve/IResolver.h | 2 +- .../shaders/present/push_constants.hlsl | 2 +- 40_PathTracer/main.cpp | 10 ++--- .../src/renderer/present/CWindowPresenter.cpp | 37 +++++++++++++++++-- 5 files changed, 43 insertions(+), 13 deletions(-) diff --git a/40_PathTracer/include/renderer/present/CWindowPresenter.h b/40_PathTracer/include/renderer/present/CWindowPresenter.h index 62141a597..9fd180cfb 100644 --- a/40_PathTracer/include/renderer/present/CWindowPresenter.h +++ b/40_PathTracer/include/renderer/present/CWindowPresenter.h @@ -24,6 +24,7 @@ class CWindowPresenter : public IPresenter struct SCachedCreationParams { + core::smart_refctd_ptr assMan = nullptr; core::smart_refctd_ptr winMgr = nullptr; system::logger_opt_smart_ptr logger = nullptr; // for the UI, 1080p with 50% scaling @@ -31,7 +32,7 @@ class CWindowPresenter : public IPresenter }; struct SCreationParams : SCachedCreationParams { - inline operator bool() const {return winMgr && api && callback;} + inline operator bool() const {return assMan && winMgr && api && callback;} core::smart_refctd_ptr api = {}; core::smart_refctd_ptr callback = {}; @@ -88,7 +89,7 @@ class CWindowPresenter : public IPresenter SCachedConstructionParams m_construction; core::smart_refctd_ptr m_present; video::ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; - DefaultResolvePushConstants m_pushConstants; + SDefaultResolvePushConstants m_pushConstants; }; } diff --git a/40_PathTracer/include/renderer/resolve/IResolver.h b/40_PathTracer/include/renderer/resolve/IResolver.h index d113fbd6e..74e708edf 100644 --- a/40_PathTracer/include/renderer/resolve/IResolver.h +++ b/40_PathTracer/include/renderer/resolve/IResolver.h @@ -26,7 +26,7 @@ class IResolver : public core::IReferenceCounted, public core::InterfaceUnmovabl inline bool changeSession(core::smart_refctd_ptr&& session) { m_activeSession = std::move(session); - if (!session || !session->isInitialized() || !changeSession_impl()) + if (!m_activeSession || !m_activeSession->isInitialized() || !changeSession_impl()) { m_activeSession = {}; return false; diff --git a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl index 9da535329..ecd03cb2f 100644 --- a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl +++ b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl @@ -12,7 +12,7 @@ namespace this_example { using namespace nbl::hlsl; -struct DefaultResolvePushConstants +struct SDefaultResolvePushConstants { NBL_CONSTEXPR_STATIC_INLINE uint32_t ImageCount = 16; diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 4954027ed..3ec85ac0a 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -101,6 +101,7 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes { const_cast&>(m_presenter) = CWindowPresenter::create({ { + .assMan = m_assetMgr, .winMgr = m_winMgr, .logger = smart_refctd_ptr(m_logger) }, @@ -127,10 +128,10 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes if (!m_args.headless) m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); - if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; - if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; printGitInfos(); @@ -211,14 +212,13 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes m_api->startCapture(); const auto* const session = m_resolver->getActiveSession(); - m_presenter->acquire({},session); { auto cb = m_renderer->getConstructionParams().commandBuffers[0].get(); cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); // renderer->render(cb); m_resolver->resolve(cb,nullptr); - // m_presenter->acquire({},session); - // m_presenter->beginRenderpass(cb); + m_presenter->acquire({},session); + m_presenter->beginRenderpass(cb); // m_presenter->endRenderpassAndPresent(cb); // TODO: submit diff --git a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp index d841ad5c9..02772936b 100644 --- a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp +++ b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp @@ -139,16 +139,45 @@ bool CWindowPresenter::init(CRenderer* renderer) } } + // + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_creation.assMan.get(),device,logger.get().get()); + if (!fsTriProtoPPln) + { + logger.log("`CWindowPresenter::create` failed to create Full Screen Triangle protopipeline or load its vertex shader!",ILogger::ELL_ERROR); + return false; + } + // present pipeline layout + smart_refctd_ptr layout; { - // + const SPushConstantRange pcRange[] = { + {.stageFlags=ShaderStage::ESS_FRAGMENT,.offset=0,.size=sizeof(m_pushConstants)} + }; + if (!(layout=device->createPipelineLayout(pcRange,renderer->getConstructionParams().sensorDSLayout))) + { + logger.log("`CWindowPresenter::create` failed to create Pipeline Layout!",ILogger::ELL_ERROR); + return false; + } } - // present pipelines + // present pipeline + if (auto shader=renderer->loadPrecompiledShader<"present_default">(m_creation.assMan.get(),device,logger.get().get()); shader) + { + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = shader.get(), + .entryPoint = "present_default", + }; + m_present = fsTriProtoPPln.createPipeline(fragSpec,layout.get(),getRenderpass()); + if (!m_present) + logger.log("`CWindowPresenter::create` failed to create Graphics Pipeline!",ILogger::ELL_ERROR); + } + else { + logger.log("`CWindowPresenter::create` failed to load shader!",ILogger::ELL_ERROR); + return false; } - return true; + return bool(m_present); } auto CWindowPresenter::acquire(const ISwapchain::SAcquireInfo& info, const CSession* session) -> clock_t::time_point @@ -160,7 +189,7 @@ auto CWindowPresenter::acquire(const ISwapchain::SAcquireInfo& info, const CSess m_pushConstants.isCubemap = sessionParams.type==CSession::sensor_type_e::Env; const auto maxResolution = m_construction.maxResolution; - uint16_t2 targetResolution = m_pushConstants.isCubemap ? sessionParams.cropResolution:maxResolution; + uint16_t2 targetResolution = m_pushConstants.isCubemap ? maxResolution:sessionParams.cropResolution; const auto aspectRatio = double(targetResolution.x)/double(targetResolution.y); if (m_pushConstants.isCubemap) { From 739a7a23eb15050f9d3ebb9b54918d881be991f6 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 26 Jan 2026 21:42:24 +0100 Subject: [PATCH 187/219] get the presenter and headless mode fleshed out --- 40_PathTracer/include/renderer/CRenderer.h | 38 ++- 40_PathTracer/include/renderer/CScene.h | 3 + 40_PathTracer/include/renderer/CSession.h | 81 ++++--- .../renderer/present/CWindowPresenter.h | 32 ++- .../include/renderer/present/IPresenter.h | 178 +++++++++++++- 40_PathTracer/main.cpp | 221 ++++-------------- 40_PathTracer/src/renderer/CRenderer.cpp | 106 ++++++++- 40_PathTracer/src/renderer/CSession.cpp | 23 +- .../src/renderer/present/CWindowPresenter.cpp | 130 ++--------- 9 files changed, 453 insertions(+), 359 deletions(-) diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h index dd7d19eae..3b09d01db 100644 --- a/40_PathTracer/include/renderer/CRenderer.h +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -17,6 +17,7 @@ namespace nbl::this_example class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovable { + friend struct SSubmitInfo; public: // constexpr static video::SPhysicalDeviceFeatures RequiredDeviceFeatures() @@ -41,6 +42,14 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl return retval; } #endif + // + template + static inline core::smart_refctd_ptr loadPrecompiledShader( + asset::IAssetManager* assMan, video::ILogicalDevice* device, system::logger_opt_ptr logger={} + ) + { + return loadPrecompiledShader_impl(assMan,builtin::build::get_spirv_key(device),logger); + } struct SCachedCreationParams { @@ -92,9 +101,6 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl // inline video::ILogicalDevice* getDevice() const {return m_creation.utilities->getLogicalDevice();} - - // - core::smart_refctd_ptr createScene(CScene::SCreationParams&& _params); struct SCachedConstructionParams { @@ -116,15 +122,29 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl }; // inline const SCachedConstructionParams& getConstructionParams() const {return m_construction;} + + // + core::smart_refctd_ptr createScene(CScene::SCreationParams&& _params); // - template - static inline core::smart_refctd_ptr loadPrecompiledShader( - asset::IAssetManager* assMan, video::ILogicalDevice* device, system::logger_opt_ptr logger={} - ) + struct SSubmit final : core::Uncopyable { - return loadPrecompiledShader_impl(assMan,builtin::build::get_spirv_key(device),logger); - } + public: + inline SSubmit() {} + inline SSubmit(CRenderer* _renderer, video::IGPUCommandBuffer* _cb) : renderer(_renderer), cb(_cb) {assert(operator bool());} + + inline operator bool() const {return cb;} + inline operator video::IGPUCommandBuffer*() const {return cb;} + + // returns semaphore signalled by submit + video::IQueue::SSubmitInfo::SSemaphoreInfo operator()(std::span extraWaits); + + asset::PIPELINE_STAGE_FLAGS stageMask = asset::PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; + private: + CRenderer* renderer = nullptr; + video::IGPUCommandBuffer* cb = nullptr; + }; + SSubmit render(CSession* session); protected: struct SConstructorParams : SCachedCreationParams, SCachedConstructionParams diff --git a/40_PathTracer/include/renderer/CScene.h b/40_PathTracer/include/renderer/CScene.h index d11694cec..60af60e06 100644 --- a/40_PathTracer/include/renderer/CScene.h +++ b/40_PathTracer/include/renderer/CScene.h @@ -37,6 +37,9 @@ class CScene : public core::IReferenceCounted, public core::InterfaceUnmovable // inline CRenderer* getRenderer() const {return m_construction.renderer.get();} + // + inline const video::IGPUDescriptorSet* getDescriptorSet() const {return m_construction.sceneDS->getDescriptorSet();} + using sensor_t = CSceneLoader::SLoadResult::SSensor; // inline std::span getSensors() const {return m_construction.sensors;} diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h index 5e724b61e..0a0b6f28e 100644 --- a/40_PathTracer/include/renderer/CSession.h +++ b/40_PathTracer/include/renderer/CSession.h @@ -44,47 +44,65 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm // inline bool isInitialized() const {return bool(m_active.immutables);} - // - struct SImmutables + // heavy VRAM data and data only needed during an active session + struct SImageWithViews { - struct SImageWithViews + inline operator bool() const { - inline operator bool() const - { - return image && !views.empty() && views.begin()->second; - } + return image && !views.empty() && views.begin()->second; + } - inline video::IGPUImageView* getView(const asset::E_FORMAT format) const + inline video::IGPUImageView* getView(const asset::E_FORMAT format) const + { + if (const auto found=views.find(format); found!=views.end()) + return found->second.get(); + return nullptr; + } + + core::smart_refctd_ptr image = {}; + core::unordered_map> views = {}; + }; + struct SActiveResources + { + struct SImmutables + { + inline operator bool() const { - if (const auto found=views.find(format); found!=views.end()) - return found->second.get(); - return nullptr; + return bool(scrambleKey) && sampleCount && rwmcCascades && albedo && normal && motion && mask && ds; } - core::smart_refctd_ptr image = {}; - core::unordered_map> views = {}; + // QUESTION: No idea how to marry RWMC with Temporal Denoise, do we denoise separately per cascade? + // ANSWER: RWMC relies on many spp, can use denoised/reprojected to confidence measures from other cascades. + // Shouldn't touch the previous frame, denoiser needs to know what was on screen last frame, only touch current. + // QUESTION: with temporal denoise do we turn the `sampleCount` into a `sequenceOffset` texutre? + SImageWithViews scrambleKey = {}, sampleCount = {}, beauty = {}, rwmcCascades = {}, albedo = {}, normal = {}, motion = {}, mask = {}; + // stores all the sensor data required + core::smart_refctd_ptr ds = {}; }; - - inline operator bool() const - { - return bool(scrambleKey) && sampleCount && rwmcCascades && albedo && normal && motion && mask && ds; - } - - // QUESTION: No idea how to marry RWMC with Temporal Denoise, do we denoise separately per cascade? - // ANSWER: RWMC relies on many spp, can use denoised/reprojected to confidence measures from other cascades. - // Shouldn't touch the previous frame, denoiser needs to know what was on screen last frame, only touch current. - // QUESTION: with temporal denoise do we turn the `sampleCount` into a `sequenceOffset` texutre? - SImageWithViews scrambleKey = {}, sampleCount = {}, beauty = {}, rwmcCascades = {}, albedo = {}, normal = {}, motion = {}, mask = {}; - // stores all the sensor data required - core::smart_refctd_ptr ds = {}; + SImmutables immutables = {}; + SSensorDynamics currentSensorState = {}, prevSensorState = {}; }; - inline const SImmutables& getImmutables() const {return m_active.immutables;} + + // + inline const SActiveResources& getActiveResources() const {return m_active;} // bool reset(const SSensorDynamics& newVal, video::IGPUCommandBuffer* cb); // - inline void deinit() {m_active = {};} + bool update(const SSensorDynamics& newVal); + + // TODO: figure this out + inline float getProgress() const + { + return std::numeric_limits::quiet_NaN(); + } + + // + inline void deinit() + { + m_active = {}; + } // struct SConstructionParams : SCachedCreationParams @@ -105,12 +123,7 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm inline CSession(SConstructionParams&& _params) : m_params(std::move(_params)) {} const SConstructionParams m_params; - // heavy VRAM data and data only needed during an active session - struct SActiveResources - { - SImmutables immutables = {}; - SSensorDynamics prevSensorState = {}; - } m_active = {}; + SActiveResources m_active = {}; }; } diff --git a/40_PathTracer/include/renderer/present/CWindowPresenter.h b/40_PathTracer/include/renderer/present/CWindowPresenter.h index 9fd180cfb..a35f8d5c0 100644 --- a/40_PathTracer/include/renderer/present/CWindowPresenter.h +++ b/40_PathTracer/include/renderer/present/CWindowPresenter.h @@ -20,17 +20,15 @@ class CWindowPresenter : public IPresenter { public: using swapchain_resources_t = video::CDefaultSwapchainFramebuffers; - static const video::IGPURenderpass::SCreationParams::SSubpassDependency dependencies[3]; + static const video::IGPURenderpass::SCreationParams::SSubpassDependency Dependencies[3]; struct SCachedCreationParams { - core::smart_refctd_ptr assMan = nullptr; core::smart_refctd_ptr winMgr = nullptr; - system::logger_opt_smart_ptr logger = nullptr; // for the UI, 1080p with 50% scaling hlsl::uint16_t2 minResolution = {1264,698}; }; - struct SCreationParams : SCachedCreationParams + struct SCreationParams : IPresenter::SCachedCreationParams, SCachedCreationParams { inline operator bool() const {return assMan && winMgr && api && callback;} @@ -43,9 +41,6 @@ class CWindowPresenter : public IPresenter // inline const video::ISurface* getSurface() const {return m_construction.surface->getSurface();} - // - bool init(CRenderer* renderer); - // inline const SCachedCreationParams& getCreationParams() const {return m_creation;} @@ -58,13 +53,6 @@ class CWindowPresenter : public IPresenter // bool irrecoverable() const {return m_construction.surface->irrecoverable();} - // returns expected presentation time for frame pacing - clock_t::time_point acquire(const video::ISwapchain::SAcquireInfo& info, const CSession* session) override; - // - bool beginRenderpass(video::IGPUCommandBuffer* cb) override; - // - bool endRenderpassAndPresent(video::IGPUCommandBuffer* cb, video::ISemaphore* presentBeginSignal) override; - protected: using surface_t = video::CSimpleResizeSurface; struct SCachedConstructionParams @@ -75,10 +63,20 @@ class CWindowPresenter : public IPresenter hlsl::float64_t2 aspectRatioRange; hlsl::uint16_t2 maxResolution; }; - struct SConstructorParams : SCachedCreationParams, SCachedConstructionParams + struct SConstructorParams : IPresenter::SCachedCreationParams, SCachedCreationParams, SCachedConstructionParams { }; - inline CWindowPresenter(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)), m_pushConstants({}) {} + inline CWindowPresenter(SConstructorParams&& _params) : IPresenter(std::move(_params)), m_creation(std::move(_params)), m_construction(std::move(_params)), m_pushConstants({}) {} + // + bool init_impl(CRenderer* renderer) override; + + // + clock_t::time_point acquire_impl(const CSession* session, video::ISemaphore::SWaitInfo* p_currentImageAcquire) override; + bool beginRenderpass_impl() override; + inline bool present(const video::IQueue::SSubmitInfo::SSemaphoreInfo& readyToPresent) override + { + return m_construction.surface->present(m_currentImageIndex,{&readyToPresent,1}); + } inline video::ISurface* getSurface() {return m_construction.surface->getSurface();} @@ -88,8 +86,8 @@ class CWindowPresenter : public IPresenter SCachedCreationParams m_creation; SCachedConstructionParams m_construction; core::smart_refctd_ptr m_present; - video::ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; SDefaultResolvePushConstants m_pushConstants; + uint8_t m_currentImageIndex = ~0u; }; } diff --git a/40_PathTracer/include/renderer/present/IPresenter.h b/40_PathTracer/include/renderer/present/IPresenter.h index 838e358fd..81db4d9ee 100644 --- a/40_PathTracer/include/renderer/present/IPresenter.h +++ b/40_PathTracer/include/renderer/present/IPresenter.h @@ -17,15 +17,187 @@ namespace nbl::this_example class IPresenter : public core::IReferenceCounted, public core::InterfaceUnmovable { public: + constexpr static inline uint8_t CircularBufferSize = 4; + + struct SCachedCreationParams + { + core::smart_refctd_ptr assMan = nullptr; + system::logger_opt_smart_ptr logger = nullptr; + }; + // + inline const SCachedCreationParams& getCreationParams() const {return m_creation;} + + // + inline bool init(CRenderer* renderer) + { + if (!m_queue) + return isInitialized(); + + auto& logger = m_creation.logger; + auto* device = renderer->getDevice(); + m_queue = renderer->getCreationParams().graphicsQueue; + + bool success = false; + auto deinit = core::makeRAIIExiter([&]()->void{ + if (success) + return; + m_semaphore = nullptr; + std::fill(m_cmdbufs.begin(),m_cmdbufs.end(),nullptr); + }); + + using namespace nbl::system; + if (!(m_semaphore=device->createSemaphore(m_presentCount))) + { + logger.log("`IPresenter::init` failed to create a semaphore!",ILogger::ELL_ERROR); + return false; + } + + for (auto& cmdbuf : m_cmdbufs) + { + using namespace nbl::video; + auto pool=device->createCommandPool(m_queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + if (!pool || !pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&cmdbuf,1},core::smart_refctd_ptr(logger.get()))) + { + logger.log("`IPresenter::init` failed to create Command Buffer!",ILogger::ELL_ERROR); + return false; + } + } + + return success = init_impl(renderer); + } + inline bool isInitialized() const {return bool(m_semaphore);} + + // + inline video::IQueue* getQueue() const {return m_queue;} + // + inline video::ILogicalDevice* getDevice() const {return const_cast(m_semaphore->getOriginDevice());} + // virtual bool irrecoverable() const {return false;} + // returns expected presentation time for frame pacing using clock_t = std::chrono::steady_clock; - virtual clock_t::time_point acquire(const video::ISwapchain::SAcquireInfo& info, const CSession* background) = 0; + inline clock_t::time_point acquire(const CSession* background) + { + auto expectedPresent = clock_t::time_point::min(); // invalid value + m_currentImageAcquire = {}; + if (!background) + { + m_currentSessionDS = nullptr; + return expectedPresent; + } + m_currentSessionDS = background->getActiveResources().immutables.ds; + return acquire_impl(background,&m_currentImageAcquire); + } + // - virtual bool beginRenderpass(video::IGPUCommandBuffer* cb) = 0; + inline video::IGPUCommandBuffer* beginRenderpass() + { + if (!isInitialized() || !m_currentImageAcquire.semaphore) + return nullptr; + + using namespace nbl::video; + if (m_presentCount>=CircularBufferSize) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_semaphore.get(), + .value = m_presentCount+1-CircularBufferSize + } + }; + if (getDevice()->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return {}; + } + + auto* const cb = getCurrentCmdBuffer(); + cb->getPool()->reset(); + if (!cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + return nullptr; + + if (!beginRenderpass_impl()) + { + cb->end(); + return nullptr; + } + return cb; + } + // - virtual bool endRenderpassAndPresent(video::IGPUCommandBuffer* cb, video::ISemaphore* presentBeginSignal) = 0; + inline bool endRenderpassAndPresent(const video::IQueue::SSubmitInfo::SSemaphoreInfo& extraSubmitWait) + { + using namespace nbl::asset; + using namespace nbl::video; + auto* const cb = getCurrentCmdBuffer(); + if (cb->getState()!=IGPUCommandBuffer::STATE::RECORDING) + return false; + + if (!endRenderpass() || !cb->end()) + return false; + + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = m_semaphore.get(), + .value = ++m_presentCount, + .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb} + }; + const IQueue::SSubmitInfo::SSemaphoreInfo wait[] = + { + { + .semaphore = const_cast(m_currentImageAcquire.semaphore), + .value = m_currentImageAcquire.value, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + }, + extraSubmitWait + }; + IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = wait, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + if (!extraSubmitWait.semaphore) + infos->waitSemaphores; + + if (m_queue->submit(infos)!=IQueue::RESULT::SUCCESS) + { + m_presentCount--; + return false; + } + return present(*rendered); + } + + protected: + inline IPresenter(SCachedCreationParams&& _params) : m_creation(std::move(_params)) {} + virtual bool init_impl(CRenderer* renderer) = 0; + + virtual clock_t::time_point acquire_impl(const CSession* background, video::ISemaphore::SWaitInfo* p_currentImageAcquire) = 0; + virtual bool beginRenderpass_impl() = 0; + virtual bool endRenderpass() + { + return getCurrentCmdBuffer()->endRenderPass(); + } + virtual bool present(const video::IQueue::SSubmitInfo::SSemaphoreInfo& readyToPresent) = 0; + + inline video::IGPUDescriptorSet* getCurrentSessionDS() const {return m_currentSessionDS.get();} + inline video::IGPUCommandBuffer* getCurrentCmdBuffer() const {return m_cmdbufs[m_presentCount % CircularBufferSize].get();} + + private: + SCachedCreationParams m_creation; + video::CThreadSafeQueueAdapter* m_queue; + core::smart_refctd_ptr m_semaphore; + std::array,CircularBufferSize> m_cmdbufs; + video::ISemaphore::SWaitInfo m_currentImageAcquire = {}; + core::smart_refctd_ptr m_currentSessionDS; + uint64_t m_presentCount = 0; }; } diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 3ec85ac0a..64efc54fa 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -102,9 +102,11 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes const_cast&>(m_presenter) = CWindowPresenter::create({ { .assMan = m_assetMgr, - .winMgr = m_winMgr, .logger = smart_refctd_ptr(m_logger) }, + { + .winMgr = m_winMgr + }, m_api, make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem),smart_refctd_ptr(m_logger)), "Path Tracer" @@ -123,7 +125,7 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes inline bool onAppInitialized(smart_refctd_ptr&& system) override { // TODO: parse the arguments - m_args = {}; + m_args.headless = true; if (!m_args.headless) m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); @@ -197,35 +199,10 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes {.mode=CSession::RenderMode::Debug}, scene_daily_pt->getSensors().data() }); - // init - m_utils->autoSubmit({ .queue = getGraphicsQueue() }, [&session](SIntendedSubmitInfo& info)->bool - { - return session->init(info.getCommandBufferForRecording()->cmdbuf); - } - ); - m_resolver->changeSession(std::move(session)); m_api->endCapture(); - } - // temporary test - { - m_api->startCapture(); - - const auto* const session = m_resolver->getActiveSession(); - { - auto cb = m_renderer->getConstructionParams().commandBuffers[0].get(); - cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - // renderer->render(cb); - m_resolver->resolve(cb,nullptr); - m_presenter->acquire({},session); - m_presenter->beginRenderpass(cb); - // m_presenter->endRenderpassAndPresent(cb); - - // TODO: submit - } - m_api->endCapture(); + m_sessionQueue.push(std::move(session)); } - m_resolver->getActiveSession()->deinit(); return true; @@ -379,137 +356,51 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes inline void workLoopBody() override { -#if 0 - // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. - const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); - // We block for semaphores for 2 reasons here: - // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] - // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] - if (m_realFrameIx >= framesInFlight) - { - const ISemaphore::SWaitInfo cbDonePending[] = + CSession* session; + for (session=m_resolver->getActiveSession(); !session || session->getProgress()>=1.f;) { - { - .semaphore = m_semaphore.get(), - .value = m_realFrameIx + 1 - framesInFlight - } - }; - if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) - return; - } - const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - - m_api->startCapture(); - - update(); - - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[resourceIx].get(); - - if (!keepRunning()) - return; - - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - - const auto viewMatrix = m_camera.getViewMatrix(); - const auto projectionMatrix = m_camera.getProjectionMatrix(); - const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - modelMatrix.setRotation(quaternion(0, 0, 0)); - - core::matrix4SIMD modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); - if (m_cachedModelViewProjectionMatrix != modelViewProjectionMatrix) - { - m_frameAccumulationCounter = 0; - m_cachedModelViewProjectionMatrix = modelViewProjectionMatrix; - } - core::matrix4SIMD invModelViewProjectionMatrix; - modelViewProjectionMatrix.getInverseTransform(invModelViewProjectionMatrix); - - { - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; - imageBarriers[0].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, // previous frame read from framgent shader - .srcAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }; - imageBarriers[0].image = m_hdrImage.get(); - imageBarriers[0].subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }; - imageBarriers[0].oldLayout = m_frameAccumulationCounter == 0 ? IImage::LAYOUT::UNDEFINED : IImage::LAYOUT::READ_ONLY_OPTIMAL; - imageBarriers[0].newLayout = IImage::LAYOUT::GENERAL; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); - } - - // Trace Rays Pass - { - SPushConstants pc; - pc.light = m_light; - pc.proceduralGeomInfoBuffer = m_proceduralGeomInfoBuffer->getDeviceAddress(); - pc.triangleGeomInfoBuffer = m_triangleGeomInfoBuffer->getDeviceAddress(); - pc.frameCounter = m_frameAccumulationCounter; - const core::vector3df camPos = m_camera.getPosition().getAsVector3df(); - pc.camPos = { camPos.X, camPos.Y, camPos.Z }; - memcpy(&pc.invMVP, invModelViewProjectionMatrix.pointer(), sizeof(pc.invMVP)); - - cmdbuf->bindRayTracingPipeline(m_rayTracingPipeline.get()); - cmdbuf->setRayTracingPipelineStackSize(m_rayTracingStackSize); - cmdbuf->pushConstants(m_rayTracingPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, 0, sizeof(SPushConstants), &pc); - cmdbuf->bindDescriptorSets(EPBP_RAY_TRACING, m_rayTracingPipeline->getLayout(), 0, 1, &m_rayTracingDs.get()); - if (m_useIndirectCommand) - { - cmdbuf->traceRaysIndirect( - SBufferBinding{ - .offset = 0, - .buffer = m_indirectBuffer, - }); + if (m_sessionQueue.empty()) + return; + session = m_sessionQueue.front().get(); + // init + m_utils->autoSubmit({.queue=getGraphicsQueue()},[&session](SIntendedSubmitInfo& info)->bool + { + return session->init(info.getCommandBufferForRecording()->cmdbuf); + } + ); + m_resolver->changeSession(std::move(m_sessionQueue.front())); + m_sessionQueue.pop(); } - else + + m_api->startCapture(); + IQueue::SSubmitInfo::SSemaphoreInfo rendered = {}; { - cmdbuf->traceRays( - m_shaderBindingTable.raygenGroupRange, - m_shaderBindingTable.missGroupsRange, m_shaderBindingTable.missGroupsStride, - m_shaderBindingTable.hitGroupsRange, m_shaderBindingTable.hitGroupsStride, - m_shaderBindingTable.callableGroupsRange, m_shaderBindingTable.callableGroupsStride, - WIN_W, WIN_H, 1); + auto deferredSubmit = m_renderer->render(session); + if (deferredSubmit) + { + IGPUCommandBuffer* const cb = deferredSubmit; + if (!m_args.headless || session->getProgress()>=1.f) + { + m_resolver->resolve(cb,nullptr); + } + rendered = deferredSubmit({}); + } } - } + m_api->endCapture(); - // pipeline barrier - { - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; - imageBarriers[0].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }; - imageBarriers[0].image = m_hdrImage.get(); - imageBarriers[0].subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }; - imageBarriers[0].oldLayout = IImage::LAYOUT::GENERAL; - imageBarriers[0].newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + if (m_args.headless) + return; + handleInputs(); + if (!keepRunning()) + return; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); - } + m_presenter->acquire(session); + auto* const cb = m_presenter->beginRenderpass(); + { + // can do additional stuff like ImGUI work here + } + m_presenter->endRenderpassAndPresent(rendered); +#if 0 // gui // ... const auto uiParams = m_ui.manager->getCreationParameters(); @@ -518,16 +409,7 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; m_ui.manager->render(cmdbuf, waitInfo); -// ... - - { - { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS - } - }; { { @@ -535,10 +417,6 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes } } - = - } - m_api->endCapture(); - m_frameAccumulationCounter++; #endif } @@ -603,7 +481,11 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes inline bool keepRunning() override { if (m_args.headless) - return true; // TODO: till renders are complete + { + if (auto* const currentSession=m_resolver->getActiveSession(); m_sessionQueue.empty() && (!currentSession || currentSession->getProgress()>=1.f)) + return false; + return true; + } else return !m_presenter->irrecoverable(); } @@ -626,9 +508,9 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes smart_refctd_ptr m_resolver; // smart_refctd_ptr m_sceneLoader; + // + nbl::core::queue> m_sessionQueue; - uint64_t m_realFrameIx = 0; - uint32_t m_frameAccumulationCounter = 0; #if 0 // gui struct C_UI { @@ -643,7 +525,6 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes } m_ui; core::smart_refctd_ptr m_guiDescriptorSetPool; #endif - uint64_t m_rayTracingStackSize; }; NBL_MAIN_FUNC(PathTracingApp) \ No newline at end of file diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index 1d9ff5653..883df80dd 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -75,7 +75,7 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) // create the layouts smart_refctd_ptr renderingLayouts[uint8_t(CSession::RenderMode::Count)]; { - constexpr auto RTStages = hlsl::ShaderStage::ESS_ALL_RAY_TRACING | hlsl::ShaderStage::ESS_COMPUTE; + constexpr auto RTStages = hlsl::ShaderStage::ESS_ALL_RAY_TRACING;// | hlsl::ShaderStage::ESS_COMPUTE; constexpr auto RenderingStages = RTStages | hlsl::ShaderStage::ESS_COMPUTE; // descriptor { @@ -279,17 +279,6 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) } } -// TODO: move to CBasicPresenter - // the renderpass: custom dependencies, but everything else fixed from outside (format, and number of subpasses) - { -// params.presentRenderpass = device->createRenderpass(); - } - - // present pipelines - { - // TODO - } - // command buffers for (uint8_t i=0; i CRenderer::createScene(CScene::SCreationParams&& return core::smart_refctd_ptr(new CScene(std::move(params)),core::dont_grab); } + +auto CRenderer::render(CSession* session) -> SSubmit +{ + if (!session || !session->isInitialized()) + return {}; + const auto& sessionParams = session->getConstructionParams(); + auto* const device = getDevice(); + + if (m_frameIx>=SCachedConstructionParams::FramesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_construction.semaphore.get(), + .value = m_frameIx+1-SCachedConstructionParams::FramesInFlight + } + }; + if (device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return {}; + } + const auto resourceIx = m_frameIx % SCachedConstructionParams::FramesInFlight; + + auto* const cb = m_construction.commandBuffers[resourceIx].get(); + cb->getPool()->reset(); + if (!cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + return {}; + + const auto mode = sessionParams.mode; + const auto& sessionResources = session->getActiveResources(); + const auto* const pipeline = m_construction.renderingPipelines[static_cast(mode)].get(); + + bool success; + // push constants + { + switch (mode) + { + case CSession::RenderMode::Debug: + { + SDebugPushConstants pc = {sessionResources.currentSensorState}; + success = cb->pushConstants(pipeline->getLayout(),hlsl::ShaderStage::ESS_ALL_RAY_TRACING,0,sizeof(pc),&pc); + break; + } + default: + getLogger().log("Unimplemented RenderMode::%s !",ILogger::ELL_ERROR,system::to_string(mode).c_str()); + return {}; + } + } + // bind pipelines + success = success && cb->bindRayTracingPipeline(pipeline); + { + const IGPUDescriptorSet* sets[2] = {sessionParams.scene->getDescriptorSet(),sessionResources.immutables.ds.get()}; + success = success && cb->bindDescriptorSets(EPBP_RAY_TRACING,pipeline->getLayout(),0,2,sets); + } + + const auto renderSize = sessionParams.uniforms.renderSize; + success = success && cb->traceRays({},{},0,{},0,{},0,renderSize.x,renderSize.y,sessionParams.type!=CSession::sensor_type_e::Env ? 1:6); + + if (success) + return SSubmit(this,cb); + else + return {}; +} + +IQueue::SSubmitInfo::SSemaphoreInfo CRenderer::SSubmit::operator()(std::span extraWaits) +{ + if (cb) + return {}; + + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = renderer->m_construction.semaphore.get(), + .value = ++renderer->m_frameIx, + .stageMask = stageMask + } + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = {{.cmdbuf=cb}}; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = extraWaits, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + if (renderer->getCreationParams().graphicsQueue->submit(infos)!=IQueue::RESULT::SUCCESS) + { + renderer->m_frameIx--; + return {}; + } + return rendered[0]; +} + } \ No newline at end of file diff --git a/40_PathTracer/src/renderer/CSession.cpp b/40_PathTracer/src/renderer/CSession.cpp index 4f850e6ea..32ef14978 100644 --- a/40_PathTracer/src/renderer/CSession.cpp +++ b/40_PathTracer/src/renderer/CSession.cpp @@ -73,9 +73,9 @@ bool CSession::init(video::IGPUCommandBuffer* cb) auto createImage = [&]( const std::string_view debugName, const E_FORMAT format, const uint16_t2 resolution, const uint16_t layers, std::bitset viewFormats={}, const IGPUImage::E_USAGE_FLAGS extraUsages=IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT|IGPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT - ) -> SImmutables::SImageWithViews + ) -> SImageWithViews { - SImmutables::SImageWithViews retval = {}; + SImageWithViews retval = {}; { { IGPUImage::SCreationParams params = {}; @@ -141,7 +141,7 @@ bool CSession::init(video::IGPUCommandBuffer* cb) addImageWrite(SensorDSBindings::ScrambleKey,scrambleKeyView); // create the render-sized images - auto createScreenSizedImage = [&](const std::string_view debugName, const E_FORMAT format, Args&&... args)->SImmutables::SImageWithViews + auto createScreenSizedImage = [&](const std::string_view debugName, const E_FORMAT format, Args&&... args)->SImageWithViews { return createImage(debugName,format,m_params.uniforms.renderSize,std::forward(args)...); }; @@ -223,6 +223,9 @@ bool CSession::init(video::IGPUCommandBuffer* cb) bool CSession::reset(const SSensorDynamics& newVal, IGPUCommandBuffer* cb) { + if (!isInitialized()) + return false; + auto* const renderer = m_params.scene->getRenderer(); auto* const device = renderer->getDevice(); const auto& immutables = m_active.immutables; @@ -246,7 +249,7 @@ bool CSession::reset(const SSensorDynamics& newVal, IGPUCommandBuffer* cb) }; before.reserve(SensorDSBindingCounts::AsSampledImages); - auto enqueueClear = [&before,beforeBase](const SImmutables::SImageWithViews& img)->void + auto enqueueClear = [&before,beforeBase](const SImageWithViews& img)->void { auto& out = before.emplace_back(beforeBase); out.image = img.image.get(); @@ -286,8 +289,18 @@ bool CSession::reset(const SSensorDynamics& newVal, IGPUCommandBuffer* cb) success = success && cb->pipelineBarrier(asset::EDF_NONE,{.memBarriers=after}); if (success) - m_active.prevSensorState = newVal; + m_active.prevSensorState = m_active.currentSensorState = newVal; return success; } +bool CSession::update(const SSensorDynamics& newVal) +{ + if (!isInitialized()) + return false; + + m_active.prevSensorState = m_active.currentSensorState; + m_active.currentSensorState = newVal; + return true; +} + } \ No newline at end of file diff --git a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp index 02772936b..74415c996 100644 --- a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp +++ b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp @@ -14,7 +14,7 @@ using namespace nbl::video; constexpr auto SessionImageWritingStages = PIPELINE_STAGE_FLAGS::CLEAR_BIT|PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; -constexpr IGPURenderpass::SCreationParams::SSubpassDependency CWindowPresenter::dependencies[3] = +constexpr IGPURenderpass::SCreationParams::SSubpassDependency CWindowPresenter::Dependencies[3] = { { .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, @@ -110,9 +110,9 @@ smart_refctd_ptr CWindowPresenter::create(SCreationParams&& _p return smart_refctd_ptr(new CWindowPresenter(std::move(params)),dont_grab); } -bool CWindowPresenter::init(CRenderer* renderer) +bool CWindowPresenter::init_impl(CRenderer* renderer) { - auto& logger = m_creation.logger; + auto& logger = IPresenter::getCreationParams().logger; auto* device = renderer->getDevice(); // create swapchain and its resources (renderpass, etc.) @@ -125,7 +125,7 @@ bool CWindowPresenter::init(CRenderer* renderer) return false; } - auto scResources = std::make_unique(device,swapchainParams.surfaceFormat.format,dependencies,IGPURenderpass::LOAD_OP::DONT_CARE); + auto scResources = std::make_unique(device,swapchainParams.surfaceFormat.format,Dependencies,IGPURenderpass::LOAD_OP::DONT_CARE); if (!scResources || !scResources->getRenderpass()) { logger.log("Failed to create Renderpass!",ILogger::ELL_ERROR); @@ -140,7 +140,8 @@ bool CWindowPresenter::init(CRenderer* renderer) } // - ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_creation.assMan.get(),device,logger.get().get()); + auto* const assMan = IPresenter::getCreationParams().assMan.get(); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(assMan,device,logger.get().get()); if (!fsTriProtoPPln) { logger.log("`CWindowPresenter::create` failed to create Full Screen Triangle protopipeline or load its vertex shader!",ILogger::ELL_ERROR); @@ -161,7 +162,7 @@ bool CWindowPresenter::init(CRenderer* renderer) } // present pipeline - if (auto shader=renderer->loadPrecompiledShader<"present_default">(m_creation.assMan.get(),device,logger.get().get()); shader) + if (auto shader=renderer->loadPrecompiledShader<"present_default">(assMan,device,logger.get().get()); shader) { const IGPUPipelineBase::SShaderSpecInfo fragSpec = { .shader = shader.get(), @@ -180,7 +181,7 @@ bool CWindowPresenter::init(CRenderer* renderer) return bool(m_present); } -auto CWindowPresenter::acquire(const ISwapchain::SAcquireInfo& info, const CSession* session) -> clock_t::time_point +auto CWindowPresenter::acquire_impl(const CSession* session, ISemaphore::SWaitInfo* p_currentImageAcquire) -> clock_t::time_point { auto expectedPresent = clock_t::time_point::min(); // invalid value if (!session) @@ -235,14 +236,14 @@ auto CWindowPresenter::acquire(const ISwapchain::SAcquireInfo& info, const CSess if (window->isHidden()) winMgr->show(window); - m_pushConstants.layer = 0; // TODO: cubemaps and RWMC debug m_pushConstants.imageIndex = 0; - if (!(m_currentImageAcquire=m_construction.surface->acquireNextImage())) - { + auto acquireResult = m_construction.surface->acquireNextImage(); + *p_currentImageAcquire = {.semaphore=acquireResult.semaphore,.value=acquireResult.acquireCount}; + m_currentImageIndex = acquireResult.imageIndex; + if (!acquireResult) return expectedPresent; - } // TODO: Do this properly with present timing extension and a better oracle expectedPresent = clock_t::now() + std::chrono::microseconds(16666); @@ -250,12 +251,13 @@ auto CWindowPresenter::acquire(const ISwapchain::SAcquireInfo& info, const CSess return expectedPresent; } -bool CWindowPresenter::beginRenderpass(IGPUCommandBuffer* cb) +bool CWindowPresenter::beginRenderpass_impl() { auto* const scRes = getSwapchainResources(); - auto* const framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex); + auto* const framebuffer = scRes->getFramebuffer(m_currentImageIndex); const uint16_t2 resolution = { framebuffer->getCreationParameters().width,framebuffer->getCreationParameters().height}; + auto* const cb = getCurrentCmdBuffer(); bool success = cb->beginDebugMarker("Present"); const SViewport viewport[] = {{ .x = 0u, .y = 0u, @@ -273,7 +275,7 @@ bool CWindowPresenter::beginRenderpass(IGPUCommandBuffer* cb) const VkRect2D currentRenderArea = {.offset = {0,0}, .extent = defaultScisors->extent}; const IGPUCommandBuffer::SRenderpassBeginInfo info = { - .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .framebuffer = framebuffer, .colorClearValues = nullptr, .depthStencilClearValues = nullptr, .renderArea = currentRenderArea @@ -284,105 +286,15 @@ bool CWindowPresenter::beginRenderpass(IGPUCommandBuffer* cb) success = success && cb->bindGraphicsPipeline(m_present.get()); const auto* layout = m_present->getLayout(); -// success = success && cb->bindDescriptorSets(EPBP_GRAPHICS,layout,0,1u,&m_presentDs.get()); + { + const auto* ds = getCurrentSessionDS(); + success = success && cb->bindDescriptorSets(EPBP_GRAPHICS,layout,0,1u,&ds); + } success = success && cb->pushConstants(layout,ShaderStage::ESS_FRAGMENT,0,sizeof(m_pushConstants),&m_pushConstants); ext::FullScreenTriangle::recordDrawCall(cb); success = success && cb->endDebugMarker(); return success; } -bool CWindowPresenter::endRenderpassAndPresent(IGPUCommandBuffer* cb, ISemaphore* presentBeginSignal) -{ - bool success = cb->endRenderPass(); - success = success && cb->end(); -/* - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = - { - {.cmdbuf = cb} - }; - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = - { - { - .semaphore = m_currentImageAcquire.semaphore, - .value = m_currentImageAcquire.acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - const IQueue::SSubmitInfo infos[] = - { - { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = rendered - } - }; - - if (queue->submit(infos) != IQueue::RESULT::SUCCESS) - m_realFrameIx--; -*/ - -// m_construction.surface->present(m_currentImageAcquire.imageIndex,rendered); - return false; -} - -} - -#if 0 - { - const IGPUDescriptorSetLayout::SBinding bindings[] = { - { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - .immutableSamplers = &defaultSampler - } - }; - auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(bindings); - const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; - const uint32_t setCounts[] = { 1u }; - m_presentDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - m_presentDs = m_presentDsPool->createDescriptorSet(gpuPresentDescriptorSetLayout); - - auto scRes = static_cast(m_surface->getSwapchainResources()); - ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); - if (!fsTriProtoPPln) - return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); - - const IGPUPipelineBase::SShaderSpecInfo fragSpec = { - .shader = fragmentShader.get(), - .entryPoint = "main", - }; - auto presentLayout = m_device->createPipelineLayout( - {}, - core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), - nullptr, - nullptr, - nullptr - ); - m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); - if (!m_presentPipeline) - return logFail("Could not create Graphics Pipeline!"); - } - - // write descriptors - IGPUDescriptorSet::SDescriptorInfo infos[3]; - infos[0].desc = m_gpuTlas; - - infos[1].desc = m_hdrImageView; - if (!infos[1].desc) - return logFail("Failed to create image view"); - infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; - - infos[2].desc = m_hdrImageView; - infos[2].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - IGPUDescriptorSet::SWriteDescriptorSet writes[] = { - {.dstSet = m_rayTracingDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[0]}, - {.dstSet = m_rayTracingDs.get(), .binding = 1, .arrayElement = 0, .count = 1, .info = &infos[1]}, - {.dstSet = m_presentDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[2] }, - }; - m_device->updateDescriptorSets(std::span(writes), {}); -#endif \ No newline at end of file +} \ No newline at end of file From 483dd78422db4d3f1cfe4af2e45464e7eac26c72 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 26 Jan 2026 22:29:23 +0100 Subject: [PATCH 188/219] attempt to test the non-headless mode --- .../app_resources/present/default.hlsl | 26 ++++++++++++-- .../include/renderer/present/IPresenter.h | 2 +- .../renderer/resolve/CBasicRWMCResolver.h | 2 +- .../shaders/present/push_constants.hlsl | 10 +++--- 40_PathTracer/main.cpp | 36 ++++++++++++++----- 40_PathTracer/src/renderer/CRenderer.cpp | 2 +- .../src/renderer/present/CWindowPresenter.cpp | 18 +++++----- 7 files changed, 66 insertions(+), 30 deletions(-) diff --git a/40_PathTracer/app_resources/present/default.hlsl b/40_PathTracer/app_resources/present/default.hlsl index 97cfb7c0e..c5f88504d 100644 --- a/40_PathTracer/app_resources/present/default.hlsl +++ b/40_PathTracer/app_resources/present/default.hlsl @@ -3,20 +3,40 @@ // For conditions of distribution and use, see copyright notice in nabla.h +#include "renderer/shaders/session.hlsl" #include "renderer/shaders/present/push_constants.hlsl" // vertex shader is provided by the fullScreenTriangle extension #include + using namespace nbl::hlsl; using namespace nbl::this_example; using namespace ext::FullScreenTriangle; -[[vk::binding(0)]] Texture2DArray images[DefaultResolvePushConstants::ImageCount]; -[[vk::binding(1)]] SamplerState samplerState; [[vk::push_constant]] DefaultResolvePushConstants pc; [shader("pixel")] float32_t4 present_default(SVertexAttributes vxAttr) : SV_Target0 { - return float32_t4(images[pc.imageIndex].SampleLevel(samplerState,float32_t3(vxAttr.uv,0.f),0.f).rgb,1.0f); + float32_t3 tint = promote(1); + float32_t3 uv; + if (pc.isCubemap) + { + const float32_t4 ndc = float32_t4(vxAttr.uv*2.f-float32_t2(1,1),1.f,1.f); + float32_t4 tmp = mul(pc.operator SDefaultResolvePushConstants::Cubemap().invProjView,ndc); + float32_t3 dir = tmp.xyz/tmp.www; + // TODO: convert dir to cubemap face, and the UV coord + tint = float32_t3(1,0,1); // right now go magenta error colour + } + else + { + const SDefaultResolvePushConstants::Regular regular = pc.operator SDefaultResolvePushConstants::Regular(); + uv.xy = vxAttr.uv; + if (regular.scale<0.f) + uv.y *= -regular.scale; + else + uv.y *= regular.scale; + uv.z = pc.layer; + } + return float32_t4(gSensorTextures[pc.imageIndex].SampleLevel(gSensorSamplers[0],uv,0.f).rgb*tint,1.0f); } diff --git a/40_PathTracer/include/renderer/present/IPresenter.h b/40_PathTracer/include/renderer/present/IPresenter.h index 81db4d9ee..909c08708 100644 --- a/40_PathTracer/include/renderer/present/IPresenter.h +++ b/40_PathTracer/include/renderer/present/IPresenter.h @@ -165,7 +165,7 @@ class IPresenter : public core::IReferenceCounted, public core::InterfaceUnmovab } }; if (!extraSubmitWait.semaphore) - infos->waitSemaphores; + infos->waitSemaphores = {wait,1}; if (m_queue->submit(infos)!=IQueue::RESULT::SUCCESS) { diff --git a/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h b/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h index 634bd44e8..339e7cb7c 100644 --- a/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h +++ b/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h @@ -75,7 +75,7 @@ class CBasicRWMCResolver : public IResolver switch (session->getConstructionParams().mode) { case CSession::RenderMode::Previs: [[fallthrough]]; - case CSession::RenderMode::Debug: [[fallthrough]]; + case CSession::RenderMode::Debug: return 0ull; case CSession::RenderMode::Beauty: return 0ull; // for now, as long as we blit diff --git a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl index ecd03cb2f..cc90da273 100644 --- a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl +++ b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl @@ -18,12 +18,10 @@ struct SDefaultResolvePushConstants struct Regular { - // - float32_t2 scale; - // post-scale addition to uv coordinate to get to beginning - float32_t2 crop; - // Because `scale*uv+cropOffsets!=1.0` where the image is supposed to end - float32_t2 limit; + // if positive then we multiply X, otherwise Y + float32_t scale; + // to visualize what will get cropped out + float32_t2 _min,_max; }; struct Cubemap { diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 64efc54fa..0b6162a34 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -125,7 +125,7 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes inline bool onAppInitialized(smart_refctd_ptr&& system) override { // TODO: parse the arguments - m_args.headless = true; + m_args.headless = false; if (!m_args.headless) m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); @@ -194,14 +194,32 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes .workingDirectory = localOutputCWD }); #endif - - auto session = scene_daily_pt->createSession({ - {.mode=CSession::RenderMode::Debug}, - scene_daily_pt->getSensors().data() - }); m_api->endCapture(); - m_sessionQueue.push(std::move(session)); + // quick test code + nbl::core::vector sensors(3,scene_daily_pt->getSensors().front()); + { + sensors[1].constants.width = 640; + sensors[1].constants.height = 360; + sensors[1].mutableDefaults.cropOffsetX = 0; + sensors[1].mutableDefaults.cropOffsetY = 0; + sensors[1].mutableDefaults.cropWidth = 0; + sensors[1].mutableDefaults.cropHeight = 0; + } + { + sensors[2].mutableDefaults.cropWidth = 5120; + sensors[2].mutableDefaults.cropHeight = 2880; + sensors[2].mutableDefaults.cropOffsetX = 128; + sensors[2].mutableDefaults.cropOffsetY = 128; + sensors[2].constants.width = sensors[2].mutableDefaults.cropWidth+2*sensors[2].mutableDefaults.cropOffsetX; + sensors[2].constants.height = sensors[2].mutableDefaults.cropHeight+2*sensors[2].mutableDefaults.cropOffsetY; + } + for (const auto& sensor : sensors) + m_sessionQueue.push( + scene_daily_pt->createSession({ + {.mode=CSession::RenderMode::Debug},&sensor + }) + ); } return true; @@ -357,8 +375,10 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes inline void workLoopBody() override { CSession* session; - for (session=m_resolver->getActiveSession(); !session || session->getProgress()>=1.f;) + volatile bool skip = true; // skip using the debugger + for (session=m_resolver->getActiveSession(); !session || session->getProgress()>=1.f || skip;) { + skip = false; if (m_sessionQueue.empty()) return; session = m_sessionQueue.front().get(); diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index 883df80dd..7feddfe86 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -425,7 +425,7 @@ auto CRenderer::render(CSession* session) -> SSubmit } const auto renderSize = sessionParams.uniforms.renderSize; - success = success && cb->traceRays({},{},0,{},0,{},0,renderSize.x,renderSize.y,sessionParams.type!=CSession::sensor_type_e::Env ? 1:6); +// success = success && cb->traceRays({},{},0,{},0,{},0,renderSize.x,renderSize.y,sessionParams.type!=CSession::sensor_type_e::Env ? 1:6); if (success) return SSubmit(this,cb); diff --git a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp index 74415c996..051c8054b 100644 --- a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp +++ b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp @@ -55,7 +55,7 @@ smart_refctd_ptr CWindowPresenter::create(SCreationParams&& _p _params.logger.log("`CWindowPresenter::SCreationParams` are invalidl!",ILogger::ELL_ERROR); return nullptr; } - CWindowPresenter::SConstructorParams params = {std::move(_params)}; + CWindowPresenter::SConstructorParams params = {std::move(_params),std::move(_params)}; { const auto& primDpyInfo = params.winMgr->getPrimaryDisplayInfo(); @@ -190,7 +190,7 @@ auto CWindowPresenter::acquire_impl(const CSession* session, ISemaphore::SWaitIn m_pushConstants.isCubemap = sessionParams.type==CSession::sensor_type_e::Env; const auto maxResolution = m_construction.maxResolution; - uint16_t2 targetResolution = m_pushConstants.isCubemap ? maxResolution:sessionParams.cropResolution; + uint16_t2 targetResolution = m_pushConstants.isCubemap ? maxResolution:sessionParams.uniforms.renderSize; const auto aspectRatio = double(targetResolution.x)/double(targetResolution.y); if (m_pushConstants.isCubemap) { @@ -199,10 +199,8 @@ auto CWindowPresenter::acquire_impl(const CSession* session, ISemaphore::SWaitIn } else { - m_pushConstants.regular.crop = float32_t2(sessionParams.cropOffsets)*sessionParams.uniforms.rcpPixelSize; - // this we shall adjust to take care of aspect ratio mismatch - m_pushConstants.regular.scale = float32_t2(sessionParams.cropResolution)*sessionParams.uniforms.rcpPixelSize; - m_pushConstants.regular.limit = m_pushConstants.regular.scale+m_pushConstants.regular.crop; + m_pushConstants.regular._min = float32_t2(sessionParams.cropOffsets)*sessionParams.uniforms.rcpPixelSize; + m_pushConstants.regular._max = float32_t2(sessionParams.cropResolution+sessionParams.cropOffsets)*sessionParams.uniforms.rcpPixelSize; // prevent extreme window size const auto clampedAspectRatio = hlsl::clamp(aspectRatio,m_construction.aspectRatioRange[0],m_construction.aspectRatioRange[1]); const float64_t asConv = core::min(1.0/clampedAspectRatio,clampedAspectRatio); @@ -215,10 +213,10 @@ auto CWindowPresenter::acquire_impl(const CSession* session, ISemaphore::SWaitIn { const auto aspectChange = clampedAspectRatio/aspectRatio; // >1.0 makes us wider (adds width), <1.0 makes us narrower (adds height) - if (aspectChange>1.0) - m_pushConstants.regular.scale[0] *= aspectChange; + if (aspectChange<1.0) + m_pushConstants.regular.scale = -1.f/aspectChange; else - m_pushConstants.regular.scale[1] /= aspectChange; + m_pushConstants.regular.scale = aspectChange; } // `CWindowPresenter::create` aspect ratio ranges and min/max relationships help us stay valid assert(all(minResolution<=targetResolution)&&all(targetResolution<=maxResolution)); @@ -271,7 +269,7 @@ bool CWindowPresenter::beginRenderpass_impl() .offset = {static_cast(viewport->x), static_cast(viewport->y)}, .extent = {resolution.x,resolution.y} }}; - cb->setScissor(defaultScisors); + success = success && cb->setScissor(defaultScisors); const VkRect2D currentRenderArea = {.offset = {0,0}, .extent = defaultScisors->extent}; const IGPUCommandBuffer::SRenderpassBeginInfo info = { From c7c54d27b641dc207fe02c4ed3c6b59659c50073 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 26 Jan 2026 22:42:57 +0100 Subject: [PATCH 189/219] finish some of the headless mode stuff --- .../app_resources/present/default.hlsl | 2 ++ 40_PathTracer/main.cpp | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/40_PathTracer/app_resources/present/default.hlsl b/40_PathTracer/app_resources/present/default.hlsl index c5f88504d..1b384e495 100644 --- a/40_PathTracer/app_resources/present/default.hlsl +++ b/40_PathTracer/app_resources/present/default.hlsl @@ -37,6 +37,8 @@ float32_t4 present_default(SVertexAttributes vxAttr) : SV_Target0 else uv.y *= regular.scale; uv.z = pc.layer; + if (any(regular._min>uv.xy || regular._max Date: Mon, 26 Jan 2026 23:22:09 +0100 Subject: [PATCH 190/219] get the resizing to work --- .../app_resources/present/default.hlsl | 6 +--- .../renderer/present/CWindowPresenter.h | 2 +- .../shaders/present/push_constants.hlsl | 4 +-- .../src/renderer/present/CWindowPresenter.cpp | 31 +++++++++---------- 4 files changed, 19 insertions(+), 24 deletions(-) diff --git a/40_PathTracer/app_resources/present/default.hlsl b/40_PathTracer/app_resources/present/default.hlsl index 1b384e495..8c29734a4 100644 --- a/40_PathTracer/app_resources/present/default.hlsl +++ b/40_PathTracer/app_resources/present/default.hlsl @@ -31,11 +31,7 @@ float32_t4 present_default(SVertexAttributes vxAttr) : SV_Target0 else { const SDefaultResolvePushConstants::Regular regular = pc.operator SDefaultResolvePushConstants::Regular(); - uv.xy = vxAttr.uv; - if (regular.scale<0.f) - uv.y *= -regular.scale; - else - uv.y *= regular.scale; + uv.xy = vxAttr.uv*regular.scale; uv.z = pc.layer; if (any(regular._min>uv.xy || regular._max winMgr = nullptr; // for the UI, 1080p with 50% scaling - hlsl::uint16_t2 minResolution = {1264,698}; + hlsl::uint16_t2 minResolution = {1248,688}; }; struct SCreationParams : IPresenter::SCachedCreationParams, SCachedCreationParams { diff --git a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl index cc90da273..c6551fad9 100644 --- a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl +++ b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl @@ -18,8 +18,8 @@ struct SDefaultResolvePushConstants struct Regular { - // if positive then we multiply X, otherwise Y - float32_t scale; + // if more than 1.f + float32_t2 scale; // to visualize what will get cropped out float32_t2 _min,_max; }; diff --git a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp index 051c8054b..07aef440b 100644 --- a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp +++ b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp @@ -60,7 +60,7 @@ smart_refctd_ptr CWindowPresenter::create(SCreationParams&& _p { const auto& primDpyInfo = params.winMgr->getPrimaryDisplayInfo(); // subtract window border/decoration elements - params.maxResolution = hlsl::max(int32_t2(primDpyInfo.resX,primDpyInfo.resY)-int32_t2(32,16),int32_t2(0,0)); + params.maxResolution = hlsl::max(int32_t2(primDpyInfo.resX,primDpyInfo.resY)-int32_t2(32,32),int32_t2(0,0)); // we add an additional constraint that any dimension of maxResolution cannot be less than any dimension of minResolution // e.g. max resolution Height cannot be less than min resolution width if (hlsl::any(hlsl::less()(params.maxResolution.xxyy,params.minResolution.xyxy))) @@ -191,7 +191,6 @@ auto CWindowPresenter::acquire_impl(const CSession* session, ISemaphore::SWaitIn const auto maxResolution = m_construction.maxResolution; uint16_t2 targetResolution = m_pushConstants.isCubemap ? maxResolution:sessionParams.uniforms.renderSize; - const auto aspectRatio = double(targetResolution.x)/double(targetResolution.y); if (m_pushConstants.isCubemap) { // TODO: build default perspective projection matrix given aspect ratio and smaller axis (or diagonal) FOV of the viewer @@ -202,21 +201,21 @@ auto CWindowPresenter::acquire_impl(const CSession* session, ISemaphore::SWaitIn m_pushConstants.regular._min = float32_t2(sessionParams.cropOffsets)*sessionParams.uniforms.rcpPixelSize; m_pushConstants.regular._max = float32_t2(sessionParams.cropResolution+sessionParams.cropOffsets)*sessionParams.uniforms.rcpPixelSize; // prevent extreme window size - const auto clampedAspectRatio = hlsl::clamp(aspectRatio,m_construction.aspectRatioRange[0],m_construction.aspectRatioRange[1]); - const float64_t asConv = core::min(1.0/clampedAspectRatio,clampedAspectRatio); - const uint8_t largeDim = targetResolution.x1.0 makes us wider (adds width), <1.0 makes us narrower (adds height) - if (aspectChange<1.0) - m_pushConstants.regular.scale = -1.f/aspectChange; - else - m_pushConstants.regular.scale = aspectChange; + const auto tmp = float64_t(minResolution[i])/float64_t(targetResolution[i]); + if (tmp>1.0) + { + targetResolution[i] = minResolution[i]; + m_pushConstants.regular.scale[i] = tmp; + } } // `CWindowPresenter::create` aspect ratio ranges and min/max relationships help us stay valid assert(all(minResolution<=targetResolution)&&all(targetResolution<=maxResolution)); @@ -289,7 +288,7 @@ bool CWindowPresenter::beginRenderpass_impl() success = success && cb->bindDescriptorSets(EPBP_GRAPHICS,layout,0,1u,&ds); } success = success && cb->pushConstants(layout,ShaderStage::ESS_FRAGMENT,0,sizeof(m_pushConstants),&m_pushConstants); - ext::FullScreenTriangle::recordDrawCall(cb); +// ext::FullScreenTriangle::recordDrawCall(cb); success = success && cb->endDebugMarker(); return success; From 4a011d55c99715b2a6ca769cb1824af20b68764f Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 27 Jan 2026 00:36:19 +0100 Subject: [PATCH 191/219] post merge fixes --- 40_PathTracer/CMakeLists.txt | 3 --- 40_PathTracer/app_resources/present/default.hlsl | 14 +++++++------- .../renderer/shaders/present/push_constants.hlsl | 4 ++-- .../include/renderer/shaders/session.hlsl | 2 +- 40_PathTracer/src/renderer/CSession.cpp | 6 +++--- 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index 022910c9b..d491e4446 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -22,8 +22,6 @@ list(APPEND NBL_EXAMPLE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/resolve/CBasicRWMCResolver.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/present/CWindowPresenter.cpp" ) -list(APPEND NBL_ -) nbl_create_executable_project("${NBL_EXAMPLE_SOURCES}" "${}" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") @@ -61,7 +59,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} diff --git a/40_PathTracer/app_resources/present/default.hlsl b/40_PathTracer/app_resources/present/default.hlsl index 8c29734a4..d063292da 100644 --- a/40_PathTracer/app_resources/present/default.hlsl +++ b/40_PathTracer/app_resources/present/default.hlsl @@ -2,38 +2,38 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - -#include "renderer/shaders/session.hlsl" #include "renderer/shaders/present/push_constants.hlsl" // vertex shader is provided by the fullScreenTriangle extension #include +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t SessionDSIndex = 0; +#include "renderer/shaders/session.hlsl" using namespace nbl::hlsl; using namespace nbl::this_example; using namespace ext::FullScreenTriangle; -[[vk::push_constant]] DefaultResolvePushConstants pc; +[[vk::push_constant]] SDefaultResolvePushConstants pc; [shader("pixel")] float32_t4 present_default(SVertexAttributes vxAttr) : SV_Target0 { - float32_t3 tint = promote(1); + float32_t3 tint = promote(1.f); float32_t3 uv; if (pc.isCubemap) { const float32_t4 ndc = float32_t4(vxAttr.uv*2.f-float32_t2(1,1),1.f,1.f); - float32_t4 tmp = mul(pc.operator SDefaultResolvePushConstants::Cubemap().invProjView,ndc); + float32_t4 tmp = mul(pc.cubemap().invProjView,ndc); float32_t3 dir = tmp.xyz/tmp.www; // TODO: convert dir to cubemap face, and the UV coord tint = float32_t3(1,0,1); // right now go magenta error colour } else { - const SDefaultResolvePushConstants::Regular regular = pc.operator SDefaultResolvePushConstants::Regular(); + const SDefaultResolvePushConstants::Regular regular = pc.regular(); uv.xy = vxAttr.uv*regular.scale; uv.z = pc.layer; - if (any(regular._min>uv.xy || regular._maxuv.xy) || any(regular._max(SensorDSBindings::SampledImageIndex::Count); + NBL_CONSTEXPR_STATIC_INLINE uint32_t AsSampledImages = SensorDSBindings::Samplers-SensorDSBindings::ScrambleKey; }; diff --git a/40_PathTracer/src/renderer/CSession.cpp b/40_PathTracer/src/renderer/CSession.cpp index 32ef14978..a20a8a7ae 100644 --- a/40_PathTracer/src/renderer/CSession.cpp +++ b/40_PathTracer/src/renderer/CSession.cpp @@ -170,9 +170,9 @@ bool CSession::init(video::IGPUCommandBuffer* cb) addImageWrite(SensorDSBindings::AsSampledImages,scrambleKeyView); writes.back().count = SensorDSBindingCounts::AsSampledImages; { - const auto lastInfoIx = infos.size()-1; - infos.resize(lastInfoIx+SensorDSBindingCounts::AsSampledImages,infos.back()); - const auto viewInfos = infos.data()+lastInfoIx; + const auto oldSize = infos.size(); + infos.resize(oldSize +SensorDSBindingCounts::AsSampledImages,infos.back()); + const auto viewInfos = infos.data()+oldSize-1; using index_e = SensorDSBindings::SampledImageIndex; viewInfos[uint8_t(index_e::ScrambleKey)].desc = scrambleKeyView; viewInfos[uint8_t(index_e::SampleCount)].desc = sampleCountView; From ae80ef181a1fabb4cd142e2df40ebe4557b48123 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 27 Jan 2026 10:08:59 +0100 Subject: [PATCH 192/219] handle window closing in ex 40 --- 40_PathTracer/include/renderer/present/CWindowPresenter.h | 2 +- 40_PathTracer/main.cpp | 4 ++++ 40_PathTracer/src/renderer/present/CWindowPresenter.cpp | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/40_PathTracer/include/renderer/present/CWindowPresenter.h b/40_PathTracer/include/renderer/present/CWindowPresenter.h index 5b27c291f..08715445f 100644 --- a/40_PathTracer/include/renderer/present/CWindowPresenter.h +++ b/40_PathTracer/include/renderer/present/CWindowPresenter.h @@ -51,7 +51,7 @@ class CWindowPresenter : public IPresenter inline const video::IGPURenderpass* getRenderpass() const {return getSwapchainResources()->getRenderpass();} // - bool irrecoverable() const {return m_construction.surface->irrecoverable();} + bool irrecoverable() const {return m_construction.surface->irrecoverable() || !m_construction.surface->isWindowOpen();} protected: using surface_t = video::CSimpleResizeSurface; diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index 0215e5e66..f4dbdd84c 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -395,7 +395,11 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes { skip = false; if (m_sessionQueue.empty()) + { + if (!m_args.headless) + handleInputs(); return; + } session = m_sessionQueue.front().get(); // init m_utils->autoSubmit({.queue=getGraphicsQueue()},[&session](SIntendedSubmitInfo& info)->bool diff --git a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp index 07aef440b..87cd816b7 100644 --- a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp +++ b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp @@ -288,7 +288,7 @@ bool CWindowPresenter::beginRenderpass_impl() success = success && cb->bindDescriptorSets(EPBP_GRAPHICS,layout,0,1u,&ds); } success = success && cb->pushConstants(layout,ShaderStage::ESS_FRAGMENT,0,sizeof(m_pushConstants),&m_pushConstants); -// ext::FullScreenTriangle::recordDrawCall(cb); + ext::FullScreenTriangle::recordDrawCall(cb); success = success && cb->endDebugMarker(); return success; From 4d917ddeb3de1109ccf7250a591b0550235979aa Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 27 Jan 2026 10:12:22 +0100 Subject: [PATCH 193/219] changes to ex 71 to align with RT pipeline improvements --- 71_RayTracingPipeline/main.cpp | 92 +++++++++++++--------------------- 1 file changed, 36 insertions(+), 56 deletions(-) diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index 307fd3e99..9f061daa5 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -31,17 +31,6 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui "Spot" }; - struct ShaderBindingTable - { - SBufferRange raygenGroupRange; - SBufferRange hitGroupsRange; - uint32_t hitGroupsStride; - SBufferRange missGroupsRange; - uint32_t missGroupsStride; - SBufferRange callableGroupsRange; - uint32_t callableGroupsStride; - }; - public: inline RaytracingPipelineApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) @@ -672,22 +661,9 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui cmdbuf->pushConstants(m_rayTracingPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, 0, sizeof(SPushConstants), &pc); cmdbuf->bindDescriptorSets(EPBP_RAY_TRACING, m_rayTracingPipeline->getLayout(), 0, 1, &m_rayTracingDs.get()); if (m_useIndirectCommand) - { - cmdbuf->traceRaysIndirect( - SBufferBinding{ - .offset = 0, - .buffer = m_indirectBuffer, - }); - } + cmdbuf->traceRaysIndirect({.offset=0,.buffer=m_indirectBuffer}); else - { - cmdbuf->traceRays( - m_shaderBindingTable.raygenGroupRange, - m_shaderBindingTable.missGroupsRange, m_shaderBindingTable.missGroupsStride, - m_shaderBindingTable.hitGroupsRange, m_shaderBindingTable.hitGroupsStride, - m_shaderBindingTable.callableGroupsRange, m_shaderBindingTable.callableGroupsStride, - WIN_W, WIN_H, 1); - } + cmdbuf->traceRays(m_shaderBindingTable,WIN_W,WIN_H,1); } // pipeline barrier @@ -916,22 +892,22 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui bool createIndirectBuffer() { - const auto getBufferRangeAddress = [](const SBufferRange& range) + const auto getBufferRangeAddress = [](const SBufferRange& range) { return range.buffer->getDeviceAddress() + range.offset; }; const auto command = TraceRaysIndirectCommand_t{ - .raygenShaderRecordAddress = getBufferRangeAddress(m_shaderBindingTable.raygenGroupRange), - .raygenShaderRecordSize = m_shaderBindingTable.raygenGroupRange.size, - .missShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.missGroupsRange), - .missShaderBindingTableSize = m_shaderBindingTable.missGroupsRange.size, - .missShaderBindingTableStride = m_shaderBindingTable.missGroupsStride, - .hitShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.hitGroupsRange), - .hitShaderBindingTableSize = m_shaderBindingTable.hitGroupsRange.size, - .hitShaderBindingTableStride = m_shaderBindingTable.hitGroupsStride, - .callableShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.callableGroupsRange), - .callableShaderBindingTableSize = m_shaderBindingTable.callableGroupsRange.size, - .callableShaderBindingTableStride = m_shaderBindingTable.callableGroupsStride, + .raygenShaderRecordAddress = getBufferRangeAddress(m_shaderBindingTable.raygen), + .raygenShaderRecordSize = m_shaderBindingTable.raygen.size, + .missShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.miss.range), + .missShaderBindingTableSize = m_shaderBindingTable.miss.range.size, + .missShaderBindingTableStride = m_shaderBindingTable.miss.stride, + .hitShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.hit.range), + .hitShaderBindingTableSize = m_shaderBindingTable.hit.range.size, + .hitShaderBindingTableStride = m_shaderBindingTable.hit.stride, + .callableShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.callable.range), + .callableShaderBindingTableSize = m_shaderBindingTable.callable.range.size, + .callableShaderBindingTableStride = m_shaderBindingTable.callable.stride, .width = WIN_W, .height = WIN_H, .depth = 1, @@ -972,15 +948,15 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; const auto handleSizeAligned = nbl::core::alignUp(handleSize, limits.shaderGroupHandleAlignment); - auto& raygenRange = m_shaderBindingTable.raygenGroupRange; + auto& raygenRange = m_shaderBindingTable.raygen; - auto& hitRange = m_shaderBindingTable.hitGroupsRange; + auto& hitRange = m_shaderBindingTable.hit.range; const auto hitHandles = pipeline->getHitHandles(); - auto& missRange = m_shaderBindingTable.missGroupsRange; + auto& missRange = m_shaderBindingTable.miss.range; const auto missHandles = pipeline->getMissHandles(); - auto& callableRange = m_shaderBindingTable.callableGroupsRange; + auto& callableRange = m_shaderBindingTable.callable.range; const auto callableHandles = pipeline->getCallableHandles(); raygenRange = { @@ -992,19 +968,19 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui .offset = raygenRange.size, .size = core::alignUp(missHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), }; - m_shaderBindingTable.missGroupsStride = handleSizeAligned; + m_shaderBindingTable.miss.stride = handleSizeAligned; hitRange = { .offset = missRange.offset + missRange.size, .size = core::alignUp(hitHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), }; - m_shaderBindingTable.hitGroupsStride = handleSizeAligned; + m_shaderBindingTable.hit.stride = handleSizeAligned; callableRange = { .offset = hitRange.offset + hitRange.size, .size = core::alignUp(callableHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), }; - m_shaderBindingTable.callableGroupsStride = handleSizeAligned; + m_shaderBindingTable.callable.stride = handleSizeAligned; const auto bufferSize = raygenRange.size + missRange.size + hitRange.size + callableRange.size; @@ -1021,7 +997,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui for (const auto& handle : missHandles) { memcpy(pMissData, &handle, handleSize); - pMissData += m_shaderBindingTable.missGroupsStride; + pMissData += m_shaderBindingTable.miss.stride; } // copy hit region @@ -1029,7 +1005,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui for (const auto& handle : hitHandles) { memcpy(pHitData, &handle, handleSize); - pHitData += m_shaderBindingTable.hitGroupsStride; + pHitData += m_shaderBindingTable.miss.stride; } // copy callable region @@ -1037,17 +1013,21 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui for (const auto& handle : callableHandles) { memcpy(pCallableData, &handle, handleSize); - pCallableData += m_shaderBindingTable.callableGroupsStride; + pCallableData += m_shaderBindingTable.callable.stride; } { - IGPUBuffer::SCreationParams params; - params.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT; - params.size = bufferSize; - m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, std::move(params), pData).move_into(raygenRange.buffer); - missRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); - hitRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); - callableRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); + smart_refctd_ptr buffer; + { + IGPUBuffer::SCreationParams params; + params.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT; + params.size = bufferSize; + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, std::move(params), pData).move_into(buffer); + } + raygenRange.buffer = smart_refctd_ptr(buffer); + missRange.buffer = smart_refctd_ptr(raygenRange.buffer); + hitRange.buffer = smart_refctd_ptr(raygenRange.buffer); + callableRange.buffer = smart_refctd_ptr(raygenRange.buffer); } return true; @@ -1510,7 +1490,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui smart_refctd_ptr m_rayTracingDs; smart_refctd_ptr m_rayTracingPipeline; uint64_t m_rayTracingStackSize; - ShaderBindingTable m_shaderBindingTable; + IGPURayTracingPipeline::SShaderBindingTable m_shaderBindingTable; smart_refctd_ptr m_presentDs; smart_refctd_ptr m_presentDsPool; From c2ef64607b33be5d7a38204d308ec6ecd940d94b Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 27 Jan 2026 11:25:03 +0100 Subject: [PATCH 194/219] precompile fri ext --- 40_PathTracer/CMakeLists.txt | 4 ++-- 40_PathTracer/app_resources/present/default.hlsl | 4 ++++ .../src/renderer/present/CWindowPresenter.cpp | 16 +++++++--------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index d491e4446..91b4a9b6d 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -50,9 +50,9 @@ set(JSON [=[ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS + -I "${NBL_ROOT_PATH}/include" -I "${CMAKE_CURRENT_SOURCE_DIR}" -# -I "${CMAKE_CURRENT_SOURCE_DIR}/../include" # TODO: Arek for some reason, the `-I` are relative to each `.hlsl` file getting compiled, and not this CMAke file - -I "D:\\work\\Nabla-master\\examples_tests\\40_PathTracer\\include" + -I "${CMAKE_CURRENT_SOURCE_DIR}/include" -T lib_${SM} ) diff --git a/40_PathTracer/app_resources/present/default.hlsl b/40_PathTracer/app_resources/present/default.hlsl index d063292da..993e24056 100644 --- a/40_PathTracer/app_resources/present/default.hlsl +++ b/40_PathTracer/app_resources/present/default.hlsl @@ -38,3 +38,7 @@ float32_t4 present_default(SVertexAttributes vxAttr) : SV_Target0 } return float32_t4(gSensorTextures[pc.imageIndex].SampleLevel(gSensorSamplers[0],uv,0.f).rgb*tint,1.0f); } + +// Precompile the FullScreenTriangle vertex shader along with this fragment shader. +#define NBL_EXT_FULLSCREEN_TRIANGLE_VS_ENTRYPOINT present_fri_ext +#include diff --git a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp index 07aef440b..768635f67 100644 --- a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp +++ b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp @@ -141,12 +141,6 @@ bool CWindowPresenter::init_impl(CRenderer* renderer) // auto* const assMan = IPresenter::getCreationParams().assMan.get(); - ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(assMan,device,logger.get().get()); - if (!fsTriProtoPPln) - { - logger.log("`CWindowPresenter::create` failed to create Full Screen Triangle protopipeline or load its vertex shader!",ILogger::ELL_ERROR); - return false; - } // present pipeline layout smart_refctd_ptr layout; @@ -166,9 +160,13 @@ bool CWindowPresenter::init_impl(CRenderer* renderer) { const IGPUPipelineBase::SShaderSpecInfo fragSpec = { .shader = shader.get(), - .entryPoint = "present_default", + .entryPoint = "present_default" }; - m_present = fsTriProtoPPln.createPipeline(fragSpec,layout.get(),getRenderpass()); + + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPln(shader, "present_fri_ext"); + if (!fsTriProtoPln) { logger.log("`CWindowPresenter::create` failed to create Full Screen Triangle protopipeline or load its vertex shader!",ILogger::ELL_ERROR); return false; } + m_present = fsTriProtoPln.createPipeline(fragSpec, layout.get(), getRenderpass()); + if (!m_present) logger.log("`CWindowPresenter::create` failed to create Graphics Pipeline!",ILogger::ELL_ERROR); } @@ -294,4 +292,4 @@ bool CWindowPresenter::beginRenderpass_impl() return success; } -} \ No newline at end of file +} From 5a6727dfb7776ec51725e4bede1c4771c9c038f2 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 27 Jan 2026 12:32:46 +0100 Subject: [PATCH 195/219] those 2 `-I` actually not needed --- 40_PathTracer/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index 91b4a9b6d..09dc15db1 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -50,8 +50,6 @@ set(JSON [=[ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS - -I "${NBL_ROOT_PATH}/include" - -I "${CMAKE_CURRENT_SOURCE_DIR}" -I "${CMAKE_CURRENT_SOURCE_DIR}/include" -T lib_${SM} ) From cb7b0e75b8d5349b4a915182a5ccf65571af99eb Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 27 Jan 2026 20:31:34 +0100 Subject: [PATCH 196/219] get to the SBT stage --- 40_PathTracer/include/renderer/CRenderer.h | 5 +- 40_PathTracer/include/renderer/CScene.h | 16 + .../include/renderer/shaders/scene.hlsl | 4 +- 40_PathTracer/main.cpp | 23 +- 40_PathTracer/src/renderer/CRenderer.cpp | 336 +++++++++++++++--- 71_RayTracingPipeline/main.cpp | 2 +- 6 files changed, 335 insertions(+), 51 deletions(-) diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h index 3b09d01db..9283b338e 100644 --- a/40_PathTracer/include/renderer/CRenderer.h +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -114,8 +114,11 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl // descriptor set for sensors core::smart_refctd_ptr sensorDSLayout; + // temporary + std::array,uint8_t(CSession::RenderMode::Count)> shaders; + std::array,uint8_t(CSession::RenderMode::Count)> renderingLayouts; // TODO - std::array,uint8_t(CSession::RenderMode::Count)> renderingPipelines; +// std::array,uint8_t(CSession::RenderMode::Count)> genericPipelines; // core::smart_refctd_ptr commandBuffers[FramesInFlight]; diff --git a/40_PathTracer/include/renderer/CScene.h b/40_PathTracer/include/renderer/CScene.h index 60af60e06..babcd14cf 100644 --- a/40_PathTracer/include/renderer/CScene.h +++ b/40_PathTracer/include/renderer/CScene.h @@ -37,6 +37,15 @@ class CScene : public core::IReferenceCounted, public core::InterfaceUnmovable // inline CRenderer* getRenderer() const {return m_construction.renderer.get();} + // + inline video::IGPURayTracingPipeline* getPipeline(const CSession::RenderMode mode) const + { + return m_construction.pipelines[static_cast(mode)].get(); + } + + // + inline const auto& getSBT(const CSession::RenderMode mode) const {return m_construction.sbts[static_cast(mode)];} + // inline const video::IGPUDescriptorSet* getDescriptorSet() const {return m_construction.sceneDS->getDescriptorSet();} @@ -57,6 +66,10 @@ class CScene : public core::IReferenceCounted, public core::InterfaceUnmovable core::vector sensors; // backward link for reference counting core::smart_refctd_ptr renderer; + // specialized per-scene pipelines + core::smart_refctd_ptr pipelines[uint8_t(CSession::RenderMode::Count)]; + // + video::IGPURayTracingPipeline::SShaderBindingTable sbts[uint8_t(CSession::RenderMode::Count)]; // descriptor set for a scene shall contain sampled textures and compiled materials core::smart_refctd_ptr sceneDS; // main TLAS @@ -67,6 +80,9 @@ class CScene : public core::IReferenceCounted, public core::InterfaceUnmovable // sensor list can be empty, we can just make one up as we go along inline operator bool() const { + for (uint8_t i=0; i(CSession::RenderMode::Count); i++) + if (const auto* pipeline=pipelines[i].get(); !pipeline || !sbts[i].valid(pipeline->getCreationFlags())) + return false; return renderer && sceneDS; } }; diff --git a/40_PathTracer/include/renderer/shaders/scene.hlsl b/40_PathTracer/include/renderer/shaders/scene.hlsl index d55bb9cb1..4418d3040 100644 --- a/40_PathTracer/include/renderer/shaders/scene.hlsl +++ b/40_PathTracer/include/renderer/shaders/scene.hlsl @@ -10,7 +10,7 @@ namespace this_example { struct SSceneUniforms { - struct SIndirectInit + struct SInit { // // bda_t pQuantizedSequence; @@ -19,7 +19,7 @@ struct SSceneUniforms hlsl::float32_t envmapPDFNormalizationFactor; hlsl::float16_t envmapScale; uint16_t unused; - } indirect; + } init; }; struct SceneDSBindings diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index f4dbdd84c..f2a85a357 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -102,6 +102,27 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes return retval; } + inline void filterDevices(nbl::core::set& physicalDevices) const override + { + device_base_t::filterDevices(physicalDevices); + std::erase_if(physicalDevices,[&](const IPhysicalDevice* device)->bool + { + const auto& props = device->getMemoryProperties(); + uint64_t largestVRAMHeap = 0; + using heap_flags_e = IDeviceMemoryAllocation::E_MEMORY_HEAP_FLAGS; + for (uint32_t h=0; hgetDirectVRAMAccessMemoryTypeBits(); + for (uint32_t t=0; t>t)&0x1u) && props.memoryHeaps[props.memoryTypes[t].heapIndex].size==largestVRAMHeap) + return false; + m_logger->log("Filtering out Device %p (%s) due to lack of ReBAR",ILogger::ELL_WARNING,device,device->getProperties().deviceName); + return true; + } + ); + } + inline nbl::core::vector getSurfaces() const override { if (m_args.headless) @@ -390,7 +411,7 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes inline void workLoopBody() override { CSession* session; - volatile bool skip = true; // skip using the debugger + volatile bool skip = false; // skip using the debugger for (session=m_resolver->getActiveSession(); !session || session->getProgress()>=1.f || skip;) { skip = false; diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index 7feddfe86..96cab01bf 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -61,7 +61,6 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) // ILogicalDevice* device = params.utilities->getLogicalDevice(); - // limits // params.semaphore = device->createSemaphore(0); @@ -73,7 +72,6 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) using render_mode_e = CSession::RenderMode; // create the layouts - smart_refctd_ptr renderingLayouts[uint8_t(CSession::RenderMode::Count)]; { constexpr auto RTStages = hlsl::ShaderStage::ESS_ALL_RAY_TRACING;// | hlsl::ShaderStage::ESS_COMPUTE; constexpr auto RenderingStages = RTStages | hlsl::ShaderStage::ESS_COMPUTE; @@ -238,45 +236,22 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) setPCRange.operator()(render_mode_e::Debug); for (uint8_t t=0; tcreatePipelineLayout({pcRanges+t,1},params.sceneDSLayout,params.sensorDSLayout); + params.renderingLayouts[t] = device->createPipelineLayout({pcRanges+t,1},params.sceneDSLayout,params.sensorDSLayout); string debugName = to_string(static_cast(t))+"Rendering Pipeline Layout"; - if (checkNullObject(renderingLayouts[t],debugName)) + if (checkNullObject(params.renderingLayouts[t],debugName)) return nullptr; } } - // create the pipelines + // TODO: create the generic pipelines + params.shaders[uint8_t(render_mode_e::Previs)] = loadPrecompiledShader<"pathtrace_previs">(_params.assMan,device,logger); + params.shaders[uint8_t(render_mode_e::Beauty)] = loadPrecompiledShader<"pathtrace_beauty">(_params.assMan,device,logger); + params.shaders[uint8_t(render_mode_e::Debug)] = loadPrecompiledShader<"pathtrace_debug">(_params.assMan,device,logger); + for (auto i=0; i raygenShaders[uint8_t(render_mode_e::Count)] = {}; - raygenShaders[uint8_t(render_mode_e::Previs)] = loadPrecompiledShader<"pathtrace_previs">(_params.assMan,device,logger); - raygenShaders[uint8_t(render_mode_e::Beauty)] = loadPrecompiledShader<"pathtrace_beauty">(_params.assMan,device,logger); - raygenShaders[uint8_t(render_mode_e::Debug)] = loadPrecompiledShader<"pathtrace_debug">(_params.assMan,device,logger); - IGPURayTracingPipeline::SShaderSpecInfo missShaders[uint8_t(render_mode_e::Count)] = {}; - for (uint8_t m=0; mcreateRayTracingPipelines(nullptr,creationParams,params.renderingPipelines.data())) - { - logger.log("Failed to create Path Tracing Pipelines",ILogger::ELL_ERROR); - return nullptr; - } + logger.log("Failed to Load %s Shader!",ILogger::ELL_ERROR,system::to_string(static_cast(i))); + return nullptr; } // command buffers @@ -292,7 +267,6 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) return core::smart_refctd_ptr(new CRenderer(std::move(params)),core::dont_grab); } - core::smart_refctd_ptr CRenderer::createScene(CScene::SCreationParams&& _params) { if (!_params) @@ -302,6 +276,7 @@ core::smart_refctd_ptr CRenderer::createScene(CScene::SCreationParams&& auto converter = core::smart_refctd_ptr(_params.converter); CScene::SConstructorParams params = {std::move(_params)}; +// params.sceneBound = ; params.sensors = std::move(_params.load.sensors); params.renderer = smart_refctd_ptr(this); { @@ -314,18 +289,286 @@ core::smart_refctd_ptr CRenderer::createScene(CScene::SCreationParams&& } params.sceneDS = make_smart_refctd_ptr(std::move(ds)); } + + constexpr auto RenderModeCount = uint8_t(CSession::RenderMode::Count); + // create the pipelines + { + IGPURayTracingPipeline::SCreationParams creationParams[RenderModeCount] = {}; + using creation_flags_e = IGPURayTracingPipeline::SCreationParams::FLAGS; + auto flags = creation_flags_e::NO_NULL_MISS_SHADERS; + { + IGPURayTracingPipeline::SShaderSpecInfo missShaders[RenderModeCount] = {}; + for (uint8_t m=0; mcreateRayTracingPipelines(nullptr,creationParams,params.pipelines)) + { + m_creation.logger.log("Failed to create Path Tracing Pipelines",ILogger::ELL_ERROR); + return nullptr; + } + } // new cache if none provided if (!converter) converter = CAssetConverter::create({.device=device,.optimizer={}}); - - // -// converter->reserve(); - // build the BLAS and TLAS + + smart_refctd_ptr ubo; { - // TODO + struct Buffers final + { + using render_mode_e = CSession::RenderMode; + inline operator std::span() const {return {&ubo.get(),1+RenderModeCount};} + + smart_refctd_ptr ubo; + smart_refctd_ptr sbts[RenderModeCount]; + } tmpBuffers; + // + using buffer_usage_e = IGPUBuffer::E_USAGE_FLAGS; + constexpr auto BasicBufferUsages = buffer_usage_e::EUF_SHADER_DEVICE_ADDRESS_BIT; + { + tmpBuffers.ubo = ICPUBuffer::create({{.size=sizeof(SSceneUniforms),.usage=BasicBufferUsages|buffer_usage_e::EUF_UNIFORM_BUFFER_BIT},nullptr}); + auto& uniforms = *reinterpret_cast(tmpBuffers.ubo->getPointer()); + uniforms.init = {}; // TODO: fill with stuff + tmpBuffers.ubo->setContentHash(tmpBuffers.ubo->computeContentHash()); + } + // SBT + const auto& limits = device->getPhysicalDevice()->getLimits(); + assert(limits.shaderGroupBaseAlignment>=limits.shaderGroupHandleAlignment); + constexpr auto HandleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; + const auto handleSizeAligned = nbl::core::alignUp(HandleSize,limits.shaderGroupHandleAlignment); + for (uint8_t i=0; igetHitHandles(); + const auto missHandles = pipeline->getMissHandles(); + const auto callableHandles = pipeline->getCallableHandles(); + // + { + class CVectorBacked final : public core::refctd_memory_resource + { + public: + inline CVectorBacked(const size_t reservation) + { + storage.reserve(reservation*HandleSize); + } + + inline void* allocate(size_t bytes, size_t alignment) override + { + assert(bytes==storage.size()); + return storage.data(); + } + inline void deallocate(void* p, size_t bytes, size_t alignment) override {storage = {};} + + core::vector storage; + }; + auto memRsc = core::make_smart_refctd_ptr(hitHandles.size()+missHandles.size()+callableHandles.size()+1); + { + // TODO: move to material compiler + core::LinearAddressAllocatorST allocator(nullptr,0,0,limits.shaderGroupBaseAlignment,0x7fff0000u); + auto copyShaderHandles = [&](const std::span handles)->SBufferRange + { + SBufferRange range = {.size=handles.size()*handleSizeAligned}; + range.offset = allocator.alloc_addr(range.size,limits.shaderGroupBaseAlignment); + memRsc->storage.resize(allocator.get_allocated_size()); + uint8_t* out = memRsc->storage.data()+range.offset; + for (const auto& handle : handles) + { + memcpy(out,&handle,HandleSize); + out += handleSizeAligned; + } + return range; + }; + auto& sbt = params.sbts[i]; + sbt.raygen = copyShaderHandles({&pipeline->getRaygen(),1}); + sbt.miss.range = copyShaderHandles(pipeline->getMissHandles()); + sbt.hit.range = copyShaderHandles(pipeline->getHitHandles()); + sbt.callable.range = copyShaderHandles(pipeline->getCallableHandles()); + sbt.miss.stride = sbt.hit.stride = sbt.callable.stride = handleSizeAligned; + } + auto& sbtBuff = tmpBuffers.sbts[i]; + sbtBuff = ICPUBuffer::create({ + { + .size=memRsc->storage.size(),.usage=BasicBufferUsages|buffer_usage_e::EUF_SHADER_BINDING_TABLE_BIT + }, + /*.data = */memRsc->storage.data(), + /*.memoryResource = */memRsc + },core::adopt_memory); + sbtBuff->setContentHash(sbtBuff->computeContentHash()); + } + } + + // customized setup + struct MyInputs : CAssetConverter::SInputs + { + // For the GPU Buffers to be directly writeable and so that we don't need a Transfer Queue submit at all + inline uint32_t constrainMemoryTypeBits(const size_t groupCopyID, const IAsset* canonicalAsset, const blake3_hash_t& contentHash, const IDeviceMemoryBacked* memoryBacked) const override + { + assert(memoryBacked); + return memoryBacked->getObjectType()!=IDeviceMemoryBacked::EOT_BUFFER ? (~0u):rebarMemoryTypes; + } + + uint32_t rebarMemoryTypes; + } inputs = {}; + inputs.logger = m_creation.logger.get().get(); + inputs.rebarMemoryTypes = device->getPhysicalDevice()->getDirectVRAMAccessMemoryTypeBits(); + // the allocator needs to be overriden to hand out memory ranges which have already been mapped so that the ReBAR fast-path can kick in + // (multiple buffers can be bound to same memory, but memory can only be mapped once at one place, so Asset Converter can't do it) + struct MyAllocator final : public IDeviceMemoryAllocator + { + ILogicalDevice* getDeviceForAllocations() const override {return device;} + + SAllocation allocate(const SAllocateInfo& info) override + { + auto retval = device->allocate(info); + // map what is mappable by default so ReBAR checks succeed + if (retval.isValid() && retval.memory->isMappable()) + retval.memory->map({.offset=0,.length=info.size}); + return retval; + } + + ILogicalDevice* device; + } myalloc; + myalloc.device = device; + inputs.allocator = &myalloc; + + // TODO: construct the TLASes + core::vector tmpTLASes; + { + std::get>(inputs.assets) = tmpBuffers; + std::get>(inputs.assets) = tmpTLASes; + } + + CAssetConverter::SReserveResult reservation = converter->reserve(inputs); + { + bool success = true; + auto check = [&](const CAssetConverter::SInputs::asset_span_t references)->void + { + auto objects = reservation.getGPUObjects(); + auto referenceIt = references.begin(); + for (auto& object : objects) + { + auto* reference = *(referenceIt++); + if (!reference) + continue; + + success = bool(object.value); + if (!success) + { + inputs.logger.log("Failed to convert a CPU object to GPU of type %s!",ILogger::ELL_ERROR,system::to_string(reference->getAssetType())); + return; + } + } + }; + check.template operator()(tmpBuffers); + if (!success) + return nullptr; + } + + // convert + { + smart_refctd_ptr scratchAlloc; + { + constexpr auto scratchUsages = IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT|IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT|IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + + constexpr uint16_t MaxAlignment = 256; + constexpr uint64_t MinAllocationSize = 1024; + const auto scratchSize = core::alignUp(hlsl::max(reservation.getMaxASBuildScratchSize(false),MinAllocationSize),MaxAlignment); + + auto scratchBuffer = device->createBuffer({{.size=scratchSize,.usage=scratchUsages}}); + + auto reqs = scratchBuffer->getMemoryReqs(); + reqs.memoryTypeBits &= device->getPhysicalDevice()->getDirectVRAMAccessMemoryTypeBits(); + + auto allocation = device->allocate(reqs,scratchBuffer.get(),IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + allocation.memory->map({.offset=0,.length=reqs.size}); + + scratchAlloc = make_smart_refctd_ptr( + SBufferRange{0ull,scratchSize,std::move(scratchBuffer)}, + core::allocator(), MaxAlignment, MinAllocationSize + ); + } + + constexpr auto CompBufferCount = 2; + + std::array,CompBufferCount> compBufs = {}; + std::array compBufInfos = {}; + { + constexpr auto RequiredFlags = IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT|IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT; + auto pool = device->createCommandPool(m_creation.computeQueue->getFamilyIndex(),RequiredFlags); + if (!pool || !pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, compBufs)) + { + inputs.logger.log("Failed to create Command Buffers for the Compute Queue!",ILogger::ELL_ERROR); + return nullptr; + } + compBufs.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + for (auto i=0; icreateSemaphore(0u); + + // TODO: `SIntendedSubmitInfo transfer` as well, because of images + SIntendedSubmitInfo compute = {}; + compute.queue = m_creation.computeQueue; + compute.scratchCommandBuffers = compBufInfos; + compute.scratchSemaphore = { + .semaphore = compSema.get(), + .value = 0u, + .stageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT + }; + struct MyParams final : CAssetConverter::SConvertParams + { + inline uint32_t getFinalOwnerQueueFamily(const IGPUBuffer* buffer, const core::blake3_hash_t& createdFrom) override + { + return finalUser; + } + inline uint32_t getFinalOwnerQueueFamily(const IGPUAccelerationStructure* image, const core::blake3_hash_t& createdFrom) override + { + return finalUser; + } + + uint8_t finalUser; + } cvtParam = {}; + cvtParam.utilities = m_creation.utilities.get(); + cvtParam.compute = &compute; + cvtParam.scratchForDeviceASBuild = scratchAlloc.get(); + cvtParam.finalUser = m_creation.graphicsQueue->getFamilyIndex(); + + auto future = reservation.convert(cvtParam); + if (future.copy()!=IQueue::RESULT::SUCCESS) + { + inputs.logger.log("Failed to await `CAssetConverter::SReserveResult::convert(...)` submission semaphore!",ILogger::ELL_ERROR); + return nullptr; + } + + const auto buffers = reservation.getGPUObjects(); + ubo = buffers[0].value; + for (uint8_t i=0; i& stRange)->void + { + stRange.range.buffer = stRange.range.size ? buffer:nullptr; + }; + params.sbts[i].raygen.buffer = buffer; + setSBTBuffer(params.sbts[i].miss); + setSBTBuffer(params.sbts[i].hit); + setSBTBuffer(params.sbts[i].callable); + } + } } - core::smart_refctd_ptr ubo; // write into DS { @@ -343,7 +586,7 @@ core::smart_refctd_ptr CRenderer::createScene(CScene::SCreationParams&& }; infos.push_back(std::move(info)); }; - addWrite(SceneDSBindings::UBO,SBufferRange{.offset=0,.size=sizeof(SSceneUniforms),.buffer=ubo}); + addWrite(SceneDSBindings::UBO,SBufferRange{.offset=0,.size=sizeof(SSceneUniforms),.buffer=std::move(ubo)}); // TODO: Envmap // TODO: TLASes // TODO: Samplers @@ -352,7 +595,7 @@ core::smart_refctd_ptr CRenderer::createScene(CScene::SCreationParams&& // TODO: Envmap Warp Map for (auto& write : writes) write.info = infos.data()+reinterpret_cast(write.info); -// device->updateDescriptorSets(writes,{}); + device->updateDescriptorSets(writes,{}); } #if 0 @@ -397,9 +640,10 @@ auto CRenderer::render(CSession* session) -> SSubmit if (!cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) return {}; + const auto* const scene = session->getConstructionParams().scene.get(); const auto mode = sessionParams.mode; const auto& sessionResources = session->getActiveResources(); - const auto* const pipeline = m_construction.renderingPipelines[static_cast(mode)].get(); + const auto* const pipeline = scene->getPipeline(mode); bool success; // push constants @@ -425,7 +669,7 @@ auto CRenderer::render(CSession* session) -> SSubmit } const auto renderSize = sessionParams.uniforms.renderSize; -// success = success && cb->traceRays({},{},0,{},0,{},0,renderSize.x,renderSize.y,sessionParams.type!=CSession::sensor_type_e::Env ? 1:6); + success = success && cb->traceRays(scene->getSBT(mode),renderSize.x,renderSize.y,sessionParams.type!=CSession::sensor_type_e::Env ? 1:6); if (success) return SSubmit(this,cb); diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index 9f061daa5..43a7f5fd0 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -1367,7 +1367,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui auto future = reservation.convert(params); if (future.copy() != IQueue::RESULT::SUCCESS) { - m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); + m_logger->log("Failed to await submission future!", ILogger::ELL_ERROR); return false; } // 2 submits, BLAS build, TLAS build, DO NOT ADD COMPACTIONS IN THIS EXAMPLE! From 924b87023e3b2d2e7fb18cb30831cf11fa6df40c Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 27 Jan 2026 20:56:30 +0100 Subject: [PATCH 197/219] houston we have a visual --- 40_PathTracer/src/renderer/CRenderer.cpp | 2 +- 40_PathTracer/src/renderer/present/CWindowPresenter.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index 96cab01bf..e40741a08 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -679,7 +679,7 @@ auto CRenderer::render(CSession* session) -> SSubmit IQueue::SSubmitInfo::SSemaphoreInfo CRenderer::SSubmit::operator()(std::span extraWaits) { - if (cb) + if (!cb || !cb->end()) return {}; const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = diff --git a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp index 87cd816b7..9ffe8fb85 100644 --- a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp +++ b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #include "renderer/present/CWindowPresenter.h" +#include "renderer/shaders/session.hlsl" namespace nbl::this_example { @@ -234,7 +235,7 @@ auto CWindowPresenter::acquire_impl(const CSession* session, ISemaphore::SWaitIn winMgr->show(window); m_pushConstants.layer = 0; // TODO: cubemaps and RWMC debug - m_pushConstants.imageIndex = 0; + m_pushConstants.imageIndex = uint8_t(SensorDSBindings::SampledImageIndex::Albedo); auto acquireResult = m_construction.surface->acquireNextImage(); *p_currentImageAcquire = {.semaphore=acquireResult.semaphore,.value=acquireResult.acquireCount}; From 2262b7eb109b08f4871d18e7b99298d60aa05ef4 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 27 Jan 2026 22:06:06 +0100 Subject: [PATCH 198/219] fix all the little bugs --- 40_PathTracer/app_resources/present/default.hlsl | 2 ++ 40_PathTracer/include/renderer/CSession.h | 2 +- 40_PathTracer/include/renderer/present/IPresenter.h | 10 +++++----- 40_PathTracer/main.cpp | 5 +++-- 40_PathTracer/src/renderer/CRenderer.cpp | 2 +- .../src/renderer/present/CWindowPresenter.cpp | 12 ++++++++---- 6 files changed, 20 insertions(+), 13 deletions(-) diff --git a/40_PathTracer/app_resources/present/default.hlsl b/40_PathTracer/app_resources/present/default.hlsl index d063292da..dc857fb2d 100644 --- a/40_PathTracer/app_resources/present/default.hlsl +++ b/40_PathTracer/app_resources/present/default.hlsl @@ -32,6 +32,8 @@ float32_t4 present_default(SVertexAttributes vxAttr) : SV_Target0 { const SDefaultResolvePushConstants::Regular regular = pc.regular(); uv.xy = vxAttr.uv*regular.scale; + if (any(uv.xy>float32_t2(1,1))) + return promote(0.f); uv.z = pc.layer; if (any(regular._min>uv.xy) || any(regular._max::quiet_NaN(); + return 0.f; } // diff --git a/40_PathTracer/include/renderer/present/IPresenter.h b/40_PathTracer/include/renderer/present/IPresenter.h index 909c08708..405e60289 100644 --- a/40_PathTracer/include/renderer/present/IPresenter.h +++ b/40_PathTracer/include/renderer/present/IPresenter.h @@ -30,7 +30,7 @@ class IPresenter : public core::IReferenceCounted, public core::InterfaceUnmovab // inline bool init(CRenderer* renderer) { - if (!m_queue) + if (m_queue) return isInitialized(); auto& logger = m_creation.logger; @@ -192,11 +192,11 @@ class IPresenter : public core::IReferenceCounted, public core::InterfaceUnmovab private: SCachedCreationParams m_creation; - video::CThreadSafeQueueAdapter* m_queue; - core::smart_refctd_ptr m_semaphore; - std::array,CircularBufferSize> m_cmdbufs; + video::CThreadSafeQueueAdapter* m_queue = nullptr; + core::smart_refctd_ptr m_semaphore = {}; + std::array, CircularBufferSize> m_cmdbufs = {}; video::ISemaphore::SWaitInfo m_currentImageAcquire = {}; - core::smart_refctd_ptr m_currentSessionDS; + core::smart_refctd_ptr m_currentSessionDS = {}; uint64_t m_presentCount = 0; }; diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index f2a85a357..afa3df6ef 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -239,8 +239,8 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes sensors[1].constants.height = 360; sensors[1].mutableDefaults.cropOffsetX = 0; sensors[1].mutableDefaults.cropOffsetY = 0; - sensors[1].mutableDefaults.cropWidth = 0; - sensors[1].mutableDefaults.cropHeight = 0; + sensors[1].mutableDefaults.cropWidth = sensors[1].mutableDefaults.cropWidth; + sensors[1].mutableDefaults.cropHeight = sensors[1].mutableDefaults.cropHeight; } { sensors[2].mutableDefaults.cropWidth = 5120; @@ -250,6 +250,7 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes sensors[2].constants.width = sensors[2].mutableDefaults.cropWidth+2*sensors[2].mutableDefaults.cropOffsetX; sensors[2].constants.height = sensors[2].mutableDefaults.cropHeight+2*sensors[2].mutableDefaults.cropOffsetY; } + sensors.erase(sensors.begin()); for (const auto& sensor : sensors) m_sessionQueue.push( scene_daily_pt->createSession({ diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index e40741a08..ccb8d595d 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -296,8 +296,8 @@ core::smart_refctd_ptr CRenderer::createScene(CScene::SCreationParams&& IGPURayTracingPipeline::SCreationParams creationParams[RenderModeCount] = {}; using creation_flags_e = IGPURayTracingPipeline::SCreationParams::FLAGS; auto flags = creation_flags_e::NO_NULL_MISS_SHADERS; + IGPURayTracingPipeline::SShaderSpecInfo missShaders[RenderModeCount] = {}; { - IGPURayTracingPipeline::SShaderSpecInfo missShaders[RenderModeCount] = {}; for (uint8_t m=0; m1.0) - { targetResolution[i] = minResolution[i]; - m_pushConstants.regular.scale[i] = tmp; - } } + // pad with darkness on the dimension thats too big + const double newAspectRatio = float64_t(targetResolution.x)/float64_t(targetResolution.y); + if (newAspectRatio>originalAspectRatio) + m_pushConstants.regular.scale[1] *= newAspectRatio/originalAspectRatio; + else + m_pushConstants.regular.scale[0] *= originalAspectRatio/newAspectRatio; // `CWindowPresenter::create` aspect ratio ranges and min/max relationships help us stay valid assert(all(minResolution<=targetResolution)&&all(targetResolution<=maxResolution)); } From 1db3221d1e11a9781d1d1c8dfacd59510204faf2 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 28 Jan 2026 12:03:26 +0700 Subject: [PATCH 199/219] init quantized sequence test --- 74_QuantizedSequenceTests/CMakeLists.txt | 68 ++++++++++++++++++ .../CQuantizedSequenceTester.h | 59 +++++++++++++++ .../app_resources/common.hlsl | 42 +++++++++++ .../quantizedSequenceTest.comp.hlsl | 19 +++++ 74_QuantizedSequenceTests/main.cpp | 71 +++++++++++++++++++ CMakeLists.txt | 2 + 6 files changed, 261 insertions(+) create mode 100644 74_QuantizedSequenceTests/CMakeLists.txt create mode 100644 74_QuantizedSequenceTests/CQuantizedSequenceTester.h create mode 100644 74_QuantizedSequenceTests/app_resources/common.hlsl create mode 100644 74_QuantizedSequenceTests/app_resources/quantizedSequenceTest.comp.hlsl create mode 100644 74_QuantizedSequenceTests/main.cpp diff --git a/74_QuantizedSequenceTests/CMakeLists.txt b/74_QuantizedSequenceTests/CMakeLists.txt new file mode 100644 index 000000000..feb9bf602 --- /dev/null +++ b/74_QuantizedSequenceTests/CMakeLists.txt @@ -0,0 +1,68 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() + +if(MSVC) + target_compile_options("${EXECUTABLE_NAME}" PUBLIC "/fp:strict") +else() + target_compile_options("${EXECUTABLE_NAME}" PUBLIC -ffloat-store -frounding-math -fsignaling-nans -ftrapping-math) +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/quantizedSequenceTest.comp.hlsl", + "KEY": "quantizedSequenceTest", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h new file mode 100644 index 000000000..f98111780 --- /dev/null +++ b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h @@ -0,0 +1,59 @@ +#ifndef _NBL_EXAMPLES_TESTS_74_QUANTIZED_SEQUENCE_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_74_QUANTIZED_SEQUENCE_TESTER_INCLUDED_ + +#define GLM_FORCE_RADIANS +#include +#include +#define GLM_ENABLE_EXPERIMENTAL +#include +#include + +#include "nbl/examples/examples.hpp" +#include "app_resources/common.hlsl" +#include "nbl/examples/Tester/ITester.h" +#include +#include + +using namespace nbl; + +class CQuantizedSequenceTester final : public ITester +{ + using base_t = ITester; + +public: + CQuantizedSequenceTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; + +private: + QuantizedSequenceInputTestValues generateInputTestValues() override + { + std::uniform_real_distribution realDistribution(-1.0f, 1.0f); + std::uniform_real_distribution realDistribution01(0.0f, 1.0f); + std::uniform_int_distribution uint32Distribution(0, std::numeric_limits::max()); + + QuantizedSequenceInputTestValues testInput; + testInput.uintVec3 = uint32_t3(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); + + return testInput; + } + + QuantizedSequenceTestValues determineExpectedResults(const QuantizedSequenceInputTestValues& testInput) override + { + QuantizedSequenceTestValues expected; + + { + for (uint32_t i = 0; i < 3; i++) + expected.uintVec3[i] = testInput.uintVec3[i] >> 11u; + } + + return expected; + } + + void verifyTestResults(const QuantizedSequenceTestValues& expectedTestValues, const QuantizedSequenceTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override + { + verifyTestValue("get uint3", expectedTestValues.uintVec3, testValues.uintVec3, testIteration, seed, testType); + } + +}; + +#endif diff --git a/74_QuantizedSequenceTests/app_resources/common.hlsl b/74_QuantizedSequenceTests/app_resources/common.hlsl new file mode 100644 index 000000000..d83324b7a --- /dev/null +++ b/74_QuantizedSequenceTests/app_resources/common.hlsl @@ -0,0 +1,42 @@ +//// Copyright (C) 2023-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_EXAMPLES_TESTS_74_QUANTIZED_SEQUENCE_COMMON_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_74_QUANTIZED_SEQUENCE_COMMON_INCLUDED_ + +#include + +using namespace nbl::hlsl; +struct QuantizedSequenceInputTestValues +{ + uint32_t3 uintVec3; + + uint32_t4 scrambleKey; +}; + +struct QuantizedSequenceTestValues +{ + uint32_t3 uintVec3; + + // pre decode scramble + float32_t3 vec3_predecode; + + // post decode scramble + uint32_t3 uintVec3_postdecode; +}; + +struct QuantizedSequenceTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(QuantizedSequenceInputTestValues) input, NBL_REF_ARG(QuantizedSequenceTestValues) output) + { + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uintVec3); + for (uint32_t i = 0; i < 3; i++) + output.uintVec3[i] = qs.get(i); + } + + } +}; + +#endif diff --git a/74_QuantizedSequenceTests/app_resources/quantizedSequenceTest.comp.hlsl b/74_QuantizedSequenceTests/app_resources/quantizedSequenceTest.comp.hlsl new file mode 100644 index 000000000..50a58bdde --- /dev/null +++ b/74_QuantizedSequenceTests/app_resources/quantizedSequenceTest.comp.hlsl @@ -0,0 +1,19 @@ +//// Copyright (C) 2023-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) + +#include "common.hlsl" +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(256, 1, 1)] +[shader("compute")] +void main() +{ + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + QuantizedSequenceTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +} \ No newline at end of file diff --git a/74_QuantizedSequenceTests/main.cpp b/74_QuantizedSequenceTests/main.cpp new file mode 100644 index 000000000..5ddcfafe8 --- /dev/null +++ b/74_QuantizedSequenceTests/main.cpp @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +#include "app_resources/common.hlsl" + +#include "CQuantizedSequenceTester.h" + +#include +#include +#include + + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +//using namespace glm; + +class QuantizedSequenceTest final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication +{ + using device_base_t = application_templates::MonoDeviceApplication; + using asset_base_t = BuiltinResourcesApplication; +public: + QuantizedSequenceTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + { + CQuantizedSequenceTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + pplnSetupData.shaderKey = nbl::this_example::builtin::build::get_spirv_key<"quantizedSequenceTest">(m_device.get()); + + CQuantizedSequenceTester quantizedSequenceTester(8); + quantizedSequenceTester.setupPipeline(pplnSetupData); + quantizedSequenceTester.performTestsAndVerifyResults("QuantizedSequenceTestLog.txt"); + } + + // In contrast to fences, we just need one semaphore to rule all dispatches + return true; + } + + void onAppTerminated_impl() override + { + m_device->waitIdle(); + } + + void workLoopBody() override {} + + bool keepRunning() override { return false; } +}; + +NBL_MAIN_FUNC(QuantizedSequenceTest) diff --git a/CMakeLists.txt b/CMakeLists.txt index ff22c4523..9a294a023 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,6 +104,8 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(73_GeometryInspector) endif() + add_subdirectory(74_QuantizedSequenceTests) + # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From c40279336fabd344a84a333ab8459f03a5132c37 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 28 Jan 2026 15:38:38 +0700 Subject: [PATCH 200/219] test create, get/set in qs --- .../CQuantizedSequenceTester.h | 26 ++++++++- .../app_resources/common.hlsl | 57 +++++++++++++++++-- 2 files changed, 75 insertions(+), 8 deletions(-) diff --git a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h index f98111780..878d21ac7 100644 --- a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h +++ b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h @@ -32,7 +32,10 @@ class CQuantizedSequenceTester final : public ITester uint32Distribution(0, std::numeric_limits::max()); QuantizedSequenceInputTestValues testInput; - testInput.uintVec3 = uint32_t3(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); + testInput.scalar = uint32Distribution(getRandomEngine()); + testInput.uvec2 = uint32_t2(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); + testInput.uvec3 = uint32_t3(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); + testInput.uvec4 = uint32_t4(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); return testInput; } @@ -40,18 +43,35 @@ class CQuantizedSequenceTester final : public ITester> 16u; + } + { + for (uint32_t i = 0; i < 3; i++) + expected.uintDim3[i] = testInput.uvec3[i] >> 22u; + } + { + for (uint32_t i = 0; i < 4; i++) + expected.uintDim4[i] = testInput.uvec4[i] >> 24u; + } + expected.uintVec2_Dim2 = testInput.uvec2; { for (uint32_t i = 0; i < 3; i++) - expected.uintVec3[i] = testInput.uintVec3[i] >> 11u; + expected.uintVec2_Dim3[i] = testInput.uvec3[i] >> 11u; } + expected.uintVec3_Dim3 = testInput.uvec3; + expected.uintVec4_Dim4 = testInput.uvec4; + return expected; } void verifyTestResults(const QuantizedSequenceTestValues& expectedTestValues, const QuantizedSequenceTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override { - verifyTestValue("get uint3", expectedTestValues.uintVec3, testValues.uintVec3, testIteration, seed, testType); + verifyTestValue("get uint3", expectedTestValues.uintVec2_Dim3, testValues.uintVec2_Dim3, testIteration, seed, testType); } }; diff --git a/74_QuantizedSequenceTests/app_resources/common.hlsl b/74_QuantizedSequenceTests/app_resources/common.hlsl index d83324b7a..beafa48bd 100644 --- a/74_QuantizedSequenceTests/app_resources/common.hlsl +++ b/74_QuantizedSequenceTests/app_resources/common.hlsl @@ -10,14 +10,26 @@ using namespace nbl::hlsl; struct QuantizedSequenceInputTestValues { - uint32_t3 uintVec3; + uint32_t scalar; + uint32_t2 uvec2; + uint32_t3 uvec3; + uint32_t4 uvec4; uint32_t4 scrambleKey; }; struct QuantizedSequenceTestValues { - uint32_t3 uintVec3; + uint32_t uintDim1; + uint32_t2 uintDim2; + uint32_t3 uintDim3; + uint32_t4 uintDim4; + + uint32_t2 uintVec2_Dim2; + uint32_t3 uintVec2_Dim3; + + uint32_t3 uintVec3_Dim3; + uint32_t4 uintVec4_Dim4; // pre decode scramble float32_t3 vec3_predecode; @@ -31,11 +43,46 @@ struct QuantizedSequenceTestExecutor void operator()(NBL_CONST_REF_ARG(QuantizedSequenceInputTestValues) input, NBL_REF_ARG(QuantizedSequenceTestValues) output) { { - sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uintVec3); + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.scalar); + output.uintDim1 = qs.get(0); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec2); + for (uint32_t i = 0; i < 2; i++) + output.uintDim2[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec3); + for (uint32_t i = 0; i < 3; i++) + output.uintDim3[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec4); + for (uint32_t i = 0; i < 4; i++) + output.uintDim4[i] = qs.get(i); + } + + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec2); + for (uint32_t i = 0; i < 2; i++) + output.uintVec2_Dim2[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec3); + for (uint32_t i = 0; i < 3; i++) + output.uintVec2_Dim3[i] = qs.get(i); + } + + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec3); for (uint32_t i = 0; i < 3; i++) - output.uintVec3[i] = qs.get(i); + output.uintVec3_Dim3[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec4); + for (uint32_t i = 0; i < 4; i++) + output.uintVec4_Dim4[i] = qs.get(i); } - } }; From 0d2afe0a4a877dde76817bb661b6d5804ac2b876 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 28 Jan 2026 16:53:59 +0700 Subject: [PATCH 201/219] add tests for encode/decode --- .../CQuantizedSequenceTester.h | 39 +++++++++++++++++-- .../app_resources/common.hlsl | 20 ++++++++-- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h index 878d21ac7..09546306c 100644 --- a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h +++ b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h @@ -27,8 +27,7 @@ class CQuantizedSequenceTester final : public ITester realDistribution(-1.0f, 1.0f); - std::uniform_real_distribution realDistribution01(0.0f, 1.0f); + std::uniform_real_distribution realDistribution(0.0f, 1.0f); std::uniform_int_distribution uint32Distribution(0, std::numeric_limits::max()); QuantizedSequenceInputTestValues testInput; @@ -37,6 +36,10 @@ class CQuantizedSequenceTester final : public ITester> 11u; + expected.unorm3_predecode = float32_t3(stored ^ testInput.scrambleKey3) * bit_cast(0x2f800004u); + } + { + const uint32_t multiplier = (1u << 21u) - 1u; + uint32_t3 stored, scrambleKey; + for (uint32_t i = 0; i < 3; i++) + { + stored[i] = uint32_t(testInput.unorm3[i] * multiplier) >> 11u; + scrambleKey[i] = testInput.scrambleKey3[i] >> 11u; + } + expected.unorm3_postdecode = float32_t3(stored ^ scrambleKey) * bit_cast(0x35000004u); + } + return expected; } void verifyTestResults(const QuantizedSequenceTestValues& expectedTestValues, const QuantizedSequenceTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override { - verifyTestValue("get uint3", expectedTestValues.uintVec2_Dim3, testValues.uintVec2_Dim3, testIteration, seed, testType); + verifyTestValue("get uint from dim 1", expectedTestValues.uintDim1, testValues.uintDim1, testIteration, seed, testType); + verifyTestValue("get uint2 from dim 1", expectedTestValues.uintDim2, testValues.uintDim2, testIteration, seed, testType); + verifyTestValue("get uint3 from dim 1", expectedTestValues.uintDim3, testValues.uintDim3, testIteration, seed, testType); + verifyTestValue("get uint4 from dim 1", expectedTestValues.uintDim4, testValues.uintDim4, testIteration, seed, testType); + + verifyTestValue("get uint2 from dim 2", expectedTestValues.uintVec2_Dim2, testValues.uintVec2_Dim2, testIteration, seed, testType); + verifyTestValue("get uint3 from dim 2", expectedTestValues.uintVec2_Dim3, testValues.uintVec2_Dim3, testIteration, seed, testType); + + verifyTestValue("get uint3 from dim 3", expectedTestValues.uintVec3_Dim3, testValues.uintVec3_Dim3, testIteration, seed, testType); + verifyTestValue("get uint4 from dim 4", expectedTestValues.uintVec4_Dim4, testValues.uintVec4_Dim4, testIteration, seed, testType); + + verifyTestValue("encode/decode unorm3 from uint2 (fullwidth)", expectedTestValues.unorm3_predecode, testValues.unorm3_predecode, testIteration, seed, testType); + verifyTestValue("encode/decode unorm3 from uint2", expectedTestValues.unorm3_postdecode, testValues.unorm3_postdecode, testIteration, seed, testType); } }; diff --git a/74_QuantizedSequenceTests/app_resources/common.hlsl b/74_QuantizedSequenceTests/app_resources/common.hlsl index beafa48bd..b3a0849dd 100644 --- a/74_QuantizedSequenceTests/app_resources/common.hlsl +++ b/74_QuantizedSequenceTests/app_resources/common.hlsl @@ -15,7 +15,9 @@ struct QuantizedSequenceInputTestValues uint32_t3 uvec3; uint32_t4 uvec4; - uint32_t4 scrambleKey; + float32_t3 unorm3; + + uint32_t3 scrambleKey3; }; struct QuantizedSequenceTestValues @@ -32,16 +34,17 @@ struct QuantizedSequenceTestValues uint32_t4 uintVec4_Dim4; // pre decode scramble - float32_t3 vec3_predecode; + float32_t3 unorm3_predecode; // post decode scramble - uint32_t3 uintVec3_postdecode; + float32_t3 unorm3_postdecode; }; struct QuantizedSequenceTestExecutor { void operator()(NBL_CONST_REF_ARG(QuantizedSequenceInputTestValues) input, NBL_REF_ARG(QuantizedSequenceTestValues) output) { + // test get/set/create { sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.scalar); output.uintDim1 = qs.get(0); @@ -83,6 +86,17 @@ struct QuantizedSequenceTestExecutor for (uint32_t i = 0; i < 4; i++) output.uintVec4_Dim4[i] = qs.get(i); } + + // test encode/decode + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + output.unorm3_predecode = qs.template decode(input.scrambleKey3); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey3); + output.unorm3_postdecode = qs.template decode(key); + } } }; From 533a90e027fc263b6091fbc00eecd29a37f1d254 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 28 Jan 2026 16:55:25 +0700 Subject: [PATCH 202/219] removed cmake builtin resource block --- 74_QuantizedSequenceTests/CMakeLists.txt | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/74_QuantizedSequenceTests/CMakeLists.txt b/74_QuantizedSequenceTests/CMakeLists.txt index feb9bf602..a8dfb6781 100644 --- a/74_QuantizedSequenceTests/CMakeLists.txt +++ b/74_QuantizedSequenceTests/CMakeLists.txt @@ -5,24 +5,6 @@ endif() nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") -if(NBL_EMBED_BUILTIN_RESOURCES) - set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) - set(RESOURCE_DIR "app_resources") - - get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) - - file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") - foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") - endforeach() - - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") - - LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() - if(MSVC) target_compile_options("${EXECUTABLE_NAME}" PUBLIC "/fp:strict") else() From 53667051b8dbc53ab8273df1b716b7faa9d97b54 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 29 Jan 2026 15:37:20 +0700 Subject: [PATCH 203/219] test create/set/get uint16 types --- .../CQuantizedSequenceTester.h | 64 +++++++++++++++---- .../app_resources/common.hlsl | 59 +++++++++++++++++ 2 files changed, 112 insertions(+), 11 deletions(-) diff --git a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h index 09546306c..5dbaf6472 100644 --- a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h +++ b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h @@ -29,9 +29,15 @@ class CQuantizedSequenceTester final : public ITester realDistribution(0.0f, 1.0f); std::uniform_int_distribution uint32Distribution(0, std::numeric_limits::max()); + std::uniform_int_distribution uint16Distribution(0, std::numeric_limits::max()); QuantizedSequenceInputTestValues testInput; - testInput.scalar = uint32Distribution(getRandomEngine()); + testInput.scalar = uint16Distribution(getRandomEngine()); + testInput.u16vec2 = uint32_t2(uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine())); + testInput.u16vec3 = uint32_t3(uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine())); + testInput.u16vec4 = uint32_t4(uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine())); + + testInput.scalar16 = uint32Distribution(getRandomEngine()); testInput.uvec2 = uint32_t2(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); testInput.uvec3 = uint32_t3(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); testInput.uvec4 = uint32_t4(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); @@ -46,6 +52,7 @@ class CQuantizedSequenceTester final : public ITester> 8u; + } + { + for (uint32_t i = 0; i < 3; i++) + expected.u16Dim3[i] = testInput.u16vec3[i] >> 11u; + } + { + for (uint32_t i = 0; i < 4; i++) + expected.u16Dim4[i] = testInput.u16vec4[i] >> 12u; + } + + expected.u16Vec2_Dim2 = testInput.u16vec2; + { + for (uint32_t i = 0; i < 4; i++) + expected.u16Vec2_Dim4[i] = testInput.u16vec4[i] >> 8u; + } + + expected.u16Vec3_Dim3 = testInput.u16vec3; + expected.u16Vec4_Dim4 = testInput.u16vec4; + + // test encode/decode { const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; uint32_t3 stored; @@ -92,19 +123,30 @@ class CQuantizedSequenceTester final : public ITester qs = sampling::QuantizedSequence::create(input.scalar16); + output.u16Dim1 = qs.get(0); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec2); + for (uint32_t i = 0; i < 2; i++) + output.u16Dim2[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec3); + for (uint32_t i = 0; i < 3; i++) + output.u16Dim3[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec4); + for (uint32_t i = 0; i < 4; i++) + output.u16Dim4[i] = qs.get(i); + } + + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec2); + for (uint32_t i = 0; i < 2; i++) + output.u16Vec2_Dim2[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec4); + for (uint32_t i = 0; i < 4; i++) + output.u16Vec2_Dim4[i] = qs.get(i); + } + + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec3); + for (uint32_t i = 0; i < 3; i++) + output.u16Vec3_Dim3[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec4); + for (uint32_t i = 0; i < 4; i++) + output.u16Vec4_Dim4[i] = qs.get(i); + } + // test encode/decode { sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); From fd49d0a66fc43c6c1e282b1fd3a8943e2c584af4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 29 Jan 2026 09:41:56 +0100 Subject: [PATCH 204/219] cleanup --- 40_PathTracer/CMakeLists.txt | 13 +++++-------- 40_PathTracer/app_resources/present/default.hlsl | 4 ---- .../src/renderer/present/CWindowPresenter.cpp | 2 +- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index 09dc15db1..8c0fbae51 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -1,20 +1,17 @@ -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - +include(common) set(NBL_INCLUDE_SERACH_DIRECTORIES "${NBL_EXT_MITSUBA_LOADER_INCLUDE_DIRS}" "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/src" ) -list(APPEND NBL_LIBRARIES +set(NBL_LIBRARIES "${NBL_EXT_MITSUBA_LOADER_LIB}" + Nabla::ext::FullScreenTriangle imguizmo "${NBL_EXT_IMGUI_UI_LIB}" ) -list(APPEND NBL_EXAMPLE_SOURCES +set(NBL_EXAMPLE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/io/CSceneLoader.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CSession.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CScene.cpp" @@ -22,7 +19,7 @@ list(APPEND NBL_EXAMPLE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/resolve/CBasicRWMCResolver.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/present/CWindowPresenter.cpp" ) -nbl_create_executable_project("${NBL_EXAMPLE_SOURCES}" "${}" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") +nbl_create_executable_project("${NBL_EXAMPLE_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") diff --git a/40_PathTracer/app_resources/present/default.hlsl b/40_PathTracer/app_resources/present/default.hlsl index 993e24056..d063292da 100644 --- a/40_PathTracer/app_resources/present/default.hlsl +++ b/40_PathTracer/app_resources/present/default.hlsl @@ -38,7 +38,3 @@ float32_t4 present_default(SVertexAttributes vxAttr) : SV_Target0 } return float32_t4(gSensorTextures[pc.imageIndex].SampleLevel(gSensorSamplers[0],uv,0.f).rgb*tint,1.0f); } - -// Precompile the FullScreenTriangle vertex shader along with this fragment shader. -#define NBL_EXT_FULLSCREEN_TRIANGLE_VS_ENTRYPOINT present_fri_ext -#include diff --git a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp index 44164463e..ec1711f03 100644 --- a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp +++ b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp @@ -163,7 +163,7 @@ bool CWindowPresenter::init_impl(CRenderer* renderer) .entryPoint = "present_default" }; - ext::FullScreenTriangle::ProtoPipeline fsTriProtoPln(shader, "present_fri_ext"); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPln(assMan, device, logger.get().get()); if (!fsTriProtoPln) { logger.log("`CWindowPresenter::create` failed to create Full Screen Triangle protopipeline or load its vertex shader!",ILogger::ELL_ERROR); return false; } m_present = fsTriProtoPln.createPipeline(fragSpec, layout.get(), getRenderpass()); From 8485356fc4263232746b517d4eca602d56a16816 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 29 Jan 2026 16:56:08 +0700 Subject: [PATCH 205/219] added more encode/decode tests --- .../CQuantizedSequenceTester.h | 165 +++++++++++++++++- .../app_resources/common.hlsl | 101 ++++++++++- 2 files changed, 252 insertions(+), 14 deletions(-) diff --git a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h index 5dbaf6472..b8ab12145 100644 --- a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h +++ b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h @@ -42,9 +42,15 @@ class CQuantizedSequenceTester final : public ITester(0x2f800004u); + } + { + const uint32_t multiplier = (1u << 31u) - 1u; + uint32_t1 stored; + stored[0] = uint32_t(testInput.unorm1[0] * multiplier); + expected.unorm1_post_u32 = float32_t1(stored ^ testInput.scrambleKey1) * bit_cast(0x2f800004u); + } + { + const uint32_t bitsPerComponent = 16u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t2 stored; + for (uint32_t i = 0; i < 2; i++) + stored[i] = uint32_t(testInput.unorm2[i] * fullWidthMultiplier) >> discardBits; + expected.unorm2_pre_u32 = float32_t2(stored ^ testInput.scrambleKey2) * bit_cast(0x2f800004u); + } + { + const uint32_t bitsPerComponent = 16u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t multiplier = (1u << bitsPerComponent) - 1u; + uint32_t2 stored, scrambleKey; + for (uint32_t i = 0; i < 2; i++) + { + stored[i] = uint32_t(testInput.unorm2[i] * multiplier) >> discardBits; + scrambleKey[i] = testInput.scrambleKey2[i] >> discardBits; + } + expected.unorm2_post_u32 = float32_t2(stored ^ scrambleKey) * bit_cast(0x37800080u); + } + { + const uint32_t bitsPerComponent = 10u; + const uint32_t discardBits = 32u - bitsPerComponent; const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; uint32_t3 stored; for (uint32_t i = 0; i < 3; i++) - stored[i] = uint32_t(testInput.unorm3[i] * fullWidthMultiplier) >> 11u; - expected.unorm3_predecode = float32_t3(stored ^ testInput.scrambleKey3) * bit_cast(0x2f800004u); + stored[i] = uint32_t(testInput.unorm3[i] * fullWidthMultiplier) >> discardBits; + expected.unorm3_pre_u32 = float32_t3(stored ^ testInput.scrambleKey3) * bit_cast(0x2f800004u); } { - const uint32_t multiplier = (1u << 21u) - 1u; + const uint32_t bitsPerComponent = 10u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t multiplier = (1u << bitsPerComponent) - 1u; uint32_t3 stored, scrambleKey; for (uint32_t i = 0; i < 3; i++) { - stored[i] = uint32_t(testInput.unorm3[i] * multiplier) >> 11u; - scrambleKey[i] = testInput.scrambleKey3[i] >> 11u; + stored[i] = uint32_t(testInput.unorm3[i] * multiplier) >> discardBits; + scrambleKey[i] = testInput.scrambleKey3[i] >> discardBits; } - expected.unorm3_postdecode = float32_t3(stored ^ scrambleKey) * bit_cast(0x35000004u); + expected.unorm3_post_u32 = float32_t3(stored ^ scrambleKey) * bit_cast(0x3a802008u); + } + { + const uint32_t bitsPerComponent = 8u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t4 stored; + for (uint32_t i = 0; i < 4; i++) + stored[i] = uint32_t(testInput.unorm4[i] * fullWidthMultiplier) >> discardBits; + expected.unorm4_pre_u32 = float32_t4(stored ^ testInput.scrambleKey4) * bit_cast(0x2f800004u); + } + { + const uint32_t bitsPerComponent = 8u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t multiplier = (1u << bitsPerComponent) - 1u; + uint32_t4 stored, scrambleKey; + for (uint32_t i = 0; i < 4; i++) + { + stored[i] = uint32_t(testInput.unorm4[i] * multiplier) >> discardBits; + scrambleKey[i] = testInput.scrambleKey4[i] >> discardBits; + } + expected.unorm4_post_u32 = float32_t4(stored ^ scrambleKey) * bit_cast(0x3b808081u); + } + + // test encode/decode uint32_tN storage, dim == N + { + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t2 stored; + for (uint32_t i = 0; i < 2; i++) + stored[i] = uint32_t(testInput.unorm2[i] * fullWidthMultiplier); + expected.unorm2_pre_u32t2 = float32_t2(stored ^ testInput.scrambleKey2) * bit_cast(0x2f800004u); + } + { + const uint32_t multiplier = (1u << 31u) - 1u; + uint32_t2 stored; + for (uint32_t i = 0; i < 2; i++) + stored[i] = uint32_t(testInput.unorm2[i] * multiplier); + expected.unorm2_post_u32t2 = float32_t2(stored ^ testInput.scrambleKey2) * bit_cast(0x2f800004u); + } + { + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t3 stored; + for (uint32_t i = 0; i < 3; i++) + stored[i] = uint32_t(testInput.unorm3[i] * fullWidthMultiplier); + expected.unorm3_pre_u32t3 = float32_t3(stored ^ testInput.scrambleKey3) * bit_cast(0x2f800004u); + } + { + const uint32_t multiplier = (1u << 31u) - 1u; + uint32_t3 stored; + for (uint32_t i = 0; i < 3; i++) + stored[i] = uint32_t(testInput.unorm3[i] * multiplier); + expected.unorm3_post_u32t3 = float32_t3(stored ^ testInput.scrambleKey3) * bit_cast(0x2f800004u); + } + { + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t4 stored; + for (uint32_t i = 0; i < 4; i++) + stored[i] = uint32_t(testInput.unorm4[i] * fullWidthMultiplier); + expected.unorm4_pre_u32t4 = float32_t4(stored ^ testInput.scrambleKey4) * bit_cast(0x2f800004u); + } + { + const uint32_t multiplier = (1u << 31u) - 1u; + uint32_t4 stored; + for (uint32_t i = 0; i < 4; i++) + stored[i] = uint32_t(testInput.unorm4[i] * multiplier); + expected.unorm4_post_u32t4 = float32_t4(stored ^ testInput.scrambleKey4) * bit_cast(0x2f800004u); + } + + // test encode/decode uint32_t2 storage, dim 3 + { + const uint32_t bitsPerComponent = 21u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t3 stored; + for (uint32_t i = 0; i < 3; i++) + stored[i] = uint32_t(testInput.unorm3[i] * fullWidthMultiplier) >> discardBits; + expected.unorm3_pre_u32t2 = float32_t3(stored ^ testInput.scrambleKey3) * bit_cast(0x2f800004u); + } + { + const uint32_t bitsPerComponent = 21u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t multiplier = (1u << bitsPerComponent) - 1u; + uint32_t3 stored, scrambleKey; + for (uint32_t i = 0; i < 3; i++) + { + stored[i] = uint32_t(testInput.unorm3[i] * multiplier) >> discardBits; + scrambleKey[i] = testInput.scrambleKey3[i] >> discardBits; + } + expected.unorm3_post_u32t2 = float32_t3(stored ^ scrambleKey) * bit_cast(0x35000004u); } return expected; @@ -145,8 +276,24 @@ class CQuantizedSequenceTester final : public ITester qs = sampling::QuantizedSequence::template encode(input.unorm1); + output.unorm1_pre_u32 = qs.template decode(input.scrambleKey1); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm1); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey1[0]); + output.unorm1_post_u32 = qs.template decode(key); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm2); + output.unorm2_pre_u32 = qs.template decode(input.scrambleKey2); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm2); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey2); + output.unorm2_post_u32 = qs.template decode(key); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + output.unorm3_pre_u32 = qs.template decode(input.scrambleKey3); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey3); + output.unorm3_post_u32 = qs.template decode(key); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm4); + output.unorm4_pre_u32 = qs.template decode(input.scrambleKey4); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm4); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey4); + output.unorm4_post_u32 = qs.template decode(key); + } + + // test encode/decode uint32_tN storage, dim == N + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm2); + output.unorm2_pre_u32t2 = qs.template decode(input.scrambleKey2); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm2); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey2); + output.unorm2_post_u32t2 = qs.template decode(key); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + output.unorm3_pre_u32t3 = qs.template decode(input.scrambleKey3); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey3); + output.unorm3_post_u32t3 = qs.template decode(key); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm4); + output.unorm4_pre_u32t4 = qs.template decode(input.scrambleKey4); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm4); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey4); + output.unorm4_post_u32t4 = qs.template decode(key); + } + + // test encode/decode uint32_t2 storage, dim 3 { sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); - output.unorm3_predecode = qs.template decode(input.scrambleKey3); + output.unorm3_pre_u32t2 = qs.template decode(input.scrambleKey3); } { sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey3); - output.unorm3_postdecode = qs.template decode(key); + output.unorm3_post_u32t2 = qs.template decode(key); } } }; From 1e132d93944ededa39d938631067aace325f456e Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 29 Jan 2026 17:36:49 +0700 Subject: [PATCH 206/219] have ITester interface return if test pass --- common/include/nbl/examples/Tester/ITester.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 5482aea63..66cef6888 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -174,7 +174,7 @@ class ITester m_queue = m_device->getQueue(m_queueFamily, 0); } - void performTestsAndVerifyResults(const std::string& logFileName) + bool performTestsAndVerifyResults(const std::string& logFileName) { m_logFile.open(logFileName, std::ios::out | std::ios::trunc); if (!m_logFile.is_open()) @@ -201,12 +201,13 @@ class ITester core::vector cpuTestResults = performCpuTests(inputTestValues); core::vector gpuTestResults = performGpuTests(inputTestValues); - verifyAllTestResults(cpuTestResults, gpuTestResults, exceptedTestResults); + bool pass = verifyAllTestResults(cpuTestResults, gpuTestResults, exceptedTestResults); m_logger->log("TESTS DONE.", system::ILogger::ELL_PERFORMANCE); reloadSeed(); m_logFile.close(); + return pass; } virtual ~ITester() @@ -230,7 +231,7 @@ class ITester reloadSeed(); }; - virtual void verifyTestResults(const TestResults& expectedTestValues, const TestResults& testValues, const size_t testIteration, const uint32_t seed, TestType testType) = 0; + virtual bool verifyTestResults(const TestResults& expectedTestValues, const TestResults& testValues, const size_t testIteration, const uint32_t seed, TestType testType) = 0; virtual InputTestValues generateInputTestValues() = 0; @@ -307,13 +308,14 @@ class ITester } template - void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, + bool verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const size_t testIteration, const uint32_t seed, const TestType testType, const float64_t maxAllowedDifference = 0.0) { if (compareTestValues(expectedVal, testVal, maxAllowedDifference)) - return; + return true; printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType); + return false; } template @@ -370,13 +372,15 @@ class ITester return output; } - void verifyAllTestResults(const core::vector& cpuTestReults, const core::vector& gpuTestReults, const core::vector& exceptedTestReults) + bool verifyAllTestResults(const core::vector& cpuTestReults, const core::vector& gpuTestReults, const core::vector& exceptedTestReults) { + bool pass = true; for (int i = 0; i < m_testIterationCount; ++i) { - verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, m_seed, ITester::TestType::CPU); - verifyTestResults(exceptedTestReults[i], gpuTestReults[i], i, m_seed, ITester::TestType::GPU); + pass = verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, m_seed, ITester::TestType::CPU) && pass; + pass = verifyTestResults(exceptedTestReults[i], gpuTestReults[i], i, m_seed, ITester::TestType::GPU) && pass; } + return pass; } void reloadSeed() From 88e0fd1f75baf4bdd3ba8f9da039c1eb65f250d3 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 29 Jan 2026 17:37:20 +0700 Subject: [PATCH 207/219] refactor quantized sequence example --- .../CQuantizedSequenceTester.h | 85 ++++++++++--------- 74_QuantizedSequenceTests/main.cpp | 4 +- 2 files changed, 47 insertions(+), 42 deletions(-) diff --git a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h index b8ab12145..d7d655db8 100644 --- a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h +++ b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h @@ -252,48 +252,51 @@ class CQuantizedSequenceTester final : public ITester Date: Thu, 29 Jan 2026 17:45:42 +0700 Subject: [PATCH 208/219] refactor old examples to return pass test --- 22_CppCompat/CIntrinsicsTester.h | 101 +++++++++-------- 22_CppCompat/CTgmathTester.h | 151 +++++++++++++------------ 22_CppCompat/main.cpp | 7 +- 59_QuaternionTests/CQuaternionTester.h | 35 +++--- 59_QuaternionTests/main.cpp | 3 +- 74_QuantizedSequenceTests/main.cpp | 3 +- 6 files changed, 154 insertions(+), 146 deletions(-) diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index 00e343d90..2c5003b7b 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -200,60 +200,61 @@ class CIntrinsicsTester final : public ITestergetQueue(0, 0); m_commandPool = m_device->createCommandPool(m_queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index 245a0ec13..4efc78358 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -131,43 +131,46 @@ class CQuaternionTester final : public ITester - void verifyScaledVectorTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, + bool verifyScaledVectorTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const size_t testIteration, const uint32_t seed, const TestType testType, const float64_t maxAbsoluteDifference, const float64_t maxRelativeDifference) { if (nbl::hlsl::testing::orientationCompare(expectedVal, testVal, maxRelativeDifference) && nbl::hlsl::testing::vectorLengthCompare(expectedVal, testVal, maxAbsoluteDifference, maxRelativeDifference)) - return; + return true; printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType); + return false; } template - void verifyVectorTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, + bool verifyVectorTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const size_t testIteration, const uint32_t seed, const TestType testType, const float64_t maxAllowedDifference, const bool testOrientation) { if (compareVectorTestValues(expectedVal, testVal, maxAllowedDifference, testOrientation)) - return; + return true; printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType); + return false; } template requires concepts::FloatingPointLikeVectorial diff --git a/59_QuaternionTests/main.cpp b/59_QuaternionTests/main.cpp index 07f44b9d6..00a60aef8 100644 --- a/59_QuaternionTests/main.cpp +++ b/59_QuaternionTests/main.cpp @@ -51,7 +51,8 @@ class QuaternionTest final : public application_templates::MonoDeviceApplication CQuaternionTester quaternionTester(8); quaternionTester.setupPipeline(pplnSetupData); - quaternionTester.performTestsAndVerifyResults("QuaternionTestLog.txt"); + if (!quaternionTester.performTestsAndVerifyResults("QuaternionTestLog.txt")) + return false; } // In contrast to fences, we just need one semaphore to rule all dispatches diff --git a/74_QuantizedSequenceTests/main.cpp b/74_QuantizedSequenceTests/main.cpp index 49ecea620..dbba8a35f 100644 --- a/74_QuantizedSequenceTests/main.cpp +++ b/74_QuantizedSequenceTests/main.cpp @@ -51,8 +51,7 @@ class QuantizedSequenceTest final : public application_templates::MonoDeviceAppl CQuantizedSequenceTester quantizedSequenceTester(8); quantizedSequenceTester.setupPipeline(pplnSetupData); - bool pass = quantizedSequenceTester.performTestsAndVerifyResults("QuantizedSequenceTestLog.txt"); - if (!pass) + if (!quantizedSequenceTester.performTestsAndVerifyResults("QuantizedSequenceTestLog.txt")) return false; } From f9e3554fcc0e105f7e5fa742ed91816125455cd6 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 29 Jan 2026 12:57:04 +0100 Subject: [PATCH 209/219] update examples using fri ext --- 24_ColorSpaceTest/CMakeLists.txt | 1 + 30_ComputeShaderPathTracer/CMakeLists.txt | 1 + 71_RayTracingPipeline/CMakeLists.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/24_ColorSpaceTest/CMakeLists.txt b/24_ColorSpaceTest/CMakeLists.txt index da95b3d8e..71b1dde16 100644 --- a/24_ColorSpaceTest/CMakeLists.txt +++ b/24_ColorSpaceTest/CMakeLists.txt @@ -4,6 +4,7 @@ if(NOT RES) endif() nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") +target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::FullScreenTriangle) if(NBL_EMBED_BUILTIN_RESOURCES) set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) diff --git a/30_ComputeShaderPathTracer/CMakeLists.txt b/30_ComputeShaderPathTracer/CMakeLists.txt index 07b0fd396..1a0b0e9bd 100644 --- a/30_ComputeShaderPathTracer/CMakeLists.txt +++ b/30_ComputeShaderPathTracer/CMakeLists.txt @@ -11,6 +11,7 @@ if(NBL_BUILD_IMGUI) list(APPEND NBL_LIBRARIES imtestengine "${NBL_EXT_IMGUI_UI_LIB}" + Nabla::ext::FullScreenTriangle ) nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") diff --git a/71_RayTracingPipeline/CMakeLists.txt b/71_RayTracingPipeline/CMakeLists.txt index ca361af31..250f7444e 100644 --- a/71_RayTracingPipeline/CMakeLists.txt +++ b/71_RayTracingPipeline/CMakeLists.txt @@ -11,6 +11,7 @@ if(NBL_BUILD_IMGUI) list(APPEND NBL_LIBRARIES imtestengine "${NBL_EXT_IMGUI_UI_LIB}" + Nabla::ext::FullScreenTriangle ) nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") From 63f0079781fe189c672297343b30308333646294 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 30 Jan 2026 10:41:15 +0700 Subject: [PATCH 210/219] fix examples broken by ITester change that weren't changed properly --- 14_Mortons/CTester.h | 240 +++++++++++++------------ 14_Mortons/main.cpp | 7 +- 22_CppCompat/CIntrinsicsTester.h | 1 + 22_CppCompat/CTgmathTester.h | 1 + 59_QuaternionTests/CQuaternionTester.h | 1 + 5 files changed, 129 insertions(+), 121 deletions(-) diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h index e25fa58a2..6b46ff54b 100644 --- a/14_Mortons/CTester.h +++ b/14_Mortons/CTester.h @@ -253,151 +253,153 @@ class CTester final : public ITester return expected; } - void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, ITester::TestType testType) override + bool verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, ITester::TestType testType) override { + bool pass = true; // Some verification is commented out and moved to CTester2 due to bug in dxc. Uncomment them when the bug is fixed. - verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testIteration, seed, testType); - verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testIteration, seed, testType); - verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testIteration, seed, testType); - verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testIteration, seed, testType); - verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testIteration, seed, testType); - verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testIteration, seed, testType); - verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testIteration, seed, testType); - verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testIteration, seed, testType); - verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testIteration, seed, testType); - verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testIteration, seed, testType); - verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testIteration, seed, testType); - verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testIteration, seed, testType); - verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testIteration, seed, testType); - verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testIteration, seed, testType); + pass = verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testIteration, seed, testType) && pass; + pass = verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testIteration, seed, testType) && pass; // Morton Plus - verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testIteration, seed, testType); - verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testIteration, seed, testType); - verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testIteration, seed, testType); - verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testIteration, seed, testType); - - verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testIteration, seed, testType); - verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testIteration, seed, testType); - verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testIteration, seed, testType); - verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testIteration, seed, testType); - - verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testIteration, seed, testType); - verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testIteration, seed, testType); - verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testIteration, seed, testType); - verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testIteration, seed, testType); + pass = verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testIteration, seed, testType) && pass; + + pass = verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testIteration, seed, testType) && pass; + + pass = verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testIteration, seed, testType) && pass; // Morton Minus - verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testIteration, seed, testType); - verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testIteration, seed, testType); - verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testIteration, seed, testType); - verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testIteration, seed, testType); + pass = verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testIteration, seed, testType) && pass; - verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testIteration, seed, testType); - verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testIteration, seed, testType); - verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testIteration, seed, testType); - verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testIteration, seed, testType); + pass = verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testIteration, seed, testType) && pass; - verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testIteration, seed, testType); - verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testIteration, seed, testType); - verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testIteration, seed, testType); - verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testIteration, seed, testType); + pass = verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testIteration, seed, testType) && pass; // Morton coordinate-wise equality - verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testIteration, seed, testType); - verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testIteration, seed, testType); - verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testIteration, seed, testType); - verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testIteration, seed, testType); - - verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testIteration, seed, testType); - verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testIteration, seed, testType); - verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testIteration, seed, testType); - verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testIteration, seed, testType); - - verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testIteration, seed, testType); - verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testIteration, seed, testType); - verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testIteration, seed, testType); - verifyTestValue("mortonEqual_emulated_4", expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testIteration, seed, testType); + pass = verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testIteration, seed, testType) && pass; + + pass = verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testIteration, seed, testType) && pass; + + pass = verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonEqual_emulated_4", expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testIteration, seed, testType) && pass; // Morton coordinate-wise unsigned inequality - verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testIteration, seed, testType); - - verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testIteration, seed, testType); - - verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testIteration, seed, testType); - verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testIteration, seed, testType); + pass = verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testIteration, seed, testType) && pass; + + pass = verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testIteration, seed, testType) && pass; + + pass = verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testIteration, seed, testType) && pass; // verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testIteration, seed, testType); // Morton coordinate-wise signed inequality - verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testIteration, seed, testType); + pass = verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testIteration, seed, testType) && pass; // verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testIteration, seed, testType); + pass = verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testIteration, seed, testType) && pass; // verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testIteration, seed, testType); - verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testIteration, seed, testType); + pass = verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testIteration, seed, testType) && pass; // verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testIteration, seed, testType); // Morton left-shift - verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testIteration, seed, testType); + pass = verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testIteration, seed, testType) && pass; - verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testIteration, seed, testType); + pass = verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testIteration, seed, testType) && pass; - verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testIteration, seed, testType); - verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testIteration, seed, testType); + pass = verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testIteration, seed, testType) && pass; // Morton unsigned right-shift - verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testIteration, seed, testType); + pass = verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testIteration, seed, testType) && pass; - verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testIteration, seed, testType); + pass = verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testIteration, seed, testType) && pass; - verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testIteration, seed, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testIteration, seed, testType); + pass = verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testIteration, seed, testType) && pass; // Morton signed right-shift - verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testIteration, seed, testType); + pass = verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testIteration, seed, testType) && pass; // verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testIteration, seed, testType); + pass = verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testIteration, seed, testType) && pass; //verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testIteration, seed, testType); - verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testIteration, seed, testType); + pass = verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testIteration, seed, testType) && pass; + pass = verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testIteration, seed, testType) && pass; // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testIteration, seed, testType); + return pass; } }; @@ -473,17 +475,19 @@ class CTester2 final : public ITester(pplnSetupData)); - mortonTester2.performTestsAndVerifyResults("MortonTestLog2.txt"); + pass = mortonTester2.performTestsAndVerifyResults("MortonTestLog2.txt") && pass; } - return true; + return pass; } void onAppTerminated_impl() override diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index 2c5003b7b..a18177ab9 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -255,6 +255,7 @@ class CIntrinsicsTester final : public ITester From 301fb402e6d0d3c204b1da67e920283d6f9abca5 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 30 Jan 2026 13:55:31 +0700 Subject: [PATCH 211/219] use &= to make bool usage look better --- 14_Mortons/CTester.h | 224 +++++++++--------- 14_Mortons/main.cpp | 4 +- 22_CppCompat/CIntrinsicsTester.h | 98 ++++---- 22_CppCompat/CTgmathTester.h | 148 ++++++------ 22_CppCompat/main.cpp | 4 +- 59_QuaternionTests/CQuaternionTester.h | 22 +- .../CQuantizedSequenceTester.h | 80 +++---- 7 files changed, 290 insertions(+), 290 deletions(-) diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h index 6b46ff54b..2e81ef564 100644 --- a/14_Mortons/CTester.h +++ b/14_Mortons/CTester.h @@ -257,147 +257,147 @@ class CTester final : public ITester { bool pass = true; // Some verification is commented out and moved to CTester2 due to bug in dxc. Uncomment them when the bug is fixed. - pass = verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testIteration, seed, testType) && pass; - pass = verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testIteration, seed, testType) && pass; + pass &= verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testIteration, seed, testType); + pass &= verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testIteration, seed, testType); + pass &= verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testIteration, seed, testType); + pass &= verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testIteration, seed, testType); + pass &= verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testIteration, seed, testType); + pass &= verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testIteration, seed, testType); + pass &= verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testIteration, seed, testType); + pass &= verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testIteration, seed, testType); + pass &= verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testIteration, seed, testType); + pass &= verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testIteration, seed, testType); + pass &= verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testIteration, seed, testType); + pass &= verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testIteration, seed, testType); + pass &= verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testIteration, seed, testType); + pass &= verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testIteration, seed, testType); // Morton Plus - pass = verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testIteration, seed, testType); - pass = verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testIteration, seed, testType); - pass = verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testIteration, seed, testType); // Morton Minus - pass = verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testIteration, seed, testType); - pass = verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testIteration, seed, testType); - pass = verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testIteration, seed, testType); // Morton coordinate-wise equality - pass = verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testIteration, seed, testType); - pass = verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testIteration, seed, testType); - pass = verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonEqual_emulated_4", expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_emulated_4", expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testIteration, seed, testType); // Morton coordinate-wise unsigned inequality - pass = verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testIteration, seed, testType) && pass; - - pass = verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testIteration, seed, testType) && pass; - - pass = verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testIteration, seed, testType); + + pass &= verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testIteration, seed, testType); + + pass &= verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testIteration, seed, testType); // verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testIteration, seed, testType); // Morton coordinate-wise signed inequality - pass = verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testIteration, seed, testType); // verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testIteration, seed, testType); - pass = verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testIteration, seed, testType); // verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testIteration, seed, testType); - pass = verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testIteration, seed, testType); // verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testIteration, seed, testType); // Morton left-shift - pass = verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testIteration, seed, testType); - pass = verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testIteration, seed, testType); - pass = verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testIteration, seed, testType); // Morton unsigned right-shift - pass = verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testIteration, seed, testType); - pass = verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testIteration, seed, testType); - pass = verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testIteration, seed, testType); // Morton signed right-shift - pass = verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testIteration, seed, testType); // verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testIteration, seed, testType); - pass = verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testIteration, seed, testType); //verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testIteration, seed, testType); - pass = verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testIteration, seed, testType) && pass; - pass = verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testIteration, seed, testType) && pass; + pass &= verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testIteration, seed, testType); // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testIteration, seed, testType); return pass; } @@ -478,15 +478,15 @@ class CTester2 final : public ITester(pplnSetupData)); - pass = mortonTester2.performTestsAndVerifyResults("MortonTestLog2.txt") && pass; + pass &= mortonTester2.performTestsAndVerifyResults("MortonTestLog2.txt"); } return pass; diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index a18177ab9..724bac2e8 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -203,58 +203,58 @@ class CIntrinsicsTester final : public ITester Date: Fri, 30 Jan 2026 22:08:12 +0100 Subject: [PATCH 212/219] correct a small bug --- 40_PathTracer/main.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp index afa3df6ef..c86be6909 100644 --- a/40_PathTracer/main.cpp +++ b/40_PathTracer/main.cpp @@ -235,20 +235,21 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes // quick test code nbl::core::vector sensors(3,scene_daily_pt->getSensors().front()); { - sensors[1].constants.width = 640; - sensors[1].constants.height = 360; + sensors[1].mutableDefaults.cropWidth = 640; + sensors[1].mutableDefaults.cropHeight = 360; sensors[1].mutableDefaults.cropOffsetX = 0; sensors[1].mutableDefaults.cropOffsetY = 0; - sensors[1].mutableDefaults.cropWidth = sensors[1].mutableDefaults.cropWidth; - sensors[1].mutableDefaults.cropHeight = sensors[1].mutableDefaults.cropHeight; } { sensors[2].mutableDefaults.cropWidth = 5120; sensors[2].mutableDefaults.cropHeight = 2880; sensors[2].mutableDefaults.cropOffsetX = 128; sensors[2].mutableDefaults.cropOffsetY = 128; - sensors[2].constants.width = sensors[2].mutableDefaults.cropWidth+2*sensors[2].mutableDefaults.cropOffsetX; - sensors[2].constants.height = sensors[2].mutableDefaults.cropHeight+2*sensors[2].mutableDefaults.cropOffsetY; + } + for (auto i=1; i<3; i++) + { + sensors[i].constants.width = sensors[i].mutableDefaults.cropWidth+2*sensors[i].mutableDefaults.cropOffsetX; + sensors[i].constants.height = sensors[i].mutableDefaults.cropHeight+2*sensors[i].mutableDefaults.cropOffsetY; } sensors.erase(sensors.begin()); for (const auto& sensor : sensors) From 870dc8517033585b25f6f9f139f4611fad8f2c35 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 31 Jan 2026 14:10:27 +0100 Subject: [PATCH 213/219] adjust to comments --- 50.IESViewer/App.hpp | 64 ++++--- 50.IESViewer/AppEvent.cpp | 30 ++-- 50.IESViewer/AppInit.cpp | 107 ++++++++---- 50.IESViewer/AppRender.cpp | 125 ++++++++++---- 50.IESViewer/AppUI.cpp | 161 +++++++----------- 50.IESViewer/CMakeLists.txt | 2 + 50.IESViewer/CSimpleIESRenderer.hpp | 2 +- 50.IESViewer/IES.cpp | 61 ------- 50.IESViewer/IES.hpp | 87 +++++++++- 50.IESViewer/app_resources/common.hlsl | 17 +- 50.IESViewer/app_resources/ies.unified.hlsl | 123 +++++++------ 50.IESViewer/main.cpp | 8 +- .../include/nbl/examples/cameras/CCamera.hpp | 10 +- common/include/nbl/examples/examples.hpp | 16 +- .../geometry/CGeometryCreatorScene.hpp | 114 +++++++------ .../geometry/CSimpleDebugRenderer.hpp | 12 +- .../examples/shaders/geometry/unified.hlsl | 4 +- 17 files changed, 525 insertions(+), 418 deletions(-) diff --git a/50.IESViewer/App.hpp b/50.IESViewer/App.hpp index d5e05c6cb..4912b4b2c 100644 --- a/50.IESViewer/App.hpp +++ b/50.IESViewer/App.hpp @@ -31,8 +31,8 @@ concept AppIESByteCount = std::unsigned_integral; template concept AppIESContainer = std::ranges::sized_range && (std::same_as, float> || - std::same_as, IESTextureInfo>); -static_assert(alignof(IESTextureInfo) == 4u, "IESTextureInfo must be 4 byte aligned"); + std::same_as, hlsl::ies::IESTextureInfo>); +static_assert(alignof(hlsl::ies::IESTextureInfo) == 4u, "IESTextureInfo must be 4 byte aligned"); template concept AppIESBufferCreationAllowed = AppIESByteCount || AppIESContainer; @@ -88,6 +88,7 @@ class IESWindowedApplication : public virtual SimpleWindowedApplication return false; ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + swapchainParams.sharedParams.imageUsage |= IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT; if (!swapchainParams.deduceFormat(m_physicalDevice)) return logFail("Could not choose a Surface Format for the Swapchain!"); @@ -156,6 +157,7 @@ class IESWindowedApplication : public virtual SimpleWindowedApplication return; const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = { renderFrame(nextPresentationTimestamp) }; + onPostRenderFrame(rendered[0]); m_surface->present(m_currentImageAcquire.imageIndex, rendered); if (rendered->semaphore) m_framesInFlight.emplace_back(smart_refctd_ptr(rendered->semaphore), rendered->value); @@ -163,6 +165,8 @@ class IESWindowedApplication : public virtual SimpleWindowedApplication inline bool keepRunning() override final { + if (m_exitRequested) + return false; if (m_surface->irrecoverable()) return false; @@ -186,9 +190,11 @@ class IESWindowedApplication : public virtual SimpleWindowedApplication oracle.reportBeginFrameRecord(); } inline const auto& getCurrentAcquire() const { return m_currentImageAcquire; } + inline void requestExit() { m_exitRequested = true; } virtual const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const = 0; virtual video::IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) = 0; + virtual void onPostRenderFrame(const video::IQueue::SSubmitInfo::SSemaphoreInfo& rendered) {} const hlsl::uint16_t2 m_initialResolution; const asset::E_FORMAT m_depthFormat; @@ -205,6 +211,7 @@ class IESWindowedApplication : public virtual SimpleWindowedApplication core::deque m_framesInFlight; video::ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; video::CDumbPresentationOracle oracle; + bool m_exitRequested = false; }; class IESViewer final : public IESWindowedApplication, public BuiltinResourcesApplication @@ -213,6 +220,12 @@ class IESViewer final : public IESWindowedApplication, public BuiltinResourcesAp using asset_base_t = BuiltinResourcesApplication; public: + static constexpr inline uint32_t AppWindowWidth = 669u * 2u; + static constexpr inline uint32_t AppWindowHeight = AppWindowWidth; + static constexpr inline asset::E_FORMAT AppDepthBufferFormat = asset::EF_UNKNOWN; + static constexpr inline const char* MediaEntry = "../../media"; + static constexpr inline const char* InputJsonFile = "../inputs.json"; + IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD); bool onAppInitialized(smart_refctd_ptr&& system) override; @@ -220,6 +233,7 @@ class IESViewer final : public IESWindowedApplication, public BuiltinResourcesAp protected: const IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override; + void onPostRenderFrame(const video::IQueue::SSubmitInfo::SSemaphoreInfo& rendered) override; private: smart_refctd_ptr m_graphicsPipeline; @@ -228,7 +242,6 @@ class IESViewer final : public IESWindowedApplication, public BuiltinResourcesAp bool m_running = true; std::vector m_assets; - size_t m_activeAssetIx = 0; size_t m_realFrameIx = 0; smart_refctd_ptr m_semaphore; @@ -242,18 +255,11 @@ class IESViewer final : public IESWindowedApplication, public BuiltinResourcesAp uint32_t m_plot3DWidth = 640u; uint32_t m_plot3DHeight = 640u; float m_plotRadius = 100.0f; - float m_cameraMoveSpeed = 1.0f; - float m_cameraRotateSpeed = 1.0f; - float m_cameraFovDeg = 60.0f; - bool m_cameraControlEnabled = false; - bool m_cameraControlApplied = false; - bool m_fullscreen3D = false; - bool m_wireframeEnabled = false; - bool m_showOctaMapPreview = true; - bool m_showHints = true; - bool m_plot2DRectValid = false; - hlsl::float32_t2 m_plot2DRectMin = hlsl::float32_t2(0.f, 0.f); - hlsl::float32_t2 m_plot2DRectMax = hlsl::float32_t2(0.f, 0.f); + bool m_ciMode = false; + bool m_ciScreenshotDone = false; + uint32_t m_ciFrameCounter = 0u; + static constexpr uint32_t CiFramesBeforeCapture = 10u; + system::path m_ciScreenshotPath; std::vector m_assetLabels; std::vector m_candelaDirty; @@ -265,11 +271,28 @@ class IESViewer final : public IESWindowedApplication, public BuiltinResourcesAp smart_refctd_ptr descriptor; } ui; - struct { - IES::E_MODE view = IES::EM_CDC; - bitflag sphere = - bitflag(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE) | hlsl::this_example::ies::ESM_FALSE_COLOR; - } mode; + struct UIState + { + size_t activeAssetIx = 0; + float cameraMoveSpeed = 1.0f; + float cameraRotateSpeed = 1.0f; + float cameraFovDeg = 60.0f; + bool cameraControlEnabled = false; + bool cameraControlApplied = false; + bool wireframeEnabled = false; + bool showOctaMapPreview = true; + bool showHints = true; + bool plot2DRectValid = false; + hlsl::float32_t2 plot2DRectMin = hlsl::float32_t2(0.f, 0.f); + hlsl::float32_t2 plot2DRectMax = hlsl::float32_t2(0.f, 0.f); + + struct + { + IES::E_MODE view = IES::EM_CDC; + bitflag sphere = + bitflag(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE) | hlsl::this_example::ies::ESM_FALSE_COLOR; + } mode; + } uiState; void processMouse(const IMouseEventChannel::range_t& events); void processKeyboard(const IKeyboardEventChannel::range_t& events); @@ -279,6 +302,7 @@ class IESViewer final : public IESWindowedApplication, public BuiltinResourcesAp bitflag aspectFlags = bitflag(IImage::EAF_COLOR_BIT)); bool recreate3DPlotFramebuffers(uint32_t width, uint32_t height); void applyWindowMode(); + bool parseCommandLine(); template requires AppIESBufferCreationAllowed diff --git a/50.IESViewer/AppEvent.cpp b/50.IESViewer/AppEvent.cpp index 3fa8d056e..cbd5ba042 100644 --- a/50.IESViewer/AppEvent.cpp +++ b/50.IESViewer/AppEvent.cpp @@ -13,16 +13,16 @@ void IESViewer::processMouse(const nbl::ui::IMouseEventChannel::range_t& events) if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) { auto* cursorControl = m_window ? m_window->getCursorControl() : nullptr; - if (!cursorControl || !m_plot2DRectValid) + if (!cursorControl || !uiState.plot2DRectValid) continue; const auto cursor = cursorControl->getPosition(); const float cursorX = static_cast(cursor.x); const float cursorY = static_cast(cursor.y); - if (cursorX < m_plot2DRectMin.x || cursorX > m_plot2DRectMax.x || - cursorY < m_plot2DRectMin.y || cursorY > m_plot2DRectMax.y) + if (cursorX < uiState.plot2DRectMin.x || cursorX > uiState.plot2DRectMax.x || + cursorY < uiState.plot2DRectMin.y || cursorY > uiState.plot2DRectMax.y) continue; - auto& ies = m_assets[m_activeAssetIx]; + auto& ies = m_assets[uiState.activeAssetIx]; const auto& accessor = ies.getProfile()->getAccessor(); auto impulse = ev.scrollEvent.verticalScroll * 0.02f; @@ -37,26 +37,26 @@ void IESViewer::processKeyboard(const nbl::ui::IKeyboardEventChannel::range_t& e { const auto ev = *it; - if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) - { + if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) + { if (ev.keyCode == nbl::ui::EKC_UP_ARROW) - m_activeAssetIx = std::clamp(m_activeAssetIx + 1, 0, m_assets.size() - 1u); + uiState.activeAssetIx = std::clamp(uiState.activeAssetIx + 1, 0, m_assets.size() - 1u); else if (ev.keyCode == nbl::ui::EKC_DOWN_ARROW) - m_activeAssetIx = std::clamp(m_activeAssetIx - 1, 0, m_assets.size() - 1u); + uiState.activeAssetIx = std::clamp(uiState.activeAssetIx - 1, 0, m_assets.size() - 1u); - auto& ies = m_assets[m_activeAssetIx]; + auto& ies = m_assets[uiState.activeAssetIx]; if (ev.keyCode == nbl::ui::EKC_C) - mode.view = IES::EM_CDC; + uiState.mode.view = IES::EM_CDC; else if (ev.keyCode == nbl::ui::EKC_V) - mode.view = IES::EM_OCTAHEDRAL_MAP; - else if (ev.keyCode == nbl::ui::EKC_ESCAPE && m_cameraControlEnabled) - m_cameraControlEnabled = false; + uiState.mode.view = IES::EM_OCTAHEDRAL_MAP; + else if (ev.keyCode == nbl::ui::EKC_ESCAPE && uiState.cameraControlEnabled) + uiState.cameraControlEnabled = false; else if (ev.keyCode == nbl::ui::EKC_SPACE) - m_cameraControlEnabled = !m_cameraControlEnabled; + uiState.cameraControlEnabled = !uiState.cameraControlEnabled; if (ev.keyCode == nbl::ui::EKC_Q) - m_running = false; + requestExit(); } } } diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index d0517e2c3..059dc6da3 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -2,8 +2,10 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#include "argparse/argparse.hpp" #include "App.hpp" #include +#include #include #include #include "AppInputParser.hpp" @@ -12,21 +14,46 @@ #include "nbl/ext/ImGui/ImGui.h" #include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" #include "nbl/this_example/builtin/build/spirv/keys.hpp" -#define MEDIA_ENTRY "../../media" -#define INPUT_JSON_FILE "../inputs.json" + +bool IESViewer::parseCommandLine() +{ + argparse::ArgumentParser parser("IESViewer"); + parser.add_argument("--ci") + .help("Run in CI mode: capture a screenshot after a few frames and exit.") + .default_value(false) + .implicit_value(true); + + try + { + parser.parse_args({ argv.data(), argv.data() + argv.size() }); + } + catch (const std::exception& e) + { + if (m_logger) + m_logger->log("Failed to parse arguments: %s", system::ILogger::ELL_ERROR, e.what()); + return false; + } + + m_ciMode = parser.get("--ci"); + if (m_ciMode) + m_ciScreenshotPath = localOutputCWD / "iesviewer_ci.png"; + return true; +} bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) { + if (!parseCommandLine()) + return false; if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; - const auto media = absolute(path(MEDIA_ENTRY)); + const auto media = absolute(path(MediaEntry)); AppInputParser::Output out; AppInputParser parser(system::logger_opt_ptr(m_logger.get())); - if (!parser.parse(out, INPUT_JSON_FILE, media.string())) + if (!parser.parse(out, InputJsonFile, media.string())) return false; m_logger->log("Loading IES m_assets..", system::ILogger::ELL_INFO); @@ -77,9 +104,9 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) { auto start = std::chrono::high_resolution_clock::now(); - auto textureInfos = createBuffer(m_assets.size() * sizeof(IESTextureInfo), "IES Textures Info", false); + auto textureInfos = createBuffer(m_assets.size() * sizeof(hlsl::ies::IESTextureInfo), "IES Textures Info", false); if(!textureInfos) return false; - auto* textureInfosMapped = static_cast(textureInfos->getBoundMemory().memory->getMappedPointer()); + auto* textureInfosMapped = static_cast(textureInfos->getBoundMemory().memory->getMappedPointer()); for (size_t i = 0u; i < m_assets.size(); ++i) { @@ -87,13 +114,14 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) const auto* profile = ies.getProfile(); const auto& accessor = profile->getAccessor(); const auto& resolution = accessor.properties.optimalIESResolution; - textureInfosMapped[i] = CIESProfile::texture_t::createInfo(accessor, resolution, ies.flatten, true); + textureInfosMapped[i] = CIESProfile::texture_t::create(accessor.properties.maxCandelaValue, resolution).info; ies.buffers.textureInfo.buffer = textureInfos; - ies.buffers.textureInfo.offset = i * sizeof(IESTextureInfo); + ies.buffers.textureInfo.offset = i * sizeof(hlsl::ies::IESTextureInfo); #define CREATE_VIEW(VIEW, FORMAT, NAME) \ if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT, NAME + ies.key) )) return false; + // Filled later by the compute pass (CdcCS) when candela data is marked dirty. CREATE_VIEW(ies.views.candelaOctahedralMap, asset::EF_R16_UNORM, "IES Candela Octahedral Map Image: ") #define CREATE_BUFFER(BUFFER, DATA, NAME) \ @@ -161,7 +189,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) static constexpr auto StageFlags = core::bitflag(stage_flags_t::ESS_FRAGMENT) | stage_flags_t::ESS_VERTEX | stage_flags_t::ESS_COMPUTE; //! single descriptor for both compute & graphics, we will only need to trasition images' layout with a barrier - #define BINDING_TEXTURE(IX, TYPE) { .binding = IX, .type = TYPE, .createFlags = TexturesCreateFlags, .stageFlags = StageFlags, .count = MAX_IES_IMAGES, .immutableSamplers = nullptr } + #define BINDING_TEXTURE(IX, TYPE) { .binding = IX, .type = TYPE, .createFlags = TexturesCreateFlags, .stageFlags = StageFlags, .count = hlsl::this_example::MaxIesImages, .immutableSamplers = nullptr } #define BINDING_SAMPLER(IX) { .binding = IX, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = StageFlags, .count = 1u, .immutableSamplers = nullptr } static constexpr auto bindings = std::to_array ({ @@ -356,32 +384,46 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) // geometries for 3D scene { - CGeometryCreatorScene::f_geometry_override_t injector = [&](auto* creator, auto addGeometry) + struct IESGeometryScene final : public CGeometryCreatorScene { - std::set> seen; - for (auto i = 0u; i < m_assets.size(); ++i) - { - auto& ies = m_assets[i]; - const auto& resolution = ies.getProfile()->getAccessor().properties.optimalIESResolution; - std::pair key{resolution.x, resolution.y}; - if (!seen.insert(key).second) - continue; - - auto name = "Grid (" + std::to_string(resolution.x) + " x " + std::to_string(resolution.y) + ")"; // (**) used to assing polygons! - addGeometry(name.c_str(), creator->createGrid({ resolution.x, resolution.y })); - } + explicit IESGeometryScene(const std::vector& assets) : m_assets(&assets) {} + + protected: + core::vector addGeometries(asset::CGeometryCreator* creator) const override + { + core::vector entries; + if (!m_assets) + return entries; + + std::set> seen; + for (const auto& ies : *m_assets) + { + const auto& resolution = ies.getProfile()->getAccessor().properties.optimalIESResolution; + std::pair key{ resolution.x, resolution.y }; + if (!seen.insert(key).second) + continue; + + std::string name = "Grid (" + std::to_string(resolution.x) + " x " + std::to_string(resolution.y) + ")"; // (**) used to assign polygons! + entries.push_back({ std::move(name), creator->createGrid({ resolution.x, resolution.y }) }); + } + + return entries; + } + + private: + const std::vector* m_assets = nullptr; }; const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; - m_scene = CGeometryCreatorScene::create( + m_scene = CGeometryCreatorScene::create( { .transferQueue = getTransferUpQueue(), .utilities = m_utils.get(), .logger = m_logger.get(), - .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies, - .geometryOverride = injector + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies }, - CSimpleIESRenderer::DefaultPolygonGeometryPatch + CSimpleIESRenderer::DefaultPolygonGeometryPatch, + m_assets ); const auto& geoParams = m_scene->getInitParams(); @@ -416,7 +458,6 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) ); using core_vec_t = std::remove_cv_t>; - using core_mat_t = std::remove_cv_t>; const auto toCoreVec3 = [](const float32_t3& v) -> core_vec_t { return core_vec_t(v.x, v.y, v.z); @@ -429,13 +470,11 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) const auto& params = m_frameBuffers3D.front()->getCreationParameters(); const float aspect = float(params.width) / float(params.height); - const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(m_cameraFovDeg), aspect, 0.1f, 10000.0f); - core_mat_t coreProjection; - std::memcpy(coreProjection.pointer(), &projectionMatrix, sizeof(projectionMatrix)); - camera = Camera(toCoreVec3(cameraPosition), toCoreVec3(cameraTarget), coreProjection, 1.069f, 0.4f); - m_cameraMoveSpeed = camera.getMoveSpeed(); - m_cameraRotateSpeed = camera.getRotateSpeed(); - m_cameraControlApplied = !m_cameraControlEnabled; + const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(uiState.cameraFovDeg), aspect, 0.1f, 10000.0f); + camera = Camera(toCoreVec3(cameraPosition), toCoreVec3(cameraTarget), projectionMatrix, 1.069f, 0.4f); + uiState.cameraMoveSpeed = camera.getMoveSpeed(); + uiState.cameraRotateSpeed = camera.getRotateSpeed(); + uiState.cameraControlApplied = !uiState.cameraControlEnabled; } // imGUI diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index 2d9239284..17937cfa8 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -6,6 +6,7 @@ #include #include #include "nbl/ext/ImGui/ImGui.h" +#include "nbl/ext/ScreenShot/ScreenShot.h" #include "app_resources/common.hlsl" #include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" @@ -80,11 +81,8 @@ bool IESViewer::recreate3DPlotFramebuffers(uint32_t width, uint32_t height) } const float aspect = float(width) / float(height); - const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(m_cameraFovDeg), aspect, 0.1f, 10000.0f); - using core_mat_t = std::remove_cv_t>; - core_mat_t coreProjection; - std::memcpy(coreProjection.pointer(), &projectionMatrix, sizeof(projectionMatrix)); - camera.setProjectionMatrix(coreProjection); + const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(uiState.cameraFovDeg), aspect, 0.1f, 10000.0f); + camera.setProjectionMatrix(projectionMatrix); return true; } @@ -98,9 +96,9 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi auto* imgui = static_cast(ui.it.get()); const bool windowFocused = m_window->hasInputFocus() || m_window->hasMouseFocus(); - if (!windowFocused && m_cameraControlEnabled) - m_cameraControlEnabled = false; - const bool wantCameraControl = m_cameraControlEnabled && windowFocused; + if (!windowFocused && uiState.cameraControlEnabled) + uiState.cameraControlEnabled = false; + const bool wantCameraControl = uiState.cameraControlEnabled && windowFocused; uint32_t renderWidth = m_window->getWidth(); uint32_t renderHeight = m_window->getHeight(); @@ -116,11 +114,11 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi if (renderWidth == 0u || renderHeight == 0u || m_window->isMinimized()) return {}; - if (m_cameraControlApplied != wantCameraControl) + if (uiState.cameraControlApplied != wantCameraControl) { - m_cameraControlApplied = wantCameraControl; - const float moveSpeed = wantCameraControl ? m_cameraMoveSpeed : 0.0f; - const float rotateSpeed = wantCameraControl ? m_cameraRotateSpeed : 0.0f; + uiState.cameraControlApplied = wantCameraControl; + const float moveSpeed = wantCameraControl ? uiState.cameraMoveSpeed : 0.0f; + const float rotateSpeed = wantCameraControl ? uiState.cameraRotateSpeed : 0.0f; camera.setMoveSpeed(moveSpeed); camera.setRotateSpeed(rotateSpeed); } @@ -205,7 +203,7 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi const bool cursorInsideWindow = cursorPosition.x >= windowX && cursorPosition.x < windowX + windowW && cursorPosition.y >= windowY && cursorPosition.y < windowY + windowH; - cursorControl->setVisible(!(cursorInsideWindow || m_cameraControlApplied)); + cursorControl->setVisible(!(cursorInsideWindow || uiState.cameraControlApplied)); ext::imgui::UI::SUpdateParameters params = { .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), @@ -218,25 +216,27 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi imgui->update(params); } - if (m_cameraControlApplied) + if (uiState.cameraControlApplied) { if (auto* cursor = m_window->getCursorControl()) cursor->setRelativePosition(m_window.get(), {0.5f, 0.5f}); } - auto& ies = m_assets[m_activeAssetIx]; + auto& ies = m_assets[uiState.activeAssetIx]; const auto* profile = ies.getProfile(); const auto& accessor = profile->getAccessor(); + const auto hCount = accessor.hAnglesCount(); + const auto vCount = accessor.vAnglesCount(); const auto pc = hlsl::this_example::ies::CdcPC { .hAnglesBDA = ies.buffers.hAngles->getDeviceAddress(), .vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(), .dataBDA = ies.buffers.data->getDeviceAddress(), .txtInfoBDA = ies.buffers.textureInfo.buffer->getDeviceAddress(), - .mode = mode.view, - .texIx = (uint32_t)m_activeAssetIx, - .hAnglesCount = accessor.hAnglesCount(), - .vAnglesCount = accessor.vAnglesCount(), + .mode = uiState.mode.view, + .texIx = static_cast(uiState.activeAssetIx), + .hAnglesCount = hCount, + .vAnglesCount = vCount, .zAngleDegreeRotation = ies.zDegree, .properties = accessor.getProperties() }; @@ -255,8 +255,8 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi auto* image = ies.getActiveImage(IES::EM_OCTAHEDRAL_MAP); bool needCompute = true; - if (m_activeAssetIx < m_candelaDirty.size()) - needCompute = m_candelaDirty[m_activeAssetIx]; + if (uiState.activeAssetIx < m_candelaDirty.size()) + needCompute = m_candelaDirty[uiState.activeAssetIx]; if (needCompute) { @@ -266,12 +266,12 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi cb->bindComputePipeline(m_computePipeline.get()); cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, offsetof(hlsl::this_example::ies::PushConstants, cdc), sizeof(pc), &pc); - const auto xGroups = (ies.getProfile()->getAccessor().properties.optimalIESResolution.x - 1u) / WORKGROUP_DIMENSION + 1u; + const auto xGroups = (ies.getProfile()->getAccessor().properties.optimalIESResolution.x - 1u) / hlsl::this_example::WorkgroupDimension + 1u; cb->dispatch(xGroups, xGroups, 1); IES::barrier(cb, image); cb->endDebugMarker(); - if (m_activeAssetIx < m_candelaDirty.size()) - m_candelaDirty[m_activeAssetIx] = false; + if (uiState.activeAssetIx < m_candelaDirty.size()) + m_candelaDirty[uiState.activeAssetIx] = false; } // Graphics @@ -326,13 +326,13 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi scissor2D.extent = { extent.width, plotHeight }; auto pc2D = pc; - pc2D.mode = mode.view; + pc2D.mode = uiState.mode.view; cb->setViewport(0u, 1u, &viewport2D); cb->setScissor(0u, 1u, &scissor2D); cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc2D), &pc2D); ext::FullScreenTriangle::recordDrawCall(cb); - if (m_showOctaMapPreview) + if (uiState.showOctaMapPreview) { viewport2D.y = static_cast(plotHeight); scissor2D.offset.y = static_cast(plotHeight); @@ -366,14 +366,14 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi float32_t4x4 viewProjMatrix; // TODO: get rid of legacy matrices { - memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); - memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); + viewMatrix = camera.getViewMatrix(); + viewProjMatrix = camera.getConcatenatedMatrix(); } const auto viewParams = CSimpleIESRenderer::SViewParams(viewMatrix, viewProjMatrix); - const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = m_plotRadius, .ds = m_descriptors[0u].get(), .texID = (uint16_t)m_activeAssetIx, .mode = mode.sphere.value, .wireframe = m_wireframeEnabled }); + const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = m_plotRadius, .ds = m_descriptors[0u].get(), .texID = static_cast(uiState.activeAssetIx), .mode = uiState.mode.sphere.value, .wireframe = uiState.wireframeEnabled }); // tear down scene every frame - m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data() + m_activeAssetIx; + m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data() + uiState.activeAssetIx; m_renderer->render(cb, viewParams, iesParams); } cb->endRenderPass(); @@ -450,6 +450,71 @@ IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::mi return retval; } +void IESViewer::onPostRenderFrame(const video::IQueue::SSubmitInfo::SSemaphoreInfo& rendered) +{ + if (!m_ciMode || m_ciScreenshotDone) + return; + + ++m_ciFrameCounter; + if (m_ciFrameCounter < CiFramesBeforeCapture) + return; + + m_ciScreenshotDone = true; + + if (!m_device || !m_surface || !m_assetMgr) + { + requestExit(); + return; + } + + // Ensure the last submitted frame is finished before we read back. + m_device->waitIdle(); + + auto* scRes = static_cast(m_surface->getSwapchainResources()); + auto* fb = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); + if (!fb) + { + m_logger->log("CI screenshot failed: missing swapchain framebuffer.", system::ILogger::ELL_ERROR); + requestExit(); + return; + } + + auto colorView = fb->getCreationParameters().colorAttachments[0u]; + if (!colorView) + { + m_logger->log("CI screenshot failed: missing swapchain color attachment.", system::ILogger::ELL_ERROR); + requestExit(); + return; + } + + { + const auto usage = colorView->getCreationParameters().image->getCreationParameters().usage; + const bool hasTransferSrc = usage.hasFlags(asset::IImage::EUF_TRANSFER_SRC_BIT); + m_logger->log( + "CI screenshot source usage: 0x%llx (transfer_src=%s).", + system::ILogger::ELL_INFO, + static_cast(usage.value), + hasTransferSrc ? "yes" : "no"); + } + + const bool ok = ext::ScreenShot::createScreenShot( + m_device.get(), + getGraphicsQueue(), + nullptr, + colorView.get(), + m_assetMgr.get(), + m_ciScreenshotPath, + asset::IImage::LAYOUT::PRESENT_SRC, + asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT); + + if (ok) + m_logger->log("CI screenshot saved to \"%s\".", system::ILogger::ELL_INFO, m_ciScreenshotPath.string().c_str()); + else + m_logger->log("CI screenshot failed to save.", system::ILogger::ELL_ERROR); + + requestExit(); +} + const video::IGPURenderpass::SCreationParams::SSubpassDependency* IESViewer::getDefaultSubpassDependencies() const { // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp index 57591b697..75aa31887 100644 --- a/50.IESViewer/AppUI.cpp +++ b/50.IESViewer/AppUI.cpp @@ -37,7 +37,7 @@ void IESViewer::uiListener() const bool cursorInsideWindow = cursorControl && cursorPosition.x >= windowX && cursorPosition.x < windowX + windowW && cursorPosition.y >= windowY && cursorPosition.y < windowY + windowH; - ImGui::GetIO().MouseDrawCursor = cursorInsideWindow && !m_cameraControlEnabled; + ImGui::GetIO().MouseDrawCursor = cursorInsideWindow && !uiState.cameraControlEnabled; const ImVec2 bottomSize(viewportSize.x, viewportSize.y); const ImVec2 bottomPos(viewportPos.x, viewportPos.y); const auto legendColor = [&](float v, bool useFalseColor) -> ImU32 @@ -52,7 +52,7 @@ void IESViewer::uiListener() }; const auto showHint = [&](const char* text) { - if (!m_showHints || !text || text[0] == '\0') + if (!uiState.showHints || !text || text[0] == '\0') return; if (!ImGui::IsItemHovered()) return; @@ -65,7 +65,7 @@ void IESViewer::uiListener() for (const auto& label : m_assetLabels) assetLabelPtrs.push_back(label.c_str()); - size_t activeIx = m_activeAssetIx; + size_t activeIx = uiState.activeAssetIx; if (activeIx >= m_assets.size()) activeIx = 0u; int activeIxUi = static_cast(activeIx); @@ -75,7 +75,7 @@ void IESViewer::uiListener() ImVec2 plotRectMax(0.f, 0.f); bool plotRectValid = false; bool plotHovered = false; - m_plot2DRectValid = false; + uiState.plot2DRectValid = false; auto& ies = m_assets[activeIx]; auto* profile = ies.getProfile(); @@ -86,7 +86,6 @@ void IESViewer::uiListener() const float upperBound = accessor.hAngles.back(); const bool singleAngle = (upperBound == lowerBound); - constexpr float kMinFlatten = 0.0f; constexpr size_t kSmallBufSize = 32; auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); @@ -95,85 +94,79 @@ void IESViewer::uiListener() if (m_plot3DWidth == 0u || m_plot3DHeight == 0u) return; const float aspect = float(m_plot3DWidth) / float(m_plot3DHeight); - const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(m_cameraFovDeg), aspect, 0.1f, 10000.0f); - using core_mat_t = std::remove_cv_t>; - core_mat_t coreProjection; - std::memcpy(coreProjection.pointer(), &projectionMatrix, sizeof(projectionMatrix)); - camera.setProjectionMatrix(coreProjection); + const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(uiState.cameraFovDeg), aspect, 0.1f, 10000.0f); + camera.setProjectionMatrix(projectionMatrix); }; auto draw3DControls = [&]() { - bool interpolateCandela = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); + bool interpolateCandela = uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); if (ImGui::Checkbox("interpolate candelas", &interpolateCandela)) { if (interpolateCandela) - mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE; + uiState.mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE; else - mode.sphere &= static_cast( + uiState.mode.sphere &= static_cast( ~hlsl::this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE ); } showHint("Interpolate candela values in the octahedral map."); - bool falseColor = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_FALSE_COLOR); + bool falseColor = uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_FALSE_COLOR); if (ImGui::Checkbox("false color", &falseColor)) { if (falseColor) - mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR; + uiState.mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR; else - mode.sphere &= static_cast( + uiState.mode.sphere &= static_cast( ~hlsl::this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR ); } showHint("Use false color palette for the 3D plot."); - bool showOctaMap = m_showOctaMapPreview; + bool showOctaMap = uiState.showOctaMapPreview; if (ImGui::Checkbox("octahedral map", &showOctaMap)) - m_showOctaMapPreview = showOctaMap; + uiState.showOctaMapPreview = showOctaMap; showHint("Show octahedral map preview under the 2D plot."); - bool showHints = m_showHints; + bool showHints = uiState.showHints; if (ImGui::Checkbox("show hints", &showHints)) - m_showHints = showHints; + uiState.showHints = showHints; showHint("Toggle help tooltips."); - bool cubePlot = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_CUBE); + bool cubePlot = uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_CUBE); if (ImGui::Checkbox("cube plot", &cubePlot)) { if (cubePlot) - mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_CUBE; + uiState.mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_CUBE; else - mode.sphere &= static_cast( + uiState.mode.sphere &= static_cast( ~hlsl::this_example::ies::E_SPHERE_MODE::ESM_CUBE ); } showHint("Render the plot on a cube instead of a sphere."); - bool wireframe = m_wireframeEnabled; + bool wireframe = uiState.wireframeEnabled; if (ImGui::Checkbox("wireframe", &wireframe)) - m_wireframeEnabled = wireframe; + uiState.wireframeEnabled = wireframe; showHint("Show wireframe topology in the 3D plot."); - bool cameraControl = m_cameraControlEnabled; + bool cameraControl = uiState.cameraControlEnabled; if (ImGui::Checkbox("camera control (space)", &cameraControl)) - m_cameraControlEnabled = cameraControl; + uiState.cameraControlEnabled = cameraControl; showHint("Enable camera movement with mouse and keyboard."); - float flatten = ImClamp(ies.flatten, kMinFlatten, 1.0f); bool speedChanged = false; bool fovChanged = false; - bool flattenChanged = false; if (ImGui::BeginTable("##camera_controls", 2, ImGuiTableFlags_SizingStretchProp)) { float labelWidth = 0.0f; labelWidth = ImMax(labelWidth, ImGui::CalcTextSize("move speed").x); labelWidth = ImMax(labelWidth, ImGui::CalcTextSize("rotate speed").x); labelWidth = ImMax(labelWidth, ImGui::CalcTextSize("fov").x); - labelWidth = ImMax(labelWidth, ImGui::CalcTextSize("flatten").x); labelWidth += ImGui::GetStyle().CellPadding.x * 2.0f; labelWidth = ImMin(labelWidth, ImGui::GetContentRegionAvail().x * 0.6f); ImGui::TableSetupColumn("label", ImGuiTableColumnFlags_WidthFixed, labelWidth); @@ -194,63 +187,22 @@ void IESViewer::uiListener() return changed; }; - speedChanged |= sliderRow("move speed", &m_cameraMoveSpeed, 0.1f, 10.0f, "%.2f", "Camera movement speed."); - speedChanged |= sliderRow("rotate speed", &m_cameraRotateSpeed, 0.1f, 5.0f, "%.2f", "Camera rotation speed."); - fovChanged |= sliderRow("fov", &m_cameraFovDeg, 30.0f, 120.0f, "%.0f", "Camera field of view."); - - ImGui::TableNextRow(); - ImGui::TableSetColumnIndex(0); - ImGui::AlignTextToFramePadding(); - ImGui::TextUnformatted("flatten"); - showHint("Flatten the profile (0..1)."); - ImGui::TableSetColumnIndex(1); - const float inputWidth = ImMax(64.0f, ImGui::CalcTextSize("0.000").x + ImGui::GetStyle().FramePadding.x * 2.0f); - const float spacing = ImGui::GetStyle().ItemInnerSpacing.x; - float sliderWidth = ImGui::GetContentRegionAvail().x - inputWidth - spacing; - if (sliderWidth < 40.0f) - sliderWidth = ImGui::GetContentRegionAvail().x; - ImGui::SetNextItemWidth(sliderWidth); - flattenChanged |= ImGui::SliderFloat("##flatten", &flatten, kMinFlatten, 1.0f, "%.3f", ImGuiSliderFlags_AlwaysClamp); - showHint("Flatten the profile (0..1)."); - ImGui::SameLine(); - ImGui::SetNextItemWidth(inputWidth); - flattenChanged |= ImGui::InputFloat("##flatten_value", &flatten, 0.0f, 0.0f, "%.3f"); - showHint("Enter flatten value manually."); + speedChanged |= sliderRow("move speed", &uiState.cameraMoveSpeed, 0.1f, 10.0f, "%.2f", "Camera movement speed."); + speedChanged |= sliderRow("rotate speed", &uiState.cameraRotateSpeed, 0.1f, 5.0f, "%.2f", "Camera rotation speed."); + fovChanged |= sliderRow("fov", &uiState.cameraFovDeg, 30.0f, 120.0f, "%.0f", "Camera field of view."); ImGui::EndTable(); } - if (speedChanged && m_cameraControlEnabled) + if (speedChanged && uiState.cameraControlEnabled) { - camera.setMoveSpeed(m_cameraMoveSpeed); - camera.setRotateSpeed(m_cameraRotateSpeed); + camera.setMoveSpeed(uiState.cameraMoveSpeed); + camera.setRotateSpeed(uiState.cameraRotateSpeed); } if (fovChanged) updateCameraProjection(); - if (flattenChanged) - { - flatten = ImClamp(flatten, kMinFlatten, 1.0f); - ies.flatten = flatten; - if (m_activeAssetIx < m_candelaDirty.size()) - m_candelaDirty[m_activeAssetIx] = true; - auto* mapped = reinterpret_cast( - reinterpret_cast(ies.buffers.textureInfo.buffer->getBoundMemory().memory->getMappedPointer()) + - ies.buffers.textureInfo.offset); - const auto& resolution = accessor.properties.optimalIESResolution; - *mapped = CIESProfile::texture_t::createInfo(accessor, resolution, ies.flatten, true); - - auto bound = ies.buffers.textureInfo.buffer->getBoundMemory(); - if (bound.memory->haveToMakeVisible()) - { - const ILogicalDevice::MappedMemoryRange range( - bound.memory, - bound.offset + ies.buffers.textureInfo.offset, - sizeof(IESTextureInfo)); - m_device->flushMappedMemoryRanges(1, &range); - } - } }; const float panelMargin = 8.f; @@ -278,15 +230,20 @@ void IESViewer::uiListener() std::array bMax{}; std::array bAvg{}; std::array bAvgFull{}; + const auto hCount = accessor.hAnglesCount(); + const auto vCount = accessor.vAnglesCount(); std::snprintf(bAngle.data(), bAngle.size(), "%.3f deg", angle); - std::snprintf(bAngles.data(), bAngles.size(), "angles: %u x %u", accessor.hAnglesCount(), accessor.vAnglesCount()); + std::snprintf(bAngles.data(), bAngles.size(), "angles: %u x %u", hCount, vCount); std::snprintf(bRes.data(), bRes.size(), "resolution: %u x %u", resolution.x, resolution.y); std::snprintf(bMax.data(), bMax.size(), "max cd: %.3f", properties.maxCandelaValue); std::snprintf(bAvg.data(), bAvg.size(), "avg: %.3f", properties.avgEmmision); std::snprintf(bAvgFull.data(), bAvgFull.size(), "avg full: %.3f", properties.fullDomainAvgEmission); + const std::string symmetryLabel = nbl::system::to_string(properties.getSymmetry()); + const std::string typeLabel = nbl::system::to_string(properties.getType()); + const std::string versionLabel = nbl::system::to_string(properties.getVersion()); float leftWidth = 0.0f; - leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(IES::symmetryToRS(properties.getSymmetry())).x); - leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(IES::versionToRS(properties.getVersion())).x); + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(symmetryLabel.c_str()).x); + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(versionLabel.c_str()).x); leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(bAngles.data()).x); leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(bMax.data()).x); leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(bAvgFull.data()).x); @@ -339,8 +296,8 @@ void IESViewer::uiListener() rightText(right, rightHint); }; - row(IES::symmetryToRS(properties.getSymmetry()), IES::typeToRS(properties.getType()), "IES symmetry mode.", "IES photometric type."); - row(IES::versionToRS(properties.getVersion()), assetLabelPtrs.empty() ? ies.key.c_str() : assetLabelPtrs[activeIx], "IES standard/version.", "Active IES profile file."); + row(symmetryLabel.c_str(), typeLabel.c_str(), "IES symmetry mode.", "IES photometric type."); + row(versionLabel.c_str(), assetLabelPtrs.empty() ? ies.key.c_str() : assetLabelPtrs[activeIx], "IES standard/version.", "Active IES profile file."); row(bAngles.data(), bRes.data(), "Horizontal and vertical angle count.", "Octahedral map resolution."); row(bMax.data(), bAvg.data(), "Maximum candela value.", "Average candela value."); row(bAvgFull.data(), bAngle.data(), "Average candela over full domain.", "Current horizontal angle."); @@ -358,7 +315,8 @@ void IESViewer::uiListener() plotSize = ImVec2(plotSide, plotSide); ImVec2 plotPos = ImGui::GetCursorScreenPos(); { - const char* title = IES::modeToRS(mode.view); + const std::string modeLabel = nbl::system::to_string(uiState.mode.view); + const char* title = modeLabel.c_str(); const ImVec2 titleSize = ImGui::CalcTextSize(title); const float titleX = ImMax(0.0f, (ImGui::GetContentRegionAvail().x - titleSize.x) * 0.5f); ImGui::SetCursorPosX(ImGui::GetCursorPosX() + titleX); @@ -370,9 +328,9 @@ void IESViewer::uiListener() ImGui::Image(info, plotSize, ImVec2(0.f, 0.f), ImVec2(1.f, 0.5f)); const ImVec2 itemMin = ImGui::GetItemRectMin(); const ImVec2 itemMax = ImGui::GetItemRectMax(); - m_plot2DRectMin = float32_t2(itemMin.x, itemMin.y); - m_plot2DRectMax = float32_t2(itemMax.x, itemMax.y); - m_plot2DRectValid = true; + uiState.plot2DRectMin = float32_t2(itemMin.x, itemMin.y); + uiState.plot2DRectMax = float32_t2(itemMax.x, itemMax.y); + uiState.plot2DRectValid = true; showHint("2D candlepower distribution curve."); ImDrawList* dl = ImGui::GetWindowDrawList(); @@ -437,7 +395,7 @@ void IESViewer::uiListener() } } - if (plotSize.x > 0.0f && plotSize.y > 0.0f && m_showOctaMapPreview) + if (plotSize.x > 0.0f && plotSize.y > 0.0f && uiState.showOctaMapPreview) { ImGui::Spacing(); { @@ -482,7 +440,7 @@ void IESViewer::uiListener() ImGui::End(); ies.zDegree = angle; - m_activeAssetIx = activeIx; + uiState.activeAssetIx = activeIx; // 3D plot { info.textureID += device_base_t::MaxFramesInFlight; @@ -519,7 +477,7 @@ void IESViewer::uiListener() const float barHeight = ImMax(80.0f, plotSize.y - margin * 2.0f); if (plotSize.x > barWidth + margin * 2.0f && plotSize.y > margin * 2.0f) { - const bool useFalseColorLegend = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_FALSE_COLOR); + const bool useFalseColorLegend = uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_FALSE_COLOR); ImVec2 barMin(imgPos.x + plotSize.x - barWidth - margin, imgPos.y + margin); ImVec2 barMax(barMin.x + barWidth, barMin.y + barHeight); @@ -576,8 +534,7 @@ void IESViewer::uiListener() const float ndcX = u * 2.0f - 1.0f; const float ndcY = v * 2.0f - 1.0f; - float32_t4x4 viewProj; - std::memcpy(&viewProj, camera.getConcatenatedMatrix().pointer(), sizeof(viewProj)); + float32_t4x4 viewProj = camera.getConcatenatedMatrix(); const auto invViewProj = inverse(viewProj); const float32_t4 nearPoint(ndcX, ndcY, 0.0f, 1.0f); @@ -599,7 +556,7 @@ void IESViewer::uiListener() float32_t3 hitPos(0.f); bool hit = false; - const bool cubePlot = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_CUBE); + const bool cubePlot = uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_CUBE); if (cubePlot) { float tmin = -1.0e20f; @@ -659,32 +616,30 @@ void IESViewer::uiListener() { using octahedral_t = math::OctahedralTransform; const float32_t3 dir = normalize(hitPos); - float32_t2 uv = octahedral_t::dirToNDC(dir) * 0.5f + float32_t2(0.5f, 0.5f); - const uint32_t resX = resolutionCandela.x; const uint32_t resY = resolutionCandela.y; if (resX > 0u && resY > 0u) { const float32_t2 res(static_cast(resX), static_cast(resY)); - const bool interpolateCandela = mode.sphere.hasFlags(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); + const float32_t2 halfMinusHalfPixel = float32_t2(0.5f, 0.5f) - float32_t2(0.5f, 0.5f) / res; + float32_t2 uv = octahedral_t::dirToUV(dir, halfMinusHalfPixel); + const bool interpolateCandela = uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); if (!interpolateCandela) { - const auto pixel = floor(uv * res + float32_t2(0.5f, 0.5f)); - uv = pixel / res; + const auto pixel = floor(uv * res); + uv = (pixel + float32_t2(0.5f, 0.5f)) / res; } - const auto info = CIESProfile::texture_t::createInfo(accessorCandela, resolutionCandela, iesCandela.flatten, true); - const float32_t2 scale = float32_t2(1.0f, 1.0f) - float32_t2(1.0f, 1.0f) / res; - const float32_t2 uvCorner = (uv - float32_t2(0.5f, 0.5f)) * scale + float32_t2(0.5f, 0.5f); - const float normalized = CIESProfile::texture_t::eval(accessorCandela, info, uvCorner); - candelaValue = info.maxValueRecip > 0.0f ? (normalized / info.maxValueRecip) : 0.0f; + const auto texture = CIESProfile::texture_t::create(accessorCandela.properties.maxCandelaValue, resolutionCandela); + const float normalized = texture.__call(accessorCandela, uv); + candelaValue = texture.info.maxValueRecip > 0.0f ? (normalized / texture.info.maxValueRecip) : 0.0f; candelaValid = true; } } } } - if (candelaValid && !m_cameraControlEnabled) + if (candelaValid && !uiState.cameraControlEnabled) { ImGui::BeginTooltip(); ImGui::Text("candela: %.3f cd", candelaValue); diff --git a/50.IESViewer/CMakeLists.txt b/50.IESViewer/CMakeLists.txt index 76108928d..050d0a53c 100644 --- a/50.IESViewer/CMakeLists.txt +++ b/50.IESViewer/CMakeLists.txt @@ -15,6 +15,8 @@ set(LIBs nbl_create_executable_project("${SRCs}" "" "" "${LIBs}") target_link_libraries(${EXECUTABLE_NAME} PRIVATE nlohmann_json::nlohmann_json) +add_dependencies(${EXECUTABLE_NAME} argparse) +target_include_directories(${EXECUTABLE_NAME} PUBLIC $) set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") set(DEPENDS diff --git a/50.IESViewer/CSimpleIESRenderer.hpp b/50.IESViewer/CSimpleIESRenderer.hpp index a8cb39d65..d5614aa7a 100644 --- a/50.IESViewer/CSimpleIESRenderer.hpp +++ b/50.IESViewer/CSimpleIESRenderer.hpp @@ -36,7 +36,7 @@ class CSimpleIESRenderer final : public core::IReferenceCounted inline auto computeForInstance(hlsl::float32_t3x4 world) const { using namespace nbl::hlsl; - hlsl::this_example::ies::SInstanceMatrices retval = { + hlsl::this_example::SInstanceMatrices retval = { .worldViewProj = float32_t4x4(math::linalg::promoted_mul(float64_t4x4(viewProj),float64_t3x4(world))) }; const auto sub3x3 = mul(float64_t3x3(viewProj),float64_t3x3(world)); diff --git a/50.IESViewer/IES.cpp b/50.IESViewer/IES.cpp index 85d87983e..3c1df172c 100644 --- a/50.IESViewer/IES.cpp +++ b/50.IESViewer/IES.cpp @@ -25,64 +25,3 @@ video::IGPUImage* IES::getActiveImage(E_MODE mode) const return nullptr; } } - -const char* IES::modeToRS(E_MODE mode) -{ - switch (mode) - { - case IES::EM_CDC: - return "Candlepower Distribution Curve"; - case IES::EM_OCTAHEDRAL_MAP: - return "Candela Octahedral Map"; - default: - return "ERROR (mode)"; - } -} - -const char* IES::symmetryToRS(CIESProfile::properties_t::LuminairePlanesSymmetry symmetry) -{ - switch (symmetry) - { - case asset::CIESProfile::properties_t::ISOTROPIC: - return "ISOTROPIC"; - case asset::CIESProfile::properties_t::QUAD_SYMETRIC: - return "QUAD_SYMETRIC"; - case asset::CIESProfile::properties_t::HALF_SYMETRIC: - return "HALF_SYMETRIC"; - case asset::CIESProfile::properties_t::OTHER_HALF_SYMMETRIC: - return "OTHER_HALF_SYMMETRIC"; - case asset::CIESProfile::properties_t::NO_LATERAL_SYMMET: - return "NO_LATERAL_SYMMET"; - default: - return "ERROR (symmetry)"; - } -} - -const char* IES::typeToRS(CIESProfile::properties_t::PhotometricType type) -{ - switch (type) - { - case asset::CIESProfile::properties_t::TYPE_C: - return "TYPE_C"; - case asset::CIESProfile::properties_t::TYPE_B: - return "TYPE_B"; - case asset::CIESProfile::properties_t::TYPE_A: - return "TYPE_A"; - case asset::CIESProfile::properties_t::TYPE_NONE: - default: - return "TYPE_NONE"; - } -} - -const char* IES::versionToRS(CIESProfile::properties_t::Version version) -{ - switch (version) - { - case asset::CIESProfile::properties_t::V_1995: - return "V_1995"; - case asset::CIESProfile::properties_t::V_2002: - return "V_2002"; - default: - return "V_UNKNOWN"; - } -} diff --git a/50.IESViewer/IES.hpp b/50.IESViewer/IES.hpp index 330d9368d..da9f98b3f 100644 --- a/50.IESViewer/IES.hpp +++ b/50.IESViewer/IES.hpp @@ -6,6 +6,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/examples/examples.hpp" +#include "nbl/system/to_string.h" NBL_EXPOSE_NAMESPACES @@ -34,16 +35,10 @@ struct IES std::string key; float zDegree = 0.f; - float flatten = 0.0f; const asset::CIESProfile* getProfile() const; video::IGPUImage* getActiveImage(E_MODE mode) const; - static const char* modeToRS(E_MODE mode); - static const char* symmetryToRS(CIESProfile::properties_t::LuminairePlanesSymmetry symmetry); - static const char* typeToRS(CIESProfile::properties_t::PhotometricType type); - static const char* versionToRS(CIESProfile::properties_t::Version version); - template requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) static inline bool barrier(IGPUCommandBuffer* const cb, const std::span images) @@ -115,4 +110,84 @@ struct IES } }; +namespace nbl::system::impl +{ +template<> +struct to_string_helper +{ + static std::string __call(const IES::E_MODE mode) + { + switch (mode) + { + case IES::EM_CDC: + return "Candlepower Distribution Curve"; + case IES::EM_OCTAHEDRAL_MAP: + return "Candela Octahedral Map"; + default: + return "ERROR (mode)"; + } + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const nbl::asset::CIESProfile::properties_t::LuminairePlanesSymmetry symmetry) + { + switch (symmetry) + { + case nbl::asset::CIESProfile::properties_t::ISOTROPIC: + return "ISOTROPIC"; + case nbl::asset::CIESProfile::properties_t::QUAD_SYMETRIC: + return "QUAD_SYMETRIC"; + case nbl::asset::CIESProfile::properties_t::HALF_SYMETRIC: + return "HALF_SYMETRIC"; + case nbl::asset::CIESProfile::properties_t::OTHER_HALF_SYMMETRIC: + return "OTHER_HALF_SYMMETRIC"; + case nbl::asset::CIESProfile::properties_t::NO_LATERAL_SYMMET: + return "NO_LATERAL_SYMMET"; + default: + return "ERROR (symmetry)"; + } + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const nbl::asset::CIESProfile::properties_t::PhotometricType type) + { + switch (type) + { + case nbl::asset::CIESProfile::properties_t::TYPE_C: + return "TYPE_C"; + case nbl::asset::CIESProfile::properties_t::TYPE_B: + return "TYPE_B"; + case nbl::asset::CIESProfile::properties_t::TYPE_A: + return "TYPE_A"; + case nbl::asset::CIESProfile::properties_t::TYPE_NONE: + default: + return "TYPE_NONE"; + } + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const nbl::asset::CIESProfile::properties_t::Version version) + { + switch (version) + { + case nbl::asset::CIESProfile::properties_t::V_1995: + return "V_1995"; + case nbl::asset::CIESProfile::properties_t::V_2002: + return "V_2002"; + default: + return "V_UNKNOWN"; + } + } +}; +} + #endif // _THIS_EXAMPLE_IES_HPP_ diff --git a/50.IESViewer/app_resources/common.hlsl b/50.IESViewer/app_resources/common.hlsl index bc755befb..54a95b9d0 100644 --- a/50.IESViewer/app_resources/common.hlsl +++ b/50.IESViewer/app_resources/common.hlsl @@ -4,19 +4,17 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/ies/profile.hlsl" -#define QUANT_ERROR_ADMISSIBLE 1/1024 -#define WORKGROUP_SIZE 256u -#define WORKGROUP_DIMENSION 16u -#define MAX_IES_IMAGES 6969 - namespace nbl { namespace hlsl { namespace this_example { -namespace ies -{ + +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float QuantErrorAdmissible = 1.0f / 1024.0f; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = 256u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupDimension = 16u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MaxIesImages = 6969u; struct SInstanceMatrices { @@ -24,6 +22,9 @@ struct SInstanceMatrices float32_t3x3 normal; }; +namespace ies +{ + struct CdcPC { uint64_t hAnglesBDA; @@ -51,7 +52,7 @@ enum E_SPHERE_MODE : uint16_t struct SpherePC { NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1<<16)-1; - SInstanceMatrices matrices; + this_example::SInstanceMatrices matrices; uint32_t positionView : 16; uint32_t normalView : 16; float32_t radius; diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index 7aafbc1f8..a0235cfe6 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -1,16 +1,16 @@ #include "common.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" -#include "nbl/builtin/hlsl/ies/texture.hlsl" #include "nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl" #include "false_color.hlsl" using namespace nbl::hlsl; +using namespace nbl::hlsl::this_example; using namespace nbl::hlsl::this_example::ies; using namespace nbl::hlsl::ext::FullScreenTriangle; -[[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MAX_IES_IMAGES]; -[[vk::binding(0 + 10, 0)]] RWTexture2D outIESCandelaImage[MAX_IES_IMAGES]; +[[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MaxIesImages]; +[[vk::binding(0 + 10, 0)]] RWTexture2D outIESCandelaImage[MaxIesImages]; [[vk::binding(0 + 100, 0)]] SamplerState generalSampler; [[vk::binding(0, 1)]] Buffer utbs[SpherePC::DescriptorCount]; @@ -18,34 +18,29 @@ using namespace nbl::hlsl::ext::FullScreenTriangle; struct Accessor { - using key_t = uint32_t; - using key_t2 = vector; - using value_t = float32_t; - - static key_t vAnglesCount() { return pc.cdc.vAnglesCount; } - static key_t hAnglesCount() { return pc.cdc.hAnglesCount; } - - template) - static inline value_t vAngle(T j) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.vAnglesBDA) + j).deref().load(); } + using angle_t = float32_t; + using candela_t = float32_t; - template) - static inline value_t hAngle(T i) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.hAnglesBDA) + i).deref().load(); } + candela_t value(const uint32_t2 ij) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.dataBDA) + pc.cdc.vAnglesCount * ij.x + ij.y).deref().load(); } + angle_t vAngle(const uint32_t idx) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.vAnglesBDA) + idx).deref().load(); } + angle_t hAngle(const uint32_t idx) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.hAnglesBDA) + idx).deref().load(); } + uint32_t vAnglesCount() { return pc.cdc.vAnglesCount; } + uint32_t hAnglesCount() { return pc.cdc.hAnglesCount; } - template) - static inline value_t value(T ij) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.dataBDA) + vAnglesCount() * ij.x + ij.y).deref().load(); } - - static inline nbl::hlsl::ies::ProfileProperties getProperties() { return pc.cdc.properties; } + nbl::hlsl::ies::ProfileProperties getProperties() { return pc.cdc.properties; } }; +#include "nbl/builtin/hlsl/ies/texture.hlsl" + struct SInterpolants { float32_t4 ndc : SV_Position; float32_t3 latDir : COLOR1; - float32_t2 uv : TEXCOORD0; + float32_t2 uv : TEXCOORD0; }; using octahedral_t = math::OctahedralTransform; -using texture_t = nbl::hlsl::ies::Texture; +using texture_t = nbl::hlsl::ies::SProceduralTexture; [shader("vertex")] SInterpolants SphereVS(uint32_t vIx : SV_VertexID) @@ -53,10 +48,11 @@ SInterpolants SphereVS(uint32_t vIx : SV_VertexID) uint32_t2 res; inIESCandelaImage[pc.sphere.texIx].GetDimensions(res.x, res.y); - const float32_t2 inv = float32_t2(1.f, 1.f) / float32_t2(res - 1u); - const float32_t2 uv = float32_t2(vIx % res.x, vIx / res.x) * inv; + const float32_t2 resF = float32_t2(res); + const float32_t2 uv = (float32_t2(vIx % res.x, vIx / res.x) + float32_t2(0.5f, 0.5f)) / resF; + const float32_t2 halfMinusHalfPixel = float32_t2(0.5f, 0.5f) - float32_t2(0.5f, 0.5f) / resF; - const float32_t3 dir = octahedral_t::uvToDir(uv); + const float32_t3 dir = octahedral_t::uvToDir(uv, halfMinusHalfPixel); float32_t3 pos = dir; const bool useCube = (pc.sphere.mode & ESM_CUBE) != 0; if (useCube) @@ -85,44 +81,43 @@ float32_t4 SpherePS(SInterpolants input) : SV_Target0 const bool dontInterpolateUV = (pc.sphere.mode & ESM_OCTAHEDRAL_UV_INTERPOLATE) == 0; if (dontInterpolateUV) { - float32_t2 pixel = floor(uv * float32_t2(res) + 0.5f); - uv = pixel / float32_t2(res); + float32_t2 pixel = floor(uv * float32_t2(res)); + uv = (pixel + float32_t2(0.5f, 0.5f)) / float32_t2(res); } - float32_t2 scale = 1.0f - 1.0f / float32_t2(res); - float32_t2 uvCorner = (uv - 0.5f) * scale + 0.5f; - - float32_t I = inIESCandelaImage[pc.sphere.texIx].SampleLevel(generalSampler, uvCorner, 0.0f).r; + float32_t I = inIESCandelaImage[pc.sphere.texIx].SampleLevel(generalSampler, uv, 0.0f).r; const bool useFalseColor = (pc.sphere.mode & ESM_FALSE_COLOR) != 0; float32_t3 col = useFalseColor ? falseColor(I) : float32_t3(I, I, I); return float32_t4(col, 1.0f); } -[numthreads(WORKGROUP_DIMENSION, WORKGROUP_DIMENSION, 1)] +[numthreads(WorkgroupDimension, WorkgroupDimension, 1)] [shader("compute")] void CdcCS(uint32_t3 ID : SV_DispatchThreadID) { - uint32_t2 destinationSize; - outIESCandelaImage[pc.cdc.texIx].GetDimensions(destinationSize.x, destinationSize.y); - const uint32_t2 pixelCoordinates = uint32_t2(glsl::gl_GlobalInvocationID().x, glsl::gl_GlobalInvocationID().y); - if (all(pixelCoordinates < destinationSize)) - { - Accessor accessor; texture_t txt; - typename texture_t::SInfo info = (nbl::hlsl::bda::__ptr::create(pc.cdc.txtInfoBDA) + pc.cdc.texIx).deref_restrict().load(); - outIESCandelaImage[pc.cdc.texIx][pixelCoordinates] = txt.eval(accessor, info, pixelCoordinates); - } + uint32_t2 destinationSize; + outIESCandelaImage[pc.cdc.texIx].GetDimensions(destinationSize.x, destinationSize.y); + const uint32_t2 pixelCoordinates = uint32_t2(glsl::gl_GlobalInvocationID().x, glsl::gl_GlobalInvocationID().y); + if (all(pixelCoordinates < destinationSize)) + { + Accessor accessor; + texture_t txt; + nbl::hlsl::ies::IESTextureInfo info = (nbl::hlsl::bda::__ptr::create(pc.cdc.txtInfoBDA) + pc.cdc.texIx).deref().load(); + txt.info = info; + outIESCandelaImage[pc.cdc.texIx][pixelCoordinates] = txt.__call(accessor, pixelCoordinates); + } } float32_t plot(float32_t cand, float32_t pct, float32_t bold) { - return smoothstep(pct-0.005*bold, pct, cand) - smoothstep(pct, pct+0.005*bold, cand); + return smoothstep(pct - 0.005f * bold, pct, cand) - smoothstep(pct, pct + 0.005f * bold, cand); } // vertical cut of IES (i.e. cut by plane x = 0) -float32_t f(float32_t2 uv) +float32_t f(float32_t2 uv) { - float32_t3 dir = normalize(float32_t3(uv.x, 0.001, uv.y)); + float32_t3 dir = normalize(float32_t3(uv.x, 0.001f, uv.y)); if (pc.cdc.zAngleDegreeRotation != 0.f) { float32_t rad = radians(pc.cdc.zAngleDegreeRotation); @@ -139,9 +134,9 @@ float32_t f(float32_t2 uv) uint32_t2 res; inIESCandelaImage[pc.cdc.texIx].GetDimensions(res.x, res.y); float32_t2 halfMinusHalfPixel = 0.5f - 0.5f / float32_t2(res); - float32_t2 uvCorner = octahedral_t::toCornerSampledUV(dir, halfMinusHalfPixel); + float32_t2 uvOcta = octahedral_t::dirToUV(dir, halfMinusHalfPixel); - return inIESCandelaImage[pc.cdc.texIx].SampleLevel(generalSampler, uvCorner, 0u).x; + return inIESCandelaImage[pc.cdc.texIx].SampleLevel(generalSampler, uvOcta, 0u).x; } #include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" @@ -149,24 +144,24 @@ float32_t f(float32_t2 uv) [shader("pixel")] float32_t4 CdcPS(SVertexAttributes input) : SV_Target0 { - switch (pc.cdc.mode) - { - case 0: - { - float32_t2 ndc = input.uv * 2.f - 1.f; - float32_t dist = length(ndc) * 1.015625f; - float32_t p = plot(dist, 1.0f, 0.75f); - float32_t3 col = float32_t3(p, p, p); - - float32_t normalizedStrength = f(ndc); - if (dist < normalizedStrength) - col += float32_t3(1.0f, 0.0f, 0.0f); - - return float32_t4(col, 1.0f); - } - case 1: - return float32_t4(inIESCandelaImage[pc.cdc.texIx].Sample(generalSampler, input.uv).x, 0.f, 0.f, 1.f); - default: - return float32_t4(0.f, 0.f, 0.f, 0.f); - } + switch (pc.cdc.mode) + { + case 0: + { + float32_t2 ndc = input.uv * 2.f - 1.f; + float32_t dist = length(ndc) * 1.015625f; + float32_t p = plot(dist, 1.0f, 0.75f); + float32_t3 col = float32_t3(p, p, p); + + float32_t normalizedStrength = f(ndc); + if (dist < normalizedStrength) + col += float32_t3(1.0f, 0.0f, 0.0f); + + return float32_t4(col, 1.0f); + } + case 1: + return float32_t4(inIESCandelaImage[pc.cdc.texIx].Sample(generalSampler, input.uv).x, 0.f, 0.f, 1.f); + default: + return float32_t4(0.f, 0.f, 0.f, 0.f); + } } diff --git a/50.IESViewer/main.cpp b/50.IESViewer/main.cpp index 0659c2fae..ca44888ef 100644 --- a/50.IESViewer/main.cpp +++ b/50.IESViewer/main.cpp @@ -4,15 +4,9 @@ #include "App.hpp" -// TODO -#define APP_WINDOW_WIDTH 669*2u -#define APP_WINDOW_HEIGHT APP_WINDOW_WIDTH - -#define APP_DEPTH_BUFFER_FORMAT EF_UNKNOWN - IESViewer::IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), - device_base_t({ APP_WINDOW_WIDTH, APP_WINDOW_HEIGHT }, APP_DEPTH_BUFFER_FORMAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) + device_base_t({ AppWindowWidth, AppWindowHeight }, AppDepthBufferFormat, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { } diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index 9262fc242..6ca4517aa 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -208,7 +208,8 @@ class Camera if(keysDown[k]) { auto timeDiff = std::chrono::duration_cast(nextPresentationTimeStamp - lastVirtualUpTimeStamp).count(); - assert(timeDiff >= 0); + if (timeDiff < 0) + timeDiff = 0; perActionDt[k] += timeDiff; } @@ -217,8 +218,9 @@ class Camera const auto ev = *eventIt; // accumulate the periods for which a key was down - const auto timeDiff = std::chrono::duration_cast(nextPresentationTimeStamp - ev.timeStamp).count(); - assert(timeDiff >= 0); + auto timeDiff = std::chrono::duration_cast(nextPresentationTimeStamp - ev.timeStamp).count(); + if (timeDiff < 0) + timeDiff = 0; // handle camera movement for (const auto logicalKey : { ECMK_MOVE_FORWARD, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT }) @@ -332,4 +334,4 @@ class Camera std::chrono::microseconds nextPresentationTimeStamp, lastVirtualUpTimeStamp; }; -#endif \ No newline at end of file +#endif diff --git a/common/include/nbl/examples/examples.hpp b/common/include/nbl/examples/examples.hpp index 134fe9b33..d40950501 100644 --- a/common/include/nbl/examples/examples.hpp +++ b/common/include/nbl/examples/examples.hpp @@ -22,13 +22,13 @@ #define NBL_EXPOSE_NAMESPACES \ using namespace nbl; \ -using namespace core; \ -using namespace hlsl; \ -using namespace system; \ -using namespace asset; \ -using namespace ui; \ -using namespace video; \ -using namespace scene; \ +using namespace nbl::core; \ +using namespace nbl::hlsl; \ +using namespace nbl::system; \ +using namespace nbl::asset; \ +using namespace nbl::ui; \ +using namespace nbl::video; \ +using namespace nbl::scene; \ using namespace nbl::examples; -#endif // _NBL_EXAMPLES_HPP_ \ No newline at end of file +#endif // _NBL_EXAMPLES_HPP_ diff --git a/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp index fe846f76e..1bcbd1fd1 100644 --- a/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp +++ b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp @@ -3,6 +3,7 @@ #include +#include #include "nbl/asset/utils/CGeometryCreator.h" namespace nbl::examples @@ -17,8 +18,11 @@ class CGeometryCreatorScene : public core::IReferenceCounted using namespace nbl::video public: - using f_add_geometry_t = std::function&&)>; - using f_geometry_override_t = std::function; + struct SGeometryEntry + { + std::string name; + core::smart_refctd_ptr geometry; + }; struct SCreateParams { @@ -26,9 +30,46 @@ class CGeometryCreatorScene : public core::IReferenceCounted video::IUtilities* utilities; system::ILogger* logger; std::span addtionalBufferOwnershipFamilies = {}; - f_geometry_override_t geometryOverride = nullptr; }; - static inline core::smart_refctd_ptr create(SCreateParams&& params, const video::CAssetConverter::patch_t& geometryPatch) + + // Creates and initializes a scene. Override addGeometries() to supply custom meshes. + template + static inline core::smart_refctd_ptr create(SCreateParams&& params, const video::CAssetConverter::patch_t& geometryPatch, Args&&... args) + { + static_assert(std::is_base_of_v); + auto scene = core::smart_refctd_ptr(new SceneT(std::forward(args)...), core::dont_grab); + if (!scene->initialize(std::move(params), geometryPatch)) + return nullptr; + return scene; + } + + // + struct SInitParams + { + core::vector> geometries; + core::vector geometryNames; + }; + const SInitParams& getInitParams() const {return m_init;} + + protected: + inline CGeometryCreatorScene() = default; + + // Override to supply custom geometries, names are used as UI labels + virtual core::vector addGeometries(asset::CGeometryCreator* creator) const + { + core::vector entries; + entries.push_back({ "Cube", creator->createCube({ 1.f,1.f,1.f }) }); + entries.push_back({ "Rectangle", creator->createRectangle({ 1.5f,3.f }) }); + entries.push_back({ "Disk", creator->createDisk(2.f, 30) }); + entries.push_back({ "Sphere", creator->createSphere(2, 16, 16) }); + entries.push_back({ "Cylinder", creator->createCylinder(2, 2, 20) }); + entries.push_back({ "Cone", creator->createCone(2, 3, 10) }); + entries.push_back({ "Icosphere", creator->createIcoSphere(1, 4, true) }); + entries.push_back({ "Grid", creator->createGrid({ 32u, 32u }) }); + return entries; + } + + inline bool initialize(SCreateParams&& params, const video::CAssetConverter::patch_t& geometryPatch) { EXPOSE_NABLA_NAMESPACES; auto* logger = params.logger; @@ -36,52 +77,35 @@ class CGeometryCreatorScene : public core::IReferenceCounted if (!params.transferQueue) { logger->log("Pass a non-null `IQueue* transferQueue`!",ILogger::ELL_ERROR); - return nullptr; + return false; } if (!params.utilities) { logger->log("Pass a non-null `IUtilities* utilities`!",ILogger::ELL_ERROR); - return nullptr; + return false; } SInitParams init = {}; core::vector> geometries; // create out geometries { - f_add_geometry_t addGeometry = [&init,&geometries](const auto name, auto&& geom)->void - { - init.geometryNames.emplace_back(name); - geometries.push_back(std::move(geom)); - }; - auto creator = core::make_smart_refctd_ptr(); - /* TODO: others - ReferenceObjectCpu {.meta = {.type = OT_CUBE, .name = "Cube Mesh" }, .shadersType = GP_BASIC, .data = gc->createCubeMesh(nbl::core::vector3df(1.f, 1.f, 1.f)) }, - ReferenceObjectCpu {.meta = {.type = OT_SPHERE, .name = "Sphere Mesh" }, .shadersType = GP_BASIC, .data = gc->createSphereMesh(2, 16, 16) }, - ReferenceObjectCpu {.meta = {.type = OT_CYLINDER, .name = "Cylinder Mesh" }, .shadersType = GP_BASIC, .data = gc->createCylinderMesh(2, 2, 20) }, - ReferenceObjectCpu {.meta = {.type = OT_RECTANGLE, .name = "Rectangle Mesh" }, .shadersType = GP_BASIC, .data = gc->createRectangleMesh(nbl::core::vector2df_SIMD(1.5, 3)) }, - ReferenceObjectCpu {.meta = {.type = OT_DISK, .name = "Disk Mesh" }, .shadersType = GP_BASIC, .data = gc->createDiskMesh(2, 30) }, - ReferenceObjectCpu {.meta = {.type = OT_ARROW, .name = "Arrow Mesh" }, .shadersType = GP_BASIC, .data = gc->createArrowMesh() }, - ReferenceObjectCpu {.meta = {.type = OT_CONE, .name = "Cone Mesh" }, .shadersType = GP_CONE, .data = gc->createConeMesh(2, 3, 10) }, - ReferenceObjectCpu {.meta = {.type = OT_ICOSPHERE, .name = "Icoshpere Mesh" }, .shadersType = GP_ICO, .data = gc->createIcoSphere(1, 3, true) } - */ - - if (params.geometryOverride) - params.geometryOverride(creator.get(), addGeometry); - else + auto entries = addGeometries(creator.get()); + if (entries.empty()) + return false; + + init.geometryNames.reserve(entries.size()); + geometries.reserve(entries.size()); + for (auto& entry : entries) { - addGeometry("Cube", creator->createCube({ 1.f,1.f,1.f })); - addGeometry("Rectangle", creator->createRectangle({ 1.5f,3.f })); - addGeometry("Disk", creator->createDisk(2.f, 30)); - addGeometry("Sphere", creator->createSphere(2, 16, 16)); - addGeometry("Cylinder", creator->createCylinder(2, 2, 20)); - addGeometry("Cone", creator->createCone(2, 3, 10)); - addGeometry("Icosphere", creator->createIcoSphere(1, 4, true)); - addGeometry("Grid", creator->createGrid({ 32u, 32u })); + if (!entry.geometry) + continue; + init.geometryNames.emplace_back(entry.name); + geometries.push_back(std::move(entry.geometry)); } if (geometries.empty()) - return nullptr; + return false; } init.geometries.reserve(init.geometryNames.size()); @@ -120,7 +144,7 @@ class CGeometryCreatorScene : public core::IReferenceCounted if (!reservation) { logger->log("Failed to reserve GPU objects for CPU->GPU conversion!",ILogger::ELL_ERROR); - return nullptr; + return false; } // convert @@ -157,7 +181,7 @@ class CGeometryCreatorScene : public core::IReferenceCounted if (future.copy()!=IQueue::RESULT::SUCCESS) { logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); - return nullptr; + return false; } } @@ -180,23 +204,13 @@ class CGeometryCreatorScene : public core::IReferenceCounted } } - return smart_refctd_ptr(new CGeometryCreatorScene(std::move(init)),dont_grab); + m_init = std::move(init); + return true; } - // - struct SInitParams - { - core::vector> geometries; - core::vector geometryNames; - }; - const SInitParams& getInitParams() const {return m_init;} - - protected: - inline CGeometryCreatorScene(SInitParams&& _init) : m_init(std::move(_init)) {} - SInitParams m_init; #undef EXPOSE_NABLA_NAMESPACES }; } -#endif \ No newline at end of file +#endif diff --git a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp index e87dc5c1d..6e5c24614 100644 --- a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp +++ b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp @@ -168,8 +168,8 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted params[pipeline_e::BasicTriangleList].fragmentShader = {.shader=shader.get(),.entryPoint="BasicFS"}; params[pipeline_e::BasicTriangleFan].vertexShader = {.shader=shader.get(),.entryPoint="BasicVS"}; params[pipeline_e::BasicTriangleFan].fragmentShader = {.shader=shader.get(),.entryPoint="BasicFS"}; - params[pipeline_e::BasicTriangleStrip].vertexShader = { .shader = shader.get(),.entryPoint = "BasicVS" }; - params[pipeline_e::BasicTriangleStrip].fragmentShader = { .shader = shader.get(),.entryPoint = "BasicFSSnake" }; + params[pipeline_e::GridSnakeStrip].vertexShader = { .shader = shader.get(),.entryPoint = "BasicVS" }; + params[pipeline_e::GridSnakeStrip].fragmentShader = { .shader = shader.get(),.entryPoint = "BasicFSSnake" }; params[pipeline_e::Cone].vertexShader = {.shader=shader.get(),.entryPoint="ConeVS"}; params[pipeline_e::Cone].fragmentShader = {.shader=shader.get(),.entryPoint="ConeFS"}; for (auto i=0; i(0.5f),1.f); } +// Debug fragment shader for grid triangle-strips ("snake" order). It alternates +// triangle shading to visualize strip winding and connectivity. [shader("pixel")] float32_t4 BasicFSSnake(SInterpolants input, uint primID : SV_PrimitiveID) : SV_Target0 { @@ -77,4 +79,4 @@ float32_t4 ConeFS(SInterpolants input) : SV_Target0 { const float32_t3 normal = reconstructGeometricNormal(input.meta); return float32_t4(normalize(normal)*0.5f+promote(0.5f),1.f); -} \ No newline at end of file +} From b784970abd76f1feabb76902ec922e9edf37ef0c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 1 Feb 2026 20:39:50 +0100 Subject: [PATCH 214/219] cleanup --- 09_GeometryCreator/main.cpp | 4 ++-- common/include/nbl/examples/cameras/CCamera.hpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/09_GeometryCreator/main.cpp b/09_GeometryCreator/main.cpp index 06521a6d2..d62a10bca 100644 --- a/09_GeometryCreator/main.cpp +++ b/09_GeometryCreator/main.cpp @@ -2,7 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include +#include #include #include "common.hpp" @@ -267,4 +267,4 @@ class GeometryCreatorApp final : public MonoWindowApplication, public BuiltinRes } }; -NBL_MAIN_FUNC(GeometryCreatorApp) \ No newline at end of file +NBL_MAIN_FUNC(GeometryCreatorApp) diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index 9262fc242..221e8d42d 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -14,6 +14,7 @@ #include #include +#include class Camera { @@ -332,4 +333,4 @@ class Camera std::chrono::microseconds nextPresentationTimeStamp, lastVirtualUpTimeStamp; }; -#endif \ No newline at end of file +#endif From bf34d4e03023903dc10fbe03ef5670c83eab3df9 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 2 Feb 2026 12:24:54 +0100 Subject: [PATCH 215/219] pull master, resolve conflicts (+ adjust) --- 50.IESViewer/AppInit.cpp | 2 +- 50.IESViewer/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index 059dc6da3..6f8a8db27 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -257,7 +257,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = { - {.shader = shaders.ies.get(), .entryPoint = "main", .entries = &specConstants }, + {.shader = shaders.ies.get(), .entryPoint = "__nbl__hlsl__ext__FullScreenTriangle__vertex_main", .entries = &specConstants }, {.shader = shaders.ies.get(), .entryPoint = "CdcPS" } }; diff --git a/50.IESViewer/CMakeLists.txt b/50.IESViewer/CMakeLists.txt index 050d0a53c..2bf83045f 100644 --- a/50.IESViewer/CMakeLists.txt +++ b/50.IESViewer/CMakeLists.txt @@ -11,6 +11,7 @@ set(LIBs imtestengine imguizmo "${NBL_EXT_IMGUI_UI_LIB}" + Nabla::ext::FullScreenTriangle ) nbl_create_executable_project("${SRCs}" "" "" "${LIBs}") From e7c20f7715895288c7dca16f3f271f207ac11ca4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 2 Feb 2026 12:32:48 +0100 Subject: [PATCH 216/219] use precompiled fri ext --- 50.IESViewer/AppInit.cpp | 7 +++++-- 50.IESViewer/app_resources/ies.unified.hlsl | 2 -- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index 6f8a8db27..a2d514b7d 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -169,12 +169,15 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) struct { - smart_refctd_ptr ies, imgui; + smart_refctd_ptr ies, imgui, fullScreenTriangleVS; } shaders; { auto start = std::chrono::high_resolution_clock::now(); CREATE_SHADER(shaders.ies, "ies.unified") CREATE_SHADER(shaders.imgui, "imgui.unified") + shaders.fullScreenTriangleVS = ext::FullScreenTriangle::ProtoPipeline::createDefaultVertexShader(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!shaders.fullScreenTriangleVS) + return logFail("Failed to create FullScreenTriangle vertex shader!"); auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); auto took = std::to_string(elapsed.count()); m_logger->log("Finished loading GPU shaders, took %s seconds!", system::ILogger::ELL_PERFORMANCE, took.c_str()); @@ -257,7 +260,7 @@ bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = { - {.shader = shaders.ies.get(), .entryPoint = "__nbl__hlsl__ext__FullScreenTriangle__vertex_main", .entries = &specConstants }, + {.shader = shaders.fullScreenTriangleVS.get(), .entryPoint = "__nbl__hlsl__ext__FullScreenTriangle__vertex_main", .entries = &specConstants }, {.shader = shaders.ies.get(), .entryPoint = "CdcPS" } }; diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl index a0235cfe6..fb89b2ed5 100644 --- a/50.IESViewer/app_resources/ies.unified.hlsl +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -139,8 +139,6 @@ float32_t f(float32_t2 uv) return inIESCandelaImage[pc.cdc.texIx].SampleLevel(generalSampler, uvOcta, 0u).x; } -#include "nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl" - [shader("pixel")] float32_t4 CdcPS(SVertexAttributes input) : SV_Target0 { From f90ce30d643ba03c6e0585f2db015009e27aa912 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 2 Feb 2026 18:55:38 +0100 Subject: [PATCH 217/219] adjust to comments --- 09_GeometryCreator/main.cpp | 1 - 71_RayTracingPipeline/main.cpp | 5 +++-- common/include/nbl/examples/cameras/CCamera.hpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/09_GeometryCreator/main.cpp b/09_GeometryCreator/main.cpp index d62a10bca..6e34a9064 100644 --- a/09_GeometryCreator/main.cpp +++ b/09_GeometryCreator/main.cpp @@ -2,7 +2,6 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include #include #include "common.hpp" diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index 43a7f5fd0..f6b64c5ca 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -1057,7 +1057,8 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui return transform; }; - hlsl::float32_t3x4 planeTransform = hlsl::math::linalg::promote_affine<3,4,3,3>(hlsl::math::linalg::rotation_mat(core::radians(-90.0f), { 1,0,0 })); + const auto planeRotation = hlsl::math::quaternion::create(hlsl::float32_t3(1.f, 0.f, 0.f), core::radians(-90.0f)); + hlsl::float32_t3x4 planeTransform = hlsl::math::linalg::promote_affine<3,4,3,3>(hlsl::_static_cast(planeRotation)); // triangles geometries auto geometryCreator = make_smart_refctd_ptr(); @@ -1503,4 +1504,4 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui bool m_useIndirectCommand = false; }; -NBL_MAIN_FUNC(RaytracingPipelineApp) \ No newline at end of file +NBL_MAIN_FUNC(RaytracingPipelineApp) diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index 221e8d42d..2a4e1f5fd 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -14,7 +14,7 @@ #include #include -#include +#include class Camera { From fcb43e613a9f9ec8ea6c4ad2835410cb5d907237 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 5 Feb 2026 17:04:46 +0100 Subject: [PATCH 218/219] fix interaction between two merges --- 50.IESViewer/AppInit.cpp | 2 +- 50.IESViewer/AppRender.cpp | 2 +- 50.IESViewer/AppUI.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp index a2d514b7d..79338e8ec 100644 --- a/50.IESViewer/AppInit.cpp +++ b/50.IESViewer/AppInit.cpp @@ -12,7 +12,7 @@ #include "app_resources/common.hlsl" #include "app_resources/imgui.opts.hlsl" #include "nbl/ext/ImGui/ImGui.h" -#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" +#include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" #include "nbl/this_example/builtin/build/spirv/keys.hpp" bool IESViewer::parseCommandLine() diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp index 17937cfa8..a06b2702a 100644 --- a/50.IESViewer/AppRender.cpp +++ b/50.IESViewer/AppRender.cpp @@ -8,7 +8,7 @@ #include "nbl/ext/ImGui/ImGui.h" #include "nbl/ext/ScreenShot/ScreenShot.h" #include "app_resources/common.hlsl" -#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" +#include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" bool IESViewer::recreate3DPlotFramebuffers(uint32_t width, uint32_t height) { diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp index 75aa31887..c376f7730 100644 --- a/50.IESViewer/AppUI.cpp +++ b/50.IESViewer/AppUI.cpp @@ -13,7 +13,7 @@ #include "app_resources/common.hlsl" #include "app_resources/false_color.hlsl" #include "app_resources/imgui.opts.hlsl" -#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" +#include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/builtin/hlsl/math/octahedral.hlsl" From 2b034eb4a796e043d882e9e6335070466e7a871f Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 18 Feb 2026 02:41:47 +0300 Subject: [PATCH 219/219] huge shader refactor, more debug UI, also: - Added bilinear and biquadratic samplers - Added a modified version of Urena 2003 with better pre-computation - Fixes after merge from master - Shaders precompiled with permutations for runtime changing of sampling modes without register overhead etc. - removed a lot of code duplications --- .../app_resources/hlsl/Drawing.hlsl | 125 ++- .../app_resources/hlsl/Sampling.hlsl | 355 ------- .../hlsl/SolidAngleVis.frag.hlsl | 199 ---- .../hlsl/benchmark/benchmark.comp.hlsl | 113 ++- .../app_resources/hlsl/common.hlsl | 84 +- .../app_resources/hlsl/gpu_common.hlsl | 26 +- .../hlsl/parallelogram_sampling.hlsl | 727 ++++++-------- .../app_resources/hlsl/pyramid_sampling.hlsl | 568 +++++++++++ .../hlsl/pyramid_sampling/bilinear.hlsl | 86 ++ .../hlsl/pyramid_sampling/biquadratic.hlsl | 158 +++ .../hlsl/pyramid_sampling/urena.hlsl | 87 ++ .../{RayVis.frag.hlsl => ray_vis.frag.hlsl} | 141 ++- .../app_resources/hlsl/silhouette.hlsl | 355 ++++--- .../hlsl/solid_angle_vis.frag.hlsl | 305 ++++++ .../app_resources/hlsl/triangle_sampling.hlsl | 241 +++++ .../app_resources/hlsl/utils.hlsl | 33 +- 73_SolidAngleVisualizer/include/common.hpp | 1 - 73_SolidAngleVisualizer/main.cpp | 925 +++++++++--------- .../include/nbl/examples/cameras/CCamera.hpp | 139 +-- 19 files changed, 2863 insertions(+), 1805 deletions(-) delete mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl delete mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling.hlsl create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/bilinear.hlsl create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/biquadratic.hlsl create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/urena.hlsl rename 73_SolidAngleVisualizer/app_resources/hlsl/{RayVis.frag.hlsl => ray_vis.frag.hlsl} (68%) create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/solid_angle_vis.frag.hlsl create mode 100644 73_SolidAngleVisualizer/app_resources/hlsl/triangle_sampling.hlsl diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl index fa2a93b45..4338bd958 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl @@ -1,5 +1,8 @@ -#ifndef _DEBUG_HLSL_ -#define _DEBUG_HLSL_ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_DRAWING_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_DRAWING_HLSL_INCLUDED_ #include "common.hlsl" #include "gpu_common.hlsl" @@ -210,6 +213,7 @@ float32_t4 drawCorners(float32_t3x4 modelMatrix, float32_t2 ndc, float32_t aaWid return color; } +#ifdef _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ float32_t4 drawClippedSilhouetteVertices(float32_t2 ndc, ClippedSilhouette silhouette, float32_t aaWidth) { float32_t4 color = 0; @@ -235,6 +239,7 @@ float32_t4 drawClippedSilhouetteVertices(float32_t2 ndc, ClippedSilhouette silho } return color; } +#endif // _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ float32_t4 drawRing(float32_t2 ndc, float32_t aaWidth) { @@ -378,6 +383,120 @@ float32_t4 drawFaces(float32_t3x4 modelMatrix, float32_t3 spherePos, float32_t a return color; } +// ============================================================================ +// Spherical geometry drawing helpers (for pyramid visualization) +// ============================================================================ + +// Draw a great circle where dot(p, axis) = 0 +// Used to visualize caliper planes +float32_t4 drawGreatCirclePlane( + float32_t3 axis, + float32_t3 spherePos, + float32_t aaWidth, + float32_t3 color, + float32_t width = 0.005f) +{ + float32_t3 fragDir = normalize(spherePos); + + // Only draw on front hemisphere + if (fragDir.z < 0.0f) + return float32_t4(0, 0, 0, 0); + + // Distance from the great circle plane + float32_t distFromPlane = abs(dot(fragDir, axis)); + + float32_t alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, distFromPlane); + + return float32_t4(color * alpha, alpha); +} + +// Draw lune boundaries - two small circles at dot(p, axis) = offset ± halfWidth +// halfWidth and offset are in sin-space (not radians) +float32_t4 drawLuneBoundary(float32_t3 axis, float32_t halfWidth, float32_t offset, float32_t3 spherePos, float32_t aaWidth, float32_t3 color, float32_t lineWidth = 0.004f) +{ + float32_t3 fragDir = normalize(spherePos); + + // Only draw on front hemisphere + if (fragDir.z < 0.0f) + return float32_t4(0, 0, 0, 0); + + // The lune boundaries are where dot(p, axis) = offset ± halfWidth + float32_t dotWithAxis = dot(fragDir, axis); + + // Draw both boundaries of the lune (accounting for offset) + float32_t upperBound = offset + halfWidth; + float32_t lowerBound = offset - halfWidth; + float32_t distFromUpperBoundary = abs(dotWithAxis - upperBound); + float32_t distFromLowerBoundary = abs(dotWithAxis - lowerBound); + + float32_t alphaUpper = 1.0f - smoothstep(lineWidth - aaWidth, lineWidth + aaWidth, distFromUpperBoundary); + float32_t alphaLower = 1.0f - smoothstep(lineWidth - aaWidth, lineWidth + aaWidth, distFromLowerBoundary); + + float32_t alpha = max(alphaUpper, alphaLower); + + return float32_t4(color * alpha, alpha); +} + +// Draw axis direction markers (dots at +/- axis from center) +float32_t4 drawAxisMarkers( + float32_t3 axis, + float32_t3 center, + float32_t2 ndc, + float32_t aaWidth, + float32_t3 color, + float32_t extent = 0.25f) +{ + float32_t4 result = float32_t4(0, 0, 0, 0); + + // Positive axis endpoint + float32_t3 axisEndPos = normalize(center + axis * extent); + float32_t3 axisEndPosCircle = sphereToCircle(axisEndPos); + result += drawCorner(axisEndPosCircle, ndc, aaWidth, 0.025f, 0.0f, color); + + // Negative axis endpoint (smaller, dimmer) + float32_t3 axisEndNeg = normalize(center - axis * extent); + float32_t3 axisEndNegCircle = sphereToCircle(axisEndNeg); + result += drawCorner(axisEndNegCircle, ndc, aaWidth, 0.015f, 0.0f, color * 0.5f); + + return result; +} + +// ============================================================================ +// Visualization +// ============================================================================ + +// Draw half of a great circle (the visible half of a lune boundary) +float32_t4 drawGreatCircleHalf(float32_t3 normal, float32_t3 spherePos, float32_t3 axis3, float32_t aaWidth, float32_t3 color, float32_t thickness) +{ + // Point is on great circle if dot(point, normal) ≈ 0 + // Only draw the half where dot(point, axis3) > 0 (toward silhouette) + float32_t dist = abs(dot(spherePos, normal)); + float32_t sideFade = smoothstep(-0.1f, 0.1f, dot(spherePos, axis3)); + float32_t alpha = (1.0f - smoothstep(thickness - aaWidth, thickness + aaWidth, dist)) * sideFade; + return float32_t4(color * alpha, alpha); +} + +// Visualize the best caliper edge (the edge that determined axis1) +float32_t4 visualizeBestCaliperEdge(const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], uint32_t bestEdgeIdx, uint32_t count, float32_t3 spherePos, float32_t aaWidth) +{ + float32_t4 result = float32_t4(0, 0, 0, 0); + + if (bestEdgeIdx >= count) + return result; + + uint32_t nextIdx = (bestEdgeIdx + 1 < count) ? bestEdgeIdx + 1 : 0; + float32_t3 v0 = vertices[bestEdgeIdx]; + float32_t3 v1 = vertices[nextIdx]; + + // Draw the best caliper edge with a thicker, gold line + float32_t3 pts[2] = {v0, v1}; + float32_t3 highlightColor = float32_t3(1.0f, 0.8f, 0.0f); + float32_t alpha = drawGreatCircleArc(spherePos, pts, aaWidth, 0.008f); + result += float32_t4(highlightColor * alpha, alpha); + + return result; +} + #endif // VISUALIZE_SAMPLES #if DEBUG_DATA @@ -472,4 +591,4 @@ void validateEdgeVisibility(float32_t3x4 modelMatrix, uint32_t sil, uint32_t ver } #endif // DEBUG_DATA -#endif // _DEBUG_HLSL_ +#endif // _SOLID_ANGLE_VIS_EXAMPLE_DRAWING_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl deleted file mode 100644 index cefa65267..000000000 --- a/73_SolidAngleVisualizer/app_resources/hlsl/Sampling.hlsl +++ /dev/null @@ -1,355 +0,0 @@ -#ifndef _SAMPLING_HLSL_ -#define _SAMPLING_HLSL_ - -// Include the spherical triangle utilities -#include "gpu_common.hlsl" -#include "parallelogram_sampling.hlsl" -#include -#include -#include -#include -#include - -using namespace nbl::hlsl; - -// Maximum number of triangles we can have after clipping -// Without clipping, max 3 faces can be visible at once so 3 faces * 2 triangles = 6 edges, forming max 4 triangles -// With clipping, one more edge. 7 - 2 = 5 max triangles because fanning from one vertex -#define MAX_TRIANGLES 5 - -// Minimal cached sampling data - only what's needed for selection -struct SamplingData -{ - uint32_t count; // Number of valid triangles - uint32_t samplingMode; // Mode used during build - float32_t totalWeight; // Sum of all triangle weights - float32_t3 faceNormal; // Face normal (only used for projected mode) - float32_t triangleSolidAngles[MAX_TRIANGLES]; // Weight per triangle (for selection) - uint32_t triangleIndices[MAX_TRIANGLES]; // Vertex index i (forms triangle with v0, vi, vi+1) -}; - -float32_t2 nextRandomUnorm2(inout nbl::hlsl::Xoroshiro64StarStar rnd) -{ - return float32_t2( - float32_t(rnd()) * 2.3283064365386963e-10, - float32_t(rnd()) * 2.3283064365386963e-10); -} - -float32_t computeProjectedSolidAngleFallback(float32_t3 v0, float32_t3 v1, float32_t3 v2, float32_t3 N) -{ - // 1. Get edge normals (unit vectors) - // We use the cross product of the vertices (unit vectors on sphere) - float32_t3 n0 = cross(v0, v1); - float32_t3 n1 = cross(v1, v2); - float32_t3 n2 = cross(v2, v0); - - // 2. Normalize edge normals (magnitude is sin of the arc length) - float32_t l0 = length(n0); - float32_t l1 = length(n1); - float32_t l2 = length(n2); - - // Guard against degenerate triangles - if (l0 < 1e-7 || l1 < 1e-7 || l2 < 1e-7) - return 0.0f; - - n0 /= l0; - n1 /= l1; - n2 /= l2; - - // 3. Get arc lengths (angles in radians) - float32_t a = asin(clamp(l0, -1.0f, 1.0f)); // side v0-v1 - float32_t b = asin(clamp(l1, -1.0f, 1.0f)); // side v1-v2 - float32_t c = asin(clamp(l2, -1.0f, 1.0f)); // side v2-v0 - - // Handle acos/asin quadrant if dot product is negative - if (dot(v0, v1) < 0) - a = 3.14159265 - a; - if (dot(v1, v2) < 0) - b = 3.14159265 - b; - if (dot(v2, v0) < 0) - c = 3.14159265 - c; - - // 4. Compute projected solid angle - float32_t Gamma = 0.5f * (a * dot(n0, N) + b * dot(n1, N) + c * dot(n2, N)); - - // Return the absolute value of the total - return abs(Gamma); -} - -// Build sampling data once - cache only weights for triangle selection -SamplingData buildSamplingDataFromSilhouette(ClippedSilhouette silhouette, uint32_t samplingMode) -{ - SamplingData data; - data.count = 0; - data.totalWeight = 0.0f; - data.samplingMode = samplingMode; - data.faceNormal = float32_t3(0, 0, 0); - - if (silhouette.count < 3) - return data; - - const float32_t3 v0 = silhouette.vertices[0]; - const float32_t3 origin = float32_t3(0, 0, 0); - - // Compute face normal ONCE before the loop - silhouette is planar! - if (samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) - { - float32_t3 v1 = silhouette.vertices[1]; - float32_t3 v2 = silhouette.vertices[2]; - data.faceNormal = normalize(cross(v1 - v0, v2 - v0)); - } - - // Build fan triangulation from v0 - NBL_UNROLL - for (uint32_t i = 1; i < silhouette.count - 1; i++) - { - float32_t3 v1 = silhouette.vertices[i]; - float32_t3 v2 = silhouette.vertices[i + 1]; - - shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin); - - // Skip degenerate triangles - if (shapeTri.pyramidAngles()) - continue; - - // Calculate triangle solid angle - float32_t solidAngle; - if (samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) - { - // scalar_type projectedSolidAngleOfTriangle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) - float32_t3 cos_vertices = clamp( - (shapeTri.cos_sides - shapeTri.cos_sides.yzx * shapeTri.cos_sides.zxy) * - shapeTri.csc_sides.yzx * shapeTri.csc_sides.zxy, - float32_t3(-1.0f, -1.0f, -1.0f), - float32_t3(1.0f, 1.0f, 1.0f)); - solidAngle = shapeTri.projectedSolidAngleOfTriangle(data.faceNormal, shapeTri.cos_sides, shapeTri.csc_sides, cos_vertices); - } - else - { - solidAngle = shapeTri.solidAngleOfTriangle(); - } - - if (solidAngle <= 0.0f) - continue; - - // Store only what's needed for weighted selection - data.triangleSolidAngles[data.count] = solidAngle; - data.triangleIndices[data.count] = i; - data.totalWeight += solidAngle; - data.count++; - } - -#if DEBUG_DATA - // Validate no antipodal edges exist (would create spherical lune) - for (uint32_t i = 0; i < silhouette.count; i++) - { - uint32_t j = (i + 1) % silhouette.count; - float32_t3 n1 = normalize(silhouette.vertices[i]); - float32_t3 n2 = normalize(silhouette.vertices[j]); - - if (dot(n1, n2) < -0.99f) - { - DebugDataBuffer[0].sphericalLuneDetected = 1; - assert(false && "Spherical lune detected: antipodal silhouette edge"); - } - } - DebugDataBuffer[0].maxTrianglesExceeded = (data.count > MAX_TRIANGLES); - DebugDataBuffer[0].triangleCount = data.count; - DebugDataBuffer[0].totalSolidAngles = data.totalWeight; - for (uint32_t tri = 0; tri < data.count; tri++) - { - DebugDataBuffer[0].solidAngles[tri] = data.triangleSolidAngles[tri]; - } -#endif - - return data; -} - -// Sample using cached selection weights, but recompute geometry on-demand -float32_t3 sampleFromData(SamplingData data, ClippedSilhouette silhouette, float32_t2 xi, out float32_t pdf, out uint32_t selectedIdx) -{ - selectedIdx = 0; - - // Handle empty or invalid data - if (data.count == 0 || data.totalWeight <= 0.0f) - { - pdf = 0.0f; - return float32_t3(0, 0, 1); - } - - // Select triangle using cached weighted random selection - float32_t targetWeight = xi.x * data.totalWeight; - float32_t cumulativeWeight = 0.0f; - float32_t prevCumulativeWeight = 0.0f; - - NBL_UNROLL - for (uint32_t i = 0; i < data.count; i++) - { - prevCumulativeWeight = cumulativeWeight; - cumulativeWeight += data.triangleSolidAngles[i]; - - if (targetWeight <= cumulativeWeight) - { - selectedIdx = i; - break; - } - } - - // Remap xi.x to [0,1] within selected triangle's solidAngle interval - float32_t triSolidAngle = data.triangleSolidAngles[selectedIdx]; - float32_t u = (targetWeight - prevCumulativeWeight) / max(triSolidAngle, 1e-7f); - - // Reconstruct the selected triangle geometry - uint32_t vertexIdx = data.triangleIndices[selectedIdx]; - float32_t3 v0 = silhouette.vertices[0]; - float32_t3 v1 = silhouette.vertices[vertexIdx]; - float32_t3 v2 = silhouette.vertices[vertexIdx + 1]; - - float32_t3 faceNormal = normalize(cross(v1 - v0, v2 - v0)); - - float32_t3 origin = float32_t3(0, 0, 0); - - shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin); - - // Compute vertex angles once - float32_t3 cos_vertices = clamp( - (shapeTri.cos_sides - shapeTri.cos_sides.yzx * shapeTri.cos_sides.zxy) * - shapeTri.csc_sides.yzx * shapeTri.csc_sides.zxy, - float32_t3(-1.0f, -1.0f, -1.0f), - float32_t3(1.0f, 1.0f, 1.0f)); - float32_t3 sin_vertices = sqrt(float32_t3(1.0f, 1.0f, 1.0f) - cos_vertices * cos_vertices); - - // Sample based on mode - float32_t3 direction; - float32_t rcpPdf; - - if (data.samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) - { - sampling::ProjectedSphericalTriangle samplingTri = - sampling::ProjectedSphericalTriangle::create(shapeTri); - - direction = samplingTri.generate( - rcpPdf, - triSolidAngle, - cos_vertices, - sin_vertices, - shapeTri.cos_sides[0], - shapeTri.cos_sides[2], - shapeTri.csc_sides[1], - shapeTri.csc_sides[2], - faceNormal, - false, - float32_t2(u, xi.y)); - triSolidAngle = rcpPdf; // projected solid angle returned as rcpPdf - } - else - { - sampling::SphericalTriangle samplingTri = - sampling::SphericalTriangle::create(shapeTri); - - direction = samplingTri.generate( - triSolidAngle, - cos_vertices, - sin_vertices, - shapeTri.cos_sides[0], - shapeTri.cos_sides[2], - shapeTri.csc_sides[1], - shapeTri.csc_sides[2], - float32_t2(u, xi.y)); - } - - // Calculate PDF - float32_t trianglePdf = 1.0f / triSolidAngle; - float32_t selectionProb = triSolidAngle / data.totalWeight; - pdf = trianglePdf * selectionProb; - - return normalize(direction); -} - -#if VISUALIZE_SAMPLES - -float32_t4 visualizeSamples(float32_t2 screenUV, float32_t3 spherePos, float32_t2 ndc, float32_t aaWidth, ClippedSilhouette silhouette, SAMPLING_MODE samplingMode, uint32_t frameIndex, SamplingData samplingData, uint32_t numSamples -#if DEBUG_DATA - , - inout RWStructuredBuffer DebugDataBuffer -#endif -) -{ - float32_t4 accumColor = 0; - - if (silhouette.count == 0) - return 0; - - float32_t2 pssSize = float32_t2(0.3, 0.3); // 30% of screen - float32_t2 pssPos = float32_t2(0.01, 0.01); // Offset from corner - bool isInsidePSS = all(and(screenUV >= pssPos, screenUV <= (pssPos + pssSize))); - - ParallelogramSilhouette paraSilhouette = buildParallelogram(silhouette, ndc, spherePos, aaWidth, accumColor); - -#if DEBUG_DATA - DebugDataBuffer[0].sampleCount = numSamples; -#endif - for (uint32_t i = 0; i < numSamples; i++) - { - - // Hash the invocation to offset the grid - uint32_t offset = i * 747796405u + 2891336453u; - uint32_t idx = (offset) & 63u; // Keep within 64 samples - float32_t2 xi = float32_t2( - (float32_t(idx & 7u) + 0.5) / 8.0f, - (float32_t(idx >> 3u) + 0.5) / 8.0f); - - float32_t pdf; - uint32_t index = 0; - float32_t3 sampleDir; - if (samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || - samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) - { - sampleDir = sampleFromData(samplingData, silhouette, xi, pdf, index); - } - else if (samplingMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) - { - bool valid; - sampleDir = sampleFromParallelogram(paraSilhouette, xi, pdf, valid); - if (!valid) - { - pdf = 0.0f; - sampleDir = float32_t3(0, 0, 1); - } - } -#if DEBUG_DATA - DebugDataBuffer[0].rayData[i] = float32_t4(sampleDir, pdf); -#endif - - float32_t dist3D = distance(sampleDir, normalize(spherePos)); - float32_t alpha3D = 1.0f - smoothstep(0.0f, 0.02f, dist3D); - - if (alpha3D > 0.0f && !isInsidePSS) - { - float32_t3 sampleColor = colorLUT[index].rgb; - accumColor += float32_t4(sampleColor * alpha3D, alpha3D); - } - - if (isInsidePSS) - { - // Map the raw xi to the PSS square dimensions - float32_t2 xiPixelPos = pssPos + xi * pssSize; - float32_t dist2D = distance(screenUV, xiPixelPos); - - float32_t alpha2D = drawCross2D(screenUV, xiPixelPos, 0.005f, 0.001f); - if (alpha2D > 0.0f) - { - float32_t3 sampleColor = colorLUT[index].rgb; - accumColor += float32_t4(sampleColor * alpha2D, alpha2D); - } - } - } - - // just the outline of the PSS - if (isInsidePSS && accumColor.a < 0.1) - accumColor = float32_t4(0.1, 0.1, 0.1, 1.0); - - return accumColor; -} -#endif // VISUALIZE_SAMPLES -#endif // _SAMPLING_HLSL_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl deleted file mode 100644 index bd9312733..000000000 --- a/73_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ /dev/null @@ -1,199 +0,0 @@ -#pragma wave shader_stage(fragment) - -#include "common.hlsl" -#include - -using namespace nbl::hlsl; -using namespace ext::FullScreenTriangle; - -#if DEBUG_DATA -[[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; // TODO: move below other includes -#endif - -#include "utils.hlsl" -#include "Drawing.hlsl" -#include "Sampling.hlsl" -#include "silhouette.hlsl" -[[vk::push_constant]] struct PushConstants pc; - -void setDebugData(uint32_t sil, uint32_t3 region, uint32_t configIndex) -{ -#if DEBUG_DATA - DebugDataBuffer[0].region = uint32_t3(region); - DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); - DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); - for (uint32_t i = 0; i < 6; i++) - { - DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); - } - DebugDataBuffer[0].silhouette = sil; -#endif -} - -void computeCubeGeo() -{ - for (uint32_t i = 0; i < 8; i++) - corners[i] = mul(pc.modelMatrix, float32_t4(constCorners[i], 1.0f)).xyz; - - for (uint32_t f = 0; f < 6; f++) - { - faceCenters[f] = float32_t3(0, 0, 0); - for (uint32_t v = 0; v < 4; v++) - faceCenters[f] += corners[faceToCorners[f][v]]; - faceCenters[f] /= 4.0f; - } -} - -void validateSilhouetteEdges(uint32_t sil, uint32_t vertexCount, inout uint32_t silEdgeMask) -{ -#if DEBUG_DATA - { - for (uint32_t i = 0; i < vertexCount; i++) - { - uint32_t vIdx = i % vertexCount; - uint32_t v1Idx = (i + 1) % vertexCount; - - uint32_t v0Corner = getSilhouetteVertex(sil, vIdx); - uint32_t v1Corner = getSilhouetteVertex(sil, v1Idx); - // Mark edge as part of silhouette - for (uint32_t e = 0; e < 12; e++) - { - uint32_t2 edge = allEdges[e]; - if ((edge.x == v0Corner && edge.y == v1Corner) || - (edge.x == v1Corner && edge.y == v0Corner)) - { - silEdgeMask |= (1u << e); - } - } - } - validateEdgeVisibility(pc.modelMatrix, sil, vertexCount, silEdgeMask); - } -#endif -} - -void computeSpherePos(SVertexAttributes vx, out float32_t2 ndc, out float32_t3 spherePos) -{ - ndc = vx.uv * 2.0f - 1.0f; - float32_t aspect = pc.viewport.z / pc.viewport.w; - ndc.x *= aspect; - - float32_t2 normalized = ndc / CIRCLE_RADIUS; - float32_t r2 = dot(normalized, normalized); - - if (r2 <= 1.0f) - { - spherePos = float32_t3(normalized.x, normalized.y, sqrt(1.0f - r2)); - } - else - { - float32_t uv2Plus1 = r2 + 1.0f; - spherePos = float32_t3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; - } - spherePos = normalize(spherePos); -} - -[[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 -{ - float32_t4 color = float32_t4(0, 0, 0, 0); - for (uint32_t i = 0; i < 1; i++) - { - float32_t aaWidth = length(float32_t2(ddx(vx.uv.x), ddy(vx.uv.y))); - float32_t3 spherePos; - float32_t2 ndc; - computeSpherePos(vx, ndc, spherePos); -#if !FAST || DEBUG_DATA - computeCubeGeo(); -#endif - uint32_t3 region; - uint32_t configIndex; - uint32_t vertexCount; - uint32_t sil = computeRegionAndConfig(pc.modelMatrix, region, configIndex, vertexCount); - - uint32_t silEdgeMask = 0; // TODO: take from 'fast' computeSilhouette() -#if DEBUG_DATA - validateSilhouetteEdges(sil, vertexCount, silEdgeMask); -#endif - ClippedSilhouette silhouette; - -#if VISUALIZE_SAMPLES - color += computeSilhouette(pc.modelMatrix, vertexCount, sil, spherePos, aaWidth, silhouette); -#else - computeSilhouette(pc.modelMatrix, vertexCount, sil, silhouette); -#endif - - SamplingData samplingData; - ParallelogramSilhouette paraSilhouette; - if (pc.samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || - pc.samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) - { - samplingData = buildSamplingDataFromSilhouette(silhouette, pc.samplingMode); - } - else - { - - paraSilhouette = buildParallelogram(silhouette -#if VISUALIZE_SAMPLES - , - ndc, spherePos, aaWidth, color -#endif - ); - } - -#if VISUALIZE_SAMPLES - - // For debugging: Draw a small indicator of which faces are found - // color += drawVisibleFaceOverlay(pc.modelMatrix, spherePos, region, aaWidth); - - // color += drawFaces(pc.modelMatrix, spherePos, aaWidth); - - // Draw clipped silhouette vertices - // color += drawClippedSilhouetteVertices(ndc, silhouette, aaWidth); - color += drawHiddenEdges(pc.modelMatrix, spherePos, silEdgeMask, aaWidth); - // color += drawCorners(pc.modelMatrix, ndc, aaWidth, 0.05f); - color += drawRing(ndc, aaWidth); - - // Draw samples on sphere - color += visualizeSamples(vx.uv, spherePos, ndc, aaWidth, silhouette, pc.samplingMode, pc.frameIndex, samplingData, pc.sampleCount -#if DEBUG_DATA - , - DebugDataBuffer -#endif - ); - - if (all(vx.uv >= float32_t2(0.f, 0.97f)) && all(vx.uv <= float32_t2(0.03f, 1.0f))) - { - return float32_t4(colorLUT[configIndex], 1.0f); - } -#else - // Hash the invocation to offset the grid - uint32_t offset = 747796405u + 2891336453u; - uint32_t idx = (offset) & 63u; // Keep within 64 samples - float32_t2 xi = float32_t2( - (float32_t(idx & 7u) + 0.5) / 8.0f, - (float32_t(idx >> 3u) + 0.5) / 8.0f); - - float32_t pdf; - uint32_t index = 0; - float32_t3 sampleDir; - if (pc.samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || - pc.samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) - { - sampleDir = sampleFromData(samplingData, silhouette, xi, pdf, index); - } - else if (pc.samplingMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) - { - bool valid; - sampleDir = sampleFromParallelogram(paraSilhouette, xi, pdf, valid); - if (!valid) - { - pdf = 0.0f; - sampleDir = float32_t3(0, 0, 1); - } - } - color += float4(sampleDir * 0.02f / pdf, 1.0f); -#endif // VISUALIZE_SAMPLES - setDebugData(sil, region, configIndex); - } - - return color; -} \ No newline at end of file diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl index 0ea7c2afb..3b49d17ca 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl @@ -1,37 +1,22 @@ -//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. //// This file is part of the "Nabla Engine". //// For conditions of distribution and use, see copyright notice in nabla.h #pragma shader_stage(compute) #include "app_resources/hlsl/common.hlsl" -// doesn't change Z coordinate -float32_t3 sphereToCircle(float32_t3 spherePoint) -{ - if (spherePoint.z >= 0.0f) - { - return float32_t3(spherePoint.xy, spherePoint.z); - } - else - { - float32_t r2 = (1.0f - spherePoint.z) / (1.0f + spherePoint.z); - float32_t uv2Plus1 = r2 + 1.0f; - return float32_t3((spherePoint.xy * uv2Plus1 / 2.0f), spherePoint.z); - } -} - -#undef DEBUG_DATA // Avoid conflict with DebugDataBuffer in this file -#undef VISUALIZE_SAMPLES - #include "app_resources/hlsl/benchmark/common.hlsl" #include "app_resources/hlsl/silhouette.hlsl" -#include "app_resources/hlsl/Sampling.hlsl" #include "app_resources/hlsl/parallelogram_sampling.hlsl" +#include "app_resources/hlsl/pyramid_sampling.hlsl" +#include "app_resources/hlsl/triangle_sampling.hlsl" using namespace nbl::hlsl; [[vk::binding(0, 0)]] RWByteAddressBuffer outputBuffer; [[vk::push_constant]] BenchmarkPushConstants pc; +static const SAMPLING_MODE benchmarkMode = (SAMPLING_MODE)SAMPLING_MODE_CONST; + [numthreads(BENCHMARK_WORKGROUP_DIMENSION_SIZE_X, 1, 1)] [shader("compute")] void main(uint32_t3 invocationID : SV_DispatchThreadID) @@ -43,43 +28,101 @@ using namespace nbl::hlsl; uint32_t3 region; uint32_t configIndex; uint32_t vertexCount; - uint32_t sil = computeRegionAndConfig(perturbedMatrix, region, configIndex, vertexCount); + uint32_t sil = ClippedSilhouette::computeRegionAndConfig(perturbedMatrix, region, configIndex, vertexCount); + + ClippedSilhouette silhouette = (ClippedSilhouette)0; + silhouette.compute(perturbedMatrix, vertexCount, sil); - ClippedSilhouette silhouette; - computeSilhouette(perturbedMatrix, vertexCount, sil, silhouette); float32_t pdf; uint32_t triIdx; + uint32_t validSampleCount = 0; float32_t3 sampleDir = float32_t3(0.0, 0.0, 0.0); - if (pc.benchmarkMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || - pc.benchmarkMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + + bool sampleValid; + if (benchmarkMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || + benchmarkMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) { - SamplingData samplingData; - samplingData = buildSamplingDataFromSilhouette(silhouette, pc.benchmarkMode); + TriangleFanSampler samplingData; + samplingData = TriangleFanSampler::create(silhouette, benchmarkMode); - for (uint32_t i = 0; i < 64; i++) + for (uint32_t i = 0; i < pc.sampleCount; i++) { float32_t2 xi = float32_t2( (float32_t(i & 7u) + 0.5f) / 8.0f, (float32_t(i >> 3u) + 0.5f) / 8.0f); - sampleDir += sampleFromData(samplingData, silhouette, xi, pdf, triIdx); + sampleDir += samplingData.sample(silhouette, xi, pdf, triIdx); + validSampleCount++; } } - else if (pc.benchmarkMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) + else if (benchmarkMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) { // Precompute parallelogram for sampling - ParallelogramSilhouette paraSilhouette = buildParallelogram(silhouette); - for (uint32_t i = 0; i < 64; i++) + silhouette.normalize(); + SilEdgeNormals silEdgeNormals; + Parallelogram parallelogram = Parallelogram::create(silhouette, silEdgeNormals); + for (uint32_t i = 0; i < pc.sampleCount; i++) + { + float32_t2 xi = float32_t2( + (float32_t(i & 7u) + 0.5f) / 8.0f, + (float32_t(i >> 3u) + 0.5f) / 8.0f); + + sampleDir += parallelogram.sample(silEdgeNormals, xi, pdf, sampleValid); + validSampleCount += sampleValid ? 1u : 0u; + } + } + else if (benchmarkMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE) + { + // Precompute spherical pyramid and Urena sampler once (edge normals fused) + SilEdgeNormals silEdgeNormals; + SphericalPyramid pyramid = SphericalPyramid::create(silhouette, silEdgeNormals); + UrenaSampler urena = UrenaSampler::create(pyramid); + + for (uint32_t i = 0; i < pc.sampleCount; i++) + { + float32_t2 xi = float32_t2( + (float32_t(i & 7u) + 0.5f) / 8.0f, + (float32_t(i >> 3u) + 0.5f) / 8.0f); + + sampleDir += urena.sample(pyramid, silEdgeNormals, xi, pdf, sampleValid); + validSampleCount += sampleValid ? 1u : 0u; + } + } + else if (benchmarkMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC) + { + // Precompute spherical pyramid and biquadratic sampler once (edge normals fused) + SilEdgeNormals silEdgeNormals; + SphericalPyramid pyramid = SphericalPyramid::create(silhouette, silEdgeNormals); + BiquadraticSampler biquad = BiquadraticSampler::create(pyramid); + + for (uint32_t i = 0; i < pc.sampleCount; i++) + { + float32_t2 xi = float32_t2( + (float32_t(i & 7u) + 0.5f) / 8.0f, + (float32_t(i >> 3u) + 0.5f) / 8.0f); + + sampleDir += biquad.sample(pyramid, silEdgeNormals, xi, pdf, sampleValid); + validSampleCount += sampleValid ? 1u : 0u; + } + } + else if (benchmarkMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) + { + // Precompute spherical pyramid and bilinear sampler once (edge normals fused) + SilEdgeNormals silEdgeNormals; + SphericalPyramid pyramid = SphericalPyramid::create(silhouette, silEdgeNormals); + BilinearSampler bilin = BilinearSampler::create(pyramid); + + for (uint32_t i = 0; i < pc.sampleCount; i++) { float32_t2 xi = float32_t2( (float32_t(i & 7u) + 0.5f) / 8.0f, (float32_t(i >> 3u) + 0.5f) / 8.0f); - bool valid; - sampleDir += sampleFromParallelogram(paraSilhouette, xi, pdf, valid); + sampleDir += bilin.sample(pyramid, silEdgeNormals, xi, pdf, sampleValid); + validSampleCount += sampleValid ? 1u : 0u; } } const uint32_t offset = sizeof(uint32_t) * invocationID.x; - outputBuffer.Store(offset, pdf + triIdx + asuint(sampleDir.x) + asuint(sampleDir.y) + asuint(sampleDir.z)); + outputBuffer.Store(offset, pdf + validSampleCount + triIdx + asuint(sampleDir.x) + asuint(sampleDir.y) + asuint(sampleDir.z)); } diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl index 9e4954ebc..d63ec3c6a 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -1,9 +1,10 @@ -#ifndef _SOLID_ANGLE_VIS_COMMON_HLSL_ -#define _SOLID_ANGLE_VIS_COMMON_HLSL_ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_COMMON_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_COMMON_HLSL_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -#define DEBUG_DATA 01 -#define VISUALIZE_SAMPLES 01 #define FAST 1 @@ -16,65 +17,83 @@ namespace nbl { TRIANGLE_SOLID_ANGLE, TRIANGLE_PROJECTED_SOLID_ANGLE, - PROJECTED_PARALLELOGRAM_SOLID_ANGLE + PROJECTED_PARALLELOGRAM_SOLID_ANGLE, + SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE, + SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC, + SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR, + Count }; struct ResultData { - uint32_t parallelogramDoesNotBound; - float32_t parallelogramArea; - uint32_t failedVertexIndex; - uint32_t edgeIsConvex[4]; - - uint32_t parallelogramVerticesInside; - uint32_t parallelogramEdgesInside; - uint32_t failedEdgeIndex; - float32_t2 failedVertexUV; - float32_t3 failedPoint; - uint32_t failedEdgeSample; - float32_t2 failedEdgeUV; - float32_t2 parallelogramCorners[4]; - + // Silhouette uint32_t3 region; uint32_t silhouetteIndex; - uint32_t silhouetteVertexCount; uint32_t silhouette; uint32_t positiveVertCount; uint32_t edgeVisibilityMismatch; - uint32_t clipMask; uint32_t clipCount; uint32_t rotatedSil; uint32_t wrapAround; - uint32_t rotatedClipMask; uint32_t rotateAmount; - uint32_t maxTrianglesExceeded; - uint32_t sphericalLuneDetected; - uint32_t vertices[6]; - uint32_t clippedSilhouetteVertexCount; float32_t3 clippedSilhouetteVertices[7]; uint32_t clippedSilhouetteVerticesIndices[7]; + // Parallelogram + uint32_t parallelogramDoesNotBound; + float32_t parallelogramArea; + uint32_t failedVertexIndex; + uint32_t edgeIsConvex[4]; + uint32_t parallelogramVerticesInside; + uint32_t parallelogramEdgesInside; + float32_t2 parallelogramCorners[4]; + + // spherical triangle + uint32_t maxTrianglesExceeded; + uint32_t sphericalLuneDetected; uint32_t triangleCount; float32_t solidAngles[5]; float32_t totalSolidAngles; - uint32_t sampleOutsideSilhouette; - // Sampling ray visualization data uint32_t sampleCount; - float32_t4 rayData[64]; // xyz = direction, w = PDF + float32_t4 rayData[512]; // xyz = direction, w = PDF + + // Pyramid sampling debug data + float32_t3 pyramidAxis1; // First caliper axis direction + float32_t3 pyramidAxis2; // Second caliper axis direction + float32_t3 pyramidCenter; // Silhouette center direction + float32_t pyramidHalfWidth1; // Half-width along axis1 (sin-space) + float32_t pyramidHalfWidth2; // Half-width along axis2 (sin-space) + float32_t pyramidOffset1; // Center offset along axis1 + float32_t pyramidOffset2; // Center offset along axis2 + float32_t pyramidSolidAngle; // Bounding region solid angle + uint32_t pyramidBestEdge; // Which edge produced best caliper + uint32_t pyramidSpansHemisphere; // Warning: silhouette >= hemisphere + float32_t pyramidMin1; // Min dot product along axis1 + float32_t pyramidMax1; // Max dot product along axis1 + float32_t pyramidMin2; // Min dot product along axis2 + float32_t pyramidMax2; // Max dot product along axis2 + uint32_t axis2BiggerThanAxis1; + + // Sampling stats + uint32_t validSampleCount; + uint32_t threadCount; // Used as a hack for fragment shader, as dividend for validSampleCount }; +#ifdef __HLSL_VERSION + [[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; +#endif + struct PushConstants { float32_t3x4 modelMatrix; float32_t4 viewport; - SAMPLING_MODE samplingMode; uint32_t sampleCount; uint32_t frameIndex; }; @@ -84,6 +103,7 @@ namespace nbl float32_t4x4 viewProjMatrix; float32_t3x4 viewMatrix; float32_t3x4 modelMatrix; + float32_t3x4 invModelMatrix; float32_t4 viewport; uint32_t frameIndex; }; @@ -91,7 +111,7 @@ namespace nbl struct BenchmarkPushConstants { float32_t3x4 modelMatrix; - SAMPLING_MODE benchmarkMode; + uint32_t sampleCount; }; static const float32_t3 colorLUT[27] = { @@ -113,4 +133,4 @@ namespace nbl #endif // __HLSL_VERSION } } -#endif // _SOLID_ANGLE_VIS_COMMON_HLSL_ +#endif // _SOLID_ANGLE_VIS_EXAMPLE_COMMON_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl index 040883956..142471493 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl @@ -1,15 +1,20 @@ -#ifndef GPU_COMMON_HLSL -#define GPU_COMMON_HLSL +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_GPU_COMMON_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_GPU_COMMON_HLSL_INCLUDED_ -static const float32_t CIRCLE_RADIUS = 1.0f; +#include "utils.hlsl" + +static const float32_t CIRCLE_RADIUS = 0.5f; static const float32_t INV_CIRCLE_RADIUS = 1.0f / CIRCLE_RADIUS; // --- Geometry Utils --- -struct ClippedSilhouette -{ - float32_t3 vertices[7]; // Max 7 vertices after clipping, unnormalized - uint32_t count; -}; +#define MAX_SILHOUETTE_VERTICES 7 + +// Special index values for clip points +static const uint32_t CLIP_POINT_A = 23; // Clip point between last positive and first negative +static const uint32_t CLIP_POINT_B = 24; // Clip point between last negative and first positive static const float32_t3 constCorners[8] = { float32_t3(-0.5f, -0.5f, -0.5f), float32_t3(0.5f, -0.5f, -0.5f), float32_t3(-0.5f, 0.5f, -0.5f), float32_t3(0.5f, 0.5f, -0.5f), @@ -70,7 +75,7 @@ static const uint32_t silhouettes[27][7] = { {4, 2, 6, 7, 3, 0, 0}, // 10: Light Orange {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange {4, 1, 3, 7, 5, 0, 0}, // 12: Pink - {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink + {4, 0, 4, 6, 7, 3, 2}, // 13: Light Pink {4, 0, 4, 6, 2, 0, 0}, // 14: Deep Rose {6, 0, 1, 3, 7, 5, 4}, // 15: Purple {4, 0, 1, 5, 4, 0, 0}, // 16: Light Purple @@ -166,4 +171,5 @@ float32_t3 getVertex(float32_t3x4 modelMatrix, uint32_t vertexIdx) return corners[vertexIdx]; #endif } -#endif // GPU_COMMON_HLSL + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_GPU_COMMON_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl index ea9bebcb3..cd02171af 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl @@ -1,535 +1,418 @@ -#ifndef _PARALLELOGRAM_SAMPLING_HLSL_ -#define _PARALLELOGRAM_SAMPLING_HLSL_ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_PARALLELOGRAM_SAMPLING_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_PARALLELOGRAM_SAMPLING_HLSL_INCLUDED_ #include #include +#include "silhouette.hlsl" +#include "drawing.hlsl" -#define MAX_SILHOUETTE_VERTICES 7 #define MAX_CURVE_APEXES 2 -#define GET_PROJ_VERT(i) vertices[i].xy *CIRCLE_RADIUS +#define GET_PROJ_VERT(i) silhouette.vertices[i].xy *CIRCLE_RADIUS // ============================================================================ -// Core structures +// Minimum bounding rectangle on projected sphere // ============================================================================ - struct Parallelogram { float16_t2 corner; float16_t2 axisDir; float16_t width; float16_t height; -}; - -struct PrecomputedSilhouette -{ - float16_t3 edgeNormals[MAX_SILHOUETTE_VERTICES]; // 10.5 floats instead of 21 - uint32_t count; -}; - -struct ParallelogramSilhouette -{ - Parallelogram para; - PrecomputedSilhouette silhouette; -}; - -// ============================================================================ -// Silhouette helpers -// ============================================================================ - -PrecomputedSilhouette precomputeSilhouette(NBL_CONST_REF_ARG(ClippedSilhouette) sil) -{ - PrecomputedSilhouette result; - result.count = sil.count; - - float32_t3 v0 = sil.vertices[0]; - float32_t3 v1 = sil.vertices[1]; - float32_t3 v2 = sil.vertices[2]; - result.edgeNormals[0] = float16_t3(cross(v0, v1)); - result.edgeNormals[1] = float16_t3(cross(v1, v2)); + // ======================================================================== + // Projection helpers + // ======================================================================== - if (sil.count > 3) + static float32_t3 circleToSphere(float32_t2 circlePoint) { - float32_t3 v3 = sil.vertices[3]; - result.edgeNormals[2] = float16_t3(cross(v2, v3)); - - if (sil.count > 4) - { - float32_t3 v4 = sil.vertices[4]; - result.edgeNormals[3] = float16_t3(cross(v3, v4)); + float32_t2 xy = circlePoint / CIRCLE_RADIUS; + float32_t xy_len_sq = dot(xy, xy); + return float32_t3(xy, sqrt(1.0f - xy_len_sq)); + } - if (sil.count > 5) - { - float32_t3 v5 = sil.vertices[5]; - result.edgeNormals[4] = float16_t3(cross(v4, v5)); + // ======================================================================== + // Curve evaluation helpers + // ======================================================================== - if (sil.count > 6) - { - float32_t3 v6 = sil.vertices[6]; - result.edgeNormals[5] = float16_t3(cross(v5, v6)); - result.edgeNormals[6] = float16_t3(cross(v6, v0)); - } - else - { - result.edgeNormals[5] = float16_t3(cross(v5, v0)); - result.edgeNormals[6] = float16_t3(0.0f, 0.0f, 0.0f); - } - } - else - { - result.edgeNormals[4] = float16_t3(cross(v4, v0)); - result.edgeNormals[5] = float16_t3(0.0f, 0.0f, 0.0f); - result.edgeNormals[6] = float16_t3(0.0f, 0.0f, 0.0f); - } - } - else - { - result.edgeNormals[3] = float16_t3(cross(v3, v0)); - result.edgeNormals[4] = float16_t3(0.0f, 0.0f, 0.0f); - result.edgeNormals[5] = float16_t3(0.0f, 0.0f, 0.0f); - result.edgeNormals[6] = float16_t3(0.0f, 0.0f, 0.0f); - } - } - else + static float32_t2 evalCurvePoint(float32_t3 S, float32_t3 E, float32_t t) { - result.edgeNormals[2] = float16_t3(cross(v2, v0)); - result.edgeNormals[3] = float16_t3(0.0f, 0.0f, 0.0f); - result.edgeNormals[4] = float16_t3(0.0f, 0.0f, 0.0f); - result.edgeNormals[5] = float16_t3(0.0f, 0.0f, 0.0f); - result.edgeNormals[6] = float16_t3(0.0f, 0.0f, 0.0f); + float32_t3 v = S + t * (E - S); + float32_t invLen = rsqrt(dot(v, v)); + return v.xy * (invLen * CIRCLE_RADIUS); } - return result; -} + static float32_t2 evalCurveTangent(float32_t3 S, float32_t3 E, float32_t t) + { + float32_t3 v = S + t * (E - S); + float32_t vLenSq = dot(v, v); -bool isInsideSilhouetteFast(float32_t3 dir, NBL_CONST_REF_ARG(PrecomputedSilhouette) sil) -{ - float16_t3 d = float16_t3(dir); - half maxDot = dot(d, sil.edgeNormals[0]); - maxDot = max(maxDot, dot(d, sil.edgeNormals[1])); - maxDot = max(maxDot, dot(d, sil.edgeNormals[2])); - maxDot = max(maxDot, dot(d, sil.edgeNormals[3])); - maxDot = max(maxDot, dot(d, sil.edgeNormals[4])); - maxDot = max(maxDot, dot(d, sil.edgeNormals[5])); - maxDot = max(maxDot, dot(d, sil.edgeNormals[6])); - return maxDot <= half(0.0f); -} -float32_t3 circleToSphere(float32_t2 circlePoint) -{ - float32_t2 xy = circlePoint / CIRCLE_RADIUS; - float32_t xy_len_sq = dot(xy, xy); + if (vLenSq < 1e-12f) + return normalize(E.xy - S.xy); - // if (xy_len_sq >= 1.0f) - // return float32_t3(0, 0, 0); + float32_t3 p = v * rsqrt(vLenSq); + float32_t3 vPrime = E - S; + float32_t2 tangent2D = (vPrime - p * dot(p, vPrime)).xy; - return float32_t3(xy, sqrt(1.0f - xy_len_sq)); -} + float32_t len = length(tangent2D); + return (len > 1e-7f) ? tangent2D / len : normalize(E.xy - S.xy); + } -bool isEdgeConvex(float32_t3 S, float32_t3 E) -{ - return nbl::hlsl::cross2D(S.xy, E.xy) < -1e-6f; -} + // Get both endpoint tangents (shares SdotE computation) + static void getProjectedTangents(float32_t3 S, float32_t3 E, out float32_t2 t0, out float32_t2 t1) + { + float32_t SdotE = dot(S, E); -// ============================================================================ -// Curve evaluation helpers -// ============================================================================ + float32_t2 tangent0_2D = (E - S * SdotE).xy; + float32_t2 tangent1_2D = (E * SdotE - S).xy; -// Evaluate curve point at t using rsqrt -float32_t2 evalCurvePoint(float32_t3 S, float32_t3 E, float32_t t) -{ - float32_t3 v = S + t * (E - S); - float32_t invLen = rsqrt(dot(v, v)); - return v.xy * (invLen * CIRCLE_RADIUS); -} + float32_t len0Sq = dot(tangent0_2D, tangent0_2D); + float32_t len1Sq = dot(tangent1_2D, tangent1_2D); -// Evaluate tangent at arbitrary t -float32_t2 evalCurveTangent(float32_t3 S, float32_t3 E, float32_t t) -{ - float32_t3 v = S + t * (E - S); - float32_t vLenSq = dot(v, v); + const float32_t eps = 1e-14f; - if (vLenSq < 1e-12f) - return normalize(E.xy - S.xy); + if (len0Sq > eps && len1Sq > eps) + { + t0 = tangent0_2D * rsqrt(len0Sq); + t1 = tangent1_2D * rsqrt(len1Sq); + return; + } - float32_t3 p = v * rsqrt(vLenSq); - float32_t3 vPrime = E - S; - float32_t2 tangent2D = (vPrime - p * dot(p, vPrime)).xy; + // Rare fallback path + float32_t2 diff = E.xy - S.xy; + float32_t diffLenSq = dot(diff, diff); + float32_t2 fallback = diffLenSq > eps ? diff * rsqrt(diffLenSq) : float32_t2(1.0f, 0.0f); - float32_t len = length(tangent2D); - return (len > 1e-7f) ? tangent2D / len : normalize(E.xy - S.xy); -} + t0 = len0Sq > eps ? tangent0_2D * rsqrt(len0Sq) : fallback; + t1 = len1Sq > eps ? tangent1_2D * rsqrt(len1Sq) : fallback; + } -// Get both endpoint tangents efficiently (shares SdotE computation) -void getProjectedTangents(float32_t3 S, float32_t3 E, out float32_t2 t0, out float32_t2 t1) -{ - float32_t SdotE = dot(S, E); + // Compute apex with clamping to prevent apex explosion + static void computeApexClamped(float32_t2 p0, float32_t2 p1, float32_t2 t0, float32_t2 t1, out float32_t2 apex) + { + float32_t denom = t0.x * t1.y - t0.y * t1.x; + float32_t2 center = (p0 + p1) * 0.5f; - float32_t2 tangent0_2D = (E - S * SdotE).xy; - float32_t2 tangent1_2D = (E * SdotE - S).xy; + if (abs(denom) < 1e-6f) + { + apex = center; + return; + } - float32_t len0Sq = dot(tangent0_2D, tangent0_2D); - float32_t len1Sq = dot(tangent1_2D, tangent1_2D); + float32_t2 dp = p1 - p0; + float32_t s = (dp.x * t1.y - dp.y * t1.x) / denom; + apex = p0 + s * t0; - const float32_t eps = 1e-14f; + float32_t2 toApex = apex - center; + float32_t distSq = dot(toApex, toApex); + float32_t maxDistSq = CIRCLE_RADIUS * CIRCLE_RADIUS * 4.0f; - if (len0Sq > eps && len1Sq > eps) - { - t0 = tangent0_2D * rsqrt(len0Sq); - t1 = tangent1_2D * rsqrt(len1Sq); - return; + if (distSq > maxDistSq) + { + apex = center + toApex * (CIRCLE_RADIUS * 2.0f * rsqrt(distSq)); + } } - // Rare fallback path - float32_t2 diff = E.xy - S.xy; - float32_t diffLenSq = dot(diff, diff); - float32_t2 fallback = diffLenSq > eps ? diff * rsqrt(diffLenSq) : float32_t2(1.0f, 0.0f); - - t0 = len0Sq > eps ? tangent0_2D * rsqrt(len0Sq) : fallback; - t1 = len1Sq > eps ? tangent1_2D * rsqrt(len1Sq) : fallback; -} + // ======================================================================== + // Bounding box computation (rotating calipers) + // + // testEdgeForAxis and computeBoundsForAxis are + // templated on a bool to select between two precision levels: + // + // Accurate=false (used by tryCaliperDir, O(N^2) total calls): + // Tests vertices + edge midpoints only. Cheap (just dot products) and + // sufficient for *ranking* candidate axes, even though it may + // underestimate the true extent of convex edges. + // + // Accurate=true (used by buildForAxis, called once): + // Also computes tangent-line apex intersections for convex edges to + // find the true extremum. Great circle arcs that project as convex + // curves can bulge beyond their endpoints; the apex (tangent + // evaluation + line intersection + clamping) captures this but is + // ~4x more expensive per edge. + // + // The fast path gives the same relative ranking of axes (the + // approximation error is consistent across candidates), so the + // cheapest axis found by Fast is also the cheapest under Accurate. + // ======================================================================== + + static void testPoint(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, float32_t2 pt, float32_t2 dir, float32_t2 perpDir) + { + float32_t projAlong = dot(pt, dir); + float32_t projPerp = dot(pt, perpDir); -// Compute apex with clamping to prevent apex explosion -void computeApexClamped(float32_t2 p0, float32_t2 p1, float32_t2 t0, float32_t2 t1, out float32_t2 apex) -{ - float32_t denom = t0.x * t1.y - t0.y * t1.x; - float32_t2 center = (p0 + p1) * 0.5f; + minAlong = min(minAlong, projAlong); + maxAlong = max(maxAlong, projAlong); + minPerp = min(minPerp, projPerp); + maxPerp = max(maxPerp, projPerp); + } - if (abs(denom) < 1e-6f) + // Accurate=false (Fast): tests vertex + midpoint only. Used O(N^2) times for axis ranking. + // Accurate=true: also computes tangent-line apex for convex edges. Used once for final rect. + template + static void testEdgeForAxis(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, const ClippedSilhouette silhouette, uint32_t convexMask, uint32_t n3Mask, float32_t2 dir, float32_t2 perpDir) { - apex = center; - return; - } + const uint32_t nextIdx = (I + 1 < silhouette.count) ? I + 1 : 0; + const float32_t2 projectedVertex = GET_PROJ_VERT(I); - float32_t2 dp = p1 - p0; - float32_t s = (dp.x * t1.y - dp.y * t1.x) / denom; - apex = p0 + s * t0; + testPoint(minAlong, maxAlong, minPerp, maxPerp, projectedVertex, dir, perpDir); - float32_t2 toApex = apex - center; - float32_t distSq = dot(toApex, toApex); - float32_t maxDistSq = CIRCLE_RADIUS * CIRCLE_RADIUS * 4.0f; + bool isN3 = (n3Mask & (1u << I)) != 0; - if (distSq > maxDistSq) - { - apex = center + toApex * (CIRCLE_RADIUS * 2.0f * rsqrt(distSq)); - } -} + if (Accurate) + { + bool isConvex = (convexMask & (1u << I)) != 0; -void testPoint(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, float32_t2 pt, float32_t2 axisDir, float32_t2 perpDir) -{ - float32_t projAlong = dot(pt, axisDir); - float32_t projPerp = dot(pt, perpDir); - - minAlong = min(minAlong, projAlong); - maxAlong = max(maxAlong, projAlong); - minPerp = min(minPerp, projPerp); - maxPerp = max(maxPerp, projPerp); -} - -template -void testEdgeForAxisFast(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, - uint32_t count, uint32_t n3Mask, float32_t2 axisDir, float32_t2 perpDir, - const float32_t3 vertices[MAX_SILHOUETTE_VERTICES]) -{ - const uint32_t nextIdx = (I + 1 < count) ? I + 1 : 0; + if (!isN3 && !isConvex) + return; - testPoint(minAlong, maxAlong, minPerp, maxPerp, GET_PROJ_VERT(I), axisDir, perpDir); + float32_t3 S = silhouette.vertices[I]; + float32_t3 E = silhouette.vertices[nextIdx]; + float32_t2 midPoint = evalCurvePoint(S, E, 0.5f); - if (n3Mask & (1u << I)) - { - float32_t2 midPoint = evalCurvePoint(vertices[I], vertices[nextIdx], 0.5f); - testPoint(minAlong, maxAlong, minPerp, maxPerp, midPoint, axisDir, perpDir); - } -} + if (isN3) + { + testPoint(minAlong, maxAlong, minPerp, maxPerp, midPoint, dir, perpDir); + } -float32_t computeBoundingBoxAreaForAxisFast(NBL_CONST_REF_ARG(float32_t3) vertices[MAX_SILHOUETTE_VERTICES], uint32_t n3Mask, uint32_t count, float32_t2 axisDir) -{ - float32_t2 perpDir = float32_t2(-axisDir.y, axisDir.x); + if (isConvex) + { + float32_t2 t0, endTangent; + getProjectedTangents(S, E, t0, endTangent); - float32_t minAlong = 1e10f; - float32_t maxAlong = -1e10f; - float32_t minPerp = 1e10f; - float32_t maxPerp = -1e10f; + if (dot(t0, perpDir) > 0.0f) + { + float32_t2 apex0; + if (isN3) + { + float32_t2 tangentAtMid = evalCurveTangent(S, E, 0.5f); + computeApexClamped(projectedVertex, midPoint, t0, tangentAtMid, apex0); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex0, dir, perpDir); + + if (dot(tangentAtMid, perpDir) > 0.0f) + { + float32_t2 apex1; + computeApexClamped(midPoint, E.xy * CIRCLE_RADIUS, tangentAtMid, endTangent, apex1); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex1, dir, perpDir); + } + } + else + { + computeApexClamped(projectedVertex, E.xy * CIRCLE_RADIUS, t0, endTangent, apex0); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex0, dir, perpDir); + } + } + } + } + else + { + if (isN3) + { + float32_t2 midPoint = evalCurvePoint(silhouette.vertices[I], silhouette.vertices[nextIdx], 0.5f); + testPoint(minAlong, maxAlong, minPerp, maxPerp, midPoint, dir, perpDir); + } + } + } - testEdgeForAxisFast<0>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); - testEdgeForAxisFast<1>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); - testEdgeForAxisFast<2>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); - if (count > 3) + // Unrolled bounding box computation for a given axis direction. + // Accurate=false: fast path for axis ranking during candidate selection. + // Accurate=true: tight bounds with apex computation for the final rectangle. + template + static void computeBoundsForAxis(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, const ClippedSilhouette silhouette, uint32_t convexMask, uint32_t n3Mask, float32_t2 dir, float32_t2 perpDir) { - testEdgeForAxisFast<3>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); - if (count > 4) + testEdgeForAxis<0, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + testEdgeForAxis<1, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + testEdgeForAxis<2, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 3) { - testEdgeForAxisFast<4>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); - if (count > 5) + testEdgeForAxis<3, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 4) { - testEdgeForAxisFast<5>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); - if (count > 6) + testEdgeForAxis<4, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 5) { - testEdgeForAxisFast<6>(minAlong, maxAlong, minPerp, maxPerp, count, n3Mask, axisDir, perpDir, vertices); + testEdgeForAxis<5, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 6) + { + testEdgeForAxis<6, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + } } } } } - return (maxAlong - minAlong) * (maxPerp - minPerp); -} - -void tryCaliperDir(inout float32_t bestArea, inout float32_t2 bestDir, const float32_t2 dir, const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], uint32_t n3Mask, uint32_t count) -{ - float32_t area = computeBoundingBoxAreaForAxisFast(vertices, n3Mask, count, dir); - - if (area < bestArea) + static void tryCaliperDir(inout float32_t bestArea, inout float32_t2 bestDir, const float32_t2 dir, const ClippedSilhouette silhouette, uint32_t n3Mask) { - bestArea = area; - bestDir = dir; - } -} + float32_t2 perpDir = float32_t2(-dir.y, dir.x); -template -inline void processEdge(inout float32_t bestArea, inout float32_t2 bestDir, inout uint32_t convexMask, inout uint32_t n3Mask, uint32_t count, const float32_t3 vertices[MAX_SILHOUETTE_VERTICES]) -{ - const uint32_t nextIdx = (I + 1 < count) ? I + 1 : 0; - float32_t3 S = vertices[I]; - float32_t3 E = vertices[nextIdx]; + float32_t minAlong = 1e10f; + float32_t maxAlong = -1e10f; + float32_t minPerp = 1e10f; + float32_t maxPerp = -1e10f; - float32_t2 t0, t1; - getProjectedTangents(S, E, t0, t1); + computeBoundsForAxis(minAlong, maxAlong, minPerp, maxPerp, silhouette, 0, n3Mask, dir, perpDir); - tryCaliperDir(bestArea, bestDir, t0, vertices, n3Mask, count); - - if (isEdgeConvex(S, E)) - { - convexMask |= (1u << I); - tryCaliperDir(bestArea, bestDir, t1, vertices, n3Mask, count); - - if (dot(t0, t1) < 0.5f) + float32_t area = (maxAlong - minAlong) * (maxPerp - minPerp); + if (area < bestArea) { - n3Mask |= (1u << I); - float32_t2 tangentAtMid = evalCurveTangent(S, E, 0.5f); - tryCaliperDir(bestArea, bestDir, tangentAtMid, vertices, n3Mask, count); + bestArea = area; + bestDir = dir; } } -} - -template -inline void testEdgeForAxisAccurate(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, uint32_t count, uint32_t convexMask, uint32_t n3Mask, - float32_t2 axisDir, float32_t2 perpDir, const float32_t3 vertices[MAX_SILHOUETTE_VERTICES]) -{ - const uint32_t nextIdx = (I + 1 < count) ? I + 1 : 0; - float32_t2 projectedVertex = vertices[I].xy * CIRCLE_RADIUS; - - testPoint(minAlong, maxAlong, minPerp, maxPerp, projectedVertex, axisDir, perpDir); - bool isN3 = (n3Mask & (1u << I)) != 0; - bool isConvex = (convexMask & (1u << I)) != 0; - - if (!isN3 && !isConvex) - return; - - float32_t3 S = vertices[I]; - float32_t3 E = vertices[nextIdx]; - float32_t2 midPoint = evalCurvePoint(S, E, 0.5f); - - if (isN3) + template + static void processEdge(inout float32_t bestArea, inout float32_t2 bestDir, inout uint32_t convexMask, inout uint32_t n3Mask, const ClippedSilhouette silhouette, inout SilEdgeNormals precompSil) { - testPoint(minAlong, maxAlong, minPerp, maxPerp, midPoint, axisDir, perpDir); - } + const uint32_t nextIdx = (I + 1 < silhouette.count) ? I + 1 : 0; + float32_t3 S = silhouette.vertices[I]; + float32_t3 E = silhouette.vertices[nextIdx]; + precompSil.edgeNormals[I] = float16_t3(cross(S, E)); - if (isConvex) - { - float32_t2 t0, endTangent; - getProjectedTangents(S, E, t0, endTangent); + float32_t2 t0, t1; + getProjectedTangents(S, E, t0, t1); - if (dot(t0, perpDir) > 0.0f) + tryCaliperDir(bestArea, bestDir, t0, silhouette, n3Mask); + + if (nbl::hlsl::cross2D(S.xy, E.xy) < -1e-6f) { - float32_t2 apex0; - if (isN3) - { - float32_t2 tangentAtMid = evalCurveTangent(S, E, 0.5f); - computeApexClamped(projectedVertex, midPoint, t0, tangentAtMid, apex0); - testPoint(minAlong, maxAlong, minPerp, maxPerp, apex0, axisDir, perpDir); + convexMask |= (1u << I); + tryCaliperDir(bestArea, bestDir, t1, silhouette, n3Mask); - if (dot(tangentAtMid, perpDir) > 0.0f) - { - float32_t2 apex1; - computeApexClamped(midPoint, E.xy * CIRCLE_RADIUS, tangentAtMid, endTangent, apex1); - testPoint(minAlong, maxAlong, minPerp, maxPerp, apex1, axisDir, perpDir); - } - } - else + if (dot(t0, t1) < 0.5f) { - computeApexClamped(projectedVertex, E.xy * CIRCLE_RADIUS, t0, endTangent, apex0); - testPoint(minAlong, maxAlong, minPerp, maxPerp, apex0, axisDir, perpDir); + n3Mask |= (1u << I); + float32_t2 tangentAtMid = evalCurveTangent(S, E, 0.5f); + tryCaliperDir(bestArea, bestDir, tangentAtMid, silhouette, n3Mask); } } } -} -Parallelogram buildParallelogramForAxisAccurate(const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], uint32_t convexMask, uint32_t n3Mask, uint32_t count, float32_t2 axisDir) -{ - float32_t2 perpDir = float32_t2(-axisDir.y, axisDir.x); + // ======================================================================== + // Factory methods + // ======================================================================== - float32_t minAlong = 1e10f; - float32_t maxAlong = -1e10f; - float32_t minPerp = 1e10f; - float32_t maxPerp = -1e10f; - - testEdgeForAxisAccurate<0>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); - testEdgeForAxisAccurate<1>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); - testEdgeForAxisAccurate<2>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); - if (count > 3) + static Parallelogram buildForAxis(const ClippedSilhouette silhouette, uint32_t convexMask, uint32_t n3Mask, float32_t2 dir) { - testEdgeForAxisAccurate<3>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); - if (count > 4) - { - testEdgeForAxisAccurate<4>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); - if (count > 5) - { - testEdgeForAxisAccurate<5>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); - if (count > 6) - { - testEdgeForAxisAccurate<6>(minAlong, maxAlong, minPerp, maxPerp, count, convexMask, n3Mask, axisDir, perpDir, vertices); - } - } - } - } + float32_t2 perpDir = float32_t2(-dir.y, dir.x); + + float32_t minAlong = 1e10f; + float32_t maxAlong = -1e10f; + float32_t minPerp = 1e10f; + float32_t maxPerp = -1e10f; - Parallelogram result; - result.width = float16_t(maxAlong - minAlong); - result.height = float16_t(maxPerp - minPerp); - result.axisDir = float16_t2(axisDir); - result.corner = float16_t2(minAlong * axisDir + minPerp * float16_t2(-axisDir.y, axisDir.x)); + computeBoundsForAxis(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); - return result; -} + Parallelogram result; + result.width = float16_t(maxAlong - minAlong); + result.height = float16_t(maxPerp - minPerp); + result.axisDir = float16_t2(dir); + result.corner = float16_t2(minAlong * dir + minPerp * float16_t2(-dir.y, dir.x)); -Parallelogram findMinimumBoundingBoxCurved(const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], uint32_t count + return result; + } + + // Silhouette vertices must be normalized before calling create() + static Parallelogram create(const ClippedSilhouette silhouette, out SilEdgeNormals precompSil #if VISUALIZE_SAMPLES - , - float32_t2 ndc, float32_t3 spherePos, float32_t aaWidth, - inout float32_t4 color + , + float32_t2 ndc, float32_t3 spherePos, float32_t aaWidth, + inout float32_t4 color #endif -) -{ - uint32_t convexMask = 0; - uint32_t n3Mask = 0; - float32_t bestArea = 1e10f; - float32_t2 bestDir = float32_t2(1.0f, 0.0f); - - processEdge<0>(bestArea, bestDir, convexMask, n3Mask, count, vertices); - processEdge<1>(bestArea, bestDir, convexMask, n3Mask, count, vertices); - processEdge<2>(bestArea, bestDir, convexMask, n3Mask, count, vertices); - if (count > 3) + ) { - processEdge<3>(bestArea, bestDir, convexMask, n3Mask, count, vertices); - if (count > 4) + precompSil = (SilEdgeNormals)0; + precompSil.count = silhouette.count; + + uint32_t convexMask = 0; + uint32_t n3Mask = 0; + float32_t bestArea = 1e10f; + float32_t2 bestDir = float32_t2(1.0f, 0.0f); + + processEdge<0>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + processEdge<1>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + processEdge<2>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + if (silhouette.count > 3) { - processEdge<4>(bestArea, bestDir, convexMask, n3Mask, count, vertices); - if (count > 5) + processEdge<3>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + if (silhouette.count > 4) { - processEdge<5>(bestArea, bestDir, convexMask, n3Mask, count, vertices); - if (count > 6) + processEdge<4>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + if (silhouette.count > 5) { - processEdge<6>(bestArea, bestDir, convexMask, n3Mask, count, vertices); + processEdge<5>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + if (silhouette.count > 6) + { + processEdge<6>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + } } } } - } - tryCaliperDir(bestArea, bestDir, float32_t2(1.0f, 0.0f), vertices, n3Mask, count); - tryCaliperDir(bestArea, bestDir, float32_t2(0.0f, 1.0f), vertices, n3Mask, count); + tryCaliperDir(bestArea, bestDir, float32_t2(1.0f, 0.0f), silhouette, n3Mask); + tryCaliperDir(bestArea, bestDir, float32_t2(0.0f, 1.0f), silhouette, n3Mask); - Parallelogram best = buildParallelogramForAxisAccurate(vertices, convexMask, n3Mask, count, bestDir); + Parallelogram best = buildForAxis(silhouette, convexMask, n3Mask, bestDir); #if VISUALIZE_SAMPLES - for (uint32_t i = 0; i < count; i++) - { - if (convexMask & (1u << i)) + for (uint32_t i = 0; i < silhouette.count; i++) { - uint32_t nextIdx = (i + 1) % count; - float32_t2 p0 = vertices[i].xy * CIRCLE_RADIUS; - float32_t2 p1 = vertices[nextIdx].xy * CIRCLE_RADIUS; + if (convexMask & (1u << i)) + { + uint32_t nextIdx = (i + 1) % silhouette.count; + float32_t2 p0 = GET_PROJ_VERT(i); + float32_t2 p1 = GET_PROJ_VERT(nextIdx); - float32_t2 t0, endTangent; - getProjectedTangents(vertices[i], vertices[nextIdx], t0, endTangent); + float32_t2 t0, endTangent; + getProjectedTangents(silhouette.vertices[i], silhouette.vertices[nextIdx], t0, endTangent); - if (n3Mask & (1u << i)) - { - float32_t2 tangentAtMid = evalCurveTangent(vertices[i], vertices[nextIdx], 0.5f); - float32_t2 midPoint = evalCurvePoint(vertices[i], vertices[nextIdx], 0.5f); + if (n3Mask & (1u << i)) + { + float32_t2 tangentAtMid = evalCurveTangent(silhouette.vertices[i], silhouette.vertices[nextIdx], 0.5f); + float32_t2 midPoint = evalCurvePoint(silhouette.vertices[i], silhouette.vertices[nextIdx], 0.5f); - float32_t2 apex0, apex1; - computeApexClamped(p0, midPoint, t0, tangentAtMid, apex0); - computeApexClamped(midPoint, p1, tangentAtMid, endTangent, apex1); + float32_t2 apex0, apex1; + computeApexClamped(p0, midPoint, t0, tangentAtMid, apex0); + computeApexClamped(midPoint, p1, tangentAtMid, endTangent, apex1); - color += drawCorner(float32_t3(apex0, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0, 1)); - color += drawCorner(float32_t3(midPoint, 0.0f), ndc, aaWidth, 0.02, 0.0f, float32_t3(0, 1, 0)); - color += drawCorner(float32_t3(apex1, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0.5, 0)); - } - else - { - float32_t2 apex; - computeApexClamped(p0, p1, t0, endTangent, apex); - color += drawCorner(float32_t3(apex, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0, 1)); + color += drawCorner(float32_t3(apex0, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0, 1)); + color += drawCorner(float32_t3(midPoint, 0.0f), ndc, aaWidth, 0.02, 0.0f, float32_t3(0, 1, 0)); + color += drawCorner(float32_t3(apex1, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0.5, 0)); + } + else + { + float32_t2 apex; + computeApexClamped(p0, p1, t0, endTangent, apex); + color += drawCorner(float32_t3(apex, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0, 1)); + } } } - } #endif - - return best; -} -// ============================================================================ -// Main entry points -// ============================================================================ - -ParallelogramSilhouette buildParallelogram(NBL_CONST_REF_ARG(ClippedSilhouette) silhouette -#if VISUALIZE_SAMPLES - , - float32_t2 ndc, float32_t3 spherePos, float32_t aaWidth, - inout float32_t4 color -#endif -) -{ - ParallelogramSilhouette result; - - // if (silhouette.count < 3) - // { - // result.para.corner = float32_t2(0, 0); - // result.para.edge0 = float32_t2(1, 0); - // result.para.edge1 = float32_t2(0, 1); - // result.para.area = 1.0f; - // return result; - // } - - result.para = findMinimumBoundingBoxCurved(silhouette.vertices, silhouette.count -#if VISUALIZE_SAMPLES - , - ndc, spherePos, aaWidth, color -#endif - ); - #if DEBUG_DATA - DebugDataBuffer[0].parallelogramArea = result.para.width * result.para.height; + DebugDataBuffer[0].parallelogramArea = best.width * best.height; #endif - result.silhouette = precomputeSilhouette(silhouette); - return result; -} + return best; + } -float32_t3 sampleFromParallelogram(NBL_CONST_REF_ARG(ParallelogramSilhouette) paraSilhouette, float32_t2 xi, out float32_t pdf, out bool valid) -{ - float16_t2 axisDir = paraSilhouette.para.axisDir; - float16_t2 perpDir = float16_t2(-axisDir.y, axisDir.x); + float32_t3 sample(NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid) + { + float16_t2 perpDir = float16_t2(-axisDir.y, axisDir.x); - float16_t2 circleXY = paraSilhouette.para.corner + - float16_t(xi.x) * paraSilhouette.para.width * axisDir + - float16_t(xi.y) * paraSilhouette.para.height * perpDir; + float16_t2 circleXY = corner + + float16_t(xi.x) * width * axisDir + + float16_t(xi.y) * height * perpDir; - float32_t3 direction = circleToSphere(circleXY); + float32_t3 direction = circleToSphere(circleXY); - valid = (direction.z > 0.0f) && isInsideSilhouetteFast(direction, paraSilhouette.silhouette); - pdf = valid ? (1.0f / (paraSilhouette.para.width * paraSilhouette.para.height)) : 0.0f; + valid = direction.z > 0.0f && silhouette.isInside(direction); + // PDF in solid angle measure: the rectangle is in circle-space (scaled by CIRCLE_RADIUS), + // and the orthographic projection Jacobian is dA_circle/dω = CIRCLE_RADIUS^2 * z + pdf = valid ? (CIRCLE_RADIUS * CIRCLE_RADIUS * direction.z / (float32_t(width) * float32_t(height))) : 0.0f; - return direction; -} + return direction; + } +}; -#endif // _PARALLELOGRAM_SAMPLING_HLSL_ +#endif // _SOLID_ANGLE_VIS_EXAMPLE_PARALLELOGRAM_SAMPLING_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling.hlsl new file mode 100644 index 000000000..fab111b3e --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling.hlsl @@ -0,0 +1,568 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_PYRAMID_SAMPLING_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_PYRAMID_SAMPLING_HLSL_INCLUDED_ + +#include "gpu_common.hlsl" + +#include +#include +#include +#include + +#include "silhouette.hlsl" +#include "drawing.hlsl" + +// ============================================================================ +// Spherical Rectangle Bound via Rotating Calipers +// +// Bounds the silhouette with a spherical rectangle (intersection of two +// orthogonal lunes). Each lune is defined by two great circles (planes +// through the origin). The rectangle is parameterized for downstream +// samplers (Urena, bilinear, biquadratic) in pyramid_sampling/*.hlsl. +// +// Algorithm: +// 1. Rotating Calipers: Find the edge that minimizes the lune-width proxy +// dot(cross(A, B), C) = sin(edge_len) * sin(angular_dist) +// No per-edge normalization needed, scalar triple product suffices. +// +// 2. Build orthonormal frame from the minimum-width edge: +// - axis1 = normalize(cross(A, B)), pole of the primary lune +// - axis2, axis3 complete the frame via edge-based candidate search +// (tryPrimaryFrameCandidate), oriented toward silhouette center +// +// 3. Project vertices onto the frame as (x/z, y/z) +// to find the bounding rectangle extents (rectR0, rectExtents) +// +// 4. Fallback: if the primary frame leaves vertices near the z=0 plane, +// fix axis3 = camera forward (0,0,1) and search axis1/axis2 via +// tryFallbackFrameCandidate +// +// Key property: If all vertices are inside a great circle half-space, +// then all edges (geodesic arcs) are also inside. No edge extremum +// checking needed (unlike parallelogram_sampling which works in +// projected 2D space where arcs can bulge beyond vertices). +// ============================================================================ +// Spherical rectangle bound: stores the orthonormal frame and gnomonic +// projection extents. Consumed by UrenaSampler, BilinearSampler, BiquadraticSampler. +struct SphericalPyramid +{ + // Orthonormal frame for the bounding region + float32_t3 axis1; // Primary axis (from minimum-width edge's great circle normal) + float32_t3 axis2; // Secondary axis (perpendicular to axis1) + float32_t3 axis3; // Forward axis, toward silhouette (primary) or camera forward (fallback) + + // SphericalRectangle parameters (in the local frame where axis3 is Z) + float32_t3 rectR0; // Corner position in local frame + float32_t2 rectExtents; // Width (along axis1) and height (along axis2) + float32_t solidAngle; // Solid angle of the bounding region (steradians) + + // ======================================================================== + // Rotating Calipers - Minimum Width Edge Finding (Scalar Triple Product) + // ======================================================================== + + // Simplified metric: dot(cross(A, B), C) = sin(edge_len) * sin(angular_dist) + // This is a lune-area proxy, no per-edge normalization needed for comparison. + // Per-vertex cost: one dot product with precomputed edge normal. + // Per-edge cost: one cross product (replaces addition + rsqrt). + // + // Triangular column-major traversal (rotating calipers pattern): + // Vertex V_j checks against edges 0..j-2. + // V2 -> edge 0; V3 -> edges 0,1; V4 -> edges 0,1,2; etc. + // Total checks: (N-2)(N-1)/2 instead of N(N-2). + // + // Endpoints: dot(cross(A,B), A) = dot(cross(A,B), B) = 0, never affect max. + static void findMinimumWidthEdge(const ClippedSilhouette silhouette, out uint32_t bestEdge, out float32_t3 bestV0, out float32_t3 bestV1, out float32_t bestWidth, out SilEdgeNormals precompSil) + { + precompSil = (SilEdgeNormals)0; + precompSil.count = silhouette.count; + + // Edge normals: cross(v[i], v[i+1]), inward-facing for CCW-from-origin winding + float32_t3 en0 = cross(silhouette.vertices[0], silhouette.vertices[1]); + precompSil.edgeNormals[0] = float16_t3(en0); + float32_t3 en1 = cross(silhouette.vertices[1], silhouette.vertices[2]); + precompSil.edgeNormals[1] = float16_t3(en1); + + // Per-edge max(dot(en_i, v_j)), positive = inside, maximum = widest vertex + float32_t maxDot0 = dot(silhouette.vertices[2], en0); // V2 vs edge 0 + + float32_t maxDot1 = 1e10f; + float32_t maxDot2 = 1e10f; + float32_t maxDot3 = 1e10f; + float32_t maxDot4 = 1e10f; + + if (silhouette.count > 3) + { + float32_t3 en2 = cross(silhouette.vertices[2], silhouette.vertices[3]); + precompSil.edgeNormals[2] = float16_t3(en2); + + // V3 vs edges 0, 1 + float32_t3 v3 = silhouette.vertices[3]; + maxDot0 = max(maxDot0, dot(v3, en0)); + maxDot1 = dot(v3, en1); + + if (silhouette.count > 4) + { + float32_t3 en3 = cross(silhouette.vertices[3], silhouette.vertices[4]); + precompSil.edgeNormals[3] = float16_t3(en3); + + // V4 vs edges 0, 1, 2 + float32_t3 v4 = silhouette.vertices[4]; + maxDot0 = max(maxDot0, dot(v4, en0)); + maxDot1 = max(maxDot1, dot(v4, en1)); + maxDot2 = dot(v4, en2); + + if (silhouette.count > 5) + { + float32_t3 en4 = cross(silhouette.vertices[4], silhouette.vertices[5]); + precompSil.edgeNormals[4] = float16_t3(en4); + + // V5 vs edges 0, 1, 2, 3 + float32_t3 v5 = silhouette.vertices[5]; + maxDot0 = max(maxDot0, dot(v5, en0)); + maxDot1 = max(maxDot1, dot(v5, en1)); + maxDot2 = max(maxDot2, dot(v5, en2)); + maxDot3 = dot(v5, en3); + + if (silhouette.count > 6) + { + // V6 vs edges 0, 1, 2, 3, 4 + float32_t3 v6 = silhouette.vertices[6]; + maxDot0 = max(maxDot0, dot(v6, en0)); + maxDot1 = max(maxDot1, dot(v6, en1)); + maxDot2 = max(maxDot2, dot(v6, en2)); + maxDot3 = max(maxDot3, dot(v6, en3)); + maxDot4 = dot(v6, en4); + } + } + } + } + + // Best edge: minimum maxDot, no per-edge normalization needed. + // Relative epsilon prevents tie-breaking flicker when two edges have + // nearly identical widths — the current winner is "sticky" unless a + // new edge is meaningfully better (0.1% narrower). + const float32_t EDGE_SELECT_EPS = 1e-3f; + + bestWidth = maxDot0; + bestEdge = 0; + bestV0 = silhouette.vertices[0]; + bestV1 = silhouette.vertices[1]; + + if (silhouette.count > 3) + { + bool better = maxDot1 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot1 : bestWidth; + bestEdge = better ? 1 : bestEdge; + bestV0 = better ? silhouette.vertices[1] : bestV0; + bestV1 = better ? silhouette.vertices[2] : bestV1; + + if (silhouette.count > 4) + { + better = maxDot2 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot2 : bestWidth; + bestEdge = better ? 2 : bestEdge; + bestV0 = better ? silhouette.vertices[2] : bestV0; + bestV1 = better ? silhouette.vertices[3] : bestV1; + + if (silhouette.count > 5) + { + better = maxDot3 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot3 : bestWidth; + bestEdge = better ? 3 : bestEdge; + bestV0 = better ? silhouette.vertices[3] : bestV0; + bestV1 = better ? silhouette.vertices[4] : bestV1; + + if (silhouette.count > 6) + { + better = maxDot4 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot4 : bestWidth; + bestEdge = better ? 4 : bestEdge; + bestV0 = better ? silhouette.vertices[4] : bestV0; + bestV1 = better ? silhouette.vertices[5] : bestV1; + } + } + } + } + + // Check the last 2 edges missed by the triangular traversal: + // Edge count-2: vertices[count-2] -> vertices[count-1], check V0..V[count-3] + // Edge count-1: vertices[count-1] -> vertices[0], check V1..V[count-2] + // Explicit per-count unrolling avoids the generic loop with runtime index comparisons. + { + // Penultimate edge: vertices[count-2] -> vertices[count-1] + const uint32_t penIdx = silhouette.count - 2; + float32_t3 enPen = cross(silhouette.vertices[penIdx], silhouette.vertices[penIdx + 1]); + precompSil.edgeNormals[penIdx] = float16_t3(enPen); + float32_t maxDotPen = dot(silhouette.vertices[0], enPen); + if (silhouette.count > 3) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[1], enPen)); + if (silhouette.count > 4) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[2], enPen)); + if (silhouette.count > 5) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[3], enPen)); + if (silhouette.count > 6) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[4], enPen)); + } + } + } + } + + bool betterPen = maxDotPen < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = betterPen ? maxDotPen : bestWidth; + bestEdge = betterPen ? penIdx : bestEdge; + bestV0 = betterPen ? silhouette.vertices[penIdx] : bestV0; + bestV1 = betterPen ? silhouette.vertices[penIdx + 1] : bestV1; + + // Last edge: vertices[count-1] -> vertices[0] (wrap-around) + const uint32_t lastIdx = silhouette.count - 1; + float32_t3 enLast = cross(silhouette.vertices[lastIdx], silhouette.vertices[0]); + precompSil.edgeNormals[lastIdx] = float16_t3(enLast); + float32_t maxDotLast = dot(silhouette.vertices[1], enLast); + if (silhouette.count > 3) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[2], enLast)); + if (silhouette.count > 4) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[3], enLast)); + if (silhouette.count > 5) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[4], enLast)); + if (silhouette.count > 6) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[5], enLast)); + } + } + } + } + + bool betterLast = maxDotLast < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = betterLast ? maxDotLast : bestWidth; + bestEdge = betterLast ? lastIdx : bestEdge; + bestV0 = betterLast ? silhouette.vertices[lastIdx] : bestV0; + bestV1 = betterLast ? silhouette.vertices[0] : bestV1; + } + } + + // ======================================================================== + // Template-Unrolled Projection Helpers + // ======================================================================== + + // Project a single vertex onto candidate axes, updating bounds and minZ in one fused pass + template + static void projectAndBound(const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], float32_t3 projAxis1, float32_t3 projAxis2, float32_t3 projAxis3, NBL_REF_ARG(float32_t4) bound, NBL_REF_ARG(float32_t) minZ) + { + float32_t3 v = vertices[I]; + float32_t x = dot(v, projAxis1); + float32_t y = dot(v, projAxis2); + float32_t z = dot(v, projAxis3); + minZ = min(minZ, z); + float32_t rcpZ = rcp(z); + float32_t projX = x * rcpZ; + float32_t projY = y * rcpZ; + bound.x = min(bound.x, projX); + bound.y = min(bound.y, projY); + bound.z = max(bound.z, projX); + bound.w = max(bound.w, projY); + } + + // Project all silhouette vertices (template-unrolled, fused bounds + minZ) + static void projectAllVertices(const ClippedSilhouette silhouette, float32_t3 projAxis1, float32_t3 projAxis2, float32_t3 projAxis3, NBL_REF_ARG(float32_t4) bound, NBL_REF_ARG(float32_t) minZ) + { + bound = float32_t4(1e10f, 1e10f, -1e10f, -1e10f); + minZ = 1e10f; + projectAndBound<0>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + projectAndBound<1>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + projectAndBound<2>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + if (silhouette.count > 3) + { + projectAndBound<3>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + if (silhouette.count > 4) + { + projectAndBound<4>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + if (silhouette.count > 5) + { + projectAndBound<5>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + if (silhouette.count > 6) + { + projectAndBound<6>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + } + } + } + } + } + + // ======================================================================== + // Template-Unrolled Frame Candidate Selection + // ======================================================================== + + // Try an edge as frame candidate for the primary path (axis1 fixed, find best axis2/axis3) + template + static void tryPrimaryFrameCandidate(NBL_CONST_REF_ARG(ClippedSilhouette) silhouette, float32_t3 fixedAxis1, float32_t3 axis3Ref, + NBL_REF_ARG(float32_t) bestArea, NBL_REF_ARG(float32_t3) bestAxis2, + NBL_REF_ARG(float32_t3) bestAxis3, NBL_REF_ARG(bool) found, + NBL_REF_ARG(float32_t) bestMinZ, NBL_REF_ARG(float32_t4) bestBound) + { + const uint32_t j = CheckCount ? ((I + 1 < silhouette.count) ? I + 1 : 0) : I + 1; + float32_t3 edge = silhouette.vertices[j] - silhouette.vertices[I]; + + // Candidate axis2: perpendicular to edge, in plane perpendicular to axis1 + float32_t3 axis2Cand = cross(fixedAxis1, edge); + float32_t lenSq = dot(axis2Cand, axis2Cand); + if (lenSq < 1e-14f) + return; + axis2Cand *= rsqrt(lenSq); + + // Candidate axis3: completes the frame + float32_t3 axis3Cand = cross(fixedAxis1, axis2Cand); + + // Ensure axis3 points toward center (same hemisphere as reference) + if (dot(axis3Cand, axis3Ref) < 0.0f) + { + axis2Cand = -axis2Cand; + axis3Cand = -axis3Cand; + } + + // Fused: check all vertices have positive z AND compute bounding rect in one pass + float32_t4 bound; + float32_t minZ; + projectAllVertices(silhouette, fixedAxis1, axis2Cand, axis3Cand, bound, minZ); + + // Skip if any vertex would have z <= 0 + if (minZ <= 1e-6f) + return; + + float32_t rectArea = (bound.z - bound.x) * (bound.w - bound.y); + if (rectArea < bestArea) + { + bestArea = rectArea; + bestAxis2 = axis2Cand; + bestAxis3 = axis3Cand; + bestMinZ = minZ; + bestBound = bound; + found = true; + } + } + + // Try an edge as frame candidate for the fallback path (axis3 fixed, find best axis1/axis2) + template + static void tryFallbackFrameCandidate(NBL_CONST_REF_ARG(ClippedSilhouette) silhouette, float32_t3 fixedAxis3, NBL_REF_ARG(float32_t) bestArea, NBL_REF_ARG(float32_t3) bestAxis1, NBL_REF_ARG(float32_t3) bestAxis2, NBL_REF_ARG(uint32_t) bestEdge, NBL_REF_ARG(float32_t4) bestBound) + { + const uint32_t j = CheckCount ? ((I + 1 < silhouette.count) ? I + 1 : 0) : I + 1; + float32_t3 edge = silhouette.vertices[j] - silhouette.vertices[I]; + + float32_t3 edgeInPlane = edge - fixedAxis3 * dot(edge, fixedAxis3); + float32_t lenSq = dot(edgeInPlane, edgeInPlane); + if (lenSq < 1e-14f) + return; + + float32_t3 axis1Cand = edgeInPlane * rsqrt(lenSq); + float32_t3 axis2Cand = cross(fixedAxis3, axis1Cand); + + float32_t4 bound; + float32_t minZ; + projectAllVertices(silhouette, axis1Cand, axis2Cand, fixedAxis3, bound, minZ); + + float32_t rectArea = (bound.z - bound.x) * (bound.w - bound.y); + if (rectArea < bestArea) + { + bestArea = rectArea; + bestAxis1 = axis1Cand; + bestAxis2 = axis2Cand; + bestBound = bound; + bestEdge = I; + } + } + + // ======================================================================== + // Visualization + // ======================================================================== + +#if VISUALIZE_SAMPLES + float32_t4 visualize(float32_t3 spherePos, float32_t2 ndc, float32_t aaWidth) + { + float32_t4 color = float32_t4(0, 0, 0, 0); + + // Colors for visualization + float32_t3 boundColor1 = float32_t3(1.0f, 0.5f, 0.5f); // Light red for axis1 bounds + float32_t3 boundColor2 = float32_t3(0.5f, 0.5f, 1.0f); // Light blue for axis2 bounds + float32_t3 centerColor = float32_t3(1.0f, 1.0f, 0.0f); // Yellow for center + + float32_t x0 = rectR0.x; + float32_t x1 = rectR0.x + rectExtents.x; + float32_t y0 = rectR0.y; + float32_t y1 = rectR0.y + rectExtents.y; + float32_t z = rectR0.z; + + // Great circle normals for the 4 edges (in local frame, then transform to world) + float32_t3 bottomNormalLocal = normalize(float32_t3(0, -z, y0)); + float32_t3 topNormalLocal = normalize(float32_t3(0, z, -y1)); + float32_t3 leftNormalLocal = normalize(float32_t3(-z, 0, x0)); + float32_t3 rightNormalLocal = normalize(float32_t3(z, 0, -x1)); + + // Transform to world space + float32_t3 bottomNormal = bottomNormalLocal.x * axis1 + bottomNormalLocal.y * axis2 + bottomNormalLocal.z * axis3; + float32_t3 topNormal = topNormalLocal.x * axis1 + topNormalLocal.y * axis2 + topNormalLocal.z * axis3; + float32_t3 leftNormal = leftNormalLocal.x * axis1 + leftNormalLocal.y * axis2 + leftNormalLocal.z * axis3; + float32_t3 rightNormal = rightNormalLocal.x * axis1 + rightNormalLocal.y * axis2 + rightNormalLocal.z * axis3; + + // Draw the 4 bounding great circles + color += drawGreatCircleHalf(bottomNormal, spherePos, axis3, aaWidth, boundColor2, 0.004f); + color += drawGreatCircleHalf(topNormal, spherePos, axis3, aaWidth, boundColor2, 0.004f); + color += drawGreatCircleHalf(leftNormal, spherePos, axis3, aaWidth, boundColor1, 0.004f); + color += drawGreatCircleHalf(rightNormal, spherePos, axis3, aaWidth, boundColor1, 0.004f); + + // Draw center point (center of the rectangle projected onto sphere) + float32_t centerX = (x0 + x1) * 0.5f; + float32_t centerY = (y0 + y1) * 0.5f; + float32_t3 centerLocal = normalize(float32_t3(centerX, centerY, z)); + float32_t3 centerWorld = centerLocal.x * axis1 - centerLocal.y * axis2 + centerLocal.z * axis3; + + float32_t3 centerCircle = sphereToCircle(centerWorld); + color += drawCorner(centerCircle, ndc, aaWidth, 0.025f, 0.0f, centerColor); + + color += drawCorner(axis1, ndc, aaWidth, 0.025f, 0.0f, float32_t3(1.0f, 0.0f, 0.0f)); + color += drawCorner(axis2, ndc, aaWidth, 0.025f, 0.0f, float32_t3(0.0f, 1.0f, 0.0f)); + color += drawCorner(axis3, ndc, aaWidth, 0.025f, 0.0f, float32_t3(0.0f, 0.0f, 1.0f)); + + return color; + } +#endif // VISUALIZE_SAMPLES + + // ======================================================================== + // Factory + // ======================================================================== + + static SphericalPyramid create(NBL_CONST_REF_ARG(ClippedSilhouette) silhouette, NBL_REF_ARG(SilEdgeNormals) silEdgeNormals +#if VISUALIZE_SAMPLES + , + float32_t2 ndc, float32_t3 spherePos, float32_t aaWidth, inout float32_t4 color +#endif + ) + { + SphericalPyramid self; + + // Step 1: Find minimum-width edge using rotating calipers with lune metric + uint32_t bestEdge; + float32_t3 bestV0, bestV1; + float32_t minWidth; + findMinimumWidthEdge(silhouette, bestEdge, bestV0, bestV1, minWidth, silEdgeNormals); + + // Step 2: Build orthonormal frame from best edge + // axis1 = perpendicular to the best edge's great circle (primary caliper direction) + self.axis1 = normalize(cross(bestV0, bestV1)); + + // Compute centroid for reference direction + float32_t3 center = silhouette.getCenter(); + float32_t3 centerInPlane = center - self.axis1 * dot(center, self.axis1); + float32_t3 axis3Ref = normalize(centerInPlane); + + // Step 2b: Try each edge-aligned rotation around axis1 to find the axis2/axis3 + // orientation that keeps all vertices in the positive half-space with minimum + // bounding rectangle area + float32_t bestRectArea = 1e20f; + float32_t3 bestAxis2 = cross(axis3Ref, self.axis1); + float32_t3 bestAxis3 = axis3Ref; + bool foundValidFrame = false; + float32_t bestMinZ = 0.0f; + float32_t4 bounds = float32_t4(-0.1f, -0.1f, 0.1f, 0.1f); + + tryPrimaryFrameCandidate<0>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + tryPrimaryFrameCandidate<1>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + tryPrimaryFrameCandidate<2>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + if (silhouette.count > 3) + { + tryPrimaryFrameCandidate<3, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + if (silhouette.count > 4) + { + tryPrimaryFrameCandidate<4, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + if (silhouette.count > 5) + { + tryPrimaryFrameCandidate<5, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + if (silhouette.count > 6) + { + tryPrimaryFrameCandidate<6, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + } + } + } + } + + self.axis2 = bestAxis2; + self.axis3 = bestAxis3; + + // Fallback: if the primary path failed (no valid frame found, or axis3 leaves + // vertices too close to the z=0 singularity), fix axis3 = camera forward and + // search for the best axis1/axis2 rotation around it. + if (!foundValidFrame || bestMinZ < 0.15f) + { + // Use camera forward as axis3 (all silhouette vertices have z > 0 by construction) + self.axis3 = float32_t3(0.0f, 0.0f, 1.0f); + + // Find optimal axis1/axis2 rotation around axis3 by trying each edge + float32_t bestFallbackArea = 1e20f; + // axis3 = (0,0,1), so cross((0,0,1), (1,0,0)) = (0,1,0), cross((0,0,1), (0,1,0)) = (-1,0,0) + self.axis1 = float32_t3(0.0f, 1.0f, 0.0f); + self.axis2 = float32_t3(-1.0f, 0.0f, 0.0f); + + tryFallbackFrameCandidate<0>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + tryFallbackFrameCandidate<1>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + tryFallbackFrameCandidate<2>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + if (silhouette.count > 3) + { + tryFallbackFrameCandidate<3, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + if (silhouette.count > 4) + { + tryFallbackFrameCandidate<4, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + if (silhouette.count > 5) + { + tryFallbackFrameCandidate<5, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + if (silhouette.count > 6) + { + tryFallbackFrameCandidate<6, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + } + } + } + } + } + + // Degenerate bounds check (single computation, after primary/fallback decision) + if (bounds.x >= bounds.z || bounds.y >= bounds.w) + bounds = float32_t4(-0.1f, -0.1f, 0.1f, 0.1f); + + self.rectR0 = float32_t3(bounds.xy, 1.0f); + self.rectExtents = float32_t2(bounds.zw - bounds.xy); + +#if VISUALIZE_SAMPLES + color += drawCorner(center, ndc, aaWidth, 0.05f, 0.0f, float32_t3(1.0f, 0.0f, 1.0f)); + color += visualizeBestCaliperEdge(silhouette.vertices, bestEdge, silhouette.count, spherePos, aaWidth); + color += self.visualize(spherePos, ndc, aaWidth); +#endif + +#if DEBUG_DATA + DebugDataBuffer[0].pyramidAxis1 = self.axis1; + DebugDataBuffer[0].pyramidAxis2 = self.axis2; + DebugDataBuffer[0].pyramidCenter = center; + DebugDataBuffer[0].pyramidHalfWidth1 = (atan(bounds.z) - atan(bounds.x)) * 0.5f; + DebugDataBuffer[0].pyramidHalfWidth2 = (atan(bounds.w) - atan(bounds.y)) * 0.5f; + DebugDataBuffer[0].pyramidSolidAngle = self.solidAngle; + DebugDataBuffer[0].pyramidBestEdge = bestEdge; + DebugDataBuffer[0].pyramidMin1 = bounds.x; + DebugDataBuffer[0].pyramidMin2 = bounds.y; + DebugDataBuffer[0].pyramidMax1 = bounds.z; + DebugDataBuffer[0].pyramidMax2 = bounds.w; +#endif + + return self; + } +}; + +#include "pyramid_sampling/urena.hlsl" +#include "pyramid_sampling/bilinear.hlsl" +#include "pyramid_sampling/biquadratic.hlsl" + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_PYRAMID_SAMPLING_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/bilinear.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/bilinear.hlsl new file mode 100644 index 000000000..7d3319a7c --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/bilinear.hlsl @@ -0,0 +1,86 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BILINEAR_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BILINEAR_HLSL_INCLUDED_ +#include + +// ============================================================================ +// Bilinear Approximation Sampling (closed-form, faster than biquadratic) +// ============================================================================ +// +struct BilinearSampler +{ + nbl::hlsl::sampling::Bilinear sampler; + + float32_t rcpTotalIntegral; + float32_t rectArea; + + // Precompute bilinear sampler from pyramid + static BilinearSampler create(NBL_CONST_REF_ARG(SphericalPyramid) pyramid) + { + BilinearSampler self; + + // 4 corner positions on the rectangle + const float32_t x0 = pyramid.rectR0.x; + const float32_t x1 = x0 + pyramid.rectExtents.x; + const float32_t y0 = pyramid.rectR0.y; + const float32_t y1 = y0 + pyramid.rectExtents.y; + + // dSA(x,y) = 1 / (x^2 + y^2 + 1)^(3/2) [z = 1.0 in local frame] + const float32_t xx0 = x0 * x0, xx1 = x1 * x1; + const float32_t yy0 = y0 * y0, yy1 = y1 * y1; + + float32_t d; + d = xx0 + yy0 + 1.0f; + const float32_t v00 = rsqrt(d) / d; // x0y0 + d = xx1 + yy0 + 1.0f; + const float32_t v10 = rsqrt(d) / d; // x1y0 + d = xx0 + yy1 + 1.0f; + const float32_t v01 = rsqrt(d) / d; // x0y1 + d = xx1 + yy1 + 1.0f; + const float32_t v11 = rsqrt(d) / d; // x1y1 + + // Bilinear layout: (x0y0, x0y1, x1y0, x1y1) + self.sampler = nbl::hlsl::sampling::Bilinear::create(float32_t4(v00, v01, v10, v11)); + + // Total integral = average of 4 corners (bilinear integral over unit square) + const float32_t totalIntegral = (v00 + v10 + v01 + v11) * 0.25f; + self.rcpTotalIntegral = 1.0f / max(totalIntegral, 1e-20f); + self.rectArea = pyramid.rectExtents.x * pyramid.rectExtents.y; + + return self; + } + + // Sample a direction on the spherical pyramid using bilinear importance sampling. + // Returns the world-space direction; outputs pdf in solid-angle space and validity flag. + float32_t3 sample(NBL_CONST_REF_ARG(SphericalPyramid) pyramid, NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid) + { + // Step 1: Sample UV from bilinear distribution (closed-form via quadratic formula) + float32_t rcpPdf; + float32_t2 uv = sampler.generate(rcpPdf, xi); + + // Step 2: UV to direction + // Bilinear sampler convention: u.y = first-sampled axis (X), u.x = second-sampled axis (Y) + const float32_t localX = pyramid.rectR0.x + uv.y * pyramid.rectExtents.x; + const float32_t localY = pyramid.rectR0.y + uv.x * pyramid.rectExtents.y; + + // Compute dist2 and rcpLen once, reuse for both normalization and dSA + const float32_t dist2 = localX * localX + localY * localY + 1.0f; + const float32_t rcpLen = rsqrt(dist2); + float32_t3 direction = (localX * pyramid.axis1 + + localY * pyramid.axis2 + + pyramid.axis3) * rcpLen; + + valid = direction.z > 0.0f && silhouette.isInside(direction); + + // PDF in solid angle space: 1 / (rcpPdf * dSA * rectArea) + // rcpPdf already = 1/pdfUV from Bilinear::generate, avoid redundant reciprocal + const float32_t dsa = rcpLen / dist2; + pdf = 1.0f / max(rcpPdf * dsa * rectArea, 1e-7f); + + return direction; + } +}; + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BILINEAR_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/biquadratic.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/biquadratic.hlsl new file mode 100644 index 000000000..e75c89595 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/biquadratic.hlsl @@ -0,0 +1,158 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BIQUADRATIC_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BIQUADRATIC_HLSL_INCLUDED_ + +// ============================================================================ +// Biquadratic Approximation Sampling (Hart et al. 2020) +// ============================================================================ +// +// Precomputed biquadratic sampler for importance sampling solid angle density. +// Build once from a SphericalPyramid, then call sample() per random pair. + +struct BiquadraticSampler +{ + // Column-major: cols[i] = (row0[i], row1[i], row2[i]) for fast sliceAtY via dot + float32_t3x3 cols; + + // Precomputed marginal (Y) polynomial: f(y) = c0 + y*(c1 + y*c2) + float32_t margC0, margC1, margC2, margIntegral; + + float32_t rcpTotalIntegral; + float32_t rcpIntegralTimesRcpArea; // rcpTotalIntegral / rectArea (fused for PDF computation) + + // Newton-Raphson CDF inversion for a quadratic PDF (2 iterations) + // Solves: c0*t + (c1/2)*t^2 + (c2/3)*t^3 = u * integral + // Returns sampled t and the PDF value at t (avoids redundant recomputation by caller). + // 2 iterations give ~4 decimal digits, should be sufficient for importance sampling with rejection? + static float32_t sampleQuadraticCDF(float32_t u, float32_t c0, float32_t c1, float32_t c2, float32_t integral, out float32_t lastPdfVal) + { + const float32_t target = u * integral; + const float32_t c1half = c1 * 0.5f; + const float32_t c2third = c2 * (1.0f / 3.0f); + float32_t t = u; + + // Iteration 1 + float32_t cdfVal = t * (c0 + t * (c1half + t * c2third)); + lastPdfVal = c0 + t * (c1 + t * c2); + t = clamp(t - (cdfVal - target) / lastPdfVal, 0.0f, 1.0f); + + // Iteration 2 + cdfVal = t * (c0 + t * (c1half + t * c2third)); + lastPdfVal = c0 + t * (c1 + t * c2); + t = clamp(t - (cdfVal - target) / lastPdfVal, 0.0f, 1.0f); + + return t; + } + + // Precompute biquadratic sampler from pyramid (call ONCE, reuse for all samples) + static BiquadraticSampler create(NBL_CONST_REF_ARG(SphericalPyramid) pyramid) + { + BiquadraticSampler self; + + // 3x3 grid positions on the rectangle + const float32_t x0 = pyramid.rectR0.x; + const float32_t x1 = x0 + 0.5f * pyramid.rectExtents.x; + const float32_t x2 = x0 + pyramid.rectExtents.x; + const float32_t y0 = pyramid.rectR0.y; + const float32_t y1 = y0 + 0.5f * pyramid.rectExtents.y; + const float32_t y2 = y0 + pyramid.rectExtents.y; + + // dSA(x,y) = rsqrt(x^2+y^2+1) / (x^2+y^2+1) [z = rectR0.z = 1.0] + const float32_t xx0 = x0 * x0, xx1 = x1 * x1, xx2 = x2 * x2; + const float32_t yy0 = y0 * y0, yy1 = y1 * y1, yy2 = y2 * y2; + + float32_t3 row0, row1, row2; + float32_t d; + + d = xx0 + yy0 + 1.0f; + row0.x = rsqrt(d) / d; + d = xx1 + yy0 + 1.0f; + row0.y = rsqrt(d) / d; + d = xx2 + yy0 + 1.0f; + row0.z = rsqrt(d) / d; + + d = xx0 + yy1 + 1.0f; + row1.x = rsqrt(d) / d; + d = xx1 + yy1 + 1.0f; + row1.y = rsqrt(d) / d; + d = xx2 + yy1 + 1.0f; + row1.z = rsqrt(d) / d; + + d = xx0 + yy2 + 1.0f; + row2.x = rsqrt(d) / d; + d = xx1 + yy2 + 1.0f; + row2.y = rsqrt(d) / d; + d = xx2 + yy2 + 1.0f; + row2.z = rsqrt(d) / d; + + // Store column-major for sliceAtY: cols[i] = (row0[i], row1[i], row2[i]) + self.cols[0] = float32_t3(row0.x, row1.x, row2.x); + self.cols[1] = float32_t3(row0.y, row1.y, row2.y); + self.cols[2] = float32_t3(row0.z, row1.z, row2.z); + + // Marginal along Y: Simpson's rule integral of each row + const float32_t3 marginal = float32_t3( + (row0.x + 4.0f * row0.y + row0.z) / 6.0f, + (row1.x + 4.0f * row1.y + row1.z) / 6.0f, + (row2.x + 4.0f * row2.y + row2.z) / 6.0f); + + // Precompute marginal polynomial: f(y) = c0 + y*(c1 + y*c2) + self.margC0 = marginal[0]; + self.margC1 = -3.0f * marginal[0] + 4.0f * marginal[1] - marginal[2]; + self.margC2 = 2.0f * (marginal[0] - 2.0f * marginal[1] + marginal[2]); + self.margIntegral = (marginal[0] + 4.0f * marginal[1] + marginal[2]) / 6.0f; + + self.rcpTotalIntegral = 1.0f / max(self.margIntegral, 1e-20f); + const float32_t rectArea = pyramid.rectExtents.x * pyramid.rectExtents.y; + self.rcpIntegralTimesRcpArea = self.rcpTotalIntegral / max(rectArea, 1e-20f); + + return self; + } + + // Sample a direction on the spherical pyramid using biquadratic importance sampling. + // Returns the world-space direction; outputs pdf in solid-angle space and validity flag. + float32_t3 sample(NBL_CONST_REF_ARG(SphericalPyramid) pyramid, NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid) + { + // Step 1: Sample Y from precomputed marginal polynomial + float32_t margPdfAtY; + const float32_t y = sampleQuadraticCDF(xi.y, margC0, margC1, margC2, margIntegral, margPdfAtY); + + // Step 2: Compute conditional X slice at sampled Y via Lagrange basis + const float32_t y2 = y * y; + const float32_t3 Ly = float32_t3(2.0f * y2 - 3.0f * y + 1.0f, -4.0f * y2 + 4.0f * y, 2.0f * y2 - y); + const float32_t3 slice = float32_t3(dot(cols[0], Ly), dot(cols[1], Ly), dot(cols[2], Ly)); + + // Step 3: Build conditional polynomial and sample X + const float32_t condC0 = slice[0]; + const float32_t condC1 = -3.0f * slice[0] + 4.0f * slice[1] - slice[2]; + const float32_t condC2 = 2.0f * (slice[0] - 2.0f * slice[1] + slice[2]); + const float32_t condIntegral = (slice[0] + 4.0f * slice[1] + slice[2]) / 6.0f; + float32_t condPdfAtX; + const float32_t x = sampleQuadraticCDF(xi.x, condC0, condC1, condC2, condIntegral, condPdfAtX); + + // Step 4: UV to direction + const float32_t localX = pyramid.rectR0.x + x * pyramid.rectExtents.x; + const float32_t localY = pyramid.rectR0.y + y * pyramid.rectExtents.y; + + // Compute dist2 and rcpLen once, reuse for both normalization and dSA + const float32_t dist2 = localX * localX + localY * localY + 1.0f; + const float32_t rcpLen = rsqrt(dist2); + float32_t3 direction = (localX * pyramid.axis1 + + localY * pyramid.axis2 + + pyramid.axis3) * + rcpLen; + + valid = direction.z > 0.0f && silhouette.isInside(direction); + + // Step 5: PDF in solid angle space = condPdfAtX / (totalIntegral * dSA * rectArea) + // condPdfAtX is reused from the last Newton iteration + const float32_t dsa = rcpLen / dist2; + pdf = condPdfAtX * rcpIntegralTimesRcpArea / max(dsa, 1e-7f); + + return direction; + } +}; + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BIQUADRATIC_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/urena.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/urena.hlsl new file mode 100644 index 000000000..6709bf7da --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/urena.hlsl @@ -0,0 +1,87 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_URENA_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_URENA_HLSL_INCLUDED_ + +// ============================================================================ +// Sampling using Urena 2003 (SphericalRectangle) +// ============================================================================ + +struct UrenaSampler +{ + float32_t solidAngle; // Solid angle of the bounding region (steradians) + float32_t samplerK; // = 2*pi - q (angle offset for horizontal sampling) + float32_t samplerB0; // = n_z[0] (normalized edge parameter) + float32_t samplerB1; // = n_z[2] (normalized edge parameter) + + // Precompute solid angle AND sampler intermediates in one pass + // (solidAngleOfRectangle and generate() both compute n_z/cosGamma -- fuse them) + static UrenaSampler create(NBL_CONST_REF_ARG(SphericalPyramid) pyramid) + { + UrenaSampler self; + + const float32_t4 denorm_n_z = float32_t4(-pyramid.rectR0.y, pyramid.rectR0.x + pyramid.rectExtents.x, pyramid.rectR0.y + pyramid.rectExtents.y, -pyramid.rectR0.x); + const float32_t4 n_z = denorm_n_z / sqrt((float32_t4)(pyramid.rectR0.z * pyramid.rectR0.z) + denorm_n_z * denorm_n_z); + const float32_t4 cosGamma = float32_t4(-n_z[0] * n_z[1], -n_z[1] * n_z[2], + -n_z[2] * n_z[3], -n_z[3] * n_z[0]); + + nbl::hlsl::math::sincos_accumulator adder = nbl::hlsl::math::sincos_accumulator::create(cosGamma[0]); + adder.addCosine(cosGamma[1]); + const float32_t p = adder.getSumofArccos(); + adder = nbl::hlsl::math::sincos_accumulator::create(cosGamma[2]); + adder.addCosine(cosGamma[3]); + const float32_t q = adder.getSumofArccos(); + + self.solidAngle = p + q - 2.0f * nbl::hlsl::numbers::pi; + self.samplerK = 2.0f * nbl::hlsl::numbers::pi - q; + self.samplerB0 = n_z[0]; + self.samplerB1 = n_z[2]; + + return self; + } + + float32_t3 sample(NBL_CONST_REF_ARG(SphericalPyramid) pyramid, NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid) + { + // Inlined Urena 2003 with algebraic simplifications: + const float32_t r1x = pyramid.rectR0.x + pyramid.rectExtents.x; + const float32_t r1y = pyramid.rectR0.y + pyramid.rectExtents.y; + + // Horizontal CDF inversion + const float32_t au = xi.x * solidAngle + samplerK; + float32_t sinAu, cosAu; + sincos(au, sinAu, cosAu); + const float32_t fu = (cosAu * samplerB0 - samplerB1) / sinAu; + + // cu = sign(fu)/sqrt(cu_2), xu = cu/sqrt(1-cu^2) + // Fused: xu = sign(fu)/sqrt(cu_2 - 1) [eliminates 2 sqrt + 2 div -> 1 rsqrt] + const float32_t cu_2 = max(fu * fu + samplerB0 * samplerB0, 1.0f); + const float32_t xu = clamp( + (fu >= 0.0f ? 1.0f : -1.0f) * rsqrt(max(cu_2 - 1.0f, 1e-10f)), + pyramid.rectR0.x, r1x); + const float32_t d_2 = xu * xu + 1.0f; + + // Vertical sampling in h-space (div -> rsqrt + mul) + const float32_t h0 = pyramid.rectR0.y * rsqrt(d_2 + pyramid.rectR0.y * pyramid.rectR0.y); + const float32_t h1 = r1y * rsqrt(d_2 + r1y * r1y); + const float32_t hv = h0 + xi.y * (h1 - h0); + + // Normalized direction via ||(xu,yv,1)||^2 = d_2/(1-hv^2): + // localDir.y = yv/||v|| = hv (exact cancellation) + // localDir.xz = (xu, 1) * t where t = sqrt(1-hv^2)/sqrt(d_2) + // Eliminates: sqrt(d_2), yv computation, and normalize() + const float32_t t = sqrt(max(1.0f - hv * hv, 0.0f)) * rsqrt(d_2); + const float32_t3 localDir = float32_t3(xu * t, hv, t); + + float32_t3 direction = localDir.x * pyramid.axis1 + + localDir.y * pyramid.axis2 + + localDir.z * pyramid.axis3; + + valid = direction.z > 0.0f && silhouette.isInside(direction); + pdf = 1.0f / max(solidAngle, 1e-7f); + + return direction; + } +}; + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_URENA_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/RayVis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/ray_vis.frag.hlsl similarity index 68% rename from 73_SolidAngleVisualizer/app_resources/hlsl/RayVis.frag.hlsl rename to 73_SolidAngleVisualizer/app_resources/hlsl/ray_vis.frag.hlsl index a8a1ff52d..d01b3a07f 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/RayVis.frag.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/ray_vis.frag.hlsl @@ -1,3 +1,6 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h #pragma wave shader_stage(fragment) #include "common.hlsl" @@ -16,18 +19,15 @@ struct ArrowResult }; [[vk::push_constant]] struct PushConstantRayVis pc; -// #if DEBUG_DATA -[[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; -// #endif #if VISUALIZE_SAMPLES -#include "Drawing.hlsl" +#include "drawing.hlsl" // Ray-AABB intersection in world space // Returns the distance to the nearest intersection point, or -1 if no hit float32_t rayAABBIntersection(float32_t3 rayOrigin, float32_t3 rayDir, float32_t3 aabbMin, float32_t3 aabbMax) { - float32_t3 invDir = 1.0 / rayDir; + float32_t3 invDir = 1.0f / rayDir; float32_t3 t0 = (aabbMin - rayOrigin) * invDir; float32_t3 t1 = (aabbMax - rayOrigin) * invDir; @@ -61,7 +61,7 @@ ArrowResult visualizeRayAsArrow(float32_t3 rayOrigin, float32_t4 directionAndPdf { ArrowResult result; result.color = float32_t4(0, 0, 0, 0); - result.depth = 1.0; // Far plane in reversed-Z + result.depth = 0.0; // Far plane in reversed-Z float32_t3 rayDir = normalize(directionAndPdf.xyz); float32_t pdf = directionAndPdf.w; @@ -140,7 +140,7 @@ ArrowResult visualizeRayAsArrow(float32_t3 rayOrigin, float32_t4 directionAndPdf // Compute NDC depth for reversed-Z float32_t depthNDC = clipPos.z / clipPos.w; - result.depth = depthNDC; + result.depth = 1.0f - depthNDC; // Clip against valid depth range if (result.depth < 0.0 || result.depth > 1.0) @@ -157,32 +157,6 @@ ArrowResult visualizeRayAsArrow(float32_t3 rayOrigin, float32_t4 directionAndPdf return result; } -// Transform a point by inverse of model matrix (world to local space) -float32_t3 worldToLocal(float32_t3 worldPos, float32_t3x4 modelMatrix) -{ - // Manually construct 4x4 from 3x4 - float32_t4x4 model4x4 = float32_t4x4( - modelMatrix[0], - modelMatrix[1], - modelMatrix[2], - float32_t4(0.0, 0.0, 0.0, 1.0)); - float32_t4x4 invModel = inverse(model4x4); - return mul(invModel, float32_t4(worldPos, 1.0)).xyz; -} - -// Transform a direction by inverse of model matrix (no translation) -float32_t3 worldToLocalDir(float32_t3 worldDir, float32_t3x4 modelMatrix) -{ - // Manually construct 4x4 from 3x4 - float32_t4x4 model4x4 = float32_t4x4( - modelMatrix[0], - modelMatrix[1], - modelMatrix[2], - float32_t4(0.0, 0.0, 0.0, 1.0)); - float32_t4x4 invModel = inverse(model4x4); - return mul(invModel, float32_t4(worldDir, 0.0)).xyz; -} - // Returns both tMin (entry) and tMax (exit) for ray-AABB intersection struct AABBIntersection { @@ -220,6 +194,7 @@ AABBIntersection rayAABBIntersectionFull(float32_t3 origin, float32_t3 dir, floa } #endif // VISUALIZE_SAMPLES +// [shader("pixel")] [[vk::location(0)]] ArrowResult main(SVertexAttributes vx) { ArrowResult output; @@ -253,58 +228,54 @@ AABBIntersection rayAABBIntersectionFull(float32_t3 origin, float32_t3 dir, floa uint32_t sampleCount = DebugDataBuffer[0].sampleCount; - // for (uint32_t i = 0; i < sampleCount; i++) - // { - // float32_t3 rayOrigin = float32_t3(0, 0, 0); - // float32_t4 directionAndPdf = DebugDataBuffer[0].rayData[i]; - // float32_t3 rayDir = normalize(directionAndPdf.xyz); - - // // Define cube bounds in local space - // float32_t3 cubeLocalMin = float32_t3(-0.5, -0.5, -0.5); - // float32_t3 cubeLocalMax = float32_t3(0.5, 0.5, 0.5); - - // // Transform ray to local space of the cube - // float32_t3 localRayOrigin = worldToLocal(rayOrigin, pc.modelMatrix); - // float32_t3 localRayDir = normalize(worldToLocalDir(rayDir, pc.modelMatrix)); - - // // Get both entry and exit distances - // AABBIntersection intersection = rayAABBIntersectionFull( - // localRayOrigin, - // localRayDir, - // cubeLocalMin, - // cubeLocalMax); - - // float32_t arrowLength; - // float32_t3 arrowColor; - - // if (intersection.hit) - // { - // // Use tMax (exit point at back face) instead of tMin (entry point at front face) - // float32_t3 localExitPoint = localRayOrigin + localRayDir * intersection.tMax; - // float32_t3 worldExitPoint = mul(pc.modelMatrix, float32_t4(localExitPoint, 1.0)).xyz; - // arrowLength = length(worldExitPoint - rayOrigin); - // arrowColor = float32_t3(0.0, 1.0, 0.0); // Green for valid samples - // } - // else - // { - // // Ray doesn't intersect - THIS SHOULD NEVER HAPPEN with correct sampling! - // float32_t3 cubeCenter = mul(pc.modelMatrix, float32_t4(0, 0, 0, 1)).xyz; - // arrowLength = length(cubeCenter - rayOrigin) + 2.0; - // arrowColor = float32_t3(1.0, 0.0, 0.0); // Red for BROKEN samples - // } - - // ArrowResult arrow = visualizeRayAsArrow(rayOrigin, directionAndPdf, arrowLength, ndcPos, aspect); - - // // Only update depth if arrow was actually drawn - // if (arrow.color.a > 0.0) - // { - // maxDepth = max(maxDepth, arrow.depth); - // } - - // // Modulate arrow color by its alpha (only add where arrow is visible) - // output.color.rgb += arrowColor * arrow.color.a; - // output.color.a = max(output.color.a, arrow.color.a); - // } + for (uint32_t i = 0; i < sampleCount; i++) + { + float32_t3 rayOrigin = float32_t3(0, 0, 0); + float32_t4 directionAndPdf = DebugDataBuffer[0].rayData[i]; + float32_t3 rayDir = normalize(directionAndPdf.xyz); + + // Define cube bounds in local space + float32_t3 cubeLocalMin = float32_t3(-0.5, -0.5, -0.5); + float32_t3 cubeLocalMax = float32_t3(0.5, 0.5, 0.5); + + // Transform ray to local space of the cube (using precomputed inverse) + float32_t3 localRayOrigin = mul(pc.invModelMatrix, float32_t4(rayOrigin, 1.0)).xyz; + float32_t3 localRayDir = normalize(mul(pc.invModelMatrix, float32_t4(rayDir, 0.0)).xyz); + + // Get both entry and exit distances + AABBIntersection intersection = rayAABBIntersectionFull(localRayOrigin, localRayDir, cubeLocalMin, cubeLocalMax); + + float32_t arrowLength; + float32_t3 arrowColor; + + if (intersection.hit) + { + // Use tMax (exit point at back face) instead of tMin (entry point at front face) + float32_t3 localExitPoint = localRayOrigin + localRayDir * intersection.tMax; + float32_t3 worldExitPoint = mul(pc.modelMatrix, float32_t4(localExitPoint, 1.0)).xyz; + arrowLength = length(worldExitPoint - rayOrigin); + arrowColor = float32_t3(0.0, 1.0, 0.0); // Green for valid samples + } + else + { + // Ray doesn't intersect - THIS SHOULD NEVER HAPPEN with correct sampling! + float32_t3 cubeCenter = mul(pc.modelMatrix, float32_t4(0, 0, 0, 1)).xyz; + arrowLength = length(cubeCenter - rayOrigin) + 2.0; + arrowColor = float32_t3(1.0, 0.0, 0.0); // Red for BROKEN samples + } + + ArrowResult arrow = visualizeRayAsArrow(rayOrigin, directionAndPdf, arrowLength, ndcPos, aspect); + + // Only update depth if arrow was actually drawn + if (arrow.color.a > 0.0) + { + maxDepth = max(maxDepth, arrow.depth); + } + + // Modulate arrow color by its alpha (only add where arrow is visible) + output.color.rgb += arrowColor * arrow.color.a; + output.color.a = max(output.color.a, arrow.color.a); + } // Clamp to prevent overflow output.color = saturate(output.color); diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl index 504db2db9..8213c17fc 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl @@ -1,189 +1,244 @@ -#ifndef _SILHOUETTE_HLSL_ -#define _SILHOUETTE_HLSL_ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ #include "gpu_common.hlsl" -#include "utils.hlsl" -// Special index values for clip points -static const uint32_t CLIP_POINT_A = 23; // Clip point between last positive and first negative -static const uint32_t CLIP_POINT_B = 24; // Clip point between last negative and first positive - -// Compute region and configuration index from model matrix -uint32_t computeRegionAndConfig(float32_t3x4 modelMatrix, out uint32_t3 region, out uint32_t configIndex, out uint32_t vertexCount) +struct ClippedSilhouette { - float32_t4x3 columnModel = transpose(modelMatrix); - float32_t3 obbCenter = columnModel[3].xyz; - float32_t3x3 upper3x3 = (float32_t3x3)columnModel; + float32_t3 vertices[MAX_SILHOUETTE_VERTICES]; // Max 7 vertices after clipping, unnormalized + uint32_t count; - float32_t3 rcpSqScales = rcp(float32_t3( - dot(upper3x3[0], upper3x3[0]), - dot(upper3x3[1], upper3x3[1]), - dot(upper3x3[2], upper3x3[2]))); + void normalize() + { + vertices[0] = nbl::hlsl::normalize(vertices[0]); + vertices[1] = nbl::hlsl::normalize(vertices[1]); + vertices[2] = nbl::hlsl::normalize(vertices[2]); + if (count > 3) + { + vertices[3] = nbl::hlsl::normalize(vertices[3]); + if (count > 4) + { + vertices[4] = nbl::hlsl::normalize(vertices[4]); + if (count > 5) + { + vertices[5] = nbl::hlsl::normalize(vertices[5]); + if (count > 6) + { + vertices[6] = nbl::hlsl::normalize(vertices[6]); + } + } + } + } + } - float32_t3 normalizedProj = mul(upper3x3, obbCenter) * rcpSqScales; + // Compute the silhouette centroid (average direction) + float32_t3 getCenter() + { + float32_t3 sum = float32_t3(0, 0, 0); - region = uint32_t3( - normalizedProj.x < -0.5f ? 0 : (normalizedProj.x > 0.5f ? 2 : 1), - normalizedProj.y < -0.5f ? 0 : (normalizedProj.y > 0.5f ? 2 : 1), - normalizedProj.z < -0.5f ? 0 : (normalizedProj.z > 0.5f ? 2 : 1)); + NBL_UNROLL + for (uint32_t i = 0; i < MAX_SILHOUETTE_VERTICES; i++) + { + if (i < count) + sum += vertices[i]; + } - configIndex = region.x + region.y * 3u + region.z * 9u; + return nbl::hlsl::normalize(sum); + } - // uint32_t sil = packSilhouette(silhouettes[configIndex]); - uint32_t sil = binSilhouettes[configIndex]; - vertexCount = getSilhouetteSize(sil); + static uint32_t computeRegionAndConfig(float32_t3x4 modelMatrix, out uint32_t3 region, out uint32_t configIndex, out uint32_t vertexCount) + { + float32_t4x3 columnModel = transpose(modelMatrix); + float32_t3 obbCenter = columnModel[3].xyz; + float32_t3x3 upper3x3 = (float32_t3x3)columnModel; - return sil; -} + float32_t3 rcpSqScales = rcp(float32_t3( + dot(upper3x3[0], upper3x3[0]), + dot(upper3x3[1], upper3x3[1]), + dot(upper3x3[2], upper3x3[2]))); -#if VISUALIZE_SAMPLES -float32_t4 -#else -void -#endif -computeSilhouette(float32_t3x4 modelMatrix, uint32_t vertexCount, uint32_t sil -#if VISUALIZE_SAMPLES - , - float32_t3 spherePos, float32_t aaWidth -#endif - , - NBL_REF_ARG(ClippedSilhouette) silhouette) -{ -#if VISUALIZE_SAMPLES - float32_t4 color = float32_t4(0, 0, 0, 0); -#endif + float32_t3 normalizedProj = mul(upper3x3, obbCenter) * rcpSqScales; - silhouette.count = 0; + region = uint32_t3( + normalizedProj.x < -0.5f ? 0 : (normalizedProj.x > 0.5f ? 2 : 1), + normalizedProj.y < -0.5f ? 0 : (normalizedProj.y > 0.5f ? 2 : 1), + normalizedProj.z < -0.5f ? 0 : (normalizedProj.z > 0.5f ? 2 : 1)); - // Build clip mask (z < 0) - uint32_t clipMask = 0u; - NBL_UNROLL - for (uint32_t i = 0; i < 4; i++) - clipMask |= (getVertexZNeg(modelMatrix, getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; + configIndex = region.x + region.y * 3u + region.z * 9u; - if (vertexCount == 6) + uint32_t sil = binSilhouettes[configIndex]; + vertexCount = getSilhouetteSize(sil); + + return sil; + } + + void compute(float32_t3x4 modelMatrix, uint32_t vertexCount, uint32_t sil) { + count = 0; + + // Build clip mask (z < 0) + uint32_t clipMask = 0u; NBL_UNROLL - for (uint32_t i = 4; i < 6; i++) + for (uint32_t i = 0; i < 4; i++) clipMask |= (getVertexZNeg(modelMatrix, getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; - } - uint32_t clipCount = countbits(clipMask); + if (vertexCount == 6) + { + NBL_UNROLL + for (uint32_t i = 4; i < 6; i++) + clipMask |= (getVertexZNeg(modelMatrix, getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; + } + + uint32_t clipCount = countbits(clipMask); -#if 0 - // Early exit if fully clipped - if (clipCount == vertexCount) - return color; + // Invert clip mask to find first positive vertex + uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); - // No clipping needed - fast path - if (clipCount == 0) - { - for (uint32_t i = 0; i < vertexCount; i++) + // Check if wrap-around is needed (first and last bits negative) + bool wrapAround = ((clipMask & 1u) != 0u) && ((clipMask & (1u << (vertexCount - 1))) != 0u); + + // Compute rotation amount + uint32_t rotateAmount = wrapAround + ? firstbitlow(invertedMask) // first positive + : firstbithigh(clipMask) + 1; // first vertex after last negative + + // Rotate masks + uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); + uint32_t rotatedSil = rotr(sil, rotateAmount * 3, vertexCount * 3); + uint32_t positiveCount = vertexCount - clipCount; + + // ALWAYS compute both clip points + uint32_t lastPosIdx = positiveCount - 1; + uint32_t firstNegIdx = positiveCount; + + float32_t3 vLastPos = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, lastPosIdx)); + float32_t3 vFirstNeg = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, firstNegIdx)); + float32_t t = vLastPos.z / (vLastPos.z - vFirstNeg.z); + float32_t3 clipA = lerp(vLastPos, vFirstNeg, t); + + float32_t3 vLastNeg = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, vertexCount - 1)); + float32_t3 vFirstPos = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, 0)); + t = vLastNeg.z / (vLastNeg.z - vFirstPos.z); + float32_t3 clipB = lerp(vLastNeg, vFirstPos, t); + + NBL_UNROLL + for (uint32_t i = 0; i < positiveCount; i++) { - uint32_t i0 = i; - uint32_t i1 = (i + 1) % vertexCount; - float32_t3 v0 = getVertex(modelMatrix, getSilhouetteVertex(sil, i0)); - silhouette.vertices[silhouette.count] = v0; - silhouette.indices[silhouette.count++] = i0; // Original index (no rotation) - -#if VISUALIZE_SAMPLES - float32_t3 v1 = getVertex(modelMatrix, getSilhouetteVertex(sil, i1)); - float32_t3 pts[2] = {v0, v1}; - color += drawEdge(i1, pts, spherePos, aaWidth); + float32_t3 v0 = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, i)); + +#if DEBUG_DATA + uint32_t originalIndex = (i + rotateAmount) % vertexCount; + DebugDataBuffer[0].clippedSilhouetteVertices[count] = v0; + DebugDataBuffer[0].clippedSilhouetteVerticesIndices[count] = originalIndex; #endif + vertices[count++] = v0; } - return color; - } + + if (clipCount > 0 && clipCount < vertexCount) + { +#if DEBUG_DATA + DebugDataBuffer[0].clippedSilhouetteVertices[count] = clipA; + DebugDataBuffer[0].clippedSilhouetteVerticesIndices[count] = CLIP_POINT_A; #endif + vertices[count++] = clipA; - // Rotate clip mask so positives come first - uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); - bool wrapAround = ((clipMask & 1u) != 0u) && ((clipMask & (1u << (vertexCount - 1))) != 0u); - uint32_t rotateAmount = wrapAround - ? firstbitlow(invertedMask) // -> First POSITIVE - : firstbithigh(clipMask) + 1; // -> First vertex AFTER last negative - - uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); - uint32_t rotatedSil = rotr(sil, rotateAmount * 3, vertexCount * 3); - uint32_t positiveCount = vertexCount - clipCount; - - // ALWAYS compute both clip points - uint32_t lastPosIdx = positiveCount - 1; - uint32_t firstNegIdx = positiveCount; - - float32_t3 vLastPos = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, lastPosIdx)); - float32_t3 vFirstNeg = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, firstNegIdx)); - float32_t t = vLastPos.z / (vLastPos.z - vFirstNeg.z); - float32_t3 clipA = lerp(vLastPos, vFirstNeg, t); - - float32_t3 vLastNeg = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, vertexCount - 1)); - float32_t3 vFirstPos = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, 0)); - t = vLastNeg.z / (vLastNeg.z - vFirstPos.z); - float32_t3 clipB = lerp(vLastNeg, vFirstPos, t); - - NBL_UNROLL - for (uint32_t i = 0; i < positiveCount; i++) - { - // Get raw vertex - float32_t3 v0 = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, i)); - bool isLastPositive = (i == positiveCount - 1); - bool useClipA = (clipCount > 0) && isLastPositive; - - // Compute original index before rotation - uint32_t originalIndex = (i + rotateAmount) % vertexCount; - -#if VISUALIZE_SAMPLES - float32_t3 v1 = useClipA ? clipA : getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, (i + 1) % vertexCount)); - float32_t3 pts[2] = {normalize(v0), normalize(v1)}; - color += drawEdge((i + 1) % vertexCount, pts, spherePos, aaWidth); +#if DEBUG_DATA + DebugDataBuffer[0].clippedSilhouetteVertices[count] = clipB; + DebugDataBuffer[0].clippedSilhouetteVerticesIndices[count] = CLIP_POINT_B; #endif + vertices[count++] = clipB; + } #if DEBUG_DATA - DebugDataBuffer[0].clippedSilhouetteVertices[silhouette.count] = v0; - DebugDataBuffer[0].clippedSilhouetteVerticesIndices[silhouette.count] = originalIndex; + DebugDataBuffer[0].clippedSilhouetteVertexCount = count; + DebugDataBuffer[0].clipMask = clipMask; + DebugDataBuffer[0].clipCount = clipCount; + DebugDataBuffer[0].rotatedClipMask = rotatedClipMask; + DebugDataBuffer[0].rotateAmount = rotateAmount; + DebugDataBuffer[0].positiveVertCount = positiveCount; + DebugDataBuffer[0].wrapAround = (uint32_t)wrapAround; + DebugDataBuffer[0].rotatedSil = rotatedSil; #endif - silhouette.vertices[silhouette.count++] = normalize(v0); } +}; - if (clipCount > 0 && clipCount < vertexCount) +struct SilEdgeNormals +{ + float16_t3 edgeNormals[MAX_SILHOUETTE_VERTICES]; // 10.5 floats instead of 21 + uint32_t count; + + // Better not use and calculate it while creating the sampler + static SilEdgeNormals create(NBL_CONST_REF_ARG(ClippedSilhouette) sil) { - float32_t3 vFirst = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, 0)); + SilEdgeNormals result = (SilEdgeNormals)0; + result.count = sil.count; -#if VISUALIZE_SAMPLES - float32_t3 npPts[2] = {normalize(clipB), normalize(vFirst)}; - color += drawEdge(0, npPts, spherePos, aaWidth); + float32_t3 v0 = sil.vertices[0]; + float32_t3 v1 = sil.vertices[1]; + float32_t3 v2 = sil.vertices[2]; - float32_t3 arcPts[2] = {normalize(clipA), normalize(clipB)}; - color += drawEdge(23, arcPts, spherePos, aaWidth, 0.6f); -#endif + result.edgeNormals[0] = float16_t3(cross(v0, v1)); + result.edgeNormals[1] = float16_t3(cross(v1, v2)); -#if DEBUG_DATA - DebugDataBuffer[0].clippedSilhouetteVertices[silhouette.count] = clipA; - DebugDataBuffer[0].clippedSilhouetteVerticesIndices[silhouette.count] = CLIP_POINT_A; -#endif - silhouette.vertices[silhouette.count++] = normalize(clipA); + if (sil.count > 3) + { + float32_t3 v3 = sil.vertices[3]; + result.edgeNormals[2] = float16_t3(cross(v2, v3)); + + if (sil.count > 4) + { + float32_t3 v4 = sil.vertices[4]; + result.edgeNormals[3] = float16_t3(cross(v3, v4)); + + if (sil.count > 5) + { + float32_t3 v5 = sil.vertices[5]; + result.edgeNormals[4] = float16_t3(cross(v4, v5)); + + if (sil.count > 6) + { + float32_t3 v6 = sil.vertices[6]; + result.edgeNormals[5] = float16_t3(cross(v5, v6)); + result.edgeNormals[6] = float16_t3(cross(v6, v0)); + } + else + { + result.edgeNormals[5] = float16_t3(cross(v5, v0)); + } + } + else + { + result.edgeNormals[4] = float16_t3(cross(v4, v0)); + } + } + else + { + result.edgeNormals[3] = float16_t3(cross(v3, v0)); + } + } + else + { + result.edgeNormals[2] = float16_t3(cross(v2, v0)); + } -#if DEBUG_DATA - DebugDataBuffer[0].clippedSilhouetteVertices[silhouette.count] = clipB; - DebugDataBuffer[0].clippedSilhouetteVerticesIndices[silhouette.count] = CLIP_POINT_B; -#endif - silhouette.vertices[silhouette.count++] = normalize(clipB); + return result; } -#if DEBUG_DATA - DebugDataBuffer[0].clippedSilhouetteVertexCount = silhouette.count; - DebugDataBuffer[0].clipMask = clipMask; - DebugDataBuffer[0].clipCount = clipCount; - DebugDataBuffer[0].rotatedClipMask = rotatedClipMask; - DebugDataBuffer[0].rotateAmount = rotateAmount; - DebugDataBuffer[0].positiveVertCount = positiveCount; - DebugDataBuffer[0].wrapAround = (uint32_t)wrapAround; - DebugDataBuffer[0].rotatedSil = rotatedSil; -#endif - -#if VISUALIZE_SAMPLES - return color; -#endif -} + bool isInside(float32_t3 dir) + { + float16_t3 d = float16_t3(dir); + half maxDot = dot(d, edgeNormals[0]); + maxDot = max(maxDot, dot(d, edgeNormals[1])); + maxDot = max(maxDot, dot(d, edgeNormals[2])); + maxDot = max(maxDot, dot(d, edgeNormals[3])); + maxDot = max(maxDot, dot(d, edgeNormals[4])); + maxDot = max(maxDot, dot(d, edgeNormals[5])); + maxDot = max(maxDot, dot(d, edgeNormals[6])); + return maxDot <= float16_t(0.0f); + } +}; -#endif // _SILHOUETTE_HLSL_ +#endif // _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/solid_angle_vis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/solid_angle_vis.frag.hlsl new file mode 100644 index 000000000..bba9aba75 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/solid_angle_vis.frag.hlsl @@ -0,0 +1,305 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#pragma wave shader_stage(fragment) + +#include "common.hlsl" +#include + +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +#include "drawing.hlsl" +#include "utils.hlsl" +#include "silhouette.hlsl" +#include "triangle_sampling.hlsl" +#include "pyramid_sampling.hlsl" +#include "parallelogram_sampling.hlsl" + +[[vk::push_constant]] struct PushConstants pc; + +static const SAMPLING_MODE samplingMode = (SAMPLING_MODE)SAMPLING_MODE_CONST; + +void computeCubeGeo() +{ + for (uint32_t i = 0; i < 8; i++) + corners[i] = mul(pc.modelMatrix, float32_t4(constCorners[i], 1.0f)).xyz; + + for (uint32_t f = 0; f < 6; f++) + { + faceCenters[f] = float32_t3(0, 0, 0); + for (uint32_t v = 0; v < 4; v++) + faceCenters[f] += corners[faceToCorners[f][v]]; + faceCenters[f] /= 4.0f; + } +} + +void validateSilhouetteEdges(uint32_t sil, uint32_t vertexCount, inout uint32_t silEdgeMask) +{ +#if DEBUG_DATA + { + for (uint32_t i = 0; i < vertexCount; i++) + { + uint32_t vIdx = i % vertexCount; + uint32_t v1Idx = (i + 1) % vertexCount; + + uint32_t v0Corner = getSilhouetteVertex(sil, vIdx); + uint32_t v1Corner = getSilhouetteVertex(sil, v1Idx); + // Mark edge as part of silhouette + for (uint32_t e = 0; e < 12; e++) + { + uint32_t2 edge = allEdges[e]; + if ((edge.x == v0Corner && edge.y == v1Corner) || + (edge.x == v1Corner && edge.y == v0Corner)) + { + silEdgeMask |= (1u << e); + } + } + } + validateEdgeVisibility(pc.modelMatrix, sil, vertexCount, silEdgeMask); + } +#endif +} + +void computeSpherePos(SVertexAttributes vx, out float32_t2 ndc, out float32_t3 spherePos) +{ + ndc = vx.uv * 2.0f - 1.0f; + float32_t aspect = pc.viewport.z / pc.viewport.w; + ndc.x *= aspect; + + float32_t2 normalized = ndc / CIRCLE_RADIUS; + float32_t r2 = dot(normalized, normalized); + + if (r2 <= 1.0f) + { + spherePos = float32_t3(normalized.x, normalized.y, sqrt(1.0f - r2)); + } + else + { + float32_t uv2Plus1 = r2 + 1.0f; + spherePos = float32_t3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; + } + spherePos = normalize(spherePos); +} + +#if VISUALIZE_SAMPLES +float32_t4 visualizeSample(float32_t3 sampleDir, float32_t2 xi, uint32_t index, float32_t2 screenUV, float32_t3 spherePos, float32_t2 ndc, float32_t aaWidth +#if DEBUG_DATA + , + inout RWStructuredBuffer DebugDataBuffer +#endif +) +{ + float32_t4 accumColor = 0; + + float32_t2 pssSize = float32_t2(0.3, 0.3); // 30% of screen + float32_t2 pssPos = float32_t2(0.01, 0.01); // Offset from corner + bool isInsidePSS = all(and(screenUV >= pssPos, screenUV <= (pssPos + pssSize))); + + float32_t dist3D = distance(sampleDir, normalize(spherePos)); + float32_t alpha3D = 1.0f - smoothstep(0.0f, 0.02f, dist3D); + + if (alpha3D > 0.0f /* && !isInsidePSS*/) + { + float32_t3 sampleColor = colorLUT[index].rgb; + accumColor += float32_t4(sampleColor * alpha3D, alpha3D); + } + + // if (isInsidePSS) + // { + // // Map the raw xi to the PSS square dimensions + // float32_t2 xiPixelPos = pssPos + xi * pssSize; + // float32_t dist2D = distance(screenUV, xiPixelPos); + + // float32_t alpha2D = drawCross2D(screenUV, xiPixelPos, 0.005f, 0.001f); + // if (alpha2D > 0.0f) + // { + // float32_t3 sampleColor = colorLUT[index].rgb; + // accumColor += float32_t4(sampleColor * alpha2D, alpha2D); + // } + // } + + // // just the outline of the PSS + // if (isInsidePSS && accumColor.a < 0.1) + // accumColor = float32_t4(0.1, 0.1, 0.1, 1.0); + + return accumColor; +} +#endif // VISUALIZE_SAMPLES + +// [shader("pixel")] +[[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 +{ + float32_t4 color = float32_t4(0, 0, 0, 0); + for (uint32_t i = 0; i < 1; i++) + { + float32_t aaWidth = length(float32_t2(ddx(vx.uv.x), ddy(vx.uv.y))); + float32_t3 spherePos; + float32_t2 ndc; + computeSpherePos(vx, ndc, spherePos); +#if !FAST || DEBUG_DATA + computeCubeGeo(); +#endif + uint32_t3 region; + uint32_t configIndex; + uint32_t vertexCount; + uint32_t sil = ClippedSilhouette::computeRegionAndConfig(pc.modelMatrix, region, configIndex, vertexCount); + + uint32_t silEdgeMask = 0; // TODO: take from 'fast' compute() +#if DEBUG_DATA + validateSilhouetteEdges(sil, vertexCount, silEdgeMask); +#endif + ClippedSilhouette silhouette; + silhouette.compute(pc.modelMatrix, vertexCount, sil); + +#if VISUALIZE_SAMPLES + // Draw silhouette edges on the sphere + for (uint32_t ei = 0; ei < silhouette.count; ei++) + { + float32_t3 v0 = normalize(silhouette.vertices[ei]); + float32_t3 v1 = normalize(silhouette.vertices[(ei + 1) % silhouette.count]); + float32_t3 pts[2] = {v0, v1}; + color += drawEdge(0, pts, spherePos, aaWidth); + } +#endif + + TriangleFanSampler samplingData; + Parallelogram parallelogram; + SphericalPyramid pyramid; + UrenaSampler urena; + BiquadraticSampler biquad; + BilinearSampler bilin; + + SilEdgeNormals silEdgeNormals; + //===================================================================== + // Building + //===================================================================== + if (samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || + samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + { + samplingData = TriangleFanSampler::create(silhouette, samplingMode); + } + else if (samplingMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) + { + silhouette.normalize(); + parallelogram = Parallelogram::create(silhouette, silEdgeNormals +#if VISUALIZE_SAMPLES + , + ndc, spherePos, aaWidth, color +#endif + ); + } + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE || + samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC || + samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) + { + pyramid = SphericalPyramid::create(silhouette, silEdgeNormals +#if VISUALIZE_SAMPLES + , + ndc, spherePos, aaWidth, color +#endif + ); + + if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE) + urena = UrenaSampler::create(pyramid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC) + biquad = BiquadraticSampler::create(pyramid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) + bilin = BilinearSampler::create(pyramid); + } + +#if DEBUG_DATA + uint32_t validSampleCount = 0u; + DebugDataBuffer[0].sampleCount = pc.sampleCount; +#endif + //===================================================================== + // Sampling + //===================================================================== + for (uint32_t i = 0; i < pc.sampleCount; i++) + { + // Hash the invocation to offset the grid + float32_t2 xi = float32_t2( + (float32_t(i & 7u) + 0.5) / 8.0f, + (float32_t(i >> 3u) + 0.5) / 8.0f); + + float32_t pdf; + uint32_t index = 0; + float32_t3 sampleDir; + bool valid; + + if (samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + sampleDir = samplingData.sample(silhouette, xi, pdf, index); + else if (samplingMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) + sampleDir = parallelogram.sample(silEdgeNormals, xi, pdf, valid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE) + sampleDir = urena.sample(pyramid, silEdgeNormals, xi, pdf, valid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC) + sampleDir = biquad.sample(pyramid, silEdgeNormals, xi, pdf, valid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) + sampleDir = bilin.sample(pyramid, silEdgeNormals, xi, pdf, valid); + + if (!valid) + { + pdf = 0.0f; + // sampleDir = float32_t3(0, 0, 1); + } +#if DEBUG_DATA + else + { + validSampleCount++; + } + + DebugDataBuffer[0].rayData[i] = float32_t4(sampleDir, pdf); +#endif + +#if VISUALIZE_SAMPLES + // Draw samples on sphere + color += visualizeSample(sampleDir, xi, index, vx.uv, spherePos, ndc, aaWidth +#if DEBUG_DATA + , + DebugDataBuffer +#endif + ); +#else + if (pdf > 0.0f) + color += float4(sampleDir * 0.02f / pdf, 1.0f); +#endif // VISUALIZE_SAMPLES + } + +#if VISUALIZE_SAMPLES + + // For debugging: Draw a small indicator of which faces are found + // color += drawVisibleFaceOverlay(pc.modelMatrix, spherePos, region, aaWidth); + + // color += drawFaces(pc.modelMatrix, spherePos, aaWidth); + + // Draw clipped silhouette vertices + // color += drawClippedSilhouetteVertices(ndc, silhouette, aaWidth); + // color += drawHiddenEdges(pc.modelMatrix, spherePos, silEdgeMask, aaWidth); + // color += drawCorners(pc.modelMatrix, ndc, aaWidth, 0.05f); + color += drawRing(ndc, aaWidth); + + if (all(vx.uv >= float32_t2(0.f, 0.97f)) && all(vx.uv <= float32_t2(0.03f, 1.0f))) + { + return float32_t4(colorLUT[configIndex], 1.0f); + } +#else +#endif // VISUALIZE_SAMPLES + +#if DEBUG_DATA + InterlockedAdd(DebugDataBuffer[0].validSampleCount, validSampleCount); + InterlockedAdd(DebugDataBuffer[0].threadCount, 1u); + DebugDataBuffer[0].region = uint32_t3(region); + DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); + DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); + for (uint32_t i = 0; i < 6; i++) + { + DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); + } + DebugDataBuffer[0].silhouette = sil; + +#endif + } + + return color; +} diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/triangle_sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/triangle_sampling.hlsl new file mode 100644 index 000000000..46277ca27 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/triangle_sampling.hlsl @@ -0,0 +1,241 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_TRIANGLE_SAMPLING_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_TRIANGLE_SAMPLING_HLSL_INCLUDED_ + +// Include the spherical triangle utilities +#include "gpu_common.hlsl" +#include +#include +#include +#include +#include +#include "silhouette.hlsl" + +using namespace nbl::hlsl; + +// Maximum number of triangles we can have after clipping +// Without clipping, max 3 faces can be visible at once so 3 faces * 2 triangles = 6 edges, forming max 4 triangles +// With clipping, one more edge. 7 - 2 = 5 max triangles because fanning from one vertex +#define MAX_TRIANGLES 5 + +struct TriangleFanSampler +{ + uint32_t count; // Number of valid triangles + uint32_t samplingMode; // Mode used during build + float32_t totalWeight; // Sum of all triangle weights + float32_t3 faceNormal; // Face normal (only used for projected mode) + float32_t triangleSolidAngles[MAX_TRIANGLES]; // Weight per triangle (for selection) + uint32_t triangleIndices[MAX_TRIANGLES]; // Vertex index i (forms triangle with v0, vi, vi+1) + + float32_t computeProjectedSolidAngleFallback(float32_t3 v0, float32_t3 v1, float32_t3 v2, float32_t3 N) + { + // 1. Get edge normals (unit vectors) + // We use the cross product of the vertices (unit vectors on sphere) + float32_t3 n0 = cross(v0, v1); + float32_t3 n1 = cross(v1, v2); + float32_t3 n2 = cross(v2, v0); + + // 2. Normalize edge normals (magnitude is sin of the arc length) + float32_t l0 = length(n0); + float32_t l1 = length(n1); + float32_t l2 = length(n2); + + // Guard against degenerate triangles + if (l0 < 1e-7 || l1 < 1e-7 || l2 < 1e-7) + return 0.0f; + + n0 /= l0; + n1 /= l1; + n2 /= l2; + + // 3. Get arc lengths (angles in radians) + float32_t a = asin(clamp(l0, -1.0f, 1.0f)); // side v0-v1 + float32_t b = asin(clamp(l1, -1.0f, 1.0f)); // side v1-v2 + float32_t c = asin(clamp(l2, -1.0f, 1.0f)); // side v2-v0 + + // Handle acos/asin quadrant if dot product is negative + if (dot(v0, v1) < 0) + a = 3.14159265 - a; + if (dot(v1, v2) < 0) + b = 3.14159265 - b; + if (dot(v2, v0) < 0) + c = 3.14159265 - c; + + // 4. Compute projected solid angle + float32_t Gamma = 0.5f * (a * dot(n0, N) + b * dot(n1, N) + c * dot(n2, N)); + + // Return the absolute value of the total + return abs(Gamma); + } + + // Build fan triangulation, cache weights for triangle selection + static TriangleFanSampler create(ClippedSilhouette silhouette, uint32_t mode) + { + TriangleFanSampler self; + self.count = 0; + self.totalWeight = 0.0f; + self.samplingMode = mode; + self.faceNormal = float32_t3(0, 0, 0); + + if (silhouette.count < 3) + return self; + + const float32_t3 v0 = silhouette.vertices[0]; + const float32_t3 origin = float32_t3(0, 0, 0); + + // Compute face normal ONCE before the loop - silhouette is planar! + if (mode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + { + float32_t3 v1 = silhouette.vertices[1]; + float32_t3 v2 = silhouette.vertices[2]; + self.faceNormal = normalize(cross(v1 - v0, v2 - v0)); + } + + // Build fan triangulation from v0 + NBL_UNROLL + for (uint32_t i = 1; i < silhouette.count - 1; i++) + { + float32_t3 v1 = silhouette.vertices[i]; + float32_t3 v2 = silhouette.vertices[i + 1]; + + shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin); + + // Skip degenerate triangles + if (shapeTri.pyramidAngles()) + continue; + + // Calculate triangle solid angle + float32_t solidAngle; + if (mode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + { + float32_t3 cos_vertices = clamp( + (shapeTri.cos_sides - shapeTri.cos_sides.yzx * shapeTri.cos_sides.zxy) * + shapeTri.csc_sides.yzx * shapeTri.csc_sides.zxy, + float32_t3(-1.0f, -1.0f, -1.0f), + float32_t3(1.0f, 1.0f, 1.0f)); + solidAngle = shapeTri.projectedSolidAngleOfTriangle(self.faceNormal, shapeTri.cos_sides, shapeTri.csc_sides, cos_vertices); + } + else + { + solidAngle = shapeTri.solidAngleOfTriangle(); + } + + if (solidAngle <= 0.0f) + continue; + + // Store only what's needed for weighted selection + self.triangleSolidAngles[self.count] = solidAngle; + self.triangleIndices[self.count] = i; + self.totalWeight += solidAngle; + self.count++; + } + +#if DEBUG_DATA + // Validate no antipodal edges exist (would create spherical lune) + for (uint32_t i = 0; i < silhouette.count; i++) + { + uint32_t j = (i + 1) % silhouette.count; + float32_t3 n1 = normalize(silhouette.vertices[i]); + float32_t3 n2 = normalize(silhouette.vertices[j]); + + if (dot(n1, n2) < -0.99f) + { + DebugDataBuffer[0].sphericalLuneDetected = 1; + assert(false && "Spherical lune detected: antipodal silhouette edge"); + } + } + DebugDataBuffer[0].maxTrianglesExceeded = (self.count > MAX_TRIANGLES); + DebugDataBuffer[0].triangleCount = self.count; + DebugDataBuffer[0].totalSolidAngles = self.totalWeight; + for (uint32_t tri = 0; tri < self.count; tri++) + { + DebugDataBuffer[0].solidAngles[tri] = self.triangleSolidAngles[tri]; + } +#endif + + return self; + } + + // Sample using cached selection weights, recompute geometry on-demand + float32_t3 sample(ClippedSilhouette silhouette, float32_t2 xi, out float32_t pdf, out uint32_t selectedIdx) + { + selectedIdx = 0; + + // Handle empty or invalid data + if (count == 0 || totalWeight <= 0.0f) + { + pdf = 0.0f; + return float32_t3(0, 0, 1); + } + + // Select triangle using cached weighted random selection + float32_t targetWeight = xi.x * totalWeight; + float32_t cumulativeWeight = 0.0f; + float32_t prevCumulativeWeight = 0.0f; + + NBL_UNROLL + for (uint32_t i = 0; i < count; i++) + { + prevCumulativeWeight = cumulativeWeight; + cumulativeWeight += triangleSolidAngles[i]; + + if (targetWeight <= cumulativeWeight) + { + selectedIdx = i; + break; + } + } + + // Remap xi.x to [0,1] within selected triangle's solidAngle interval + float32_t triSolidAngle = triangleSolidAngles[selectedIdx]; + float32_t u = (targetWeight - prevCumulativeWeight) / max(triSolidAngle, 1e-7f); + + // Reconstruct the selected triangle geometry + uint32_t vertexIdx = triangleIndices[selectedIdx]; + float32_t3 v0 = silhouette.vertices[0]; + float32_t3 v1 = silhouette.vertices[vertexIdx]; + float32_t3 v2 = silhouette.vertices[vertexIdx + 1]; + + float32_t3 fn = normalize(cross(v1 - v0, v2 - v0)); + + float32_t3 origin = float32_t3(0, 0, 0); + + shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin); + + // Compute vertex angles once + float32_t3 cos_vertices = clamp( + (shapeTri.cos_sides - shapeTri.cos_sides.yzx * shapeTri.cos_sides.zxy) * + shapeTri.csc_sides.yzx * shapeTri.csc_sides.zxy, + float32_t3(-1.0f, -1.0f, -1.0f), + float32_t3(1.0f, 1.0f, 1.0f)); + float32_t3 sin_vertices = sqrt(float32_t3(1.0f, 1.0f, 1.0f) - cos_vertices * cos_vertices); + + // Sample based on mode + float32_t3 direction; + float32_t rcpPdf; + + if (samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + { + sampling::ProjectedSphericalTriangle samplingTri = sampling::ProjectedSphericalTriangle::create(shapeTri); + + direction = samplingTri.generate(rcpPdf, triSolidAngle, cos_vertices, sin_vertices, shapeTri.cos_sides[0], shapeTri.cos_sides[2], shapeTri.csc_sides[1], shapeTri.csc_sides[2], fn, false, float32_t2(u, xi.y)); + triSolidAngle = rcpPdf; // projected solid angle returned as rcpPdf + } + else + { + sampling::SphericalTriangle samplingTri = sampling::SphericalTriangle::create(shapeTri); + direction = samplingTri.generate(triSolidAngle, cos_vertices, sin_vertices, shapeTri.cos_sides[0], shapeTri.cos_sides[2], shapeTri.csc_sides[1], shapeTri.csc_sides[2], float32_t2(u, xi.y)); + } + + // Calculate PDF + float32_t trianglePdf = 1.0f / triSolidAngle; + float32_t selectionProb = triSolidAngle / totalWeight; + pdf = trianglePdf * selectionProb; + + return normalize(direction); + } +}; + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_TRIANGLE_SAMPLING_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl index e4bf804cb..832204cf2 100644 --- a/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl +++ b/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl @@ -1,21 +1,33 @@ -#ifndef _UTILS_HLSL_ -#define _UTILS_HLSL_ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_UTILS_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_UTILS_HLSL_INCLUDED_ +#include +#include // TODO: implemented somewhere else? // Bit rotation helpers uint32_t rotl(uint32_t value, uint32_t bits, uint32_t width) { - bits = bits % width; - uint32_t mask = (1u << width) - 1u; + // mask for the width + uint32_t mask = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u); value &= mask; + + // Map bits==width -> 0 + bits &= -(bits < width); + return ((value << bits) | (value >> (width - bits))) & mask; } uint32_t rotr(uint32_t value, uint32_t bits, uint32_t width) { - bits = bits % width; - uint32_t mask = (1u << width) - 1u; + uint32_t mask = ((1u << width) - 1u); value &= mask; + + // Map bits==width -> 0 + bits &= -(bits < width); + return ((value >> bits) | (value << (width - bits))) & mask; } @@ -46,4 +58,11 @@ float32_t2 hammersleySample(uint32_t i, uint32_t numSamples) float32_t(reversebits(i)) / 4294967295.0f); } -#endif // _UTILS_HLSL_ +float32_t2 nextRandomUnorm2(inout nbl::hlsl::Xoroshiro64StarStar rnd) +{ + return float32_t2( + float32_t(rnd()) * 2.3283064365386963e-10, + float32_t(rnd()) * 2.3283064365386963e-10); +} + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_UTILS_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/include/common.hpp b/73_SolidAngleVisualizer/include/common.hpp index 2e8e985dd..fe7d086dd 100644 --- a/73_SolidAngleVisualizer/include/common.hpp +++ b/73_SolidAngleVisualizer/include/common.hpp @@ -6,7 +6,6 @@ // the example's headers #include "transform.hpp" -#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" using namespace nbl; using namespace nbl::core; diff --git a/73_SolidAngleVisualizer/main.cpp b/73_SolidAngleVisualizer/main.cpp index 9d9941da3..c60952394 100644 --- a/73_SolidAngleVisualizer/main.cpp +++ b/73_SolidAngleVisualizer/main.cpp @@ -4,6 +4,8 @@ #include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "common.hpp" +#include +#include #include "app_resources/hlsl/common.hlsl" #include "app_resources/hlsl/benchmark/common.hlsl" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" @@ -18,17 +20,14 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR using device_base_t = MonoWindowApplication; using asset_base_t = BuiltinResourcesApplication; - inline static std::string SolidAngleVisShaderPath = "app_resources/hlsl/SolidAngleVis.frag.hlsl"; - inline static std::string RayVisShaderPath = "app_resources/hlsl/RayVis.frag.hlsl"; - public: - inline SolidAngleVisualizer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + inline SolidAngleVisualizer(const path &_localInputCWD, const path &_localOutputCWD, const path &_sharedInputCWD, const path &_sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), - device_base_t({ 2048, 1024 }, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) + device_base_t({2048, 1024}, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { } - inline bool onAppInitialized(smart_refctd_ptr&& system) override + inline bool onAppInitialized(smart_refctd_ptr &&system) override { if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; @@ -46,16 +45,16 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { if (!pool) return logFail("Couldn't create Command Pool!"); - if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, {m_cmdBufs.data() + i, 1})) return logFail("Couldn't create Command Buffer!"); } - const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; + const uint32_t addtionalBufferOwnershipFamilies[] = {getGraphicsQueue()->getFamilyIndex()}; m_scene = CGeometryCreatorScene::create( - { .transferQueue = getTransferUpQueue(), + {.transferQueue = getTransferUpQueue(), .utilities = m_utils.get(), .logger = m_logger.get(), - .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies }, + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies}, CSimpleDebugRenderer::DefaultPolygonGeometryPatch); // for the scene drawing pass @@ -65,29 +64,29 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR {{{.format = sceneRenderDepthFormat, .samples = IGPUImage::ESCF_1_BIT, .mayAlias = false}, - /*.loadOp =*/{IGPURenderpass::LOAD_OP::CLEAR}, - /*.storeOp =*/{IGPURenderpass::STORE_OP::STORE}, - /*.initialLayout =*/{IGPUImage::LAYOUT::UNDEFINED}, - /*.finalLayout =*/{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}, - IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd }; + /*.loadOp =*/{IGPURenderpass::LOAD_OP::CLEAR}, + /*.storeOp =*/{IGPURenderpass::STORE_OP::STORE}, + /*.initialLayout =*/{IGPUImage::LAYOUT::UNDEFINED}, + /*.finalLayout =*/{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}, + IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd}; params.depthStencilAttachments = depthAttachments; const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = { {{ {.format = finalSceneRenderFormat, .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, .mayAlias = false}, - /*.loadOp =*/IGPURenderpass::LOAD_OP::CLEAR, - /*.storeOp =*/IGPURenderpass::STORE_OP::STORE, - /*.initialLayout =*/IGPUImage::LAYOUT::UNDEFINED, - /*.finalLayout =*/IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read - }}, - IGPURenderpass::SCreationParams::ColorAttachmentsEnd }; + /*.loadOp =*/IGPURenderpass::LOAD_OP::CLEAR, + /*.storeOp =*/IGPURenderpass::STORE_OP::STORE, + /*.initialLayout =*/IGPUImage::LAYOUT::UNDEFINED, + /*.finalLayout =*/IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read + }}, + IGPURenderpass::SCreationParams::ColorAttachmentsEnd}; params.colorAttachments = colorAttachments; IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { {}, - IGPURenderpass::SCreationParams::SubpassesEnd }; - subpasses[0].depthStencilAttachment = { {.render = {.attachmentIndex = 0, .layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}} }; - subpasses[0].colorAttachments[0] = { .render = {.attachmentIndex = 0, .layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} }; + IGPURenderpass::SCreationParams::SubpassesEnd}; + subpasses[0].depthStencilAttachment = {{.render = {.attachmentIndex = 0, .layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}; + subpasses[0].colorAttachments[0] = {.render = {.attachmentIndex = 0, .layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}; params.subpasses = subpasses; const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { @@ -96,16 +95,16 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, .dstSubpass = 0, .memoryBarrier = { - // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later - // while color is sampled by ImGUI - .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - // don't want any writes to be available, as we are clearing both attachments - .srcAccessMask = ACCESS_FLAGS::NONE, - // destination needs to wait as early as possible - // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` - .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // because depth and color get cleared first no read mask - .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT} + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + // while color is sampled by ImGUI + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + // don't want any writes to be available, as we are clearing both attachments + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT} // leave view offsets and flags default }, { @@ -117,9 +116,9 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT | PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, // but we only care about the availability-visibility chain between renderpass and imgui .dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT} - // leave view offsets and flags default - }, - IGPURenderpass::SCreationParams::DependenciesEnd }; + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd}; params.dependencies = dependencies; auto solidAngleRenderpassParams = params; m_mainRenderpass = m_device->createRenderpass(std::move(params)); @@ -131,13 +130,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR return logFail("Failed to create Solid Angle Renderpass!"); } - const auto& geometries = m_scene->getInitParams().geometries; - m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), m_solidAngleRenderpass.get(), 0, { &geometries.front().get(), geometries.size() }); + const auto &geometries = m_scene->getInitParams().geometries; + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), m_solidAngleRenderpass.get(), 0, {&geometries.front().get(), geometries.size()}); // special case { - const auto& pipelines = m_renderer->getInitParams().pipelines; + const auto &pipelines = m_renderer->getInitParams().pipelines; auto ix = 0u; - for (const auto& name : m_scene->getInitParams().geometryNames) + for (const auto &name : m_scene->getInitParams().geometryNames) { if (name == "Cone") m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone]; @@ -149,90 +148,65 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // Create graphics pipeline { - auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, IShader::E_SHADER_STAGE stage, const std::string& defineMacro = "") -> smart_refctd_ptr + auto loadPrecompiledShader = [&](auto key) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) { - IAssetLoader::SAssetLoadParams lp = {}; - lp.workingDirectory = localInputCWD; - auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - { - m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); - std::exit(-1); - } - - auto source = smart_refctd_ptr_static_cast(assets[0]); - // The down-cast should not fail! - assert(source); - - auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - CHLSLCompiler::SOptions options = {}; - options.stage = stage; - options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; -#ifndef _NBL_DEBUG - ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; - auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); - options.spirvOptimizer = opt.get(); -#endif - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT;// | IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_FILE_BIT | IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = m_logger.get(); - options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - - core::vector defines; - if (!defineMacro.empty()) - defines.push_back({ defineMacro, "" }); - - options.preprocessorOptions.extraDefines = defines; - - source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); - - auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); - if (!shader) - { - m_logger->log("HLSL shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); - std::exit(-1); - } - - return shader; - }; + m_logger->log("Could not load precompiled shader!", ILogger::ELL_ERROR); + std::exit(-1); + } + assert(assets.size() == 1); + auto shader = IAsset::castDown(assets[0]); + if (!shader) + { + m_logger->log("Failed to load precompiled shader!", ILogger::ELL_ERROR); + std::exit(-1); + } + return shader; + }; ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); if (!fsTriProtoPPln) return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); - // Load Fragment Shader - auto solidAngleVisFragShader = loadAndCompileHLSLShader(SolidAngleVisShaderPath, ESS_FRAGMENT); - if (!solidAngleVisFragShader) - return logFail("Failed to Load and Compile Fragment Shader: SolidAngleVis!"); - - const IGPUPipelineBase::SShaderSpecInfo solidAngleFragSpec = { - .shader = solidAngleVisFragShader.get(), - .entryPoint = "main" }; - - auto rayVisFragShader = loadAndCompileHLSLShader(RayVisShaderPath, ESS_FRAGMENT); - if (!rayVisFragShader) - return logFail("Failed to Load and Compile Fragment Shader: rayVis!"); - const IGPUPipelineBase::SShaderSpecInfo RayFragSpec = { - .shader = rayVisFragShader.get(), - .entryPoint = "main" }; + // Load pre-compiled fragment shaders (6 modes x 2 debug = 12 SolidAngleVis + 2 RayVis) + // Can't use string literal template args in a loop, so unroll manually + // Index: mode * 2 + debugFlag (0=release, 1=debug) + smart_refctd_ptr saVisShaders[SAMPLING_MODE::Count * DebugPermutations]; + saVisShaders[0] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_sa">(m_device.get())); + saVisShaders[1] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_sa_dbg">(m_device.get())); + saVisShaders[2] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_psa">(m_device.get())); + saVisShaders[3] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_psa_dbg">(m_device.get())); + saVisShaders[4] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_para">(m_device.get())); + saVisShaders[5] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_para_dbg">(m_device.get())); + saVisShaders[6] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_rectangle">(m_device.get())); + saVisShaders[7] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_rectangle_dbg">(m_device.get())); + saVisShaders[8] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_biquad">(m_device.get())); + saVisShaders[9] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_biquad_dbg">(m_device.get())); + saVisShaders[10] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_bilinear">(m_device.get())); + saVisShaders[11] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_bilinear_dbg">(m_device.get())); + + smart_refctd_ptr rayVisShaders[DebugPermutations]; + rayVisShaders[0] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"ray_vis">(m_device.get())); + rayVisShaders[1] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"ray_vis_dbg">(m_device.get())); smart_refctd_ptr solidAngleVisLayout, rayVisLayout; - nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { - {.binding = 0, - .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_FRAGMENT, - .count = 1} }; + nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = + { + {.binding = 0, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_FRAGMENT, + .count = 1}}; smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); - const asset::SPushConstantRange saRanges[] = { {.stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, - .offset = 0, - .size = sizeof(PushConstants)} }; - const asset::SPushConstantRange rayRanges[] = { {.stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, - .offset = 0, - .size = sizeof(PushConstantRayVis)} }; + const asset::SPushConstantRange saRanges[] = {{.stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, .offset = 0, .size = sizeof(PushConstants)}}; + const asset::SPushConstantRange rayRanges[] = {{.stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, .offset = 0, .size = sizeof(PushConstantRayVis)}}; if (!dsLayout) logFail("Failed to create a Descriptor Layout!\n"); @@ -242,17 +216,31 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR rayVisLayout = m_device->createPipelineLayout(rayRanges, dsLayout); { - m_solidAngleVisPipeline = fsTriProtoPPln.createPipeline(solidAngleFragSpec, solidAngleVisLayout.get(), m_solidAngleRenderpass.get()); - if (!m_solidAngleVisPipeline) - return logFail("Could not create Graphics Pipeline!"); + // Create all SolidAngleVis pipeline variants + for (uint32_t i = 0; i < SAMPLING_MODE::Count * DebugPermutations; i++) + { + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = saVisShaders[i].get(), + .entryPoint = "main"}; + m_solidAngleVisPipelines[i] = fsTriProtoPPln.createPipeline(fragSpec, solidAngleVisLayout.get(), m_solidAngleRenderpass.get()); + if (!m_solidAngleVisPipelines[i]) + return logFail("Could not create SolidAngleVis Graphics Pipeline variant %d!", i); + } asset::SRasterizationParams rasterParams = ext::FullScreenTriangle::ProtoPipeline::DefaultRasterParams; rasterParams.depthWriteEnable = true; rasterParams.depthCompareOp = asset::E_COMPARE_OP::ECO_GREATER; - m_rayVisualizationPipeline = fsTriProtoPPln.createPipeline(RayFragSpec, rayVisLayout.get(), m_mainRenderpass.get(), 0, {}, rasterParams); - if (!m_rayVisualizationPipeline) - return logFail("Could not create Graphics Pipeline!"); + // Create all RayVis pipeline variants + for (uint32_t i = 0; i < DebugPermutations; i++) + { + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = rayVisShaders[i].get(), + .entryPoint = "main"}; + m_rayVisPipelines[i] = fsTriProtoPPln.createPipeline(fragSpec, rayVisLayout.get(), m_mainRenderpass.get(), 0, {}, rasterParams); + if (!m_rayVisPipelines[i]) + return logFail("Could not create RayVis Graphics Pipeline variant %d!", i); + } } // Allocate the memory { @@ -275,20 +263,20 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); assert(m_outputStorageBuffer->getBoundMemory().memory == m_allocation.memory.get()); - smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &dsLayout.get(), 1 }); + smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, {&dsLayout.get(), 1}); m_ds = pool->createDescriptorSet(std::move(dsLayout)); { IGPUDescriptorSet::SDescriptorInfo info[1]; info[0].desc = smart_refctd_ptr(m_outputStorageBuffer); - info[0].info.buffer = { .offset = 0, .size = BufferSize }; + info[0].info.buffer = {.offset = 0, .size = BufferSize}; IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = info} }; + {.dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = info}}; m_device->updateDescriptorSets(writes, {}); } } - if (!m_allocation.memory->map({ 0ull, m_allocation.memory->getAllocationSize() }, IDeviceMemoryAllocation::EMCAF_READ)) + if (!m_allocation.memory->map({0ull, m_allocation.memory->getAllocationSize()}, IDeviceMemoryAllocation::EMCAF_READ)) logFail("Failed to map the Device Memory!\n"); // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches @@ -299,10 +287,10 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // Create ImGUI { - auto scRes = static_cast(m_surface->getSwapchainResources()); + auto scRes = static_cast(m_surface->getSwapchainResources()); ext::imgui::UI::SCreationParameters params = {}; - params.resources.texturesInfo = { .setIx = 0u, .bindingIx = TexturesImGUIBindingIndex }; - params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; + params.resources.texturesInfo = {.setIx = 0u, .bindingIx = TexturesImGUIBindingIndex}; + params.resources.samplersInfo = {.setIx = 0u, .bindingIx = 1u}; params.utilities = m_utils; params.transfer = getTransferUpQueue(); params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxImGUITextures); @@ -317,12 +305,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // create rest of User Interface { - auto* imgui = interface.imGUI.get(); + auto *imgui = interface.imGUI.get(); // create the suballocated descriptor set { // note that we use default layout provided by our extension, but you are free to create your own by filling ext::imgui::UI::S_CREATION_PARAMETERS::resources - const auto* layout = interface.imGUI->getPipeline()->getLayout()->getDescriptorSetLayout(0u); - auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, { &layout, 1 }); + const auto *layout = interface.imGUI->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, {&layout, 1}); auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); interface.subAllocDS = make_smart_refctd_ptr(std::move(ds)); if (!interface.subAllocDS) @@ -342,12 +330,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .binding = TexturesImGUIBindingIndex, .arrayElement = ext::imgui::UI::FontAtlasTexId, .count = 1, - .info = &info }; - if (!m_device->updateDescriptorSets({ &write, 1 }, {})) + .info = &info}; + if (!m_device->updateDescriptorSets({&write, 1}, {})) return logFail("Failed to write the descriptor set"); } imgui->registerListener([this]() - { interface(); }); + { interface(); }); } interface.camera.mapKeysToWASD(); @@ -371,8 +359,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR update(nextPresentationTimestamp); { - const auto& virtualSolidAngleWindowRes = interface.solidAngleViewTransformReturnInfo.sceneResolution; - const auto& virtualMainWindowRes = interface.mainViewTransformReturnInfo.sceneResolution; + const auto &virtualSolidAngleWindowRes = interface.solidAngleViewTransformReturnInfo.sceneResolution; + const auto &virtualMainWindowRes = interface.mainViewTransformReturnInfo.sceneResolution; if (!m_solidAngleViewFramebuffer || m_solidAngleViewFramebuffer->getCreationParameters().width != virtualSolidAngleWindowRes[0] || m_solidAngleViewFramebuffer->getCreationParameters().height != virtualSolidAngleWindowRes[1] || !m_mainViewFramebuffer || m_mainViewFramebuffer->getCreationParameters().width != virtualMainWindowRes[0] || m_mainViewFramebuffer->getCreationParameters().height != virtualMainWindowRes[1]) recreateFramebuffers(); @@ -381,7 +369,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - auto* const cb = m_cmdBufs.data()[resourceIx].get(); + auto *const cb = m_cmdBufs.data()[resourceIx].get(); cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); @@ -390,23 +378,23 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR asset::SBufferRange range{ .offset = 0, .size = m_outputStorageBuffer->getSize(), - .buffer = m_outputStorageBuffer }; + .buffer = m_outputStorageBuffer}; cb->fillBuffer(range, 0u); { - const auto& creationParams = m_solidAngleViewFramebuffer->getCreationParameters(); + const auto &creationParams = m_solidAngleViewFramebuffer->getCreationParameters(); cb->beginDebugMarker("Draw Circle View Frame"); { - const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; - const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f, 0.f, 0.f, 1.f} }; + const IGPUCommandBuffer::SClearDepthStencilValue farValue = {.depth = 0.f}; + const IGPUCommandBuffer::SClearColorValue clearValue = {.float32 = {0.f, 0.f, 0.f, 1.f}}; const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = - { - .framebuffer = m_solidAngleViewFramebuffer.get(), - .colorClearValues = &clearValue, - .depthStencilClearValues = &farValue, - .renderArea = { - .offset = {0, 0}, - .extent = {creationParams.width, creationParams.height}} }; + { + .framebuffer = m_solidAngleViewFramebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0, 0}, + .extent = {creationParams.width, creationParams.height}}}; beginRenderpass(cb, renderpassInfo); } // draw scene @@ -416,10 +404,10 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR PushConstants pc{ .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), .viewport = {0.f, 0.f, static_cast(creationParams.width), static_cast(creationParams.height)}, - .samplingMode = m_samplingMode, .sampleCount = static_cast(m_SampleCount), - .frameIndex = lastFrameSeed }; - auto pipeline = m_solidAngleVisPipeline; + .frameIndex = lastFrameSeed}; + const uint32_t debugIdx = m_debugVisualization ? 1u : 0u; + auto pipeline = m_solidAngleVisPipelines[m_samplingMode * DebugPermutations + debugIdx]; cb->bindGraphicsPipeline(pipeline.get()); cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc); cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 0, 1, &m_ds.get()); @@ -428,27 +416,29 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR cb->endRenderPass(); cb->endDebugMarker(); } -#if DEBUG_DATA - m_device->waitIdle(); - std::memcpy(&m_GPUOutResulData, static_cast(m_allocation.memory->getMappedPointer()), sizeof(ResultData)); - m_device->waitIdle(); -#endif + + if (m_debugVisualization) + { + m_device->waitIdle(); + std::memcpy(&m_GPUOutResulData, static_cast(m_allocation.memory->getMappedPointer()), sizeof(ResultData)); + m_device->waitIdle(); + } } // draw main view if (m_mainViewFramebuffer) { { auto creationParams = m_mainViewFramebuffer->getCreationParameters(); - const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; - const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.1f, 0.1f, 0.1f, 1.f} }; + const IGPUCommandBuffer::SClearDepthStencilValue farValue = {.depth = 0.f}; + const IGPUCommandBuffer::SClearColorValue clearValue = {.float32 = {0.1f, 0.1f, 0.1f, 1.f}}; const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = - { - .framebuffer = m_mainViewFramebuffer.get(), - .colorClearValues = &clearValue, - .depthStencilClearValues = &farValue, - .renderArea = { - .offset = {0, 0}, - .extent = {creationParams.width, creationParams.height}} }; + { + .framebuffer = m_mainViewFramebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0, 0}, + .extent = {creationParams.width, creationParams.height}}}; beginRenderpass(cb, renderpassInfo); } { // draw rays visualization @@ -457,15 +447,16 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR cb->beginDebugMarker("Draw Rays visualization"); // draw scene { - float32_t4x4 viewProj = *reinterpret_cast(&interface.camera.getConcatenatedMatrix()); - float32_t3x4 view = *reinterpret_cast(&interface.camera.getViewMatrix()); + float32_t4x4 viewProj = *reinterpret_cast(&interface.camera.getConcatenatedMatrix()); + float32_t3x4 view = *reinterpret_cast(&interface.camera.getViewMatrix()); PushConstantRayVis pc{ .viewProjMatrix = viewProj, .viewMatrix = view, .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), + .invModelMatrix = hlsl::float32_t3x4(hlsl::transpose(hlsl::inverse(interface.m_OBBModelMatrix))), .viewport = {0.f, 0.f, static_cast(creationParams.width), static_cast(creationParams.height)}, - .frameIndex = m_frameSeeding ? static_cast(m_realFrameIx) : 0u }; - auto pipeline = m_rayVisualizationPipeline; + .frameIndex = m_frameSeeding ? static_cast(m_realFrameIx) : 0u}; + auto pipeline = m_rayVisPipelines[m_debugVisualization ? 1u : 0u]; cb->bindGraphicsPipeline(pipeline.get()); cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc); cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 0, 1, &m_ds.get()); @@ -481,14 +472,14 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR float32_t4x4 viewProjMatrix; // TODO: get rid of legacy matrices { - const auto& camera = interface.camera; - memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); - memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); + const auto &camera = interface.camera; + memcpy(&viewMatrix, &camera.getViewMatrix(), sizeof(viewMatrix)); + memcpy(&viewProjMatrix, &camera.getConcatenatedMatrix(), sizeof(viewProjMatrix)); } const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix); // tear down scene every frame - auto& instance = m_renderer->m_instances[0]; + auto &instance = m_renderer->m_instances[0]; instance.world = float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)); instance.packedGeo = m_renderer->getGeometries().data(); // cube // +interface.gcIndex; m_renderer->render(cb, viewParams); // draw the cube/OBB @@ -505,28 +496,28 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { cb->beginDebugMarker("SolidAngleVisualizer IMGUI Frame"); { - auto scRes = static_cast(m_surface->getSwapchainResources()); - const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f, 0.f, 0.f, 1.f} }; + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SClearColorValue clearValue = {.float32 = {0.f, 0.f, 0.f, 1.f}}; const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = - { - .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), - .colorClearValues = &clearValue, - .depthStencilClearValues = nullptr, - .renderArea = { - .offset = {0, 0}, - .extent = {m_window->getWidth(), m_window->getHeight()}} }; + { + .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = { + .offset = {0, 0}, + .extent = {m_window->getWidth(), m_window->getHeight()}}}; beginRenderpass(cb, renderpassInfo); } // draw ImGUI { - auto* imgui = interface.imGUI.get(); - auto* pipeline = imgui->getPipeline(); + auto *imgui = interface.imGUI.get(); + auto *pipeline = imgui->getPipeline(); cb->bindGraphicsPipeline(pipeline); // note that we use default UI pipeline layout where uiParams.resources.textures.setIx == uiParams.resources.samplers.setIx - const auto* ds = interface.subAllocDS->getDescriptorSet(); + const auto *ds = interface.subAllocDS->getDescriptorSet(); cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); // a timepoint in the future to release streaming resources for geometry - const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + const ISemaphore::SWaitInfo drawFinished = {.semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u}; if (!imgui->render(cb, drawFinished)) { m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); @@ -539,22 +530,22 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR cb->end(); IQueue::SSubmitInfo::SSemaphoreInfo retval = - { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS }; + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS}; const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = - { - {.cmdbuf = cb} }; + { + {.cmdbuf = cb}}; const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { {.semaphore = device_base_t::getCurrentAcquire().semaphore, .value = device_base_t::getCurrentAcquire().acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE} }; + .stageMask = PIPELINE_STAGE_FLAGS::NONE}}; const IQueue::SSubmitInfo infos[] = - { - {.waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = {&retval, 1}} }; + { + {.waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval, 1}}}; if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) { @@ -567,7 +558,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } protected: - const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override + const video::IGPURenderpass::SCreationParams::SSubpassDependency *getDefaultSubpassDependencies() const override { // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { @@ -581,27 +572,27 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // layout transition needs to finish before the color write .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT} - // leave view offsets and flags default - }, + // leave view offsets and flags default + }, // want layout transition to begin after all color output is done { .srcSubpass = 0, .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, .memoryBarrier = { - // last place where the color can get modified, depth is implicitly earlier - .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // only write ops, reads can't be made available - .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - // spec says nothing is needed when presentation is the destination - } - // leave view offsets and flags default - }, - IGPURenderpass::SCreationParams::DependenciesEnd }; + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd}; return dependencies; } private: inline void update(const std::chrono::microseconds nextPresentationTimestamp) { - auto& camera = interface.camera; + auto &camera = interface.camera; camera.setMoveSpeed(interface.moveSpeed); camera.setRotateSpeed(interface.rotateSpeed); @@ -623,8 +614,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // `timeDiff` being computed since `lastVirtualUpTimeStamp` camera.beginInputProcessing(nextPresentationTimestamp); { - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void - { + mouse.consumeEvents([&](const IMouseEventChannel::range_t &events) -> void + { if (interface.move) camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl else @@ -644,9 +635,9 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // interface.gcIndex = core::clamp(interface.gcIndex, 0ull, m_renderer->getGeometries().size() - 1); //} } }, - m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void - { + m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t &events) -> void + { if (interface.move) camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl @@ -658,18 +649,18 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR previousEventTimestamp = e.timeStamp; uiEvents.keyboard.emplace_back(e); } }, - m_logger.get()); + m_logger.get()); } camera.endInputProcessing(nextPresentationTimestamp); const auto cursorPosition = m_window->getCursorControl()->getPosition(); ext::imgui::UI::SUpdateParameters params = - { - .mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()), - .displaySize = {m_window->getWidth(), m_window->getHeight()}, - .mouseEvents = uiEvents.mouse, - .keyboardEvents = uiEvents.keyboard }; + { + .mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()), + .displaySize = {m_window->getWidth(), m_window->getHeight()}, + .mouseEvents = uiEvents.mouse, + .keyboardEvents = uiEvents.keyboard}; // interface.objectName = m_scene->getInitParams().geometryNames[interface.gcIndex]; interface.imGUI->update(params); @@ -679,23 +670,23 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { auto createImageAndView = [&](const uint16_t2 resolution, E_FORMAT format) -> smart_refctd_ptr - { - auto image = m_device->createImage({ {.type = IGPUImage::ET_2D, - .samples = IGPUImage::ESCF_1_BIT, - .format = format, - .extent = {resolution.x, resolution.y, 1}, - .mipLevels = 1, - .arrayLayers = 1, - .usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT} }); - if (!m_device->allocate(image->getMemoryReqs(), image.get()).isValid()) - return nullptr; - IGPUImageView::SCreationParams params = { - .image = std::move(image), - .viewType = IGPUImageView::ET_2D, - .format = format }; - params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? IGPUImage::EAF_DEPTH_BIT : IGPUImage::EAF_COLOR_BIT; - return m_device->createImageView(std::move(params)); - }; + { + auto image = m_device->createImage({{.type = IGPUImage::ET_2D, + .samples = IGPUImage::ESCF_1_BIT, + .format = format, + .extent = {resolution.x, resolution.y, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT}}); + if (!m_device->allocate(image->getMemoryReqs(), image.get()).isValid()) + return nullptr; + IGPUImageView::SCreationParams params = { + .image = std::move(image), + .viewType = IGPUImageView::ET_2D, + .format = format}; + params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? IGPUImage::EAF_DEPTH_BIT : IGPUImage::EAF_COLOR_BIT; + return m_device->createImageView(std::move(params)); + }; smart_refctd_ptr solidAngleView; smart_refctd_ptr mainView; @@ -708,19 +699,19 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { solidAngleView = createImageAndView(solidAngleViewRes, finalSceneRenderFormat); auto solidAngleDepthView = createImageAndView(solidAngleViewRes, sceneRenderDepthFormat); - m_solidAngleViewFramebuffer = m_device->createFramebuffer({ {.renderpass = m_solidAngleRenderpass, + m_solidAngleViewFramebuffer = m_device->createFramebuffer({{.renderpass = m_solidAngleRenderpass, .depthStencilAttachments = &solidAngleDepthView.get(), .colorAttachments = &solidAngleView.get(), .width = solidAngleViewRes.x, - .height = solidAngleViewRes.y} }); + .height = solidAngleViewRes.y}}); mainView = createImageAndView(mainViewRes, finalSceneRenderFormat); auto mainDepthView = createImageAndView(mainViewRes, sceneRenderDepthFormat); - m_mainViewFramebuffer = m_device->createFramebuffer({ {.renderpass = m_mainRenderpass, + m_mainViewFramebuffer = m_device->createFramebuffer({{.renderpass = m_mainRenderpass, .depthStencilAttachments = &mainDepthView.get(), .colorAttachments = &mainView.get(), .width = mainViewRes.x, - .height = mainViewRes.y} }); + .height = mainViewRes.y}}); } else { @@ -729,7 +720,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } // release previous slot and its image - interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices, { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1 }); + interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices, {.semaphore = m_semaphore.get(), .value = m_realFrameIx + 1}); // if (solidAngleView && mainView) { @@ -750,13 +741,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .binding = TexturesImGUIBindingIndex, .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)], .count = 1, - .info = &infos[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)]} }; - m_device->updateDescriptorSets({ write, static_cast(CInterface::Count) }, {}); + .info = &infos[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)]}}; + m_device->updateDescriptorSets({write, static_cast(CInterface::Count)}, {}); } interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndices[CInterface::ERV_MAIN_VIEW]; } - inline void beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& info) + inline void beginRenderpass(IGPUCommandBuffer *cb, const IGPUCommandBuffer::SRenderpassBeginInfo &info) { cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); cb->setScissor(0, 1, &info.renderArea); @@ -764,7 +755,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .x = 0, .y = 0, .width = static_cast(info.renderArea.extent.width), - .height = static_cast(info.renderArea.extent.height) }; + .height = static_cast(info.renderArea.extent.height)}; cb->setViewport(0u, 1u, &viewport); } @@ -781,7 +772,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes constexpr static inline auto MaxImGUITextures = 2u + MaxFramesInFlight; - static inline SAMPLING_MODE m_samplingMode = SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE; + static inline SAMPLING_MODE m_samplingMode = SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE; + static inline bool m_debugVisualization = true; static inline int m_SampleCount = 64; static inline bool m_frameSeeding = true; static inline ResultData m_GPUOutResulData; @@ -792,8 +784,10 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR smart_refctd_ptr m_renderer; smart_refctd_ptr m_solidAngleViewFramebuffer; smart_refctd_ptr m_mainViewFramebuffer; - smart_refctd_ptr m_solidAngleVisPipeline; - smart_refctd_ptr m_rayVisualizationPipeline; + // Pipeline variants: SolidAngleVis indexed by [mode * 2 + debugFlag], RayVis by [debugFlag] + static constexpr uint32_t DebugPermutations = 2; + smart_refctd_ptr m_solidAngleVisPipelines[SAMPLING_MODE::Count * DebugPermutations]; + smart_refctd_ptr m_rayVisPipelines[DebugPermutations]; // nbl::video::IDeviceMemoryAllocator::SAllocation m_allocation = {}; smart_refctd_ptr m_outputStorageBuffer; @@ -809,27 +803,26 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { void operator()() { - ImGuiIO& io = ImGui::GetIO(); + ImGuiIO &io = ImGui::GetIO(); // TODO: why is this a lambda and not just an assignment in a scope ? camera.setProjectionMatrix([&]() - { - const auto& sceneRes = float16_t2(mainViewTransformReturnInfo.sceneResolution); + { + hlsl::float32_t4x4 projection; - matrix4SIMD projection; if (isPerspective) if (isLH) - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), sceneRes.x / sceneRes.y, zNear, zFar); + projection = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y * 0.5f, zNear, zFar); // TODO: why do I need to divide aspect ratio by 2? else - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), sceneRes.x / sceneRes.y, zNear, zFar); + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y * 0.5f, zNear, zFar); else { - float viewHeight = viewWidth * sceneRes.y / sceneRes.x; + float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; if (isLH) - projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar); + projection = hlsl::math::thin_lens::lhPerspectiveFovMatrix(viewWidth, viewHeight, zNear, zFar); else - projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar); + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(viewWidth, viewHeight, zNear, zFar); } return projection; }()); @@ -857,12 +850,14 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::Text("Sampling Mode:"); ImGui::SameLine(); - const char* samplingModes[] = - { - "Triangle Solid Angle", - "Triangle Projected Solid Angle", - "Parallelogram Projected Solid Angle" - }; + const char *samplingModes[] = + { + "Triangle Solid Angle", + "Triangle Projected Solid Angle", + "Parallelogram Projected Solid Angle", + "Rectangle Pyramid Solid Angle", + "Biquadratic pyramid solid angle", + "Bilinear pyramid solid angle"}; int currentMode = static_cast(m_samplingMode); @@ -871,8 +866,10 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR m_samplingMode = static_cast(currentMode); } - - + ImGui::Checkbox("Debug Visualization", &m_debugVisualization); + ImGui::Text("Pipeline idx: SA=%d, Ray=%d", + static_cast(m_samplingMode) * DebugPermutations + (m_debugVisualization ? 1 : 0), + m_debugVisualization ? 1 : 0); ImGui::Checkbox("Frame seeding", &m_frameSeeding); ImGui::SliderInt("Sample Count", &m_SampleCount, 0, 512); @@ -983,12 +980,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR * note it also modifies input view matrix but projection matrix is immutable */ - // No need because camera already has this functionality - // if (ImGui::IsKeyPressed(ImGuiKey_Home)) - // { - // cameraToHome(); - // } - if (ImGui::IsKeyPressed(ImGuiKey_End)) { m_TRS = TRS{}; @@ -1003,11 +994,11 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGuizmo::SetID(0u); // TODO: camera will return hlsl::float32_tMxN - auto view = *reinterpret_cast(camera.getViewMatrix().pointer()); - imguizmoM16InOut.view = hlsl::transpose(getMatrix3x4As4x4(view)); + auto view = camera.getViewMatrix(); + imguizmoM16InOut.view = hlsl::transpose(hlsl::math::linalg::promote_affine<4, 4>(view)); // TODO: camera will return hlsl::float32_tMxN - imguizmoM16InOut.projection = hlsl::transpose(*reinterpret_cast(camera.getProjectionMatrix().pointer())); + imguizmoM16InOut.projection = hlsl::transpose(camera.getProjectionMatrix()); ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &imguizmoM16InOut.model[0][0]); if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates @@ -1037,40 +1028,40 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); solidAngleViewTransformReturnInfo.sceneResolution = uint16_t2(static_cast(contentRegionSize.x), static_cast(contentRegionSize.y)); solidAngleViewTransformReturnInfo.allowCameraMovement = false; // not used in this view - ImGui::Image({ renderColorViewDescIndices[ERV_SOLID_ANGLE_VIEW] }, contentRegionSize); + ImGui::Image({renderColorViewDescIndices[ERV_SOLID_ANGLE_VIEW]}, contentRegionSize); ImGui::End(); } // Show data coming from GPU -#if DEBUG_DATA + if (m_debugVisualization) { if (ImGui::Begin("Result Data")) { - auto drawColorField = [&](const char* fieldName, uint32_t index) - { - ImGui::Text("%s: %u", fieldName, index); + auto drawColorField = [&](const char *fieldName, uint32_t index) + { + ImGui::Text("%s: %u", fieldName, index); - if (index >= 27) - { - ImGui::SameLine(); - ImGui::Text(""); - return; - } + if (index >= 27) + { + ImGui::SameLine(); + ImGui::Text(""); + return; + } - const auto& c = colorLUT[index]; // uses the combined LUT we made earlier + const auto &c = colorLUT[index]; // uses the combined LUT we made earlier - ImGui::SameLine(); + ImGui::SameLine(); - // Color preview button - ImGui::ColorButton( - fieldName, - ImVec4(c.r, c.g, c.b, 1.0f), - 0, - ImVec2(20, 20)); + // Color preview button + ImGui::ColorButton( + fieldName, + ImVec4(c.r, c.g, c.b, 1.0f), + 0, + ImVec2(20, 20)); - ImGui::SameLine(); - ImGui::Text("%s", colorNames[index]); - }; + ImGui::SameLine(); + ImGui::Text("%s", colorNames[index]); + }; // Vertices if (ImGui::CollapsingHeader("Vertices", ImGuiTreeNodeFlags_DefaultOpen)) @@ -1085,7 +1076,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::SameLine(); static const float32_t3 constCorners[8] = { float32_t3(-1, -1, -1), float32_t3(1, -1, -1), float32_t3(-1, 1, -1), float32_t3(1, 1, -1), - float32_t3(-1, -1, 1), float32_t3(1, -1, 1), float32_t3(-1, 1, 1), float32_t3(1, 1, 1) }; + float32_t3(-1, -1, 1), float32_t3(1, -1, 1), float32_t3(-1, 1, 1), float32_t3(1, 1, 1)}; float32_t3 vertexLocation = constCorners[m_GPUOutResulData.vertices[i]]; ImGui::Text(" : (%.3f, %.3f, %.3f", vertexLocation.x, vertexLocation.y, vertexLocation.z); } @@ -1110,32 +1101,112 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR drawColorField(" ", i); } + ImGui::Separator(); + ImGui::Text("Valid Samples: %u / %u", m_GPUOutResulData.validSampleCount / hlsl::max(m_GPUOutResulData.threadCount, 1u), m_GPUOutResulData.sampleCount); + ImGui::ProgressBar(static_cast(m_GPUOutResulData.validSampleCount / hlsl::max(m_GPUOutResulData.threadCount, 1u)) / static_cast(m_GPUOutResulData.sampleCount)); ImGui::Separator(); - // Silhouette info - drawColorField("silhouetteIndex", m_GPUOutResulData.silhouetteIndex); + // Silhouette + if (ImGui::CollapsingHeader("Silhouette")) + { + drawColorField("silhouetteIndex", m_GPUOutResulData.silhouetteIndex); + ImGui::Text("Region: (%u, %u, %u)", m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); + ImGui::Text("Silhouette Vertex Count: %u", m_GPUOutResulData.silhouetteVertexCount); + ImGui::Text("Positive Vertex Count: %u", m_GPUOutResulData.positiveVertCount); + ImGui::Text("Edge Visibility Mismatch: %s", m_GPUOutResulData.edgeVisibilityMismatch ? "true" : "false"); + ImGui::Text("Max Triangles Exceeded: %s", m_GPUOutResulData.maxTrianglesExceeded ? "true" : "false"); + for (uint32_t i = 0; i < 6; i++) + ImGui::Text("Vertex[%u]: %u", i, m_GPUOutResulData.vertices[i]); + ImGui::Text("Clipped Silhouette Vertex Count: %u", m_GPUOutResulData.clippedSilhouetteVertexCount); + for (uint32_t i = 0; i < 7; i++) + ImGui::Text("Clipped Vertex[%u]: (%.3f, %.3f, %.3f) Index: %u", i, + m_GPUOutResulData.clippedSilhouetteVertices[i].x, + m_GPUOutResulData.clippedSilhouetteVertices[i].y, + m_GPUOutResulData.clippedSilhouetteVertices[i].z, + m_GPUOutResulData.clippedSilhouetteVerticesIndices[i]); + + // Silhouette mask printed in binary + auto printBin = [](uint32_t bin, const char *name) + { + char buf[33]; + for (int i = 0; i < 32; i++) + buf[i] = (bin & (1u << (31 - i))) ? '1' : '0'; + buf[32] = '\0'; + ImGui::Text("%s: 0x%08X", name, bin); + ImGui::Text("binary: 0b%s", buf); + ImGui::Separator(); + }; + printBin(m_GPUOutResulData.silhouette, "Silhouette"); + printBin(m_GPUOutResulData.rotatedSil, "rotatedSilhouette"); + + printBin(m_GPUOutResulData.clipCount, "clipCount"); + printBin(m_GPUOutResulData.clipMask, "clipMask"); + printBin(m_GPUOutResulData.rotatedClipMask, "rotatedClipMask"); + printBin(m_GPUOutResulData.rotateAmount, "rotateAmount"); + printBin(m_GPUOutResulData.wrapAround, "wrapAround"); + } - ImGui::Text("silhouette Vertex Count: %u", m_GPUOutResulData.silhouetteVertexCount); - ImGui::Text("silhouette Positive VertexCount: %u", m_GPUOutResulData.positiveVertCount); - ImGui::Text("Silhouette Mismatch: %s", m_GPUOutResulData.edgeVisibilityMismatch ? "true" : "false"); - ImGui::Separator(); - ImGui::Text("Max triangles exceeded: %s", m_GPUOutResulData.maxTrianglesExceeded ? "true" : "false"); - ImGui::Text("spherical lune detected: %s", m_GPUOutResulData.sphericalLuneDetected ? "true" : "false"); - ImGui::Separator(); - //ImGui::Text("Sampling outside the silhouette: %s", m_GPUOutResulData.sampleOutsideSilhouette ? "true" : "false"); - ImGui::Text("Parallelogram does not bound: %s", m_GPUOutResulData.parallelogramDoesNotBound ? "true" : "false"); - ImGui::Text("Parallelogram vertices inside: %s", m_GPUOutResulData.parallelogramVerticesInside ? "true" : "false"); - ImGui::Text("Parallelogram edges inside: %s", m_GPUOutResulData.parallelogramEdgesInside ? "true" : "false"); - ImGui::Text("Parallelogram area: %.3f", m_GPUOutResulData.parallelogramArea); - ImGui::Text("Failed vertex index: %u", m_GPUOutResulData.failedVertexIndex); - ImGui::Text("Failed vertex UV: (%.3f, %.3f)", m_GPUOutResulData.failedVertexUV.x, m_GPUOutResulData.failedVertexUV.y); - ImGui::Text("Failed edge index: %u", m_GPUOutResulData.failedEdgeIndex); - ImGui::Text("Failed edge sample: %u", m_GPUOutResulData.failedEdgeSample); - ImGui::Text("Failed edge UV: (%.3f, %.3f)", m_GPUOutResulData.failedEdgeUV.x, m_GPUOutResulData.failedEdgeUV.y); - ImGui::Text("Failed point 3D: (%.3f, %.3f, %.3f)", m_GPUOutResulData.failedPoint.x, m_GPUOutResulData.failedPoint.y, m_GPUOutResulData.failedPoint.z); - for (uint32_t i = 0; i < 8; i++) - ImGui::Text("edge is convex: %s", m_GPUOutResulData.edgeIsConvex[i] ? "true" : "false"); - ImGui::Separator(); + // Parallelogram + if (m_samplingMode == PROJECTED_PARALLELOGRAM_SOLID_ANGLE && ImGui::CollapsingHeader("Projected Parallelogram", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Does Not Bound: %s", m_GPUOutResulData.parallelogramDoesNotBound ? "true" : "false"); + ImGui::Text("Area: %.3f", m_GPUOutResulData.parallelogramArea); + ImGui::Text("Failed Vertex Index: %u", m_GPUOutResulData.failedVertexIndex); + for (uint32_t i = 0; i < 4; i++) + ImGui::Text("Edge Is Convex[%u]: %s", i, m_GPUOutResulData.edgeIsConvex[i] ? "true" : "false"); + ImGui::Text("Vertices Inside: %s", m_GPUOutResulData.parallelogramVerticesInside ? "true" : "false"); + ImGui::Text("Edges Inside: %s", m_GPUOutResulData.parallelogramEdgesInside ? "true" : "false"); + for (uint32_t i = 0; i < 4; i++) + ImGui::Text("Corner[%u]: (%.3f, %.3f)", i, m_GPUOutResulData.parallelogramCorners[i].x, m_GPUOutResulData.parallelogramCorners[i].y); + } + else if ((m_samplingMode == SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE || m_samplingMode == SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC ||m_samplingMode == SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) && ImGui::CollapsingHeader("Spherical Pyramid", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Spans Hemisphere: %s", m_GPUOutResulData.pyramidSpansHemisphere ? "YES (warning)" : "no"); + ImGui::Text("Best Caliper Edge: %u", m_GPUOutResulData.pyramidBestEdge); + ImGui::Separator(); + + ImGui::Text("Axis 1: (%.4f, %.4f, %.4f)", + m_GPUOutResulData.pyramidAxis1.x, m_GPUOutResulData.pyramidAxis1.y, m_GPUOutResulData.pyramidAxis1.z); + ImGui::Text(" Half-Width: %.4f Offset: %.4f", + m_GPUOutResulData.pyramidHalfWidth1, m_GPUOutResulData.pyramidOffset1); + ImGui::Text(" Bounds: [%.4f, %.4f]", + m_GPUOutResulData.pyramidMin1, m_GPUOutResulData.pyramidMax1); + + ImGui::Text("Axis 2: (%.4f, %.4f, %.4f)", + m_GPUOutResulData.pyramidAxis2.x, m_GPUOutResulData.pyramidAxis2.y, m_GPUOutResulData.pyramidAxis2.z); + ImGui::Text(" Half-Width: %.4f Offset: %.4f", + m_GPUOutResulData.pyramidHalfWidth2, m_GPUOutResulData.pyramidOffset2); + ImGui::Text(" Bounds: [%.4f, %.4f]", + m_GPUOutResulData.pyramidMin2, m_GPUOutResulData.pyramidMax2); + + ImGui::Separator(); + ImGui::Text("Center: (%.4f, %.4f, %.4f)", + m_GPUOutResulData.pyramidCenter.x, m_GPUOutResulData.pyramidCenter.y, m_GPUOutResulData.pyramidCenter.z); + ImGui::Text("Solid Angle (bound): %.6f sr", m_GPUOutResulData.pyramidSolidAngle); + } + else if (m_samplingMode == TRIANGLE_SOLID_ANGLE || m_samplingMode == TRIANGLE_PROJECTED_SOLID_ANGLE && ImGui::CollapsingHeader("Spherical Triangle", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Spherical Lune Detected: %s", m_GPUOutResulData.sphericalLuneDetected ? "true" : "false"); + ImGui::Text("Triangle Count: %u", m_GPUOutResulData.triangleCount); + // print solidAngles for each triangle + { + ImGui::Text("Solid Angles per Triangle:"); + ImGui::BeginTable("SolidAnglesTable", 2); + ImGui::TableSetupColumn("Triangle Index"); + ImGui::TableSetupColumn("Solid Angle"); + ImGui::TableHeadersRow(); + for (uint32_t i = 0; i < m_GPUOutResulData.triangleCount; ++i) + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::Text("%u", i); + ImGui::TableSetColumnIndex(1); + ImGui::Text("%.6f", m_GPUOutResulData.solidAngles[i]); + } + ImGui::Text("Total: %.6f", m_GPUOutResulData.totalSolidAngles); + ImGui::EndTable(); + } + } { float32_t3 xAxis = m_OBBModelMatrix[0].xyz; @@ -1150,6 +1221,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR bool hasSkew = false; if (abs(dot(nx, ny)) > epsilon || abs(dot(nx, nz)) > epsilon || abs(dot(ny, nz)) > epsilon) hasSkew = true; + ImGui::Separator(); ImGui::Text("Matrix Has Skew: %s", hasSkew ? "true" : "false"); } @@ -1210,92 +1282,44 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } ImGui::EndPopup(); } - - ImGui::Separator(); - - // Region (uint32_t3) - ImGui::Text("region: (%u, %u, %u)", - m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); - - // print solidAngles for each triangle - { - ImGui::Text("Solid Angles per Triangle:"); - ImGui::BeginTable("SolidAnglesTable", 2); - ImGui::TableSetupColumn("Triangle Index"); - ImGui::TableSetupColumn("Solid Angle"); - ImGui::TableHeadersRow(); - for (uint32_t i = 0; i < m_GPUOutResulData.triangleCount; ++i) - { - ImGui::TableNextRow(); - ImGui::TableSetColumnIndex(0); - ImGui::Text("%u", i); - ImGui::TableSetColumnIndex(1); - ImGui::Text("%.6f", m_GPUOutResulData.solidAngles[i]); - } - ImGui::Text("Total: %.6f", m_GPUOutResulData.totalSolidAngles); - ImGui::EndTable(); - } - - ImGui::Separator(); - - // Silhouette mask printed in binary - - auto printBin = [](uint32_t bin, const char* name) - { - char buf[33]; - for (int i = 0; i < 32; i++) - buf[i] = (bin & (1u << (31 - i))) ? '1' : '0'; - buf[32] = '\0'; - ImGui::Text("%s: 0x%08X", name, bin); - ImGui::Text("binary: 0b%s", buf); - ImGui::Separator(); - }; - printBin(m_GPUOutResulData.silhouette, "Silhouette"); - printBin(m_GPUOutResulData.rotatedSil, "rotatedSilhouette"); - - printBin(m_GPUOutResulData.clipCount, "clipCount"); - printBin(m_GPUOutResulData.clipMask, "clipMask"); - printBin(m_GPUOutResulData.rotatedClipMask, "rotatedClipMask"); - printBin(m_GPUOutResulData.rotateAmount, "rotateAmount"); - printBin(m_GPUOutResulData.wrapAround, "wrapAround"); } ImGui::End(); } -#endif + // view matrices editor { ImGui::Begin("Matrices"); - auto addMatrixTable = [&](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true) + auto addMatrixTable = [&](const char *topText, const char *tableName, const int rows, const int columns, const float *pointer, const bool withSeparator = true) + { + ImGui::Text(topText); + if (ImGui::BeginTable(tableName, columns)) { - ImGui::Text(topText); - if (ImGui::BeginTable(tableName, columns)) + for (int y = 0; y < rows; ++y) { - for (int y = 0; y < rows; ++y) + ImGui::TableNextRow(); + for (int x = 0; x < columns; ++x) { - ImGui::TableNextRow(); - for (int x = 0; x < columns; ++x) - { - ImGui::TableSetColumnIndex(x); - ImGui::Text("%.3f", *(pointer + (y * columns) + x)); - } + ImGui::TableSetColumnIndex(x); + ImGui::Text("%.3f", *(pointer + (y * columns) + x)); } - ImGui::EndTable(); } + ImGui::EndTable(); + } - if (withSeparator) - ImGui::Separator(); - }; + if (withSeparator) + ImGui::Separator(); + }; static RandomSampler rng(0x45); // Initialize RNG with seed // Helper function to check if cube intersects unit sphere at origin - auto isCubeOutsideUnitSphere = [](const float32_t3& translation, const float32_t3& scale) -> bool - { - float cubeRadius = glm::length(scale) * 0.5f; - float distanceToCenter = glm::length(translation); - return (distanceToCenter - cubeRadius) > 1.0f; - }; + auto isCubeOutsideUnitSphere = [](const float32_t3 &translation, const float32_t3 &scale) -> bool + { + float cubeRadius = glm::length(scale) * 0.5f; + float distanceToCenter = glm::length(translation); + return (distanceToCenter - cubeRadius) > 1.0f; + }; static TRS lastTRS = {}; if (ImGui::Button("Randomize Translation")) @@ -1345,8 +1369,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } addMatrixTable("Model Matrix", "ModelMatrixTable", 4, 4, &m_OBBModelMatrix[0][0]); - addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, camera.getViewMatrix().pointer()); - addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, camera.getProjectionMatrix().pointer(), false); + addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, &camera.getViewMatrix()[0].x); + addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, &camera.getProjectionMatrix()[0].x, false); ImGui::End(); } @@ -1355,7 +1379,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // To be 100% accurate and not overly conservative we'd have to explicitly `cull_frees` and defragment each time, // so unless you do that, don't use this basic info to optimize the size of your IMGUI buffer. { - auto* streaminingBuffer = imGUI->getStreamingBuffer(); + auto *streaminingBuffer = imGUI->getStreamingBuffer(); const size_t total = streaminingBuffer->get_total_size(); // total memory range size for which allocation can be requested const size_t freeSize = streaminingBuffer->getAddressAllocator().get_free_size(); // max total free bloock memory size we can still allocate from total memory available @@ -1388,12 +1412,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::PopStyleColor(); - ImDrawList* drawList = ImGui::GetWindowDrawList(); + ImDrawList *drawList = ImGui::GetWindowDrawList(); ImVec2 progressBarPos = ImGui::GetItemRectMin(); ImVec2 progressBarSize = ImGui::GetItemRectSize(); - const char* text = "%.2f%% free"; + const char *text = "%.2f%% free"; char textBuffer[64]; snprintf(textBuffer, sizeof(textBuffer), text, freePercentage); @@ -1430,15 +1454,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ERV_SOLID_ANGLE_VIEW, Count }; - SubAllocatedDescriptorSet::value_type renderColorViewDescIndices[E_RENDER_VIEWS::Count] = { SubAllocatedDescriptorSet::invalid_value, SubAllocatedDescriptorSet::invalid_value }; + SubAllocatedDescriptorSet::value_type renderColorViewDescIndices[E_RENDER_VIEWS::Count] = {SubAllocatedDescriptorSet::invalid_value, SubAllocatedDescriptorSet::invalid_value}; // - Camera camera = Camera(cameraIntialPosition, cameraInitialTarget, core::matrix4SIMD(), 1, 1, nbl::core::vectorSIMDf(0.0f, 0.0f, 1.0f)); + Camera camera = Camera(cameraIntialPosition, cameraInitialTarget, {}, 1, 1, nbl::core::vectorSIMDf(0.0f, 0.0f, 1.0f)); // mutables struct TRS // Source of truth { - float32_t3 translation{ 0.0f, 0.0f, 1.5f }; - float32_t3 rotation{ 0.0f }; // MUST stay orthonormal - float32_t3 scale{ 1.0f }; + float32_t3 translation{0.0f, 0.0f, 1.5f}; + float32_t3 rotation{0.0f}; // MUST stay orthonormal + float32_t3 scale{1.0f}; } m_TRS; float32_t4x4 m_OBBModelMatrix; // always overwritten from TRS @@ -1447,9 +1471,9 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR TransformReturnInfo mainViewTransformReturnInfo; TransformReturnInfo solidAngleViewTransformReturnInfo; - const static inline core::vectorSIMDf cameraIntialPosition{ -3.0f, 6.0f, 3.0f }; - const static inline core::vectorSIMDf cameraInitialTarget{ 0.f, 0.0f, 3.f }; - const static inline core::vectorSIMDf cameraInitialUp{ 0.f, 0.f, 1.f }; + const static inline core::vectorSIMDf cameraIntialPosition{-3.0f, 6.0f, 3.0f}; + const static inline core::vectorSIMDf cameraInitialTarget{0.f, 0.0f, 3.f}; + const static inline core::vectorSIMDf cameraInitialUp{0.f, 0.f, 1.f}; float fov = 90.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; float viewWidth = 10.f; @@ -1457,13 +1481,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR bool isPerspective = true, isLH = true, flipGizmoY = true, move = true; bool firstFrame = true; - SolidAngleVisualizer* m_visualizer; + SolidAngleVisualizer *m_visualizer; } interface; class SamplingBenchmark final { public: - SamplingBenchmark(SolidAngleVisualizer& base) + SamplingBenchmark(SolidAngleVisualizer &base) : m_api(base.m_api), m_device(base.m_device), m_logger(base.m_logger), m_visualizer(&base) { @@ -1478,15 +1502,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_timestampAfterCmdBuff)) base.logFail("Failed to create Command Buffers!\n"); - // Load shaders, set up pipeline + // Load shaders, set up pipelines (one per sampling mode) { - smart_refctd_ptr shader; + auto loadShader = [&](auto key) -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = base.m_logger.get(); - lp.workingDirectory = "app_resources"; // virtual root - // this time we load a shader directly from a file - auto key = nbl::this_example::builtin::build::get_spirv_key<"benchmark">(m_device.get()); + lp.workingDirectory = "app_resources"; auto assetBundle = base.m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) @@ -1494,21 +1516,28 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR base.logFail("Could not load shader!"); assert(0); } - - // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - shader = IAsset::castDown(assets[0]); - } + auto shader = IAsset::castDown(assets[0]); + if (!shader) + base.logFail("Failed to load precompiled benchmark shader!\n"); + return shader; + }; - if (!shader) - base.logFail("Failed to load precompiled \"benchmark\" shader!\n"); + smart_refctd_ptr shaders[SAMPLING_MODE::Count] = { + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_tri_sa">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_tri_psa">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_para">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_rectangle">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_biquad">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_bilinear">(m_device.get())), + }; nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { {.binding = 0, .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1} }; + .count = 1}}; smart_refctd_ptr dsLayout = base.m_device->createDescriptorSetLayout(bindings); if (!dsLayout) base.logFail("Failed to create a Descriptor Layout!\n"); @@ -1516,24 +1545,25 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR SPushConstantRange pushConstantRanges[] = { {.stageFlags = ShaderStage::ESS_COMPUTE, .offset = 0, - .size = sizeof(BenchmarkPushConstants)} }; + .size = sizeof(BenchmarkPushConstants)}}; m_pplnLayout = base.m_device->createPipelineLayout(pushConstantRanges, smart_refctd_ptr(dsLayout)); if (!m_pplnLayout) base.logFail("Failed to create a Pipeline Layout!\n"); + for (uint32_t i = 0; i < SAMPLING_MODE::Count; i++) { IGPUComputePipeline::SCreationParams params = {}; params.layout = m_pplnLayout.get(); params.shader.entryPoint = "main"; - params.shader.shader = shader.get(); - if (!base.m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_pipeline)) + params.shader.shader = shaders[i].get(); + if (!base.m_device->createComputePipelines(nullptr, {¶ms, 1}, &m_pipelines[i])) base.logFail("Failed to create pipelines (compile & link shaders)!\n"); } // Allocate the memory { constexpr size_t BufferSize = BENCHMARK_WORKGROUP_COUNT * BENCHMARK_WORKGROUP_DIMENSION_SIZE_X * - BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y * BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z * sizeof(uint32_t); + BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y * BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z * sizeof(uint32_t); nbl::video::IGPUBuffer::SCreationParams params = {}; params.size = BufferSize; @@ -1551,15 +1581,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR base.logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); assert(dummyBuff->getBoundMemory().memory == m_allocation.memory.get()); - smart_refctd_ptr pool = base.m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &dsLayout.get(), 1 }); + smart_refctd_ptr pool = base.m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, {&dsLayout.get(), 1}); m_ds = pool->createDescriptorSet(std::move(dsLayout)); { IGPUDescriptorSet::SDescriptorInfo info[1]; info[0].desc = smart_refctd_ptr(dummyBuff); - info[0].info.buffer = { .offset = 0, .size = BufferSize }; + info[0].info.buffer = {.offset = 0, .size = BufferSize}; IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = info} }; + {.dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = info}}; base.m_device->updateDescriptorSets(writes, {}); } } @@ -1578,15 +1608,23 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { m_logger->log("\n\nsampling benchmark result:", ILogger::ELL_PERFORMANCE); - m_logger->log("sampling benchmark, parallelogram projected solid angle result:", ILogger::ELL_PERFORMANCE); + m_logger->log("sampling benchmark, SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE); + + m_logger->log("sampling benchmark, SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC); + + m_logger->log("sampling benchmark, SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR); + + m_logger->log("sampling benchmark, PROJECTED_PARALLELOGRAM_SOLID_ANGLE result:", ILogger::ELL_PERFORMANCE); performBenchmark(SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE); - m_logger->log("sampling benchmark, triangle solid angle result:", ILogger::ELL_PERFORMANCE); + m_logger->log("sampling benchmark, TRIANGLE_SOLID_ANGLE result:", ILogger::ELL_PERFORMANCE); performBenchmark(SAMPLING_MODE::TRIANGLE_SOLID_ANGLE); - //m_logger->log("sampling benchmark, triangle projected solid angle result:", ILogger::ELL_PERFORMANCE); - //performBenchmark(SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE); - + // m_logger->log("sampling benchmark, triangle projected solid angle result:", ILogger::ELL_PERFORMANCE); + // performBenchmark(SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE); } private: @@ -1599,35 +1637,34 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR uint64_t semaphoreCounter = 0; smart_refctd_ptr semaphore = m_device->createSemaphore(semaphoreCounter); - IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; - IQueue::SSubmitInfo::SSemaphoreInfo waits[] = { {.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; + IQueue::SSubmitInfo::SSemaphoreInfo signals[] = {{.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT}}; + IQueue::SSubmitInfo::SSemaphoreInfo waits[] = {{.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT}}; IQueue::SSubmitInfo beforeTimestapSubmitInfo[1] = {}; - const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufsBegin[] = { {.cmdbuf = m_timestampBeforeCmdBuff.get()} }; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufsBegin[] = {{.cmdbuf = m_timestampBeforeCmdBuff.get()}}; beforeTimestapSubmitInfo[0].commandBuffers = cmdbufsBegin; beforeTimestapSubmitInfo[0].signalSemaphores = signals; beforeTimestapSubmitInfo[0].waitSemaphores = waits; IQueue::SSubmitInfo afterTimestapSubmitInfo[1] = {}; - const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufsEnd[] = { {.cmdbuf = m_timestampAfterCmdBuff.get()} }; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufsEnd[] = {{.cmdbuf = m_timestampAfterCmdBuff.get()}}; afterTimestapSubmitInfo[0].commandBuffers = cmdbufsEnd; afterTimestapSubmitInfo[0].signalSemaphores = signals; afterTimestapSubmitInfo[0].waitSemaphores = waits; IQueue::SSubmitInfo benchmarkSubmitInfos[1] = {}; - const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = {{.cmdbuf = m_cmdbuf.get()}}; benchmarkSubmitInfos[0].commandBuffers = cmdbufs; benchmarkSubmitInfos[0].signalSemaphores = signals; benchmarkSubmitInfos[0].waitSemaphores = waits; - m_pushConstants.benchmarkMode = mode; m_pushConstants.modelMatrix = float32_t3x4(transpose(m_visualizer->interface.m_OBBModelMatrix)); - recordCmdBuff(); + m_pushConstants.sampleCount = m_SampleCount; + recordCmdBuff(mode); // warmup runs for (int i = 0; i < WarmupIterations; ++i) { - if (i == 0) m_api->startCapture(); waits[0].value = semaphoreCounter; @@ -1661,11 +1698,11 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR m_logger->log("%llu ns, %f s", ILogger::ELL_PERFORMANCE, nativeBenchmarkTimeElapsedNanoseconds, nativeBenchmarkTimeElapsedSeconds); } - void recordCmdBuff() + void recordCmdBuff(SAMPLING_MODE mode) { m_cmdbuf->begin(IGPUCommandBuffer::USAGE::SIMULTANEOUS_USE_BIT); m_cmdbuf->beginDebugMarker("sampling compute dispatch", vectorSIMDf(0, 1, 0, 1)); - m_cmdbuf->bindComputePipeline(m_pipeline.get()); + m_cmdbuf->bindComputePipeline(m_pipelines[mode].get()); m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); m_cmdbuf->pushConstants(m_pplnLayout.get(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(BenchmarkPushConstants), &m_pushConstants); m_cmdbuf->dispatch(BENCHMARK_WORKGROUP_COUNT, 1, 1); @@ -1707,7 +1744,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR core::smart_refctd_ptr m_api; smart_refctd_ptr m_device; smart_refctd_ptr m_logger; - SolidAngleVisualizer* m_visualizer; + SolidAngleVisualizer *m_visualizer; nbl::video::IDeviceMemoryAllocator::SAllocation m_allocation = {}; smart_refctd_ptr m_cmdpool = nullptr; @@ -1715,20 +1752,20 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR smart_refctd_ptr m_ds = nullptr; smart_refctd_ptr m_pplnLayout = nullptr; BenchmarkPushConstants m_pushConstants; - smart_refctd_ptr m_pipeline; + smart_refctd_ptr m_pipelines[SAMPLING_MODE::Count]; smart_refctd_ptr m_timestampBeforeCmdBuff = nullptr; smart_refctd_ptr m_timestampAfterCmdBuff = nullptr; smart_refctd_ptr m_queryPool = nullptr; uint32_t m_queueFamily; - IQueue* m_computeQueue; + IQueue *m_computeQueue; static constexpr int WarmupIterations = 50; static constexpr int Iterations = 1; }; template - inline bool logFail(const char* msg, Args &&...args) + inline bool logFail(const char *msg, Args &&...args) { m_logger->log(msg, ILogger::ELL_ERROR, std::forward(args)...); return false; diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index 782c8b624..8fadbd866 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -16,8 +16,8 @@ #include #include -class Camera -{ +class Camera +{ public: Camera() = default; Camera(const nbl::core::vectorSIMDf& position, const nbl::core::vectorSIMDf& lookat, const nbl::hlsl::float32_t4x4& projection, float moveSpeed = 1.0f, float rotateSpeed = 1.0f, const nbl::core::vectorSIMDf& upVec = nbl::core::vectorSIMDf(0.0f, 1.0f, 0.0f), const nbl::core::vectorSIMDf& backupUpVec = nbl::core::vectorSIMDf(0.5f, 1.0f, 0.0f)) @@ -72,7 +72,7 @@ class Camera inline void mapKeysCustom(std::array& map) { keysMap = map; } inline const nbl::hlsl::float32_t4x4& getProjectionMatrix() const { return projMatrix; } - inline const nbl::hlsl::float32_t3x4& getViewMatrix() const { return viewMatrix; } + inline const nbl::hlsl::float32_t3x4& getViewMatrix() const { return viewMatrix; } inline const nbl::hlsl::float32_t4x4& getConcatenatedMatrix() const { return concatMatrix; } inline void setProjectionMatrix(const nbl::hlsl::float32_t4x4& projection) @@ -81,16 +81,16 @@ class Camera leftHanded = nbl::hlsl::determinant(projMatrix) < 0.f; concatMatrix = nbl::hlsl::math::linalg::promoted_mul(projMatrix, viewMatrix); } - + inline void setPosition(const nbl::core::vectorSIMDf& pos) { position.set(pos); recomputeViewMatrix(); } - + inline const nbl::core::vectorSIMDf& getPosition() const { return position; } - inline void setTarget(const nbl::core::vectorSIMDf& pos) + inline void setTarget(const nbl::core::vectorSIMDf& pos) { target.set(pos); recomputeViewMatrix(); @@ -99,11 +99,11 @@ class Camera inline const nbl::core::vectorSIMDf& getTarget() const { return target; } inline void setUpVector(const nbl::core::vectorSIMDf& up) { upVector = up; } - + inline void setBackupUpVector(const nbl::core::vectorSIMDf& up) { backupUpVector = up; } inline const nbl::core::vectorSIMDf& getUpVector() const { return upVector; } - + inline const nbl::core::vectorSIMDf& getBackupUpVector() const { return backupUpVector; } inline const float getMoveSpeed() const { return moveSpeed; } @@ -114,7 +114,7 @@ class Camera inline void setRotateSpeed(const float _rotateSpeed) { rotateSpeed = _rotateSpeed; } - inline void recomputeViewMatrix() + inline void recomputeViewMatrix() { nbl::hlsl::float32_t3 pos = nbl::core::convertToHLSLVector(position).xyz; nbl::hlsl::float32_t3 localTarget = nbl::hlsl::normalize(nbl::core::convertToHLSLVector(target).xyz - pos); @@ -144,64 +144,78 @@ class Camera void mouseProcess(const nbl::ui::IMouseEventChannel::range_t& events) { - for (auto eventIt=events.begin(); eventIt!=events.end(); eventIt++) + for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) { auto ev = *eventIt; - if(ev.type == nbl::ui::SMouseEvent::EET_CLICK && ev.clickEvent.mouseButton == nbl::ui::EMB_LEFT_BUTTON) - if(ev.clickEvent.action == nbl::ui::SMouseEvent::SClickEvent::EA_PRESSED) + if (ev.type == nbl::ui::SMouseEvent::EET_CLICK && ev.clickEvent.mouseButton == nbl::ui::EMB_LEFT_BUTTON) + if (ev.clickEvent.action == nbl::ui::SMouseEvent::SClickEvent::EA_PRESSED) mouseDown = true; else if (ev.clickEvent.action == nbl::ui::SMouseEvent::SClickEvent::EA_RELEASED) mouseDown = false; - if(ev.type == nbl::ui::SMouseEvent::EET_MOVEMENT && mouseDown) + if (ev.type == nbl::ui::SMouseEvent::EET_MOVEMENT && mouseDown) { - nbl::hlsl::float32_t4 pos = nbl::core::convertToHLSLVector(getPosition()); - nbl::hlsl::float32_t4 localTarget = nbl::core::convertToHLSLVector(getTarget()) - pos; - - // Get Relative Rotation for localTarget in Radians - float relativeRotationX, relativeRotationY; - relativeRotationY = atan2(localTarget.x, localTarget.z); - const double z1 = nbl::core::sqrt(localTarget.x*localTarget.x + localTarget.z*localTarget.z); - relativeRotationX = atan2(z1, localTarget.y) - nbl::core::PI()/2; - - constexpr float RotateSpeedScale = 0.003f; - relativeRotationX -= ev.movementEvent.relativeMovementY * rotateSpeed * RotateSpeedScale * -1.0f; - float tmpYRot = ev.movementEvent.relativeMovementX * rotateSpeed * RotateSpeedScale * -1.0f; - + // --- corrected camera rotation update --- + nbl::hlsl::float32_t3 pos = nbl::core::convertToHLSLVector(getPosition()).xyz; + nbl::hlsl::float32_t3 targetVec = nbl::core::convertToHLSLVector(getTarget()).xyz - pos; // original vector to target + + // preserve distance so we don't collapse to unit length + float targetDistance = nbl::hlsl::length(targetVec); + if (targetDistance < 1e-6f) targetDistance = 1.0f; // avoid div-by-zero + + nbl::hlsl::float32_t3 forward = nbl::hlsl::normalize(targetVec); + nbl::hlsl::float32_t3 upVector = nbl::core::convertToHLSLVector(getUpVector()).xyz; + nbl::hlsl::float32_t3 right = nbl::hlsl::normalize(nbl::hlsl::cross(upVector, forward)); + nbl::hlsl::float32_t3 correctedForward = nbl::hlsl::normalize(nbl::hlsl::cross(right, upVector)); + + // horizontal yaw (angle from correctedForward towards right) + float rightDot = nbl::hlsl::dot(targetVec, right); + float forwardDot = nbl::hlsl::dot(targetVec, correctedForward); + float relativeRotationY = atan2(rightDot, forwardDot); + + // pitch: angle above/below horizontal + float upDot = nbl::hlsl::dot(targetVec, upVector); + nbl::hlsl::float32_t3 horizontalComponent = targetVec - upVector * upDot; + float horizontalLength = nbl::hlsl::length(horizontalComponent); + float relativeRotationX = atan2(upDot, horizontalLength); + + // apply mouse/controller deltas (signs simplified) + constexpr float RotateSpeedScale = 0.003f; + relativeRotationX -= ev.movementEvent.relativeMovementY * rotateSpeed * RotateSpeedScale; + float tmpYRot = ev.movementEvent.relativeMovementX * rotateSpeed * RotateSpeedScale; if (leftHanded) - yawDelta = -yawDelta; + relativeRotationY += tmpYRot; + else + relativeRotationY -= tmpYRot; - // Clamp pitch BEFORE applying rotation + // clamp pitch const float MaxVerticalAngle = nbl::core::radians(88.0f); - float currentPitch = asin(nbl::core::dot(forward, upVector).X); - float newPitch = nbl::core::clamp(currentPitch + pitchDelta, -MaxVerticalAngle, MaxVerticalAngle); - pitchDelta = newPitch - currentPitch; - - // Create rotation quaternions using axis-angle method - nbl::core::quaternion pitchRot = nbl::core::quaternion::fromAngleAxis(pitchDelta, right); - nbl::core::quaternion yawRot = nbl::core::quaternion::fromAngleAxis(yawDelta, upVector); - nbl::core::quaternion combinedRot = yawRot * pitchRot; - - pos.w = 0; - localTarget = nbl::hlsl::float32_t4(0, 0, nbl::core::max(1.f, nbl::hlsl::length(pos)), 1.0f); - - const nbl::hlsl::math::quaternion quat = nbl::hlsl::math::quaternion::create(relativeRotationX, relativeRotationY, 0.0f); - nbl::hlsl::float32_t3x4 mat = nbl::hlsl::math::linalg::promote_affine<3, 4, 3, 3>(quat.__constructMatrix()); + if (relativeRotationX > MaxVerticalAngle) relativeRotationX = MaxVerticalAngle; + if (relativeRotationX < -MaxVerticalAngle) relativeRotationX = -MaxVerticalAngle; + // build final direction by first yaw-rotating in the horizontal plane, then pitching + float cosYaw = cos(relativeRotationY); + float sinYaw = sin(relativeRotationY); + nbl::hlsl::float32_t3 yawForward = correctedForward * cosYaw + right * sinYaw; + yawForward = nbl::hlsl::normalize(yawForward); - localTarget = nbl::hlsl::float32_t4(nbl::hlsl::mul(mat, localTarget), 1.0f); + float cosPitch = cos(relativeRotationX); + float sinPitch = sin(relativeRotationX); + nbl::hlsl::float32_t3 finalDir = nbl::hlsl::normalize(yawForward * cosPitch + upVector * sinPitch); - nbl::core::vectorSIMDf finalTarget = nbl::core::constructVecorSIMDFromHLSLVector(localTarget + pos); + // restore original distance and set target + nbl::core::vectorSIMDf finalTarget = nbl::core::constructVecorSIMDFromHLSLVector(pos + finalDir * targetDistance); finalTarget.w = 1.0f; setTarget(finalTarget); + } } } void keyboardProcess(const nbl::ui::IKeyboardEventChannel::range_t& events) { - for(uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) + for (uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) perActionDt[k] = 0.0; /* @@ -210,8 +224,8 @@ class Camera * And If an UP event was sent It will get subtracted it from this value. (Currently Disabled Because we Need better Oracle) */ - for(uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) - if(keysDown[k]) + for (uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) + if (keysDown[k]) { auto timeDiff = std::chrono::duration_cast(nextPresentationTimeStamp - lastVirtualUpTimeStamp).count(); if (timeDiff < 0) @@ -219,10 +233,10 @@ class Camera perActionDt[k] += timeDiff; } - for (auto eventIt=events.begin(); eventIt!=events.end(); eventIt++) + for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) { const auto ev = *eventIt; - + // accumulate the periods for which a key was down auto timeDiff = std::chrono::duration_cast(nextPresentationTimeStamp - ev.timeStamp).count(); if (timeDiff < 0) @@ -235,12 +249,12 @@ class Camera if (ev.keyCode == code) { - if (ev.action == nbl::ui::SKeyboardEvent::ECA_PRESSED && !keysDown[logicalKey]) + if (ev.action == nbl::ui::SKeyboardEvent::ECA_PRESSED && !keysDown[logicalKey]) { perActionDt[logicalKey] += timeDiff; keysDown[logicalKey] = true; } - else if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) + else if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) { // perActionDt[logicalKey] -= timeDiff; keysDown[logicalKey] = false; @@ -264,7 +278,7 @@ class Camera nextPresentationTimeStamp = _nextPresentationTimeStamp; return; } - + void endInputProcessing(std::chrono::microseconds _nextPresentationTimeStamp) { nbl::core::vectorSIMDf pos = getPosition(); @@ -276,13 +290,12 @@ class Camera movedir.makeSafe3D(); movedir = nbl::core::normalize(movedir); - constexpr float MoveSpeedScale = 0.02f; + constexpr float MoveSpeedScale = 0.02f; pos += movedir * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_FORWARD] * moveSpeed * MoveSpeedScale; pos -= movedir * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_BACKWARD] * moveSpeed * MoveSpeedScale; - // strafing - + // if upvector and vector to the target are the same, we have a // problem. so solve this problem: nbl::core::vectorSIMDf up = nbl::core::normalize(upVector); @@ -293,9 +306,11 @@ class Camera up = nbl::core::normalize(backupUpVector); } - pos += up * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_UP] * moveSpeed * MoveSpeedScale; - pos -= up * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_DOWN] * moveSpeed * MoveSpeedScale; + nbl::core::vectorSIMDf currentUp = nbl::core::normalize(nbl::core::cross(localTarget, nbl::core::cross(up, localTarget))); + pos += currentUp * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_UP] * moveSpeed * MoveSpeedScale; + pos -= currentUp * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_DOWN] * moveSpeed * MoveSpeedScale; + // strafing nbl::core::vectorSIMDf strafevect = localTarget; if (leftHanded) strafevect = nbl::core::cross(strafevect, up); @@ -311,7 +326,7 @@ class Camera firstUpdate = false; setPosition(pos); - setTarget(localTarget+pos); + setTarget(localTarget + pos); lastVirtualUpTimeStamp = nextPresentationTimeStamp; } @@ -324,10 +339,10 @@ class Camera private: inline void initDefaultKeysMap() { mapKeysToWASD(); } - - inline void allKeysUp() + + inline void allKeysUp() { - for (uint32_t i=0; i< E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++i) + for (uint32_t i = 0; i < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++i) keysDown[i] = false; mouseDown = false; @@ -340,7 +355,7 @@ class Camera float moveSpeed, rotateSpeed; bool leftHanded, firstUpdate = true, mouseDown = false; - + std::array keysMap = { {nbl::ui::EKC_NONE} }; // map camera E_CAMERA_MOVE_KEYS to corresponding Nabla key codes, by default camera uses WSAD to move // TODO: make them use std::array bool keysDown[E_CAMERA_MOVE_KEYS::ECMK_COUNT] = {};