diff --git a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt index 55ebaf41d..6e90f86cb 100644 --- a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt +++ b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt @@ -24,12 +24,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/shader.comp.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -50,7 +44,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -66,4 +59,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/07_StagingAndMultipleQueues/CMakeLists.txt b/07_StagingAndMultipleQueues/CMakeLists.txt index fe063be7c..b5648de8f 100644 --- a/07_StagingAndMultipleQueues/CMakeLists.txt +++ b/07_StagingAndMultipleQueues/CMakeLists.txt @@ -24,12 +24,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/comp_shader.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -50,7 +44,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -66,4 +59,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS 
${KEYS} -) \ No newline at end of file +) diff --git a/09_GeometryCreator/main.cpp b/09_GeometryCreator/main.cpp index cb3c21f4d..6e34a9064 100644 --- a/09_GeometryCreator/main.cpp +++ b/09_GeometryCreator/main.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#include #include "common.hpp" @@ -73,7 +74,7 @@ class GeometryCreatorApp final : public MonoWindowApplication, public BuiltinRes { core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(m_initialResolution.x)/float(m_initialResolution.y), 0.1, 10000); + float32_t4x4 projectionMatrix = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(60.0f), float(m_initialResolution.x) / m_initialResolution.y, 0.1f, 10000.0f); camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); } @@ -139,13 +140,8 @@ class GeometryCreatorApp final : public MonoWindowApplication, public BuiltinRes cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); } - float32_t3x4 viewMatrix; - float32_t4x4 viewProjMatrix; - // TODO: get rid of legacy matrices - { - memcpy(&viewMatrix,camera.getViewMatrix().pointer(),sizeof(viewMatrix)); - memcpy(&viewProjMatrix,camera.getConcatenatedMatrix().pointer(),sizeof(viewProjMatrix)); - } + float32_t3x4 viewMatrix = camera.getViewMatrix(); + float32_t4x4 viewProjMatrix = camera.getConcatenatedMatrix(); const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix); // tear down scene every frame @@ -251,7 +247,7 @@ class GeometryCreatorApp final : public MonoWindowApplication, public BuiltinRes InputSystem::ChannelReader keyboard; // - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera camera 
= Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); uint16_t gcIndex = {}; @@ -270,4 +266,4 @@ class GeometryCreatorApp final : public MonoWindowApplication, public BuiltinRes } }; -NBL_MAIN_FUNC(GeometryCreatorApp) \ No newline at end of file +NBL_MAIN_FUNC(GeometryCreatorApp) diff --git a/10_CountingSort/CMakeLists.txt b/10_CountingSort/CMakeLists.txt index 14bde428d..1c23744fe 100644 --- a/10_CountingSort/CMakeLists.txt +++ b/10_CountingSort/CMakeLists.txt @@ -24,13 +24,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/prefix_sum_shader.comp.hlsl - app_resources/scatter_shader.comp.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(REQUIRED_CAPS [=[ @@ -72,7 +65,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} diff --git a/11_FFT/CMakeLists.txt b/11_FFT/CMakeLists.txt index ca9fe8428..6b6304ed8 100644 --- a/11_FFT/CMakeLists.txt +++ b/11_FFT/CMakeLists.txt @@ -24,12 +24,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/shader.comp.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -50,7 +44,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -66,4 +59,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO 
${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/12_MeshLoaders/CMakeLists.txt b/12_MeshLoaders/CMakeLists.txt index d2ea26ef5..709b7d40b 100644 --- a/12_MeshLoaders/CMakeLists.txt +++ b/12_MeshLoaders/CMakeLists.txt @@ -17,5 +17,10 @@ nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_L # TODO: Arek temporarily disabled cause I haven't figured out how to make this target yet # LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) +if (NBL_BUILD_DEBUG_DRAW) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::DebugDraw) +endif() + + add_dependencies(${EXECUTABLE_NAME} argparse) target_include_directories(${EXECUTABLE_NAME} PUBLIC $) \ No newline at end of file diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp index d80fa8998..e27ed4be0 100644 --- a/12_MeshLoaders/main.cpp +++ b/12_MeshLoaders/main.cpp @@ -5,22 +5,33 @@ #include "common.hpp" #include "../3rdparty/portable-file-dialogs/portable-file-dialogs.h" +#include #ifdef NBL_BUILD_MITSUBA_LOADER #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" #endif +#ifdef NBL_BUILD_DEBUG_DRAW +#include "nbl/ext/DebugDraw/CDrawAABB.h" +#endif + class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourcesApplication { using device_base_t = MonoWindowApplication; using asset_base_t = BuiltinResourcesApplication; -public: - inline MeshLoadersApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), - device_base_t({ 1280,720 }, EF_D32_SFLOAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) - { - } + enum DrawBoundingBoxMode + { + DBBM_NONE, + DBBM_AABB, + DBBM_OBB, + DBBM_COUNT + }; + + public: + inline MeshLoadersApp(const path& _localInputCWD, const path& _localOutputCWD, const path& 
_sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({1280,720}, EF_D32_SFLOAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} inline bool onAppInitialized(smart_refctd_ptr&& system) override { @@ -88,9 +99,23 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc if (!m_renderer) return logFail("Failed to create renderer!"); - // - if (!reloadModel()) - return false; +#ifdef NBL_BUILD_DEBUG_DRAW + { + auto* renderpass = scRes->getRenderpass(); + ext::debug_draw::DrawAABB::SCreationParameters params = {}; + params.assetManager = m_assetMgr; + params.transfer = getTransferUpQueue(); + params.drawMode = ext::debug_draw::DrawAABB::ADM_DRAW_BATCH; + params.batchPipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); + params.renderpass = smart_refctd_ptr(renderpass); + params.utilities = m_utils; + m_drawAABB = ext::debug_draw::DrawAABB::create(std::move(params)); + } +#endif + + // + if (!reloadModel()) + return false; camera.mapKeysToArrows(); @@ -131,48 +156,57 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc }; cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - const SViewport viewport = { - .x = static_cast(currentRenderArea.offset.x), - .y = static_cast(currentRenderArea.offset.y), - .width = static_cast(currentRenderArea.extent.width), - .height = static_cast(currentRenderArea.extent.height) - }; - cb->setViewport(0u, 1u, &viewport); - - cb->setScissor(0u, 1u, ¤tRenderArea); - } - // late latch input - { - bool reload = false; - camera.beginInputProcessing(nextPresentationTimestamp); - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void - { - for (const auto& 
event : events) - if (event.keyCode == E_KEY_CODE::EKC_R && event.action == SKeyboardEvent::ECA_RELEASED) - reload = true; - camera.keyboardProcess(events); - }, - m_logger.get() - ); - camera.endInputProcessing(nextPresentationTimestamp); - if (reload) - reloadModel(); - } - // draw scene - { - float32_t3x4 viewMatrix; - float32_t4x4 viewProjMatrix; - // TODO: get rid of legacy matrices + const SViewport viewport = { + .x = static_cast(currentRenderArea.offset.x), + .y = static_cast(currentRenderArea.offset.y), + .width = static_cast(currentRenderArea.extent.width), + .height = static_cast(currentRenderArea.extent.height) + }; + cb->setViewport(0u,1u,&viewport); + + cb->setScissor(0u,1u,¤tRenderArea); + } + // late latch input + { + bool reload = false; + camera.beginInputProcessing(nextPresentationTimestamp); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + for (const auto& event : events) + { + if (event.keyCode == E_KEY_CODE::EKC_R && event.action == SKeyboardEvent::ECA_RELEASED) + reload = true; + if (event.keyCode == E_KEY_CODE::EKC_B && event.action == SKeyboardEvent::ECA_RELEASED) + { + m_drawBBMode = DrawBoundingBoxMode((m_drawBBMode + 1) % DBBM_COUNT); + } + } + camera.keyboardProcess(events); + }, + m_logger.get() + ); + camera.endInputProcessing(nextPresentationTimestamp); + if (reload) + reloadModel(); + } + // draw scene + float32_t3x4 viewMatrix = camera.getViewMatrix(); + float32_t4x4 viewProjMatrix = camera.getConcatenatedMatrix(); + m_renderer->render(cb,CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix)); +#ifdef NBL_BUILD_DEBUG_DRAW + if (m_drawBBMode != DBBM_NONE) { - memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); - memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); + const ISemaphore::SWaitInfo 
drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + ext::debug_draw::DrawAABB::DrawParameters drawParams; + drawParams.commandBuffer = cb; + drawParams.cameraMat = viewProjMatrix; + m_drawAABB->render(drawParams, drawFinished, m_drawBBMode == DBBM_OBB ? m_obbInstances : m_aabbInstances); } - m_renderer->render(cb, CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix)); +#endif + cb->endRenderPass(); } - cb->endRenderPass(); - } - cb->end(); + cb->end(); IQueue::SSubmitInfo::SSemaphoreInfo retval = { @@ -410,36 +444,70 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc cpar.utilities = m_utils.get(); cpar.transfer = &transfer; - // basically it records all data uploads and submits them right away - auto future = reservation.convert(cpar); - if (future.copy() != IQueue::RESULT::SUCCESS) + // basically it records all data uploads and submits them right away + auto future = reservation.convert(cpar); + if (future.copy()!=IQueue::RESULT::SUCCESS) + { + m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); + return false; + } + } + + auto tmp = hlsl::float32_t4x3( + hlsl::float32_t3(1,0,0), + hlsl::float32_t3(0,1,0), + hlsl::float32_t3(0,0,1), + hlsl::float32_t3(0,0,0) + ); + core::vector worldTforms; + const auto& converted = reservation.getGPUObjects(); + m_aabbInstances.resize(converted.size()); + m_obbInstances.resize(converted.size()); + for (uint32_t i = 0; i < converted.size(); i++) { - m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); - return false; + const auto& geom = converted[i]; + const auto promoted = geom.value->getAABB(); + printAABB(promoted,"Geometry"); + tmp[3].x += promoted.getExtent().x; + const auto promotedWorld = hlsl::float64_t3x4(worldTforms.emplace_back(hlsl::transpose(tmp))); + const auto transformed = hlsl::shapes::util::transform(promotedWorld,promoted); + printAABB(transformed,"Transformed"); + bound = 
hlsl::shapes::util::union_(transformed,bound); + +#ifdef NBL_BUILD_DEBUG_DRAW + + auto& aabbInst = m_aabbInstances[i]; + const auto tmpAabb = shapes::AABB<3,float>(promoted.minVx, promoted.maxVx); + + hlsl::float32_t3x4 aabbTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(tmpAabb); + const auto tmpWorld = hlsl::float32_t3x4(promotedWorld); + const auto world4x4 = float32_t4x4{ + tmpWorld[0], + tmpWorld[1], + tmpWorld[2], + float32_t4(0, 0, 0, 1) + }; + + aabbInst.color = { 1,1,1,1 }; + aabbInst.transform = math::linalg::promoted_mul(world4x4, aabbTransform); + + auto& obbInst = m_obbInstances[i]; + const auto& cpuGeom = geometries[i].get(); + const auto obb = CPolygonGeometryManipulator::calculateOBB( + cpuGeom->getPositionView().getElementCount(), + [geo = cpuGeom, &world4x4](size_t vertex_i) { + hlsl::float32_t3 pt; + geo->getPositionView().decodeElement(vertex_i, pt); + return pt; + }); + obbInst.color = { 0, 0, 1, 1 }; + obbInst.transform = math::linalg::promoted_mul(world4x4, obb.transform); +#endif } - } - auto tmp = hlsl::float32_t4x3( - hlsl::float32_t3(1, 0, 0), - hlsl::float32_t3(0, 1, 0), - hlsl::float32_t3(0, 0, 1), - hlsl::float32_t3(0, 0, 0) - ); - core::vector worldTforms; - const auto& converted = reservation.getGPUObjects(); - for (const auto& geom : converted) - { - const auto promoted = geom.value->getAABB(); - printAABB(promoted, "Geometry"); - tmp[3].x += promoted.getExtent().x; - const auto promotedWorld = hlsl::float64_t3x4(worldTforms.emplace_back(hlsl::transpose(tmp))); - const auto transformed = hlsl::shapes::util::transform(promotedWorld, promoted); - printAABB(transformed, "Transformed"); - bound = hlsl::shapes::util::union_(transformed, bound); - } - printAABB(bound, "Total"); - if (!m_renderer->addGeometries({ &converted.front().get(),converted.size() })) - return false; + printAABB(bound,"Total"); + if (!m_renderer->addGeometries({ &converted.front().get(),converted.size() })) + return false; auto worlTformsIt = 
worldTforms.begin(); for (const auto& geo : m_renderer->getGeometries()) @@ -456,7 +524,7 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc { const auto measure = hlsl::length(diagonal); const auto aspectRatio = float(m_window->getWidth()) / float(m_window->getHeight()); - camera.setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(1.2f, aspectRatio, distance * measure * 0.1, measure * 4.0)); + camera.setProjectionMatrix(hlsl::math::thin_lens::rhPerspectiveFovMatrix(1.2f, aspectRatio, distance * measure * 0.1, measure * 4.0)); camera.setMoveSpeed(measure * 0.04); } const auto pos = bound.maxVx + diagonal * distance; @@ -492,10 +560,18 @@ class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourc InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; // - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); // mutables std::string m_modelPath; + DrawBoundingBoxMode m_drawBBMode = DBBM_NONE; /* bounding boxes stay hidden until cycled with the B key */ +#ifdef NBL_BUILD_DEBUG_DRAW + smart_refctd_ptr m_drawAABB; + std::vector m_aabbInstances; + std::vector m_obbInstances; + +#endif + bool m_saveGeom = false; std::future m_saveGeomTaskFuture; std::optional m_specifiedGeomSavePath; diff --git a/14_Mortons/CMakeLists.txt b/14_Mortons/CMakeLists.txt index a434ff32a..8229b36b5 100644 --- a/14_Mortons/CMakeLists.txt +++ b/14_Mortons/CMakeLists.txt @@ -21,4 +21,52 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +if(MSVC) + target_compile_options("${EXECUTABLE_NAME}" PUBLIC "/fp:strict") +else() + 
target_compile_options("${EXECUTABLE_NAME}" PUBLIC -ffloat-store -frounding-math -fsignaling-nans -ftrapping-math) +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/test.comp.hlsl", + "KEY": "test", + }, + { + "INPUT": "app_resources/test2.comp.hlsl", + "KEY": "test2", + }, +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h index 6933e77e5..2e81ef564 100644 --- a/14_Mortons/CTester.h +++ b/14_Mortons/CTester.h @@ -4,518 +4,490 @@ #include #include "app_resources/testCommon.hlsl" #include "app_resources/testCommon2.hlsl" -#include "ITester.h" +#include "nbl/examples/Tester/ITester.h" using namespace nbl; -class CTester final : public ITester +class CTester final : public ITester { + using base_t = ITester; + public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); + CTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; - std::uniform_int_distribution shortDistribution(uint16_t(0), std::numeric_limits::max()); +private: + InputTestValues generateInputTestValues() override + { std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); - 
m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - InputTestValues testInput; - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - TestValues expected; - - uint32_t generatedShift = intDistribution(mt) & uint32_t(63); - testInput.shift = generatedShift; - { - uint64_t generatedA = longDistribution(mt); - uint64_t generatedB = longDistribution(mt); - - testInput.generatedA = generatedA; - testInput.generatedB = generatedB; - - expected.emulatedAnd = _static_cast(generatedA & generatedB); - expected.emulatedOr = _static_cast(generatedA | generatedB); - expected.emulatedXor = _static_cast(generatedA ^ generatedB); - expected.emulatedNot = _static_cast(~generatedA); - expected.emulatedPlus = _static_cast(generatedA + generatedB); - expected.emulatedMinus = _static_cast(generatedA - generatedB); - expected.emulatedUnaryMinus = _static_cast(-generatedA); - expected.emulatedLess = uint32_t(generatedA < generatedB); - expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); - expected.emulatedGreater = uint32_t(generatedA > generatedB); - expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); - - expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); - expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); - expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); - } - { - testInput.coordX = longDistribution(mt); - testInput.coordY = longDistribution(mt); - testInput.coordZ = longDistribution(mt); - testInput.coordW = longDistribution(mt); - - uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; - uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; - - uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint16_t2 
Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); - uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); - uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); - - uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; - uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; - - uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); - uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); - uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); - - uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; - uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; - - uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); - uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); - uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - - // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them - // so their highest bits are all 0s or 1s depending on the sign of the number they encode - - int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); - int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); - int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); - int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); - 
int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); - - int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); - int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); - int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); - int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); - int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); - int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); - - int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); - int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); - int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - - // Plus - expected.mortonPlus_small_2 = createMortonFromU64Vec(Vec2ASmall + Vec2BSmall); - expected.mortonPlus_medium_2 = createMortonFromU64Vec(Vec2AMedium + Vec2BMedium); - expected.mortonPlus_full_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); - expected.mortonPlus_emulated_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); - - expected.mortonPlus_small_3 = createMortonFromU64Vec(Vec3ASmall + Vec3BSmall); - expected.mortonPlus_medium_3 = createMortonFromU64Vec(Vec3AMedium + Vec3BMedium); - expected.mortonPlus_full_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); - expected.mortonPlus_emulated_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); - - expected.mortonPlus_small_4 = createMortonFromU64Vec(Vec4ASmall + Vec4BSmall); - expected.mortonPlus_medium_4 = createMortonFromU64Vec(Vec4AMedium + Vec4BMedium); - expected.mortonPlus_full_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); - expected.mortonPlus_emulated_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); - - // Minus - 
expected.mortonMinus_small_2 = createMortonFromU64Vec(Vec2ASmall - Vec2BSmall); - expected.mortonMinus_medium_2 = createMortonFromU64Vec(Vec2AMedium - Vec2BMedium); - expected.mortonMinus_full_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); - expected.mortonMinus_emulated_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); - - expected.mortonMinus_small_3 = createMortonFromU64Vec(Vec3ASmall - Vec3BSmall); - expected.mortonMinus_medium_3 = createMortonFromU64Vec(Vec3AMedium - Vec3BMedium); - expected.mortonMinus_full_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); - expected.mortonMinus_emulated_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); - - expected.mortonMinus_small_4 = createMortonFromU64Vec(Vec4ASmall - Vec4BSmall); - expected.mortonMinus_medium_4 = createMortonFromU64Vec(Vec4AMedium - Vec4BMedium); - expected.mortonMinus_full_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); - expected.mortonMinus_emulated_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); - - // Coordinate-wise equality - expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); - expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); - expected.mortonEqual_full_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); - expected.mortonEqual_emulated_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); - - expected.mortonEqual_small_3 = uint32_t3(glm::equal(Vec3ASmall, Vec3BSmall)); - expected.mortonEqual_medium_3 = uint32_t3(glm::equal(Vec3AMedium, Vec3BMedium)); - expected.mortonEqual_full_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); - expected.mortonEqual_emulated_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); - - expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); - expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); - expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); - expected.mortonEqual_emulated_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); - - // 
Coordinate-wise unsigned inequality (just testing with less) - expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); - expected.mortonUnsignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2AMedium, Vec2BMedium)); - expected.mortonUnsignedLess_full_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); - expected.mortonUnsignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); - - expected.mortonUnsignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASmall, Vec3BSmall)); - expected.mortonUnsignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3AMedium, Vec3BMedium)); - expected.mortonUnsignedLess_full_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); - expected.mortonUnsignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); - - expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); - expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); - expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); - expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); - - // Coordinate-wise signed inequality - expected.mortonSignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); - expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); - expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); - expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); - - expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); - expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); - expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); - expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); - - 
expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); - expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); - expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); - expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); - - uint16_t castedShift = uint16_t(generatedShift); - // Left-shift - expected.mortonLeftShift_small_2 = createMortonFromU64Vec(Vec2ASmall << uint16_t(castedShift % smallBits_2)); - expected.mortonLeftShift_medium_2 = createMortonFromU64Vec(Vec2AMedium << uint16_t(castedShift % mediumBits_2)); - expected.mortonLeftShift_full_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); - expected.mortonLeftShift_emulated_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); - - expected.mortonLeftShift_small_3 = createMortonFromU64Vec(Vec3ASmall << uint16_t(castedShift % smallBits_3)); - expected.mortonLeftShift_medium_3 = createMortonFromU64Vec(Vec3AMedium << uint16_t(castedShift % mediumBits_3)); - expected.mortonLeftShift_full_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); - expected.mortonLeftShift_emulated_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); - - expected.mortonLeftShift_small_4 = createMortonFromU64Vec(Vec4ASmall << uint16_t(castedShift % smallBits_4)); - expected.mortonLeftShift_medium_4 = createMortonFromU64Vec(Vec4AMedium << uint16_t(castedShift % mediumBits_4)); - expected.mortonLeftShift_full_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); - expected.mortonLeftShift_emulated_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); - - // Unsigned right-shift - expected.mortonUnsignedRightShift_small_2 = morton::code::create(Vec2ASmall >> uint16_t(castedShift % smallBits_2)); - 
expected.mortonUnsignedRightShift_medium_2 = morton::code::create(Vec2AMedium >> uint16_t(castedShift % mediumBits_2)); - expected.mortonUnsignedRightShift_full_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); - expected.mortonUnsignedRightShift_emulated_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); - - expected.mortonUnsignedRightShift_small_3 = morton::code::create(Vec3ASmall >> uint16_t(castedShift % smallBits_3)); - expected.mortonUnsignedRightShift_medium_3 = morton::code::create(Vec3AMedium >> uint16_t(castedShift % mediumBits_3)); - expected.mortonUnsignedRightShift_full_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); - expected.mortonUnsignedRightShift_emulated_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); - - expected.mortonUnsignedRightShift_small_4 = morton::code::create(Vec4ASmall >> uint16_t(castedShift % smallBits_4)); - expected.mortonUnsignedRightShift_medium_4 = morton::code::create(Vec4AMedium >> uint16_t(castedShift % mediumBits_4)); - expected.mortonUnsignedRightShift_full_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); - expected.mortonUnsignedRightShift_emulated_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); - - // Signed right-shift - expected.mortonSignedRightShift_small_2 = morton::code::create(Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)); - expected.mortonSignedRightShift_medium_2 = morton::code::create(Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)); - expected.mortonSignedRightShift_full_2 = morton::code::create(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); - expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); - - expected.mortonSignedRightShift_small_3 = morton::code::create(Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)); - expected.mortonSignedRightShift_medium_3 = 
morton::code::create(Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)); - expected.mortonSignedRightShift_full_3 = morton::code::create(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); - expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); - - expected.mortonSignedRightShift_small_4 = morton::code::create(Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)); - expected.mortonSignedRightShift_medium_4 = morton::code::create(Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)); - expected.mortonSignedRightShift_full_4 = morton::code::create(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); - expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); - } - - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); - } - m_logger->log("FIRST TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - } + // Set input test values that will be used in both CPU and GPU tests + InputTestValues testInput; -private: - inline static constexpr int Iterations = 100u; + testInput.generatedA = longDistribution(getRandomEngine()); + testInput.generatedB = longDistribution(getRandomEngine()); - void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) - { - TestValues cpuTestValues; + uint32_t generatedShift = intDistribution(getRandomEngine()) & uint32_t(63); + testInput.shift = generatedShift; - fillTestValues(commonTestInputValues, cpuTestValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + testInput.coordX = longDistribution(getRandomEngine()); + testInput.coordY = longDistribution(getRandomEngine()); + testInput.coordZ = longDistribution(getRandomEngine()); + testInput.coordW = longDistribution(getRandomEngine()); + return testInput; } - void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& 
expectedTestValues) + TestValues determineExpectedResults(const InputTestValues& testInput) override { - TestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + { + const uint64_t generatedA = testInput.generatedA; + const uint64_t generatedB = testInput.generatedB; + const uint32_t generatedShift = testInput.shift; + + expected.emulatedAnd = _static_cast(generatedA & generatedB); + expected.emulatedOr = _static_cast(generatedA | generatedB); + expected.emulatedXor = _static_cast(generatedA ^ generatedB); + expected.emulatedNot = _static_cast(~generatedA); + expected.emulatedPlus = _static_cast(generatedA + generatedB); + expected.emulatedMinus = _static_cast(generatedA - generatedB); + expected.emulatedUnaryMinus = _static_cast(-generatedA); + expected.emulatedLess = uint32_t(generatedA < generatedB); + expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); + expected.emulatedGreater = uint32_t(generatedA > generatedB); + expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); + + expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); + expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); + expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); + } + { + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2AFull = 
createAnyBitIntegerVecFromU64Vec(Vec2A); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + + uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + + uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them + // so their highest bits are all 0s or 1s depending on the sign of the number they encode + + int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + 
int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + // Plus + expected.mortonPlus_small_2 = createMortonFromU64Vec(Vec2ASmall + Vec2BSmall); + expected.mortonPlus_medium_2 = createMortonFromU64Vec(Vec2AMedium + Vec2BMedium); + expected.mortonPlus_full_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); + expected.mortonPlus_emulated_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); + + expected.mortonPlus_small_3 = createMortonFromU64Vec(Vec3ASmall + Vec3BSmall); + expected.mortonPlus_medium_3 = createMortonFromU64Vec(Vec3AMedium + Vec3BMedium); + expected.mortonPlus_full_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); + expected.mortonPlus_emulated_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); + + expected.mortonPlus_small_4 = createMortonFromU64Vec(Vec4ASmall + Vec4BSmall); + expected.mortonPlus_medium_4 = createMortonFromU64Vec(Vec4AMedium + Vec4BMedium); + expected.mortonPlus_full_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); + expected.mortonPlus_emulated_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); + + // Minus + expected.mortonMinus_small_2 = createMortonFromU64Vec(Vec2ASmall - Vec2BSmall); + expected.mortonMinus_medium_2 = createMortonFromU64Vec(Vec2AMedium - Vec2BMedium); + expected.mortonMinus_full_2 = 
createMortonFromU64Vec(Vec2AFull - Vec2BFull); + expected.mortonMinus_emulated_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); + + expected.mortonMinus_small_3 = createMortonFromU64Vec(Vec3ASmall - Vec3BSmall); + expected.mortonMinus_medium_3 = createMortonFromU64Vec(Vec3AMedium - Vec3BMedium); + expected.mortonMinus_full_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); + expected.mortonMinus_emulated_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); + + expected.mortonMinus_small_4 = createMortonFromU64Vec(Vec4ASmall - Vec4BSmall); + expected.mortonMinus_medium_4 = createMortonFromU64Vec(Vec4AMedium - Vec4BMedium); + expected.mortonMinus_full_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); + expected.mortonMinus_emulated_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); + + // Coordinate-wise equality + expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); + expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); + expected.mortonEqual_full_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + expected.mortonEqual_emulated_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + + expected.mortonEqual_small_3 = uint32_t3(glm::equal(Vec3ASmall, Vec3BSmall)); + expected.mortonEqual_medium_3 = uint32_t3(glm::equal(Vec3AMedium, Vec3BMedium)); + expected.mortonEqual_full_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + expected.mortonEqual_emulated_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + + expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); + expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); + expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + expected.mortonEqual_emulated_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + + // Coordinate-wise unsigned inequality (just testing with less) + expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); + expected.mortonUnsignedLess_medium_2 = 
uint32_t2(glm::lessThan(Vec2AMedium, Vec2BMedium)); + expected.mortonUnsignedLess_full_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + expected.mortonUnsignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + + expected.mortonUnsignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASmall, Vec3BSmall)); + expected.mortonUnsignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3AMedium, Vec3BMedium)); + expected.mortonUnsignedLess_full_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + expected.mortonUnsignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + + expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); + expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); + expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + // Coordinate-wise signed inequality + expected.mortonSignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); + expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); + expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + + expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); + expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); + expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + + expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); + expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, 
Vec4BSignedMedium)); + expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(testInput.shift); + // Left-shift + expected.mortonLeftShift_small_2 = createMortonFromU64Vec(Vec2ASmall << uint16_t(castedShift % smallBits_2)); + expected.mortonLeftShift_medium_2 = createMortonFromU64Vec(Vec2AMedium << uint16_t(castedShift % mediumBits_2)); + expected.mortonLeftShift_full_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); + expected.mortonLeftShift_emulated_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); + + expected.mortonLeftShift_small_3 = createMortonFromU64Vec(Vec3ASmall << uint16_t(castedShift % smallBits_3)); + expected.mortonLeftShift_medium_3 = createMortonFromU64Vec(Vec3AMedium << uint16_t(castedShift % mediumBits_3)); + expected.mortonLeftShift_full_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); + expected.mortonLeftShift_emulated_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); + + expected.mortonLeftShift_small_4 = createMortonFromU64Vec(Vec4ASmall << uint16_t(castedShift % smallBits_4)); + expected.mortonLeftShift_medium_4 = createMortonFromU64Vec(Vec4AMedium << uint16_t(castedShift % mediumBits_4)); + expected.mortonLeftShift_full_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); + expected.mortonLeftShift_emulated_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); + + // Unsigned right-shift + expected.mortonUnsignedRightShift_small_2 = morton::code::create(Vec2ASmall >> uint16_t(castedShift % smallBits_2)); + expected.mortonUnsignedRightShift_medium_2 = morton::code::create(Vec2AMedium >> uint16_t(castedShift % mediumBits_2)); + expected.mortonUnsignedRightShift_full_2 = morton::code::create(Vec2AFull >> 
uint32_t(castedShift % fullBits_2)); + expected.mortonUnsignedRightShift_emulated_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); + + expected.mortonUnsignedRightShift_small_3 = morton::code::create(Vec3ASmall >> uint16_t(castedShift % smallBits_3)); + expected.mortonUnsignedRightShift_medium_3 = morton::code::create(Vec3AMedium >> uint16_t(castedShift % mediumBits_3)); + expected.mortonUnsignedRightShift_full_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); + expected.mortonUnsignedRightShift_emulated_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); + + expected.mortonUnsignedRightShift_small_4 = morton::code::create(Vec4ASmall >> uint16_t(castedShift % smallBits_4)); + expected.mortonUnsignedRightShift_medium_4 = morton::code::create(Vec4AMedium >> uint16_t(castedShift % mediumBits_4)); + expected.mortonUnsignedRightShift_full_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); + expected.mortonUnsignedRightShift_emulated_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); + + // Signed right-shift + expected.mortonSignedRightShift_small_2 = morton::code::create(Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)); + expected.mortonSignedRightShift_medium_2 = morton::code::create(Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)); + expected.mortonSignedRightShift_full_2 = morton::code::create(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + + expected.mortonSignedRightShift_small_3 = morton::code::create(Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)); + expected.mortonSignedRightShift_medium_3 = morton::code::create(Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)); + expected.mortonSignedRightShift_full_3 = morton::code::create(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); 
+ expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + + expected.mortonSignedRightShift_small_4 = morton::code::create(Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)); + expected.mortonSignedRightShift_medium_4 = morton::code::create(Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)); + expected.mortonSignedRightShift_full_4 = morton::code::create(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + } + + return expected; } - void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + bool verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, ITester::TestType testType) override { + bool pass = true; // Some verification is commented out and moved to CTester2 due to bug in dxc. Uncomment them when the bug is fixed. 
- verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); - verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); - verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testType); - verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testType); - verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testType); - verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testType); - verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testType); - verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testType); - verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testType); - verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testType); - verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testType); - verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testType); - verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testType); - verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testType); + pass &= verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testIteration, seed, testType); + pass &= verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testIteration, seed, testType); + pass &= verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testIteration, seed, testType); + pass 
&= verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testIteration, seed, testType); + pass &= verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testIteration, seed, testType); + pass &= verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testIteration, seed, testType); + pass &= verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testIteration, seed, testType); + pass &= verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testIteration, seed, testType); + pass &= verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testIteration, seed, testType); + pass &= verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testIteration, seed, testType); + pass &= verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testIteration, seed, testType); + pass &= verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testIteration, seed, testType); + pass &= verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testIteration, seed, testType); + pass &= verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testIteration, seed, testType); // Morton Plus - verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testType); - verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testType); - verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testType); 
- verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testType); - - verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testType); - verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testType); - verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testType); - verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testType); + pass &= verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testIteration, seed, testType); - verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testType); - verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testType); - verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testType); - verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testType); + pass &= verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, 
testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testIteration, seed, testType); + // Morton Minus - verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testType); - verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testType); - verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testType); - verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testType); - - verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testType); - verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testType); - verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testType); - verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testType); - - 
verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testType); - verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testType); - verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testType); - verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testType); - - // Morton coordinate-wise equality - verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testType); - verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testType); - verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testType); - verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testType); - - verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testType); - verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testType); - verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testType); - verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testType); - - verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testType); - verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testType); - verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testType); - verifyTestValue("mortonEqual_emulated_4", 
expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testType); - - // Morton coordinate-wise unsigned inequality - verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testType); - verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testType); - verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testType); - verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testType); - - verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testType); - verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testType); - verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testType); - verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testType); - - verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testType); - verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testType); - verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testType); - // verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testType); - - // Morton coordinate-wise signed inequality - 
verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testType); - verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testType); - verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testType); - // verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); - - verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testType); - verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testType); - verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testType); - // verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); - - verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testType); - verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testType); - verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testType); - // verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); - - // Morton left-shift - verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testType); - verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, 
testType); - verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testType); - verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testType); - - verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testType); - verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testType); - verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testType); - verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testType); - - verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testType); - verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testType); - verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testType); - verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testType); - - // Morton unsigned right-shift - verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testType); - verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testType); - verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_2", 
expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testType); - - verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testType); - verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testType); - verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testType); - - verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testType); - verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testType); - verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testType); - - // Morton signed right-shift - verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testType); - verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testType); - verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testType); - // 
verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); - - verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testType); - verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testType); - verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testType); - //verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); - - verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testType); - verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testType); - verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testType); - // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); - } -}; + pass &= verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_emulated_2", 
expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testIteration, seed, testType); -// Some hlsl code will result in compilation error if mixed together due to some bug in dxc. So we separate them into multiple shader compilation and test. -class CTester2 final : public ITester -{ -public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); + pass &= verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testIteration, seed, testType); - std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); - std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); + pass &= verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testIteration, seed, testType); - m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - 
InputTestValues testInput; - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - TestValues expected; + // Morton coordinate-wise equality + pass &= verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testIteration, seed, testType); + + pass &= verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testIteration, seed, testType); + + pass &= verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonEqual_emulated_4", 
expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testIteration, seed, testType); - uint32_t generatedShift = intDistribution(mt) & uint32_t(63); - testInput.shift = generatedShift; - { - testInput.coordX = longDistribution(mt); - testInput.coordY = longDistribution(mt); - testInput.coordZ = longDistribution(mt); - testInput.coordW = longDistribution(mt); + // Morton coordinate-wise unsigned inequality + pass &= verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testIteration, seed, testType); - uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; - uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + pass &= verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testIteration, seed, testType); - uint64_t3 Vec3A = { 
testInput.coordX, testInput.coordY, testInput.coordZ }; - uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + pass &= verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testIteration, seed, testType); + // verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testIteration, seed, testType); - uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; - uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + // Morton coordinate-wise signed inequality + pass &= verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testIteration, seed, testType); + // verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testIteration, seed, testType); - uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + pass &= verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, 
testValues.mortonSignedLess_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testIteration, seed, testType); + // verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testIteration, seed, testType); - int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + pass &= verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testIteration, seed, testType); + // verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testIteration, seed, testType); - int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); - int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + // Morton left-shift + pass &= verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, 
testValues.mortonLeftShift_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testIteration, seed, testType); - int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + pass &= verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testIteration, seed, testType); - expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); - - expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); - expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); - expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + pass &= verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonLeftShift_emulated_4", 
expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testIteration, seed, testType); - uint16_t castedShift = uint16_t(generatedShift); - expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); - expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); - expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + // Morton unsigned right-shift + pass &= verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testIteration, seed, testType); - } + pass &= verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_emulated_3", 
expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testIteration, seed, testType); - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); - } - m_logger->log("SECOND TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + pass &= verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testIteration, seed, testType); + + // Morton signed right-shift + pass &= verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testIteration, seed, testType); + // verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testIteration, seed, testType); + + pass &= verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, 
testValues.mortonSignedRightShift_small_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testIteration, seed, testType); + //verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testIteration, seed, testType); + + pass &= verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testIteration, seed, testType); + // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testIteration, seed, testType); + return pass; } +}; -private: - inline static constexpr int Iterations = 100u; +// Some hlsl code will result in compilation error if mixed together due to some bug in dxc. So we separate them into multiple shader compilation and test. 
+class CTester2 final : public ITester +{ + using base_t = ITester; +public: + CTester2(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; - void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) +private: + InputTestValues generateInputTestValues() override { - TestValues cpuTestValues; + std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); + std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); - fillTestValues2(commonTestInputValues, cpuTestValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput; - } + testInput.generatedA = longDistribution(getRandomEngine()); + testInput.generatedB = longDistribution(getRandomEngine()); - void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) - { - TestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + uint32_t generatedShift = intDistribution(getRandomEngine()) & uint32_t(63); + testInput.shift = generatedShift; + + testInput.coordX = longDistribution(getRandomEngine()); + testInput.coordY = longDistribution(getRandomEngine()); + testInput.coordZ = longDistribution(getRandomEngine()); + testInput.coordW = longDistribution(getRandomEngine()); + + return testInput; } - void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + TestValues determineExpectedResults(const InputTestValues& testInput) override { + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + const uint32_t generatedShift = testInput.shift; + uint64_t2 Vec2A = { 
testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; - verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testType); + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; - verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); - verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); - verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); - verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); - verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedFull = 
createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(generatedShift); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + + return expected; + } + + bool verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, ITester::TestType testType) override + { + bool pass = true; + pass &= verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testIteration, seed, testType); + + pass &= verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testIteration, seed, testType); + + pass &= verifyTestValue("mortonSignedRightShift_emulated_2", 
expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testIteration, seed, testType); + pass &= verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testIteration, seed, testType); + return pass; } }; #endif \ No newline at end of file diff --git a/14_Mortons/ITester.h b/14_Mortons/ITester.h deleted file mode 100644 index 3be6d1d6b..000000000 --- a/14_Mortons/ITester.h +++ /dev/null @@ -1,279 +0,0 @@ -#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ -#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ - -#include -#include "app_resources/common.hlsl" -#include "nbl/application_templates/MonoDeviceApplication.hpp" - -using namespace nbl; - -class ITester -{ -public: - virtual ~ITester() - { - m_outputBufferAllocation.memory->unmap(); - }; - - struct PipelineSetupData - { - std::string testShaderPath; - core::smart_refctd_ptr device; - core::smart_refctd_ptr api; - core::smart_refctd_ptr assetMgr; - core::smart_refctd_ptr logger; - video::IPhysicalDevice* physicalDevice; - uint32_t computeFamilyIndex; - }; - - template - void setupPipeline(const PipelineSetupData& pipleineSetupData) - { - // setting up pipeline in the constructor - m_device = core::smart_refctd_ptr(pipleineSetupData.device); - m_physicalDevice = pipleineSetupData.physicalDevice; - m_api = core::smart_refctd_ptr(pipleineSetupData.api); - m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr); - m_logger = core::smart_refctd_ptr(pipleineSetupData.logger); - m_queueFamily = pipleineSetupData.computeFamilyIndex; - m_semaphoreCounter = 0; - m_semaphore = m_device->createSemaphore(0); - m_cmdpool = m_device->createCommandPool(m_queueFamily, 
video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) - logFail("Failed to create Command Buffers!\n"); - - // Load shaders, set up pipeline - core::smart_refctd_ptr shader; - { - asset::IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return logFail("Could not load shader!"); - - // It would be super weird if loading a shader from a file produced more than 1 asset - assert(assets.size() == 1); - core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - - shader = m_device->compileShader({source.get()}); - } - - if (!shader) - logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V we're feeding it!\n"); - - video::IGPUDescriptorSetLayout::SBinding bindings[2] = { - { - .binding = 0, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - }, - { - .binding = 1, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - } - }; - - core::smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); - if (!dsLayout) - logFail("Failed to create a Descriptor Layout!\n"); - - m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout)); - if (!m_pplnLayout) - logFail("Failed to create a Pipeline Layout!\n"); - - { - video::IGPUComputePipeline::SCreationParams params = {}; - params.layout = m_pplnLayout.get(); - params.shader.entryPoint = "main"; - params.shader.shader = shader.get(); - if 
(!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) - logFail("Failed to create pipelines (compile & link shaders)!\n"); - } - - // Allocate memory of the input buffer - { - constexpr size_t BufferSize = sizeof(InputStruct); - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr inputBuff = m_device->createBuffer(std::move(params)); - if (!inputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - - inputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_inputBufferAllocation.isValid()) - logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); - - assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - m_ds = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(inputBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - // Allocate memory of the output buffer - { - constexpr size_t BufferSize = sizeof(OutputStruct); - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr outputBuff = 
m_device->createBuffer(std::move(params)); - if (!outputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - - outputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_outputBufferAllocation = m_device->allocate(reqs, outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_outputBufferAllocation.isValid()) - logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); - - assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(outputBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 1,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - if (!m_outputBufferAllocation.memory->map({ 0ull,m_outputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize()); - if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - assert(memoryRange.valid() && memoryRange.length >= sizeof(OutputStruct)); - - m_queue = 
m_device->getQueue(m_queueFamily, 0); - } - - enum class TestType - { - CPU, - GPU - }; - - template - void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) - { - if (expectedVal == testVal) - return; - - std::stringstream ss; - switch (testType) - { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; - break; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; - } - - ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n'; - - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); - } - -protected: - uint32_t m_queueFamily; - core::smart_refctd_ptr m_device; - core::smart_refctd_ptr m_api; - video::IPhysicalDevice* m_physicalDevice; - core::smart_refctd_ptr m_assetMgr; - core::smart_refctd_ptr m_logger; - video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {}; - video::IDeviceMemoryAllocator::SAllocation m_outputBufferAllocation = {}; - core::smart_refctd_ptr m_cmdbuf = nullptr; - core::smart_refctd_ptr m_cmdpool = nullptr; - core::smart_refctd_ptr m_ds = nullptr; - core::smart_refctd_ptr m_pplnLayout = nullptr; - core::smart_refctd_ptr m_pipeline; - core::smart_refctd_ptr m_semaphore; - video::IQueue* m_queue; - uint64_t m_semaphoreCounter; - - template - OutputStruct dispatch(const InputStruct& input) - { - // Update input buffer - if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize()); - if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), &input, 
sizeof(InputStruct)); - - m_inputBufferAllocation.memory->unmap(); - - // record command buffer - m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); - m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); - m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); - m_cmdbuf->bindComputePipeline(m_pipeline.get()); - m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); - m_cmdbuf->dispatch(1, 1, 1); - m_cmdbuf->endDebugMarker(); - m_cmdbuf->end(); - - video::IQueue::SSubmitInfo submitInfos[1] = {}; - const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; - submitInfos[0].commandBuffers = cmdbufs; - const video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; - submitInfos[0].signalSemaphores = signals; - - m_api->startCapture(); - m_queue->submit(submitInfos); - m_api->endCapture(); - - m_device->waitIdle(); - OutputStruct output; - std::memcpy(&output, static_cast(m_outputBufferAllocation.memory->getMappedPointer()), sizeof(OutputStruct)); - m_device->waitIdle(); - - return output; - } - -private: - template - inline void logFail(const char* msg, Args&&... args) - { - m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); - exit(-1); - } -}; - -#endif \ No newline at end of file diff --git a/14_Mortons/app_resources/test.comp.hlsl b/14_Mortons/app_resources/test.comp.hlsl index 60cdf94b1..2a2c465f4 100644 --- a/14_Mortons/app_resources/test.comp.hlsl +++ b/14_Mortons/app_resources/test.comp.hlsl @@ -1,17 +1,20 @@ //// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. //// This file is part of the "Nabla Engine". 
//// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) #include "testCommon.hlsl" -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(1, 1, 1)] +[numthreads(256, 1, 1)] [shader("compute")] -void main(uint3 invocationID : SV_DispatchThreadID) +void main() { - uint32_t testID = glsl::gl_GlobalInvocationID().x; - fillTestValues(inputTestValues[testID], outputTestValues[testID]); + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + TestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); } + diff --git a/14_Mortons/app_resources/test2.comp.hlsl b/14_Mortons/app_resources/test2.comp.hlsl index 30b998f49..8561faf83 100644 --- a/14_Mortons/app_resources/test2.comp.hlsl +++ b/14_Mortons/app_resources/test2.comp.hlsl @@ -1,17 +1,20 @@ //// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. //// This file is part of the "Nabla Engine". 
//// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) #include "testCommon2.hlsl" -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(1, 1, 1)] +[numthreads(256, 1, 1)] [shader("compute")] -void main(uint3 invocationID : SV_DispatchThreadID) +void main() { - uint32_t testID = glsl::gl_GlobalInvocationID().x; - fillTestValues2(inputTestValues[testID], outputTestValues[testID]); + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + TestExecutor2 executor; + executor(inputTestValues[invID], outputTestValues[invID]); } + diff --git a/14_Mortons/app_resources/testCommon.hlsl b/14_Mortons/app_resources/testCommon.hlsl index 6144b6ce9..b285bd8cd 100644 --- a/14_Mortons/app_resources/testCommon.hlsl +++ b/14_Mortons/app_resources/testCommon.hlsl @@ -1,295 +1,297 @@ #include "common.hlsl" - -void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +struct TestExecutor { - emulated_uint64_t emulatedA = _static_cast(input.generatedA); - emulated_uint64_t emulatedB = _static_cast(input.generatedB); - emulated_int64_t signedEmulatedA = _static_cast(input.generatedA); - - // Emulated int tests - output.emulatedAnd = emulatedA & emulatedB; - output.emulatedOr = emulatedA | emulatedB; - output.emulatedXor = emulatedA ^ emulatedB; - output.emulatedNot = emulatedA.operator~(); - output.emulatedPlus = emulatedA + emulatedB; - output.emulatedMinus = emulatedA - emulatedB; - output.emulatedLess = uint32_t(emulatedA < emulatedB); - output.emulatedLessEqual = uint32_t(emulatedA <= emulatedB); - output.emulatedGreater = uint32_t(emulatedA > emulatedB); - output.emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); - - left_shift_operator leftShift; - output.emulatedLeftShifted = leftShift(emulatedA, input.shift); - - 
arithmetic_right_shift_operator unsignedRightShift; - output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); - - arithmetic_right_shift_operator signedRightShift; - output.emulatedSignedRightShifted = signedRightShift(signedEmulatedA, input.shift); - - output.emulatedUnaryMinus = signedEmulatedA.operator-(); - - // Morton tests - uint64_t2 Vec2A = { input.coordX, input.coordY }; - uint64_t2 Vec2B = { input.coordZ, input.coordW }; - - uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; - uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; - - uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; - uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; - - uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); - uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); - uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); - - uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); - uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); - uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); - - uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); - uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); - uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - - int16_t2 Vec2ASignedSmall = 
createAnyBitIntegerVecFromU64Vec(Vec2A); - int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); - int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); - int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); - int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); - - int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); - int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); - int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); - int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); - int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); - int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); - - int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); - int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); - int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - - morton::code morton_small_2A = createMortonFromU64Vec(Vec2A); - morton::code morton_medium_2A = createMortonFromU64Vec(Vec2A); - morton::code morton_full_2A = createMortonFromU64Vec(Vec2A); - morton::code morton_emulated_2A = createMortonFromU64Vec(Vec2A); - morton::code morton_small_2B = createMortonFromU64Vec(Vec2B); - morton::code morton_medium_2B = createMortonFromU64Vec(Vec2B); - morton::code morton_full_2B = createMortonFromU64Vec(Vec2B); - morton::code morton_emulated_2B = createMortonFromU64Vec(Vec2B); - - morton::code morton_small_3A = createMortonFromU64Vec(Vec3A); - morton::code morton_medium_3A = createMortonFromU64Vec(Vec3A); - morton::code morton_full_3A = createMortonFromU64Vec(Vec3A); - morton::code morton_emulated_3A 
= createMortonFromU64Vec(Vec3A); - morton::code morton_small_3B = createMortonFromU64Vec(Vec3B); - morton::code morton_medium_3B = createMortonFromU64Vec(Vec3B); - morton::code morton_full_3B = createMortonFromU64Vec(Vec3B); - morton::code morton_emulated_3B = createMortonFromU64Vec(Vec3B); - - morton::code morton_small_4A = createMortonFromU64Vec(Vec4A); - morton::code morton_medium_4A = createMortonFromU64Vec(Vec4A); - morton::code morton_full_4A = createMortonFromU64Vec(Vec4A); - morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); - morton::code morton_small_4B = createMortonFromU64Vec(Vec4B); - morton::code morton_medium_4B = createMortonFromU64Vec(Vec4B); - morton::code morton_full_4B = createMortonFromU64Vec(Vec4B); - morton::code morton_emulated_4B = createMortonFromU64Vec(Vec4B); - - morton::code morton_small_2_signed = createMortonFromU64Vec(Vec2A); - morton::code morton_medium_2_signed = createMortonFromU64Vec(Vec2A); - morton::code morton_full_2_signed = createMortonFromU64Vec(Vec2A); - morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); - - morton::code morton_small_3_signed = createMortonFromU64Vec(Vec3A); - morton::code morton_medium_3_signed = createMortonFromU64Vec(Vec3A); - morton::code morton_full_3_signed = createMortonFromU64Vec(Vec3A); - morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); - - morton::code morton_small_4_signed = createMortonFromU64Vec(Vec4A); - morton::code morton_medium_4_signed = createMortonFromU64Vec(Vec4A); - morton::code morton_full_4_signed = createMortonFromU64Vec(Vec4A); - morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); - - // Some test and operation is moved to testCommon2.hlsl due to dxc bug that cause compilation failure. Uncomment when the bug is fixed. 
- // Plus - output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; - output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; - output.mortonPlus_full_2 = morton_full_2A + morton_full_2B; - output.mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; - - output.mortonPlus_small_3 = morton_small_3A + morton_small_3B; - output.mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; - output.mortonPlus_full_3 = morton_full_3A + morton_full_3B; - output.mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; - - output.mortonPlus_small_4 = morton_small_4A + morton_small_4B; - output.mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; - output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; - output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; - - // Minus - output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; - output.mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; - output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; - output.mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; - - output.mortonMinus_small_3 = morton_small_3A - morton_small_3B; - output.mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; - output.mortonMinus_full_3 = morton_full_3A - morton_full_3B; - output.mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; - - output.mortonMinus_small_4 = morton_small_4A - morton_small_4B; - output.mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; - output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; - output.mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; - - // Coordinate-wise equality - output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(Vec2BSmall)); - output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(Vec2BMedium)); - output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(Vec2BFull)); - output.mortonEqual_emulated_2 = 
uint32_t2(morton_emulated_2A.equal(Vec2BFull)); - - output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(Vec3BSmall)); - output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(Vec3BMedium)); - output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(Vec3BFull)); - output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(Vec3BFull)); - - output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(Vec4BSmall)); - output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(Vec4BMedium)); - output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(Vec4BFull)); - output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(Vec4BFull)); - - // Coordinate-wise unsigned inequality (just testing with less) - output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(Vec2BSmall)); - output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(Vec2BMedium)); - output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(Vec2BFull)); - output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(Vec2BFull)); - - output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(Vec3BSmall)); - output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(Vec3BMedium)); - output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(Vec3BFull)); - output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(Vec3BFull)); - - output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(Vec4BSmall)); - output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(Vec4BMedium)); - output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(Vec4BFull)); - // output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); - - // Coordinate-wise signed inequality - output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(Vec2BSignedSmall)); - output.mortonSignedLess_medium_2 = 
uint32_t2(morton_medium_2_signed.lessThan(Vec2BSignedMedium)); - output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(Vec2BSignedFull)); - // output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); - - output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(Vec3BSignedSmall)); - output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(Vec3BSignedMedium)); - output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(Vec3BSignedFull)); - // output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); - - output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(Vec4BSignedSmall)); - output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(Vec4BSignedMedium)); - output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(Vec4BSignedFull)); - // output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); - - // Cast to uint16_t which is what left shift for Mortons expect - uint16_t castedShift = uint16_t(input.shift); - // Each left shift clamps to correct bits so the result kinda makes sense - // Left-shift - left_shift_operator > leftShiftSmall2; - output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); - left_shift_operator > leftShiftMedium2; - output.mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); - left_shift_operator > leftShiftFull2; - output.mortonLeftShift_full_2 = leftShiftFull2(morton_full_2A, castedShift % fullBits_2); - left_shift_operator > leftShiftEmulated2; - output.mortonLeftShift_emulated_2 = leftShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); - - left_shift_operator > leftShiftSmall3; - output.mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift % smallBits_3); - left_shift_operator > 
leftShiftMedium3; - output.mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); - left_shift_operator > leftShiftFull3; - output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); - left_shift_operator > leftShiftEmulated3; - output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); - - left_shift_operator > leftShiftSmall4; - output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); - left_shift_operator > leftShiftMedium4; - output.mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); - left_shift_operator > leftShiftFull4; - output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % fullBits_4); - left_shift_operator > leftShiftEmulated4; - output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - - // Unsigned right-shift - arithmetic_right_shift_operator > rightShiftSmall2; - output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); - arithmetic_right_shift_operator > rightShiftMedium2; - output.mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); - arithmetic_right_shift_operator > rightShiftFull2; - output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); - arithmetic_right_shift_operator > rightShiftEmulated2; - output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); - - arithmetic_right_shift_operator > rightShiftSmall3; - output.mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); - arithmetic_right_shift_operator > rightShiftMedium3; - output.mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); - arithmetic_right_shift_operator 
> rightShiftFull3; - output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); - arithmetic_right_shift_operator > rightShiftEmulated3; - output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); - - arithmetic_right_shift_operator > rightShiftSmall4; - output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); - arithmetic_right_shift_operator > rightShiftMedium4; - output.mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); - arithmetic_right_shift_operator > rightShiftFull4; - output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); - arithmetic_right_shift_operator > rightShiftEmulated4; - output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - - // Signed right-shift - arithmetic_right_shift_operator > rightShiftSignedSmall2; - output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); - arithmetic_right_shift_operator > rightShiftSignedMedium2; - output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); - arithmetic_right_shift_operator > rightShiftSignedFull2; - output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); - // arithmetic_right_shift_operator > rightShiftSignedEmulated2; - // output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); - - arithmetic_right_shift_operator > rightShiftSignedSmall3; - output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); - arithmetic_right_shift_operator > rightShiftSignedMedium3; - output.mortonSignedRightShift_medium_3 = 
rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); - arithmetic_right_shift_operator > rightShiftSignedFull3; - output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); - // arithmetic_right_shift_operator > rightShiftSignedEmulated3; - // output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); - - arithmetic_right_shift_operator > rightShiftSignedSmall4; - output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); - arithmetic_right_shift_operator > rightShiftSignedMedium4; - output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); - arithmetic_right_shift_operator > rightShiftSignedFull4; - output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); - // arithmetic_right_shift_operator > rightShiftSignedEmulated4; - // output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); - -} \ No newline at end of file + void operator()(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) + { + emulated_uint64_t emulatedA = _static_cast(input.generatedA); + emulated_uint64_t emulatedB = _static_cast(input.generatedB); + emulated_int64_t signedEmulatedA = _static_cast(input.generatedA); + + // Emulated int tests + output.emulatedAnd = emulatedA & emulatedB; + output.emulatedOr = emulatedA | emulatedB; + output.emulatedXor = emulatedA ^ emulatedB; + output.emulatedNot = emulatedA.operator~(); + output.emulatedPlus = emulatedA + emulatedB; + output.emulatedMinus = emulatedA - emulatedB; + output.emulatedLess = uint32_t(emulatedA < emulatedB); + output.emulatedLessEqual = uint32_t(emulatedA <= emulatedB); + output.emulatedGreater = uint32_t(emulatedA > emulatedB); + 
output.emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); + + left_shift_operator leftShift; + output.emulatedLeftShifted = leftShift(emulatedA, input.shift); + + arithmetic_right_shift_operator unsignedRightShift; + output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); + + arithmetic_right_shift_operator signedRightShift; + output.emulatedSignedRightShifted = signedRightShift(signedEmulatedA, input.shift); + + output.emulatedUnaryMinus = signedEmulatedA.operator-(); + + // Morton tests + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + 
uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + morton::code morton_small_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_medium_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_full_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_small_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_medium_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_full_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_emulated_2B = createMortonFromU64Vec(Vec2B); + + morton::code morton_small_3A = createMortonFromU64Vec(Vec3A); + 
morton::code morton_medium_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_full_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_small_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_medium_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_full_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_emulated_3B = createMortonFromU64Vec(Vec3B); + + morton::code morton_small_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_medium_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_full_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_small_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_medium_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_full_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_emulated_4B = createMortonFromU64Vec(Vec4B); + + morton::code morton_small_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_medium_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_full_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); + + morton::code morton_small_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_medium_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_full_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); + + morton::code morton_small_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_medium_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_full_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); + + // Some test and operation is moved to testCommon2.hlsl due to dxc bug that cause compilation failure. Uncomment when the bug is fixed. 
+ // Plus + output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; + output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; + output.mortonPlus_full_2 = morton_full_2A + morton_full_2B; + output.mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; + + output.mortonPlus_small_3 = morton_small_3A + morton_small_3B; + output.mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; + output.mortonPlus_full_3 = morton_full_3A + morton_full_3B; + output.mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; + + output.mortonPlus_small_4 = morton_small_4A + morton_small_4B; + output.mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; + output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; + output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; + + // Minus + output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; + output.mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; + output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; + output.mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; + + output.mortonMinus_small_3 = morton_small_3A - morton_small_3B; + output.mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; + output.mortonMinus_full_3 = morton_full_3A - morton_full_3B; + output.mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; + + output.mortonMinus_small_4 = morton_small_4A - morton_small_4B; + output.mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; + output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; + output.mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; + + // Coordinate-wise equality + output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(Vec2BSmall)); + output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(Vec2BMedium)); + output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(Vec2BFull)); + output.mortonEqual_emulated_2 = 
uint32_t2(morton_emulated_2A.equal(Vec2BFull)); + + output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(Vec3BSmall)); + output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(Vec3BMedium)); + output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(Vec3BFull)); + output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(Vec3BFull)); + + output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(Vec4BSmall)); + output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(Vec4BMedium)); + output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(Vec4BFull)); + output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(Vec4BFull)); + + // Coordinate-wise unsigned inequality (just testing with less) + output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(Vec2BSmall)); + output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(Vec2BMedium)); + output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(Vec2BFull)); + output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(Vec2BFull)); + + output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(Vec3BSmall)); + output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(Vec3BMedium)); + output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(Vec3BFull)); + output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(Vec3BFull)); + + output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(Vec4BSmall)); + output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(Vec4BMedium)); + output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(Vec4BFull)); + // output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); + + // Coordinate-wise signed inequality + output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(Vec2BSignedSmall)); + output.mortonSignedLess_medium_2 = 
uint32_t2(morton_medium_2_signed.lessThan(Vec2BSignedMedium)); + output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(Vec2BSignedFull)); + // output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); + + output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(Vec3BSignedSmall)); + output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(Vec3BSignedMedium)); + output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(Vec3BSignedFull)); + // output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); + + output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(Vec4BSignedSmall)); + output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(Vec4BSignedMedium)); + output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(Vec4BSignedFull)); + // output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); + + // Cast to uint16_t which is what left shift for Mortons expect + uint16_t castedShift = uint16_t(input.shift); + // Each left shift clamps to correct bits so the result kinda makes sense + // Left-shift + left_shift_operator > leftShiftSmall2; + output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); + left_shift_operator > leftShiftMedium2; + output.mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + left_shift_operator > leftShiftFull2; + output.mortonLeftShift_full_2 = leftShiftFull2(morton_full_2A, castedShift % fullBits_2); + left_shift_operator > leftShiftEmulated2; + output.mortonLeftShift_emulated_2 = leftShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + left_shift_operator > leftShiftSmall3; + output.mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift % smallBits_3); + left_shift_operator > 
leftShiftMedium3; + output.mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + left_shift_operator > leftShiftFull3; + output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); + left_shift_operator > leftShiftEmulated3; + output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + left_shift_operator > leftShiftSmall4; + output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); + left_shift_operator > leftShiftMedium4; + output.mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + left_shift_operator > leftShiftFull4; + output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % fullBits_4); + left_shift_operator > leftShiftEmulated4; + output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Unsigned right-shift + arithmetic_right_shift_operator > rightShiftSmall2; + output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftMedium2; + output.mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftFull2; + output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftEmulated2; + output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + arithmetic_right_shift_operator > rightShiftSmall3; + output.mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftMedium3; + output.mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + arithmetic_right_shift_operator 
> rightShiftFull3; + output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftEmulated3; + output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSmall4; + output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftMedium4; + output.mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftFull4; + output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); + arithmetic_right_shift_operator > rightShiftEmulated4; + output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Signed right-shift + arithmetic_right_shift_operator > rightShiftSignedSmall2; + output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftSignedMedium2; + output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftSignedFull2; + output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); + // arithmetic_right_shift_operator > rightShiftSignedEmulated2; + // output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); + + arithmetic_right_shift_operator > rightShiftSignedSmall3; + output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftSignedMedium3; + output.mortonSignedRightShift_medium_3 = 
rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftSignedFull3; + output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); + // arithmetic_right_shift_operator > rightShiftSignedEmulated3; + // output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSignedSmall4; + output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftSignedMedium4; + output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftSignedFull4; + output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); + // arithmetic_right_shift_operator > rightShiftSignedEmulated4; + // output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); + + } +}; diff --git a/14_Mortons/app_resources/testCommon2.hlsl b/14_Mortons/app_resources/testCommon2.hlsl index 365b82340..5c2a953ac 100644 --- a/14_Mortons/app_resources/testCommon2.hlsl +++ b/14_Mortons/app_resources/testCommon2.hlsl @@ -1,39 +1,42 @@ #include "common.hlsl" -void fillTestValues2(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +struct TestExecutor2 { - uint64_t2 Vec2A = { input.coordX, input.coordY }; - uint64_t2 Vec2B = { input.coordZ, input.coordW }; - - uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; - uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; - - uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; - uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; - - uint16_t4 Vec4BFull = 
createAnyBitIntegerVecFromU64Vec(Vec4B); - int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); - int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); - int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - - morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); - morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); - morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); - morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); - - - output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); - - output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); - output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); - output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); - - uint16_t castedShift = uint16_t(input.shift); - - arithmetic_right_shift_operator > rightShiftSignedEmulated2; - output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); - arithmetic_right_shift_operator > rightShiftSignedEmulated3; - output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); - arithmetic_right_shift_operator > rightShiftSignedEmulated4; - output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); -} + void operator()(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) + { + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { 
input.coordY, input.coordZ, input.coordW, input.coordX }; + + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); + + + output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); + + output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); + output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); + output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(input.shift); + + arithmetic_right_shift_operator > rightShiftSignedEmulated2; + output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftSignedEmulated3; + output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftSignedEmulated4; + output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); + } +}; diff --git a/14_Mortons/main.cpp b/14_Mortons/main.cpp index 12f55805f..d995b8109 100644 --- a/14_Mortons/main.cpp +++ b/14_Mortons/main.cpp @@ -4,6 +4,8 @@ #include #include +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + #include "nbl/application_templates/MonoDeviceApplication.hpp" #include 
"nbl/examples/common/BuiltinResourcesApplication.hpp" @@ -34,29 +36,39 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp return false; if (!asset_base_t::onAppInitialized(std::move(system))) return false; - - CTester::PipelineSetupData pplnSetupData; - pplnSetupData.device = m_device; - pplnSetupData.api = m_api; - pplnSetupData.assetMgr = m_assetMgr; - pplnSetupData.logger = m_logger; - pplnSetupData.physicalDevice = m_physicalDevice; - pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator // Blocked by https://github.com/KhronosGroup/SPIRV-Tools/issues/6104 + bool pass = true; + { + CTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + pplnSetupData.shaderKey = nbl::this_example::builtin::build::get_spirv_key<"test">(m_device.get()); + + CTester mortonTester(4); // 4 * 128 = 512 tests + mortonTester.setupPipeline(pplnSetupData); + pass &= mortonTester.performTestsAndVerifyResults("MortonTestLog.txt"); + } { - CTester mortonTester; - pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; - mortonTester.setupPipeline(pplnSetupData); - mortonTester.performTests(); + CTester2::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + pplnSetupData.shaderKey = nbl::this_example::builtin::build::get_spirv_key<"test2">(m_device.get()); - CTester2 mortonTester2; - 
pplnSetupData.testShaderPath = "app_resources/test2.comp.hlsl"; - mortonTester2.setupPipeline(pplnSetupData); - mortonTester2.performTests(); + CTester2 mortonTester2(4); + mortonTester2.setupPipeline(reinterpret_cast(pplnSetupData)); + pass &= mortonTester2.performTestsAndVerifyResults("MortonTestLog2.txt"); } - return true; + return pass; } void onAppTerminated_impl() override diff --git a/15_MitsubaLoader/CMakeLists.txt b/15_MitsubaLoader/CMakeLists.txt new file mode 100644 index 000000000..3921c61d9 --- /dev/null +++ b/15_MitsubaLoader/CMakeLists.txt @@ -0,0 +1,9 @@ +list(APPEND NBL_INCLUDE_SERACH_DIRECTORIES + "${NBL_EXT_MITSUBA_LOADER_INCLUDE_DIRS}" +) +list(APPEND NBL_LIBRARIES + "${NBL_EXT_MITSUBA_LOADER_LIB}" +) + + +nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") diff --git a/15_MitsubaLoader/main.cpp b/15_MitsubaLoader/main.cpp new file mode 100644 index 000000000..2bd96ce16 --- /dev/null +++ b/15_MitsubaLoader/main.cpp @@ -0,0 +1,131 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" +#include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" +#include "nbl/ext/MitsubaLoader/CSerializedLoader.h" + +#include +#include + + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +// Testing our Mitsuba Loader +class MitsubaLoaderTest final : public BuiltinResourcesApplication +{ + using base_t = BuiltinResourcesApplication; + + bool test(const system::path& listPath) + { + smart_refctd_ptr file; + { + ISystem::future_t> future; + using create_flags_t = IFileBase::E_CREATE_FLAGS; + m_system->createFile(future,listPath,create_flags_t::ECF_READ|create_flags_t::ECF_MAPPABLE); + if (!future.wait()) + return logFail("Failed to list of scenes to test with path %s",listPath.string().c_str()); + smart_refctd_ptr tmp; + future.acquire().move_into(tmp); + file = std::move(tmp); + } + if (!file) + return logFail("Failed to open list of scenes to test with path %s",listPath.string().c_str()); + + const auto base = file->getFileName().parent_path(); + const void* const ptr = file->getMappedPointer(); + const auto end = reinterpret_cast(ptr)+file->getSize(); + for (auto cursor=reinterpret_cast(ptr); cursor(std::isspace)); + if (cursor==end) + break; + auto nextLine = [&]()->const char* + { + constexpr std::array newlines = {'\r','\n'}; + auto retval = std::find_first_of(cursor,end,newlines.begin(),newlines.end()); + while (++retvalgetAsset(relPath,params); + if (asset.getContents().empty() || asset.getAssetType()!=IAsset::E_TYPE::ET_SCENE) + return logFail("Failed To Load %s",relPath.c_str()); + m_logger->log("Loaded %s",ILogger::ELL_INFO,relPath.c_str()); + // TODO: print True Material IR + // so we don't run out of RAM during testing + m_assetMgr->clearAllAssetCache(); + } + else if 
(*cursor!=';') + { + const char chr[2] = {*cursor,0}; + cursor = std::find(cursor,cursorEnd,'\"'); + if (cursor==cursorEnd) + return logFail("Parser Error, encountered unsupprted character %s near line start",chr); + } + cursor = cursorEnd; + } + return true; + } + + public: + // Yay thanks to multiple inheritance we cannot forward ctors anymore + MitsubaLoaderTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + // we stuff all our work here because its a "single shot" app + bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!base_t::onAppInitialized(std::move(system))) + return false; + + m_assetMgr->addAssetLoader(make_smart_refctd_ptr(core::smart_refctd_ptr(m_system))); + // some of our test scenes won't load without the `.serialized` support + m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); + + // public batch + if (!test(localInputCWD/"test_scenes.txt")) + return false; + if (!test(sharedInputCWD/"Ditt-Reference-Scenes/private_test_scenes.txt")) + return false; + + return true; + } + + // One-shot App + bool keepRunning() override { return false; } + + // One-shot App + void workLoopBody() override {} + + // Cleanup + bool onAppTerminated() override + { + return base_t::onAppTerminated(); + } +}; + + +NBL_MAIN_FUNC(MitsubaLoaderTest) \ No newline at end of file diff --git a/15_MitsubaLoader/test_scenes.txt b/15_MitsubaLoader/test_scenes.txt new file mode 100644 index 000000000..a5876752c --- /dev/null +++ b/15_MitsubaLoader/test_scenes.txt @@ -0,0 +1,30 @@ +; Here is my Commented line that batch file will skip (started with semicolons) +"../media/mitsuba/shapetest.xml" +"../media/mitsuba/daily_pt.xml" +"../media/mitsuba/brdf_eval_test.xml" +"../media/mitsuba/brdf_eval_test_as.xml" +"../media/mitsuba/brdf_eval_test_diffuse.xml" 
+"../media/mitsuba/brdf_eval_test_lambert.xml" +"../media/mitsuba/aniso_ies/72_render_0_2.xml" +"../media/mitsuba/bathroom/scene.xml" +"../media/mitsuba/bathroom2/scene.xml" +;"../media/mitsuba/bedroom/scene.xml" ; we'd need to commit uncompressed 100MB OBJ, and this example doesn't load from ZIP +"../media/mitsuba/car2/scene.xml" +"../media/mitsuba/coffee/scene.xml" +;"../media/mitsuba/classroom/scene.xml" ; skip because is not supported +"../media/mitsuba/ditt/render_720p.xml" +"../media/mitsuba/ditt/render_2160p.xml" +"../media/mitsuba/ditt/render_cube_lh.xml" +"../media/mitsuba/ditt/render_cube_rh.xml" +"../media/mitsuba/glass-of-water/scene.xml" +"../media/mitsuba/kitchen/scene.xml" +;"../media/mitsuba/lamp/scene.xml" ; skip because is not supported +"../media/mitsuba/living-room/scene.xml" +"../media/mitsuba/living-room-2/scene.xml" +"../media/mitsuba/iso_ies/71_render_0_2.xml" +"../media/mitsuba/messed_up_uvs/31_scene_0_1.xml" +"../media/mitsuba/normalmap_test/render_withnormalmap.xml" +"../media/mitsuba/normalmap_test/render_withoutnormalmap.xml" +"../media/mitsuba/spaceship/scene.xml" +"../media/mitsuba/staircase/scene.xml" +;"../media/mitsuba/staircase2/scene.xml" ; skip because is not supported diff --git a/16_ZipArchiveLoaderTest/CMakeLists.txt b/16_ZipArchiveLoaderTest/CMakeLists.txt new file mode 100644 index 000000000..f60757aad --- /dev/null +++ b/16_ZipArchiveLoaderTest/CMakeLists.txt @@ -0,0 +1,32 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() + +enable_testing() + +add_test(NAME NBL_ZIP_ARCHIVE_LOADER_TEST + COMMAND "$" + WORKING_DIRECTORY "$" + COMMAND_EXPAND_LISTS +) diff --git a/22_RaytracedAO/config.json.template b/16_ZipArchiveLoaderTest/config.json.template similarity index 86% rename from 22_RaytracedAO/config.json.template rename to 16_ZipArchiveLoaderTest/config.json.template index abfc8e387..24adf54fb 100644 --- a/22_RaytracedAO/config.json.template +++ b/16_ZipArchiveLoaderTest/config.json.template @@ -6,7 +6,7 @@ "cmake": { "configurations": [ "Release", "Debug", "RelWithDebInfo" ], "buildModes": [], - "requiredOptions": [ "NBL_BUILD_MITSUBA_LOADER", "NBL_BUILD_RADEON_RAYS" ] + "requiredOptions": [] }, "profiles": [ { @@ -25,4 +25,4 @@ "outputs": [] } ] -} \ No newline at end of file +} diff --git a/16_ZipArchiveLoaderTest/main.cpp b/16_ZipArchiveLoaderTest/main.cpp new file mode 100644 
index 000000000..ccdef7a06 --- /dev/null +++ b/16_ZipArchiveLoaderTest/main.cpp @@ -0,0 +1,266 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include + +#include +#include +#include +#include +#include +#include + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::examples; + +class ZipArchiveLoaderTest final : public BuiltinResourcesApplication +{ + using asset_base_t = BuiltinResourcesApplication; + +public: + ZipArchiveLoaderTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { + } + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + const path zipPath = sharedInputCWD / "mitsuba/bedroom.zip"; + auto archive = m_system->openFileArchive(zipPath); + if (!archive) + { + m_logger->log("Failed to open zip archive: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + auto archiveFiles = IFileArchive::SFileList::span_t(archive->listAssets()); + if (archiveFiles.empty()) + { + m_logger->log("Zip archive is empty: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + const path scenePath = "scene.xml"; + auto sceneIt = std::find_if(archiveFiles.begin(), archiveFiles.end(), [&scenePath](const auto& entry) + { + return entry.pathRelativeToArchive == scenePath; + }); + if (sceneIt == archiveFiles.end()) + { + m_logger->log("Zip archive missing scene.xml: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + auto sceneFile = archive->getFile(scenePath, IFileBase::ECF_READ, ""); + if (!sceneFile) + { + m_logger->log("Failed to open 
scene.xml from zip: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + if (sceneIt->size == 0 || sceneFile->getSize() != sceneIt->size) + { + m_logger->log("scene.xml size mismatch in zip: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + const size_t probeSize = std::min(sceneIt->size, 64u); + std::array probe{}; + IFile::success_t probeRead; + sceneFile->read(probeRead, probe.data(), 0, probeSize); + if (!probeRead) + { + m_logger->log("Failed to read scene.xml from zip: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + const std::string_view probeView(probe.data(), probeSize); + if (probeView.find("log("scene.xml header is unexpected in zip: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + const size_t linesToPrint = 6u; + const char* mapped = static_cast(sceneFile->getMappedPointer()); + if (mapped) + { + std::vector headLines; + headLines.reserve(linesToPrint); + std::deque tailLines; + + size_t lineStart = 0; + for (size_t i = 0; i < sceneIt->size; ++i) + { + if (mapped[i] != '\n') + continue; + + size_t lineLen = i - lineStart; + if (lineLen && mapped[i - 1] == '\r') + --lineLen; + + const std::string_view line(mapped + lineStart, lineLen); + if (headLines.size() < linesToPrint) + headLines.push_back(line); + if (tailLines.size() == linesToPrint) + tailLines.pop_front(); + tailLines.push_back(line); + lineStart = i + 1; + } + if (lineStart < sceneIt->size) + { + size_t lineLen = sceneIt->size - lineStart; + if (lineLen && mapped[sceneIt->size - 1] == '\r') + --lineLen; + const std::string_view line(mapped + lineStart, lineLen); + if (headLines.size() < linesToPrint) + headLines.push_back(line); + if (tailLines.size() == linesToPrint) + tailLines.pop_front(); + tailLines.push_back(line); + } + + std::string head; + for (const auto& line : headLines) + { + head.append(line); + head.push_back('\n'); + } + std::string tail; + for (const auto& line : tailLines) 
+ { + tail.append(line); + tail.push_back('\n'); + } + + m_logger->log("scene.xml head (%u lines):\n%s", ILogger::ELL_INFO, static_cast(headLines.size()), head.c_str()); + m_logger->log("scene.xml tail (%u lines):\n%s", ILogger::ELL_INFO, static_cast(tailLines.size()), tail.c_str()); + } + else + { + std::vector headLines; + headLines.reserve(linesToPrint); + std::deque tailLines; + std::string carry; + const size_t chunkSize = 64u * 1024u; + std::string buffer(chunkSize, '\0'); + size_t offset = 0; + while (offset < sceneIt->size) + { + const size_t toRead = std::min(chunkSize, sceneIt->size - offset); + IFile::success_t chunkRead; + sceneFile->read(chunkRead, buffer.data(), offset, toRead); + if (!chunkRead) + { + m_logger->log("Failed to read scene.xml from zip: %s", ILogger::ELL_ERROR, zipPath.string().c_str()); + return false; + } + + size_t lineStart = 0; + for (size_t i = 0; i < toRead; ++i) + { + if (buffer[i] != '\n') + continue; + + size_t lineEnd = i; + if (lineEnd > lineStart && buffer[lineEnd - 1] == '\r') + --lineEnd; + + std::string line; + if (!carry.empty()) + { + line = carry; + if (lineEnd > lineStart) + line.append(buffer.data() + lineStart, lineEnd - lineStart); + if (!line.empty() && line.back() == '\r') + line.pop_back(); + carry.clear(); + } + else + { + line.assign(buffer.data() + lineStart, lineEnd - lineStart); + } + + if (headLines.size() < linesToPrint) + headLines.push_back(line); + if (tailLines.size() == linesToPrint) + tailLines.pop_front(); + tailLines.push_back(std::move(line)); + lineStart = i + 1; + } + + if (lineStart < toRead) + { + const size_t tailSize = toRead - lineStart; + if (carry.empty()) + carry.assign(buffer.data() + lineStart, tailSize); + else + carry.append(buffer.data() + lineStart, tailSize); + } + + offset += toRead; + } + if (!carry.empty()) + { + if (!carry.empty() && carry.back() == '\r') + carry.pop_back(); + if (headLines.size() < linesToPrint) + headLines.push_back(carry); + if (tailLines.size() == 
linesToPrint) + tailLines.pop_front(); + tailLines.push_back(carry); + carry.clear(); + } + + std::string head; + for (const auto& line : headLines) + { + head.append(line); + head.push_back('\n'); + } + std::string tail; + for (const auto& line : tailLines) + { + tail.append(line); + tail.push_back('\n'); + } + + m_logger->log("scene.xml head (%u lines):\n%s", ILogger::ELL_INFO, static_cast(headLines.size()), head.c_str()); + m_logger->log("scene.xml tail (%u lines):\n%s", ILogger::ELL_INFO, static_cast(tailLines.size()), tail.c_str()); + } + + std::stringstream ss; + for (const auto& file : archiveFiles) + { + ss << "ID: " << file.ID; + ss << " offset: " << file.offset; + ss << " path relative od archive: " << file.pathRelativeToArchive; + ss << " size: " << file.size << '\n'; + } + + m_logger->log(ss.str().c_str(), ILogger::ELL_PERFORMANCE); + + return true; + } + + void onAppTerminated_impl() override + { + } + + void workLoopBody() override + { + } + + bool keepRunning() override + { + return false; + } +}; + +NBL_MAIN_FUNC(ZipArchiveLoaderTest) diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index f014bd1cb..724bac2e8 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -5,19 +5,21 @@ #include "nbl/examples/examples.hpp" #include "app_resources/common.hlsl" -#include "ITester.h" using namespace nbl; -class CIntrinsicsTester final : public ITester +class CIntrinsicsTester final : public ITester { + using base_t = ITester; + public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); + CIntrinsicsTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; +private: + IntrinsicsIntputTestValues generateInputTestValues() override + { std::uniform_real_distribution realDistributionNeg(-50.0f, -1.0f); std::uniform_real_distribution realDistributionPos(1.0f, 50.0f); std::uniform_real_distribution realDistributionZeroToOne(0.0f, 1.0f); @@ -26,262 +28,234 @@ class 
CIntrinsicsTester final : public ITester std::uniform_int_distribution intDistribution(-100, 100); std::uniform_int_distribution uintDistribution(0, 100); - m_logger->log("intrinsics.hlsl TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - IntrinsicsIntputTestValues testInput; - testInput.bitCount = intDistribution(mt); - testInput.crossLhs = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.crossRhs = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.clampVal = realDistribution(mt); - testInput.clampMin = realDistributionNeg(mt); - testInput.clampMax = realDistributionPos(mt); - testInput.length = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.normalize = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.dotLhs = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.dotRhs = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.determinant = float32_t3x3( - realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt), - realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt), - realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt) - ); - testInput.findMSB = realDistribution(mt); - testInput.findLSB = realDistribution(mt); - testInput.inverse = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.transpose = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), 
realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.mulLhs = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.mulRhs = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.minA = realDistribution(mt); - testInput.minB = realDistribution(mt); - testInput.maxA = realDistribution(mt); - testInput.maxB = realDistribution(mt); - testInput.rsqrt = realDistributionPos(mt); - testInput.bitReverse = realDistribution(mt); - testInput.frac = realDistribution(mt); - testInput.mixX = realDistributionNeg(mt); - testInput.mixY = realDistributionPos(mt); - testInput.mixA = realDistributionZeroToOne(mt); - testInput.sign = realDistribution(mt); - testInput.radians = realDistribution(mt); - testInput.degrees = realDistribution(mt); - testInput.stepEdge = realDistribution(mt); - testInput.stepX = realDistribution(mt); - testInput.smoothStepEdge0 = realDistributionNeg(mt); - testInput.smoothStepEdge1 = realDistributionPos(mt); - testInput.smoothStepX = realDistribution(mt); - testInput.addCarryA = std::numeric_limits::max() - uintDistribution(mt); - testInput.addCarryB = uintDistribution(mt); - testInput.subBorrowA = uintDistribution(mt); - testInput.subBorrowB = uintDistribution(mt); - - testInput.bitCountVec = int32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); - testInput.clampValVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.clampMinVec = float32_t3(realDistributionNeg(mt), realDistributionNeg(mt), realDistributionNeg(mt)); - testInput.clampMaxVec = float32_t3(realDistributionPos(mt), 
realDistributionPos(mt), realDistributionPos(mt)); - testInput.findMSBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.findLSBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.minAVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.minBVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.maxAVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.maxBVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.rsqrtVec = float32_t3(realDistributionPos(mt), realDistributionPos(mt), realDistributionPos(mt)); - testInput.bitReverseVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.fracVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.mixXVec = float32_t3(realDistributionNeg(mt), realDistributionNeg(mt), realDistributionNeg(mt)); - testInput.mixYVec = float32_t3(realDistributionPos(mt), realDistributionPos(mt), realDistributionPos(mt)); - testInput.mixAVec = float32_t3(realDistributionZeroToOne(mt), realDistributionZeroToOne(mt), realDistributionZeroToOne(mt)); - - testInput.signVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.radiansVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.degreesVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.stepEdgeVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.stepXVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.smoothStepEdge0Vec = float32_t3(realDistributionNeg(mt), realDistributionNeg(mt), realDistributionNeg(mt)); - testInput.smoothStepEdge1Vec = 
float32_t3(realDistributionPos(mt), realDistributionPos(mt), realDistributionPos(mt)); - testInput.smoothStepXVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.faceForwardN = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.faceForwardI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.faceForwardNref = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.reflectI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.reflectN = glm::normalize(float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt))); - testInput.refractI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.refractN = glm::normalize(float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt))); - testInput.refractEta = realDistribution(mt); - testInput.addCarryAVec = uint32_t3(std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt)); - testInput.addCarryBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.subBorrowAVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.subBorrowBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - IntrinsicsTestValues expected; - expected.bitCount = glm::bitCount(testInput.bitCount); - expected.clamp = glm::clamp(testInput.clampVal, testInput.clampMin, testInput.clampMax); - expected.length = glm::length(testInput.length); - expected.dot = glm::dot(testInput.dotLhs, testInput.dotRhs); - expected.determinant = 
glm::determinant(reinterpret_cast(testInput.determinant)); - expected.findMSB = glm::findMSB(testInput.findMSB); - expected.findLSB = glm::findLSB(testInput.findLSB); - expected.min = glm::min(testInput.minA, testInput.minB); - expected.max = glm::max(testInput.maxA, testInput.maxB); - expected.rsqrt = (1.0f / std::sqrt(testInput.rsqrt)); - expected.mix = std::lerp(testInput.mixX, testInput.mixY, testInput.mixA); - expected.sign = glm::sign(testInput.sign); - expected.radians = glm::radians(testInput.radians); - expected.degrees = glm::degrees(testInput.degrees); - expected.step = glm::step(testInput.stepEdge, testInput.stepX); - expected.smoothStep = glm::smoothstep(testInput.smoothStepEdge0, testInput.smoothStepEdge1, testInput.smoothStepX); - - expected.addCarry.result = glm::uaddCarry(testInput.addCarryA, testInput.addCarryB, expected.addCarry.carry); - expected.subBorrow.result = glm::usubBorrow(testInput.subBorrowA, testInput.subBorrowB, expected.subBorrow.borrow); + IntrinsicsIntputTestValues testInput; + testInput.bitCount = intDistribution(getRandomEngine()); + testInput.crossLhs = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.crossRhs = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.clampVal = realDistribution(getRandomEngine()); + testInput.clampMin = realDistributionNeg(getRandomEngine()); + testInput.clampMax = realDistributionPos(getRandomEngine()); + testInput.length = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.normalize = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.dotLhs = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), 
realDistributionSmall(getRandomEngine())); + testInput.dotRhs = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.determinant = float32_t3x3( + realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), + realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), + realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()) + ); + testInput.findMSB = realDistribution(getRandomEngine()); + testInput.findLSB = realDistribution(getRandomEngine()); + testInput.inverse = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.transpose = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.mulLhs = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.mulRhs = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), 
realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.minA = realDistribution(getRandomEngine()); + testInput.minB = realDistribution(getRandomEngine()); + testInput.maxA = realDistribution(getRandomEngine()); + testInput.maxB = realDistribution(getRandomEngine()); + testInput.rsqrt = realDistributionPos(getRandomEngine()); + testInput.bitReverse = realDistribution(getRandomEngine()); + testInput.frac = realDistribution(getRandomEngine()); + testInput.mixX = realDistributionNeg(getRandomEngine()); + testInput.mixY = realDistributionPos(getRandomEngine()); + testInput.mixA = realDistributionZeroToOne(getRandomEngine()); + testInput.sign = realDistribution(getRandomEngine()); + testInput.radians = realDistribution(getRandomEngine()); + testInput.degrees = realDistribution(getRandomEngine()); + testInput.stepEdge = realDistribution(getRandomEngine()); + testInput.stepX = realDistribution(getRandomEngine()); + testInput.smoothStepEdge0 = realDistributionNeg(getRandomEngine()); + testInput.smoothStepEdge1 = realDistributionPos(getRandomEngine()); + testInput.smoothStepX = realDistribution(getRandomEngine()); - expected.frac = testInput.frac - std::floor(testInput.frac); - expected.bitReverse = glm::bitfieldReverse(testInput.bitReverse); + testInput.bitCountVec = int32_t3(intDistribution(getRandomEngine()), intDistribution(getRandomEngine()), intDistribution(getRandomEngine())); + testInput.clampValVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.clampMinVec = float32_t3(realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine())); + testInput.clampMaxVec = 
float32_t3(realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.findMSBVec = uint32_t3(uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine())); + testInput.findLSBVec = uint32_t3(uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine())); + testInput.minAVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.minBVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.maxAVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.maxBVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.rsqrtVec = float32_t3(realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.bitReverseVec = uint32_t3(uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine())); + testInput.fracVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.mixXVec = float32_t3(realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine())); + testInput.mixYVec = float32_t3(realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.mixAVec = float32_t3(realDistributionZeroToOne(getRandomEngine()), realDistributionZeroToOne(getRandomEngine()), realDistributionZeroToOne(getRandomEngine())); - expected.normalize = glm::normalize(testInput.normalize); - 
expected.cross = glm::cross(testInput.crossLhs, testInput.crossRhs); - expected.bitCountVec = int32_t3(glm::bitCount(testInput.bitCountVec.x), glm::bitCount(testInput.bitCountVec.y), glm::bitCount(testInput.bitCountVec.z)); - expected.clampVec = float32_t3( - glm::clamp(testInput.clampValVec.x, testInput.clampMinVec.x, testInput.clampMaxVec.x), - glm::clamp(testInput.clampValVec.y, testInput.clampMinVec.y, testInput.clampMaxVec.y), - glm::clamp(testInput.clampValVec.z, testInput.clampMinVec.z, testInput.clampMaxVec.z) - ); - expected.findMSBVec = glm::findMSB(testInput.findMSBVec); - expected.findLSBVec = glm::findLSB(testInput.findLSBVec); - expected.minVec = float32_t3( - glm::min(testInput.minAVec.x, testInput.minBVec.x), - glm::min(testInput.minAVec.y, testInput.minBVec.y), - glm::min(testInput.minAVec.z, testInput.minBVec.z) - ); - expected.maxVec = float32_t3( - glm::max(testInput.maxAVec.x, testInput.maxBVec.x), - glm::max(testInput.maxAVec.y, testInput.maxBVec.y), - glm::max(testInput.maxAVec.z, testInput.maxBVec.z) - ); - expected.rsqrtVec = float32_t3(1.0f / std::sqrt(testInput.rsqrtVec.x), 1.0f / std::sqrt(testInput.rsqrtVec.y), 1.0f / std::sqrt(testInput.rsqrtVec.z)); - expected.bitReverseVec = glm::bitfieldReverse(testInput.bitReverseVec); - expected.fracVec = float32_t3( - testInput.fracVec.x - std::floor(testInput.fracVec.x), - testInput.fracVec.y - std::floor(testInput.fracVec.y), - testInput.fracVec.z - std::floor(testInput.fracVec.z)); - expected.mixVec.x = std::lerp(testInput.mixXVec.x, testInput.mixYVec.x, testInput.mixAVec.x); - expected.mixVec.y = std::lerp(testInput.mixXVec.y, testInput.mixYVec.y, testInput.mixAVec.y); - expected.mixVec.z = std::lerp(testInput.mixXVec.z, testInput.mixYVec.z, testInput.mixAVec.z); + testInput.signVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.radiansVec = float32_t3(realDistribution(getRandomEngine()), 
realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.degreesVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.stepEdgeVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.stepXVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.smoothStepEdge0Vec = float32_t3(realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine())); + testInput.smoothStepEdge1Vec = float32_t3(realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.smoothStepXVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.faceForwardN = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.faceForwardI = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.faceForwardNref = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.reflectI = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.reflectN = glm::normalize(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()))); + testInput.refractI = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.refractN = glm::normalize(float32_t3(realDistribution(getRandomEngine()), 
realDistribution(getRandomEngine()), realDistribution(getRandomEngine()))); + testInput.refractEta = realDistribution(getRandomEngine()); - expected.signVec = glm::sign(testInput.signVec); - expected.radiansVec = glm::radians(testInput.radiansVec); - expected.degreesVec = glm::degrees(testInput.degreesVec); - expected.stepVec = glm::step(testInput.stepEdgeVec, testInput.stepXVec); - expected.smoothStepVec = glm::smoothstep(testInput.smoothStepEdge0Vec, testInput.smoothStepEdge1Vec, testInput.smoothStepXVec); - expected.faceForward = glm::faceforward(testInput.faceForwardN, testInput.faceForwardI, testInput.faceForwardNref); - expected.reflect = glm::reflect(testInput.reflectI, testInput.reflectN); - expected.refract = glm::refract(testInput.refractI, testInput.refractN, testInput.refractEta); + return testInput; + } - expected.addCarryVec.result = glm::uaddCarry(testInput.addCarryAVec, testInput.addCarryBVec, expected.addCarryVec.carry); - expected.subBorrowVec.result = glm::usubBorrow(testInput.subBorrowAVec, testInput.subBorrowBVec, expected.subBorrowVec.borrow); + IntrinsicsTestValues determineExpectedResults(const IntrinsicsIntputTestValues& testInput) override + { + IntrinsicsTestValues expected; + expected.bitCount = glm::bitCount(testInput.bitCount); + expected.clamp = glm::clamp(testInput.clampVal, testInput.clampMin, testInput.clampMax); + expected.length = glm::length(testInput.length); + expected.dot = glm::dot(testInput.dotLhs, testInput.dotRhs); + expected.determinant = glm::determinant(reinterpret_cast(testInput.determinant)); + expected.findMSB = glm::findMSB(testInput.findMSB); + expected.findLSB = glm::findLSB(testInput.findLSB); + expected.min = glm::min(testInput.minA, testInput.minB); + expected.max = glm::max(testInput.maxA, testInput.maxB); + expected.rsqrt = (1.0f / std::sqrt(testInput.rsqrt)); + expected.mix = std::lerp(testInput.mixX, testInput.mixY, testInput.mixA); + expected.sign = glm::sign(testInput.sign); + expected.radians = 
glm::radians(testInput.radians); + expected.degrees = glm::degrees(testInput.degrees); + expected.step = glm::step(testInput.stepEdge, testInput.stepX); + expected.smoothStep = glm::smoothstep(testInput.smoothStepEdge0, testInput.smoothStepEdge1, testInput.smoothStepX); - auto mulGlm = nbl::hlsl::mul(testInput.mulLhs, testInput.mulRhs); - expected.mul = reinterpret_cast(mulGlm); - auto transposeGlm = glm::transpose(reinterpret_cast(testInput.transpose)); - expected.transpose = reinterpret_cast(transposeGlm); - auto inverseGlm = glm::inverse(reinterpret_cast(testInput.inverse)); - expected.inverse = reinterpret_cast(inverseGlm); + expected.addCarry.result = glm::uaddCarry(testInput.addCarryA, testInput.addCarryB, expected.addCarry.carry); + expected.subBorrow.result = glm::usubBorrow(testInput.subBorrowA, testInput.subBorrowB, expected.subBorrow.borrow); - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); - } - m_logger->log("intrinsics.hlsl TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - } + expected.frac = testInput.frac - std::floor(testInput.frac); + expected.bitReverse = glm::bitfieldReverse(testInput.bitReverse); -private: - inline static constexpr int Iterations = 100u; + expected.normalize = glm::normalize(testInput.normalize); + expected.cross = glm::cross(testInput.crossLhs, testInput.crossRhs); + expected.bitCountVec = int32_t3(glm::bitCount(testInput.bitCountVec.x), glm::bitCount(testInput.bitCountVec.y), glm::bitCount(testInput.bitCountVec.z)); + expected.clampVec = float32_t3( + glm::clamp(testInput.clampValVec.x, testInput.clampMinVec.x, testInput.clampMaxVec.x), + glm::clamp(testInput.clampValVec.y, testInput.clampMinVec.y, testInput.clampMaxVec.y), + glm::clamp(testInput.clampValVec.z, testInput.clampMinVec.z, testInput.clampMaxVec.z) + ); + expected.findMSBVec = glm::findMSB(testInput.findMSBVec); + expected.findLSBVec = glm::findLSB(testInput.findLSBVec); + expected.minVec = float32_t3( + 
glm::min(testInput.minAVec.x, testInput.minBVec.x), + glm::min(testInput.minAVec.y, testInput.minBVec.y), + glm::min(testInput.minAVec.z, testInput.minBVec.z) + ); + expected.maxVec = float32_t3( + glm::max(testInput.maxAVec.x, testInput.maxBVec.x), + glm::max(testInput.maxAVec.y, testInput.maxBVec.y), + glm::max(testInput.maxAVec.z, testInput.maxBVec.z) + ); + expected.rsqrtVec = float32_t3(1.0f / std::sqrt(testInput.rsqrtVec.x), 1.0f / std::sqrt(testInput.rsqrtVec.y), 1.0f / std::sqrt(testInput.rsqrtVec.z)); + expected.bitReverseVec = glm::bitfieldReverse(testInput.bitReverseVec); + expected.fracVec = float32_t3( + testInput.fracVec.x - std::floor(testInput.fracVec.x), + testInput.fracVec.y - std::floor(testInput.fracVec.y), + testInput.fracVec.z - std::floor(testInput.fracVec.z)); + expected.mixVec.x = std::lerp(testInput.mixXVec.x, testInput.mixYVec.x, testInput.mixAVec.x); + expected.mixVec.y = std::lerp(testInput.mixXVec.y, testInput.mixYVec.y, testInput.mixAVec.y); + expected.mixVec.z = std::lerp(testInput.mixXVec.z, testInput.mixYVec.z, testInput.mixAVec.z); - void performCpuTests(const IntrinsicsIntputTestValues& commonTestInputValues, const IntrinsicsTestValues& expectedTestValues) - { - IntrinsicsTestValues cpuTestValues; + expected.signVec = glm::sign(testInput.signVec); + expected.radiansVec = glm::radians(testInput.radiansVec); + expected.degreesVec = glm::degrees(testInput.degreesVec); + expected.stepVec = glm::step(testInput.stepEdgeVec, testInput.stepXVec); + expected.smoothStepVec = glm::smoothstep(testInput.smoothStepEdge0Vec, testInput.smoothStepEdge1Vec, testInput.smoothStepXVec); + expected.faceForward = glm::faceforward(testInput.faceForwardN, testInput.faceForwardI, testInput.faceForwardNref); + expected.reflect = glm::reflect(testInput.reflectI, testInput.reflectN); + expected.refract = glm::refract(testInput.refractI, testInput.refractN, testInput.refractEta); - cpuTestValues.fillTestValues(commonTestInputValues); - 
verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + expected.addCarryVec.result = glm::uaddCarry(testInput.addCarryAVec, testInput.addCarryBVec, expected.addCarryVec.carry); + expected.subBorrowVec.result = glm::usubBorrow(testInput.subBorrowAVec, testInput.subBorrowBVec, expected.subBorrowVec.borrow); - } + auto mulGlm = nbl::hlsl::mul(testInput.mulLhs, testInput.mulRhs); + expected.mul = reinterpret_cast(mulGlm); + auto transposeGlm = glm::transpose(reinterpret_cast(testInput.transpose)); + expected.transpose = reinterpret_cast(transposeGlm); + auto inverseGlm = glm::inverse(reinterpret_cast(testInput.inverse)); + expected.inverse = reinterpret_cast(inverseGlm); - void performGpuTests(const IntrinsicsIntputTestValues& commonTestInputValues, const IntrinsicsTestValues& expectedTestValues) - { - IntrinsicsTestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + return expected; } - void verifyTestValues(const IntrinsicsTestValues& expectedTestValues, const IntrinsicsTestValues& testValues, ITester::TestType testType) + bool verifyTestResults(const IntrinsicsTestValues& expectedTestValues, const IntrinsicsTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override { - verifyTestValue("bitCount", expectedTestValues.bitCount, testValues.bitCount, testType); - verifyTestValue("clamp", expectedTestValues.clamp, testValues.clamp, testType); - verifyTestValue("length", expectedTestValues.length, testValues.length, testType); - verifyTestValue("dot", expectedTestValues.dot, testValues.dot, testType); - verifyTestValue("determinant", expectedTestValues.determinant, testValues.determinant, testType); - verifyTestValue("findMSB", expectedTestValues.findMSB, testValues.findMSB, testType); - verifyTestValue("findLSB", expectedTestValues.findLSB, testValues.findLSB, testType); - verifyTestValue("min", 
expectedTestValues.min, testValues.min, testType); - verifyTestValue("max", expectedTestValues.max, testValues.max, testType); - verifyTestValue("rsqrt", expectedTestValues.rsqrt, testValues.rsqrt, testType); - verifyTestValue("frac", expectedTestValues.frac, testValues.frac, testType); - verifyTestValue("bitReverse", expectedTestValues.bitReverse, testValues.bitReverse, testType); - verifyTestValue("mix", expectedTestValues.mix, testValues.mix, testType); - verifyTestValue("sign", expectedTestValues.sign, testValues.sign, testType); - verifyTestValue("radians", expectedTestValues.radians, testValues.radians, testType); - verifyTestValue("degrees", expectedTestValues.degrees, testValues.degrees, testType); - verifyTestValue("step", expectedTestValues.step, testValues.step, testType); - verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testType); - verifyTestValue("addCarryResult", expectedTestValues.addCarry.result, testValues.addCarry.result, testType); - verifyTestValue("addCarryCarry", expectedTestValues.addCarry.carry, testValues.addCarry.carry, testType); - verifyTestValue("subBorrowResult", expectedTestValues.subBorrow.result, testValues.subBorrow.result, testType); - verifyTestValue("subBorrowBorrow", expectedTestValues.subBorrow.borrow, testValues.subBorrow.borrow, testType); + bool pass = true; + pass &= verifyTestValue("bitCount", expectedTestValues.bitCount, testValues.bitCount, testIteration, seed, testType); + pass &= verifyTestValue("clamp", expectedTestValues.clamp, testValues.clamp, testIteration, seed, testType); + pass &= verifyTestValue("length", expectedTestValues.length, testValues.length, testIteration, seed, testType, 0.0001); + pass &= verifyTestValue("dot", expectedTestValues.dot, testValues.dot, testIteration, seed, testType, 0.00001); + pass &= verifyTestValue("determinant", expectedTestValues.determinant, testValues.determinant, testIteration, seed, testType); + pass &= verifyTestValue("findMSB", 
expectedTestValues.findMSB, testValues.findMSB, testIteration, seed, testType); + pass &= verifyTestValue("findLSB", expectedTestValues.findLSB, testValues.findLSB, testIteration, seed, testType); + pass &= verifyTestValue("min", expectedTestValues.min, testValues.min, testIteration, seed, testType); + pass &= verifyTestValue("max", expectedTestValues.max, testValues.max, testIteration, seed, testType); + pass &= verifyTestValue("rsqrt", expectedTestValues.rsqrt, testValues.rsqrt, testIteration, seed, testType); + pass &= verifyTestValue("frac", expectedTestValues.frac, testValues.frac, testIteration, seed, testType); + pass &= verifyTestValue("bitReverse", expectedTestValues.bitReverse, testValues.bitReverse, testIteration, seed, testType); + pass &= verifyTestValue("mix", expectedTestValues.mix, testValues.mix, testIteration, seed, testType); + pass &= verifyTestValue("sign", expectedTestValues.sign, testValues.sign, testIteration, seed, testType); + pass &= verifyTestValue("radians", expectedTestValues.radians, testValues.radians, testIteration, seed, testType, 0.00001); + pass &= verifyTestValue("degrees", expectedTestValues.degrees, testValues.degrees, testIteration, seed, testType, 0.001); + pass &= verifyTestValue("step", expectedTestValues.step, testValues.step, testIteration, seed, testType); + pass &= verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testIteration, seed, testType); + pass &= verifyTestValue("addCarryResult", expectedTestValues.addCarry.result, testValues.addCarry.result, testIteration, seed, testType); + pass &= verifyTestValue("addCarryCarry", expectedTestValues.addCarry.carry, testValues.addCarry.carry, testIteration, seed, testType); + pass &= verifyTestValue("subBorrowResult", expectedTestValues.subBorrow.result, testValues.subBorrow.result, testIteration, seed, testType); + pass &= verifyTestValue("subBorrowBorrow", expectedTestValues.subBorrow.borrow, testValues.subBorrow.borrow, testIteration, 
seed, testType); - verifyTestVector3dValue("normalize", expectedTestValues.normalize, testValues.normalize, testType); - verifyTestVector3dValue("cross", expectedTestValues.cross, testValues.cross, testType); - verifyTestVector3dValue("bitCountVec", expectedTestValues.bitCountVec, testValues.bitCountVec, testType); - verifyTestVector3dValue("clampVec", expectedTestValues.clampVec, testValues.clampVec, testType); - verifyTestVector3dValue("findMSBVec", expectedTestValues.findMSBVec, testValues.findMSBVec, testType); - verifyTestVector3dValue("findLSBVec", expectedTestValues.findLSBVec, testValues.findLSBVec, testType); - verifyTestVector3dValue("minVec", expectedTestValues.minVec, testValues.minVec, testType); - verifyTestVector3dValue("maxVec", expectedTestValues.maxVec, testValues.maxVec, testType); - verifyTestVector3dValue("rsqrtVec", expectedTestValues.rsqrtVec, testValues.rsqrtVec, testType); - verifyTestVector3dValue("bitReverseVec", expectedTestValues.bitReverseVec, testValues.bitReverseVec, testType); - verifyTestVector3dValue("fracVec", expectedTestValues.fracVec, testValues.fracVec, testType); - verifyTestVector3dValue("mixVec", expectedTestValues.mixVec, testValues.mixVec, testType); + pass &= verifyTestValue("normalize", expectedTestValues.normalize, testValues.normalize, testIteration, seed, testType, 0.000001); + pass &= verifyTestValue("cross", expectedTestValues.cross, testValues.cross, testIteration, seed, testType); + pass &= verifyTestValue("bitCountVec", expectedTestValues.bitCountVec, testValues.bitCountVec, testIteration, seed, testType); + pass &= verifyTestValue("clampVec", expectedTestValues.clampVec, testValues.clampVec, testIteration, seed, testType); + pass &= verifyTestValue("findMSBVec", expectedTestValues.findMSBVec, testValues.findMSBVec, testIteration, seed, testType); + pass &= verifyTestValue("findLSBVec", expectedTestValues.findLSBVec, testValues.findLSBVec, testIteration, seed, testType); + pass &= verifyTestValue("minVec", 
expectedTestValues.minVec, testValues.minVec, testIteration, seed, testType); + pass &= verifyTestValue("maxVec", expectedTestValues.maxVec, testValues.maxVec, testIteration, seed, testType); + pass &= verifyTestValue("rsqrtVec", expectedTestValues.rsqrtVec, testValues.rsqrtVec, testIteration, seed, testType); + pass &= verifyTestValue("bitReverseVec", expectedTestValues.bitReverseVec, testValues.bitReverseVec, testIteration, seed, testType); + pass &= verifyTestValue("fracVec", expectedTestValues.fracVec, testValues.fracVec, testIteration, seed, testType); + pass &= verifyTestValue("mixVec", expectedTestValues.mixVec, testValues.mixVec, testIteration, seed, testType); - verifyTestVector3dValue("signVec", expectedTestValues.signVec, testValues.signVec, testType); - verifyTestVector3dValue("radiansVec", expectedTestValues.radiansVec, testValues.radiansVec, testType); - verifyTestVector3dValue("degreesVec", expectedTestValues.degreesVec, testValues.degreesVec, testType); - verifyTestVector3dValue("stepVec", expectedTestValues.stepVec, testValues.stepVec, testType); - verifyTestVector3dValue("smoothStepVec", expectedTestValues.smoothStepVec, testValues.smoothStepVec, testType); - verifyTestVector3dValue("faceForward", expectedTestValues.faceForward, testValues.faceForward, testType); - verifyTestVector3dValue("reflect", expectedTestValues.reflect, testValues.reflect, testType); - verifyTestVector3dValue("refract", expectedTestValues.refract, testValues.refract, testType); - verifyTestVector3dValue("addCarryVecResult", expectedTestValues.addCarryVec.result, testValues.addCarryVec.result, testType); - verifyTestVector3dValue("addCarryVecCarry", expectedTestValues.addCarryVec.carry, testValues.addCarryVec.carry, testType); - verifyTestVector3dValue("subBorrowVecResult", expectedTestValues.subBorrowVec.result, testValues.subBorrowVec.result, testType); - verifyTestVector3dValue("subBorrowVecBorrow", expectedTestValues.subBorrowVec.borrow, testValues.subBorrowVec.borrow, 
testType); + pass &= verifyTestValue("signVec", expectedTestValues.signVec, testValues.signVec, testIteration, seed, testType); + pass &= verifyTestValue("radiansVec", expectedTestValues.radiansVec, testValues.radiansVec, testIteration, seed, testType, 0.00001); + pass &= verifyTestValue("degreesVec", expectedTestValues.degreesVec, testValues.degreesVec, testIteration, seed, testType, 0.001); + pass &= verifyTestValue("stepVec", expectedTestValues.stepVec, testValues.stepVec, testIteration, seed, testType); + pass &= verifyTestValue("smoothStepVec", expectedTestValues.smoothStepVec, testValues.smoothStepVec, testIteration, seed, testType); + pass &= verifyTestValue("faceForward", expectedTestValues.faceForward, testValues.faceForward, testIteration, seed, testType); + pass &= verifyTestValue("reflect", expectedTestValues.reflect, testValues.reflect, testIteration, seed, testType, 0.001); + pass &= verifyTestValue("refract", expectedTestValues.refract, testValues.refract, testIteration, seed, testType, 0.01); + pass &= verifyTestValue("addCarryVecResult", expectedTestValues.addCarryVec.result, testValues.addCarryVec.result, testIteration, seed, testType); + pass &= verifyTestValue("addCarryVecCarry", expectedTestValues.addCarryVec.carry, testValues.addCarryVec.carry, testIteration, seed, testType); + pass &= verifyTestValue("subBorrowVecResult", expectedTestValues.subBorrowVec.result, testValues.subBorrowVec.result, testIteration, seed, testType); + pass &= verifyTestValue("subBorrowVecBorrow", expectedTestValues.subBorrowVec.borrow, testValues.subBorrowVec.borrow, testIteration, seed, testType); - verifyTestMatrix3x3Value("mul", expectedTestValues.mul, testValues.mul, testType); - verifyTestMatrix3x3Value("transpose", expectedTestValues.transpose, testValues.transpose, testType); - verifyTestMatrix3x3Value("inverse", expectedTestValues.inverse, testValues.inverse, testType); + pass &= verifyTestValue("mul", expectedTestValues.mul, testValues.mul, testIteration, 
seed, testType); + pass &= verifyTestValue("transpose", expectedTestValues.transpose, testValues.transpose, testIteration, seed, testType); + pass &= verifyTestValue("inverse", expectedTestValues.inverse, testValues.inverse, testIteration, seed, testType); + return pass; } }; diff --git a/22_CppCompat/CMakeLists.txt b/22_CppCompat/CMakeLists.txt index b7e52875d..d7a203d2d 100644 --- a/22_CppCompat/CMakeLists.txt +++ b/22_CppCompat/CMakeLists.txt @@ -21,4 +21,56 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +if(MSVC) + target_compile_options("${EXECUTABLE_NAME}" PUBLIC "/fp:strict") +else() + target_compile_options("${EXECUTABLE_NAME}" PUBLIC -ffloat-store -frounding-math -fsignaling-nans -ftrapping-math) +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/test.comp.hlsl", + "KEY": "test", + }, + { + "INPUT": "app_resources/intrinsicsTest.comp.hlsl", + "KEY": "intrinsicsTest", + }, + { + "INPUT": "app_resources/tgmathTest.comp.hlsl", + "KEY": "tgmathTest", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND 
${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/22_CppCompat/CTgmathTester.h b/22_CppCompat/CTgmathTester.h index 63b0e483e..59645be72 100644 --- a/22_CppCompat/CTgmathTester.h +++ b/22_CppCompat/CTgmathTester.h @@ -3,358 +3,339 @@ #include "nbl/examples/examples.hpp" - #include "app_resources/common.hlsl" -#include "ITester.h" - +#include "nbl/examples/Tester/ITester.h" using namespace nbl; -class CTgmathTester final : public ITester +class CTgmathTester final : public ITester { + using base_t = ITester; + public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); + CTgmathTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; - std::uniform_real_distribution realDistributionNeg(-50.0f, -1.0f); - std::uniform_real_distribution realDistributionPos(1.0f, 50.0f); +private: + TgmathIntputTestValues generateInputTestValues() override + { std::uniform_real_distribution realDistribution(-100.0f, 100.0f); std::uniform_real_distribution realDistributionSmall(1.0f, 4.0f); std::uniform_int_distribution intDistribution(-100, 100); std::uniform_int_distribution coinFlipDistribution(0, 1); - m_logger->log("tgmath.hlsl TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - TgmathIntputTestValues testInput; - testInput.floor = realDistribution(mt); - testInput.isnan = coinFlipDistribution(mt) ? realDistribution(mt) : std::numeric_limits::quiet_NaN(); - testInput.isinf = coinFlipDistribution(mt) ? 
realDistribution(mt) : std::numeric_limits::infinity(); - testInput.powX = realDistributionSmall(mt); - testInput.powY = realDistributionSmall(mt); - testInput.exp = realDistributionSmall(mt); - testInput.exp2 = realDistributionSmall(mt); - testInput.log = realDistribution(mt); - testInput.log2 = realDistribution(mt); - testInput.absF = realDistribution(mt); - testInput.absI = intDistribution(mt); - testInput.sqrt = realDistribution(mt); - testInput.sin = realDistribution(mt); - testInput.cos = realDistribution(mt); - testInput.tan = realDistribution(mt); - testInput.asin = realDistribution(mt); - testInput.atan = realDistribution(mt); - testInput.sinh = realDistribution(mt); - testInput.cosh = realDistribution(mt); - testInput.tanh = realDistribution(mt); - testInput.asinh = realDistribution(mt); - testInput.acosh = realDistribution(mt); - testInput.atanh = realDistribution(mt); - testInput.atan2X = realDistribution(mt); - testInput.atan2Y = realDistribution(mt); - testInput.acos = realDistribution(mt); - testInput.modf = realDistribution(mt); - testInput.round = realDistribution(mt); - testInput.roundEven = coinFlipDistribution(mt) ? 
realDistributionSmall(mt) : (static_cast(intDistribution(mt) / 2) + 0.5f); - testInput.trunc = realDistribution(mt); - testInput.ceil = realDistribution(mt); - testInput.fmaX = realDistribution(mt); - testInput.fmaY = realDistribution(mt); - testInput.fmaZ = realDistribution(mt); - testInput.ldexpArg = realDistributionSmall(mt); - testInput.ldexpExp = intDistribution(mt); - testInput.erf = realDistribution(mt); - testInput.erfInv = realDistribution(mt); - - testInput.floorVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.isnanVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.isinfVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.powXVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.powYVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.expVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.exp2Vec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.logVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.log2Vec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.absFVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.absIVec = int32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); - testInput.sqrtVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.sinVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.cosVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.tanVec = float32_t3(realDistribution(mt), realDistribution(mt), 
realDistribution(mt)); - testInput.asinVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atanVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.sinhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.coshVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.tanhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.asinhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.acoshVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atanhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atan2XVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atan2YVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.acosVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.modfVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.ldexpArgVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.ldexpExpVec = float32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); - testInput.erfVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.erfInvVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - - testInput.modfStruct = realDistribution(mt); - testInput.modfStructVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.frexpStruct = realDistribution(mt); - testInput.frexpStructVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); + TgmathIntputTestValues testInput; + 
testInput.floor = realDistribution(getRandomEngine()); + testInput.isnan = coinFlipDistribution(getRandomEngine()) ? realDistribution(getRandomEngine()) : std::numeric_limits::quiet_NaN(); + testInput.isinf = coinFlipDistribution(getRandomEngine()) ? realDistribution(getRandomEngine()) : std::numeric_limits::infinity(); + testInput.powX = realDistributionSmall(getRandomEngine()); + testInput.powY = realDistributionSmall(getRandomEngine()); + testInput.exp = realDistributionSmall(getRandomEngine()); + testInput.exp2 = realDistributionSmall(getRandomEngine()); + testInput.log = realDistribution(getRandomEngine()); + testInput.log2 = realDistribution(getRandomEngine()); + testInput.absF = realDistribution(getRandomEngine()); + testInput.absI = intDistribution(getRandomEngine()); + testInput.sqrt = realDistribution(getRandomEngine()); + testInput.sin = realDistribution(getRandomEngine()); + testInput.cos = realDistribution(getRandomEngine()); + testInput.tan = realDistribution(getRandomEngine()); + testInput.asin = realDistribution(getRandomEngine()); + testInput.atan = realDistribution(getRandomEngine()); + testInput.sinh = realDistribution(getRandomEngine()); + testInput.cosh = realDistribution(getRandomEngine()); + testInput.tanh = realDistribution(getRandomEngine()); + testInput.asinh = realDistribution(getRandomEngine()); + testInput.acosh = realDistribution(getRandomEngine()); + testInput.atanh = realDistribution(getRandomEngine()); + testInput.atan2X = realDistribution(getRandomEngine()); + testInput.atan2Y = realDistribution(getRandomEngine()); + testInput.acos = realDistribution(getRandomEngine()); + testInput.modf = realDistribution(getRandomEngine()); + testInput.round = realDistribution(getRandomEngine()); + testInput.roundEven = coinFlipDistribution(getRandomEngine()) ? 
realDistributionSmall(getRandomEngine()) : (static_cast(intDistribution(getRandomEngine()) / 2) + 0.5f); + testInput.trunc = realDistribution(getRandomEngine()); + testInput.ceil = realDistribution(getRandomEngine()); + testInput.fmaX = realDistribution(getRandomEngine()); + testInput.fmaY = realDistribution(getRandomEngine()); + testInput.fmaZ = realDistribution(getRandomEngine()); + testInput.ldexpArg = realDistributionSmall(getRandomEngine()); + testInput.ldexpExp = intDistribution(getRandomEngine()); + testInput.erf = realDistribution(getRandomEngine()); + testInput.erfInv = realDistribution(getRandomEngine()); + + testInput.floorVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.isnanVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.isinfVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.powXVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.powYVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.expVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.exp2Vec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.logVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.log2Vec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + 
testInput.absFVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.absIVec = int32_t3(intDistribution(getRandomEngine()), intDistribution(getRandomEngine()), intDistribution(getRandomEngine())); + testInput.sqrtVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.sinVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.cosVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.tanVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.asinVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.atanVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.sinhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.coshVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.tanhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.asinhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.acoshVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.atanhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), 
realDistribution(getRandomEngine())); + testInput.atan2XVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.atan2YVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.acosVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.modfVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.ldexpArgVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.ldexpExpVec = float32_t3(intDistribution(getRandomEngine()), intDistribution(getRandomEngine()), intDistribution(getRandomEngine())); + testInput.erfVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.erfInvVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + + testInput.modfStruct = realDistribution(getRandomEngine()); + testInput.modfStructVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.frexpStruct = realDistribution(getRandomEngine()); + testInput.frexpStructVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + + return testInput; + } - // use std library functions to determine expected test values, the output of functions from tgmath.hlsl will be verified against these values - TgmathTestValues expected; - expected.floor = std::floor(testInput.floor); - expected.isnan = std::isnan(testInput.isnan); - expected.isinf = std::isinf(testInput.isinf); - 
expected.pow = std::pow(testInput.powX, testInput.powY); - expected.exp = std::exp(testInput.exp); - expected.exp2 = std::exp2(testInput.exp2); - expected.log = std::log(testInput.log); - expected.log2 = std::log2(testInput.log2); - expected.absF = std::abs(testInput.absF); - expected.absI = std::abs(testInput.absI); - expected.sqrt = std::sqrt(testInput.sqrt); - expected.sin = std::sin(testInput.sin); - expected.cos = std::cos(testInput.cos); - expected.acos = std::acos(testInput.acos); - expected.tan = std::tan(testInput.tan); - expected.asin = std::asin(testInput.asin); - expected.atan = std::atan(testInput.atan); - expected.sinh = std::sinh(testInput.sinh); - expected.cosh = std::cosh(testInput.cosh); - expected.tanh = std::tanh(testInput.tanh); - expected.asinh = std::asinh(testInput.asinh); - expected.acosh = std::acosh(testInput.acosh); - expected.atanh = std::atanh(testInput.atanh); - expected.atan2 = std::atan2(testInput.atan2Y, testInput.atan2X); - expected.erf = std::erf(testInput.erf); + TgmathTestValues determineExpectedResults(const TgmathIntputTestValues& testInput) override + { + // use std library functions to determine expected test values, the output of functions from tgmath.hlsl will be verified against these values + TgmathTestValues expected; + expected.floor = std::floor(testInput.floor); + expected.isnan = std::isnan(testInput.isnan); + expected.isinf = std::isinf(testInput.isinf); + expected.pow = std::pow(testInput.powX, testInput.powY); + expected.exp = std::exp(testInput.exp); + expected.exp2 = std::exp2(testInput.exp2); + expected.log = std::log(testInput.log); + expected.log2 = std::log2(testInput.log2); + expected.absF = std::abs(testInput.absF); + expected.absI = std::abs(testInput.absI); + expected.sqrt = std::sqrt(testInput.sqrt); + expected.sin = std::sin(testInput.sin); + expected.cos = std::cos(testInput.cos); + expected.acos = std::acos(testInput.acos); + expected.tan = std::tan(testInput.tan); + expected.asin = 
std::asin(testInput.asin); + expected.atan = std::atan(testInput.atan); + expected.sinh = std::sinh(testInput.sinh); + expected.cosh = std::cosh(testInput.cosh); + expected.tanh = std::tanh(testInput.tanh); + expected.asinh = std::asinh(testInput.asinh); + expected.acosh = std::acosh(testInput.acosh); + expected.atanh = std::atanh(testInput.atanh); + expected.atan2 = std::atan2(testInput.atan2Y, testInput.atan2X); + expected.erf = std::erf(testInput.erf); + { + float tmp; + expected.modf = std::modf(testInput.modf, &tmp); + } + expected.round = std::round(testInput.round); + // TODO: uncomment when C++23 + //expected.roundEven = std::roundeven(testInput.roundEven); + // TODO: remove when C++23 + auto roundeven = [](const float& val) -> float { float tmp; - expected.modf = std::modf(testInput.modf, &tmp); - } - expected.round = std::round(testInput.round); - // TODO: uncomment when C++23 - //expected.roundEven = std::roundeven(testInput.roundEven); - // TODO: remove when C++23 - auto roundeven = [](const float& val) -> float + if (std::abs(std::modf(val, &tmp)) == 0.5f) { - float tmp; - if (std::abs(std::modf(val, &tmp)) == 0.5f) - { - int32_t result = static_cast(val); - if (result % 2 != 0) - result >= 0 ? 
++result : --result; - return result; - } - - return std::round(val); - }; - expected.roundEven = roundeven(testInput.roundEven); - - expected.trunc = std::trunc(testInput.trunc); - expected.ceil = std::ceil(testInput.ceil); - expected.fma = std::fma(testInput.fmaX, testInput.fmaY, testInput.fmaZ); - expected.ldexp = std::ldexp(testInput.ldexpArg, testInput.ldexpExp); - - expected.floorVec = float32_t3(std::floor(testInput.floorVec.x), std::floor(testInput.floorVec.y), std::floor(testInput.floorVec.z)); - - expected.isnanVec = float32_t3(std::isnan(testInput.isnanVec.x), std::isnan(testInput.isnanVec.y), std::isnan(testInput.isnanVec.z)); - expected.isinfVec = float32_t3(std::isinf(testInput.isinfVec.x), std::isinf(testInput.isinfVec.y), std::isinf(testInput.isinfVec.z)); - - expected.powVec.x = std::pow(testInput.powXVec.x, testInput.powYVec.x); - expected.powVec.y = std::pow(testInput.powXVec.y, testInput.powYVec.y); - expected.powVec.z = std::pow(testInput.powXVec.z, testInput.powYVec.z); - - expected.expVec = float32_t3(std::exp(testInput.expVec.x), std::exp(testInput.expVec.y), std::exp(testInput.expVec.z)); - expected.exp2Vec = float32_t3(std::exp2(testInput.exp2Vec.x), std::exp2(testInput.exp2Vec.y), std::exp2(testInput.exp2Vec.z)); - expected.logVec = float32_t3(std::log(testInput.logVec.x), std::log(testInput.logVec.y), std::log(testInput.logVec.z)); - expected.log2Vec = float32_t3(std::log2(testInput.log2Vec.x), std::log2(testInput.log2Vec.y), std::log2(testInput.log2Vec.z)); - expected.absFVec = float32_t3(std::abs(testInput.absFVec.x), std::abs(testInput.absFVec.y), std::abs(testInput.absFVec.z)); - expected.absIVec = float32_t3(std::abs(testInput.absIVec.x), std::abs(testInput.absIVec.y), std::abs(testInput.absIVec.z)); - expected.sqrtVec = float32_t3(std::sqrt(testInput.sqrtVec.x), std::sqrt(testInput.sqrtVec.y), std::sqrt(testInput.sqrtVec.z)); - expected.cosVec = float32_t3(std::cos(testInput.cosVec.x), std::cos(testInput.cosVec.y), 
std::cos(testInput.cosVec.z)); - expected.sinVec = float32_t3(std::sin(testInput.sinVec.x), std::sin(testInput.sinVec.y), std::sin(testInput.sinVec.z)); - expected.tanVec = float32_t3(std::tan(testInput.tanVec.x), std::tan(testInput.tanVec.y), std::tan(testInput.tanVec.z)); - expected.asinVec = float32_t3(std::asin(testInput.asinVec.x), std::asin(testInput.asinVec.y), std::asin(testInput.asinVec.z)); - expected.atanVec = float32_t3(std::atan(testInput.atanVec.x), std::atan(testInput.atanVec.y), std::atan(testInput.atanVec.z)); - expected.sinhVec = float32_t3(std::sinh(testInput.sinhVec.x), std::sinh(testInput.sinhVec.y), std::sinh(testInput.sinhVec.z)); - expected.coshVec = float32_t3(std::cosh(testInput.coshVec.x), std::cosh(testInput.coshVec.y), std::cosh(testInput.coshVec.z)); - expected.tanhVec = float32_t3(std::tanh(testInput.tanhVec.x), std::tanh(testInput.tanhVec.y), std::tanh(testInput.tanhVec.z)); - expected.asinhVec = float32_t3(std::asinh(testInput.asinhVec.x), std::asinh(testInput.asinhVec.y), std::asinh(testInput.asinhVec.z)); - expected.acoshVec = float32_t3(std::acosh(testInput.acoshVec.x), std::acosh(testInput.acoshVec.y), std::acosh(testInput.acoshVec.z)); - expected.atanhVec = float32_t3(std::atanh(testInput.atanhVec.x), std::atanh(testInput.atanhVec.y), std::atanh(testInput.atanhVec.z)); - expected.atan2Vec = float32_t3(std::atan2(testInput.atan2YVec.x, testInput.atan2XVec.x), std::atan2(testInput.atan2YVec.y, testInput.atan2XVec.y), std::atan2(testInput.atan2YVec.z, testInput.atan2XVec.z)); - expected.acosVec = float32_t3(std::acos(testInput.acosVec.x), std::acos(testInput.acosVec.y), std::acos(testInput.acosVec.z)); - expected.erfVec = float32_t3(std::erf(testInput.erfVec.x), std::erf(testInput.erfVec.y), std::erf(testInput.erfVec.z)); - { - float tmp; - expected.modfVec = float32_t3(std::modf(testInput.modfVec.x, &tmp), std::modf(testInput.modfVec.y, &tmp), std::modf(testInput.modfVec.z, &tmp)); - } - expected.roundVec = float32_t3( - 
std::round(testInput.roundVec.x), - std::round(testInput.roundVec.y), - std::round(testInput.roundVec.z) - ); - // TODO: uncomment when C++23 - //expected.roundEven = float32_t( - // std::roundeven(testInput.roundEvenVec.x), - // std::roundeven(testInput.roundEvenVec.y), - // std::roundeven(testInput.roundEvenVec.z) - // ); - // TODO: remove when C++23 - expected.roundEvenVec = float32_t3( - roundeven(testInput.roundEvenVec.x), - roundeven(testInput.roundEvenVec.y), - roundeven(testInput.roundEvenVec.z) - ); - - expected.truncVec = float32_t3(std::trunc(testInput.truncVec.x), std::trunc(testInput.truncVec.y), std::trunc(testInput.truncVec.z)); - expected.ceilVec = float32_t3(std::ceil(testInput.ceilVec.x), std::ceil(testInput.ceilVec.y), std::ceil(testInput.ceilVec.z)); - expected.fmaVec = float32_t3( - std::fma(testInput.fmaXVec.x, testInput.fmaYVec.x, testInput.fmaZVec.x), - std::fma(testInput.fmaXVec.y, testInput.fmaYVec.y, testInput.fmaZVec.y), - std::fma(testInput.fmaXVec.z, testInput.fmaYVec.z, testInput.fmaZVec.z) - ); - expected.ldexpVec = float32_t3( - std::ldexp(testInput.ldexpArgVec.x, testInput.ldexpExpVec.x), - std::ldexp(testInput.ldexpArgVec.y, testInput.ldexpExpVec.y), - std::ldexp(testInput.ldexpArgVec.z, testInput.ldexpExpVec.z) - ); - - { - ModfOutput expectedModfStructOutput; - expectedModfStructOutput.fractionalPart = std::modf(testInput.modfStruct, &expectedModfStructOutput.wholeNumberPart); - expected.modfStruct = expectedModfStructOutput; - - ModfOutput expectedModfStructOutputVec; - for (int i = 0; i < 3; ++i) - expectedModfStructOutputVec.fractionalPart[i] = std::modf(testInput.modfStructVec[i], &expectedModfStructOutputVec.wholeNumberPart[i]); - expected.modfStructVec = expectedModfStructOutputVec; - } - - { - FrexpOutput expectedFrexpStructOutput; - expectedFrexpStructOutput.significand = std::frexp(testInput.frexpStruct, &expectedFrexpStructOutput.exponent); - expected.frexpStruct = expectedFrexpStructOutput; - - FrexpOutput 
expectedFrexpStructOutputVec; - for (int i = 0; i < 3; ++i) - expectedFrexpStructOutputVec.significand[i] = std::frexp(testInput.frexpStructVec[i], &expectedFrexpStructOutputVec.exponent[i]); - expected.frexpStructVec = expectedFrexpStructOutputVec; - } - - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); + int32_t result = static_cast(val); + if (result % 2 != 0) + result >= 0 ? ++result : --result; + return result; + } + + return std::round(val); + }; + expected.roundEven = roundeven(testInput.roundEven); + + expected.trunc = std::trunc(testInput.trunc); + expected.ceil = std::ceil(testInput.ceil); + expected.fma = std::fma(testInput.fmaX, testInput.fmaY, testInput.fmaZ); + expected.ldexp = std::ldexp(testInput.ldexpArg, testInput.ldexpExp); + + expected.floorVec = float32_t3(std::floor(testInput.floorVec.x), std::floor(testInput.floorVec.y), std::floor(testInput.floorVec.z)); + + expected.isnanVec = float32_t3(std::isnan(testInput.isnanVec.x), std::isnan(testInput.isnanVec.y), std::isnan(testInput.isnanVec.z)); + expected.isinfVec = float32_t3(std::isinf(testInput.isinfVec.x), std::isinf(testInput.isinfVec.y), std::isinf(testInput.isinfVec.z)); + + expected.powVec.x = std::pow(testInput.powXVec.x, testInput.powYVec.x); + expected.powVec.y = std::pow(testInput.powXVec.y, testInput.powYVec.y); + expected.powVec.z = std::pow(testInput.powXVec.z, testInput.powYVec.z); + + expected.expVec = float32_t3(std::exp(testInput.expVec.x), std::exp(testInput.expVec.y), std::exp(testInput.expVec.z)); + expected.exp2Vec = float32_t3(std::exp2(testInput.exp2Vec.x), std::exp2(testInput.exp2Vec.y), std::exp2(testInput.exp2Vec.z)); + expected.logVec = float32_t3(std::log(testInput.logVec.x), std::log(testInput.logVec.y), std::log(testInput.logVec.z)); + expected.log2Vec = float32_t3(std::log2(testInput.log2Vec.x), std::log2(testInput.log2Vec.y), std::log2(testInput.log2Vec.z)); + expected.absFVec = float32_t3(std::abs(testInput.absFVec.x), 
std::abs(testInput.absFVec.y), std::abs(testInput.absFVec.z)); + expected.absIVec = float32_t3(std::abs(testInput.absIVec.x), std::abs(testInput.absIVec.y), std::abs(testInput.absIVec.z)); + expected.sqrtVec = float32_t3(std::sqrt(testInput.sqrtVec.x), std::sqrt(testInput.sqrtVec.y), std::sqrt(testInput.sqrtVec.z)); + expected.cosVec = float32_t3(std::cos(testInput.cosVec.x), std::cos(testInput.cosVec.y), std::cos(testInput.cosVec.z)); + expected.sinVec = float32_t3(std::sin(testInput.sinVec.x), std::sin(testInput.sinVec.y), std::sin(testInput.sinVec.z)); + expected.tanVec = float32_t3(std::tan(testInput.tanVec.x), std::tan(testInput.tanVec.y), std::tan(testInput.tanVec.z)); + expected.asinVec = float32_t3(std::asin(testInput.asinVec.x), std::asin(testInput.asinVec.y), std::asin(testInput.asinVec.z)); + expected.atanVec = float32_t3(std::atan(testInput.atanVec.x), std::atan(testInput.atanVec.y), std::atan(testInput.atanVec.z)); + expected.sinhVec = float32_t3(std::sinh(testInput.sinhVec.x), std::sinh(testInput.sinhVec.y), std::sinh(testInput.sinhVec.z)); + expected.coshVec = float32_t3(std::cosh(testInput.coshVec.x), std::cosh(testInput.coshVec.y), std::cosh(testInput.coshVec.z)); + expected.tanhVec = float32_t3(std::tanh(testInput.tanhVec.x), std::tanh(testInput.tanhVec.y), std::tanh(testInput.tanhVec.z)); + expected.asinhVec = float32_t3(std::asinh(testInput.asinhVec.x), std::asinh(testInput.asinhVec.y), std::asinh(testInput.asinhVec.z)); + expected.acoshVec = float32_t3(std::acosh(testInput.acoshVec.x), std::acosh(testInput.acoshVec.y), std::acosh(testInput.acoshVec.z)); + expected.atanhVec = float32_t3(std::atanh(testInput.atanhVec.x), std::atanh(testInput.atanhVec.y), std::atanh(testInput.atanhVec.z)); + expected.atan2Vec = float32_t3(std::atan2(testInput.atan2YVec.x, testInput.atan2XVec.x), std::atan2(testInput.atan2YVec.y, testInput.atan2XVec.y), std::atan2(testInput.atan2YVec.z, testInput.atan2XVec.z)); + expected.acosVec = 
float32_t3(std::acos(testInput.acosVec.x), std::acos(testInput.acosVec.y), std::acos(testInput.acosVec.z)); + expected.erfVec = float32_t3(std::erf(testInput.erfVec.x), std::erf(testInput.erfVec.y), std::erf(testInput.erfVec.z)); + { + float tmp; + expected.modfVec = float32_t3(std::modf(testInput.modfVec.x, &tmp), std::modf(testInput.modfVec.y, &tmp), std::modf(testInput.modfVec.z, &tmp)); } - m_logger->log("tgmath.hlsl TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - } + expected.roundVec = float32_t3( + std::round(testInput.roundVec.x), + std::round(testInput.roundVec.y), + std::round(testInput.roundVec.z) + ); + // TODO: uncomment when C++23 + //expected.roundEven = float32_t( + // std::roundeven(testInput.roundEvenVec.x), + // std::roundeven(testInput.roundEvenVec.y), + // std::roundeven(testInput.roundEvenVec.z) + // ); + // TODO: remove when C++23 + expected.roundEvenVec = float32_t3( + roundeven(testInput.roundEvenVec.x), + roundeven(testInput.roundEvenVec.y), + roundeven(testInput.roundEvenVec.z) + ); + + expected.truncVec = float32_t3(std::trunc(testInput.truncVec.x), std::trunc(testInput.truncVec.y), std::trunc(testInput.truncVec.z)); + expected.ceilVec = float32_t3(std::ceil(testInput.ceilVec.x), std::ceil(testInput.ceilVec.y), std::ceil(testInput.ceilVec.z)); + expected.fmaVec = float32_t3( + std::fma(testInput.fmaXVec.x, testInput.fmaYVec.x, testInput.fmaZVec.x), + std::fma(testInput.fmaXVec.y, testInput.fmaYVec.y, testInput.fmaZVec.y), + std::fma(testInput.fmaXVec.z, testInput.fmaYVec.z, testInput.fmaZVec.z) + ); + expected.ldexpVec = float32_t3( + std::ldexp(testInput.ldexpArgVec.x, testInput.ldexpExpVec.x), + std::ldexp(testInput.ldexpArgVec.y, testInput.ldexpExpVec.y), + std::ldexp(testInput.ldexpArgVec.z, testInput.ldexpExpVec.z) + ); -private: - inline static constexpr int Iterations = 100u; + { + ModfOutput expectedModfStructOutput; + expectedModfStructOutput.fractionalPart = std::modf(testInput.modfStruct, 
&expectedModfStructOutput.wholeNumberPart); + expected.modfStruct = expectedModfStructOutput; + + ModfOutput expectedModfStructOutputVec; + for (int i = 0; i < 3; ++i) + expectedModfStructOutputVec.fractionalPart[i] = std::modf(testInput.modfStructVec[i], &expectedModfStructOutputVec.wholeNumberPart[i]); + expected.modfStructVec = expectedModfStructOutputVec; + } - void performCpuTests(const TgmathIntputTestValues& commonTestInputValues, const TgmathTestValues& expectedTestValues) - { - TgmathTestValues cpuTestValues; - cpuTestValues.fillTestValues(commonTestInputValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); - - } + { + FrexpOutput expectedFrexpStructOutput; + expectedFrexpStructOutput.significand = std::frexp(testInput.frexpStruct, &expectedFrexpStructOutput.exponent); + expected.frexpStruct = expectedFrexpStructOutput; + + FrexpOutput expectedFrexpStructOutputVec; + for (int i = 0; i < 3; ++i) + expectedFrexpStructOutputVec.significand[i] = std::frexp(testInput.frexpStructVec[i], &expectedFrexpStructOutputVec.exponent[i]); + expected.frexpStructVec = expectedFrexpStructOutputVec; + } - void performGpuTests(const TgmathIntputTestValues& commonTestInputValues, const TgmathTestValues& expectedTestValues) - { - TgmathTestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + return expected; } - void verifyTestValues(const TgmathTestValues& expectedTestValues, const TgmathTestValues& testValues, ITester::TestType testType) + bool verifyTestResults(const TgmathTestValues& expectedTestValues, const TgmathTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override { // TODO: figure out input for functions: sinh, cosh so output isn't a crazy low number // very low numbers generate comparison errors - verifyTestValue("floor", expectedTestValues.floor, testValues.floor, testType); - 
verifyTestValue("isnan", expectedTestValues.isnan, testValues.isnan, testType); - verifyTestValue("isinf", expectedTestValues.isinf, testValues.isinf, testType); - verifyTestValue("pow", expectedTestValues.pow, testValues.pow, testType); - verifyTestValue("exp", expectedTestValues.exp, testValues.exp, testType); - verifyTestValue("exp2", expectedTestValues.exp2, testValues.exp2, testType); - verifyTestValue("log", expectedTestValues.log, testValues.log, testType); - verifyTestValue("log2", expectedTestValues.log2, testValues.log2, testType); - verifyTestValue("absF", expectedTestValues.absF, testValues.absF, testType); - verifyTestValue("absI", expectedTestValues.absI, testValues.absI, testType); - verifyTestValue("sqrt", expectedTestValues.sqrt, testValues.sqrt, testType); - verifyTestValue("sin", expectedTestValues.sin, testValues.sin, testType); - verifyTestValue("cos", expectedTestValues.cos, testValues.cos, testType); - verifyTestValue("acos", expectedTestValues.acos, testValues.acos, testType); - verifyTestValue("tan", expectedTestValues.tan, testValues.tan, testType); - verifyTestValue("asin", expectedTestValues.asin, testValues.asin, testType); - verifyTestValue("atan", expectedTestValues.atan, testValues.atan, testType); - //verifyTestValue("sinh", expectedTestValues.sinh, testValues.sinh, testType); - //verifyTestValue("cosh", expectedTestValues.cosh, testValues.cosh, testType); - verifyTestValue("tanh", expectedTestValues.tanh, testValues.tanh, testType); - verifyTestValue("asinh", expectedTestValues.asinh, testValues.asinh, testType); - verifyTestValue("acosh", expectedTestValues.acosh, testValues.acosh, testType); - verifyTestValue("atanh", expectedTestValues.atanh, testValues.atanh, testType); - verifyTestValue("atan2", expectedTestValues.atan2, testValues.atan2, testType); - verifyTestValue("modf", expectedTestValues.modf, testValues.modf, testType); - verifyTestValue("round", expectedTestValues.round, testValues.round, testType); - 
verifyTestValue("roundEven", expectedTestValues.roundEven, testValues.roundEven, testType); - verifyTestValue("trunc", expectedTestValues.trunc, testValues.trunc, testType); - verifyTestValue("ceil", expectedTestValues.ceil, testValues.ceil, testType); - verifyTestValue("fma", expectedTestValues.fma, testValues.fma, testType); - verifyTestValue("ldexp", expectedTestValues.ldexp, testValues.ldexp, testType); - verifyTestValue("erf", expectedTestValues.erf, testValues.erf, testType); - //verifyTestValue("erfInv", expectedTestValues.erfInv, testValues.erfInv, testType); - - verifyTestVector3dValue("floorVec", expectedTestValues.floorVec, testValues.floorVec, testType); - verifyTestVector3dValue("isnanVec", expectedTestValues.isnanVec, testValues.isnanVec, testType); - verifyTestVector3dValue("isinfVec", expectedTestValues.isinfVec, testValues.isinfVec, testType); - verifyTestVector3dValue("powVec", expectedTestValues.powVec, testValues.powVec, testType); - verifyTestVector3dValue("expVec", expectedTestValues.expVec, testValues.expVec, testType); - verifyTestVector3dValue("exp2Vec", expectedTestValues.exp2Vec, testValues.exp2Vec, testType); - verifyTestVector3dValue("logVec", expectedTestValues.logVec, testValues.logVec, testType); - verifyTestVector3dValue("log2Vec", expectedTestValues.log2Vec, testValues.log2Vec, testType); - verifyTestVector3dValue("absFVec", expectedTestValues.absFVec, testValues.absFVec, testType); - verifyTestVector3dValue("absIVec", expectedTestValues.absIVec, testValues.absIVec, testType); - verifyTestVector3dValue("sqrtVec", expectedTestValues.sqrtVec, testValues.sqrtVec, testType); - verifyTestVector3dValue("sinVec", expectedTestValues.sinVec, testValues.sinVec, testType); - verifyTestVector3dValue("cosVec", expectedTestValues.cosVec, testValues.cosVec, testType); - verifyTestVector3dValue("acosVec", expectedTestValues.acosVec, testValues.acosVec, testType); - verifyTestVector3dValue("modfVec", expectedTestValues.modfVec, testValues.modfVec, 
testType); - verifyTestVector3dValue("roundVec", expectedTestValues.roundVec, testValues.roundVec, testType); - verifyTestVector3dValue("roundEvenVec", expectedTestValues.roundEvenVec, testValues.roundEvenVec, testType); - verifyTestVector3dValue("truncVec", expectedTestValues.truncVec, testValues.truncVec, testType); - verifyTestVector3dValue("ceilVec", expectedTestValues.ceilVec, testValues.ceilVec, testType); - verifyTestVector3dValue("fmaVec", expectedTestValues.fmaVec, testValues.fmaVec, testType); - verifyTestVector3dValue("ldexp", expectedTestValues.ldexpVec, testValues.ldexpVec, testType); - verifyTestVector3dValue("tanVec", expectedTestValues.tanVec, testValues.tanVec, testType); - verifyTestVector3dValue("asinVec", expectedTestValues.asinVec, testValues.asinVec, testType); - verifyTestVector3dValue("atanVec", expectedTestValues.atanVec, testValues.atanVec, testType); - //verifyTestVector3dValue("sinhVec", expectedTestValues.sinhVec, testValues.sinhVec, testType); - //verifyTestVector3dValue("coshVec", expectedTestValues.coshVec, testValues.coshVec, testType); - verifyTestVector3dValue("tanhVec", expectedTestValues.tanhVec, testValues.tanhVec, testType); - verifyTestVector3dValue("asinhVec", expectedTestValues.asinhVec, testValues.asinhVec, testType); - verifyTestVector3dValue("acoshVec", expectedTestValues.acoshVec, testValues.acoshVec, testType); - verifyTestVector3dValue("atanhVec", expectedTestValues.atanhVec, testValues.atanhVec, testType); - verifyTestVector3dValue("atan2Vec", expectedTestValues.atan2Vec, testValues.atan2Vec, testType); - verifyTestVector3dValue("erfVec", expectedTestValues.erfVec, testValues.erfVec, testType); - //verifyTestVector3dValue("erfInvVec", expectedTestValues.erfInvVec, testValues.erfInvVec, testType); + bool pass = true; + pass &= verifyTestValue("floor", expectedTestValues.floor, testValues.floor, testIteration, seed, testType); + pass &= verifyTestValue("isnan", expectedTestValues.isnan, testValues.isnan, testIteration, 
seed, testType); + pass &= verifyTestValue("isinf", expectedTestValues.isinf, testValues.isinf, testIteration, seed, testType); + pass &= verifyTestValue("pow", expectedTestValues.pow, testValues.pow, testIteration, seed, testType, 0.0001); + pass &= verifyTestValue("exp", expectedTestValues.exp, testValues.exp, testIteration, seed, testType); + pass &= verifyTestValue("exp2", expectedTestValues.exp2, testValues.exp2, testIteration, seed, testType); + pass &= verifyTestValue("log", expectedTestValues.log, testValues.log, testIteration, seed, testType); + pass &= verifyTestValue("log2", expectedTestValues.log2, testValues.log2, testIteration, seed, testType); + pass &= verifyTestValue("absF", expectedTestValues.absF, testValues.absF, testIteration, seed, testType); + pass &= verifyTestValue("absI", expectedTestValues.absI, testValues.absI, testIteration, seed, testType); + pass &= verifyTestValue("sqrt", expectedTestValues.sqrt, testValues.sqrt, testIteration, seed, testType); + pass &= verifyTestValue("sin", expectedTestValues.sin, testValues.sin, testIteration, seed, testType); + pass &= verifyTestValue("cos", expectedTestValues.cos, testValues.cos, testIteration, seed, testType); + pass &= verifyTestValue("acos", expectedTestValues.acos, testValues.acos, testIteration, seed, testType); + pass &= verifyTestValue("tan", expectedTestValues.tan, testValues.tan, testIteration, seed, testType); + pass &= verifyTestValue("asin", expectedTestValues.asin, testValues.asin, testIteration, seed, testType); + pass &= verifyTestValue("atan", expectedTestValues.atan, testValues.atan, testIteration, seed, testType); + //pass &= verifyTestValue("sinh", expectedTestValues.sinh, testValues.sinh, testIteration, seed, testType); + //pass &= verifyTestValue("cosh", expectedTestValues.cosh, testValues.cosh, testIteration, seed, testType); + pass &= verifyTestValue("tanh", expectedTestValues.tanh, testValues.tanh, testIteration, seed, testType); + pass &= verifyTestValue("asinh", 
expectedTestValues.asinh, testValues.asinh, testIteration, seed, testType); + pass &= verifyTestValue("acosh", expectedTestValues.acosh, testValues.acosh, testIteration, seed, testType); + pass &= verifyTestValue("atanh", expectedTestValues.atanh, testValues.atanh, testIteration, seed, testType); + pass &= verifyTestValue("atan2", expectedTestValues.atan2, testValues.atan2, testIteration, seed, testType); + pass &= verifyTestValue("modf", expectedTestValues.modf, testValues.modf, testIteration, seed, testType); + pass &= verifyTestValue("round", expectedTestValues.round, testValues.round, testIteration, seed, testType); + pass &= verifyTestValue("roundEven", expectedTestValues.roundEven, testValues.roundEven, testIteration, seed, testType); + pass &= verifyTestValue("trunc", expectedTestValues.trunc, testValues.trunc, testIteration, seed, testType); + pass &= verifyTestValue("ceil", expectedTestValues.ceil, testValues.ceil, testIteration, seed, testType); + pass &= verifyTestValue("fma", expectedTestValues.fma, testValues.fma, testIteration, seed, testType); + pass &= verifyTestValue("ldexp", expectedTestValues.ldexp, testValues.ldexp, testIteration, seed, testType); + pass &= verifyTestValue("erf", expectedTestValues.erf, testValues.erf, testIteration, seed, testType); + //pass &= verifyTestValue("erfInv", expectedTestValues.erfInv, testValues.erfInv, testIteration, seed, testType); + + pass &= verifyTestValue("floorVec", expectedTestValues.floorVec, testValues.floorVec, testIteration, seed, testType); + pass &= verifyTestValue("isnanVec", expectedTestValues.isnanVec, testValues.isnanVec, testIteration, seed, testType); + pass &= verifyTestValue("isinfVec", expectedTestValues.isinfVec, testValues.isinfVec, testIteration, seed, testType); + pass &= verifyTestValue("powVec", expectedTestValues.powVec, testValues.powVec, testIteration, seed, testType, 0.0001); + pass &= verifyTestValue("expVec", expectedTestValues.expVec, testValues.expVec, testIteration, seed, 
testType); + pass &= verifyTestValue("exp2Vec", expectedTestValues.exp2Vec, testValues.exp2Vec, testIteration, seed, testType); + pass &= verifyTestValue("logVec", expectedTestValues.logVec, testValues.logVec, testIteration, seed, testType); + pass &= verifyTestValue("log2Vec", expectedTestValues.log2Vec, testValues.log2Vec, testIteration, seed, testType); + pass &= verifyTestValue("absFVec", expectedTestValues.absFVec, testValues.absFVec, testIteration, seed, testType); + pass &= verifyTestValue("absIVec", expectedTestValues.absIVec, testValues.absIVec, testIteration, seed, testType); + pass &= verifyTestValue("sqrtVec", expectedTestValues.sqrtVec, testValues.sqrtVec, testIteration, seed, testType); + pass &= verifyTestValue("sinVec", expectedTestValues.sinVec, testValues.sinVec, testIteration, seed, testType); + pass &= verifyTestValue("cosVec", expectedTestValues.cosVec, testValues.cosVec, testIteration, seed, testType); + pass &= verifyTestValue("acosVec", expectedTestValues.acosVec, testValues.acosVec, testIteration, seed, testType); + pass &= verifyTestValue("modfVec", expectedTestValues.modfVec, testValues.modfVec, testIteration, seed, testType); + pass &= verifyTestValue("roundVec", expectedTestValues.roundVec, testValues.roundVec, testIteration, seed, testType); + pass &= verifyTestValue("roundEvenVec", expectedTestValues.roundEvenVec, testValues.roundEvenVec, testIteration, seed, testType); + pass &= verifyTestValue("truncVec", expectedTestValues.truncVec, testValues.truncVec, testIteration, seed, testType); + pass &= verifyTestValue("ceilVec", expectedTestValues.ceilVec, testValues.ceilVec, testIteration, seed, testType); + pass &= verifyTestValue("fmaVec", expectedTestValues.fmaVec, testValues.fmaVec, testIteration, seed, testType); + pass &= verifyTestValue("ldexp", expectedTestValues.ldexpVec, testValues.ldexpVec, testIteration, seed, testType); + pass &= verifyTestValue("tanVec", expectedTestValues.tanVec, testValues.tanVec, testIteration, seed, 
testType); + pass &= verifyTestValue("asinVec", expectedTestValues.asinVec, testValues.asinVec, testIteration, seed, testType); + pass &= verifyTestValue("atanVec", expectedTestValues.atanVec, testValues.atanVec, testIteration, seed, testType); + //pass &= verifyTestValue("sinhVec", expectedTestValues.sinhVec, testValues.sinhVec, testIteration, seed, testType); + //pass &= verifyTestValue("coshVec", expectedTestValues.coshVec, testValues.coshVec, testIteration, seed, testType); + pass &= verifyTestValue("tanhVec", expectedTestValues.tanhVec, testValues.tanhVec, testIteration, seed, testType); + pass &= verifyTestValue("asinhVec", expectedTestValues.asinhVec, testValues.asinhVec, testIteration, seed, testType); + pass &= verifyTestValue("acoshVec", expectedTestValues.acoshVec, testValues.acoshVec, testIteration, seed, testType); + pass &= verifyTestValue("atanhVec", expectedTestValues.atanhVec, testValues.atanhVec, testIteration, seed, testType); + pass &= verifyTestValue("atan2Vec", expectedTestValues.atan2Vec, testValues.atan2Vec, testIteration, seed, testType); + pass &= verifyTestValue("erfVec", expectedTestValues.erfVec, testValues.erfVec, testIteration, seed, testType); + //pass &= verifyTestValue("erfInvVec", expectedTestValues.erfInvVec, testValues.erfInvVec, testIteration, seed, testType); // verify output of struct producing functions - verifyTestValue("modfStruct", expectedTestValues.modfStruct.fractionalPart, testValues.modfStruct.fractionalPart, testType); - verifyTestValue("modfStruct", expectedTestValues.modfStruct.wholeNumberPart, testValues.modfStruct.wholeNumberPart, testType); - verifyTestVector3dValue("modfStructVec", expectedTestValues.modfStructVec.fractionalPart, testValues.modfStructVec.fractionalPart, testType); - verifyTestVector3dValue("modfStructVec", expectedTestValues.modfStructVec.wholeNumberPart, testValues.modfStructVec.wholeNumberPart, testType); - - verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.significand, 
testValues.frexpStruct.significand, testType); - verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.exponent, testValues.frexpStruct.exponent, testType); - verifyTestVector3dValue("frexpStructVec", expectedTestValues.frexpStructVec.significand, testValues.frexpStructVec.significand, testType); - verifyTestVector3dValue("frexpStructVec", expectedTestValues.frexpStructVec.exponent, testValues.frexpStructVec.exponent, testType); + pass &= verifyTestValue("modfStruct", expectedTestValues.modfStruct.fractionalPart, testValues.modfStruct.fractionalPart, testIteration, seed, testType); + pass &= verifyTestValue("modfStruct", expectedTestValues.modfStruct.wholeNumberPart, testValues.modfStruct.wholeNumberPart, testIteration, seed, testType); + pass &= verifyTestValue("modfStructVec", expectedTestValues.modfStructVec.fractionalPart, testValues.modfStructVec.fractionalPart, testIteration, seed, testType); + pass &= verifyTestValue("modfStructVec", expectedTestValues.modfStructVec.wholeNumberPart, testValues.modfStructVec.wholeNumberPart, testIteration, seed, testType); + + pass &= verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.significand, testValues.frexpStruct.significand, testIteration, seed, testType); + pass &= verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.exponent, testValues.frexpStruct.exponent, testIteration, seed, testType); + pass &= verifyTestValue("frexpStructVec", expectedTestValues.frexpStructVec.significand, testValues.frexpStructVec.significand, testIteration, seed, testType); + pass &= verifyTestValue("frexpStructVec", expectedTestValues.frexpStructVec.exponent, testValues.frexpStructVec.exponent, testIteration, seed, testType); + return pass; } }; diff --git a/22_CppCompat/app_resources/common.hlsl b/22_CppCompat/app_resources/common.hlsl index dc3ff5fcd..7fed20bbe 100644 --- a/22_CppCompat/app_resources/common.hlsl +++ b/22_CppCompat/app_resources/common.hlsl @@ -208,82 +208,6 @@ struct TgmathTestValues ModfOutput 
modfStructVec; FrexpOutput frexpStruct; FrexpOutput frexpStructVec; - - void fillTestValues(NBL_CONST_REF_ARG(TgmathIntputTestValues) input) - { - floor = nbl::hlsl::floor(input.floor); - isnan = nbl::hlsl::isnan(input.isnan); - isinf = nbl::hlsl::isinf(input.isinf); - pow = nbl::hlsl::pow(input.powX, input.powY); - exp = nbl::hlsl::exp(input.exp); - exp2 = nbl::hlsl::exp2(input.exp2); - log = nbl::hlsl::log(input.log); - log2 = nbl::hlsl::log2(input.log2); - absF = nbl::hlsl::abs(input.absF); - absI = nbl::hlsl::abs(input.absI); - sqrt = nbl::hlsl::sqrt(input.sqrt); - sin = nbl::hlsl::sin(input.sin); - cos = nbl::hlsl::cos(input.cos); - tan = nbl::hlsl::tan(input.tan); - asin = nbl::hlsl::asin(input.asin); - atan = nbl::hlsl::atan(input.atan); - sinh = nbl::hlsl::sinh(input.sinh); - cosh = nbl::hlsl::cosh(input.cosh); - tanh = nbl::hlsl::tanh(input.tanh); - asinh = nbl::hlsl::asinh(input.asinh); - acosh = nbl::hlsl::acosh(input.acosh); - atanh = nbl::hlsl::atanh(input.atanh); - atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); - erf = nbl::hlsl::erf(input.erf); - erfInv = nbl::hlsl::erfInv(input.erfInv); - acos = nbl::hlsl::acos(input.acos); - modf = nbl::hlsl::modf(input.modf); - round = nbl::hlsl::round(input.round); - roundEven = nbl::hlsl::roundEven(input.roundEven); - trunc = nbl::hlsl::trunc(input.trunc); - ceil = nbl::hlsl::ceil(input.ceil); - fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); - ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); - - floorVec = nbl::hlsl::floor(input.floorVec); - isnanVec = nbl::hlsl::isnan(input.isnanVec); - isinfVec = nbl::hlsl::isinf(input.isinfVec); - powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); - expVec = nbl::hlsl::exp(input.expVec); - exp2Vec = nbl::hlsl::exp2(input.exp2Vec); - logVec = nbl::hlsl::log(input.logVec); - log2Vec = nbl::hlsl::log2(input.log2Vec); - absFVec = nbl::hlsl::abs(input.absFVec); - absIVec = nbl::hlsl::abs(input.absIVec); - sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); - 
sinVec = nbl::hlsl::sin(input.sinVec); - cosVec = nbl::hlsl::cos(input.cosVec); - tanVec = nbl::hlsl::tan(input.tanVec); - asinVec = nbl::hlsl::asin(input.asinVec); - atanVec = nbl::hlsl::atan(input.atanVec); - sinhVec = nbl::hlsl::sinh(input.sinhVec); - coshVec = nbl::hlsl::cosh(input.coshVec); - tanhVec = nbl::hlsl::tanh(input.tanhVec); - asinhVec = nbl::hlsl::asinh(input.asinhVec); - acoshVec = nbl::hlsl::acosh(input.acoshVec); - atanhVec = nbl::hlsl::atanh(input.atanhVec); - atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); - acosVec = nbl::hlsl::acos(input.acosVec); - modfVec = nbl::hlsl::modf(input.modfVec); - roundVec = nbl::hlsl::round(input.roundVec); - roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); - truncVec = nbl::hlsl::trunc(input.truncVec); - ceilVec = nbl::hlsl::ceil(input.ceilVec); - fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); - ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); - erfVec = nbl::hlsl::erf(input.erfVec); - erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); - - modfStruct = nbl::hlsl::modfStruct(input.modfStruct); - modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); - frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); - frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); - } }; struct IntrinsicsIntputTestValues @@ -416,58 +340,140 @@ struct IntrinsicsTestValues spirv::SubBorrowOutput subBorrow; spirv::AddCarryOutput addCarryVec; spirv::SubBorrowOutput subBorrowVec; +}; - void fillTestValues(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input) +struct IntrinsicsTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input, NBL_REF_ARG(IntrinsicsTestValues) output) { - bitCount = nbl::hlsl::bitCount(input.bitCount); - cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); - clamp = nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); - length = nbl::hlsl::length(input.length); - normalize = 
nbl::hlsl::normalize(input.normalize); - dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); - determinant = nbl::hlsl::determinant(input.determinant); - findMSB = nbl::hlsl::findMSB(input.findMSB); - findLSB = nbl::hlsl::findLSB(input.findLSB); - inverse = nbl::hlsl::inverse(input.inverse); - transpose = nbl::hlsl::transpose(input.transpose); - mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); + output.bitCount = nbl::hlsl::bitCount(input.bitCount); + output.cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); + output.clamp = nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); + output.length = nbl::hlsl::length(input.length); + output.normalize = nbl::hlsl::normalize(input.normalize); + output.dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); + output.determinant = nbl::hlsl::determinant(input.determinant); + output.findMSB = nbl::hlsl::findMSB(input.findMSB); + output.findLSB = nbl::hlsl::findLSB(input.findLSB); + output.inverse = nbl::hlsl::inverse(input.inverse); + output.transpose = nbl::hlsl::transpose(input.transpose); + output.mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); // TODO: fix min and max - min = nbl::hlsl::min(input.minA, input.minB); - max = nbl::hlsl::max(input.maxA, input.maxB); - rsqrt = nbl::hlsl::rsqrt(input.rsqrt); - bitReverse = nbl::hlsl::bitReverse(input.bitReverse); - frac = nbl::hlsl::fract(input.frac); - mix = nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); - sign = nbl::hlsl::sign(input.sign); - radians = nbl::hlsl::radians(input.radians); - degrees = nbl::hlsl::degrees(input.degrees); - step = nbl::hlsl::step(input.stepEdge, input.stepX); - smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); - - bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); - clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); - findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); - findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); + output.min = 
nbl::hlsl::min(input.minA, input.minB); + output.max = nbl::hlsl::max(input.maxA, input.maxB); + output.rsqrt = nbl::hlsl::rsqrt(input.rsqrt); + output.bitReverse = nbl::hlsl::bitReverse(input.bitReverse); + output.frac = nbl::hlsl::fract(input.frac); + output.mix = nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); + output.sign = nbl::hlsl::sign(input.sign); + output.radians = nbl::hlsl::radians(input.radians); + output.degrees = nbl::hlsl::degrees(input.degrees); + output.step = nbl::hlsl::step(input.stepEdge, input.stepX); + output.smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); + + output.bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); + output.clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); + output.findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); + output.findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); // TODO: fix min and max - minVec = nbl::hlsl::min(input.minAVec, input.minBVec); - maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); - rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); - bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); - fracVec = nbl::hlsl::fract(input.fracVec); - mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); - - signVec = nbl::hlsl::sign(input.signVec); - radiansVec = nbl::hlsl::radians(input.radiansVec); - degreesVec = nbl::hlsl::degrees(input.degreesVec); - stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); - smoothStepVec = nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); - faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); - reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); - refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); - addCarry = nbl::hlsl::addCarry(input.addCarryA, input.addCarryB); - subBorrow = nbl::hlsl::subBorrow(input.subBorrowA, 
input.subBorrowB); - addCarryVec = nbl::hlsl::addCarry(input.addCarryAVec, input.addCarryBVec); - subBorrowVec = nbl::hlsl::subBorrow(input.subBorrowAVec, input.subBorrowBVec); + output.minVec = nbl::hlsl::min(input.minAVec, input.minBVec); + output.maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); + output.rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); + output.bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); + output.fracVec = nbl::hlsl::fract(input.fracVec); + output.mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); + + output.signVec = nbl::hlsl::sign(input.signVec); + output.radiansVec = nbl::hlsl::radians(input.radiansVec); + output.degreesVec = nbl::hlsl::degrees(input.degreesVec); + output.stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); + output.smoothStepVec = nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); + output.faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); + output.reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); + output.refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); + output.addCarry = nbl::hlsl::addCarry(input.addCarryA, input.addCarryB); + output.subBorrow = nbl::hlsl::subBorrow(input.subBorrowA, input.subBorrowB); + output.addCarryVec = nbl::hlsl::addCarry(input.addCarryAVec, input.addCarryBVec); + output.subBorrowVec = nbl::hlsl::subBorrow(input.subBorrowAVec, input.subBorrowBVec); + } +}; + +struct TgmathTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(TgmathIntputTestValues) input, NBL_REF_ARG(TgmathTestValues) output) + { + output.floor = nbl::hlsl::floor(input.floor); + output.isnan = nbl::hlsl::isnan(input.isnan); + output.isinf = nbl::hlsl::isinf(input.isinf); + output.pow = nbl::hlsl::pow(input.powX, input.powY); + output.exp = nbl::hlsl::exp(input.exp); + output.exp2 = nbl::hlsl::exp2(input.exp2); + output.log = nbl::hlsl::log(input.log); + 
output.log2 = nbl::hlsl::log2(input.log2); + output.absF = nbl::hlsl::abs(input.absF); + output.absI = nbl::hlsl::abs(input.absI); + output.sqrt = nbl::hlsl::sqrt(input.sqrt); + output.sin = nbl::hlsl::sin(input.sin); + output.cos = nbl::hlsl::cos(input.cos); + output.tan = nbl::hlsl::tan(input.tan); + output.asin = nbl::hlsl::asin(input.asin); + output.atan = nbl::hlsl::atan(input.atan); + output.sinh = nbl::hlsl::sinh(input.sinh); + output.cosh = nbl::hlsl::cosh(input.cosh); + output.tanh = nbl::hlsl::tanh(input.tanh); + output.asinh = nbl::hlsl::asinh(input.asinh); + output.acosh = nbl::hlsl::acosh(input.acosh); + output.atanh = nbl::hlsl::atanh(input.atanh); + output.atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); + output.erf = nbl::hlsl::erf(input.erf); + output.erfInv = nbl::hlsl::erfInv(input.erfInv); + output.acos = nbl::hlsl::acos(input.acos); + output.modf = nbl::hlsl::modf(input.modf); + output.round = nbl::hlsl::round(input.round); + output.roundEven = nbl::hlsl::roundEven(input.roundEven); + output.trunc = nbl::hlsl::trunc(input.trunc); + output.ceil = nbl::hlsl::ceil(input.ceil); + output.fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); + output.ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); + + output.floorVec = nbl::hlsl::floor(input.floorVec); + output.isnanVec = nbl::hlsl::isnan(input.isnanVec); + output.isinfVec = nbl::hlsl::isinf(input.isinfVec); + output.powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); + output.expVec = nbl::hlsl::exp(input.expVec); + output.exp2Vec = nbl::hlsl::exp2(input.exp2Vec); + output.logVec = nbl::hlsl::log(input.logVec); + output.log2Vec = nbl::hlsl::log2(input.log2Vec); + output.absFVec = nbl::hlsl::abs(input.absFVec); + output.absIVec = nbl::hlsl::abs(input.absIVec); + output.sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); + output.sinVec = nbl::hlsl::sin(input.sinVec); + output.cosVec = nbl::hlsl::cos(input.cosVec); + output.tanVec = nbl::hlsl::tan(input.tanVec); + output.asinVec = 
nbl::hlsl::asin(input.asinVec); + output.atanVec = nbl::hlsl::atan(input.atanVec); + output.sinhVec = nbl::hlsl::sinh(input.sinhVec); + output.coshVec = nbl::hlsl::cosh(input.coshVec); + output.tanhVec = nbl::hlsl::tanh(input.tanhVec); + output.asinhVec = nbl::hlsl::asinh(input.asinhVec); + output.acoshVec = nbl::hlsl::acosh(input.acoshVec); + output.atanhVec = nbl::hlsl::atanh(input.atanhVec); + output.atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); + output.acosVec = nbl::hlsl::acos(input.acosVec); + output.modfVec = nbl::hlsl::modf(input.modfVec); + output.roundVec = nbl::hlsl::round(input.roundVec); + output.roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); + output.truncVec = nbl::hlsl::trunc(input.truncVec); + output.ceilVec = nbl::hlsl::ceil(input.ceilVec); + output.fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); + output.ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); + output.erfVec = nbl::hlsl::erf(input.erfVec); + output.erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); + + output.modfStruct = nbl::hlsl::modfStruct(input.modfStruct); + output.modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); + output.frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); + output.frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); } }; diff --git a/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl b/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl index df7cef1cf..23579cd09 100644 --- a/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl +++ b/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl @@ -4,13 +4,16 @@ #pragma shader_stage(compute) #include "common.hlsl" +#include [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; [numthreads(256, 1, 1)] -void main(uint3 invocationID : SV_DispatchThreadID) +[shader("compute")] +void main() { - if(invocationID.x == 0) - outputTestValues[0].fillTestValues(inputTestValues[0]); 
-} + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + IntrinsicsTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +} \ No newline at end of file diff --git a/22_CppCompat/app_resources/test.comp.hlsl b/22_CppCompat/app_resources/test.comp.hlsl index 17c59f970..9a817e021 100644 --- a/22_CppCompat/app_resources/test.comp.hlsl +++ b/22_CppCompat/app_resources/test.comp.hlsl @@ -1,6 +1,8 @@ //// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. //// This file is part of the "Nabla Engine". //// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) + #include "app_resources/common.hlsl" diff --git a/22_CppCompat/app_resources/tgmathTest.comp.hlsl b/22_CppCompat/app_resources/tgmathTest.comp.hlsl index 5d93ffb64..4aeecb91d 100644 --- a/22_CppCompat/app_resources/tgmathTest.comp.hlsl +++ b/22_CppCompat/app_resources/tgmathTest.comp.hlsl @@ -4,13 +4,16 @@ #pragma shader_stage(compute) #include "common.hlsl" +#include [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; [numthreads(256, 1, 1)] -void main(uint3 invocationID : SV_DispatchThreadID) +[shader("compute")] +void main() { - if(invocationID.x == 0) - outputTestValues[0].fillTestValues(inputTestValues[0]); -} + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + TgmathTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +} \ No newline at end of file diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index 70c8d7b3a..f55e9506f 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -1,7 +1,7 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "app_resources/common.hlsl" @@ -59,26 +59,37 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat if (!asset_base_t::onAppInitialized(std::move(system))) return false; - ITester::PipelineSetupData pplnSetupData; - pplnSetupData.device = m_device; - pplnSetupData.api = m_api; - pplnSetupData.assetMgr = m_assetMgr; - pplnSetupData.logger = m_logger; - pplnSetupData.physicalDevice = m_physicalDevice; - pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); - + bool pass = true; { - CTgmathTester tgmathTester; - pplnSetupData.testShaderPath = "app_resources/tgmathTest.comp.hlsl"; - tgmathTester.setupPipeline(pplnSetupData); - tgmathTester.performTests(); + CTgmathTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + pplnSetupData.shaderKey = nbl::this_example::builtin::build::get_spirv_key<"tgmathTest">(m_device.get()); + + CTgmathTester tgmathTester(8); + tgmathTester.setupPipeline(pplnSetupData); + pass &= tgmathTester.performTestsAndVerifyResults("TgmathTestLog.txt"); } { - CIntrinsicsTester intrinsicsTester; - pplnSetupData.testShaderPath = "app_resources/intrinsicsTest.comp.hlsl"; - intrinsicsTester.setupPipeline(pplnSetupData); - intrinsicsTester.performTests(); + CIntrinsicsTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + pplnSetupData.shaderKey = 
nbl::this_example::builtin::build::get_spirv_key<"intrinsicsTest">(m_device.get()); + + CIntrinsicsTester intrinsicsTester(8); + intrinsicsTester.setupPipeline(pplnSetupData); + pass &= intrinsicsTester.performTestsAndVerifyResults("IntrinsicsTestLog.txt"); } + if (!pass) + return false; m_queue = m_device->getQueue(0, 0); m_commandPool = m_device->createCommandPool(m_queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); @@ -88,8 +99,9 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset("app_resources/test.comp.hlsl", lp); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key<"test">(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return logFail("Could not load shader!"); @@ -132,7 +144,6 @@ class CompatibilityTest final : public application_templates::MonoDeviceApplicat return logFail("Failed to create compute pipeline!\n"); } - for (int i = 0; i < 2; ++i) { m_images[i] = m_device->createImage(IGPUImage::SCreationParams { diff --git a/22_RaytracedAO/Renderer.cpp b/22_RaytracedAO/Renderer.cpp index 265f4986d..589baba74 100644 --- a/22_RaytracedAO/Renderer.cpp +++ b/22_RaytracedAO/Renderer.cpp @@ -21,18 +21,6 @@ using namespace nbl::video; constexpr uint32_t kOptiXPixelSize = sizeof(uint16_t)*3u; -core::smart_refctd_ptr specializedShaderFromFile(IAssetManager* assetManager, const char* path) -{ - auto bundle = assetManager->getAsset(path, {}); - return core::smart_refctd_ptr_static_cast(*bundle.getContents().begin()); -} -core::smart_refctd_ptr gpuSpecializedShaderFromFile(IAssetManager* assetManager, IVideoDriver* driver, const char* path) -{ - auto shader = 
specializedShaderFromFile(assetManager,path); - // TODO: @Crisspl find a way to stop the user from such insanity as moving from the bundle's dynamic array - //return std::move(driver->getGPUObjectsFromAssets(&shader,&shader+1u)->operator[](0)); - return driver->getGPUObjectsFromAssets(&shader,&shader+1u)->operator[](0); -} // TODO: make these util function in `IDescriptorSetLayout` -> Assign: @Vib auto fillIotaDescriptorBindingDeclarations = [](auto* outBindings, uint32_t accessFlags, uint32_t count, asset::E_DESCRIPTOR_TYPE descType=asset::EDT_INVALID, uint32_t startIndex=0u) -> void { @@ -53,7 +41,7 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I m_framesDispatched(0u), m_rcpPixelSize{0.f,0.f}, m_staticViewData{ {0u,0u},0u,0u,0u,0u,false,core::infinity(),{}}, m_raytraceCommonData{0.f,0u,0u,0u,core::matrix3x4SIMD()}, m_indirectDrawBuffers{nullptr},m_cullPushConstants{core::matrix4SIMD(),1.f,0u,0u,0u},m_cullWorkGroups(0u), - m_raygenWorkGroups{0u,0u},m_visibilityBuffer(nullptr),m_colorBuffer(nullptr), + m_raygenWorkGroups{0u,0u},m_colorBuffer(nullptr), m_envMapImportanceSampling(_driver) { // TODO: reimplement @@ -1495,8 +1483,6 @@ void Renderer::initScreenSizedResources( } // set up m_raygenDS - core::smart_refctd_ptr visibilityBuffer = createScreenSizedTexture(EF_R32G32B32A32_UINT); - { { constexpr auto ScrambleStateChannels = 2u; auto tmpBuff = m_driver->createCPUSideGPUVisibleGPUBufferOnDedMem(sizeof(uint32_t)*ScrambleStateChannels*renderPixelCount); @@ -1522,7 +1508,6 @@ void Renderer::initScreenSizedResources( m_driver->copyBufferToImage(tmpBuff.get(),scrambleKeys->getCreationParameters().image.get(),1u,®ion); setImageInfo(infos+0,asset::EIL_SHADER_READ_ONLY_OPTIMAL,std::move(scrambleKeys)); } - setImageInfo(infos+1,asset::EIL_SHADER_READ_ONLY_OPTIMAL,core::smart_refctd_ptr(visibilityBuffer)); setImageInfo(infos+2,asset::EIL_GENERAL,core::smart_refctd_ptr(m_tonemapOutput)); 
setDstSetAndDescTypesOnWrites(m_raygenDS.get(),writes,infos,{ @@ -1530,8 +1515,7 @@ void Renderer::initScreenSizedResources( EDT_COMBINED_IMAGE_SAMPLER, EDT_STORAGE_IMAGE }); - } - m_driver->updateDescriptorSets(3u,writes,0u,nullptr); + m_driver->updateDescriptorSets(3u,writes,0u,nullptr); // set up m_closestHitDS for (auto i=0u; i<2u; i++) @@ -1580,9 +1564,6 @@ void Renderer::initScreenSizedResources( } m_driver->updateDescriptorSets(8u,writes,0u,nullptr); - m_visibilityBuffer = m_driver->addFrameBuffer(); - m_visibilityBuffer->attach(EFAP_DEPTH_ATTACHMENT,createScreenSizedTexture(EF_D32_SFLOAT)); - m_visibilityBuffer->attach(EFAP_COLOR_ATTACHMENT0,std::move(visibilityBuffer)); m_colorBuffer = m_driver->addFrameBuffer(); m_colorBuffer->attach(EFAP_COLOR_ATTACHMENT0, core::smart_refctd_ptr(m_tonemapOutput)); @@ -1599,81 +1580,6 @@ void Renderer::initScreenSizedResources( std::cout << std::endl; } -void Renderer::deinitScreenSizedResources() -{ - auto commandQueue = m_rrManager->getCLCommandQueue(); - ocl::COpenCLHandler::ocl.pclFinish(commandQueue); - - glFinish(); - - // make sure descriptor sets dont dangle - //m_driver->bindDescriptorSets(video::EPBP_COMPUTE,nullptr,0u,4u,nullptr); - m_closestHitDS[0] = m_closestHitDS[1] = nullptr; - m_commonRaytracingDS[0] = m_commonRaytracingDS[1] = nullptr; - - // unset the framebuffer (dangling smartpointer in state cache can prevent the framebuffer from being dropped until the next framebuffer set) - m_driver->setRenderTarget(nullptr,false); - if (m_visibilityBuffer) - { - m_driver->removeFrameBuffer(m_visibilityBuffer); - m_visibilityBuffer = nullptr; - } - if (m_colorBuffer) - { - m_driver->removeFrameBuffer(m_colorBuffer); - m_colorBuffer = nullptr; - } - m_accumulation = m_tonemapOutput = nullptr; - m_albedoAcc = m_albedoRslv = nullptr; - m_normalAcc = m_normalRslv = nullptr; - m_maskAcc = nullptr; - - glFinish(); - - // wait for OpenCL to finish - ocl::COpenCLHandler::ocl.pclFlush(commandQueue); - 
ocl::COpenCLHandler::ocl.pclFinish(commandQueue); - for (auto i=0; i<2u; i++) - { - auto deleteInteropBuffer = [&](InteropBuffer& buffer) -> void - { - m_rrManager->unlinkBuffer(std::move(buffer.asRRBuffer)); - buffer = {}; - }; - deleteInteropBuffer(m_intersectionBuffer[i]); - deleteInteropBuffer(m_rayBuffer[i]); - } - - m_raygenWorkGroups[0] = m_raygenWorkGroups[1] = 0u; - - m_cullPipeline = nullptr; - m_raygenPipeline = nullptr; - m_closestHitPipeline = nullptr; - m_resolvePipeline = nullptr; - - m_staticViewData.imageDimensions[0] = 0u; - m_staticViewData.imageDimensions[1] = 0u; - m_staticViewData.maxPathDepth = DefaultPathDepth; - m_staticViewData.noRussianRouletteDepth = 5u; - m_staticViewData.samplesPerPixelPerDispatch = 1u; - m_staticViewData.hideEnvmap = false; - m_staticViewData.envMapPDFNormalizationFactor = core::infinity(); - m_staticViewData.cascadeParams = {}; - m_totalRaysCast = 0ull; - m_rcpPixelSize = {0.f,0.f}; - m_framesDispatched = 0u; - std::fill_n(m_prevView.pointer(),12u,0.f); - m_prevCamTform = nbl::core::matrix4x3(); -} - -void Renderer::resetSampleAndFrameCounters() -{ - m_totalRaysCast = 0ull; - m_framesDispatched = 0u; - std::fill_n(m_prevView.pointer(),12u,0.f); - m_prevCamTform = nbl::core::matrix4x3(); -} - void Renderer::takeAndSaveScreenShot(const std::filesystem::path& screenshotFilePath, bool denoise, const DenoiserArgs& denoiserArgs) { auto commandQueue = m_rrManager->getCLCommandQueue(); @@ -2160,1032 +2066,4 @@ bool Renderer::traceBounce(uint32_t& raycount) } return true; -} - -const float Renderer::AntiAliasingSequence[Renderer::AntiAliasingSequenceLength][2] = -{ -{0.229027962000000, 0.100901043000000}, -{0.934988661250000, 0.900492937500000}, -{0.693936740750000, 0.477888665000000}, -{0.396013875250000, 0.867381653000000}, -{0.151208663250000, 0.331649132250000}, -{0.919338615000000, 0.306386117750000}, -{0.454737456500000, 0.597940860250000}, -{0.911951413000000, 0.584874565000000}, -{0.471331207500000, 
0.117509299250000}, -{0.724981748000000, 0.988645892000000}, -{0.227727943750000, 0.553082892250000}, -{0.927148254750000, 0.059077206250000}, -{0.170420940250000, 0.853803466500000}, -{0.369496963250000, 0.372492160250000}, -{0.709055501500000, 0.719526612750000}, -{0.708593019750000, 0.236308825250000}, -{0.053515783250000, 0.244794542562500}, -{0.759417624125000, 0.846532545187500}, -{0.572365454937500, 0.341559262437500}, -{0.269128942562500, 0.962581831375000}, -{0.246508261687500, 0.286661635812500}, -{0.819542439062500, 0.459099133812500}, -{0.411348913687500, 0.737420359250000}, -{0.896647944437500, 0.717554343125000}, -{0.358057598000000, 0.050206801437500}, -{0.605871046250000, 0.779868041500000}, -{0.036816445812500, 0.506511135625000}, -{0.806931985937500, 0.138270723062500}, -{0.045020470000000, 0.818334270875000}, -{0.433264399500000, 0.254739200375000}, -{0.556258709500000, 0.559776624000000}, -{0.611048395312500, 0.162518625750000}, -{0.028918631812500, 0.053438072375000}, -{0.856252533125000, 0.916712681500000}, -{0.580344816187500, 0.463534157062500}, -{0.291334488000000, 0.774756179000000}, -{0.157847279187500, 0.464948199125000}, -{0.775478249937500, 0.320623736250000}, -{0.306258709500000, 0.653526624000000}, -{0.798533046937500, 0.552896543187500}, -{0.349953270437500, 0.123764825500000}, -{0.534027961437500, 0.969931745937500}, -{0.122488661312500, 0.681742937625000}, -{0.849003468812500, 0.216845413250000}, -{0.145343900750000, 0.962506045625000}, -{0.395929912437500, 0.488477370312500}, -{0.675219736437500, 0.601237158875000}, -{0.728921568625000, 0.053308823500000}, -{0.153721825125000, 0.145597505062500}, -{0.852763510375000, 0.797682223125000}, -{0.644595719312500, 0.367380713687500}, -{0.475934665312500, 0.787623234375000}, -{0.037670496437500, 0.386130180750000}, -{0.916111850937500, 0.403604173437500}, -{0.307256453062500, 0.518207928812500}, -{0.836158139312500, 0.677526975812500}, -{0.291525812500000, 0.197831715312500}, 
-{0.632543215125000, 0.896220934750000}, -{0.039235045687500, 0.629605464812500}, -{0.927263875375000, 0.179881653187500}, -{0.036335975187500, 0.990626511375000}, -{0.458406617875000, 0.372877193062500}, -{0.545614665812500, 0.676662283062500}, -{0.606815968812500, 0.044970413250000}, -{0.031533697125000, 0.184836288625000}, -{0.943869562500000, 0.830155934062500}, -{0.607026984312500, 0.286243495000000}, -{0.385468447812500, 0.923477959062500}, -{0.211591778000000, 0.432717372437500}, -{0.959561740812500, 0.477888665062500}, -{0.340921091062500, 0.599871303750000}, -{0.770926812125000, 0.740443845937500}, -{0.492972183312500, 0.243769330562500}, -{0.520086204062500, 0.865883539250000}, -{0.194132187625000, 0.711586172812500}, -{0.867832801875000, 0.029377324812500}, -{0.018898352500000, 0.755166315812500}, -{0.294110519250000, 0.340476317312500}, -{0.645436781125000, 0.669120978187500}, -{0.537010584750000, 0.070669853500000}, -{0.161951413000000, 0.209874565062500}, -{0.786335975187500, 0.990626511375000}, -{0.525681985937500, 0.419520723062500}, -{0.287619562500000, 0.834550465312500}, -{0.100299557750000, 0.367542953000000}, -{0.787670496437500, 0.386130180750000}, -{0.425010132750000, 0.666850725937500}, -{0.959417841312500, 0.712724761625000}, -{0.259027114250000, 0.027505482375000}, -{0.706747124500000, 0.863983912687500}, -{0.118758709500000, 0.559776624000000}, -{0.979834653750000, 0.076596529437500}, -{0.076814113250000, 0.879551982187500}, -{0.458038607062500, 0.495297691687500}, -{0.676899749875000, 0.533654791000000}, -{0.739509651750000, 0.162886922875000}, -{0.130635833000000, 0.032884578937500}, -{0.995486845875000, 0.879726983937500}, -{0.681683761437500, 0.415213866187500}, -{0.471888733500000, 0.975077322375000}, -{0.002080578437500, 0.292317740812500}, -{0.982026984312500, 0.286243495000000}, -{0.291525812500000, 0.713456715312500}, -{0.803515783250000, 0.619794542562500}, -{0.363736251000000, 0.241491573500000}, -{0.581375603187500, 
0.850024182625000}, -{0.126134788437500, 0.739345154625000}, -{0.807256990625000, 0.025225260812500}, -{0.214063133312500, 0.979178170312500}, -{0.279120068187500, 0.455460706437500}, -{0.521614411125000, 0.748128257250000}, -{0.541375661500000, 0.191916865812500}, -{0.092374240812500, 0.093123040062500}, -{0.819780017000000, 0.863865176562500}, -{0.723535390937500, 0.290673655562500}, -{0.333626471625000, 0.991508772375000}, -{0.180081879937500, 0.273337083437500}, -{0.884853249937500, 0.353826861250000}, -{0.486489450437500, 0.649922456187500}, -{0.970355124125000, 0.588720045187500}, -{0.411054041562500, 0.190728892687500}, -{0.670557598000000, 0.782628676437500}, -{0.176686781125000, 0.590995978187500}, -{0.923484185187500, 0.119472166250000}, -{0.229834653750000, 0.826596529437500}, -{0.402229645500000, 0.427815757250000}, -{0.614887300500000, 0.582390020187500}, -{0.721331207625000, 0.117509299250000}, -{0.221780261562500, 0.160787322875000}, -{0.980871046250000, 0.779868041500000}, -{0.521614411125000, 0.498128257250000}, -{0.462698109750000, 0.855009158437500}, -{0.102148981812500, 0.485181351500000}, -{0.790505847500000, 0.272359588500000}, -{0.357263913000000, 0.553624565062500}, -{0.852875617687500, 0.518271589687500}, -{0.412788910312500, 0.072860258937500}, -{0.739509651750000, 0.912886922875000}, -{0.244715387500000, 0.610882883562500}, -{0.931245437000000, 0.247161473000000}, -{0.118495619500000, 0.827404835625000}, -{0.356241537562500, 0.307793951312500}, -{0.739954645312500, 0.601971750750000}, -{0.652229645500000, 0.240315757250000}, -{0.085230272750000, 0.149967825937500}, -{0.790487853250000, 0.802468641250000}, -{0.742972183312500, 0.431269330562500}, -{0.338023546687500, 0.864140358375000}, -{0.161359195250000, 0.386030244500000}, -{0.979622565375000, 0.415764143437500}, -{0.344324410875000, 0.743490102812500}, -{0.850000234687500, 0.588036936812500}, -{0.478921568625000, 0.053308823500000}, -{0.575878945812500, 0.904948635625000}, 
-{0.066809364125000, 0.711985215062500}, -{0.842374240812500, 0.093123040062500}, -{0.072833622000000, 0.943689044750000}, -{0.473982478062500, 0.309619342562500}, -{0.643468702500000, 0.727011596187500}, -{0.661784804062500, 0.096504548000000}, -{0.075593410125000, 0.020665263437500}, -{0.846367111937500, 0.980869489750000}, -{0.584417841312500, 0.402177886625000}, -{0.419264650000000, 0.807665176000000}, -{0.108911798812500, 0.274823750687500}, -{0.842214949125000, 0.395649388625000}, -{0.424460011250000, 0.541515061937500}, -{0.914875915625000, 0.525088448500000}, -{0.276815978250000, 0.138406141250000}, -{0.682946765937500, 0.941192325375000}, -{0.243922631125000, 0.674414353500000}, -{0.983747165312500, 0.225123234375000}, -{0.209039534812500, 0.919381743937500}, -{0.317979460312500, 0.396931190375000}, -{0.595789254625000, 0.645833852812500}, -{0.589063133312500, 0.229178170312500}, -{0.201732996437500, 0.034567680750000}, -{0.911951413000000, 0.959874565062500}, -{0.669465614812500, 0.307270670687500}, -{0.442773254937500, 0.918452206312500}, -{0.228659227750000, 0.498372525687500}, -{0.864786425062500, 0.258916160937500}, -{0.366015783250000, 0.682294542562500}, -{0.832368054687500, 0.749523853312500}, -{0.475148582250000, 0.180782790250000}, -{0.543804934062500, 0.806559289687500}, -{0.041831345187500, 0.574164114062500}, -{0.981787063687500, 0.014769301562500}, -{0.167325380812500, 0.796656456375000}, -{0.305883847687500, 0.260168413875000}, -{0.736593646187500, 0.544303438875000}, -{0.595631980687500, 0.113338942562500}, -{0.233747165312500, 0.225123234375000}, -{0.881420496437500, 0.854880180750000}, -{0.514120916562500, 0.361726452125000}, -{0.262088408375000, 0.897305108937500}, -{0.040764352812500, 0.448613484812500}, -{0.882527922875000, 0.453355770312500}, -{0.486593646187500, 0.544303438875000}, -{0.944193581437500, 0.650074503000000}, -{0.403764392000000, 0.003513614062500}, -{0.647805652187500, 0.839498260375000}, -{0.004346402437500, 
0.700568695812500}, -{0.863684364125000, 0.149485215062500}, -{0.075593410125000, 0.770665263437500}, -{0.260573230937500, 0.378374937125000}, -{0.606947383687500, 0.518907935937500}, -{0.522543332062500, 0.131538315687500}, -{0.115674527437500, 0.213752289562500}, -{0.978861550187500, 0.943531534250000}, -{0.716222608750000, 0.357993041500000}, -{0.396123640562500, 0.988911053812500}, -{0.116417439062500, 0.427849133812500}, -{0.960375986437500, 0.355143408875000}, -{0.396123640562500, 0.613911053812500}, -{0.771872079375000, 0.679610301562500}, -{0.407651999187500, 0.129979780250000}, -{0.610967390562500, 0.957538983500000}, -{0.099030910125000, 0.622227763437500}, -{0.792605235062500, 0.213450866625000}, -{0.231787063687500, 0.764769301562500}, -{0.345102996500000, 0.451097146437500}, -{0.537639116937500, 0.609792796562500}, -{0.670557598000000, 0.032628676437500}, -{0.161148929812500, 0.091845178375000}, -{0.915446201000000, 0.774126136937500}, -{0.542495165812500, 0.275647345250000}, -{0.316935423562500, 0.923529210750000}, -{0.209068937250000, 0.340693571625000}, -{0.770926812125000, 0.490443845937500}, -{0.462732614000000, 0.712224844000000}, -{0.887121046250000, 0.643149291500000}, -{0.302373640562500, 0.082661053812500}, -{0.728921568625000, 0.803308823500000}, -{0.181258709500000, 0.653526624000000}, -{0.977987853250000, 0.146218641250000}, -{0.016702341062500, 0.927996303750000}, -{0.467374240812500, 0.431013665062500}, -{0.706158315437500, 0.659556033875000}, -{0.669301523312500, 0.186625825562500}, -{0.039461819812500, 0.116237049750000}, -{0.798533046937500, 0.927896543187500}, -{0.636245165812500, 0.463147345250000}, -{0.358057598000000, 0.800206801437500}, -{0.057953992187500, 0.323742603312500}, -{0.838196808062500, 0.318240323625000}, -{0.288441098000000, 0.563378403500000}, -{0.981947383687500, 0.518907935937500}, -{0.348181288187500, 0.183212688250000}, -{0.506420496437500, 0.917380180750000}, -{0.164875915625000, 0.525088448500000}, 
-{0.787802165812500, 0.082912283062500}, -{0.134409779187500, 0.902448199125000}, -{0.408376747562500, 0.326245734125000}, -{0.584561740812500, 0.727888665062500}, -{0.534266910125000, 0.009900787187500}, -{0.192731703031250, 0.122610961484375}, -{0.886403666453125, 0.919165570765625}, -{0.740362459437500, 0.479082682703125}, -{0.381143881328125, 0.825372076343750}, -{0.177058600328125, 0.355660703968750}, -{0.880411986109375, 0.304388585781250}, -{0.489954645359375, 0.601971750750000}, -{0.931245437015625, 0.622161472968750}, -{0.495691442609375, 0.089534956375000}, -{0.748617226093750, 0.964032599359375}, -{0.200716102781250, 0.538594564312500}, -{0.902762098828125, 0.040629656437500}, -{0.167366403703125, 0.826817667671875}, -{0.326986691484375, 0.360298081343750}, -{0.725793624375000, 0.689620323765625}, -{0.725003755000000, 0.197653489734375}, -{0.019203528312500, 0.219887995546875}, -{0.789461819796875, 0.866237049781250}, -{0.602987853250000, 0.364968641265625}, -{0.286530910140625, 0.997227763453125}, -{0.197691088203125, 0.299653371203125}, -{0.864509651765625, 0.475386922921875}, -{0.409509265921875, 0.695098575390625}, -{0.924966150343750, 0.731175805390625}, -{0.366549151562500, 0.016182260046875}, -{0.620446765921875, 0.753692325390625}, -{0.004126035234375, 0.512365679515625}, -{0.759204111453125, 0.126627783906250}, -{0.039461819796875, 0.866237049781250}, -{0.385324830531250, 0.282537197156250}, -{0.529500805046875, 0.539659192578125}, -{0.620486845921875, 0.129726983984375}, -{0.040287232453125, 0.022961294593750}, -{0.871648411546875, 0.886075859718750}, -{0.567731703031250, 0.497610961484375}, -{0.271219649968750, 0.785940731265625}, -{0.149813081953125, 0.495059302156250}, -{0.752354406031250, 0.336633136296875}, -{0.267950605953125, 0.630717656421875}, -{0.763064970921875, 0.526211358984375}, -{0.350302165859375, 0.082912283093750}, -{0.541375661546875, 0.941916865828125}, -{0.067888875390625, 0.648631653203125}, -{0.817282235640625, 
0.240645457843750}, -{0.176686781125000, 0.965995978171875}, -{0.414407018781250, 0.458335411421875}, -{0.635381359671875, 0.622395865796875}, -{0.696580598671875, 0.010305563015625}, -{0.146140435203125, 0.181972166265625}, -{0.853197227578125, 0.768215064734375}, -{0.631158461921875, 0.330667103468750}, -{0.443098689046875, 0.770526325000000}, -{0.008860189078125, 0.404241883828125}, -{0.920499240812500, 0.436873040078125}, -{0.274931749687500, 0.517395920968750}, -{0.872488661328125, 0.681742937687500}, -{0.273658139312500, 0.240026975812500}, -{0.686178846875000, 0.902720720890625}, -{0.022328994562500, 0.659601535312500}, -{0.889064677375000, 0.139944156000000}, -{0.041831345203125, 0.949164114093750}, -{0.443262299140625, 0.313206182765625}, -{0.553107937421875, 0.637161480234375}, -{0.576361266343750, 0.010049207671875}, -{0.024757727531250, 0.155556940859375}, -{0.954885609765625, 0.864774783453125}, -{0.576988498046875, 0.268435650828125}, -{0.378272153750000, 0.889096529468750}, -{0.243922631171875, 0.424414353515625}, -{0.993449504812500, 0.487462829328125}, -{0.315047772046875, 0.590538342781250}, -{0.757436479140625, 0.715431613031250}, -{0.454737456671875, 0.222940860375000}, -{0.506538910328125, 0.822860258968750}, -{0.223798019234375, 0.699851317375000}, -{0.839514399500000, 0.012551700359375}, -{0.013378945812500, 0.811198635640625}, -{0.259404890625000, 0.333637616328125}, -{0.674460011265625, 0.635265061968750}, -{0.552179912484375, 0.113477370312500}, -{0.133506990359375, 0.242482936484375}, -{0.792605235078125, 0.963450866640625}, -{0.556245437015625, 0.434661472968750}, -{0.302640369765625, 0.866357903265625}, -{0.104025812484375, 0.322831715312500}, -{0.788292439093750, 0.420036633796875}, -{0.383947288453125, 0.645595154640625}, -{0.987679025703125, 0.720586141078125}, -{0.310166960328125, 0.049274940406250}, -{0.692634651765625, 0.826949422921875}, -{0.066739180750000, 0.551367061812500}, -{0.954885609765625, 0.114774783453125}, 
-{0.106252533187500, 0.916712681484375}, -{0.490362459437500, 0.479082682703125}, -{0.646028602781250, 0.509297689312500}, -{0.696508847734375, 0.182043413890625}, -{0.167639399500000, 0.008157169109375}, -{0.942731703031250, 0.935110961484375}, -{0.682010510203125, 0.383364961312500}, -{0.444750986453125, 0.993815283906250}, -{0.012293182515625, 0.265019109265625}, -{0.943520700468750, 0.285664643703125}, -{0.256436740812500, 0.727888665078125}, -{0.792605235078125, 0.588450866640625}, -{0.323828669343750, 0.228345414000000}, -{0.589727949703125, 0.818705937671875}, -{0.146647944500000, 0.717554343109375}, -{0.763378945812500, 0.061198635640625}, -{0.245217761562500, 0.944967010375000}, -{0.302009651765625, 0.475386922921875}, -{0.508157018781250, 0.708335411421875}, -{0.552058600328125, 0.230660703968750}, -{0.076470961921875, 0.065042103468750}, -{0.839060384390625, 0.826948487828125}, -{0.743383847734375, 0.260168413890625}, -{0.361729406031250, 0.961633136296875}, -{0.130411986109375, 0.304388585781250}, -{0.913057411375000, 0.372578939312500}, -{0.450791960328125, 0.635700721656250}, -{0.994715387546875, 0.610882883562500}, -{0.396123640625000, 0.238911053828125}, -{0.635171568625000, 0.803308823531250}, -{0.134436274500000, 0.588235294109375}, -{0.893091363734375, 0.085389815609375}, -{0.204885609765625, 0.864774783453125}, -{0.419763913046875, 0.397374565093750}, -{0.589063133281250, 0.604178170375000}, -{0.692634651765625, 0.076949422921875}, -{0.192731703031250, 0.185110961484375}, -{0.951814247656250, 0.756306315203125}, -{0.506689186515625, 0.439765218421875}, -{0.456461826015625, 0.821001137000000}, -{0.083707248625000, 0.461775383828125}, -{0.764249240812500, 0.280623040078125}, -{0.323579912671875, 0.557221730578125}, -{0.818079645359375, 0.508221750750000}, -{0.435674328421875, 0.095052115796875}, -{0.725148582281250, 0.930782790234375}, -{0.211591777984375, 0.620217372437500}, -{0.901467761562500, 0.194967010375000}, -{0.114509651765625, 
0.873824422921875}, -{0.350302165859375, 0.270412283093750}, -{0.713196808109375, 0.568240323640625}, -{0.631158461921875, 0.205667103468750}, -{0.121648411546875, 0.136075859718750}, -{0.807256990687500, 0.775225260828125}, -{0.748441255000000, 0.385153489734375}, -{0.322881990687500, 0.822100260828125}, -{0.170499240812500, 0.436873040078125}, -{0.976735045687500, 0.379605464812500}, -{0.326988498046875, 0.705935650828125}, -{0.849030910140625, 0.622227763453125}, -{0.456628942609375, 0.025081831375000}, -{0.603718978906250, 0.881272112125000}, -{0.087671365468750, 0.733711546609375}, -{0.858316099875000, 0.063684800093750}, -{0.079233855890625, 0.980882302687500}, -{0.498831558375000, 0.275019330593750}, -{0.683006746078125, 0.696560873750000}, -{0.634421685203125, 0.070155760015625}, -{0.100941098000000, 0.000878403515625}, -{0.868983666328125, 0.946905808593750}, -{0.622823333015625, 0.407884578921875}, -{0.380701413046875, 0.772374565093750}, -{0.070966777984375, 0.276467372437500}, -{0.869730392156250, 0.411764705875000}, -{0.390973727828125, 0.533716984406250}, -{0.934919978109375, 0.561328326953125}, -{0.267725152796875, 0.170350753109375}, -{0.650556467859375, 0.939171186375000}, -{0.208092013671875, 0.656130963328125}, -{0.939854406031250, 0.211633136296875}, -{0.227987853250000, 0.896218641265625}, -{0.362037827187500, 0.411778179156250}, -{0.575564970921875, 0.682461358984375}, -{0.603861550250000, 0.193531534234375}, -{0.245936791328125, 0.056280808593750}, -{0.931245437015625, 0.997161472968750}, -{0.674341363734375, 0.272889815609375}, -{0.475148582281250, 0.930782790234375}, -{0.196690920375000, 0.490305748390625}, -{0.823073230953125, 0.290484312156250}, -{0.349801672015625, 0.643614082703125}, -{0.816809364156250, 0.711985215093750}, -{0.442773254953125, 0.168452206343750}, -{0.559175124515625, 0.768645847968750}, -{0.012608312281250, 0.564660480046875}, -{0.951732996484375, 0.034567680765625}, -{0.130635833015625, 0.782884578921875}, 
-{0.295859197828125, 0.295320202140625}, -{0.712431749687500, 0.517395920968750}, -{0.572626461875000, 0.068089897125000}, -{0.211591777984375, 0.245217372437500}, -{0.901756746078125, 0.821560873750000}, -{0.512364601359375, 0.315328221531250}, -{0.275838593406250, 0.932598880093750}, -{0.007956102718750, 0.451497525703125}, -{0.924966150343750, 0.481175805390625}, -{0.454495282953125, 0.559257955593750}, -{0.978187903312500, 0.673257136171875}, -{0.416103249937500, 0.041326861265625}, -{0.664447403859375, 0.864416693968750}, -{0.033521988046875, 0.696631179015625}, -{0.852837228421875, 0.184355089812500}, -{0.090934062750000, 0.810372893375000}, -{0.275746963312500, 0.411554660312500}, -{0.588037245453125, 0.558795337875000}, -{0.554922654015625, 0.160357524531250}, -{0.072833622000000, 0.193689044750000}, -{0.964063133281250, 0.979178170375000}, -{0.708517234312500, 0.319548392906250}, -{0.432256990687500, 0.962725260828125}, -{0.068079645359375, 0.414471750750000}, -{0.963190877593750, 0.324420555781250}, -{0.411502470921875, 0.573086358984375}, -{0.800162124781250, 0.669611615750000}, -{0.387554934109375, 0.150309289718750}, -{0.579945004250000, 0.965966294140625}, -{0.065522102093750, 0.599326277234375}, -{0.761255117500000, 0.204583567718750}, -{0.196950541453125, 0.770728070765625}, -{0.344324410937500, 0.493490102843750}, -{0.510111550250000, 0.568531534234375}, -{0.636389399500000, 0.008157169109375}, -{0.128530229140625, 0.090431613031250}, -{0.883566727531250, 0.752475196796875}, -{0.552206093281250, 0.309003302484375}, -{0.348181288187500, 0.933212688265625}, -{0.227987853250000, 0.364968641265625}, -{0.771924527437500, 0.448127289609375}, -{0.489679912484375, 0.745313307812500}, -{0.927148254953125, 0.684077206343750}, -{0.264066255000000, 0.103903489734375}, -{0.740057568187500, 0.764054456484375}, -{0.148947313656250, 0.630208463203125}, -{0.974161986109375, 0.179388585781250}, -{0.010457836296875, 0.893541028515625}, -{0.498441255000000, 
0.385153489734375}, -{0.744468904421875, 0.637380397421875}, -{0.678301531546875, 0.136423458078125}, -{0.010191088203125, 0.112153371203125}, -{0.774757727531250, 0.905556940859375}, -{0.674229406031250, 0.446008136296875}, -{0.319922419343750, 0.784986039000000}, -{0.011042923812500, 0.349437248625000}, -{0.821379264859375, 0.354136628500000}, -{0.257237357453125, 0.579287821171875}, -{0.948151308765625, 0.522112716656250}, -{0.318520700468750, 0.160664643703125}, -{0.543804934109375, 0.900309289718750}, -{0.130607996843750, 0.519919121093750}, -{0.811627065421875, 0.071665408953125}, -{0.160867175625000, 0.931752899046875}, -{0.428297719390625, 0.362355138953125}, -{0.609505036140625, 0.690144858781250}, -{0.504804041609375, 0.003228892687500}, -{0.216196606265625, 0.064729040234375}, -{0.901736845921875, 0.879726983984375}, -{0.708719649968750, 0.453909481265625}, -{0.415337952218750, 0.849024716109375}, -{0.134853249937500, 0.353826861265625}, -{0.903787097500000, 0.267080047484375}, -{0.479025812484375, 0.572831715312500}, -{0.876605124109375, 0.604345045187500}, -{0.456461826015625, 0.071001137000000}, -{0.709494562484375, 0.955644215312500}, -{0.231947383703125, 0.518907935984375}, -{0.932230392156250, 0.013327205875000}, -{0.145086204046875, 0.865883539265625}, -{0.350930456281250, 0.348899376265625}, -{0.725034838203125, 0.739106496203125}, -{0.739954645359375, 0.226971750750000}, -{0.042605235078125, 0.213450866640625}, -{0.811627065421875, 0.821665408953125}, -{0.587735274500000, 0.312719600875000}, -{0.307230392156250, 0.950827205875000}, -{0.217487057734375, 0.273792953031250}, -{0.854401308765625, 0.440081466656250}, -{0.384747995484375, 0.706561433531250}, -{0.899206688062500, 0.699456330843750}, -{0.334068937265625, 0.028193571656250}, -{0.576864665859375, 0.801662283093750}, -{0.041360180171875, 0.539240991515625}, -{0.780622165328125, 0.170435734406250}, -{0.025074889437500, 0.841885738250000}, -{0.412686740812500, 0.274763665078125}, 
-{0.551686781125000, 0.512870978171875}, -{0.574633261734375, 0.138224135796875}, -{0.058436791328125, 0.056280808593750}, -{0.822110274500000, 0.890844600875000}, -{0.618449504812500, 0.487462829328125}, -{0.264066255000000, 0.807028489734375}, -{0.132527922859375, 0.453355770328125}, -{0.807953992203125, 0.323742603312500}, -{0.302148254953125, 0.684077206343750}, -{0.794171695281250, 0.522748994546875}, -{0.372686140625000, 0.110004803828125}, -{0.507741981953125, 0.951245734125000}, -{0.107097304093750, 0.651192048015625}, -{0.822833622000000, 0.193689044750000}, -{0.181245437015625, 0.997161472968750}, -{0.384747995484375, 0.456561433531250}, -{0.662226998781250, 0.569583683906250}, -{0.727197083078125, 0.021632137984375}, -{0.184988661328125, 0.150492937687500}, -{0.873243045828125, 0.810942332640625}, -{0.684963615078125, 0.357045297593750}, -{0.461525611203125, 0.759528012437500}, -{0.025718904421875, 0.410817897421875}, -{0.897009311359375, 0.420948834468750}, -{0.263037063734375, 0.546019301578125}, -{0.857097304093750, 0.651192048015625}, -{0.252866199843750, 0.205957512640625}, -{0.665602115484375, 0.895166775859375}, -{0.056996963312500, 0.684992160312500}, -{0.918804934109375, 0.150309289718750}, -{0.019203528312500, 0.969887995546875}, -{0.485853633703125, 0.339220435984375}, -{0.509352115484375, 0.676416775859375}, -{0.564952190359375, 0.039350341546875}, -{0.061178846875000, 0.152720720890625}, -{0.979027962734375, 0.850901043359375}, -{0.618986693593750, 0.251067502968750}, -{0.416788412578125, 0.890801763843750}, -{0.191935423609375, 0.392279210781250}, -{0.959424527437500, 0.448127289609375}, -{0.360967390625000, 0.582538983515625}, -{0.802390435203125, 0.744472166265625}, -{0.498617226093750, 0.214032599359375}, -{0.542495165859375, 0.838147345281250}, -{0.211457644609375, 0.742513028953125}, -{0.837488317609375, 0.030941206375000}, -{0.038430456281250, 0.786399376265625}, -{0.290324504812500, 0.370275329328125}, -{0.678301531546875, 
0.667673458078125}, -{0.510613942796875, 0.106955928328125}, -{0.170736691484375, 0.235298081343750}, -{0.759083993796875, 0.997656627843750}, -{0.539572313656250, 0.380208463203125}, -{0.255388875390625, 0.867381653203125}, -{0.071379264859375, 0.354136628500000}, -{0.758860189078125, 0.404241883828125}, -{0.420321786390625, 0.636298924375000}, -{0.978659227718750, 0.748372525703125}, -{0.287503755000000, 0.010153489734375}, -{0.739679912484375, 0.870313307812500}, -{0.089315978250000, 0.513406141265625}, -{0.943869562484375, 0.080155934062500}, -{0.076564677375000, 0.913381656000000}, -{0.444542439093750, 0.459099133796875}, -{0.633162999796875, 0.540307445062500}, -{0.709818581500000, 0.157887002984375}, -{0.167325380828125, 0.046656456390625}, -{0.977987853250000, 0.896218641265625}, -{0.652229645515625, 0.427815757218750}, -{0.492972183375000, 0.993769330593750}, -{0.040505847500000, 0.272359588500000}, -{0.962978249937500, 0.260076861265625}, -{0.302640369765625, 0.741357903265625}, -{0.768208405500000, 0.610922261187500}, -{0.318273390046875, 0.193320190000000}, -{0.619905641343750, 0.853941035859375}, -{0.181258709546875, 0.747276624031250}, -{0.757229657953125, 0.013359518093750}, -{0.244715387546875, 0.985882883562500}, -{0.259008847734375, 0.494543413890625}, -{0.554055652187500, 0.714498260375000}, -{0.534027961468750, 0.219931745984375}, -{0.069780017046875, 0.113865176609375}, -{0.848982478109375, 0.872119342578125}, -{0.713196808109375, 0.271365323640625}, -{0.363736251062500, 0.991491573531250}, -{0.150433761421875, 0.282401366203125}, -{0.901208663437500, 0.331649132359375}, -{0.463675308375000, 0.681269330593750}, -{0.949633261734375, 0.606974135796875}, -{0.384851754687500, 0.208333852843750}, -{0.668614601359375, 0.752828221531250}, -{0.181245437015625, 0.622161472968750}, -{0.895086204046875, 0.115883539265625}, -{0.193869562484375, 0.830155934062500}, -{0.387335340921875, 0.396347453890625}, -{0.564854406031250, 0.586633136296875}, 
-{0.745691442609375, 0.089534956375000}, -{0.245486845921875, 0.129726983984375}, -{0.982811359250000, 0.811790368250000}, -{0.536503468843750, 0.474657913296875}, -{0.489679912484375, 0.870313307812500}, -{0.067888875390625, 0.492381653203125}, -{0.751398662484375, 0.307813307812500}, -{0.352987853250000, 0.521218641265625}, -{0.869468904421875, 0.543630397421875}, -{0.400000938734375, 0.102147747421875}, -{0.716287097500000, 0.892080047484375}, -{0.204945004250000, 0.590966294140625}, -{0.883506990359375, 0.242482936484375}, -{0.065143307734375, 0.844593734281250}, -{0.327001686515625, 0.299140218421875}, -{0.748446786390625, 0.573798924375000}, -{0.665686274500000, 0.213235294109375}, -{0.115683153500000, 0.178056211000000}, -{0.757883424281250, 0.796209072156250}, -{0.713222390562500, 0.397222857359375}, -{0.362679025703125, 0.845586141078125}, -{0.147009311359375, 0.420948834468750}, -{0.954472218843750, 0.404345413296875}, -{0.363836204046875, 0.740883539265625}, -{0.821904890625000, 0.583637616328125}, -{0.492832801906250, 0.029377324812500}, -{0.599161986109375, 0.929388585781250}, -{0.096331207625000, 0.695634299281250}, -{0.848982478109375, 0.122119342578125}, -{0.099003468843750, 0.966845413296875}, -{0.462431749687500, 0.267395920968750}, -{0.677354460328125, 0.725544471656250}, -{0.650008709546875, 0.122276624031250}, -{0.123243045828125, 0.060942332640625}, -{0.822833622000000, 0.943689044750000}, -{0.586591777984375, 0.432717372437500}, -{0.408903485687500, 0.776738982078125}, -{0.080479406031250, 0.305383136296875}, -{0.818079645359375, 0.414471750750000}, -{0.431931985968750, 0.513270723078125}, -{0.903169756421875, 0.555146535265625}, -{0.285517059468750, 0.166546514046875}, -{0.675219736453125, 0.976237158906250}, -{0.238942076937500, 0.643155269500000}, -{0.964063133281250, 0.229178170375000}, -{0.192731703031250, 0.935110961484375}, -{0.348181288187500, 0.401962688265625}, -{0.618003220203125, 0.658636770343750}, -{0.588190877593750, 
0.199420555781250}, -{0.216958200468750, 0.007344331203125}, -{0.883506990359375, 0.992482936484375}, -{0.636042923812500, 0.294749748625000}, -{0.481304934109375, 0.900309289718750}, -{0.237679025703125, 0.470586141078125}, -{0.831750421625000, 0.262285054609375}, -{0.341264392046875, 0.675388614109375}, -{0.864509651765625, 0.725386922921875}, -{0.481304934109375, 0.150309289718750}, -{0.507180456281250, 0.786399376265625}, -{0.033602444796875, 0.600612049781250}, -{0.962250867203125, 0.054211353312500}, -{0.151703992203125, 0.761242603312500}, -{0.261464799453125, 0.261330050531250}, -{0.740667841375000, 0.511552886640625}, -{0.615093996609375, 0.088785852218750}, -{0.228861550250000, 0.193531534234375}, -{0.920420940359375, 0.853803466546875}, -{0.541111850968750, 0.345010423484375}, -{0.305472183375000, 0.900019330593750}, -{0.044319650703125, 0.478886922328125}, -{0.892725152796875, 0.482850753109375}, -{0.490667841375000, 0.511552886640625}, -{0.971780261562500, 0.629537322875000}, -{0.387554934109375, 0.056559289718750}, -{0.662939186515625, 0.814765218421875}, -{0.052390435203125, 0.744472166265625}, -{0.826564677375000, 0.163381656000000}, -{0.103197227578125, 0.768215064734375}, -{0.306265020015625, 0.415613958984375}, -{0.575564970921875, 0.526211358984375}, -{0.505411986109375, 0.183294835781250}, -{0.079233855890625, 0.230882302687500}, -{0.939854406031250, 0.961633136296875}, -{0.747488661328125, 0.369242937687500}, -{0.399926992500000, 0.954583567718750}, -{0.122446606265625, 0.392854040234375}, -{0.987719736453125, 0.351237158906250}, -{0.425219736453125, 0.601237158906250}, -{0.774082801906250, 0.642658574812500}, -{0.416299322109375, 0.161398788656250}, -{0.602987853250000, 0.989968641265625}, -{0.089514399500000, 0.575051700359375}, -{0.786335975187500, 0.240626511406250}, -{0.231815968843750, 0.794970413296875}, -{0.318750014656250, 0.443002308546875}, -{0.505566445812500, 0.623698635640625}, -{0.637067640531250, 0.039603148984375}, 
-{0.180883847734375, 0.072668413890625}, -{0.917325380828125, 0.796656456390625}, -{0.526756746078125, 0.259060873750000}, -{0.334878599875000, 0.893762925093750}, -{0.225425998046875, 0.315310650828125}, -{0.794319650703125, 0.478886922328125}, -{0.445957801906250, 0.724689824812500}, -{0.915440720703125, 0.656963966125000}, -{0.302640369765625, 0.116357903265625}, -{0.698976641187500, 0.774455388406250}, -{0.177148254953125, 0.684077206343750}, -{0.949633261734375, 0.138224135796875}, -{0.045314677375000, 0.913381656000000}, -{0.458038607125000, 0.401547691687500}, -{0.713980484156250, 0.625879926296875}, -{0.643255797593750, 0.155068738921875}, -{0.025074889437500, 0.091885738250000}, -{0.766702341031250, 0.927996303765625}, -{0.664447403859375, 0.489416693968750}, -{0.352787232453125, 0.772961294593750}, -{0.025478249937500, 0.320623736265625}, -{0.854074830531250, 0.352849697156250}, -{0.286530910140625, 0.622227763453125}, -{0.977727943875000, 0.553082892328125}, -{0.350685461906250, 0.153596186453125}, -{0.557230392156250, 0.880514705875000}, -{0.153169756421875, 0.555146535265625}, -{0.789461819796875, 0.116237049781250}, -{0.168804934109375, 0.900309289718750}, -{0.377355421296875, 0.330038940000000}, -{0.612679025703125, 0.720586141078125}, -{0.508506990359375, 0.054982936484375}, -{0.196917624109375, 0.096532545187500}, -{0.910867175625000, 0.931752899046875}, -{0.728171685203125, 0.443690916265625}, -{0.435674328421875, 0.845052115796875}, -{0.182230392156250, 0.325827205875000}, -{0.884087952218750, 0.286524716109375}, -{0.448834987281250, 0.579381097156250}, -{0.884436274500000, 0.588235294109375}, -{0.439752211921875, 0.123635853468750}, -{0.688559795171875, 0.981591765453125}, -{0.198151308765625, 0.522112716656250}, -{0.901703992203125, 0.011242603312500}, -{0.128530229140625, 0.840431613031250}, -{0.317826892046875, 0.321872989109375}, -{0.696508847734375, 0.744543413890625}, -{0.688847218843750, 0.216845413296875}, -{0.004724588125000, 
0.188791578953125}, -{0.787802165859375, 0.832912283093750}, -{0.567052101359375, 0.371480565281250}, -{0.302148254953125, 0.996577206343750}, -{0.243986693593750, 0.251067502968750}, -{0.825795788375000, 0.474201552750000}, -{0.430456688062500, 0.699456330843750}, -{0.931258709546875, 0.747276624031250}, -{0.325564970921875, 0.057461358984375}, -{0.575724486109375, 0.777044835781250}, -{0.058436791328125, 0.525030808593750}, -{0.759808761421875, 0.157401366203125}, -{0.010191088203125, 0.862153371203125}, -{0.411476654468750, 0.296344298265625}, -{0.505607996843750, 0.519919121093750}, -{0.563898662484375, 0.182813307812500}, -{0.007229657953125, 0.013359518093750}, -{0.826564677375000, 0.913381656000000}, -{0.615744562484375, 0.440019215312500}, -{0.273049196593750, 0.752824731078125}, -{0.126134788453125, 0.489345154640625}, -{0.776528750687500, 0.346344691359375}, -{0.271093019843750, 0.673808825390625}, -{0.766504140937500, 0.549462718109375}, -{0.321690920375000, 0.115305748390625}, -{0.552058600328125, 0.980660703968750}, -{0.086158139312500, 0.677526975812500}, -{0.868983666328125, 0.196905808593750}, -{0.133506990359375, 0.992482936484375}, -{0.428389216390625, 0.479768192062500}, -{0.642183234343750, 0.594837245046875}, -{0.693414534828125, 0.060006743953125}, -{0.177263875390625, 0.179881653203125}, -{0.822881453125000, 0.799457928828125}, -{0.657602996515625, 0.342698708937500}, -{0.447833622000000, 0.795251544750000}, -{0.049908315421875, 0.432993533953125}, -{0.884744019140625, 0.384203634359375}, -{0.303495121031250, 0.531469981562500}, -{0.829156508796875, 0.635903919875000}, -{0.279003945812500, 0.225261135640625}, -{0.626154293906250, 0.912625724750000}, -{0.009546365468750, 0.639961546609375}, -{0.886403666453125, 0.169165570765625}, -{0.052259883703125, 0.979845435984375}, -{0.459494562484375, 0.330644215312500}, -{0.524813081953125, 0.651309302156250}, -{0.606787063734375, 0.014769301578125}, -{0.010457836296875, 0.143541028515625}, 
-{0.947626461875000, 0.818089897125000}, -{0.571917624109375, 0.284032545187500}, -{0.416299322109375, 0.911398788656250}, -{0.236033046906250, 0.396646543203125}, -{0.990797719390625, 0.456105138953125}, -{0.333626471656250, 0.616508772390625}, -{0.787209695250000, 0.716482502812500}, -{0.439756778562500, 0.194376370593750}, -{0.552179912484375, 0.863477370312500}, -{0.243449504812500, 0.737462829328125}, -{0.822881453125000, 0.049457928828125}, -{0.058436791328125, 0.806280808593750}, -{0.267237456671875, 0.347940860375000}, -{0.659100916609375, 0.628228892687500}, -{0.525109796625000, 0.082597554609375}, -{0.135951233515625, 0.201639822421875}, -{0.761255117500000, 0.954583567718750}, -{0.556490320328125, 0.399823343937500}, -{0.258466777984375, 0.838967372437500}, -{0.104074830531250, 0.352849697156250}, -{0.803389216390625, 0.386018192062500}, -{0.392068237890625, 0.666186056234375}, -{0.984505036140625, 0.690144858781250}, -{0.277932351500000, 0.052908284062500}, -{0.713196808109375, 0.833865323640625}, -{0.102915496515625, 0.537034646437500}, -{0.979027962734375, 0.100901043359375}, -{0.106115002812500, 0.885378765484375}, -{0.473302101359375, 0.455953221531250}, -{0.685199429843750, 0.551526284734375}, -{0.706158315421875, 0.128306033953125}, -{0.147265783328125, 0.057294542578125}, -{0.959039534828125, 0.919381743953125}, -{0.646013875390625, 0.398631653203125}, -{0.458517234312500, 0.944548392906250}, -{0.056758487734375, 0.295253452109375}, -{0.988836204046875, 0.303383539265625}, -{0.254309364156250, 0.711985215093750}, -{0.775799306890625, 0.567053766171875}, -{0.340921091031250, 0.224871303765625}, -{0.620506746078125, 0.821560873750000}, -{0.160012464359375, 0.690720392781250}, -{0.788430456281250, 0.036399376265625}, -{0.220355124109375, 0.963720045187500}, -{0.287648582281250, 0.493282790234375}, -{0.536503468843750, 0.724657913296875}, -{0.552148254953125, 0.246577206343750}, -{0.116843560203125, 0.107753416265625}, -{0.842374240812500, 
0.843123040078125}, -{0.696912145562500, 0.310820697562500}, -{0.330881907437500, 0.958779769828125}, -{0.169338615078125, 0.306386117906250}, -{0.927058600328125, 0.355660703968750}, -{0.477915496515625, 0.630784646437500}, -{0.947107614062500, 0.571599844062500}, -{0.411298019234375, 0.231101317375000}, -{0.634196201015625, 0.774126137000000}, -{0.133506990359375, 0.617482936484375}, -{0.911148929828125, 0.091845178421875}, -{0.229027962734375, 0.850901043359375}, -{0.433436791328125, 0.431280808593750}, -{0.570900611203125, 0.618903012437500}, -{0.691994851500000, 0.099783284062500}, -{0.212500058671875, 0.147009234203125}, -{0.951732996484375, 0.784567680765625}, -{0.538479240078125, 0.442006235093750}, -{0.495691442609375, 0.839534956375000}, -{0.092295940359375, 0.445600341546875}, -{0.806758487734375, 0.295253452109375}, -{0.374367956281250, 0.505149376265625}, -{0.822584307093750, 0.538276272453125}, -{0.381143881328125, 0.075372076343750}, -{0.746648411546875, 0.886075859718750}, -{0.225690524703125, 0.572657414093750}, -{0.908135803781250, 0.224055233687500}, -{0.100557411375000, 0.856953939312500}, -{0.326749240812500, 0.257185540078125}, -{0.703382877203125, 0.594522854968750}, -{0.635381359671875, 0.247395865796875}, -{0.091503945812500, 0.170573635640625}, -{0.773093560203125, 0.773280760015625}, -{0.699864601359375, 0.424703221531250}, -{0.337978249937500, 0.822576861265625}, -{0.177278602781250, 0.415547689312500}, -{0.950716102781250, 0.382344564312500}, -{0.345102996515625, 0.701097146437500}, -{0.850941098000000, 0.563378403515625}, -{0.447833622000000, 0.045251544750000}, -{0.584417841375000, 0.933427886640625}, -{0.117581880000000, 0.710837083484375}, -{0.864601654468750, 0.093219298265625}, -{0.115674527437500, 0.963752289609375}, -{0.438901999203125, 0.285253217765625}, -{0.641702341031250, 0.693621303765625}, -{0.662939186515625, 0.064765218421875}, -{0.102763510390625, 0.047682223171875}, -{0.821904890625000, 0.958637616328125}, 
-{0.617697313656250, 0.380208463203125}, -{0.387554934109375, 0.806559289718750}, -{0.096212938656250, 0.263020963203125}, -{0.867422654015625, 0.379107524531250}, -{0.383386910937500, 0.555990102843750}, -{0.880607996843750, 0.519919121093750}, -{0.252252211921875, 0.162698353468750}, -{0.642183234343750, 0.969837245046875}, -{0.196583993796875, 0.685156627843750}, -{0.963190877593750, 0.199420555781250}, -{0.199633261734375, 0.888224135796875}, -{0.350685461906250, 0.434846186453125}, -{0.614399749906250, 0.627404791062500}, -{0.608747165328125, 0.225123234406250}, -{0.244958663437500, 0.019149132359375}, -{0.880021551015625, 0.966470884812500}, -{0.686627065421875, 0.259165408953125}, -{0.463246963312500, 0.880304660312500}, -{0.209424527437500, 0.448127289609375}, -{0.850148582281250, 0.305782790234375}, -{0.321802423796875, 0.647870625703125}, -{0.853921568625000, 0.709558823531250}, -{0.496648411546875, 0.136075859718750}, -{0.534266910125000, 0.759900787234375}, -{0.018208405500000, 0.610922261187500}, -{0.981815968843750, 0.044970413296875}, -{0.147265783328125, 0.807294542578125}, -{0.278764594718750, 0.293912076453125}, -{0.697168203437500, 0.508447093109375}, -{0.569936479140625, 0.090431613031250}, -{0.199633261734375, 0.231974135796875}, -{0.878530229140625, 0.840431613031250}, -{0.530111691484375, 0.313423081343750}, -{0.287503755000000, 0.877340989734375}, -{0.020926812156250, 0.490443845953125}, -{0.925118529859375, 0.463551966546875}, -{0.460281853234375, 0.514086682671875}, -{0.946583993796875, 0.685156627843750}, -{0.432953992203125, 0.011242603312500}, -{0.681996963312500, 0.841242160312500}, -{0.035440756328125, 0.741967535328125}, -{0.826814113265625, 0.129551982203125}, -{0.102763510390625, 0.797682223171875}, -{0.257250986453125, 0.431315283906250}, -{0.601735045687500, 0.535855464812500}, -{0.523947313656250, 0.161458463203125}, -{0.089514399500000, 0.200051700359375}, -{0.998871711468750, 0.969931745984375}, -{0.725557411375000, 
0.325703939312500}, -{0.411054041609375, 0.940728892687500}, -{0.092214949156250, 0.395649388656250}, -{0.947571808109375, 0.318240323640625}, -{0.385150528859375, 0.598791693968750}, -{0.760340045031250, 0.666060247875000}, -{0.385468447859375, 0.173477959078125}, -{0.619715387546875, 0.985882883562500}, -{0.110693313734375, 0.604613051578125}, -{0.759083993796875, 0.247656627843750}, -{0.198151308765625, 0.803362716656250}, -{0.368295322046875, 0.475490672062500}, -{0.522431987421875, 0.579676484406250}, -{0.668614601359375, 0.002828221531250}, -{0.159061291453125, 0.115786836312500}, -{0.885343915375000, 0.797979216453125}, -{0.502049350968750, 0.290322923484375}, -{0.361960011265625, 0.916515061968750}, -{0.212257727531250, 0.374306940859375}, -{0.808006746078125, 0.446560873750000}, -{0.489786425109375, 0.696416160921875}, -{0.914775229140625, 0.625807223171875}, -{0.272394756234375, 0.082105300000000}, -{0.697833622000000, 0.795251544750000}, -{0.146614411140625, 0.654378257218750}, -{0.959039534828125, 0.169381743953125}, -{0.056931985968750, 0.888270723078125}, -{0.493765020015625, 0.415613958984375}, -{0.727875617718750, 0.674521589734375}, -{0.646232996843750, 0.172262871093750}, -{0.052267234312500, 0.085173392906250}, -{0.794171695281250, 0.897748994546875}, -{0.664407018781250, 0.458335411421875}, -{0.318273390046875, 0.755820190000000}, -{0.046247165328125, 0.350123234406250}, -{0.865799202015625, 0.333466330906250}, -{0.285498339406250, 0.583855022421875}, -{0.956147102093750, 0.505576277234375}, -{0.318949826718750, 0.141763441546875}, -{0.510429611953125, 0.887119489765625}, -{0.184919978109375, 0.561328326953125}, -{0.759417624109375, 0.096532545187500}, -{0.130403602781250, 0.937032064312500}, -{0.394595719296875, 0.367380713734375}, -{0.567731703031250, 0.747610961484375}, -{0.538716806515625, 0.039836274843750} -}; \ No newline at end of file +} \ No newline at end of file diff --git a/22_RaytracedAO/Renderer.h b/22_RaytracedAO/Renderer.h index 
5c8e45738..81e38ac7f 100644 --- a/22_RaytracedAO/Renderer.h +++ b/22_RaytracedAO/Renderer.h @@ -24,20 +24,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac public: #include "rasterizationCommon.h" #include "raytraceCommon.h" - #ifdef __cplusplus - #undef uint - #undef vec4 - #undef mat4 - #undef mat4x3 - #endif - struct DenoiserArgs - { - std::filesystem::path bloomFilePath; - float bloomScale = 0.0f; - float bloomIntensity = 0.0f; - std::string tonemapperArgs = ""; - }; Renderer(nbl::video::IVideoDriver* _driver, nbl::asset::IAssetManager* _assetManager, nbl::scene::ISceneManager* _smgr, bool deferDenoise, bool useDenoiser = true); @@ -59,8 +46,6 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac const nbl::core::vector& clipPlanes={} ); - void deinitScreenSizedResources(); - void resetSampleAndFrameCounters(); void takeAndSaveScreenShot(const std::filesystem::path& screenshotFilePath, bool denoise, const DenoiserArgs& denoiserArgs = {}); @@ -173,7 +158,6 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr m_perCameraRasterDSLayout; nbl::core::smart_refctd_ptr m_rasterInstanceDataDSLayout,m_additionalGlobalDSLayout,m_commonRaytracingDSLayout; nbl::core::smart_refctd_ptr m_raygenDSLayout,m_closestHitDSLayout,m_resolveDSLayout; - nbl::core::smart_refctd_ptr m_visibilityBufferFillPipeline; nbl::core::smart_refctd_ptr m_cullPipelineLayout; nbl::core::smart_refctd_ptr m_raygenPipelineLayout; @@ -208,14 +192,6 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac private: nbl::core::smart_refctd_ptr bufferView; } sampleSequence; - uint16_t maxPathDepth; - uint16_t noRussianRouletteDepth : 15; - uint16_t hideEnvironment : 1; - uint32_t maxSensorSamples; - - // scene specific data - nbl::core::vector<::RadeonRays::Shape*> rrShapes; - nbl::core::vector<::RadeonRays::Shape*> rrInstances; nbl::core::matrix3x4SIMD 
m_prevView; nbl::core::matrix4x3 m_prevCamTform; @@ -245,20 +221,8 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr m_commonRaytracingDS[2]; nbl::core::smart_refctd_ptr m_rasterInstanceDataDS,m_raygenDS,m_resolveDS; nbl::core::smart_refctd_ptr m_closestHitDS[2]; - uint32_t m_raygenWorkGroups[2]; - struct InteropBuffer - { - nbl::core::smart_refctd_ptr buffer; - std::pair<::RadeonRays::Buffer*, cl_mem> asRRBuffer = { nullptr,0u }; - }; - InteropBuffer m_rayBuffer[2]; - InteropBuffer m_intersectionBuffer[2]; - nbl::core::smart_refctd_ptr m_accumulation,m_tonemapOutput; - nbl::core::smart_refctd_ptr m_albedoAcc,m_albedoRslv; - nbl::core::smart_refctd_ptr m_normalAcc,m_normalRslv; - nbl::core::smart_refctd_ptr m_maskAcc; - nbl::video::IFrameBuffer* m_visibilityBuffer,* m_colorBuffer; + nbl::video::IFrameBuffer* m_colorBuffer; // Resources used for envmap sampling nbl::core::smart_refctd_ptr m_finalEnvmap; diff --git a/22_RaytracedAO/SimpleJson.cpp b/22_RaytracedAO/SimpleJson.cpp deleted file mode 100644 index d478991df..000000000 --- a/22_RaytracedAO/SimpleJson.cpp +++ /dev/null @@ -1,150 +0,0 @@ -#include "SimpleJson.h" - -using namespace simplejson; - -// Hackery to emit JSON without using nlohmann/json C++ library (which requires a -// higher level of compiler compliance than is required by SPIRV-Cross -void Stream::begin_json_array() -{ - if (!stack.empty() && stack.top().second) - { - statement_inner(",\n"); - } - statement("["); - ++indent; - stack.emplace(Type::Array, false); -} - -void Stream::end_json_array() -{ - if (stack.empty() || stack.top().first != Type::Array) - std::cerr << "Invalid JSON state"; - if (stack.top().second) - { - statement_inner("\n"); - } - --indent; - statement_no_return("]"); - stack.pop(); - if (!stack.empty()) - { - stack.top().second = true; - } -} - -void Stream::emit_json_array_value(const std::string& value) -{ - if (stack.empty() || stack.top().first != Type::Array) 
- std::cerr << "Invalid JSON state"; - - if (stack.top().second) - statement_inner(",\n"); - - statement_no_return("\"", value, "\""); - stack.top().second = true; -} - -void Stream::emit_json_array_value(uint32_t value) -{ - if (stack.empty() || stack.top().first != Type::Array) - std::cerr << "Invalid JSON state"; - if (stack.top().second) - statement_inner(",\n"); - statement_no_return(std::to_string(value)); - stack.top().second = true; -} - -void Stream::emit_json_array_value(bool value) -{ - if (stack.empty() || stack.top().first != Type::Array) - std::cerr << "Invalid JSON state"; - if (stack.top().second) - statement_inner(",\n"); - statement_no_return(value ? "true" : "false"); - stack.top().second = true; -} - -void Stream::begin_json_object() -{ - if (!stack.empty() && stack.top().second) - { - statement_inner(",\n"); - } - statement("{"); - ++indent; - stack.emplace(Type::Object, false); -} - -void Stream::end_json_object() -{ - if (stack.empty() || stack.top().first != Type::Object) - std::cerr << "Invalid JSON state"; - if (stack.top().second) - { - statement_inner("\n"); - } - --indent; - statement_no_return("}"); - stack.pop(); - if (!stack.empty()) - { - stack.top().second = true; - } -} - -void Stream::emit_json_key(const std::string& key) -{ - if (stack.empty() || stack.top().first != Type::Object) - std::cerr << "Invalid JSON state"; - - if (stack.top().second) - statement_inner(",\n"); - statement_no_return("\"", key, "\" : "); - stack.top().second = true; -} - -void Stream::emit_json_key_value(const std::string& key, const std::string& value) -{ - emit_json_key(key); - statement_inner("\"", value, "\""); -} - -void Stream::emit_json_key_value(const std::string& key, uint32_t value) -{ - emit_json_key(key); - statement_inner(value); -} - -void Stream::emit_json_key_value(const std::string& key, int32_t value) -{ - emit_json_key(key); - statement_inner(value); -} - -void Stream::emit_json_key_value(const std::string& key, float value) -{ - 
emit_json_key(key); - statement_inner(to_string(value)); -} - -void Stream::emit_json_key_value(const std::string& key, bool value) -{ - emit_json_key(key); - statement_inner(value ? "true" : "false"); -} - -void Stream::emit_json_key_object(const std::string& key) -{ - emit_json_key(key); - statement_inner("{\n"); - ++indent; - stack.emplace(Type::Object, false); -} - -void Stream::emit_json_key_array(const std::string& key) -{ - emit_json_key(key); - statement_inner("[\n"); - ++indent; - stack.emplace(Type::Array, false); -} \ No newline at end of file diff --git a/22_RaytracedAO/SimpleJson.h b/22_RaytracedAO/SimpleJson.h deleted file mode 100644 index b85e5930c..000000000 --- a/22_RaytracedAO/SimpleJson.h +++ /dev/null @@ -1,78 +0,0 @@ -using namespace std; - -namespace simplejson -{ - enum class Type - { - Object, - Array, - }; - - using State = std::pair; - using Stack = std::stack; - - class Stream - { - Stack stack; - stringstream buffer; - uint32_t indent{ 0 }; - - public: - void begin_json_object(); - void end_json_object(); - void emit_json_key(const std::string& key); - void emit_json_key_value(const std::string& key, const std::string& value); - void emit_json_key_value(const std::string& key, bool value); - void emit_json_key_value(const std::string& key, uint32_t value); - void emit_json_key_value(const std::string& key, int32_t value); - void emit_json_key_value(const std::string& key, float value); - void emit_json_key_object(const std::string& key); - void emit_json_key_array(const std::string& key); - - void begin_json_array(); - void end_json_array(); - void emit_json_array_value(const std::string& value); - void emit_json_array_value(uint32_t value); - void emit_json_array_value(bool value); - - std::string str() const - { - return buffer.str(); - } - - private: - inline void statement_indent() - { - for (uint32_t i = 0; i < indent; i++) - buffer << " "; - } - - template - inline void statement_inner(T&& t) - { - buffer << std::forward(t); - } - 
- template - inline void statement_inner(T&& t, Ts &&... ts) - { - buffer << std::forward(t); - statement_inner(std::forward(ts)...); - } - - template - inline void statement(Ts &&... ts) - { - statement_indent(); - statement_inner(std::forward(ts)...); - buffer << '\n'; - } - - template - void statement_no_return(Ts &&... ts) - { - statement_indent(); - statement_inner(std::forward(ts)...); - } - }; -} // namespace simplejson diff --git a/22_RaytracedAO/common.h b/22_RaytracedAO/common.h index 6e82bcb11..da7528213 100644 --- a/22_RaytracedAO/common.h +++ b/22_RaytracedAO/common.h @@ -2,42 +2,6 @@ #define _COMMON_INCLUDED_ -#define RAYCOUNT_N_BUFFERING_LOG2 2 -#define RAYCOUNT_N_BUFFERING (0x1< - -#include -layout(set=1, binding=0, row_major) writeonly restrict buffer PerInstancePerCamera -{ - DrawData_t data[]; -} instanceDataPerCamera; -layout(set=1, binding=1, std430, row_major) restrict readonly buffer PerInstanceCull -{ - CullData_t cullData[]; -}; -layout(set=1, binding=2, std430) restrict coherent buffer IndirectDraws -{ - nbl_glsl_DrawElementsIndirectCommand_t draws[]; -} commandBuff[2]; - - - -layout(push_constant, row_major) uniform PushConstants -{ - CullShaderData_t data; -} pc; - - - -#include -#include - - -// base instance remains unchanged -// we just do atomic add on the instance count -void main() -{ - for (uint drawCommandGUID=gl_GlobalInvocationID.x; drawCommandGUID=pc.data.maxGlobalInstanceCount) - return; - - // fetch instance data - const CullData_t batchInstanceData = cullData[batchInstanceID]; - const uint batchInstanceGUID = batchInstanceData.batchInstanceGUID; - - const nbl_glsl_ext_Mitsuba_Loader_instance_data_t instanceData = InstData.data[batchInstanceGUID]; - const mat4x3 worldMatrix = instanceData.tform; - const mat4 MVP = nbl_glsl_pseudoMul4x4with4x3(pc.data.viewProjMatrix,worldMatrix); - - // cull - bool notCulled = true; - if (false) - { - const mat2x3 bbox = mat2x3(batchInstanceData.aabbMinEdge,batchInstanceData.aabbMaxEdge); - 
notCulled = nbl_glsl_couldBeVisible(MVP,bbox); - } - - // set up MDI - if (notCulled) - { - const uint drawCommandGUID = batchInstanceData.drawCommandGUID; - const uint drawInstanceID = commandBuff[pc.data.currentCommandBufferIx].draws[drawCommandGUID].baseInstance+ - atomicAdd(commandBuff[pc.data.currentCommandBufferIx].draws[drawCommandGUID].instanceCount,1u); - - instanceDataPerCamera.data[drawInstanceID].MVP = MVP; - // use the MSB to denote if face orientation should be flipped - instanceDataPerCamera.data[drawInstanceID].backfacingBit_batchInstanceGUID = batchInstanceGUID|((instanceData.determinantSignBit^floatBitsToUint(pc.data.viewProjDeterminant))&0x80000000u); - instanceDataPerCamera.data[drawInstanceID].firstIndex = commandBuff[pc.data.currentCommandBufferIx].draws[drawCommandGUID].firstIndex; - } -} \ No newline at end of file diff --git a/22_RaytracedAO/extractCubemap.bat b/22_RaytracedAO/extractCubemap.bat deleted file mode 100644 index b3ad104c4..000000000 --- a/22_RaytracedAO/extractCubemap.bat +++ /dev/null @@ -1,40 +0,0 @@ -REM @echo off - -REM examplary usage: -REM mergeCubemap.bat 64 64 mergedImage.png stripeFormat.png - -set cropOffsetX0=%1 -set cropOffsetY0=%2 - -set in=%3 -set out=%4 - -REM set extracted image size -for /f "tokens=*" %%s in ('magick identify -format "%%w" %in%') do set sz=%%s -set /a paddedSize = sz/3 - -set /a realSize = paddedSize-2*cropOffsetX0 - -set /a cropOffsetX1 = cropOffsetX0+paddedSize -set /a cropOffsetX2 = cropOffsetX0+paddedSize*2 -set /a cropOffsetX3 = cropOffsetX0+paddedSize*3 -set /a cropOffsetX4 = cropOffsetX0+paddedSize*4 -set /a cropOffsetX5 = cropOffsetX0+paddedSize*5 -set /a cropOffsetY1 = paddedSize+64 - -set /a x0 = 0 -set /a x1 = realSize -set /a x2 = 2*realSize -set /a x3 = 3*realSize -set /a x4 = 5*realSize -set /a x5 = 4*realSize - -set /a stripWidth = realSize*6 -magick convert -size %stripWidth%x%realSize% canvas:none ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX0%+%cropOffsetY1% -matte 
-virtual-pixel transparent -geometry %realSize%x%realSize%+%x0%+0 ) -composite ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX2%+%cropOffsetY1% -matte -virtual-pixel transparent -geometry %realSize%x%realSize%+%x1%+0 ) -composite ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX1%+%cropOffsetY0% -matte -virtual-pixel transparent -geometry %realSize%x%realSize%+%x2%+0 ) -composite ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX2%+%cropOffsetY0% -matte -virtual-pixel transparent -geometry %realSize%x%realSize%+%x3%+0 ) -composite ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX0%+%cropOffsetY0% -matte -virtual-pixel transparent -geometry %realSize%x%realSize%+%x4%+0 ) -composite ^ -( %in% -crop %realSize%x%realSize%+%cropOffsetX1%+%cropOffsetY1% -matte -virtual-pixel transparent -geometry %realSize%x%realSize%+%x5%+0 ) -composite ^ -%out% \ No newline at end of file diff --git a/22_RaytracedAO/fillVisBuffer.frag b/22_RaytracedAO/fillVisBuffer.frag deleted file mode 100644 index 88a18455a..000000000 --- a/22_RaytracedAO/fillVisBuffer.frag +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h -#version 460 core -#extension GL_EXT_shader_16bit_storage : require -#include - - -#define _NBL_GLSL_EXT_MITSUBA_LOADER_INSTANCE_DATA_BINDING_ 0 -#include "virtualGeometry.glsl" - -#include -layout(location = 2) flat in uint BackfacingBit_BatchInstanceGUID; -layout(location = 3) flat in uint drawCmdFirstIndex; - -uint nbl_glsl_barycentric_frag_getDrawID() {return BackfacingBit_BatchInstanceGUID&0x7fffffffu;} -vec3 nbl_glsl_barycentric_frag_getVertexPos(in uint batchInstanceGUID, in uint primID, in uint primsVx) -{ - const uint ix = nbl_glsl_VG_fetchTriangleVertexIndex(primID*3u+drawCmdFirstIndex,primsVx); - return nbl_glsl_fetchVtxPos(ix,InstData.data[batchInstanceGUID]); -} - - -layout(location = 0) out uvec4 frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2; // should it be called backfacing or frontfacing? - - -void main() -{ - vec2 bary = nbl_glsl_barycentric_frag_get(); - - const int triangleIDBitcount = findMSB(MAX_TRIANGLES_IN_BATCH-1)+1; - frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[0] = bitfieldInsert(BackfacingBit_BatchInstanceGUID,gl_PrimitiveID,31-triangleIDBitcount,triangleIDBitcount); - frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[1] = packUnorm2x16(bary); - frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[2] = packHalf2x16(dFdx(bary)); - frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[3] = packHalf2x16(dFdy(bary)); -} diff --git a/22_RaytracedAO/main.cpp b/22_RaytracedAO/main.cpp index 855a6ac63..45db9f715 100644 --- a/22_RaytracedAO/main.cpp +++ b/22_RaytracedAO/main.cpp @@ -17,7 +17,6 @@ #include "CSceneNodeAnimatorCameraModifiedMaya.h" #include "Renderer.h" -#include "SimpleJson.h" using namespace nbl; using namespace core; @@ -131,138 +130,6 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver bool overloadCameraKeyPressed; }; -struct PersistentState -{ - bool isBeauty; - bool 
isInteractiveMode; - bool isInteractiveViewMatrixLH; - bool isDenoiseDeferred; - uint32_t startSensorID; - std::string zipPath; - std::string xmlPath; - ProcessSensorsBehaviour processSensorsBehaviour; - // It is important to initialize it to all 0s because we use the condition of determinant 0 as an invalid condition for the view matrix. - core::matrix3x4SIMD interactiveCameraViewMatrix = core::matrix3x4SIMD(core::vectorSIMDf(), core::vectorSIMDf(), core::vectorSIMDf()); - - bool readFromDisk() - { - bool readSuccess = false; - std::ifstream readFile("lastRun.cache", std::ios::in | std::ios::binary | std::ios::ate); - if (readFile.is_open()) - { - auto readSize = readFile.tellg(); - if (readSize != std::istream::pos_type(-1)) - { - std::unique_ptr readBuffer = std::make_unique(readSize); - readFile.seekg(0, std::ios::beg); - readFile.read(reinterpret_cast(readBuffer.get()), readSize); - if (readFile.rdstate() == std::ios_base::goodbit) - { - uint64_t offset = 0; - - memcpy(&isBeauty, readBuffer.get() + offset, sizeof(bool)); - offset += sizeof(bool); - - memcpy(&isInteractiveMode, readBuffer.get() + offset, sizeof(bool)); - offset += sizeof(bool); - - memcpy(&isInteractiveViewMatrixLH, readBuffer.get() + offset, sizeof(bool)); - offset += sizeof(bool); - - memcpy(&startSensorID, readBuffer.get() + offset, sizeof(uint32_t)); - offset += sizeof(uint32_t); - - memcpy(&processSensorsBehaviour, readBuffer.get() + offset, sizeof(ProcessSensorsBehaviour)); - offset += sizeof(ProcessSensorsBehaviour); - - memcpy(&interactiveCameraViewMatrix, readBuffer.get() + offset, sizeof(core::matrix3x4SIMD)); - offset += sizeof(core::matrix3x4SIMD); - - const char* path = reinterpret_cast(readBuffer.get() + offset); - zipPath = std::string(path); - offset += zipPath.length() + 1; - - path = reinterpret_cast(readBuffer.get() + offset); - xmlPath = std::string(path); - offset += xmlPath.length() + 1; - - readSuccess = (offset == static_cast(readSize)); - } - } - - readFile.close(); - } 
- - return readSuccess; - } - - bool writeToDisk() const - { - bool writeSuccess = false; - std::ofstream outFile("lastRun.cache", std::ios::out | std::ios::binary); - if (outFile.is_open()) - { - const size_t writeSize = getSerializedMemorySize(); - - std::unique_ptr writeBuffer = std::make_unique(writeSize); - - uint64_t offset = 0; - - memcpy(writeBuffer.get() + offset, &isBeauty, sizeof(bool)); - offset += sizeof(bool); - - memcpy(writeBuffer.get() + offset, &isInteractiveMode, sizeof(bool)); - offset += sizeof(bool); - - memcpy(writeBuffer.get() + offset, &isInteractiveViewMatrixLH, sizeof(bool)); - offset += sizeof(bool); - - memcpy(writeBuffer.get() + offset, &startSensorID, sizeof(uint32_t)); - offset += sizeof(uint32_t); - - memcpy(writeBuffer.get() + offset, &processSensorsBehaviour, sizeof(ProcessSensorsBehaviour)); - offset += sizeof(ProcessSensorsBehaviour); - - memcpy(writeBuffer.get() + offset, &interactiveCameraViewMatrix, sizeof(core::matrix3x4SIMD)); - offset += sizeof(core::matrix3x4SIMD); - - memcpy(writeBuffer.get() + offset, zipPath.c_str(), zipPath.length() + 1); - offset += zipPath.length() + 1; - - memcpy(writeBuffer.get() + offset, xmlPath.c_str(), xmlPath.length() + 1); - offset += xmlPath.length() + 1; - - assert(offset == static_cast(writeSize)); - - outFile.write(reinterpret_cast(writeBuffer.get()), writeSize); - if (outFile.rdstate() == std::ios_base::goodbit) - writeSuccess = true; - - outFile.close(); - } - - if (!writeSuccess) - printf("[ERROR]: Failed to write the persistent state cache.\n"); - - return writeSuccess; - } - -private: - inline size_t getSerializedMemorySize() const - { - const size_t result = - sizeof(bool) + // isBeauty - sizeof(bool) + // isInteractiveMode - sizeof(bool) + // isInteractiveViewMatrixLH - sizeof(uint32_t) + // startSensorID - sizeof(ProcessSensorsBehaviour) + // processSensorsBehaviour - sizeof(core::matrix3x4SIMD) + // interactiveCameraViewMatrix - (zipPath.length() + 1) + - (xmlPath.length() + 1) 
; - - return result; - } -}; int main(int argc, char** argv) { @@ -272,16 +139,7 @@ int main(int argc, char** argv) for (auto i = 1ul; i < argc; ++i) arguments.emplace_back(argv[i]); } - std::cout << std::endl; - std::cout << "-- Build URL:" << std::endl; - std::cout << NBL_BUILD_URL << std::endl; - std::cout << std::endl; - std::cout << "-- Build log:" << std::endl; - std::cout << NBL_GIT_LOG << std::endl; - std::cout << std::endl; - - bool applicationIsReloaded = false; - PersistentState applicationState; + { CommandLineHandler cmdHandler = CommandLineHandler(arguments); @@ -291,8 +149,6 @@ int main(int argc, char** argv) applicationState.isDenoiseDeferred = cmdHandler.getDeferredDenoiseFlag(); auto sceneDir = cmdHandler.getSceneDirectory(); - if ((sceneDir.size() == 1) && (sceneDir[0] == "")) // special condition for reloading the application - applicationIsReloaded = true; std::string filePath = (sceneDir.size() >= 1) ? sceneDir[0] : ""; // zip or xml std::string extraPath = (sceneDir.size() >= 2) ? sceneDir[1] : "";; // xml in zip @@ -313,36 +169,15 @@ int main(int argc, char** argv) } bool takeScreenShots = true; - std::string mainFileName; // std::filesystem::path(filePath).filename().string(); - - // create device with full flexibility over creation parameters - // you can add more parameters if desired, check nbl::SIrrlichtCreationParameters - nbl::SIrrlichtCreationParameters params; - params.Bits = 24; //may have to set to 32bit for some platforms - params.ZBufferBits = 24; - params.DriverType = video::EDT_OPENGL; - params.Fullscreen = false; - params.Vsync = false; - params.Doublebuffer = true; - params.Stencilbuffer = false; //! This will not even be a choice soon - params.WindowSize = dimension2d(1920, 1080); - auto device = createDeviceEx(params); - if (!device) - return 1; // could not create selected driver. 
+ +// DEVICE CREATION EMITTED // asset::SAssetBundle meshes = {}; core::smart_refctd_ptr globalMeta; { - io::IFileSystem* fs = device->getFileSystem(); - asset::IAssetManager* am = device->getAssetManager(); - auto serializedLoader = core::make_smart_refctd_ptr(am); - auto mitsubaLoader = core::make_smart_refctd_ptr(am, fs); - serializedLoader->initialize(); - mitsubaLoader->initialize(); - am->addAssetLoader(std::move(serializedLoader)); - am->addAssetLoader(std::move(mitsubaLoader)); +// LOADER ADDITION EMITTED if (applicationState.zipPath.empty() && applicationState.xmlPath.empty() && !applicationIsReloaded) { @@ -360,19 +195,9 @@ int main(int argc, char** argv) auto loadScene = [&device, &am, &fs](const std::string& _zipPath, std::string& _xmlPath, std::string& _mainFileName) -> asset::SAssetBundle { asset::SAssetBundle result = {}; - if (_zipPath.empty() && _xmlPath.empty()) - return result; - _mainFileName = ""; - if (!_zipPath.empty()) - { - _mainFileName = std::filesystem::path(_zipPath).filename().string(); - _mainFileName = _mainFileName.substr(0u, _mainFileName.find_first_of('.')); +// ADD ARCHIVE AND VALIDATION EMITTED - io::IFileArchive* arch = nullptr; - device->getFileSystem()->addFileArchive(_zipPath.c_str(), io::EFAT_ZIP, "", &arch); - if (!arch) - return result; auto flist = arch->getFileList(); if (!flist) @@ -449,12 +274,7 @@ int main(int argc, char** argv) } _mainFileName += std::string("_") + std::filesystem::path(_xmlPath.c_str()).filename().replace_extension().string(); - } - else if (!_xmlPath.empty()) - { - _mainFileName = std::filesystem::path(_xmlPath).filename().string(); - _mainFileName = _mainFileName.substr(0u, _mainFileName.find_first_of('.')); - } + printf("[INFO]: Loading XML file: %s\n", _xmlPath.c_str()); @@ -471,92 +291,15 @@ int main(int argc, char** argv) }; meshes = loadScene(applicationState.zipPath, applicationState.xmlPath, mainFileName); - if (meshes.getContents().empty() || applicationIsReloaded) - { - if 
(meshes.getContents().empty() && !applicationState.xmlPath.empty()) - printf("[ERROR]: Failed to load asset at: %s\n", applicationState.xmlPath.c_str()); - - // Restore state to get new values for zipPath and xmlPath and try loading again - printf("[INFO]: Trying to restore the application to its previous state.\n"); - - bool restoreSuccess = false; - if (applicationState.readFromDisk()) - { - meshes = loadScene(applicationState.zipPath, applicationState.xmlPath, mainFileName); - if (!meshes.getContents().empty()) - restoreSuccess = true; - } - - if (!restoreSuccess) - { - pfd::message("ERROR", "Cannot restore application to its previous state.", pfd::choice::ok); - return 2; - } - } - - globalMeta = core::smart_refctd_ptr(meshes.getMetadata()->selfCast()); - if (!globalMeta) - { - std::cout << "[ERROR] Couldn't get global Meta"; - return 3; - } - - std::cout << "Total number of Sensors = " << globalMeta->m_global.m_sensors.size() << std::endl; - if (globalMeta->m_global.m_sensors.empty()) - { - std::cout << "[ERROR] No Sensors found." << std::endl; - assert(false); - return 5; // return code? - } +// APPLICATION RESTORE OMITTED - if (applicationState.startSensorID >= globalMeta->m_global.m_sensors.size()) - { - applicationState.startSensorID = 0; - printf("[WARNING]: A valid sensor ID was not found. 
Selecting the sensor: %u\n", applicationState.startSensorID); - } - - // empty out the cache from individual images and meshes taht are not used by the scene - am->clearAllAssetCache(); } - constexpr float DefaultRotateSpeed = 300.0f; - constexpr float DefaultZoomSpeed = 1.0f; - constexpr float DefaultMoveSpeed = 100.0f; - constexpr float DefaultSceneDiagonal = 50.0f; // reference for default zoom and move speed; struct SensorData { - int32_t width = 0u; - int32_t height = 0u; - int32_t cropWidth = 0u; - int32_t cropHeight = 0u; - int32_t cropOffsetX = 0u; - int32_t cropOffsetY = 0u; - bool rightHandedCamera = true; - uint32_t samplesNeeded = 0u; - float moveSpeed = core::nan(); - float stepZoomSpeed = core::nan(); - float rotateSpeed = core::nan(); - scene::ICameraSceneNode * staticCamera; - scene::ICameraSceneNode * interactiveCamera; - std::filesystem::path outputFilePath; - ext::MitsubaLoader::CElementSensor::Type type; - ext::MitsubaLoader::CElementFilm::FileFormat fileFormat; - Renderer::DenoiserArgs denoiserInfo = {}; - int32_t cascadeCount = 1; - float cascadeLuminanceBase = core::nan(); - float cascadeLuminanceStart = core::nan(); - float kappa = 0.f; - float Emin = 0.05f; - bool envmap = false; - float envmapRegFactor = 0.0f; - core::vector clipPlanes; - - scene::CSceneNodeAnimatorCameraModifiedMaya* getInteractiveCameraAnimator() - { - return reinterpret_cast(interactiveCamera->getAnimators()[0]); - } +// ... void resetInteractiveCamera() { @@ -574,242 +317,22 @@ int main(int argc, char** argv) modifiedMayaAnim->setZoomAndRotationBasedOnTargetAndPosition(cameraPos, cameraTarget); } }; - - struct CubemapRender - { - uint32_t sensorIdx = 0u; - uint32_t getSensorsBeginIdx() const { return sensorIdx; } - uint32_t getSensorsEndIdx() const { return sensorIdx + 5; } - }; - - auto smgr = device->getSceneManager(); - - // When outputFilePath isn't set in Film Element in Mitsuba, use this to find the extension string. 
- auto getFileExtensionFromFormat= [](ext::MitsubaLoader::CElementFilm::FileFormat format) -> std::string - { - std::string ret = ""; - using FileFormat = ext::MitsubaLoader::CElementFilm::FileFormat; - switch (format) - { - case FileFormat::PNG: - ret = ".png"; - break; - case FileFormat::OPENEXR: - ret = ".exr"; - break; - case FileFormat::JPEG: - ret = ".jpg"; - break; - default: // TODO? - break; - } - return ret; - }; - auto isFileExtensionCompatibleWithFormat = [](std::string extension, ext::MitsubaLoader::CElementFilm::FileFormat format) -> bool - { - if(extension.empty()) - return false; - - if(extension[0] == '.') - extension = extension.substr(1, extension.size()); - // TODO: get the supported extensions from loaders(?) - using FileFormat = ext::MitsubaLoader::CElementFilm::FileFormat; - switch (format) - { - case FileFormat::PNG: - return extension == "png"; - case FileFormat::OPENEXR: - return extension == "exr"; - case FileFormat::JPEG: - return extension == "jpg" || extension == "jpeg" || extension == "jpe" || extension == "jif" || extension == "jfif" || extension == "jfi"; - default: - return false; - } - }; - - const bool shouldHaveSensorIdxInFileName = globalMeta->m_global.m_sensors.size() > 1; - std::vector sensors; - std::vector cubemapRenders; +// ... 
auto extractAndAddToSensorData = [&](const ext::MitsubaLoader::CElementSensor& sensor, uint32_t idx) -> bool { SensorData mainSensorData = {}; - const auto& film = sensor.film; - mainSensorData.denoiserInfo.bloomFilePath = std::filesystem::path(film.denoiserBloomFilePath); - mainSensorData.denoiserInfo.bloomScale = film.denoiserBloomScale; - mainSensorData.denoiserInfo.bloomIntensity = film.denoiserBloomIntensity; - mainSensorData.denoiserInfo.tonemapperArgs = std::string(film.denoiserTonemapperArgs); - mainSensorData.fileFormat = film.fileFormat; - mainSensorData.cascadeCount = film.cascadeCount; - mainSensorData.cascadeLuminanceBase = film.cascadeLuminanceBase; - mainSensorData.cascadeLuminanceStart = film.cascadeLuminanceStart; - mainSensorData.kappa = mainSensorData.cascadeCount<2 ? 0.f:film.rfilter.kappa; - mainSensorData.Emin = film.rfilter.Emin; - mainSensorData.envmapRegFactor = core::clamp(film.envmapRegularizationFactor, 0.0f, 0.8f); - mainSensorData.outputFilePath = std::filesystem::path(film.outputFilePath); - // handle missing output path - if (mainSensorData.outputFilePath.empty()) - { - auto extensionStr = getFileExtensionFromFormat(mainSensorData.fileFormat); - if(shouldHaveSensorIdxInFileName) - mainSensorData.outputFilePath = std::filesystem::path("Render_" + mainFileName + "_Sensor_" + std::to_string(idx) + extensionStr); - else - mainSensorData.outputFilePath = std::filesystem::path("Render_" + mainFileName + extensionStr); - } - if(!isFileExtensionCompatibleWithFormat(mainSensorData.outputFilePath.extension().string(), mainSensorData.fileFormat)) - std::cout << "[ERROR] film.outputFilePath's extension is not compatible with film.fileFormat" << std::endl; - mainSensorData.samplesNeeded = sensor.sampler.sampleCount; - std::cout << "\t SamplesPerPixelNeeded = " << mainSensorData.samplesNeeded << std::endl; +// ... 
- const ext::MitsubaLoader::CElementSensor::PerspectivePinhole* persp = nullptr; - const ext::MitsubaLoader::CElementSensor::Orthographic* ortho = nullptr; - const ext::MitsubaLoader::CElementSensor::CameraBase* cameraBase = nullptr; - switch (sensor.type) - { - case ext::MitsubaLoader::CElementSensor::Type::PERSPECTIVE: - persp = &sensor.perspective; - cameraBase = persp; - std::cout << "\t Type = PERSPECTIVE" << std::endl; - break; - case ext::MitsubaLoader::CElementSensor::Type::THINLENS: - persp = &sensor.thinlens; - cameraBase = persp; - std::cout << "\t Type = THINLENS" << std::endl; - break; - case ext::MitsubaLoader::CElementSensor::Type::ORTHOGRAPHIC: - ortho = &sensor.orthographic; - cameraBase = ortho; - std::cout << "\t Type = ORTHOGRAPHIC" << std::endl; - break; - case ext::MitsubaLoader::CElementSensor::Type::TELECENTRIC: - ortho = &sensor.telecentric; - cameraBase = ortho; - std::cout << "\t Type = TELECENTRIC" << std::endl; - break; - case ext::MitsubaLoader::CElementSensor::Type::SPHERICAL: - cameraBase = &sensor.spherical; - std::cout << "\t Type = SPHERICAL" << std::endl; - break; - default: - std::cout << "\tSensor Type is not valid" << std::endl; - return false; - } - mainSensorData.type = sensor.type; - - for (auto i=0; iclipPlanes[i]; - if ((plane!=core::vectorSIMDf()).any()) - { - mainSensorData.clipPlanes.push_back(plane); - printf("Found Clip Plane %f,%f,%f,%f\n",plane[0],plane[1],plane[2],plane[3]); - } - } - - mainSensorData.rotateSpeed = cameraBase->rotateSpeed; - mainSensorData.stepZoomSpeed = cameraBase->zoomSpeed; - mainSensorData.moveSpeed = cameraBase->moveSpeed; - - if(core::isnan(mainSensorData.rotateSpeed)) - { - mainSensorData.rotateSpeed = DefaultRotateSpeed; - std::cout << "\t Camera Rotate Speed = " << mainSensorData.rotateSpeed << " = [Default Value]" << std::endl; - } - else - std::cout << "\t Camera Rotate Speed = " << mainSensorData.rotateSpeed << std::endl; - - if(core::isnan(mainSensorData.stepZoomSpeed)) - std::cout 
<< "\t Camera Step Zoom Speed [Linear] = " << "[Value will be deduced from Scene Bounds] " << std::endl; - else - std::cout << "\t Camera Step Zoom Speed [Linear] = " << mainSensorData.stepZoomSpeed << std::endl; - - if(core::isnan(mainSensorData.moveSpeed)) - std::cout << "\t Camera Move Speed = " << "[Value will be deduced from Scene Bounds] " << std::endl; - else - std::cout << "\t Camera Move Speed = " << mainSensorData.moveSpeed << std::endl; - - float defaultZoomSpeedMultiplier = std::pow(DefaultSceneDiagonal, DefaultZoomSpeed / DefaultSceneDiagonal); - mainSensorData.interactiveCamera = smgr->addCameraSceneNodeModifiedMaya(nullptr, -1.0f * mainSensorData.rotateSpeed, 50.0f, mainSensorData.moveSpeed, -1, 2.0f, defaultZoomSpeedMultiplier, false, true); - - nbl::core::vectorSIMDf mainCamPos; - nbl::core::vectorSIMDf mainCamUp; - nbl::core::vectorSIMDf mainCamView; - // need to extract individual components from matrix to camera - { - auto relativeTransform = sensor.transform.matrix.extractSub3x4(); - if (applicationState.isInteractiveMode && (idx == applicationState.startSensorID) && (core::abs(applicationState.interactiveCameraViewMatrix.getPseudoDeterminant().x) > 1e-6f)) - { - if (!applicationState.interactiveCameraViewMatrix.getInverse(relativeTransform)) - printf("[ERROR]: Previously saved interactive camera's view matrix is not invertible.\n"); - - if (applicationState.isInteractiveViewMatrixLH) - { - // invert signs in the first col only - relativeTransform.rows[0].x *= -1.f; - relativeTransform.rows[1].x *= -1.f; - relativeTransform.rows[2].x *= -1.f; - } - else - { - // invert signs both in the first and third cols - relativeTransform.rows[0].x *= -1.f; - relativeTransform.rows[1].x *= -1.f; - relativeTransform.rows[2].x *= -1.f; - - relativeTransform.rows[0].z *= -1.f; - relativeTransform.rows[1].z *= -1.f; - relativeTransform.rows[2].z *= -1.f; - } - } - - if (relativeTransform.getPseudoDeterminant().x < 0.f) - mainSensorData.rightHandedCamera = 
false; - else - mainSensorData.rightHandedCamera = true; - - std::cout << "\t IsRightHanded=" << ((mainSensorData.rightHandedCamera) ? "TRUE" : "FALSE") << std::endl; - mainCamPos = relativeTransform.getTranslation(); - - std::cout << "\t Camera Position = <" << mainCamPos.x << "," << mainCamPos.y << "," << mainCamPos.z << ">" << std::endl; - - auto tpose = core::transpose(core::matrix4SIMD(relativeTransform)); - mainCamUp = tpose.rows[1]; - mainCamView = tpose.rows[2]; - - std::cout << "\t Camera Reconstructed UpVector = <" << mainCamUp.x << "," << mainCamUp.y << "," << mainCamUp.z << ">" << std::endl; - std::cout << "\t Camera Reconstructed Forward = <" << mainCamView.x << "," << mainCamView.y << "," << mainCamView.z << ">" << std::endl; - } - - float realFoVDegrees; - auto width = film.cropWidth; - auto height = film.cropHeight; - - float aspectRatio = float(width) / float(height); - auto convertFromXFoV = [=](float fov) -> float - { - float aspectX = tan(core::radians(fov)*0.5f); - return core::degrees(atan(aspectX/aspectRatio)*2.f); - }; - - float nearClip = cameraBase->nearClip; - float farClip = cameraBase->farClip; - if(farClip > nearClip * 10'000.0f) - std::cout << "[WARN] Depth Range is too big: nearClip = " << nearClip << ", farClip = " << farClip << std::endl; if (mainSensorData.type == ext::MitsubaLoader::CElementSensor::Type::SPHERICAL) { - mainSensorData.width = film.width; - mainSensorData.height = film.height; - mainSensorData.cropWidth = film.cropWidth; - mainSensorData.cropHeight = film.cropHeight; - mainSensorData.cropOffsetX = film.cropOffsetX; - mainSensorData.cropOffsetY = film.cropOffsetY; - +#ifdef 0 // camera setup cubemap nbl::core::vectorSIMDf camViews[6] = { nbl::core::vectorSIMDf(-1, 0, 0, 0), // -X @@ -819,7 +342,7 @@ int main(int argc, char** argv) nbl::core::vectorSIMDf(0, 0, -1, 0), // -Z nbl::core::vectorSIMDf(0, 0, +1, 0), // +Z }; - + const nbl::core::vectorSIMDf upVectors[6] = { nbl::core::vectorSIMDf(0, +1, 0, 0), // +Y @@ 
-836,15 +359,6 @@ int main(int argc, char** argv) for(uint32_t i = 0; i < 6; ++i) { - SensorData cubemapFaceSensorData = mainSensorData; - cubemapFaceSensorData.envmap = true; - - if (mainSensorData.cropWidth != mainSensorData.cropHeight) - { - std::cout << "[ERROR] Cannot generate cubemap faces where film.cropWidth and film.cropHeight are not equal. (Aspect Ratio must be 1)" << std::endl; - assert(false); - } - // FIXME: suffix added after extension cubemapFaceSensorData.outputFilePath.replace_extension(); constexpr const char* suffixes[6] = @@ -858,184 +372,38 @@ int main(int argc, char** argv) }; cubemapFaceSensorData.outputFilePath += suffixes[i]; - cubemapFaceSensorData.staticCamera = smgr->addCameraSceneNode(nullptr); - auto& staticCamera = cubemapFaceSensorData.staticCamera; - - const auto& camView = camViews[i]; - const auto& upVector = upVectors[i]; - - staticCamera->setPosition(mainCamPos.getAsVector3df()); - staticCamera->setTarget((mainCamPos + camView).getAsVector3df()); - staticCamera->setUpVector(upVector); + staticCamera->setTarget((mainCamPos + camViews[i]).getAsVector3df()); + staticCamera->setUpVector(upVectors[i]); const float w = float(cubemapFaceSensorData.width)/float(cubemapFaceSensorData.cropWidth); const float h = float(cubemapFaceSensorData.height)/float(cubemapFaceSensorData.cropHeight); - const auto fov = atanf(h)*2.f; - const auto aspectRatio = h/w; + const auto fov = 45 degree nondiag; + const auto aspectRatio = 1.f; if (mainSensorData.rightHandedCamera) staticCamera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(fov, aspectRatio, nearClip, farClip)); else staticCamera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(fov, aspectRatio, nearClip, farClip)); - - cubemapFaceSensorData.interactiveCamera = smgr->addCameraSceneNodeModifiedMaya(nullptr, -1.0f * mainSensorData.rotateSpeed, 50.0f, mainSensorData.moveSpeed, -1, 2.0f, defaultZoomSpeedMultiplier, false, true); - 
cubemapFaceSensorData.resetInteractiveCamera(); - sensors.push_back(cubemapFaceSensorData); } +#endif } else { - mainSensorData.width = film.cropWidth; - mainSensorData.height = film.cropHeight; - - if(film.cropOffsetX != 0 || film.cropOffsetY != 0) - { - std::cout << "[WARN] CropOffsets are non-zero. cropping is not supported for non cubemap renders." << std::endl; - } - - mainSensorData.staticCamera = smgr->addCameraSceneNode(nullptr); - auto& staticCamera = mainSensorData.staticCamera; - - staticCamera->setPosition(mainCamPos.getAsVector3df()); - - { - auto target = mainCamView+mainCamPos; - std::cout << "\t Camera Target = <" << target.x << "," << target.y << "," << target.z << ">" << std::endl; - staticCamera->setTarget(target.getAsVector3df()); - } - - { - auto declaredUp = cameraBase->up; - auto reconstructedRight = core::cross(declaredUp,mainCamView); - auto actualRight = core::cross(mainCamUp,mainCamView); - // special formulation avoiding multiple sqrt and inversesqrt to preserve precision - const float dp = core::dot(reconstructedRight,actualRight).x/core::sqrt((core::dot(reconstructedRight,reconstructedRight)*core::dot(actualRight,actualRight)).x); - const float pb = core::dot(declaredUp,mainCamView).x/core::sqrt((core::dot(declaredUp,declaredUp)*core::dot(mainCamView,mainCamView)).x); - std::cout << "\t Camera Reconstructed UpVector match score = "<< dp << std::endl; - if (dp>0.97f && dp<1.03f && abs(pb)<0.9996f) - staticCamera->setUpVector(declaredUp); - else - staticCamera->setUpVector(mainCamUp); - } - - // - if (ortho) - { - const auto scale = sensor.transform.matrix.extractSub3x4().getScale(); - const float volumeX = 2.f*scale.x; - const float volumeY = (2.f/aspectRatio)*scale.y; - if (mainSensorData.rightHandedCamera) - staticCamera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixOrthoRH(volumeX, volumeY, nearClip, farClip)); - else - staticCamera->setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixOrthoLH(volumeX, volumeY, 
nearClip, farClip)); - } - else if (persp) - { - switch (persp->fovAxis) - { - case ext::MitsubaLoader::CElementSensor::PerspectivePinhole::FOVAxis::X: - realFoVDegrees = convertFromXFoV(persp->fov); - break; - case ext::MitsubaLoader::CElementSensor::PerspectivePinhole::FOVAxis::Y: - realFoVDegrees = persp->fov; - break; - case ext::MitsubaLoader::CElementSensor::PerspectivePinhole::FOVAxis::DIAGONAL: - { - float aspectDiag = tan(core::radians(persp->fov)*0.5f); - float aspectY = aspectDiag/core::sqrt(1.f+aspectRatio*aspectRatio); - realFoVDegrees = core::degrees(atan(aspectY)*2.f); - } - break; - case ext::MitsubaLoader::CElementSensor::PerspectivePinhole::FOVAxis::SMALLER: - if (width < height) - realFoVDegrees = convertFromXFoV(persp->fov); - else - realFoVDegrees = persp->fov; - break; - case ext::MitsubaLoader::CElementSensor::PerspectivePinhole::FOVAxis::LARGER: - if (width < height) - realFoVDegrees = persp->fov; - else - realFoVDegrees = convertFromXFoV(persp->fov); - break; - default: - realFoVDegrees = NAN; - assert(false); - break; - } - core::matrix4SIMD projMat; - projMat.setTranslation(core::vectorSIMDf(persp->shiftX,-persp->shiftY,0.f,1.f)); - if (mainSensorData.rightHandedCamera) - projMat = core::concatenateBFollowedByA(projMat,core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(realFoVDegrees), aspectRatio, nearClip, farClip)); - else - projMat = core::concatenateBFollowedByA(projMat,core::matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(realFoVDegrees), aspectRatio, nearClip, farClip)); - staticCamera->setProjectionMatrix(projMat); - } - else - { - assert(false); - } - - mainSensorData.resetInteractiveCamera(); - sensors.push_back(mainSensorData); +// camera setup non spherical } return true; }; - // Always add all the sensors because the interactive mode wants all the sensors. 
- for(uint32_t s = 0u; s < globalMeta->m_global.m_sensors.size(); ++s) - { - std::cout << "Sensors[" << s << "] = " << std::endl; - const auto& sensor = globalMeta->m_global.m_sensors[s]; - extractAndAddToSensorData(sensor, s); - } - auto driver = device->getVideoDriver(); core::smart_refctd_ptr renderer = core::make_smart_refctd_ptr(driver,device->getAssetManager(),smgr,applicationState.isDenoiseDeferred); renderer->initSceneResources(meshes,"LowDiscrepancySequenceCache.bin"); - // free memory - meshes = {}; - device->getAssetManager()->clearAllGPUObjects(); - - RaytracerExampleEventReceiver receiver; - device->setEventReceiver(&receiver); - // Deduce Move and Zoom Speeds if it is nan - auto sceneBoundsExtent = renderer->getSceneBound().getExtent(); - auto sceneDiagonal = sceneBoundsExtent.getLength(); +// free memory +meshes = {}; +device->getAssetManager()->clearAllGPUObjects(); - for(uint32_t s = 0u; s < sensors.size(); ++s) - { - auto& sensorData = sensors[s]; - - float linearStepZoomSpeed = sensorData.stepZoomSpeed; - if(core::isnan(sensorData.stepZoomSpeed)) - { - linearStepZoomSpeed = sceneDiagonal * (DefaultZoomSpeed / DefaultSceneDiagonal); - } - - // Set Zoom Multiplier - { - float logarithmicZoomSpeed = std::pow(sceneDiagonal, linearStepZoomSpeed / sceneDiagonal); - sensorData.stepZoomSpeed = logarithmicZoomSpeed; - sensorData.getInteractiveCameraAnimator()->setStepZoomMultiplier(logarithmicZoomSpeed); - printf("[INFO] Sensor[%d] Camera Step Zoom Speed deduced from scene bounds = %f [Linear], %f [Logarithmic] \n", s, linearStepZoomSpeed, logarithmicZoomSpeed); - } - - if(core::isnan(sensorData.moveSpeed)) - { - float newMoveSpeed = DefaultMoveSpeed * (sceneDiagonal / DefaultSceneDiagonal); - sensorData.moveSpeed = newMoveSpeed; - sensorData.getInteractiveCameraAnimator()->setMoveSpeed(newMoveSpeed); - printf("[INFO] Sensor[%d] Camera Move Speed deduced from scene bounds = %f\n", s, newMoveSpeed); - } - - 
assert(!core::isnan(sensorData.getInteractiveCameraAnimator()->getRotateSpeed())); - //assert(!core::isnan(sensorData.getInteractiveCameraAnimator()->getStepZoomSpeed())); - assert(!core::isnan(sensorData.getInteractiveCameraAnimator()->getMoveSpeed())); - } core::SRange nonInteractiveSensors = { nullptr, nullptr }; if (!applicationState.isInteractiveMode) @@ -1051,18 +419,7 @@ int main(int argc, char** argv) } assert(nonInteractiveSensors.size() <= sensors.size()); - auto reloadApplication = [argv]() - { - printf("[INFO]: Reloading..\n"); - - // Set up the special reload condition. - const char* cmdLineParams = "-SCENE="; - HINSTANCE result = ShellExecuteA(NULL, "open", argv[0], cmdLineParams, NULL, SW_SHOWNORMAL); - if ((uint64_t)result <= 32) - printf("[ERROR]: Failed to reload.\n"); - else - exit(0); - }; +// ... // Render To file int32_t prevWidth = 0; diff --git a/22_RaytracedAO/mergeCubemap.bat b/22_RaytracedAO/mergeCubemap.bat deleted file mode 100644 index eda734094..000000000 --- a/22_RaytracedAO/mergeCubemap.bat +++ /dev/null @@ -1,32 +0,0 @@ -@echo off - -REM the ordering of the cubemap faces is irrelevant as long as extractCubemap knows what has been merged together here -set first=%1 -set second=%2 -set third=%3 -set fourth=%4 -set fifth=%5 -set sixth=%6 -set output=%~dpn7 - -REM examplary usage: -REM mergeCubemap.bat first.png second.png third.png fourth.png fifth.png sixth.png outputImageName - -REM set image size -for /f "tokens=*" %%s in ('magick identify -format "%%w" %first%') do set sz=%%s - -REM set image fromat -for /f "tokens=*" %%s in ('magick identify -format "%%m" %first%') do set format=%%s - -set /a szx2=2*sz -set /a outputWidth=3*sz -set /a outputHeight=2*sz - -magick convert -size %outputwidth%x%outputHeight% canvas:none ^ --draw "image over 0,0 0,0 '%sixth%'" ^ --draw "image over %sz%,0 0,0 '%fourth%'" ^ --draw "image over %szx2%,0 0,0 '%third%'" ^ --draw "image over 0,%sz% 0,0 '%first%'" ^ --draw "image over %sz%,%sz% 0,0 '%fifth%'" 
^ --draw "image over %szx2%,%sz% 0,0 '%second%'" ^ -%output%.%format% \ No newline at end of file diff --git a/22_RaytracedAO/pipeline.groovy b/22_RaytracedAO/pipeline.groovy deleted file mode 100644 index 04729bc71..000000000 --- a/22_RaytracedAO/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder - -class CRaytracedAOBuilder extends IBuilder -{ - public CRaytracedAOBuilder(Agent _agent, _info) - { - super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - return true - } -} - -def create(Agent _agent, _info) -{ - return new CRaytracedAOBuilder(_agent, _info) -} - -return this \ No newline at end of file diff --git a/22_RaytracedAO/raytraceCommon.h b/22_RaytracedAO/raytraceCommon.h index 595fc7198..40397e84f 100644 --- a/22_RaytracedAO/raytraceCommon.h +++ b/22_RaytracedAO/raytraceCommon.h @@ -83,79 +83,5 @@ struct SLight **/ }; - - -// -#include -#ifdef __cplusplus -struct alignas(16) StaticViewData_t -#else -struct StaticViewData_t -#endif -{ -#ifdef __cplusplus - uint16_t imageDimensions[2]; - uint8_t maxPathDepth; - uint8_t noRussianRouletteDepth; - uint16_t samplesPerPixelPerDispatch; - uint32_t sampleSequenceStride : 31; - uint32_t hideEnvmap : 1; 
-#else - uint imageDimensions; - uint maxPathDepth_noRussianRouletteDepth_samplesPerPixelPerDispatch; - uint sampleSequenceStride_hideEnvmap; -#endif - float envMapPDFNormalizationFactor; - nbl_glsl_RWMC_CascadeParameters cascadeParams; -}; -#ifndef __cplusplus -uvec2 getImageDimensions(in StaticViewData_t data) -{ - return uvec2( - bitfieldExtract(data.imageDimensions, 0,16), - bitfieldExtract(data.imageDimensions,16,16) - ); -} -#endif - - -struct RaytraceShaderCommonData_t -{ - float rcpFramesDispatched; - uint frameLowDiscrepancySequenceShift; - uint pathDepth_rayCountWriteIx; // depth=0 if path tracing disabled - float textureFootprintFactor; - // need to be at the end because of some PC -> OpenGL Uniform mapping bug - // PERSPECTIVE - // mat3(viewDirReconFactors)*vec3(uv,1) or hitPoint-viewDirReconFactors[3] - // ORTHO - // viewDirReconFactors[2]=V - mat4x3 viewDirReconFactors; - -#ifdef __cplusplus - uint32_t getPathDepth() const - { - return nbl::core::bitfieldExtract(pathDepth_rayCountWriteIx,0,RAYCOUNT_SHIFT); - } - void setPathDepth(const uint32_t depth) - { - pathDepth_rayCountWriteIx = nbl::core::bitfieldInsert(pathDepth_rayCountWriteIx,depth,0,RAYCOUNT_SHIFT); - } - - uint32_t getReadIndex() const - { - const uint32_t index = nbl::core::bitfieldExtract(pathDepth_rayCountWriteIx,RAYCOUNT_SHIFT,RAYCOUNT_N_BUFFERING_LOG2); - if (index) - return index-1; - return RAYCOUNT_N_BUFFERING-1; - } - void advanceWriteIndex() - { - const uint32_t writeIx = nbl::core::bitfieldExtract(pathDepth_rayCountWriteIx,RAYCOUNT_SHIFT,RAYCOUNT_N_BUFFERING_LOG2); - pathDepth_rayCountWriteIx = nbl::core::bitfieldInsert(pathDepth_rayCountWriteIx,writeIx+1,RAYCOUNT_SHIFT,RAYCOUNT_N_BUFFERING_LOG2); - } -#endif -}; - #include #endif \ No newline at end of file diff --git a/22_RaytracedAO/test_scenes.txt b/22_RaytracedAO/test_scenes.txt deleted file mode 100644 index e751c9bbc..000000000 --- a/22_RaytracedAO/test_scenes.txt +++ /dev/null @@ -1,46 +0,0 @@ -; Here is my Commented 
line that batch file will skip (started with semicolons) -; "relative/dir/from/bin/folder/to/scene.zip something.xml -; Copy your test files into "Scenes" folder besides "bin" -"../../media/mitsuba/staircase2.zip scene.xml" -"..\Scenes\unity.zip 33_render_1_1.xml" -"..\Scenes\unity.zip 34_render_2_1.xml" -"..\Scenes\unity.zip 35_render_3_1.xml" -"..\Scenes\unity.zip 36_render_4_2.xml" -"..\Scenes\unity.zip 37_render_5_2.xml" -"..\Scenes\unity.zip 38_render_6_2.xml" -"..\Scenes\unity.zip 39_render_7_2.xml" -"..\Scenes\unity.zip 40_render_8_2.xml" -"..\Scenes\unity.zip 41_render_9_2.xml" -"..\Scenes\unity.zip 45_render_10_2.xml" -"..\Scenes\unity.zip 46_render_11_2.xml" -"..\Scenes\unity.zip 47_render_12_2.xml" -"..\Scenes\unity.zip 48_render_13_2.xml" -"..\Scenes\unity.zip 49_render_14_2.xml" -"..\Scenes\unity.zip 50_render_15_2.xml" -"..\Scenes\unity.zip 51_render_16_2.xml" -"..\Scenes\unity.zip 52_render_17_2.xml" -"..\Scenes\unity.zip 53_render_18_1.xml" -"..\Scenes\unity.zip 54_render_19_2.xml" -"..\Scenes\unity.zip 55_render_20_2.xml" -"..\Scenes\31521.zip 19_render_0_1.xml" -"..\Scenes\31797.zip 6_render_0_2.xml" -"..\Scenes\32222 does not stop loading.zip 12_render_9_1.xml" -"..\Scenes\4k strange - all normalmapped.zip" -"..\Scenes\bathroom.zip" -"..\Scenes\bathroom2.zip" -"..\Scenes\bedroom.zip" -"..\Scenes\coffee.zip" -"..\Scenes\cornell-box.zip" -"..\Scenes\glass-of-water.zip" -"..\Scenes\kitchen.zip" -"..\Scenes\lamp.zip" -"..\Scenes\living-room-2.zip" -"..\Scenes\living-room-3.zip" -"..\Scenes\living-room.zip" -"..\Scenes\spaceship.zip" -"..\Scenes\staircase.zip" -"..\Scenes\strangetexturedball.zip 22_render_0_1.xml" -"..\Scenes\sunscene.zip 2_render_0_2.xml" -"..\Scenes\t1 normals crash.zip 16_render_0_1.xml" -"..\Scenes\veach-ajar.zip" -"..\Scenes\veach-bidir.zip" \ No newline at end of file diff --git a/22_RaytracedAO/virtualGeometry.glsl b/22_RaytracedAO/virtualGeometry.glsl deleted file mode 100644 index 422c939f9..000000000 --- 
a/22_RaytracedAO/virtualGeometry.glsl +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef _VIRTUAL_GEOMETRY_GLSL_INCLUDED_ -#define _VIRTUAL_GEOMETRY_GLSL_INCLUDED_ - -#include "common.h" - -#define _NBL_VG_USE_SSBO -#define _NBL_VG_SSBO_DESCRIPTOR_SET 1 -#define _NBL_VG_USE_SSBO_UVEC2 -#define _NBL_VG_SSBO_UVEC2_BINDING 0 -#define _NBL_VG_USE_SSBO_INDEX -#define _NBL_VG_SSBO_INDEX_BINDING 1 -// TODO: remove after Doom Eternal position quantization trick -#define _NBL_VG_USE_SSBO_UVEC3 -#define _NBL_VG_SSBO_UVEC3_BINDING 2 -#include - - -#include - - -vec3 nbl_glsl_fetchVtxPos(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData) -{ - nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.padding1; - return nbl_glsl_VG_attribFetch_RGB32_SFLOAT(va,vtxID); -} - -vec3 nbl_glsl_fetchVtxNormal(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData) -{ - nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.determinantSignBit; - const uint codedNormal = nbl_glsl_VG_attribFetch3u(va,vtxID)[0]; - return normalize(nbl_glsl_decodeRGB10A2_SNORM(codedNormal).xyz); -} - -vec2 nbl_glsl_fetchVtxUV(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData) -{ - nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.determinantSignBit; - const uvec2 codedUV = nbl_glsl_VG_attribFetch3u(va,vtxID).yz; - return vec2(uintBitsToFloat(codedUV.x), uintBitsToFloat(codedUV.y)); -} - - -#endif diff --git a/24_ColorSpaceTest/CMakeLists.txt b/24_ColorSpaceTest/CMakeLists.txt index a2feb2cb8..71b1dde16 100644 --- a/24_ColorSpaceTest/CMakeLists.txt +++ b/24_ColorSpaceTest/CMakeLists.txt @@ -4,6 +4,7 @@ if(NOT RES) endif() nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") +target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::FullScreenTriangle) if(NBL_EMBED_BUILTIN_RESOURCES) set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) @@ -35,12 +36,6 @@ add_test(NAME 
NBL_IMAGE_HASH_RUN_TESTS ) set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/present.frag.hlsl - app_resources/push_constants.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -61,7 +56,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -77,4 +71,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/24_ColorSpaceTest/main.cpp b/24_ColorSpaceTest/main.cpp index 750756321..e3c1c0cb7 100644 --- a/24_ColorSpaceTest/main.cpp +++ b/24_ColorSpaceTest/main.cpp @@ -561,7 +561,7 @@ class ColorSpaceTestSampleApp final : public SimpleWindowedApplication, public B const std::string prettyJson = current.data.dump(4); if (options.verbose) - m_logger->log(prettyJson, ILogger::ELL_INFO); + m_logger->log("%s", ILogger::ELL_INFO, prettyJson); system::ISystem::future_t> future; m_system->createFile(future, current.path, system::IFileBase::ECF_WRITE); @@ -793,6 +793,7 @@ class ColorSpaceTestSampleApp final : public SimpleWindowedApplication, public B }; cmdbuf->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); } + cmdbuf->bindGraphicsPipeline(m_pipeline.get()); cmdbuf->pushConstants(m_pipeline->getLayout(),hlsl::ShaderStage::ESS_FRAGMENT,0,sizeof(push_constants_t),&pc); cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS,m_pipeline->getLayout(),3,1,&ds); diff --git a/27_MPMCScheduler/CMakeLists.txt b/27_MPMCScheduler/CMakeLists.txt index 92531a8d5..7d7cfd71c 100644 --- a/27_MPMCScheduler/CMakeLists.txt +++ b/27_MPMCScheduler/CMakeLists.txt @@ -3,14 +3,6 @@ include(common) nbl_create_executable_project("" "" "" "") 
set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/mpmc_queue.hlsl - app_resources/schedulers/mpmc.hlsl - app_resources/shader.comp.hlsl - app_resources/workgroup/pool_allocator.hlsl - app_resources/workgroup/stack.hlsl -) set(JSON [=[ [ @@ -18,7 +10,6 @@ set(JSON [=[ "INPUT": "app_resources/shader.comp.hlsl", "KEY": "shader", "COMPILE_OPTIONS": ["-T", "cs_6_8"], - "DEPENDS": [], "CAPS": [] } ] @@ -27,7 +18,6 @@ set(JSON [=[ NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} @@ -43,4 +33,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/30_ComputeShaderPathTracer/CMakeLists.txt b/30_ComputeShaderPathTracer/CMakeLists.txt index 07b0fd396..1a0b0e9bd 100644 --- a/30_ComputeShaderPathTracer/CMakeLists.txt +++ b/30_ComputeShaderPathTracer/CMakeLists.txt @@ -11,6 +11,7 @@ if(NBL_BUILD_IMGUI) list(APPEND NBL_LIBRARIES imtestengine "${NBL_EXT_IMGUI_UI_LIB}" + Nabla::ext::FullScreenTriangle ) nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") diff --git a/30_ComputeShaderPathTracer/main.cpp b/30_ComputeShaderPathTracer/main.cpp index 54bc64495..82ab9fb91 100644 --- a/30_ComputeShaderPathTracer/main.cpp +++ b/30_ComputeShaderPathTracer/main.cpp @@ -7,6 +7,7 @@ #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/surface_transform.h" +#include #include "nbl/this_example/common.hpp" @@ -22,7 +23,7 @@ using namespace nbl::examples; // TODO: share push constants struct PTPushConstant { - matrix4SIMD invMVP; + hlsl::float32_t4x4 invMVP; int sampleCount; int depth; }; @@ -841,9 +842,9 @@ class 
ComputeShaderPathtracer final : public SimpleWindowedApplication, public B m_camera.setProjectionMatrix([&]() { - static matrix4SIMD projection; + static hlsl::float32_t4x4 projection; - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); return projection; }()); @@ -878,9 +879,9 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B // Set Camera { core::vectorSIMDf cameraPosition(0, 5, -10); - matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + hlsl::float32_t4x4 proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix( core::radians(60.0f), - WindowDimensions.x / WindowDimensions.y, + float(WindowDimensions.x / WindowDimensions.y), 0.01f, 500.0f ); @@ -955,7 +956,7 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B // disregard surface/swapchain transformation for now const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); PTPushConstant pc; - viewProjectionMatrix.getInverseTransform(pc.invMVP); + pc.invMVP = hlsl::inverse(viewProjectionMatrix); pc.sampleCount = spp; pc.depth = depth; diff --git a/34_DebugDraw/CMakeLists.txt b/34_DebugDraw/CMakeLists.txt new file mode 100644 index 000000000..8d78f3de9 --- /dev/null +++ b/34_DebugDraw/CMakeLists.txt @@ -0,0 +1,9 @@ +if(NBL_BUILD_DEBUG_DRAW) + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + + target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::DebugDraw) +endif() diff --git a/34_DebugDraw/include/common.hpp b/34_DebugDraw/include/common.hpp new file mode 100644 index 000000000..aad9bdb1d --- /dev/null +++ 
b/34_DebugDraw/include/common.hpp @@ -0,0 +1,22 @@ +#ifndef __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ +#define __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ + +#include + +#include "nbl/examples/cameras/CCamera.hpp" +#include "nbl/examples/common/SimpleWindowedApplication.hpp" +#include "nbl/examples/common/CEventCallback.hpp" +#include "nbl/examples/examples.hpp" + +#include "nbl/ext/DebugDraw/CDrawAABB.h" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace nbl::examples; + +#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file diff --git a/34_DebugDraw/main.cpp b/34_DebugDraw/main.cpp new file mode 100644 index 000000000..f2dd6210d --- /dev/null +++ b/34_DebugDraw/main.cpp @@ -0,0 +1,367 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "common.hpp" +#include + +class DebugDrawSampleApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication +{ + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; + + _NBL_STATIC_INLINE_CONSTEXPR uint32_t WIN_W = 1280, WIN_H = 720; + +public: + inline DebugDrawSampleApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline core::vector getSurfaces() const override + { + if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = WIN_W; + params.height = WIN_H; + params.x = 32; + params.y = 32; + params.flags = 
ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "DebugDrawSampleApp"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); + } + + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + { + constexpr float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + core::vectorSIMDf cameraPosition(14, 8, 12); + core::vectorSIMDf cameraTarget(0, 0, 0); + hlsl::float32_t4x4 projectionMatrix = hlsl::math::thin_lens::rhPerspectiveFovMatrix(core::radians(fov), float(WIN_W) / WIN_H, zNear, zFar); + camera = Camera(cameraPosition, cameraTarget, projectionMatrix, moveSpeed, rotateSpeed); + } + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + ISwapchain::SCreationParams swapchainParams = { .surface = m_surface->getSurface() }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + { + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = 
asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + auto* renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + + m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + + for (auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) + return logFail("Couldn't create Command Buffer!"); + } + + m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); + m_surface->recreateSwapchain(); + + SPushConstantRange simplePcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, + .offset = offsetof(ext::debug_draw::PushConstants, spc), + .size = sizeof(ext::debug_draw::SSinglePC) + }; + { + ext::debug_draw::DrawAABB::SCreationParameters params = {}; + params.transfer = getTransferUpQueue(); + params.assetManager = m_assetMgr; + params.drawMode = ext::debug_draw::DrawAABB::ADM_DRAW_BOTH; + params.singlePipelineLayout = ext::debug_draw::DrawAABB::createPipelineLayoutFromPCRange(m_device.get(), simplePcRange); + params.batchPipelineLayout = 
ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); + params.renderpass = smart_refctd_ptr(renderpass); + params.utilities = m_utils; + drawAABB = ext::debug_draw::DrawAABB::create(std::move(params)); + } + + m_window->setCaption("[Nabla Engine] Debug Draw App Test Demo"); + m_winMgr->show(m_window.get()); + oracle.reportBeginFrameRecord(); + + return true; + } + + inline void workLoopBody() override + { + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. 
[MaxAcquiresInFlight] + if (m_realFrameIx >= framesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight + } + }; + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + oracle.reportEndFrameRecord(); + const auto timestamp = oracle.getNextPresentationTimeStamp(); + oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + if (!m_currentImageAcquire) + return; + + // render whole scene to offline frame buffer & submit + + auto* const cmdbuf = m_cmdBufs.data()[resourceIx].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->beginDebugMarker("DebugDrawSampleApp IMGUI Frame"); + + { + camera.beginInputProcessing(nextPresentationTimestamp); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, m_logger.get()); + camera.endInputProcessing(nextPresentationTimestamp); + } + + auto* queue = getGraphicsQueue(); + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = WIN_W; + viewport.height = WIN_H; + } + cmdbuf->setViewport(0u, 1u, &viewport); + + VkRect2D scissor{ + .offset = { 0, 0 }, + .extent = { m_window->getWidth(), m_window->getHeight() } + }; + cmdbuf->setScissor(0u, 1u, &scissor); + + const VkRect2D 
currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; + const IGPUCommandBuffer::SRenderpassBeginInfo beginInfo = + { + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + + cmdbuf->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + ext::debug_draw::DrawAABB::DrawParameters drawParams; + drawParams.commandBuffer = cmdbuf; + drawParams.cameraMat = camera.getConcatenatedMatrix(); + + if (!drawAABB->renderSingle(drawParams, testAABB, float32_t4{ 1, 0, 0, 1 })) + m_logger->log("Unable to draw AABB with single draw pipeline!", ILogger::ELL_ERROR); + { + using aabb_t = hlsl::shapes::AABB<3, float>; + using point_t = aabb_t::point_t; + + std::mt19937 gen(42); + std::uniform_real_distribution translate_dis(-50.f, 50.f); + std::uniform_real_distribution scale_dis(1.f, 10.f); + std::uniform_real_distribution color_dis(0.f, 1.f); + const uint32_t aabbCount = 200u; + + std::array aabbInstances; + for (auto i = 0u; i < aabbCount; i++) + { + point_t pmin = { translate_dis(gen), translate_dis(gen), translate_dis(gen) }; + point_t pmax = pmin + point_t{ scale_dis(gen), scale_dis(gen), scale_dis(gen) }; + aabb_t aabb = { pmin, pmax }; + + auto& instance = aabbInstances[i]; + instance.color = { color_dis(gen),color_dis(gen),color_dis(gen),1 }; + + hlsl::float32_t3x4 instanceTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(aabb); + instance.transform = math::linalg::promoted_mul(float32_t4x4(1), instanceTransform); + } + + const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + if (!drawAABB->render(drawParams, drawFinished, aabbInstances)) + 
m_logger->log("Unable to draw AABBs with instanced draw pipeline!", ILogger::ELL_ERROR); + } + + cmdbuf->endRenderPass(); + } + cmdbuf->endDebugMarker(); + cmdbuf->end(); + + { + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } + }; + + { + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + + if (queue->submit(infos) == IQueue::RESULT::SUCCESS) + { + const nbl::video::ISemaphore::SWaitInfo waitInfos[] = + { { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + } }; + + m_device->blockForSemaphores(waitInfos); // this is not solution, quick wa to not throw validation errors + } + else + --m_realFrameIx; + } + } + + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + } + } + + inline bool keepRunning() override + { + if (m_surface->irrecoverable()) + return false; + + return true; + } + + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + +private: + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint32_t MaxFramesInFlight = 3u; + + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + smart_refctd_ptr m_semaphore; + smart_refctd_ptr m_cmdPool; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + + core::smart_refctd_ptr m_inputSystem; + 
InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + + Camera camera; + video::CDumbPresentationOracle oracle; + + smart_refctd_ptr drawAABB; + hlsl::shapes::AABB<3, float> testAABB = hlsl::shapes::AABB<3, float>{ { -5, -5, -5 }, { 10, 10, -10 } }; +}; + +NBL_MAIN_FUNC(DebugDrawSampleApp) \ No newline at end of file diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt new file mode 100644 index 000000000..8c0fbae51 --- /dev/null +++ b/40_PathTracer/CMakeLists.txt @@ -0,0 +1,74 @@ +include(common) + +set(NBL_INCLUDE_SERACH_DIRECTORIES + "${NBL_EXT_MITSUBA_LOADER_INCLUDE_DIRS}" + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/src" +) +set(NBL_LIBRARIES + "${NBL_EXT_MITSUBA_LOADER_LIB}" + Nabla::ext::FullScreenTriangle + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" +) +set(NBL_EXAMPLE_SOURCES + "${CMAKE_CURRENT_SOURCE_DIR}/src/io/CSceneLoader.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CSession.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CScene.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/CRenderer.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/resolve/CBasicRWMCResolver.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/renderer/present/CWindowPresenter.cpp" +) +nbl_create_executable_project("${NBL_EXAMPLE_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/pathtrace/previs.hlsl", + "KEY": "pathtrace_previs", + }, + { + "INPUT": "app_resources/pathtrace/beauty.hlsl", + "KEY": "pathtrace_beauty", + }, + { + "INPUT": "app_resources/pathtrace/debug.hlsl", + "KEY": "pathtrace_debug", + }, + { + "INPUT": "app_resources/present/default.hlsl", + "KEY": "present_default", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}/include" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO 
${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) + + diff --git a/40_PathTracer/app_resources/pathtrace/beauty.hlsl b/40_PathTracer/app_resources/pathtrace/beauty.hlsl new file mode 100644 index 000000000..2027e6d4d --- /dev/null +++ b/40_PathTracer/app_resources/pathtrace/beauty.hlsl @@ -0,0 +1,26 @@ +#include "renderer/shaders/pathtrace/common.hlsl" +using namespace nbl::hlsl; +using namespace nbl::this_example; + +[[vk::push_constant]] SBeautyPushConstants pc; + + +struct[raypayload] BeautyPayload +{ + uint32_t instanceID : read(caller):write(closesthit); +// float16_t3 normal : read(caller):write(closesthit); +}; + +[shader("raygeneration")] +void raygen() +{ + const uint32_t3 launchID = spirv::LaunchIdKHR; + const uint32_t3 launchSize = spirv::LaunchSizeKHR; + + gAlbedo[launchID] = float32_t4(float32_t3(launchID)/float32_t3(launchSize),1.f); +} + +[shader("miss")] +void miss(inout BeautyPayload payload) +{ +} \ No newline at end of file diff --git a/40_PathTracer/app_resources/pathtrace/debug.hlsl b/40_PathTracer/app_resources/pathtrace/debug.hlsl new file mode 100644 index 000000000..25e9d2664 --- /dev/null +++ b/40_PathTracer/app_resources/pathtrace/debug.hlsl @@ -0,0 +1,26 @@ +#include "renderer/shaders/pathtrace/common.hlsl" +using namespace nbl::hlsl; +using namespace nbl::this_example; + +[[vk::push_constant]] SDebugPushConstants pc; + + +struct[raypayload] DebugPayload +{ + uint32_t instanceID : read(caller):write(closesthit); + uint32_t primitiveID : read(caller):write(closesthit); +}; + +[shader("raygeneration")] +void 
raygen() +{ + const uint32_t3 launchID = spirv::LaunchIdKHR; + const uint32_t3 launchSize = spirv::LaunchSizeKHR; + + gAlbedo[launchID] = float32_t4(float32_t3(launchID)/float32_t3(launchSize),1.f); +} + +[shader("miss")] +void miss(inout DebugPayload payload) +{ +} \ No newline at end of file diff --git a/40_PathTracer/app_resources/pathtrace/previs.hlsl b/40_PathTracer/app_resources/pathtrace/previs.hlsl new file mode 100644 index 000000000..035088068 --- /dev/null +++ b/40_PathTracer/app_resources/pathtrace/previs.hlsl @@ -0,0 +1,25 @@ +#include "renderer/shaders/pathtrace/common.hlsl" +using namespace nbl::hlsl; +using namespace nbl::this_example; + +[[vk::push_constant]] SPrevisPushConstants pc; + + +struct[raypayload] PrevisPayload +{ + uint16_t materialID : read(caller):write(closesthit); +}; + +[shader("raygeneration")] +void raygen() +{ + const uint32_t3 launchID = spirv::LaunchIdKHR; + const uint32_t3 launchSize = spirv::LaunchSizeKHR; + + gAlbedo[launchID] = float32_t4(float32_t3(launchID)/float32_t3(launchSize),1.f); +} + +[shader("miss")] +void miss(inout PrevisPayload payload) +{ +} diff --git a/40_PathTracer/app_resources/present/default.hlsl b/40_PathTracer/app_resources/present/default.hlsl new file mode 100644 index 000000000..dc857fb2d --- /dev/null +++ b/40_PathTracer/app_resources/present/default.hlsl @@ -0,0 +1,42 @@ +// Copyright (C) 2024-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "renderer/shaders/present/push_constants.hlsl" +// vertex shader is provided by the fullScreenTriangle extension +#include +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t SessionDSIndex = 0; +#include "renderer/shaders/session.hlsl" + +using namespace nbl::hlsl; +using namespace nbl::this_example; +using namespace ext::FullScreenTriangle; + + +[[vk::push_constant]] SDefaultResolvePushConstants pc; + +[shader("pixel")] +float32_t4 present_default(SVertexAttributes vxAttr) : SV_Target0 +{ + float32_t3 tint = promote(1.f); + float32_t3 uv; + if (pc.isCubemap) + { + const float32_t4 ndc = float32_t4(vxAttr.uv*2.f-float32_t2(1,1),1.f,1.f); + float32_t4 tmp = mul(pc.cubemap().invProjView,ndc); + float32_t3 dir = tmp.xyz/tmp.www; + // TODO: convert dir to cubemap face, and the UV coord + tint = float32_t3(1,0,1); // right now go magenta error colour + } + else + { + const SDefaultResolvePushConstants::Regular regular = pc.regular(); + uv.xy = vxAttr.uv*regular.scale; + if (any(uv.xy>float32_t2(1,1))) + return promote(0.f); + uv.z = pc.layer; + if (any(regular._min>uv.xy) || any(regular._max data; + Material material; + core::matrix3x4SIMD transform; + +}; + +} + +#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ diff --git a/40_PathTracer/include/io/CSceneLoader.h b/40_PathTracer/include/io/CSceneLoader.h new file mode 100644 index 000000000..24e6ca490 --- /dev/null +++ b/40_PathTracer/include/io/CSceneLoader.h @@ -0,0 +1,278 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_C_SCENE_LOADER_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_C_SCENE_LOADER_H_INCLUDED_ + + +#include "nabla.h" +#include "nbl/builtin/hlsl/cpp_compat/promote.hlsl" + +#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" + + +namespace nbl::this_example +{ + +class CSceneLoader : public core::IReferenceCounted, public core::InterfaceUnmovable +{ + public: + struct SCachedCreationParams + { + core::smart_refctd_ptr assMan = nullptr; + system::logger_opt_smart_ptr logger = nullptr; + }; + struct SCreationParams : SCachedCreationParams + { + inline operator bool() const + { + if (!assMan) + return false; + return true; + } + }; + static core::smart_refctd_ptr create(SCreationParams&& params); + + // When outputFilePath isn't set in Film Element in Mitsuba, use this to find the extension string. + static inline std::string_view fileExtensionFromFormat(ext::MitsubaLoader::CElementFilm::FileFormat format) + { + using FileFormat = ext::MitsubaLoader::CElementFilm::FileFormat; + switch (format) + { + case FileFormat::PNG: + return ".png"; + case FileFormat::OPENEXR: + return ".exr"; + case FileFormat::JPEG: + return ".jpg"; + default: + break; + } + return ""; + } + + struct SLoadResult + { + struct SSensor + { + using type_e = ext::MitsubaLoader::CElementSensor::Type; + + inline SSensor() = default; + inline SSensor(const SSensor&) = default; + inline SSensor(SSensor&&) = default; + inline SSensor& operator=(const SSensor&) = default; + inline SSensor& operator=(SSensor&&) = default; + + inline operator bool() const + { + return bool(constants) && mutableDefaults.valid(constants) && bool(dynamicDefaults); + } + + struct SConstants + { + constexpr static inline uint32_t MaxWidth = 0x1u<<(sizeof(uint16_t)*8-2); + constexpr static inline uint32_t MaxHeight = MaxWidth; + constexpr static inline uint32_t MaxCascadeCount = 15; + + inline operator bool() const + { + if (width <= 0 || 
width >= MaxWidth) + return false; + if (height <= 0 || height >= MaxHeight) + return false; + if (type != type_e::INVALID) + return false; + if (cascadeCount <= 0 || cascadeCount >= MaxCascadeCount) + return false; + return true; + } + + // where the FFT bloom kernel is + system::path bloomFilePath = {}; + // + uint32_t width = 0u; + uint32_t height = 0u; + // + type_e type = type_e::INVALID; + // + uint8_t cascadeCount : 4 = 1; + } constants = {}; + // these could theoretically change without recreating session resources + struct SMutable + { + constexpr static inline uint8_t MaxClipPlanes = 6; + + inline uint8_t getClipPlaneCount() const + { + using namespace nbl::hlsl; + for (uint8_t i=0; i(0.f); + const auto& rhs = clipPlanes[i].xyz; + if (any(glsl::notEqual(lhs,rhs))) + continue; + return i; + } + return MaxClipPlanes; + } + + inline bool valid(const SConstants& cst) const + { + // TODO more checks + return true; + } + + // inverse of view matrix, can include SCALE ! + hlsl::float32_t3x4 absoluteTransform; + // TODO: thin lens and telecentric support + struct Raygen + { + public: + enum class Type : uint8_t + { + Persp = 0, + Ortho = 1, + Env = 2 + }; + + // + inline Type getType() const + { + // note that actual matrix always requires columns to have Y- directions + if (encoded[1][1]<0.f) + return Type::Persp; + if (encoded[1][1]>0.f) + return Type::Ortho; + return Type::Env; + } + + // for a raygen shader to transform the [0,1]^2 NDC coord into a ray (without tMin/tStart) + // PERSP `dir = normalize(float3(pseudo_mul(mat,ndc),-1)); + // origin = -float32_t3(dir.xy/dir.z,nearClip);` + // ORTHO `origin = float32_t3(pseudo_mul(mat,ndc),-nearClip); + // dir = float32_t(0,0,-1)` + inline explicit operator hlsl::float32_t2x3() const + { + auto retval = encoded; + // y-axis column shall always be negative + if (encoded[1][1]>0.f) + { + retval[0][1] = -encoded[0][1]; + retval[1][1] = -encoded[1][1]; + } + return retval; + } + + // Whether Z+ or Z- is forward,and X- 
or X+ is right for the camera + inline bool isRightHanded() const {return encoded[0][0]>0.f;} + + private: + friend class CSceneLoader; + + hlsl::float32_t2x3 encoded = {}; + } raygen; + // + std::array clipPlanes = {}; + // denoiser and bloom require rendering with a "skirt" this controls the skirt size + int32_t cropWidth = 0u; + int32_t cropHeight = 0u; + int32_t cropOffsetX = 0u; + int32_t cropOffsetY = 0u; + // + float nearClip; + float farClip; + // + float cascadeLuminanceBase = core::nan(); + float cascadeLuminanceStart = core::nan(); + // + uint16_t hideEnvironment : 1 = false; + uint16_t russianRouletteDepth : 15 = 0x7fffu; + uint16_t maxPathDepth = 0; + } mutableDefaults = {}; + // these can change without having to reset accumulations, etc. + struct SDynamic + { + // For a legacy `smgr->addCameraSceneNodeModifiedMaya(nullptr, -1.0f * mainSensorData.rotateSpeed, 50.0f, mainSensorData.moveSpeed, -1, 2.0f, defaultZoomSpeedMultiplier, false, true)` + constexpr static inline float DefaultRotateSpeed = 300.0f; + constexpr static inline float DefaultZoomSpeed = 1.0f; + constexpr static inline float DefaultMoveSpeed = 100.0f; + constexpr static inline float DefaultSceneSize = 50.0f; // reference for default zoom and move speed; + // no constexpr std::pow + //constexpr static inline float DefaultZoomSpeedMultiplier = std::pow(DefaultSceneSize,DefaultZoomSpeed/DefaultSceneSize); + + struct SPostProcess + { + std::filesystem::path bloomFilePath; + float bloomScale = 0.0f; + float bloomIntensity = 0.0f; + std::string tonemapperArgs = ""; + }; + + // + inline operator bool() const + { + // TODO more checks + return !hlsl::isnan(moveSpeed); + } + + // members + system::path outputFilePath = {}; + SPostProcess postProc = {}; + // even though spherical can't rotate, the preview camera can + hlsl::float32_t3 up = {}; + float rotateSpeed = core::nan(); + union + { + struct SZoomable // spherical can't zoom + { + float speed = core::nan(); + } zoomable = {}; + }; + // + 
float moveSpeed = core::nan(); + // + uint32_t samplesNeeded = 0u; + float kappa = 0.f; + float Emin = 0.05f; + } dynamicDefaults = {}; + + }; + + inline operator bool() const + { + if (!scene || sensors.empty()) + return false; + return true; + } + + // + core::smart_refctd_ptr scene = {}; + // + core::vector sensors; + // TODO: for Material Compiler + //std::future compileShadersFuture = {}; + }; + struct SLoadParams + { + system::path relPath = ""; + system::path workingDirectory = ""; + }; + SLoadResult load(SLoadParams&& _params); + + protected: + struct SConstructorParams : SCachedCreationParams + { + }; + inline CSceneLoader(SConstructorParams&& _params) : m_params(std::move(_params)) {} + virtual inline ~CSceneLoader() {} + + SConstructorParams m_params; +}; + +} + +#ifndef _NBL_THIS_EXAMPLE_C_SCENE_LOADER_CPP_ +extern template struct nbl::system::impl::to_string_helper; +#endif + +#endif diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h new file mode 100644 index 000000000..9283b338e --- /dev/null +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -0,0 +1,197 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_C_RENDERER_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_C_RENDERER_H_INCLUDED_ + + +#include "renderer/CScene.h" +#include "renderer/CSession.h" + +#include "renderer/shaders/pathtrace/push_constants.hlsl" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + + +namespace nbl::this_example +{ + +class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovable +{ + friend struct SSubmitInfo; + public: + // + constexpr static video::SPhysicalDeviceFeatures RequiredDeviceFeatures() + { + video::SPhysicalDeviceFeatures retval = {}; + retval.rayTracingPipeline = true; + retval.accelerationStructure = true; + return retval; + } + // + constexpr static video::SPhysicalDeviceFeatures PreferredDeviceFeatures() + { + auto retval = RequiredDeviceFeatures(); + retval.accelerationStructureHostCommands = true; + return retval; + } +#if 0 // see TODO in main.cpp + constexpr static video::SPhysicalDeviceLimits RequiredDeviceLimits() + { + video::SPhysicalDeviceLimits retval = {}; + retval.shaderStorageImageReadWithoutFormat = true; + return retval; + } +#endif + // + template + static inline core::smart_refctd_ptr loadPrecompiledShader( + asset::IAssetManager* assMan, video::ILogicalDevice* device, system::logger_opt_ptr logger={} + ) + { + return loadPrecompiledShader_impl(assMan,builtin::build::get_spirv_key(device),logger); + } + + struct SCachedCreationParams + { + //! 
Brief guideline to good path depth limits + // Want to see stuff with indirect lighting on the other side of a pane of glass + // 5 = glass frontface->glass backface->diffuse surface->diffuse surface->light + // Want to see through a glass box, vase, or office + // 7 = glass frontface->glass backface->glass frontface->glass backface->diffuse surface->diffuse surface->light + // pick higher numbers for better GI and less bias + static inline constexpr uint32_t DefaultPathDepth = 8u; + // TODO: Upload only a subsection of the sample sequence to the GPU, so we can use more samples without trashing VRAM + static inline constexpr uint32_t MaxFreeviewSamples = 0x10000u; + + inline operator bool() const + { + if (!graphicsQueue || !computeQueue || !uploadQueue) + return false; + if (!utilities) + return false; + if (graphicsQueue->getOriginDevice()!=utilities->getLogicalDevice()) + return false; + if (computeQueue->getOriginDevice()!=utilities->getLogicalDevice()) + return false; + if (uploadQueue->getOriginDevice()!=utilities->getLogicalDevice()) + return false; + return true; + } + + video::CThreadSafeQueueAdapter* graphicsQueue = nullptr; + video::CThreadSafeQueueAdapter* computeQueue = nullptr; + video::CThreadSafeQueueAdapter* uploadQueue = nullptr; + // + core::smart_refctd_ptr utilities = nullptr; + // can be null + system::logger_opt_smart_ptr logger = nullptr; + }; + struct SCreationParams : SCachedCreationParams + { + system::path sampleSequenceCache; + asset::IAssetManager* assMan; + }; + static core::smart_refctd_ptr create(SCreationParams&& _params); + + // + inline const SCachedCreationParams& getCreationParams() const { return m_creation; } + + // + inline system::logger_opt_ptr getLogger() const {return m_creation.logger.get().get();} + + // + inline video::ILogicalDevice* getDevice() const {return m_creation.utilities->getLogicalDevice();} + + struct SCachedConstructionParams + { + constexpr static inline uint8_t FramesInFlight = 3; + 
core::smart_refctd_ptr semaphore; + + // per pipeline UBO for other pipelines + core::smart_refctd_ptr uboDSLayout; + // descriptor set for a scene shall contain sampled textures and compiled materials + core::smart_refctd_ptr sceneDSLayout; + // descriptor set for sensors + core::smart_refctd_ptr sensorDSLayout; + + // temporary + std::array,uint8_t(CSession::RenderMode::Count)> shaders; + std::array,uint8_t(CSession::RenderMode::Count)> renderingLayouts; + // TODO +// std::array,uint8_t(CSession::RenderMode::Count)> genericPipelines; + + // + core::smart_refctd_ptr commandBuffers[FramesInFlight]; + }; + // + inline const SCachedConstructionParams& getConstructionParams() const {return m_construction;} + + // + core::smart_refctd_ptr createScene(CScene::SCreationParams&& _params); + + // + struct SSubmit final : core::Uncopyable + { + public: + inline SSubmit() {} + inline SSubmit(CRenderer* _renderer, video::IGPUCommandBuffer* _cb) : renderer(_renderer), cb(_cb) {assert(operator bool());} + + inline operator bool() const {return cb;} + inline operator video::IGPUCommandBuffer*() const {return cb;} + + // returns semaphore signalled by submit + video::IQueue::SSubmitInfo::SSemaphoreInfo operator()(std::span extraWaits); + + asset::PIPELINE_STAGE_FLAGS stageMask = asset::PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; + private: + CRenderer* renderer = nullptr; + video::IGPUCommandBuffer* cb = nullptr; + }; + SSubmit render(CSession* session); + + protected: + struct SConstructorParams : SCachedCreationParams, SCachedConstructionParams + { + + // Each Atom of the sample sequence provides 3N dimensions (3 for BxDF, 3 for NEE, etc.) 
+ // Then Atoms are ordered by sampleID, then dimension (cache will be fully trashed by tracing TLASes until next bounce) +#if 0 + // semi persistent data + struct SampleSequence + { + public: + static inline constexpr auto QuantizedDimensionsBytesize = sizeof(uint64_t); + SampleSequence() : bufferView() {} + + // one less because first path vertex uses a different sequence + static inline uint32_t computeQuantizedDimensions(uint32_t maxPathDepth) {return (maxPathDepth-1)*SAMPLING_STRATEGY_COUNT;} + nbl::core::smart_refctd_ptr createCPUBuffer(uint32_t quantizedDimensions, uint32_t sampleCount); + + // from cache + void createBufferView(nbl::video::IVideoDriver* driver, nbl::core::smart_refctd_ptr&& buff); + // regenerate + nbl::core::smart_refctd_ptr createBufferView(nbl::video::IVideoDriver* driver, uint32_t quantizedDimensions, uint32_t sampleCount); + + auto getBufferView() const {return bufferView;} + + private: + nbl::core::smart_refctd_ptr bufferView; + } sampleSequence; + + // Resources used for envmap sampling + nbl::ext::EnvmapImportanceSampling::EnvmapImportanceSampling m_envMapImportanceSampling; +#endif + }; + inline CRenderer(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)), + m_frameIx(m_construction.semaphore->getCounterValue()) {} + virtual inline ~CRenderer() {} + + static core::smart_refctd_ptr loadPrecompiledShader_impl(asset::IAssetManager* assMan, const core::string& key, system::logger_opt_ptr logger); + + SCachedCreationParams m_creation; + SCachedConstructionParams m_construction; + uint64_t m_frameIx; +}; + +} +#endif diff --git a/40_PathTracer/include/renderer/CScene.h b/40_PathTracer/include/renderer/CScene.h new file mode 100644 index 000000000..babcd14cf --- /dev/null +++ b/40_PathTracer/include/renderer/CScene.h @@ -0,0 +1,97 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_C_SCENE_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_C_SCENE_H_INCLUDED_ + + +#include "io/CSceneLoader.h" +#include "renderer/CSession.h" +#include "renderer/shaders/scene.hlsl" + + +namespace nbl::this_example +{ +class CRenderer; + +class CScene : public core::IReferenceCounted, public core::InterfaceUnmovable +{ + public: + struct SCachedCreationParams + { + }; + struct SCreationParams : SCachedCreationParams + { + CSceneLoader::SLoadResult load = {}; + video::CAssetConverter* converter = nullptr; + + inline operator bool() const + { + if (!load) + return false; + // converter can be null, we can make a new one + return true; + } + }; + + // + inline CRenderer* getRenderer() const {return m_construction.renderer.get();} + + // + inline video::IGPURayTracingPipeline* getPipeline(const CSession::RenderMode mode) const + { + return m_construction.pipelines[static_cast(mode)].get(); + } + + // + inline const auto& getSBT(const CSession::RenderMode mode) const {return m_construction.sbts[static_cast(mode)];} + + // + inline const video::IGPUDescriptorSet* getDescriptorSet() const {return m_construction.sceneDS->getDescriptorSet();} + + using sensor_t = CSceneLoader::SLoadResult::SSensor; + // + inline std::span getSensors() const {return m_construction.sensors;} + + // + core::smart_refctd_ptr createSession(const CSession::SCreationParams& sensor); + + protected: + friend class CRenderer; + struct SCachedConstructorParams + { + // + hlsl::shapes::AABB<> sceneBound; + // + core::vector sensors; + // backward link for reference counting + core::smart_refctd_ptr renderer; + // specialized per-scene pipelines + core::smart_refctd_ptr pipelines[uint8_t(CSession::RenderMode::Count)]; + // + video::IGPURayTracingPipeline::SShaderBindingTable sbts[uint8_t(CSession::RenderMode::Count)]; + // descriptor set for a scene shall contain sampled textures and compiled materials + 
core::smart_refctd_ptr sceneDS; + // main TLAS + core::smart_refctd_ptr TLAS; + }; + struct SConstructorParams : SCachedCreationParams, SCachedConstructorParams + { + // sensor list can be empty, we can just make one up as we go along + inline operator bool() const + { + for (uint8_t i=0; i(CSession::RenderMode::Count); i++) + if (const auto* pipeline=pipelines[i].get(); !pipeline || !sbts[i].valid(pipeline->getCreationFlags())) + return false; + return renderer && sceneDS; + } + }; + inline CScene(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)) {} + virtual inline ~CScene() {} + + SCachedCreationParams m_creation; + SCachedConstructorParams m_construction; +}; + +} +#endif diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h new file mode 100644 index 000000000..f3c5c1d75 --- /dev/null +++ b/40_PathTracer/include/renderer/CSession.h @@ -0,0 +1,158 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_THIS_EXAMPLE_C_SESSION_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_C_SESSION_H_INCLUDED_ + + +#include "io/CSceneLoader.h" + +#include "renderer/shaders/session.hlsl" +#include "renderer/shaders/pathtrace/push_constants.hlsl" + + +namespace nbl::this_example +{ +class CScene; + +class CSession final : public core::IReferenceCounted, public core::InterfaceUnmovable +{ + public: + using sensor_t = CSceneLoader::SLoadResult::SSensor; + using sensor_type_e = sensor_t::SMutable::Raygen::Type; + enum class RenderMode : uint8_t + { + Previs, + Beauty, + Debug, + Count + }; + struct SCachedCreationParams + { + RenderMode mode = RenderMode::Beauty; + }; + struct SCreationParams : SCachedCreationParams + { + inline operator bool() const {return sensor;} + + const sensor_t* sensor; + }; + + // + bool init(video::IGPUCommandBuffer* cb); + + // + inline bool isInitialized() const {return bool(m_active.immutables);} + + // heavy VRAM data and data only needed during an active session + struct SImageWithViews + { + inline operator bool() const + { + return image && !views.empty() && views.begin()->second; + } + + inline video::IGPUImageView* getView(const asset::E_FORMAT format) const + { + if (const auto found=views.find(format); found!=views.end()) + return found->second.get(); + return nullptr; + } + + core::smart_refctd_ptr image = {}; + core::unordered_map> views = {}; + }; + struct SActiveResources + { + struct SImmutables + { + inline operator bool() const + { + return bool(scrambleKey) && sampleCount && rwmcCascades && albedo && normal && motion && mask && ds; + } + + // QUESTION: No idea how to marry RWMC with Temporal Denoise, do we denoise separately per cascade? + // ANSWER: RWMC relies on many spp, can use denoised/reprojected to confidence measures from other cascades. + // Shouldn't touch the previous frame, denoiser needs to know what was on screen last frame, only touch current. 
+ // QUESTION: with temporal denoise do we turn the `sampleCount` into a `sequenceOffset` texutre? + SImageWithViews scrambleKey = {}, sampleCount = {}, beauty = {}, rwmcCascades = {}, albedo = {}, normal = {}, motion = {}, mask = {}; + // stores all the sensor data required + core::smart_refctd_ptr ds = {}; + }; + SImmutables immutables = {}; + SSensorDynamics currentSensorState = {}, prevSensorState = {}; + }; + + // + inline const SActiveResources& getActiveResources() const {return m_active;} + + // + bool reset(const SSensorDynamics& newVal, video::IGPUCommandBuffer* cb); + + // + bool update(const SSensorDynamics& newVal); + + // TODO: figure this out + inline float getProgress() const + { + return 0.f; + } + + // + inline void deinit() + { + m_active = {}; + } + + // + struct SConstructionParams : SCachedCreationParams + { + core::string name = "TODO from `sensor`"; + core::smart_refctd_ptr scene; + SResolveConstants initResolveConstants; + SSensorUniforms uniforms; + SSensorDynamics initDynamics; + hlsl::uint16_t2 cropOffsets; + hlsl::uint16_t2 cropResolution; + sensor_type_e type; + }; + inline const SConstructionParams& getConstructionParams() const {return m_params;} + + private: + friend class CScene; + inline CSession(SConstructionParams&& _params) : m_params(std::move(_params)) {} + + const SConstructionParams m_params; + SActiveResources m_active = {}; +}; + +} + +// +namespace nbl::system::impl +{ +template<> +struct to_string_helper +{ + private: + using enum_t = nbl::this_example::CSession::RenderMode; + + public: + static inline std::string __call(const enum_t value) + { + switch (value) + { + case enum_t::Beauty: + return "Beauty"; + case enum_t::Previs: + return "Previs"; + case enum_t::Debug: + return "Debug"; + default: + break; + } + return ""; + } +}; +} +#endif diff --git a/40_PathTracer/include/renderer/SAASequence.h b/40_PathTracer/include/renderer/SAASequence.h new file mode 100644 index 000000000..460e9ee69 --- /dev/null +++ 
b/40_PathTracer/include/renderer/SAASequence.h @@ -0,0 +1,1046 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl::this_example +{ + +// +struct SAASequence +{ + using type_t = nbl::hlsl::float32_t2; + + inline operator std::span() const {return {Data,Size};} + + static constexpr inline uint32_t Size = 1024; + constexpr static inline type_t Data[Size] = + { + {0.229027962000000, 0.100901043000000}, + {0.934988661250000, 0.900492937500000}, + {0.693936740750000, 0.477888665000000}, + {0.396013875250000, 0.867381653000000}, + {0.151208663250000, 0.331649132250000}, + {0.919338615000000, 0.306386117750000}, + {0.454737456500000, 0.597940860250000}, + {0.911951413000000, 0.584874565000000}, + {0.471331207500000, 0.117509299250000}, + {0.724981748000000, 0.988645892000000}, + {0.227727943750000, 0.553082892250000}, + {0.927148254750000, 0.059077206250000}, + {0.170420940250000, 0.853803466500000}, + {0.369496963250000, 0.372492160250000}, + {0.709055501500000, 0.719526612750000}, + {0.708593019750000, 0.236308825250000}, + {0.053515783250000, 0.244794542562500}, + {0.759417624125000, 0.846532545187500}, + {0.572365454937500, 0.341559262437500}, + {0.269128942562500, 0.962581831375000}, + {0.246508261687500, 0.286661635812500}, + {0.819542439062500, 0.459099133812500}, + {0.411348913687500, 0.737420359250000}, + {0.896647944437500, 0.717554343125000}, + {0.358057598000000, 0.050206801437500}, + {0.605871046250000, 0.779868041500000}, + {0.036816445812500, 0.506511135625000}, + {0.806931985937500, 0.138270723062500}, + {0.045020470000000, 0.818334270875000}, + {0.433264399500000, 0.254739200375000}, + {0.556258709500000, 0.559776624000000}, + {0.611048395312500, 0.162518625750000}, + {0.028918631812500, 0.053438072375000}, + {0.856252533125000, 0.916712681500000}, + 
{0.580344816187500, 0.463534157062500}, + {0.291334488000000, 0.774756179000000}, + {0.157847279187500, 0.464948199125000}, + {0.775478249937500, 0.320623736250000}, + {0.306258709500000, 0.653526624000000}, + {0.798533046937500, 0.552896543187500}, + {0.349953270437500, 0.123764825500000}, + {0.534027961437500, 0.969931745937500}, + {0.122488661312500, 0.681742937625000}, + {0.849003468812500, 0.216845413250000}, + {0.145343900750000, 0.962506045625000}, + {0.395929912437500, 0.488477370312500}, + {0.675219736437500, 0.601237158875000}, + {0.728921568625000, 0.053308823500000}, + {0.153721825125000, 0.145597505062500}, + {0.852763510375000, 0.797682223125000}, + {0.644595719312500, 0.367380713687500}, + {0.475934665312500, 0.787623234375000}, + {0.037670496437500, 0.386130180750000}, + {0.916111850937500, 0.403604173437500}, + {0.307256453062500, 0.518207928812500}, + {0.836158139312500, 0.677526975812500}, + {0.291525812500000, 0.197831715312500}, + {0.632543215125000, 0.896220934750000}, + {0.039235045687500, 0.629605464812500}, + {0.927263875375000, 0.179881653187500}, + {0.036335975187500, 0.990626511375000}, + {0.458406617875000, 0.372877193062500}, + {0.545614665812500, 0.676662283062500}, + {0.606815968812500, 0.044970413250000}, + {0.031533697125000, 0.184836288625000}, + {0.943869562500000, 0.830155934062500}, + {0.607026984312500, 0.286243495000000}, + {0.385468447812500, 0.923477959062500}, + {0.211591778000000, 0.432717372437500}, + {0.959561740812500, 0.477888665062500}, + {0.340921091062500, 0.599871303750000}, + {0.770926812125000, 0.740443845937500}, + {0.492972183312500, 0.243769330562500}, + {0.520086204062500, 0.865883539250000}, + {0.194132187625000, 0.711586172812500}, + {0.867832801875000, 0.029377324812500}, + {0.018898352500000, 0.755166315812500}, + {0.294110519250000, 0.340476317312500}, + {0.645436781125000, 0.669120978187500}, + {0.537010584750000, 0.070669853500000}, + {0.161951413000000, 0.209874565062500}, + {0.786335975187500, 
0.990626511375000}, + {0.525681985937500, 0.419520723062500}, + {0.287619562500000, 0.834550465312500}, + {0.100299557750000, 0.367542953000000}, + {0.787670496437500, 0.386130180750000}, + {0.425010132750000, 0.666850725937500}, + {0.959417841312500, 0.712724761625000}, + {0.259027114250000, 0.027505482375000}, + {0.706747124500000, 0.863983912687500}, + {0.118758709500000, 0.559776624000000}, + {0.979834653750000, 0.076596529437500}, + {0.076814113250000, 0.879551982187500}, + {0.458038607062500, 0.495297691687500}, + {0.676899749875000, 0.533654791000000}, + {0.739509651750000, 0.162886922875000}, + {0.130635833000000, 0.032884578937500}, + {0.995486845875000, 0.879726983937500}, + {0.681683761437500, 0.415213866187500}, + {0.471888733500000, 0.975077322375000}, + {0.002080578437500, 0.292317740812500}, + {0.982026984312500, 0.286243495000000}, + {0.291525812500000, 0.713456715312500}, + {0.803515783250000, 0.619794542562500}, + {0.363736251000000, 0.241491573500000}, + {0.581375603187500, 0.850024182625000}, + {0.126134788437500, 0.739345154625000}, + {0.807256990625000, 0.025225260812500}, + {0.214063133312500, 0.979178170312500}, + {0.279120068187500, 0.455460706437500}, + {0.521614411125000, 0.748128257250000}, + {0.541375661500000, 0.191916865812500}, + {0.092374240812500, 0.093123040062500}, + {0.819780017000000, 0.863865176562500}, + {0.723535390937500, 0.290673655562500}, + {0.333626471625000, 0.991508772375000}, + {0.180081879937500, 0.273337083437500}, + {0.884853249937500, 0.353826861250000}, + {0.486489450437500, 0.649922456187500}, + {0.970355124125000, 0.588720045187500}, + {0.411054041562500, 0.190728892687500}, + {0.670557598000000, 0.782628676437500}, + {0.176686781125000, 0.590995978187500}, + {0.923484185187500, 0.119472166250000}, + {0.229834653750000, 0.826596529437500}, + {0.402229645500000, 0.427815757250000}, + {0.614887300500000, 0.582390020187500}, + {0.721331207625000, 0.117509299250000}, + {0.221780261562500, 0.160787322875000}, + 
{0.980871046250000, 0.779868041500000}, + {0.521614411125000, 0.498128257250000}, + {0.462698109750000, 0.855009158437500}, + {0.102148981812500, 0.485181351500000}, + {0.790505847500000, 0.272359588500000}, + {0.357263913000000, 0.553624565062500}, + {0.852875617687500, 0.518271589687500}, + {0.412788910312500, 0.072860258937500}, + {0.739509651750000, 0.912886922875000}, + {0.244715387500000, 0.610882883562500}, + {0.931245437000000, 0.247161473000000}, + {0.118495619500000, 0.827404835625000}, + {0.356241537562500, 0.307793951312500}, + {0.739954645312500, 0.601971750750000}, + {0.652229645500000, 0.240315757250000}, + {0.085230272750000, 0.149967825937500}, + {0.790487853250000, 0.802468641250000}, + {0.742972183312500, 0.431269330562500}, + {0.338023546687500, 0.864140358375000}, + {0.161359195250000, 0.386030244500000}, + {0.979622565375000, 0.415764143437500}, + {0.344324410875000, 0.743490102812500}, + {0.850000234687500, 0.588036936812500}, + {0.478921568625000, 0.053308823500000}, + {0.575878945812500, 0.904948635625000}, + {0.066809364125000, 0.711985215062500}, + {0.842374240812500, 0.093123040062500}, + {0.072833622000000, 0.943689044750000}, + {0.473982478062500, 0.309619342562500}, + {0.643468702500000, 0.727011596187500}, + {0.661784804062500, 0.096504548000000}, + {0.075593410125000, 0.020665263437500}, + {0.846367111937500, 0.980869489750000}, + {0.584417841312500, 0.402177886625000}, + {0.419264650000000, 0.807665176000000}, + {0.108911798812500, 0.274823750687500}, + {0.842214949125000, 0.395649388625000}, + {0.424460011250000, 0.541515061937500}, + {0.914875915625000, 0.525088448500000}, + {0.276815978250000, 0.138406141250000}, + {0.682946765937500, 0.941192325375000}, + {0.243922631125000, 0.674414353500000}, + {0.983747165312500, 0.225123234375000}, + {0.209039534812500, 0.919381743937500}, + {0.317979460312500, 0.396931190375000}, + {0.595789254625000, 0.645833852812500}, + {0.589063133312500, 0.229178170312500}, + {0.201732996437500, 
0.034567680750000}, + {0.911951413000000, 0.959874565062500}, + {0.669465614812500, 0.307270670687500}, + {0.442773254937500, 0.918452206312500}, + {0.228659227750000, 0.498372525687500}, + {0.864786425062500, 0.258916160937500}, + {0.366015783250000, 0.682294542562500}, + {0.832368054687500, 0.749523853312500}, + {0.475148582250000, 0.180782790250000}, + {0.543804934062500, 0.806559289687500}, + {0.041831345187500, 0.574164114062500}, + {0.981787063687500, 0.014769301562500}, + {0.167325380812500, 0.796656456375000}, + {0.305883847687500, 0.260168413875000}, + {0.736593646187500, 0.544303438875000}, + {0.595631980687500, 0.113338942562500}, + {0.233747165312500, 0.225123234375000}, + {0.881420496437500, 0.854880180750000}, + {0.514120916562500, 0.361726452125000}, + {0.262088408375000, 0.897305108937500}, + {0.040764352812500, 0.448613484812500}, + {0.882527922875000, 0.453355770312500}, + {0.486593646187500, 0.544303438875000}, + {0.944193581437500, 0.650074503000000}, + {0.403764392000000, 0.003513614062500}, + {0.647805652187500, 0.839498260375000}, + {0.004346402437500, 0.700568695812500}, + {0.863684364125000, 0.149485215062500}, + {0.075593410125000, 0.770665263437500}, + {0.260573230937500, 0.378374937125000}, + {0.606947383687500, 0.518907935937500}, + {0.522543332062500, 0.131538315687500}, + {0.115674527437500, 0.213752289562500}, + {0.978861550187500, 0.943531534250000}, + {0.716222608750000, 0.357993041500000}, + {0.396123640562500, 0.988911053812500}, + {0.116417439062500, 0.427849133812500}, + {0.960375986437500, 0.355143408875000}, + {0.396123640562500, 0.613911053812500}, + {0.771872079375000, 0.679610301562500}, + {0.407651999187500, 0.129979780250000}, + {0.610967390562500, 0.957538983500000}, + {0.099030910125000, 0.622227763437500}, + {0.792605235062500, 0.213450866625000}, + {0.231787063687500, 0.764769301562500}, + {0.345102996500000, 0.451097146437500}, + {0.537639116937500, 0.609792796562500}, + {0.670557598000000, 0.032628676437500}, + 
{0.161148929812500, 0.091845178375000}, + {0.915446201000000, 0.774126136937500}, + {0.542495165812500, 0.275647345250000}, + {0.316935423562500, 0.923529210750000}, + {0.209068937250000, 0.340693571625000}, + {0.770926812125000, 0.490443845937500}, + {0.462732614000000, 0.712224844000000}, + {0.887121046250000, 0.643149291500000}, + {0.302373640562500, 0.082661053812500}, + {0.728921568625000, 0.803308823500000}, + {0.181258709500000, 0.653526624000000}, + {0.977987853250000, 0.146218641250000}, + {0.016702341062500, 0.927996303750000}, + {0.467374240812500, 0.431013665062500}, + {0.706158315437500, 0.659556033875000}, + {0.669301523312500, 0.186625825562500}, + {0.039461819812500, 0.116237049750000}, + {0.798533046937500, 0.927896543187500}, + {0.636245165812500, 0.463147345250000}, + {0.358057598000000, 0.800206801437500}, + {0.057953992187500, 0.323742603312500}, + {0.838196808062500, 0.318240323625000}, + {0.288441098000000, 0.563378403500000}, + {0.981947383687500, 0.518907935937500}, + {0.348181288187500, 0.183212688250000}, + {0.506420496437500, 0.917380180750000}, + {0.164875915625000, 0.525088448500000}, + {0.787802165812500, 0.082912283062500}, + {0.134409779187500, 0.902448199125000}, + {0.408376747562500, 0.326245734125000}, + {0.584561740812500, 0.727888665062500}, + {0.534266910125000, 0.009900787187500}, + {0.192731703031250, 0.122610961484375}, + {0.886403666453125, 0.919165570765625}, + {0.740362459437500, 0.479082682703125}, + {0.381143881328125, 0.825372076343750}, + {0.177058600328125, 0.355660703968750}, + {0.880411986109375, 0.304388585781250}, + {0.489954645359375, 0.601971750750000}, + {0.931245437015625, 0.622161472968750}, + {0.495691442609375, 0.089534956375000}, + {0.748617226093750, 0.964032599359375}, + {0.200716102781250, 0.538594564312500}, + {0.902762098828125, 0.040629656437500}, + {0.167366403703125, 0.826817667671875}, + {0.326986691484375, 0.360298081343750}, + {0.725793624375000, 0.689620323765625}, + {0.725003755000000, 
0.197653489734375}, + {0.019203528312500, 0.219887995546875}, + {0.789461819796875, 0.866237049781250}, + {0.602987853250000, 0.364968641265625}, + {0.286530910140625, 0.997227763453125}, + {0.197691088203125, 0.299653371203125}, + {0.864509651765625, 0.475386922921875}, + {0.409509265921875, 0.695098575390625}, + {0.924966150343750, 0.731175805390625}, + {0.366549151562500, 0.016182260046875}, + {0.620446765921875, 0.753692325390625}, + {0.004126035234375, 0.512365679515625}, + {0.759204111453125, 0.126627783906250}, + {0.039461819796875, 0.866237049781250}, + {0.385324830531250, 0.282537197156250}, + {0.529500805046875, 0.539659192578125}, + {0.620486845921875, 0.129726983984375}, + {0.040287232453125, 0.022961294593750}, + {0.871648411546875, 0.886075859718750}, + {0.567731703031250, 0.497610961484375}, + {0.271219649968750, 0.785940731265625}, + {0.149813081953125, 0.495059302156250}, + {0.752354406031250, 0.336633136296875}, + {0.267950605953125, 0.630717656421875}, + {0.763064970921875, 0.526211358984375}, + {0.350302165859375, 0.082912283093750}, + {0.541375661546875, 0.941916865828125}, + {0.067888875390625, 0.648631653203125}, + {0.817282235640625, 0.240645457843750}, + {0.176686781125000, 0.965995978171875}, + {0.414407018781250, 0.458335411421875}, + {0.635381359671875, 0.622395865796875}, + {0.696580598671875, 0.010305563015625}, + {0.146140435203125, 0.181972166265625}, + {0.853197227578125, 0.768215064734375}, + {0.631158461921875, 0.330667103468750}, + {0.443098689046875, 0.770526325000000}, + {0.008860189078125, 0.404241883828125}, + {0.920499240812500, 0.436873040078125}, + {0.274931749687500, 0.517395920968750}, + {0.872488661328125, 0.681742937687500}, + {0.273658139312500, 0.240026975812500}, + {0.686178846875000, 0.902720720890625}, + {0.022328994562500, 0.659601535312500}, + {0.889064677375000, 0.139944156000000}, + {0.041831345203125, 0.949164114093750}, + {0.443262299140625, 0.313206182765625}, + {0.553107937421875, 0.637161480234375}, + 
{0.576361266343750, 0.010049207671875}, + {0.024757727531250, 0.155556940859375}, + {0.954885609765625, 0.864774783453125}, + {0.576988498046875, 0.268435650828125}, + {0.378272153750000, 0.889096529468750}, + {0.243922631171875, 0.424414353515625}, + {0.993449504812500, 0.487462829328125}, + {0.315047772046875, 0.590538342781250}, + {0.757436479140625, 0.715431613031250}, + {0.454737456671875, 0.222940860375000}, + {0.506538910328125, 0.822860258968750}, + {0.223798019234375, 0.699851317375000}, + {0.839514399500000, 0.012551700359375}, + {0.013378945812500, 0.811198635640625}, + {0.259404890625000, 0.333637616328125}, + {0.674460011265625, 0.635265061968750}, + {0.552179912484375, 0.113477370312500}, + {0.133506990359375, 0.242482936484375}, + {0.792605235078125, 0.963450866640625}, + {0.556245437015625, 0.434661472968750}, + {0.302640369765625, 0.866357903265625}, + {0.104025812484375, 0.322831715312500}, + {0.788292439093750, 0.420036633796875}, + {0.383947288453125, 0.645595154640625}, + {0.987679025703125, 0.720586141078125}, + {0.310166960328125, 0.049274940406250}, + {0.692634651765625, 0.826949422921875}, + {0.066739180750000, 0.551367061812500}, + {0.954885609765625, 0.114774783453125}, + {0.106252533187500, 0.916712681484375}, + {0.490362459437500, 0.479082682703125}, + {0.646028602781250, 0.509297689312500}, + {0.696508847734375, 0.182043413890625}, + {0.167639399500000, 0.008157169109375}, + {0.942731703031250, 0.935110961484375}, + {0.682010510203125, 0.383364961312500}, + {0.444750986453125, 0.993815283906250}, + {0.012293182515625, 0.265019109265625}, + {0.943520700468750, 0.285664643703125}, + {0.256436740812500, 0.727888665078125}, + {0.792605235078125, 0.588450866640625}, + {0.323828669343750, 0.228345414000000}, + {0.589727949703125, 0.818705937671875}, + {0.146647944500000, 0.717554343109375}, + {0.763378945812500, 0.061198635640625}, + {0.245217761562500, 0.944967010375000}, + {0.302009651765625, 0.475386922921875}, + {0.508157018781250, 
0.708335411421875}, + {0.552058600328125, 0.230660703968750}, + {0.076470961921875, 0.065042103468750}, + {0.839060384390625, 0.826948487828125}, + {0.743383847734375, 0.260168413890625}, + {0.361729406031250, 0.961633136296875}, + {0.130411986109375, 0.304388585781250}, + {0.913057411375000, 0.372578939312500}, + {0.450791960328125, 0.635700721656250}, + {0.994715387546875, 0.610882883562500}, + {0.396123640625000, 0.238911053828125}, + {0.635171568625000, 0.803308823531250}, + {0.134436274500000, 0.588235294109375}, + {0.893091363734375, 0.085389815609375}, + {0.204885609765625, 0.864774783453125}, + {0.419763913046875, 0.397374565093750}, + {0.589063133281250, 0.604178170375000}, + {0.692634651765625, 0.076949422921875}, + {0.192731703031250, 0.185110961484375}, + {0.951814247656250, 0.756306315203125}, + {0.506689186515625, 0.439765218421875}, + {0.456461826015625, 0.821001137000000}, + {0.083707248625000, 0.461775383828125}, + {0.764249240812500, 0.280623040078125}, + {0.323579912671875, 0.557221730578125}, + {0.818079645359375, 0.508221750750000}, + {0.435674328421875, 0.095052115796875}, + {0.725148582281250, 0.930782790234375}, + {0.211591777984375, 0.620217372437500}, + {0.901467761562500, 0.194967010375000}, + {0.114509651765625, 0.873824422921875}, + {0.350302165859375, 0.270412283093750}, + {0.713196808109375, 0.568240323640625}, + {0.631158461921875, 0.205667103468750}, + {0.121648411546875, 0.136075859718750}, + {0.807256990687500, 0.775225260828125}, + {0.748441255000000, 0.385153489734375}, + {0.322881990687500, 0.822100260828125}, + {0.170499240812500, 0.436873040078125}, + {0.976735045687500, 0.379605464812500}, + {0.326988498046875, 0.705935650828125}, + {0.849030910140625, 0.622227763453125}, + {0.456628942609375, 0.025081831375000}, + {0.603718978906250, 0.881272112125000}, + {0.087671365468750, 0.733711546609375}, + {0.858316099875000, 0.063684800093750}, + {0.079233855890625, 0.980882302687500}, + {0.498831558375000, 0.275019330593750}, + 
{0.683006746078125, 0.696560873750000}, + {0.634421685203125, 0.070155760015625}, + {0.100941098000000, 0.000878403515625}, + {0.868983666328125, 0.946905808593750}, + {0.622823333015625, 0.407884578921875}, + {0.380701413046875, 0.772374565093750}, + {0.070966777984375, 0.276467372437500}, + {0.869730392156250, 0.411764705875000}, + {0.390973727828125, 0.533716984406250}, + {0.934919978109375, 0.561328326953125}, + {0.267725152796875, 0.170350753109375}, + {0.650556467859375, 0.939171186375000}, + {0.208092013671875, 0.656130963328125}, + {0.939854406031250, 0.211633136296875}, + {0.227987853250000, 0.896218641265625}, + {0.362037827187500, 0.411778179156250}, + {0.575564970921875, 0.682461358984375}, + {0.603861550250000, 0.193531534234375}, + {0.245936791328125, 0.056280808593750}, + {0.931245437015625, 0.997161472968750}, + {0.674341363734375, 0.272889815609375}, + {0.475148582281250, 0.930782790234375}, + {0.196690920375000, 0.490305748390625}, + {0.823073230953125, 0.290484312156250}, + {0.349801672015625, 0.643614082703125}, + {0.816809364156250, 0.711985215093750}, + {0.442773254953125, 0.168452206343750}, + {0.559175124515625, 0.768645847968750}, + {0.012608312281250, 0.564660480046875}, + {0.951732996484375, 0.034567680765625}, + {0.130635833015625, 0.782884578921875}, + {0.295859197828125, 0.295320202140625}, + {0.712431749687500, 0.517395920968750}, + {0.572626461875000, 0.068089897125000}, + {0.211591777984375, 0.245217372437500}, + {0.901756746078125, 0.821560873750000}, + {0.512364601359375, 0.315328221531250}, + {0.275838593406250, 0.932598880093750}, + {0.007956102718750, 0.451497525703125}, + {0.924966150343750, 0.481175805390625}, + {0.454495282953125, 0.559257955593750}, + {0.978187903312500, 0.673257136171875}, + {0.416103249937500, 0.041326861265625}, + {0.664447403859375, 0.864416693968750}, + {0.033521988046875, 0.696631179015625}, + {0.852837228421875, 0.184355089812500}, + {0.090934062750000, 0.810372893375000}, + {0.275746963312500, 
0.411554660312500}, + {0.588037245453125, 0.558795337875000}, + {0.554922654015625, 0.160357524531250}, + {0.072833622000000, 0.193689044750000}, + {0.964063133281250, 0.979178170375000}, + {0.708517234312500, 0.319548392906250}, + {0.432256990687500, 0.962725260828125}, + {0.068079645359375, 0.414471750750000}, + {0.963190877593750, 0.324420555781250}, + {0.411502470921875, 0.573086358984375}, + {0.800162124781250, 0.669611615750000}, + {0.387554934109375, 0.150309289718750}, + {0.579945004250000, 0.965966294140625}, + {0.065522102093750, 0.599326277234375}, + {0.761255117500000, 0.204583567718750}, + {0.196950541453125, 0.770728070765625}, + {0.344324410937500, 0.493490102843750}, + {0.510111550250000, 0.568531534234375}, + {0.636389399500000, 0.008157169109375}, + {0.128530229140625, 0.090431613031250}, + {0.883566727531250, 0.752475196796875}, + {0.552206093281250, 0.309003302484375}, + {0.348181288187500, 0.933212688265625}, + {0.227987853250000, 0.364968641265625}, + {0.771924527437500, 0.448127289609375}, + {0.489679912484375, 0.745313307812500}, + {0.927148254953125, 0.684077206343750}, + {0.264066255000000, 0.103903489734375}, + {0.740057568187500, 0.764054456484375}, + {0.148947313656250, 0.630208463203125}, + {0.974161986109375, 0.179388585781250}, + {0.010457836296875, 0.893541028515625}, + {0.498441255000000, 0.385153489734375}, + {0.744468904421875, 0.637380397421875}, + {0.678301531546875, 0.136423458078125}, + {0.010191088203125, 0.112153371203125}, + {0.774757727531250, 0.905556940859375}, + {0.674229406031250, 0.446008136296875}, + {0.319922419343750, 0.784986039000000}, + {0.011042923812500, 0.349437248625000}, + {0.821379264859375, 0.354136628500000}, + {0.257237357453125, 0.579287821171875}, + {0.948151308765625, 0.522112716656250}, + {0.318520700468750, 0.160664643703125}, + {0.543804934109375, 0.900309289718750}, + {0.130607996843750, 0.519919121093750}, + {0.811627065421875, 0.071665408953125}, + {0.160867175625000, 0.931752899046875}, + 
{0.428297719390625, 0.362355138953125}, + {0.609505036140625, 0.690144858781250}, + {0.504804041609375, 0.003228892687500}, + {0.216196606265625, 0.064729040234375}, + {0.901736845921875, 0.879726983984375}, + {0.708719649968750, 0.453909481265625}, + {0.415337952218750, 0.849024716109375}, + {0.134853249937500, 0.353826861265625}, + {0.903787097500000, 0.267080047484375}, + {0.479025812484375, 0.572831715312500}, + {0.876605124109375, 0.604345045187500}, + {0.456461826015625, 0.071001137000000}, + {0.709494562484375, 0.955644215312500}, + {0.231947383703125, 0.518907935984375}, + {0.932230392156250, 0.013327205875000}, + {0.145086204046875, 0.865883539265625}, + {0.350930456281250, 0.348899376265625}, + {0.725034838203125, 0.739106496203125}, + {0.739954645359375, 0.226971750750000}, + {0.042605235078125, 0.213450866640625}, + {0.811627065421875, 0.821665408953125}, + {0.587735274500000, 0.312719600875000}, + {0.307230392156250, 0.950827205875000}, + {0.217487057734375, 0.273792953031250}, + {0.854401308765625, 0.440081466656250}, + {0.384747995484375, 0.706561433531250}, + {0.899206688062500, 0.699456330843750}, + {0.334068937265625, 0.028193571656250}, + {0.576864665859375, 0.801662283093750}, + {0.041360180171875, 0.539240991515625}, + {0.780622165328125, 0.170435734406250}, + {0.025074889437500, 0.841885738250000}, + {0.412686740812500, 0.274763665078125}, + {0.551686781125000, 0.512870978171875}, + {0.574633261734375, 0.138224135796875}, + {0.058436791328125, 0.056280808593750}, + {0.822110274500000, 0.890844600875000}, + {0.618449504812500, 0.487462829328125}, + {0.264066255000000, 0.807028489734375}, + {0.132527922859375, 0.453355770328125}, + {0.807953992203125, 0.323742603312500}, + {0.302148254953125, 0.684077206343750}, + {0.794171695281250, 0.522748994546875}, + {0.372686140625000, 0.110004803828125}, + {0.507741981953125, 0.951245734125000}, + {0.107097304093750, 0.651192048015625}, + {0.822833622000000, 0.193689044750000}, + {0.181245437015625, 
0.997161472968750}, + {0.384747995484375, 0.456561433531250}, + {0.662226998781250, 0.569583683906250}, + {0.727197083078125, 0.021632137984375}, + {0.184988661328125, 0.150492937687500}, + {0.873243045828125, 0.810942332640625}, + {0.684963615078125, 0.357045297593750}, + {0.461525611203125, 0.759528012437500}, + {0.025718904421875, 0.410817897421875}, + {0.897009311359375, 0.420948834468750}, + {0.263037063734375, 0.546019301578125}, + {0.857097304093750, 0.651192048015625}, + {0.252866199843750, 0.205957512640625}, + {0.665602115484375, 0.895166775859375}, + {0.056996963312500, 0.684992160312500}, + {0.918804934109375, 0.150309289718750}, + {0.019203528312500, 0.969887995546875}, + {0.485853633703125, 0.339220435984375}, + {0.509352115484375, 0.676416775859375}, + {0.564952190359375, 0.039350341546875}, + {0.061178846875000, 0.152720720890625}, + {0.979027962734375, 0.850901043359375}, + {0.618986693593750, 0.251067502968750}, + {0.416788412578125, 0.890801763843750}, + {0.191935423609375, 0.392279210781250}, + {0.959424527437500, 0.448127289609375}, + {0.360967390625000, 0.582538983515625}, + {0.802390435203125, 0.744472166265625}, + {0.498617226093750, 0.214032599359375}, + {0.542495165859375, 0.838147345281250}, + {0.211457644609375, 0.742513028953125}, + {0.837488317609375, 0.030941206375000}, + {0.038430456281250, 0.786399376265625}, + {0.290324504812500, 0.370275329328125}, + {0.678301531546875, 0.667673458078125}, + {0.510613942796875, 0.106955928328125}, + {0.170736691484375, 0.235298081343750}, + {0.759083993796875, 0.997656627843750}, + {0.539572313656250, 0.380208463203125}, + {0.255388875390625, 0.867381653203125}, + {0.071379264859375, 0.354136628500000}, + {0.758860189078125, 0.404241883828125}, + {0.420321786390625, 0.636298924375000}, + {0.978659227718750, 0.748372525703125}, + {0.287503755000000, 0.010153489734375}, + {0.739679912484375, 0.870313307812500}, + {0.089315978250000, 0.513406141265625}, + {0.943869562484375, 0.080155934062500}, + 
{0.076564677375000, 0.913381656000000}, + {0.444542439093750, 0.459099133796875}, + {0.633162999796875, 0.540307445062500}, + {0.709818581500000, 0.157887002984375}, + {0.167325380828125, 0.046656456390625}, + {0.977987853250000, 0.896218641265625}, + {0.652229645515625, 0.427815757218750}, + {0.492972183375000, 0.993769330593750}, + {0.040505847500000, 0.272359588500000}, + {0.962978249937500, 0.260076861265625}, + {0.302640369765625, 0.741357903265625}, + {0.768208405500000, 0.610922261187500}, + {0.318273390046875, 0.193320190000000}, + {0.619905641343750, 0.853941035859375}, + {0.181258709546875, 0.747276624031250}, + {0.757229657953125, 0.013359518093750}, + {0.244715387546875, 0.985882883562500}, + {0.259008847734375, 0.494543413890625}, + {0.554055652187500, 0.714498260375000}, + {0.534027961468750, 0.219931745984375}, + {0.069780017046875, 0.113865176609375}, + {0.848982478109375, 0.872119342578125}, + {0.713196808109375, 0.271365323640625}, + {0.363736251062500, 0.991491573531250}, + {0.150433761421875, 0.282401366203125}, + {0.901208663437500, 0.331649132359375}, + {0.463675308375000, 0.681269330593750}, + {0.949633261734375, 0.606974135796875}, + {0.384851754687500, 0.208333852843750}, + {0.668614601359375, 0.752828221531250}, + {0.181245437015625, 0.622161472968750}, + {0.895086204046875, 0.115883539265625}, + {0.193869562484375, 0.830155934062500}, + {0.387335340921875, 0.396347453890625}, + {0.564854406031250, 0.586633136296875}, + {0.745691442609375, 0.089534956375000}, + {0.245486845921875, 0.129726983984375}, + {0.982811359250000, 0.811790368250000}, + {0.536503468843750, 0.474657913296875}, + {0.489679912484375, 0.870313307812500}, + {0.067888875390625, 0.492381653203125}, + {0.751398662484375, 0.307813307812500}, + {0.352987853250000, 0.521218641265625}, + {0.869468904421875, 0.543630397421875}, + {0.400000938734375, 0.102147747421875}, + {0.716287097500000, 0.892080047484375}, + {0.204945004250000, 0.590966294140625}, + {0.883506990359375, 
0.242482936484375}, + {0.065143307734375, 0.844593734281250}, + {0.327001686515625, 0.299140218421875}, + {0.748446786390625, 0.573798924375000}, + {0.665686274500000, 0.213235294109375}, + {0.115683153500000, 0.178056211000000}, + {0.757883424281250, 0.796209072156250}, + {0.713222390562500, 0.397222857359375}, + {0.362679025703125, 0.845586141078125}, + {0.147009311359375, 0.420948834468750}, + {0.954472218843750, 0.404345413296875}, + {0.363836204046875, 0.740883539265625}, + {0.821904890625000, 0.583637616328125}, + {0.492832801906250, 0.029377324812500}, + {0.599161986109375, 0.929388585781250}, + {0.096331207625000, 0.695634299281250}, + {0.848982478109375, 0.122119342578125}, + {0.099003468843750, 0.966845413296875}, + {0.462431749687500, 0.267395920968750}, + {0.677354460328125, 0.725544471656250}, + {0.650008709546875, 0.122276624031250}, + {0.123243045828125, 0.060942332640625}, + {0.822833622000000, 0.943689044750000}, + {0.586591777984375, 0.432717372437500}, + {0.408903485687500, 0.776738982078125}, + {0.080479406031250, 0.305383136296875}, + {0.818079645359375, 0.414471750750000}, + {0.431931985968750, 0.513270723078125}, + {0.903169756421875, 0.555146535265625}, + {0.285517059468750, 0.166546514046875}, + {0.675219736453125, 0.976237158906250}, + {0.238942076937500, 0.643155269500000}, + {0.964063133281250, 0.229178170375000}, + {0.192731703031250, 0.935110961484375}, + {0.348181288187500, 0.401962688265625}, + {0.618003220203125, 0.658636770343750}, + {0.588190877593750, 0.199420555781250}, + {0.216958200468750, 0.007344331203125}, + {0.883506990359375, 0.992482936484375}, + {0.636042923812500, 0.294749748625000}, + {0.481304934109375, 0.900309289718750}, + {0.237679025703125, 0.470586141078125}, + {0.831750421625000, 0.262285054609375}, + {0.341264392046875, 0.675388614109375}, + {0.864509651765625, 0.725386922921875}, + {0.481304934109375, 0.150309289718750}, + {0.507180456281250, 0.786399376265625}, + {0.033602444796875, 0.600612049781250}, + 
{0.962250867203125, 0.054211353312500}, + {0.151703992203125, 0.761242603312500}, + {0.261464799453125, 0.261330050531250}, + {0.740667841375000, 0.511552886640625}, + {0.615093996609375, 0.088785852218750}, + {0.228861550250000, 0.193531534234375}, + {0.920420940359375, 0.853803466546875}, + {0.541111850968750, 0.345010423484375}, + {0.305472183375000, 0.900019330593750}, + {0.044319650703125, 0.478886922328125}, + {0.892725152796875, 0.482850753109375}, + {0.490667841375000, 0.511552886640625}, + {0.971780261562500, 0.629537322875000}, + {0.387554934109375, 0.056559289718750}, + {0.662939186515625, 0.814765218421875}, + {0.052390435203125, 0.744472166265625}, + {0.826564677375000, 0.163381656000000}, + {0.103197227578125, 0.768215064734375}, + {0.306265020015625, 0.415613958984375}, + {0.575564970921875, 0.526211358984375}, + {0.505411986109375, 0.183294835781250}, + {0.079233855890625, 0.230882302687500}, + {0.939854406031250, 0.961633136296875}, + {0.747488661328125, 0.369242937687500}, + {0.399926992500000, 0.954583567718750}, + {0.122446606265625, 0.392854040234375}, + {0.987719736453125, 0.351237158906250}, + {0.425219736453125, 0.601237158906250}, + {0.774082801906250, 0.642658574812500}, + {0.416299322109375, 0.161398788656250}, + {0.602987853250000, 0.989968641265625}, + {0.089514399500000, 0.575051700359375}, + {0.786335975187500, 0.240626511406250}, + {0.231815968843750, 0.794970413296875}, + {0.318750014656250, 0.443002308546875}, + {0.505566445812500, 0.623698635640625}, + {0.637067640531250, 0.039603148984375}, + {0.180883847734375, 0.072668413890625}, + {0.917325380828125, 0.796656456390625}, + {0.526756746078125, 0.259060873750000}, + {0.334878599875000, 0.893762925093750}, + {0.225425998046875, 0.315310650828125}, + {0.794319650703125, 0.478886922328125}, + {0.445957801906250, 0.724689824812500}, + {0.915440720703125, 0.656963966125000}, + {0.302640369765625, 0.116357903265625}, + {0.698976641187500, 0.774455388406250}, + {0.177148254953125, 
0.684077206343750}, + {0.949633261734375, 0.138224135796875}, + {0.045314677375000, 0.913381656000000}, + {0.458038607125000, 0.401547691687500}, + {0.713980484156250, 0.625879926296875}, + {0.643255797593750, 0.155068738921875}, + {0.025074889437500, 0.091885738250000}, + {0.766702341031250, 0.927996303765625}, + {0.664447403859375, 0.489416693968750}, + {0.352787232453125, 0.772961294593750}, + {0.025478249937500, 0.320623736265625}, + {0.854074830531250, 0.352849697156250}, + {0.286530910140625, 0.622227763453125}, + {0.977727943875000, 0.553082892328125}, + {0.350685461906250, 0.153596186453125}, + {0.557230392156250, 0.880514705875000}, + {0.153169756421875, 0.555146535265625}, + {0.789461819796875, 0.116237049781250}, + {0.168804934109375, 0.900309289718750}, + {0.377355421296875, 0.330038940000000}, + {0.612679025703125, 0.720586141078125}, + {0.508506990359375, 0.054982936484375}, + {0.196917624109375, 0.096532545187500}, + {0.910867175625000, 0.931752899046875}, + {0.728171685203125, 0.443690916265625}, + {0.435674328421875, 0.845052115796875}, + {0.182230392156250, 0.325827205875000}, + {0.884087952218750, 0.286524716109375}, + {0.448834987281250, 0.579381097156250}, + {0.884436274500000, 0.588235294109375}, + {0.439752211921875, 0.123635853468750}, + {0.688559795171875, 0.981591765453125}, + {0.198151308765625, 0.522112716656250}, + {0.901703992203125, 0.011242603312500}, + {0.128530229140625, 0.840431613031250}, + {0.317826892046875, 0.321872989109375}, + {0.696508847734375, 0.744543413890625}, + {0.688847218843750, 0.216845413296875}, + {0.004724588125000, 0.188791578953125}, + {0.787802165859375, 0.832912283093750}, + {0.567052101359375, 0.371480565281250}, + {0.302148254953125, 0.996577206343750}, + {0.243986693593750, 0.251067502968750}, + {0.825795788375000, 0.474201552750000}, + {0.430456688062500, 0.699456330843750}, + {0.931258709546875, 0.747276624031250}, + {0.325564970921875, 0.057461358984375}, + {0.575724486109375, 0.777044835781250}, + 
{0.058436791328125, 0.525030808593750}, + {0.759808761421875, 0.157401366203125}, + {0.010191088203125, 0.862153371203125}, + {0.411476654468750, 0.296344298265625}, + {0.505607996843750, 0.519919121093750}, + {0.563898662484375, 0.182813307812500}, + {0.007229657953125, 0.013359518093750}, + {0.826564677375000, 0.913381656000000}, + {0.615744562484375, 0.440019215312500}, + {0.273049196593750, 0.752824731078125}, + {0.126134788453125, 0.489345154640625}, + {0.776528750687500, 0.346344691359375}, + {0.271093019843750, 0.673808825390625}, + {0.766504140937500, 0.549462718109375}, + {0.321690920375000, 0.115305748390625}, + {0.552058600328125, 0.980660703968750}, + {0.086158139312500, 0.677526975812500}, + {0.868983666328125, 0.196905808593750}, + {0.133506990359375, 0.992482936484375}, + {0.428389216390625, 0.479768192062500}, + {0.642183234343750, 0.594837245046875}, + {0.693414534828125, 0.060006743953125}, + {0.177263875390625, 0.179881653203125}, + {0.822881453125000, 0.799457928828125}, + {0.657602996515625, 0.342698708937500}, + {0.447833622000000, 0.795251544750000}, + {0.049908315421875, 0.432993533953125}, + {0.884744019140625, 0.384203634359375}, + {0.303495121031250, 0.531469981562500}, + {0.829156508796875, 0.635903919875000}, + {0.279003945812500, 0.225261135640625}, + {0.626154293906250, 0.912625724750000}, + {0.009546365468750, 0.639961546609375}, + {0.886403666453125, 0.169165570765625}, + {0.052259883703125, 0.979845435984375}, + {0.459494562484375, 0.330644215312500}, + {0.524813081953125, 0.651309302156250}, + {0.606787063734375, 0.014769301578125}, + {0.010457836296875, 0.143541028515625}, + {0.947626461875000, 0.818089897125000}, + {0.571917624109375, 0.284032545187500}, + {0.416299322109375, 0.911398788656250}, + {0.236033046906250, 0.396646543203125}, + {0.990797719390625, 0.456105138953125}, + {0.333626471656250, 0.616508772390625}, + {0.787209695250000, 0.716482502812500}, + {0.439756778562500, 0.194376370593750}, + {0.552179912484375, 
0.863477370312500}, + {0.243449504812500, 0.737462829328125}, + {0.822881453125000, 0.049457928828125}, + {0.058436791328125, 0.806280808593750}, + {0.267237456671875, 0.347940860375000}, + {0.659100916609375, 0.628228892687500}, + {0.525109796625000, 0.082597554609375}, + {0.135951233515625, 0.201639822421875}, + {0.761255117500000, 0.954583567718750}, + {0.556490320328125, 0.399823343937500}, + {0.258466777984375, 0.838967372437500}, + {0.104074830531250, 0.352849697156250}, + {0.803389216390625, 0.386018192062500}, + {0.392068237890625, 0.666186056234375}, + {0.984505036140625, 0.690144858781250}, + {0.277932351500000, 0.052908284062500}, + {0.713196808109375, 0.833865323640625}, + {0.102915496515625, 0.537034646437500}, + {0.979027962734375, 0.100901043359375}, + {0.106115002812500, 0.885378765484375}, + {0.473302101359375, 0.455953221531250}, + {0.685199429843750, 0.551526284734375}, + {0.706158315421875, 0.128306033953125}, + {0.147265783328125, 0.057294542578125}, + {0.959039534828125, 0.919381743953125}, + {0.646013875390625, 0.398631653203125}, + {0.458517234312500, 0.944548392906250}, + {0.056758487734375, 0.295253452109375}, + {0.988836204046875, 0.303383539265625}, + {0.254309364156250, 0.711985215093750}, + {0.775799306890625, 0.567053766171875}, + {0.340921091031250, 0.224871303765625}, + {0.620506746078125, 0.821560873750000}, + {0.160012464359375, 0.690720392781250}, + {0.788430456281250, 0.036399376265625}, + {0.220355124109375, 0.963720045187500}, + {0.287648582281250, 0.493282790234375}, + {0.536503468843750, 0.724657913296875}, + {0.552148254953125, 0.246577206343750}, + {0.116843560203125, 0.107753416265625}, + {0.842374240812500, 0.843123040078125}, + {0.696912145562500, 0.310820697562500}, + {0.330881907437500, 0.958779769828125}, + {0.169338615078125, 0.306386117906250}, + {0.927058600328125, 0.355660703968750}, + {0.477915496515625, 0.630784646437500}, + {0.947107614062500, 0.571599844062500}, + {0.411298019234375, 0.231101317375000}, + 
{0.634196201015625, 0.774126137000000}, + {0.133506990359375, 0.617482936484375}, + {0.911148929828125, 0.091845178421875}, + {0.229027962734375, 0.850901043359375}, + {0.433436791328125, 0.431280808593750}, + {0.570900611203125, 0.618903012437500}, + {0.691994851500000, 0.099783284062500}, + {0.212500058671875, 0.147009234203125}, + {0.951732996484375, 0.784567680765625}, + {0.538479240078125, 0.442006235093750}, + {0.495691442609375, 0.839534956375000}, + {0.092295940359375, 0.445600341546875}, + {0.806758487734375, 0.295253452109375}, + {0.374367956281250, 0.505149376265625}, + {0.822584307093750, 0.538276272453125}, + {0.381143881328125, 0.075372076343750}, + {0.746648411546875, 0.886075859718750}, + {0.225690524703125, 0.572657414093750}, + {0.908135803781250, 0.224055233687500}, + {0.100557411375000, 0.856953939312500}, + {0.326749240812500, 0.257185540078125}, + {0.703382877203125, 0.594522854968750}, + {0.635381359671875, 0.247395865796875}, + {0.091503945812500, 0.170573635640625}, + {0.773093560203125, 0.773280760015625}, + {0.699864601359375, 0.424703221531250}, + {0.337978249937500, 0.822576861265625}, + {0.177278602781250, 0.415547689312500}, + {0.950716102781250, 0.382344564312500}, + {0.345102996515625, 0.701097146437500}, + {0.850941098000000, 0.563378403515625}, + {0.447833622000000, 0.045251544750000}, + {0.584417841375000, 0.933427886640625}, + {0.117581880000000, 0.710837083484375}, + {0.864601654468750, 0.093219298265625}, + {0.115674527437500, 0.963752289609375}, + {0.438901999203125, 0.285253217765625}, + {0.641702341031250, 0.693621303765625}, + {0.662939186515625, 0.064765218421875}, + {0.102763510390625, 0.047682223171875}, + {0.821904890625000, 0.958637616328125}, + {0.617697313656250, 0.380208463203125}, + {0.387554934109375, 0.806559289718750}, + {0.096212938656250, 0.263020963203125}, + {0.867422654015625, 0.379107524531250}, + {0.383386910937500, 0.555990102843750}, + {0.880607996843750, 0.519919121093750}, + {0.252252211921875, 
0.162698353468750}, + {0.642183234343750, 0.969837245046875}, + {0.196583993796875, 0.685156627843750}, + {0.963190877593750, 0.199420555781250}, + {0.199633261734375, 0.888224135796875}, + {0.350685461906250, 0.434846186453125}, + {0.614399749906250, 0.627404791062500}, + {0.608747165328125, 0.225123234406250}, + {0.244958663437500, 0.019149132359375}, + {0.880021551015625, 0.966470884812500}, + {0.686627065421875, 0.259165408953125}, + {0.463246963312500, 0.880304660312500}, + {0.209424527437500, 0.448127289609375}, + {0.850148582281250, 0.305782790234375}, + {0.321802423796875, 0.647870625703125}, + {0.853921568625000, 0.709558823531250}, + {0.496648411546875, 0.136075859718750}, + {0.534266910125000, 0.759900787234375}, + {0.018208405500000, 0.610922261187500}, + {0.981815968843750, 0.044970413296875}, + {0.147265783328125, 0.807294542578125}, + {0.278764594718750, 0.293912076453125}, + {0.697168203437500, 0.508447093109375}, + {0.569936479140625, 0.090431613031250}, + {0.199633261734375, 0.231974135796875}, + {0.878530229140625, 0.840431613031250}, + {0.530111691484375, 0.313423081343750}, + {0.287503755000000, 0.877340989734375}, + {0.020926812156250, 0.490443845953125}, + {0.925118529859375, 0.463551966546875}, + {0.460281853234375, 0.514086682671875}, + {0.946583993796875, 0.685156627843750}, + {0.432953992203125, 0.011242603312500}, + {0.681996963312500, 0.841242160312500}, + {0.035440756328125, 0.741967535328125}, + {0.826814113265625, 0.129551982203125}, + {0.102763510390625, 0.797682223171875}, + {0.257250986453125, 0.431315283906250}, + {0.601735045687500, 0.535855464812500}, + {0.523947313656250, 0.161458463203125}, + {0.089514399500000, 0.200051700359375}, + {0.998871711468750, 0.969931745984375}, + {0.725557411375000, 0.325703939312500}, + {0.411054041609375, 0.940728892687500}, + {0.092214949156250, 0.395649388656250}, + {0.947571808109375, 0.318240323640625}, + {0.385150528859375, 0.598791693968750}, + {0.760340045031250, 0.666060247875000}, + 
{0.385468447859375, 0.173477959078125}, + {0.619715387546875, 0.985882883562500}, + {0.110693313734375, 0.604613051578125}, + {0.759083993796875, 0.247656627843750}, + {0.198151308765625, 0.803362716656250}, + {0.368295322046875, 0.475490672062500}, + {0.522431987421875, 0.579676484406250}, + {0.668614601359375, 0.002828221531250}, + {0.159061291453125, 0.115786836312500}, + {0.885343915375000, 0.797979216453125}, + {0.502049350968750, 0.290322923484375}, + {0.361960011265625, 0.916515061968750}, + {0.212257727531250, 0.374306940859375}, + {0.808006746078125, 0.446560873750000}, + {0.489786425109375, 0.696416160921875}, + {0.914775229140625, 0.625807223171875}, + {0.272394756234375, 0.082105300000000}, + {0.697833622000000, 0.795251544750000}, + {0.146614411140625, 0.654378257218750}, + {0.959039534828125, 0.169381743953125}, + {0.056931985968750, 0.888270723078125}, + {0.493765020015625, 0.415613958984375}, + {0.727875617718750, 0.674521589734375}, + {0.646232996843750, 0.172262871093750}, + {0.052267234312500, 0.085173392906250}, + {0.794171695281250, 0.897748994546875}, + {0.664407018781250, 0.458335411421875}, + {0.318273390046875, 0.755820190000000}, + {0.046247165328125, 0.350123234406250}, + {0.865799202015625, 0.333466330906250}, + {0.285498339406250, 0.583855022421875}, + {0.956147102093750, 0.505576277234375}, + {0.318949826718750, 0.141763441546875}, + {0.510429611953125, 0.887119489765625}, + {0.184919978109375, 0.561328326953125}, + {0.759417624109375, 0.096532545187500}, + {0.130403602781250, 0.937032064312500}, + {0.394595719296875, 0.367380713734375}, + {0.567731703031250, 0.747610961484375}, + {0.538716806515625, 0.039836274843750} + }; +}; + +} \ No newline at end of file diff --git a/40_PathTracer/include/renderer/present/CWindowPresenter.h b/40_PathTracer/include/renderer/present/CWindowPresenter.h new file mode 100644 index 000000000..08715445f --- /dev/null +++ b/40_PathTracer/include/renderer/present/CWindowPresenter.h @@ -0,0 +1,94 @@ +// 
Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_THIS_EXAMPLE_C_WINDOW_PRESENTER_H_INCLUDED_
+#define _NBL_THIS_EXAMPLE_C_WINDOW_PRESENTER_H_INCLUDED_
+
+
+#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h"
+
+#include "renderer/CRenderer.h"
+#include "renderer/present/IPresenter.h"
+
+#include "renderer/shaders/present/push_constants.hlsl"
+
+
+namespace nbl::this_example
+{
+
+class CWindowPresenter : public IPresenter // IPresenter that presents through a window surface and its swapchain
+{
+    public:
+        using swapchain_resources_t = video::CDefaultSwapchainFramebuffers;
+        static const video::IGPURenderpass::SCreationParams::SSubpassDependency Dependencies[3];
+
+        struct SCachedCreationParams
+        {
+            core::smart_refctd_ptr winMgr = nullptr;
+            // for the UI, 1080p with 50% scaling
+            hlsl::uint16_t2 minResolution = {1248,688};
+        };
+        struct SCreationParams : IPresenter::SCachedCreationParams, SCachedCreationParams
+        {
+            inline operator bool() const {return assMan && winMgr && api && callback;} // valid only when every required pointer is set
+
+            core::smart_refctd_ptr api = {};
+            core::smart_refctd_ptr callback = {};
+            std::string_view initialWindowCaption = "";
+        };
+        static core::smart_refctd_ptr create(SCreationParams&& _params);
+
+        // read-only access to the video surface held by the resizable surface wrapper
+        inline const video::ISurface* getSurface() const {return m_construction.surface->getSurface();}
+
+        // window specific creation parameters (this name hides `IPresenter::getCreationParams`)
+        inline const SCachedCreationParams& getCreationParams() const {return m_creation;}
+
+        // cursor control cached at construction
+        inline ui::ICursorControl* getCursorControl() const {return m_construction.cursorControl;}
+
+        // renderpass of the swapchain resources
+        inline const video::IGPURenderpass* getRenderpass() const {return getSwapchainResources()->getRenderpass();}
+
+        // true once the surface reports itself irrecoverable or its window is no longer open
+        bool irrecoverable() const override {return m_construction.surface->irrecoverable() || !m_construction.surface->isWindowOpen();}
+
+    protected:
+        using surface_t = video::CSimpleResizeSurface;
+        struct SCachedConstructionParams
+        {
+            core::smart_refctd_ptr surface;
+            ui::IWindow* window;
+            ui::ICursorControl* cursorControl;
+            hlsl::float64_t2 aspectRatioRange;
+            hlsl::uint16_t2 maxResolution;
+        };
+        struct SConstructorParams : IPresenter::SCachedCreationParams, SCachedCreationParams, SCachedConstructionParams
+        {
+        };
+        inline CWindowPresenter(SConstructorParams&& _params) : IPresenter(std::move(_params)), m_creation(std::move(_params)), m_construction(std::move(_params)), m_pushConstants({}) {}
+        // IPresenter hooks, defined out of line
+        bool init_impl(CRenderer* renderer) override;
+
+        // IPresenter hooks, defined out of line
+        clock_t::time_point acquire_impl(const CSession* session, video::ISemaphore::SWaitInfo* p_currentImageAcquire) override;
+        bool beginRenderpass_impl() override;
+        inline bool present(const video::IQueue::SSubmitInfo::SSemaphoreInfo& readyToPresent) override
+        {
+            return m_construction.surface->present(m_currentImageIndex,{&readyToPresent,1});
+        }
+
+        inline video::ISurface* getSurface() {return m_construction.surface->getSurface();}
+
+        inline swapchain_resources_t* getSwapchainResources() {return static_cast(m_construction.surface->getSwapchainResources());}
+        inline const swapchain_resources_t* getSwapchainResources() const {return static_cast(m_construction.surface->getSwapchainResources());}
+
+        SCachedCreationParams m_creation;
+        SCachedConstructionParams m_construction;
+        core::smart_refctd_ptr m_present;
+        SDefaultResolvePushConstants m_pushConstants;
+        uint8_t m_currentImageIndex = ~0u;
+};
+
+}
+#endif
diff --git a/40_PathTracer/include/renderer/present/IPresenter.h b/40_PathTracer/include/renderer/present/IPresenter.h
new file mode 100644
index 000000000..405e60289
--- /dev/null
+++ b/40_PathTracer/include/renderer/present/IPresenter.h
@@ -0,0 +1,204 @@
+// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_THIS_EXAMPLE_I_PRESENTER_H_INCLUDED_
+#define _NBL_THIS_EXAMPLE_I_PRESENTER_H_INCLUDED_
+
+
+#include "renderer/CScene.h"
+#include "renderer/CSession.h"
+
+#include "renderer/shaders/pathtrace/push_constants.hlsl"
+
+
+namespace nbl::this_example
+{
+
+class IPresenter : public core::IReferenceCounted, public core::InterfaceUnmovable
+{
+    public:
+        constexpr static inline uint8_t CircularBufferSize = 4; // number of command buffers cycled through, one per in-flight present
+
+        struct SCachedCreationParams
+        {
+            core::smart_refctd_ptr assMan = nullptr;
+            system::logger_opt_smart_ptr logger = nullptr;
+        };
+        // parameters cached from creation
+        inline const SCachedCreationParams& getCreationParams() const {return m_creation;}
+
+        // creates the semaphore and one command pool+buffer per slot, then calls `init_impl`; once attempted (or given no renderer) it only reports `isInitialized()`
+        inline bool init(CRenderer* renderer)
+        {
+            if (m_queue || !renderer)
+                return isInitialized();
+
+            auto& logger = m_creation.logger;
+            auto* device = renderer->getDevice();
+            m_queue = renderer->getCreationParams().graphicsQueue;
+
+            bool success = false;
+            auto deinit = core::makeRAIIExiter([&]()->void{
+                if (success)
+                    return;
+                m_semaphore = nullptr;
+                std::fill(m_cmdbufs.begin(),m_cmdbufs.end(),nullptr);
+            });
+
+            using namespace nbl::system;
+            if (!(m_semaphore=device->createSemaphore(m_presentCount)))
+            {
+                logger.log("`IPresenter::init` failed to create a semaphore!",ILogger::ELL_ERROR);
+                return false;
+            }
+
+            for (auto& cmdbuf : m_cmdbufs)
+            {
+                using namespace nbl::video;
+                auto pool=device->createCommandPool(m_queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT);
+                if (!pool || !pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&cmdbuf,1},core::smart_refctd_ptr(logger.get())))
+                {
+                    logger.log("`IPresenter::init` failed to create Command Buffer!",ILogger::ELL_ERROR);
+                    return false;
+                }
+            }
+
+            return success = init_impl(renderer);
+        }
+        inline bool isInitialized() const {return bool(m_semaphore);}
+
+        // queue taken from the renderer during `init`, null before that
+        inline video::IQueue* getQueue() const {return m_queue;}
+        // device the semaphore originates from, must only be called once initialized
+        inline video::ILogicalDevice* getDevice() const {return const_cast(m_semaphore->getOriginDevice());}
+
+        // whether presenting can never work again, the base class never reports that
+        virtual bool irrecoverable() const {return false;}
+
+        // returns expected presentation time for frame pacing
+        using clock_t = std::chrono::steady_clock;
+        inline clock_t::time_point acquire(const CSession* background)
+        {
+            auto expectedPresent = clock_t::time_point::min(); // invalid value
+            m_currentImageAcquire = {};
+            if (!background)
+            {
+                m_currentSessionDS = nullptr;
+                return expectedPresent;
+            }
+            m_currentSessionDS = background->getActiveResources().immutables.ds;
+            return acquire_impl(background,&m_currentImageAcquire);
+        }
+
+        // blocks until the command buffer slot about to be reused is done, then begins recording and the renderpass; nullptr on any failure
+        inline video::IGPUCommandBuffer* beginRenderpass()
+        {
+            if (!isInitialized() || !m_currentImageAcquire.semaphore)
+                return nullptr;
+
+            using namespace nbl::video;
+            if (m_presentCount>=CircularBufferSize)
+            {
+                const ISemaphore::SWaitInfo cbDonePending[] =
+                {
+                    {
+                        .semaphore = m_semaphore.get(),
+                        .value = m_presentCount+1-CircularBufferSize
+                    }
+                };
+                if (getDevice()->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS)
+                    return nullptr;
+            }
+
+            auto* const cb = getCurrentCmdBuffer();
+            cb->getPool()->reset();
+            if (!cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT))
+                return nullptr;
+
+            if (!beginRenderpass_impl())
+            {
+                cb->end();
+                return nullptr;
+            }
+            return cb;
+        }
+
+        // ends the renderpass and recording, submits (waiting on the acquire plus the optional extra wait) and presents; false when nothing is being recorded
+        inline bool endRenderpassAndPresent(const video::IQueue::SSubmitInfo::SSemaphoreInfo& extraSubmitWait)
+        {
+            using namespace nbl::asset;
+            using namespace nbl::video;
+            auto* const cb = getCurrentCmdBuffer();
+            if (!cb || cb->getState()!=IGPUCommandBuffer::STATE::RECORDING) // slots are null until `init` succeeds
+                return false;
+
+            if (!endRenderpass() || !cb->end())
+                return false;
+
+            const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] =
+            {
+                {
+                    .semaphore = m_semaphore.get(),
+                    .value = ++m_presentCount,
+                    .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT
+                }
+            };
+            const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] =
+            {
+                {.cmdbuf = cb}
+            };
+            const IQueue::SSubmitInfo::SSemaphoreInfo wait[] =
+            {
+                {
+                    .semaphore = const_cast(m_currentImageAcquire.semaphore),
+                    .value = m_currentImageAcquire.value,
+                    .stageMask = PIPELINE_STAGE_FLAGS::NONE
+                },
+                extraSubmitWait
+            };
+            IQueue::SSubmitInfo infos[] =
+            {
+                {
+                    .waitSemaphores = wait,
+                    .commandBuffers = commandBuffers,
+                    .signalSemaphores = rendered
+                }
+            };
+            if (!extraSubmitWait.semaphore) // no extra wait given, only wait on the image acquire
+                infos->waitSemaphores = {wait,1};
+
+            if (m_queue->submit(infos)!=IQueue::RESULT::SUCCESS)
+            {
+                m_presentCount--; // undo the increment made for the signal value above
+                return false;
+            }
+            return present(*rendered);
+        }
+
+    protected:
+        inline IPresenter(SCachedCreationParams&& _params) : m_creation(std::move(_params)) {}
+        virtual bool init_impl(CRenderer* renderer) = 0;
+
+        virtual clock_t::time_point acquire_impl(const CSession* background, video::ISemaphore::SWaitInfo* p_currentImageAcquire) = 0;
+        virtual bool beginRenderpass_impl() = 0;
+        virtual bool endRenderpass()
+        {
+            return getCurrentCmdBuffer()->endRenderPass();
+        }
+        virtual bool present(const video::IQueue::SSubmitInfo::SSemaphoreInfo& readyToPresent) = 0;
+
+        inline video::IGPUDescriptorSet* getCurrentSessionDS() const {return m_currentSessionDS.get();}
+        inline video::IGPUCommandBuffer* getCurrentCmdBuffer() const {return m_cmdbufs[m_presentCount % CircularBufferSize].get();}
+
+    private:
+        SCachedCreationParams m_creation;
+        video::CThreadSafeQueueAdapter* m_queue = nullptr;
+        core::smart_refctd_ptr m_semaphore = {};
+        std::array, CircularBufferSize> m_cmdbufs = {};
+        video::ISemaphore::SWaitInfo m_currentImageAcquire = {};
+        core::smart_refctd_ptr m_currentSessionDS = {};
+        uint64_t m_presentCount = 0;
+};
+
+}
+#endif
diff --git a/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h b/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h
new file mode 100644
index 000000000..339e7cb7c
--- /dev/null
+++ b/40_PathTracer/include/renderer/resolve/CBasicRWMCResolver.h
@@ -0,0 +1,104 @@
+// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. 
z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_THIS_EXAMPLE_C_BASIC_RWMC_RESOLVER_H_INCLUDED_
+#define _NBL_THIS_EXAMPLE_C_BASIC_RWMC_RESOLVER_H_INCLUDED_
+
+
+#include "renderer/CRenderer.h"
+#include "renderer/resolve/IResolver.h"
+#include "renderer/shaders/resolve/rwmc.hlsl"
+
+
+namespace nbl::this_example
+{
+
+class CBasicRWMCResolver : public IResolver
+{
+    public:
+        enum class AutoExposure : uint8_t
+        {
+            GeometricAverage,
+            Median,
+            Count
+        };
+        enum class Tonemapping : uint8_t
+        {
+            Reinhard,
+            ACES,
+            Count
+        };
+
+        // nothing is cached from creation yet
+        struct SCachedCreationParams
+        {
+        };
+        struct SCreationParams : SCachedCreationParams
+        {
+            inline operator bool() const {return renderer;} // valid only with a renderer
+
+            CRenderer* renderer = nullptr; // initialized so `operator bool` never reads an indeterminate pointer
+        };
+        static core::smart_refctd_ptr create(SCreationParams&& _params);
+
+        // parameters cached from creation
+        inline const SCachedCreationParams& getCreationParams() const { return m_creation; }
+
+        struct SCachedConstructionParams
+        {
+            core::smart_refctd_ptr layout;
+            // TODO: autoexposure
+            core::smart_refctd_ptr lumaMeasure;
+            // TODO: motion vector stuff
+            // rwmc resolve, apply exposure, interleave into OptiX input formats
+            core::smart_refctd_ptr rwmcResolve;
+            // TODO: OIDN denoise
+            // deinterleave from OptiX output format, perform first axis of FFT
+            core::smart_refctd_ptr postDenoise; // TODO
+            // second axis FFT, spectrum multiply and iFFT
+            core::smart_refctd_ptr secondAxisBloom; // TODO
+            // first axis iFFT, tonemap, encode into final EXR format
+            core::smart_refctd_ptr secondAxisFFTTonemap; // TODO
+            //
+            core::smart_refctd_ptr persistentExposureArgs;
+            //
+            core::smart_refctd_ptr bloomKernelSpectrum;
+        };
+        // objects created at construction
+        inline const SCachedConstructionParams& getConstructionParams() const {return m_construction;}
+
+        // 0 for no session and for every render mode handled so far, asserts and returns ~0ull for an unhandled mode
+        inline uint64_t computeScratchSize(const CSession* session) const override
+        {
+            if (!session)
+                return 0ull;
+            switch (session->getConstructionParams().mode)
+            {
+                case CSession::RenderMode::Previs: [[fallthrough]];
+                case CSession::RenderMode::Debug:
+                    return 0ull;
+                case CSession::RenderMode::Beauty:
+                    return 0ull; // for now, as long as we blit
+                default:
+                    break;
+            }
+            assert(false); // unimplemented
+            return ~0ull;
+        }
+        // IResolver implementation, defined out of line
+        bool resolve(video::IGPUCommandBuffer* cb, video::IGPUBuffer* scratch) override;
+
+    protected:
+        struct SConstructorParams : SCachedCreationParams, SCachedConstructionParams
+        {
+        };
+        inline CBasicRWMCResolver(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)) {}
+
+        bool changeSession_impl() override;
+
+        SCachedCreationParams m_creation;
+        SCachedConstructionParams m_construction;
+};
+
+}
+#endif
diff --git a/40_PathTracer/include/renderer/resolve/IResolver.h b/40_PathTracer/include/renderer/resolve/IResolver.h
new file mode 100644
index 000000000..74e708edf
--- /dev/null
+++ b/40_PathTracer/include/renderer/resolve/IResolver.h
@@ -0,0 +1,47 @@
+// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_THIS_EXAMPLE_I_RESOLVER_H_INCLUDED_
+#define _NBL_THIS_EXAMPLE_I_RESOLVER_H_INCLUDED_
+
+
+#include "renderer/CSession.h"
+
+
+namespace nbl::this_example
+{
+
+class IResolver : public core::IReferenceCounted, public core::InterfaceUnmovable
+{
+    public:
+        // session last accepted by `changeSession`, null when there is none
+        inline CSession* getActiveSession() {return m_activeSession.get();}
+        inline const CSession* getActiveSession() const {return m_activeSession.get();}
+
+        // scratch size required for a given session, the overload without arguments asks for the active session
+        virtual uint64_t computeScratchSize(const CSession* session) const = 0;
+        inline uint64_t computeScratchSize() const {return computeScratchSize(m_activeSession.get());}
+
+        // takes over `session`; returns false and clears the active session when it is null, not initialized or rejected by `changeSession_impl`
+        inline bool changeSession(core::smart_refctd_ptr&& session)
+        {
+            m_activeSession = std::move(session);
+            if (!m_activeSession || !m_activeSession->isInitialized() || !changeSession_impl())
+            {
+                m_activeSession = {};
+                return false;
+            }
+            return true;
+        }
+
+        // NOTE(review): `cv` is presumably the command buffer to record into (the CBasicRWMCResolver override names it `cb`) - confirm
+        virtual bool resolve(video::IGPUCommandBuffer* cv, video::IGPUBuffer* scratch) = 0;
+
+    protected:
+        virtual bool changeSession_impl() = 0;
+
+        core::smart_refctd_ptr m_activeSession;
+};
+
+}
+#endif
diff --git a/40_PathTracer/include/renderer/shaders/common.hlsl b/40_PathTracer/include/renderer/shaders/common.hlsl
new file mode 100644
index 000000000..178159e62
--- /dev/null
+++ b/40_PathTracer/include/renderer/shaders/common.hlsl
@@ -0,0 +1,9 @@
+#ifndef _NBL_THIS_EXAMPLE_COMMON_HLSL_INCLUDED_
+#define _NBL_THIS_EXAMPLE_COMMON_HLSL_INCLUDED_
+
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+
+
+
+#endif // _NBL_THIS_EXAMPLE_COMMON_HLSL_INCLUDED_
diff --git a/40_PathTracer/include/renderer/shaders/pathtrace/common.hlsl b/40_PathTracer/include/renderer/shaders/pathtrace/common.hlsl
new file mode 100644
index 000000000..1a94419e4
--- /dev/null
+++ b/40_PathTracer/include/renderer/shaders/pathtrace/common.hlsl
@@ -0,0 +1,23 @@
+#ifndef _NBL_THIS_EXAMPLE_PATHTRACE_COMMON_HLSL_INCLUDED_
+#define 
_NBL_THIS_EXAMPLE_PATHTRACE_COMMON_HLSL_INCLUDED_
+
+
+#include "nbl/builtin/hlsl/random/xoroshiro.hlsl"
+
+#include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
+#include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl"
+
+namespace nbl
+{
+namespace this_example
+{
+NBL_CONSTEXPR uint32_t SceneDSIndex = 0;
+NBL_CONSTEXPR uint32_t SessionDSIndex = 1;
+}
+}
+#include "renderer/shaders/scene.hlsl"
+#include "renderer/shaders/session.hlsl"
+#include "renderer/shaders/pathtrace/push_constants.hlsl"
+
+
+#endif // _NBL_THIS_EXAMPLE_PATHTRACE_COMMON_HLSL_INCLUDED_
diff --git a/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl b/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl
new file mode 100644
index 000000000..ae12d441f
--- /dev/null
+++ b/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl
@@ -0,0 +1,48 @@
+#ifndef _NBL_THIS_EXAMPLE_PATHTRACE_PUSH_CONSTANTS_HLSL_INCLUDED_
+#define _NBL_THIS_EXAMPLE_PATHTRACE_PUSH_CONSTANTS_HLSL_INCLUDED_
+
+
+#include "renderer/shaders/session.hlsl"
+
+#include
+
+
+// no uint16_t to be used because it's going to be a push constant
+namespace nbl
+{
+namespace this_example
+{
+struct SSensorDynamics
+{
+    // assuming input will be ndc = [-1,1]^2 x {-1}
+    hlsl::float32_t3x4 ndcToRay;
+    hlsl::float32_t tMax;
+    // we can adaptively sample per-pixel, but
+    uint32_t minSPP : MAX_SPP_LOG2;
+    uint32_t maxSPP : MAX_SPP_LOG2;
+    uint32_t unused : BOOST_PP_SUB(32,BOOST_PP_MUL(MAX_SPP_LOG2,2)); // pads the bitfield to a full 32 bits
+};
+
+struct SPrevisPushConstants : SSensorDynamics
+{
+};
+
+// We do it so weirdly because https://github.com/microsoft/DirectXShaderCompiler/issues/7131
+#define MAX_SPP_PER_DISPATCH_LOG2 5
+struct SBeautyPushConstants : SSensorDynamics
+{
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSppPerDispatchLog2 = MAX_SPP_PER_DISPATCH_LOG2;
+
+    uint32_t maxSppPerDispatch : MAX_SPP_PER_DISPATCH_LOG2;
+    uint32_t unused : 27; // 32-MAX_SPP_PER_DISPATCH_LOG2, hides the `unused` of the base
+};
+#undef MAX_SPP_PER_DISPATCH_LOG2
+
+struct SDebugPushConstants : SSensorDynamics
+{
+    // some enum/choice of what to debug
+};
+
+}
+}
+#endif // _NBL_THIS_EXAMPLE_PATHTRACE_PUSH_CONSTANTS_HLSL_INCLUDED_
diff --git a/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl
new file mode 100644
index 000000000..6ffcd74f0
--- /dev/null
+++ b/40_PathTracer/include/renderer/shaders/present/push_constants.hlsl
@@ -0,0 +1,60 @@
+#ifndef _NBL_THIS_EXAMPLE_PRESENT_PUSH_CONSTANTS_HLSL_INCLUDED_
+#define _NBL_THIS_EXAMPLE_PRESENT_PUSH_CONSTANTS_HLSL_INCLUDED_
+
+
+#include "renderer/shaders/resolve/rwmc.hlsl"
+
+
+// no uint16_t to be used because it's going to be a push constant
+namespace nbl
+{
+namespace this_example
+{
+using namespace nbl::hlsl;
+
+struct SDefaultResolvePushConstants
+{
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t ImageCount = 16;
+
+    struct Regular
+    {
+        // if more than 1.f
+        float32_t2 scale;
+        // to visualize what will get cropped out
+        float32_t2 _min,_max;
+    };
+    struct Cubemap
+    {
+        // theoretically we only need inverse of product of 3x3 view with very sparse 4x4
+        float32_t4x4 invProjView;
+    };
+#ifndef __HLSL_VERSION
+    union
+    {
+        Regular regular;
+        Cubemap cubemap;
+    };
+#else
+    // note how this is a conversion to a copy, and not handing out of a reference
+    // Ergo, it's not a true "union"
+    inline Regular regular()
+    {
+        Regular retval;
+        retval.scale = __union.invProjView[0].xy;
+        retval._min = __union.invProjView[0].zw;
+        retval._max = __union.invProjView[1].xy;
+        return retval;
+    }
+    inline Cubemap cubemap() {return __union;}
+
+    Cubemap __union;
+#endif
+    // 3 extra bits for cube layer
+    uint32_t isCubemap : 1;
+    uint32_t layer : MAX_CASCADE_COUNT_LOG2;
+    uint32_t imageIndex : BOOST_PP_SUB(31,MAX_CASCADE_COUNT_LOG2); // remaining bits so the three fields total 32
+};
+
+}
+}
+#endif // _NBL_THIS_EXAMPLE_PRESENT_PUSH_CONSTANTS_HLSL_INCLUDED_
diff --git a/40_PathTracer/include/renderer/shaders/resolve/rwmc.hlsl b/40_PathTracer/include/renderer/shaders/resolve/rwmc.hlsl
new file mode 100644
index 
000000000..692a6fb16
--- /dev/null
+++ b/40_PathTracer/include/renderer/shaders/resolve/rwmc.hlsl
@@ -0,0 +1,35 @@
+#ifndef _NBL_THIS_EXAMPLE_RWMC_HLSL_INCLUDED_
+#define _NBL_THIS_EXAMPLE_RWMC_HLSL_INCLUDED_
+
+
+#include "renderer/shaders/common.hlsl"
+#include "nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl"
+#include "nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl"
+
+#include
+#include
+
+namespace nbl
+{
+namespace this_example
+{
+// We do it so weirdly because https://github.com/microsoft/DirectXShaderCompiler/issues/7131
+#define MAX_CASCADE_COUNT_LOG2 3
+
+// no uint16_t to be used because it's going to be a push constant
+struct SResolveConstants // TODO: move somewhere
+{
+    struct SProtoRWMC
+    {
+        hlsl::float32_t initialEmin;
+        hlsl::float32_t reciprocalBase;
+        hlsl::float32_t reciprocalKappa;
+        hlsl::float32_t colorReliabilityFactor;
+    } rwmc;
+    uint64_t cascadeCount : BOOST_PP_ADD(MAX_CASCADE_COUNT_LOG2,1); // one bit more than the log2, so the maximum count itself fits
+    uint64_t scratchBDA : BOOST_PP_SUB(63,MAX_CASCADE_COUNT_LOG2); // the remaining bits of the 64
+};
+
+}
+}
+#endif // _NBL_THIS_EXAMPLE_RWMC_HLSL_INCLUDED_
diff --git a/40_PathTracer/include/renderer/shaders/scene.hlsl b/40_PathTracer/include/renderer/shaders/scene.hlsl
new file mode 100644
index 000000000..4418d3040
--- /dev/null
+++ b/40_PathTracer/include/renderer/shaders/scene.hlsl
@@ -0,0 +1,68 @@
+#ifndef _NBL_THIS_EXAMPLE_SCENE_HLSL_INCLUDED_
+#define _NBL_THIS_EXAMPLE_SCENE_HLSL_INCLUDED_
+
+
+#include "renderer/shaders/common.hlsl"
+
+namespace nbl
+{
+namespace this_example
+{
+struct SSceneUniforms
+{
+    struct SInit
+    {
+        //
+//      bda_t pQuantizedSequence;
+        // because the PDF is rescaled to log2(luma)/log2(Max)*255
+        // and you get it out as `exp2(texValue)*factor`
+        hlsl::float32_t envmapPDFNormalizationFactor;
+        hlsl::float16_t envmapScale;
+        uint16_t unused;
+    } init;
+};
+
+struct SceneDSBindings // binding numbers used by the `vk::binding` declarations below
+{
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UBO = 0;
+    // RGB9E5 post multiplied by a max value
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t Envmap = 1;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t TLASes = 2;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 3;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t SampledImages = 4;
+    // UINT8 log2(luma) meant for stochastic descent or querying the PDF of the Warp Map
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t EnvmapPDF = 5;
+    // R16G16_UNORM or R32G32_SFLOAT (depending on envmap resolution) meant for skipping stochastic descent
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t EnvmapWarpMap = 6;
+};
+
+struct SceneDSBindingCounts // array sizes of the arrayed bindings declared below
+{
+    // Mostly held back by Intel ARC, important to not have more than this many light geometries, can increase to
+    // https://vulkan.gpuinfo.org/displayextensionproperty.php?extensionname=VK_KHR_acceleration_structure&extensionproperty=maxDescriptorSetUpdateAfterBindAccelerationStructures&platform=all
+    // https://vulkan.gpuinfo.org/displayextensionproperty.php?extensionname=VK_KHR_acceleration_structure&extensionproperty=maxPerStageDescriptorUpdateAfterBindAccelerationStructures&platform=all
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t TLASes = 65535;
+    // Reasonable combo (esp if we implement a cache over the DS)
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 128;
+    // Spec mandated minimum
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t SampledImages = 500000;
+};
+
+#ifdef __HLSL_VERSION
+[[vk::binding(SceneDSBindings::UBO,SceneDSIndex)]] ConstantBuffer gScene;
+// could be float32_t3
+[[vk::binding(SceneDSBindings::Envmap,SceneDSIndex)]] [[vk::combinedImageSampler]] Texture2D gEnvmap;
+[[vk::binding(SceneDSBindings::Envmap,SceneDSIndex)]] [[vk::combinedImageSampler]] SamplerState gEnvmapSampler;
+[[vk::binding(SceneDSBindings::TLASes,SceneDSIndex)]] RaytracingAccelerationStructure gTLASes[SceneDSBindingCounts::TLASes];
+[[vk::binding(SceneDSBindings::Samplers,SceneDSIndex)]] SamplerState gSamplers[SceneDSBindingCounts::Samplers];
+[[vk::binding(SceneDSBindings::SampledImages,SceneDSIndex)]] Texture2DArray gSampledImages[SceneDSBindingCounts::SampledImages];
+// could be float32_t
+[[vk::binding(SceneDSBindings::EnvmapPDF,SceneDSIndex)]] [[vk::combinedImageSampler]] Texture2D gEnvmapPDF;
+[[vk::binding(SceneDSBindings::EnvmapPDF,SceneDSIndex)]] [[vk::combinedImageSampler]] SamplerState gEnvmapPDFSampler;
+// could be float32_t2
+[[vk::binding(SceneDSBindings::EnvmapWarpMap,SceneDSIndex)]] [[vk::combinedImageSampler]] Texture2D gEnvmapWarpMap;
+[[vk::binding(SceneDSBindings::EnvmapWarpMap,SceneDSIndex)]] [[vk::combinedImageSampler]] SamplerState gEnvmapWarpMapSampler;
+#endif
+}
+}
+#endif // _NBL_THIS_EXAMPLE_SCENE_HLSL_INCLUDED_
diff --git a/40_PathTracer/include/renderer/shaders/session.hlsl b/40_PathTracer/include/renderer/shaders/session.hlsl
new file mode 100644
index 000000000..9b13b1126
--- /dev/null
+++ b/40_PathTracer/include/renderer/shaders/session.hlsl
@@ -0,0 +1,111 @@
+#ifndef _NBL_THIS_EXAMPLE_SESSION_HLSL_INCLUDED_
+#define _NBL_THIS_EXAMPLE_SESSION_HLSL_INCLUDED_
+
+
+#include "renderer/shaders/resolve/rwmc.hlsl"
+
+
+namespace nbl
+{
+namespace this_example
+{
+#define MAX_SPP_LOG2 15
+NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxSPPLog2 = MAX_SPP_LOG2;
+// need to be able to count (represent) both 0 and Max
+NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSPP = (0x1u << MaxSPPLog2) - 1;
+
+struct SSensorUniforms
+{
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t ScrambleKeyTextureSize = 512;
+
+#define MAX_PATH_DEPTH_LOG2 7
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxCascadeCountLog2 = MAX_CASCADE_COUNT_LOG2;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxPathDepthLog2 = MAX_PATH_DEPTH_LOG2;
+
+    hlsl::float32_t2 rcpPixelSize;
+    hlsl::rwmc::SplattingParameters splatting;
+    hlsl::uint16_t2 renderSize;
+    // bitfield
+    uint16_t lastCascadeIndex : MAX_CASCADE_COUNT_LOG2;
+    uint16_t unused0 : BOOST_PP_SUB(16,MAX_CASCADE_COUNT_LOG2);
+    // bitfield
+    uint16_t unused1 : 1;
+    uint16_t hideEnvironment : 1;
+    uint16_t lastPathDepth : MAX_PATH_DEPTH_LOG2;
+    uint16_t lastNoRussianRouletteDepth : MAX_PATH_DEPTH_LOG2;
+};
+#undef MAX_PATH_DEPTH_LOG2
+
+struct SensorDSBindings
+{
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UBO = 0;
+    // R32G32_UINT storage texture (can get animated/rearranged)
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t ScrambleKey = 1;
+    // R16_UINT Per Pixel Sample Count (so don't need to read all RWMC cascades)
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t SampleCount = 2;
+    // R64_UINT with packing RGB14E6 or RGB14E7 and using rest for spp in the cascade
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t RWMCCascades = 3;
+    // RGB5E9 (NOTE(review): presumably meant RGB9E5, as in the scene envmap comment - confirm)
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t Beauty = 4;
+    // R10G10B10_UNORM
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t Albedo = 5;
+    // modified R10G10B10_UNORM
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t Normal = 6;
+    // modified R10G10B10_UNORM
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t Motion = 7;
+    // R16_UNORM
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t Mask = 8;
+    // array of samplers, see `gSensorSamplers`
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 9;
+    // array of sampled textures, see `gSensorTextures`, indexed by `SampledImageIndex`
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t AsSampledImages = 10;
+
+    enum class SampledImageIndex : uint16_t // binding constants are qualified, an unqualified name finds the enumerator once it is declared
+    {
+        ScrambleKey = SensorDSBindings::ScrambleKey-SensorDSBindings::ScrambleKey,
+        SampleCount = SensorDSBindings::SampleCount-SensorDSBindings::ScrambleKey,
+        RWMCCascades = SensorDSBindings::RWMCCascades-SensorDSBindings::ScrambleKey,
+        Beauty = SensorDSBindings::Beauty-SensorDSBindings::ScrambleKey,
+        Albedo = SensorDSBindings::Albedo-SensorDSBindings::ScrambleKey,
+        Normal = SensorDSBindings::Normal-SensorDSBindings::ScrambleKey,
+        Motion = SensorDSBindings::Motion-SensorDSBindings::ScrambleKey,
+        Mask = SensorDSBindings::Mask-SensorDSBindings::ScrambleKey,
+        Count
+    };
+};
+
+struct SensorDSBindingCounts
+{
+    // array sizes of the arrayed bindings, AsSampledImages equals SampledImageIndex::Count
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t Samplers = 1;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t AsSampledImages = SensorDSBindings::Samplers-SensorDSBindings::ScrambleKey;
+};
+
+
+#ifdef __HLSL_VERSION
+[[vk::binding(SensorDSBindings::UBO,SessionDSIndex)]] ConstantBuffer gSensor;
+// could be uint32_t2
+[[vk::binding(SensorDSBindings::ScrambleKey,SessionDSIndex)]] RWTexture2DArray gScrambleKey;
+// could be uint32_t or even uint16_t
+[[vk::binding(SensorDSBindings::SampleCount,SessionDSIndex)]] RWTexture2DArray gSampleCount;
+// could be uint32_t2
+[[vk::binding(SensorDSBindings::RWMCCascades,SessionDSIndex)]] RWTexture2DArray gRWMCCascades;
+// could be uint32_t
+[[vk::binding(SensorDSBindings::Beauty,SessionDSIndex)]] RWTexture2DArray gBeauty;
+[[vk::binding(SensorDSBindings::Albedo,SessionDSIndex)]] RWTexture2DArray gAlbedo;
+// these two are snorm but stored as unorm, care needs to be taken to map:
+// [-1,1] <-> [0,1] but with 0 being exactly representable, so really [-1,1] <-> [1/1023,1]
+// Requires x*1022.f/2046.f+1024.f/2046.f shift/adjust for accumulation and storage
+// Then to decode back into [-1,1] need max(y*2046.f/1022.f-1024.f/1022.f,-1) = x
+[[vk::binding(SensorDSBindings::Normal,SessionDSIndex)]] RWTexture2DArray gNormal;
+[[vk::binding(SensorDSBindings::Motion,SessionDSIndex)]] RWTexture2DArray gMotion;
+// could be float32_t
+[[vk::binding(SensorDSBindings::Mask,SessionDSIndex)]] RWTexture2DArray gMask;
+//
+[[vk::binding(SensorDSBindings::Samplers,SessionDSIndex)]] SamplerState gSensorSamplers[SensorDSBindingCounts::Samplers];
+//
+[[vk::binding(SensorDSBindings::AsSampledImages,SessionDSIndex)]] Texture2DArray gSensorTextures[SensorDSBindingCounts::AsSampledImages];
+#endif
+}
+}
+#endif // _NBL_THIS_EXAMPLE_SESSION_HLSL_INCLUDED_
diff --git a/40_PathTracer/main.cpp b/40_PathTracer/main.cpp
new file mode 100644
index 000000000..c86be6909
--- /dev/null
+++ b/40_PathTracer/main.cpp
@@ -0,0 +1,592 @@
+// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/examples/common/BuiltinResourcesApplication.hpp" + +#include "nbl/examples/examples.hpp" + +#include "renderer/CRenderer.h" +#include "renderer/resolve/CBasicRWMCResolver.h" +#include "renderer/present/CWindowPresenter.h" + +#include "nlohmann/json.hpp" + + +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::application_templates; +using namespace nbl::examples; +using namespace nbl::this_example; + +// TODO: move to argument parsing class +struct AppArguments +{ + bool headless = false; +}; + + +class PathTracingApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication +{ + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; + + // TODO: move to Nabla proper + static inline void jsonizeGitInfo(nlohmann::json& target, const nbl::gtml::GitInfo& info) + { + target["isPopulated"] = info.isPopulated; + if (info.hasUncommittedChanges.has_value()) + target["hasUncommittedChanges"] = info.hasUncommittedChanges.value(); + else + target["hasUncommittedChanges"] = "UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"; + + target["commitAuthorName"] = info.commitAuthorName; + target["commitAuthorEmail"] = info.commitAuthorEmail; + target["commitHash"] = info.commitHash; + target["commitShortHash"] = info.commitShortHash; + target["commitDate"] = info.commitDate; + target["commitSubject"] = info.commitSubject; + target["commitBody"] = info.commitBody; + target["describe"] = info.describe; + target["branchName"] = info.branchName; + target["latestTag"] = info.latestTag; + target["latestTagName"] = info.latestTagName; + } + + inline void printGitInfos() const + { + nlohmann::json j; + + auto& modules = j["modules"]; + jsonizeGitInfo(modules["nabla"],nbl::gtml::nabla_git_info); + 
jsonizeGitInfo(modules["dxc"],nbl::gtml::dxc_git_info); + + m_logger->log("Build Info:\n%s",ILogger::ELL_INFO,j.dump(4).c_str()); + } + + + public: + inline PathTracingApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline IAPIConnection::SFeatures getAPIFeaturesToEnable() override + { + auto retval = device_base_t::getAPIFeaturesToEnable(); + if (m_args.headless) + retval.swapchainMode = E_SWAPCHAIN_MODE::ESM_NONE; + return retval; + } + + inline SPhysicalDeviceFeatures getRequiredDeviceFeatures() const override + { + auto retval = device_base_t::getRequiredDeviceFeatures(); + return retval.unionWith(CRenderer::RequiredDeviceFeatures()); + } + + inline SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + if (m_args.headless) + retval.swapchainMode = E_SWAPCHAIN_MODE::ESM_NONE; + return retval.unionWith(CRenderer::PreferredDeviceFeatures()); + } + + inline SPhysicalDeviceLimits getRequiredDeviceLimits() const override + { + auto retval = device_base_t::getRequiredDeviceLimits(); + // TODO: need union/superset + retval.shaderStorageImageReadWithoutFormat = true; + return retval; + } + + inline void filterDevices(nbl::core::set& physicalDevices) const override + { + device_base_t::filterDevices(physicalDevices); + std::erase_if(physicalDevices,[&](const IPhysicalDevice* device)->bool + { + const auto& props = device->getMemoryProperties(); + uint64_t largestVRAMHeap = 0; + using heap_flags_e = IDeviceMemoryAllocation::E_MEMORY_HEAP_FLAGS; + for (uint32_t h=0; hgetDirectVRAMAccessMemoryTypeBits(); + for (uint32_t t=0; t>t)&0x1u) && props.memoryHeaps[props.memoryTypes[t].heapIndex].size==largestVRAMHeap) + return false; + m_logger->log("Filtering out Device %p (%s) due to lack of 
ReBAR",ILogger::ELL_WARNING,device,device->getProperties().deviceName); + return true; + } + ); + } + + inline nbl::core::vector getSurfaces() const override + { + if (m_args.headless) + return {}; + + if (!m_presenter) + { + const_cast&>(m_presenter) = CWindowPresenter::create({ + { + .assMan = m_assetMgr, + .logger = smart_refctd_ptr(m_logger) + }, + { + .winMgr = m_winMgr + }, + m_api, + make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem),smart_refctd_ptr(m_logger)), + "Path Tracer" + }); + } + + if (m_presenter) + { + const auto* presenter = m_presenter.get(); + return { {presenter->getSurface()/*,EQF_NONE*/} }; + } + + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + // TODO: parse the arguments + m_args.headless = false; + + if (!m_args.headless) + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + if (m_args.headless) + { + if (!BasicMultiQueueApplication::onAppInitialized(smart_refctd_ptr(system))) + return false; + } + else if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + printGitInfos(); + + // + if (!m_args.headless && !m_presenter) + return logFail("Failed to create CWindowPresenter"); + + // + m_renderer = CRenderer::create({ + { + .graphicsQueue = getGraphicsQueue(), + .computeQueue = getComputeQueue(), + .uploadQueue = getTransferUpQueue(), + .utilities = smart_refctd_ptr(m_utils) + }, + "TODO Sample sequence cache", + m_assetMgr.get() + }); + if (!m_renderer) + return logFail("Failed to create CRenderer"); + + // + if (!m_args.headless && !m_presenter->init(m_renderer.get())) + return logFail("Failed to initialize CWindowPresenter"); + + // + m_resolver = CBasicRWMCResolver::create({ + {}, + m_renderer.get() + }); + if (!m_resolver) + return logFail("Failed to create CBasicRWMCResolver"); + + // set up the scene loader + m_sceneLoader = 
CSceneLoader::create({ + { + .assMan = smart_refctd_ptr(m_assetMgr), + .logger = smart_refctd_ptr(m_logger) + } + }); + + // TODO: tmp code + { + m_api->startCapture(); + auto scene_daily_pt = m_renderer->createScene({ + .load = m_sceneLoader->load({ + .relPath = sharedInputCWD/"mitsuba/daily_pt.xml", + .workingDirectory = localOutputCWD + }), + .converter = nullptr + }); + // the UI would have you load the zip first, then present a dropdown of what to load + // but still need to support archive mount for cmdline load + #if 0 // this particular zip goes down an unsupported path in our zip loader + auto scene_bedroom = m_sceneLoader->load({ + .relPath = sharedInputCWD/"mitsuba/bedroom.zip/scene.xml", + .workingDirectory = localOutputCWD + }); + #endif + m_api->endCapture(); + + // quick test code + nbl::core::vector sensors(3,scene_daily_pt->getSensors().front()); + { + sensors[1].mutableDefaults.cropWidth = 640; + sensors[1].mutableDefaults.cropHeight = 360; + sensors[1].mutableDefaults.cropOffsetX = 0; + sensors[1].mutableDefaults.cropOffsetY = 0; + } + { + sensors[2].mutableDefaults.cropWidth = 5120; + sensors[2].mutableDefaults.cropHeight = 2880; + sensors[2].mutableDefaults.cropOffsetX = 128; + sensors[2].mutableDefaults.cropOffsetY = 128; + } + for (auto i=1; i<3; i++) + { + sensors[i].constants.width = sensors[i].mutableDefaults.cropWidth+2*sensors[i].mutableDefaults.cropOffsetX; + sensors[i].constants.height = sensors[i].mutableDefaults.cropHeight+2*sensors[i].mutableDefaults.cropOffsetY; + } + sensors.erase(sensors.begin()); + for (const auto& sensor : sensors) + m_sessionQueue.push( + scene_daily_pt->createSession({ + {.mode=CSession::RenderMode::Debug},&sensor + }) + ); + } + + return true; + +#if 0 // ui + // gui descriptor setup + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ETC_REPEAT; + params.TextureWrapV = ETC_REPEAT; + 
params.TextureWrapW = ETC_REPEAT; + + m_ui.samplers.gui = m_device->createSampler(params); + m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); + } + + std::array, 69u> immutableSamplers; + for (auto& it : immutableSamplers) + it = smart_refctd_ptr(m_ui.samplers.scene); + + immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); + + nbl::ext::imgui::UI::SCreationParameters params; + + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; + params.assetManager = m_assetMgr; + params.pipelineCache = nullptr; + params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); + params.renderpass = smart_refctd_ptr(renderpass); + params.streamingBuffer = nullptr; + params.subpassIx = 0u; + params.transfer = getGraphicsQueue(); + params.utilities = m_utils; + { + m_ui.manager = ext::imgui::UI::create(std::move(params)); + + // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + const auto& params = m_ui.manager->getCreationParameters(); + + IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; + descriptorPoolInfo.maxSets = 1u; + descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; + + m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); + assert(m_guiDescriptorSetPool); + + 
m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); + assert(m_ui.descriptorSet); + } + } + + m_ui.manager->registerListener( + [this]() -> void { + ImGuiIO& io = ImGui::GetIO(); + + m_camera.setProjectionMatrix([&]() + { + static matrix4SIMD projection; + + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + core::radians(m_cameraSetting.fov), + io.DisplaySize.x / io.DisplaySize.y, + m_cameraSetting.zNear, + m_cameraSetting.zFar); + + return projection; + }()); + + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Controls"); + + ImGui::SameLine(); + + ImGui::Text("Camera"); + + ImGui::SliderFloat("Move speed", &m_cameraSetting.moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &m_cameraSetting.rotateSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Fov", &m_cameraSetting.fov, 20.f, 150.f); + ImGui::SliderFloat("zNear", &m_cameraSetting.zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &m_cameraSetting.zFar, 110.f, 10000.f); + Light m_oldLight = m_light; + int light_type = m_light.type; + ImGui::ListBox("LightType", &light_type, s_lightTypeNames, ELT_COUNT); + m_light.type = static_cast(light_type); + if (m_light.type == ELT_DIRECTIONAL) + { + ImGui::SliderFloat3("Light Direction", &m_light.direction.x, -1.f, 1.f); + } + else if (m_light.type == ELT_POINT) + { + ImGui::SliderFloat3("Light Position", &m_light.position.x, -20.f, 20.f); + } + else if (m_light.type == ELT_SPOT) + { + ImGui::SliderFloat3("Light Direction", &m_light.direction.x, -1.f, 1.f); + ImGui::SliderFloat3("Light Position", &m_light.position.x, -20.f, 20.f); + + float32_t dOuterCutoff = hlsl::degrees(acos(m_light.outerCutoff)); + if (ImGui::SliderFloat("Light Outer 
Cutoff", &dOuterCutoff, 0.0f, 45.0f)) + { + m_light.outerCutoff = cos(hlsl::radians(dOuterCutoff)); + } + } + ImGui::Checkbox("Use Indirect Command", &m_useIndirectCommand); + if (m_light != m_oldLight) + { + m_frameAccumulationCounter = 0; + } + + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + + ImGui::End(); + } + ); +#endif + } + +#if 0 // gui + bool updateGUIDescriptorSet() + { + // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout + static std::array descriptorInfo; + static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; + + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); + + for (uint32_t i = 0; i < descriptorInfo.size(); ++i) + { + writes[i].dstSet = m_ui.descriptorSet.get(); + writes[i].binding = 0u; + writes[i].arrayElement = i; + writes[i].count = 1u; + } + writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; + + return m_device->updateDescriptorSets(writes, {}); + } +#endif + + inline void workLoopBody() override + { + CSession* session; + volatile bool skip = false; // skip using the debugger + for (session=m_resolver->getActiveSession(); !session || session->getProgress()>=1.f || skip;) + { + skip = false; + if (m_sessionQueue.empty()) + { + if (!m_args.headless) + handleInputs(); + return; + } + session = m_sessionQueue.front().get(); + // init + m_utils->autoSubmit({.queue=getGraphicsQueue()},[&session](SIntendedSubmitInfo& info)->bool + { + return session->init(info.getCommandBufferForRecording()->cmdbuf); + } + ); + m_resolver->changeSession(std::move(m_sessionQueue.front())); + m_sessionQueue.pop(); + } + + m_api->startCapture(); + IQueue::SSubmitInfo::SSemaphoreInfo rendered = {}; + { + auto 
deferredSubmit = m_renderer->render(session); + if (deferredSubmit) + { + IGPUCommandBuffer* const cb = deferredSubmit; + if (!m_args.headless || session->getProgress()>=1.f) + { + m_resolver->resolve(cb,nullptr); + } + rendered = deferredSubmit({}); + } + } + m_api->endCapture(); + + if (m_args.headless) + return; + handleInputs(); + if (!keepRunning()) + return; + + m_presenter->acquire(session); + auto* const cb = m_presenter->beginRenderpass(); + { + // can do additional stuff like ImGUI work here + } + m_presenter->endRenderpassAndPresent(rendered); +#if 0 // gui + +// ... + const auto uiParams = m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cmdbuf->bindGraphicsPipeline(uiPipeline); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); + ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + m_ui.manager->render(cmdbuf, waitInfo); + + { + { + + updateGUIDescriptorSet(); + + } + } +#endif + } + + inline void handleInputs() + { + if (m_args.headless) + return; + + m_inputSystem->getDefaultMouse(&m_mouse); + m_inputSystem->getDefaultKeyboard(&m_keyboard); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } capturedEvents; + +// const auto& io = ImGui::GetIO(); + static std::chrono::microseconds previousEventTimestamp{}; + m_mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.mouse.emplace_back(e); + + } + }, m_logger.get() + ); + m_keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + 
capturedEvents.keyboard.emplace_back(e); + } + }, m_logger.get() + ); +#if 0 // ui + const SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); + + const nbl::ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), m_window->getHeight() }, + .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; + m_ui.manager->update(params); +#endif + } + + inline bool keepRunning() override + { + if (m_args.headless) + { + if (auto* const currentSession=m_resolver->getActiveSession(); m_sessionQueue.empty() && (!currentSession || currentSession->getProgress()>=1.f)) + return false; + return true; + } + else + return !m_presenter->irrecoverable(); + } + + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + + private: + AppArguments m_args = {}; + // + smart_refctd_ptr m_inputSystem; + InputSystem::ChannelReader m_mouse; + InputSystem::ChannelReader m_keyboard; + // + smart_refctd_ptr m_presenter; + // + smart_refctd_ptr m_renderer; + smart_refctd_ptr m_resolver; + // + smart_refctd_ptr m_sceneLoader; + // + nbl::core::queue> m_sessionQueue; + +#if 0 // gui + struct C_UI + { + nbl::core::smart_refctd_ptr manager; + + struct + { + core::smart_refctd_ptr gui, scene; + } samplers; + + core::smart_refctd_ptr descriptorSet; + } m_ui; + core::smart_refctd_ptr m_guiDescriptorSetPool; +#endif + +}; +NBL_MAIN_FUNC(PathTracingApp) \ No newline at end of file diff --git a/40_PathTracer/src/io/CSceneLoader.cpp b/40_PathTracer/src/io/CSceneLoader.cpp new file mode 100644 index 000000000..4507c64dd 
--- /dev/null +++ b/40_PathTracer/src/io/CSceneLoader.cpp @@ -0,0 +1,541 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#define _NBL_THIS_EXAMPLE_C_SCENE_LOADER_CPP_ +#include "io/CSceneLoader.h" + +#include "nbl/builtin/hlsl/testing/relative_approx_compare.hlsl" + +#include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" +#include "nbl/ext/MitsubaLoader/CSerializedLoader.h" + +#include "nlohmann/json.hpp" + + +// +namespace nbl::system::impl +{ +template<> +struct to_string_helper +{ + public: + static inline std::string __call(const nbl::this_example::CSceneLoader::SLoadResult::SSensor& value) + { + nlohmann::json j; + j["valid"] = bool(value); + + auto& constants = j["constants"]; + { + auto& mutableDefaults = j["mutableDefaults"]; + const auto& _mutableDefaults = value.mutableDefaults; + mutableDefaults["absoluteTransform"] = system::to_string(_mutableDefaults.absoluteTransform); + { + auto& raygen = mutableDefaults["raygen"]; + const auto& _raygen = _mutableDefaults.raygen; + raygen["virtualPlaneFromNDC"] = system::to_string(hlsl::float32_t2x3(_raygen)); + { + auto& clipPlanes = mutableDefaults["clipPlanes"]; + for (uint8_t i=0,count=_mutableDefaults.getClipPlaneCount(); i CSceneLoader::create(SCreationParams&& _params) +{ + if (!_params) + return nullptr; + SConstructorParams params = {std::move(_params)}; + + // add the loaders + { + auto* const assMan = params.assMan.get(); + auto* const system = assMan->getSystem(); + + bool success = true; + success = success && assMan->addAssetLoader(make_smart_refctd_ptr(smart_refctd_ptr(system)))!=0xdeadbeefu; + // some of our test scenes won't load without the `.serialized` support + success = success && assMan->addAssetLoader(make_smart_refctd_ptr()) != 0xdeadbeefu; + + if (!success) + { + params.logger.log("Could not add Mitsuba Asset Loaders", ILogger::ELL_ERROR); + return 
nullptr; + } + } + + return core::smart_refctd_ptr(new CSceneLoader(std::move(params)),core::dont_grab); +} + +auto CSceneLoader::load(SLoadParams&& _params) -> SLoadResult +{ + IAssetLoader::SAssetLoadParams params = {}; + params.workingDirectory = _params.workingDirectory; + auto logger = params.logger = m_params.logger.get().get(); + + auto* const assMan = m_params.assMan.get(); + // handle archive stuff + const auto relPath = _params.relPath.lexically_normal(); + auto* const system = assMan->getSystem(); + core::stack archiveStack; + for (auto it=relPath.begin(); it!=relPath.end();) + { + const auto ext = (it++)->extension().string(); + if (strcmpi(ext.c_str(),".zip")==0) + { + // some N4950 defect makes it impossible + //const auto archPath = system::path(relPath.begin(),it); + const auto archPath = std::accumulate(relPath.begin(),it,system::path(),[](const system::path& lhs, const system::path& rhs)->system::path + { + return lhs/rhs; + } + ); + auto archive = system->openFileArchive(archPath); + archiveStack.push(archive.get()); + system->mount(std::move(archive)); + } + } + + const auto relPathStr = relPath.string(); + auto asset = assMan->getAsset(relPathStr,params); + if (asset.getContents().empty()) + { + logger.log( + "Failed to Load Mitsuba scene from \"%s\" with working directory \"%s\"", + ILogger::ELL_ERROR,relPathStr.c_str(),_params.workingDirectory.lexically_normal().string().c_str() + ); + return {}; + } + logger.log("Loaded %s",ILogger::ELL_INFO,relPathStr.c_str()); + + // now unmount the archives + for (; !archiveStack.empty(); archiveStack.pop()) + system->unmount(archiveStack.top()); + + const auto type = asset.getAssetType(); + if (type!=IAsset::E_TYPE::ET_SCENE) + { + logger.log("But did not load an `ICPUScene` type is %S",ILogger::ELL_ERROR,system::to_string(type)); + return {}; + } + + const auto* const untypedMeta = asset.getMetadata(); + if (!untypedMeta || strcmpi(untypedMeta->getLoaderName(),CMitsubaMetadata::LoaderName)!=0) + { + 
logger.log("Loaded an ICPUScene but without `CMistubaMetadata`",ILogger::ELL_ERROR); + return {}; + } + const auto* const meta = static_cast(untypedMeta); + + // + auto& integrator = meta->m_global.m_integrator; + + // TODO: compute/get this from minumum extent of scene + float sceneSize = 50.f; + + // + core::vector sensors; + auto& _sensors = meta->m_global.m_sensors; + if (_sensors.empty()) + { + logger.log("The `CMistubaMetadata` contains no sensors",ILogger::ELL_ERROR); + return {}; + } + else + { + sensors.resize(_sensors.size()); + logger.log("Total number of Sensors = %d",ILogger::ELL_INFO,sensors.size()); + const bool shouldHaveSensorIdxInFileName = sensors.size()>1; + const auto mainFileName = relPath.filename(); + for (auto i=0; i::min) + { + logger.log("Sensor %s (%d-th in XML) has non invertible singular transformation!",ILogger::ELL_ERROR,id,i); + constants = {}; + continue; + } + // extract and remove scale, also make the transform right-handed + { + scaleRcp = rsqrt({ + dot(orientationT[0],orientationT[0]), + dot(orientationT[1],orientationT[1]), + dot(orientationT[2],orientationT[2]) + }); + // unflip X if left handed + if (det<0.f) + scaleRcp.x = -scaleRcp.x; + // Old Code View Matrix: + // LH X+ = Left, Y+ = Up, Z+ = Backward + // RH X+ = Right, Y+ = Up, Z+ = Forward + // Basically RH view matrix used to make the Forward direction Z-, so LH projection matrix flupped it to have Z+ and W+ (cancel out) + // The only thing that stayed was the flipping of the X direction. 
+ // ------------------------------------------ + // If we're using our animators, then we can't have negative scales on odd number of axes + // the animators will re-create the camera from forward and up axes with right handed matrix + // New Sensor code should take a look at inverse Projection Matrix to determine the dNDC/dView directions + // nearPlaneCenter = mul(invProj,float(0,0,0,1)) = invProj.column[3] + // ndcXDir = normalize(invProj.column[0].xyz*nearPlaneCenter.w-nearPlaneCenter.xyz*invProj.column[3].w) = if regular matrix = normalize(invProj.column[0].xyz) + // ndcYDir = normalize(invProj.column[1].xyz*nearPlaneCenter.w-nearPlaneCenter.xyz*invProj.column[3].w) = if regular matrix = normalize(invProj.column[1].xyz) + for (auto r=0; r<3; r++) + { + orientationT[r] *= scaleRcp[r]; + absoluteTransform[r].xyz *= scaleRcp; + } + } + } + mutableDefaults.absoluteTransform = absoluteTransform; + } + // raygen + auto& ndc = mutableDefaults.raygen.encoded; + switch (_sensor.type) + { + case mts_sensor_t::Type::THINLENS: + logger.log("Sensor %s (%d-th in XML) is THINLENS, Depth of Field not implemented yet, demoting to PERSPECTIVE!",ILogger::ELL_WARNING,id,i); + [[fallthrough]]; + case mts_sensor_t::Type::PERSPECTIVE: + { + const auto& persp = _sensor.perspective; + // calculations for the projection plane behind the aperture (or in-front if thinking virtual) + const float halfFoVRad = hlsl::radians(persp.fov)*0.5f; + const auto halfSize = hlsl::tan(halfFoVRad); + // by default FoV is y-axis + float halfHeight = halfSize; + float halfWidth = halfSize; + // + const float aspectRatio = float(constants.width)/float(constants.height); + using fov_axis_e = mts_sensor_t::PerspectivePinhole::FOVAxis; + switch (persp.fovAxis) + { + case fov_axis_e::X: + halfHeight /= aspectRatio; + break; + case fov_axis_e::Y: + halfWidth *= aspectRatio; + break; + case fov_axis_e::DIAGONAL: + { + // halfSize^2 == halfWidth^2+halfHeight^2 == (1+aspectRatio^2)*halfHeight^2 + halfHeight /= 
hlsl::sqrt(1.f+aspectRatio*aspectRatio); + halfWidth = halfHeight*aspectRatio; + } + break; + case fov_axis_e::SMALLER: + if (aspectRatio<1.f) + halfHeight /= aspectRatio; + else + halfWidth *= aspectRatio; + break; + case fov_axis_e::LARGER: + if (aspectRatio<1.f) + halfWidth *= aspectRatio; + else + halfHeight /= aspectRatio; + break; + default: + break; + } + // max 1/4 circle + if (!(halfWidth>0.f && halfHeight>0.f)) + { + ndc[1][1] = core::nan(); + logger.log("Sensor %s (%d-th in XML) had a Field of View of %f degrees!",ILogger::ELL_ERROR,id,i,persp.fov); + break; + } + // elongating camera along Z will shrink the effective FOV + ndc[0] = float32_t3(scaleRcp.z/scaleRcp.x,0.f,hlsl::sign(scaleRcp.x)*persp.shiftX); + // column gets negated because in Vulkan NDC.y runs downwards + ndc[1] = -float32_t3(0.f,scaleRcp.z/scaleRcp.y,persp.shiftY)*halfHeight; + } + break; + case mts_sensor_t::Type::TELECENTRIC: + logger.log("Sensor %s (%d-th in XML) is TELECENTRIC, Depth of Field not implemented yet, demoting to ORTHOGRAPHIC!",ILogger::ELL_WARNING,id,i); + [[fallthrough]]; + case mts_sensor_t::Type::ORTHOGRAPHIC: + { + const auto& ortho = _sensor.orthographic; + // extract and negate the scale from the + ndc[0] = float32_t3(scaleRcp.x,0.f,0.f); + ndc[1] = float32_t3(0.f,scaleRcp.y*float(constants.height)/float(constants.width),0.f); + } + break; + case mts_sensor_t::Type::SPHERICAL: + // irrelevant for spherical cameras, we send rays everywhere + ndc[0] = promote(0); + ndc[1] = promote(0); + break; + default: + ndc[0][0] = core::nan(); + break; + } + if (hlsl::isnan(ndc[0][0])) + { + logger.log("Sensor %s (%d-th in XML) has invalid projection, had type %s!",ILogger::ELL_ERROR,id,i,system::to_string(_sensor.type).c_str()); + constants = {}; + continue; + } + // clip planes + auto outClipPlane = mutableDefaults.clipPlanes.begin(); + for (auto i=0; i(0.f); + if (any(glsl::notEqual(plane,rhs))) + { + if (outClipPlane>mutableDefaults.clipPlanes.end()) + { + logger.log("Sensor 
%s (%d-th in XML) has more than %d clip planes, ignoreing the rest!",ILogger::ELL_ERROR,id,i); + break; + } + *(outClipPlane++) = plane; + } + } + // ignore crops for spherical cameras + if (!isSpherical) + { + mutableDefaults.cropWidth = film.cropWidth; + mutableDefaults.cropHeight = film.cropHeight; + mutableDefaults.cropOffsetX = film.cropOffsetX; + mutableDefaults.cropOffsetY = film.cropOffsetY; + } + // + mutableDefaults.nearClip = base.nearClip; + mutableDefaults.farClip = base.farClip; + // + mutableDefaults.cascadeLuminanceBase = film.cascadeLuminanceBase; + mutableDefaults.cascadeLuminanceStart = film.cascadeLuminanceStart; + // + integrator.visit([&mutableDefaults](auto& var)->void + { + if constexpr (std::is_base_of_v>) + mutableDefaults.hideEnvironment = var.hideEnvironment; + } + ); + integrator.visit([&mutableDefaults](auto& var)->void + { + if constexpr (std::is_base_of_v>) + { + mutableDefaults.maxPathDepth = var.maxPathDepth; + mutableDefaults.russianRouletteDepth = var.russianRouletteDepth; + } + } + ); + } + { + using dyn_t = SLoadResult::SSensor::SDynamic; + dyn_t& dynamicDefaults = sensors[i].dynamicDefaults; + // output file settings + { + std::filesystem::path outputFilePath = film.outputFilePath; + // handle missing output path + if (outputFilePath.empty()) + { + const auto extensionStr = fileExtensionFromFormat(film.fileFormat); + core::string filename = "Render_" + mainFileName.stem().string(); + if(shouldHaveSensorIdxInFileName) + filename += "_Sensor_" + system::to_string(i) + extensionStr.data(); + else + filename += extensionStr; + logger.log("Sensor %s (%d-th in XML) has no output path, deduced to \"%s\"",ILogger::ELL_WARNING,id,i,filename.c_str()); + outputFilePath = filename; + } + std::string_view extension = ""; + bool invalid = false; + if (auto ext=outputFilePath.extension().string(); ext.size()>2) + { + extension = {ext.begin()+1,ext.end()}; + using format_e = ext::MitsubaLoader::CElementFilm::FileFormat; + switch 
(film.fileFormat) + { + case format_e::PNG: + invalid = strcmpi(extension.data(),"png")!=0; + break; + case format_e::OPENEXR: + invalid = strcmpi(extension.data(),"exr")!=0; + break; + case format_e::JPEG: + invalid = strcmpi(extension.data(),"jpg")!=0 && strcmpi(extension.data(),"jpe")!=0 && strcmpi(extension.data(),"jpeg")!=0 && + strcmpi(extension.data(),"jif")!=0 && strcmpi(extension.data(),"jfif")!=0 && strcmpi(extension.data(),"jfi")!=0; + break; + default: + break; + } + } + if (invalid) + { + logger.log("Sensor %s (%d-th in XML) has invalid format %d or extension \"%s\"",ILogger::ELL_ERROR,id,i,system::to_string(film.fileFormat),extension.data()); + dynamicDefaults = {}; + continue; + } + dynamicDefaults.outputFilePath = std::move(outputFilePath); +#if 0 // not part of the loader, do somewhere else + // + if (outputFilePath.is_relative()) + { + logger.log("Film output path is relative: \"%s\"",ILogger::ELL_INFO,outputFilePath.c_str()); + // output relative to output dir + // or the XML if so wished (walk backward and determine which directories are read only) + } +#endif + } + // post process + { + dynamicDefaults.postProc.bloomFilePath = film.denoiserBloomFilePath; + dynamicDefaults.postProc.bloomScale = film.denoiserBloomScale; + dynamicDefaults.postProc.bloomIntensity = film.denoiserBloomIntensity; + dynamicDefaults.postProc.tonemapperArgs = std::string(film.denoiserTonemapperArgs); + } + // up vector + { + // true forward may be Z+ or Z- + const auto viewSpaceZ = orientationT[2]; + // our "right" will only be X+ if forward is Z- + const auto reconstructedRight = cross(base.up,viewSpaceZ); + const auto actualRight = cross(orientationT[1],viewSpaceZ); + // but it doesn't matter here for this check (both will be flipped, dot product identical) + const float dp = dot(reconstructedRight,actualRight); + const float pb = dot(base.up,viewSpaceZ); + // special formulation avoiding multiple sqrt and inversesqrt to preserve precision + const auto reconstructedLen 
= hlsl::length(reconstructedRight); + logger.log("Camera Reconstructed Up Vector match score = %f",system::ILogger::ELL_INFO,dp/reconstructedLen); + const float64_t threshold = 0.9996*hlsl::length(base.up); + if (testing::relativeApproxCompare(dp,reconstructedLen,0.03f) && hlsl::abs(pb)setStepZoomMultiplier(logarithmicZoomSpeed); + } + else if (!hlsl::isnan(base.zoomSpeed)) + logger.log("Sensor %s (%d-th in XML) is SPHERICAL, zoom speed gets ignored!",ILogger::ELL_WARNING,id,i); + dynamicDefaults.samplesNeeded = _sensor.sampler.sampleCount; + dynamicDefaults.kappa = constants.cascadeCount<2 ? 0.f:film.rfilter.kappa; + dynamicDefaults.Emin = film.rfilter.Emin; + if (film.envmapRegularizationFactor>0.f) + logger.log("Sensor %s (%d-th in XML) `envmapRegularizationFactor=%f` is deprecated and ignored, we do MIS now",ILogger::ELL_WARNING,id,i,film.envmapRegularizationFactor); + } + } + // log + for (auto i=0; iclearAllAssetCache(); + // return + return { + .scene = IAsset::castDown(asset.getContents()[0]), + .sensors = std::move(sensors) + }; +} + +} \ No newline at end of file diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp new file mode 100644 index 000000000..ccb8d595d --- /dev/null +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -0,0 +1,710 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#include "renderer/CRenderer.h" +#include "renderer/SAASequence.h" + +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + +#include +#include +#include +#include + + +namespace nbl::this_example +{ +using namespace nbl::core; +using namespace nbl::asset; +using namespace nbl::system; +using namespace nbl::video; + + +smart_refctd_ptr CRenderer::loadPrecompiledShader_impl(IAssetManager* assMan, const core::string& key, logger_opt_ptr logger) +{ + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = logger; + lp.workingDirectory = "app_resources"; // virtual root + auto assetBundle = assMan->getAsset(key,lp); + const auto assets = assetBundle.getContents(); + if (!assets.empty()) + if (auto shader = IAsset::castDown(*assets.begin()); shader) + return shader; + + logger.log("Failed to load precompiled shader %s", ILogger::ELL_ERROR, key.c_str()); + return nullptr; +} + +// +smart_refctd_ptr CRenderer::create(SCreationParams&& _params) +{ + if (!_params) + return nullptr; + SConstructorParams params = {std::move(_params)}; + + // + if (!params.logger.get()) + params.logger = smart_refctd_ptr(params.utilities->getLogger()); + logger_opt_ptr logger = params.logger.get().get(); + + // + auto checkNullObject = [¶ms,logger](auto& obj, const std::string_view debugName)->bool + { + if (!obj) + { + logger.log("Failed to Create %s Object!",ILogger::ELL_ERROR,debugName.data()); + return true; + } + obj->setObjectDebugName(debugName.data()); + return false; + }; + + // + ILogicalDevice* device = params.utilities->getLogicalDevice(); + + // + params.semaphore = device->createSemaphore(0); + if (checkNullObject(params.semaphore,"CRenderer Semaphore")) + return nullptr; + + // basic samplers + const auto samplerDefaultRepeat = device->createSampler({}); + + using render_mode_e = CSession::RenderMode; + // create the layouts + { + constexpr auto RTStages = hlsl::ShaderStage::ESS_ALL_RAY_TRACING;// 
| hlsl::ShaderStage::ESS_COMPUTE; + constexpr auto RenderingStages = RTStages | hlsl::ShaderStage::ESS_COMPUTE; + // descriptor + { + using binding_create_flags_t = IDescriptorSetLayoutBase::SBindingBase::E_CREATE_FLAGS; + constexpr IGPUDescriptorSetLayout::SBinding UBOBinding = { + .binding = SensorDSBindings::UBO, + .type = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER, + .createFlags = binding_create_flags_t::ECF_NONE, + .stageFlags = RenderingStages, + .count = 1 + }; + // the generic single-UBO + { + params.uboDSLayout = device->createDescriptorSetLayout({&UBOBinding,1}); + if (checkNullObject(params.uboDSLayout,"Generic Single UBO Layout")) + return nullptr; + } + constexpr auto DescriptorIndexingFlags = binding_create_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_create_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | binding_create_flags_t::ECF_PARTIALLY_BOUND_BIT; + // + auto singleStorageImage = [](const uint32_t binding)->IGPUDescriptorSetLayout::SBinding + { + return { + .binding = binding, + .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = binding_create_flags_t::ECF_PARTIALLY_BOUND_BIT, + .stageFlags = RenderingStages, + .count = 1 + }; + }; + // TODO: provide these two samplers from Envmap Importance sampling extension + const auto samplerNearestRepeat = device->createSampler({ + { + .MinFilter = ISampler::E_TEXTURE_FILTER::ETF_NEAREST, + .MaxFilter = ISampler::E_TEXTURE_FILTER::ETF_NEAREST, + .MipmapMode = ISampler::E_SAMPLER_MIPMAP_MODE::ESMM_NEAREST, + .AnisotropicFilter = 0, + }, + 0.f, + 0.f, + 0.f + }); + // bindless everything + { + // TODO: provide these two samplers from Envmap Importance sampling extension + const auto samplerEnvmapPDF = samplerNearestRepeat; + const auto samplerEnvmapWarpmap = device->createSampler({ + { + .MinFilter = ISampler::E_TEXTURE_FILTER::ETF_LINEAR, + .MaxFilter = ISampler::E_TEXTURE_FILTER::ETF_LINEAR, + .MipmapMode = ISampler::E_SAMPLER_MIPMAP_MODE::ESMM_NEAREST, + .AnisotropicFilter = 0, + }, + 0.f, + 0.f, 
+ 0.f + }); + std::initializer_list bindings = { + UBOBinding, + { + .binding = SceneDSBindings::Envmap, + .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = binding_create_flags_t::ECF_NONE, + .stageFlags = RTStages, + .count = 1, + .immutableSamplers = &samplerDefaultRepeat + }, + { + .binding = SceneDSBindings::TLASes, + .type = IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, + .createFlags = DescriptorIndexingFlags, + .stageFlags = RTStages, + .count = SceneDSBindingCounts::TLASes + }, + { + .binding = SceneDSBindings::Samplers, + .type = IDescriptor::E_TYPE::ET_SAMPLER, + .createFlags = DescriptorIndexingFlags, + .stageFlags = RTStages, + .count = SceneDSBindingCounts::Samplers + }, + { + .binding = SceneDSBindings::SampledImages, + .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = DescriptorIndexingFlags, + .stageFlags = RTStages, + .count = SceneDSBindingCounts::SampledImages + }, + { + .binding = SceneDSBindings::EnvmapPDF, + .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = DescriptorIndexingFlags, + .stageFlags = RTStages, + .count = 1, + .immutableSamplers = &samplerEnvmapPDF + }, + { + .binding = SceneDSBindings::EnvmapWarpMap, + .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = DescriptorIndexingFlags, + .stageFlags = RTStages, + .count = 1, + .immutableSamplers = &samplerEnvmapWarpmap + } + }; + params.sceneDSLayout = device->createDescriptorSetLayout(bindings); + if (checkNullObject(params.sceneDSLayout,"Scene Descriptor Layout")) + return nullptr; + } + // the sensor layout + { + constexpr auto ResolveAndPresentStages = hlsl::ShaderStage::ESS_COMPUTE | hlsl::ShaderStage::ESS_FRAGMENT; + const auto defaultSampler = device->createSampler({ + { + .AnisotropicFilter = 0 + }, + 0.f, + 0.f, + 0.f + }); + std::initializer_list bindings = { + UBOBinding, + singleStorageImage(SensorDSBindings::ScrambleKey), + singleStorageImage(SensorDSBindings::SampleCount), + 
singleStorageImage(SensorDSBindings::Beauty), + singleStorageImage(SensorDSBindings::RWMCCascades), + singleStorageImage(SensorDSBindings::Albedo), + singleStorageImage(SensorDSBindings::Normal), + singleStorageImage(SensorDSBindings::Motion), + singleStorageImage(SensorDSBindings::Mask), + { + .binding = SensorDSBindings::Samplers, + .type = IDescriptor::E_TYPE::ET_SAMPLER, + .createFlags = binding_create_flags_t::ECF_NONE, + .stageFlags = ResolveAndPresentStages, + .count = SensorDSBindingCounts::Samplers, + .immutableSamplers = &defaultSampler + }, + { + .binding = SensorDSBindings::AsSampledImages, + .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = binding_create_flags_t::ECF_PARTIALLY_BOUND_BIT, + .stageFlags = ResolveAndPresentStages, + .count = SensorDSBindingCounts::AsSampledImages + } + }; + params.sensorDSLayout = device->createDescriptorSetLayout(bindings); + if (checkNullObject(params.sensorDSLayout,"Sensor Descriptor Layout")) + return nullptr; + } + } + + // but many push constant ranges + SPushConstantRange pcRanges[uint8_t(render_mode_e::Count)]; + auto setPCRange = [&pcRanges](const render_mode_e mode)->void + { + pcRanges[uint8_t(mode)] = {.stageFlags=RTStages,.offset=0,.size=sizeof(T)}; + }; + setPCRange.operator()(render_mode_e::Previs); + setPCRange.operator()(render_mode_e::Beauty); + setPCRange.operator()(render_mode_e::Debug); + for (uint8_t t=0; tcreatePipelineLayout({pcRanges+t,1},params.sceneDSLayout,params.sensorDSLayout); + string debugName = to_string(static_cast(t))+"Rendering Pipeline Layout"; + if (checkNullObject(params.renderingLayouts[t],debugName)) + return nullptr; + } + } + + // TODO: create the generic pipelines + params.shaders[uint8_t(render_mode_e::Previs)] = loadPrecompiledShader<"pathtrace_previs">(_params.assMan,device,logger); + params.shaders[uint8_t(render_mode_e::Beauty)] = loadPrecompiledShader<"pathtrace_beauty">(_params.assMan,device,logger); + params.shaders[uint8_t(render_mode_e::Debug)] = 
loadPrecompiledShader<"pathtrace_debug">(_params.assMan,device,logger); + for (auto i=0; i(i))); + return nullptr; + } + + // command buffers + for (uint8_t i=0; icreateCommandPool(params.graphicsQueue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::NONE); + if (pool) + pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,1,params.commandBuffers+i,smart_refctd_ptr(params.logger.get())); + if (checkNullObject(params.commandBuffers[i],"Graphics Command Buffer "+to_string(i))) + return nullptr; + } + + return core::smart_refctd_ptr(new CRenderer(std::move(params)),core::dont_grab); +} + +core::smart_refctd_ptr CRenderer::createScene(CScene::SCreationParams&& _params) +{ + if (!_params) + return nullptr; + + auto* const device = getDevice(); + auto converter = core::smart_refctd_ptr(_params.converter); + + CScene::SConstructorParams params = {std::move(_params)}; +// params.sceneBound = ; + params.sensors = std::move(_params.load.sensors); + params.renderer = smart_refctd_ptr(this); + { + auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT,{&m_construction.sceneDSLayout.get(),1}); + auto ds = pool->createDescriptorSet(smart_refctd_ptr(m_construction.sceneDSLayout)); + if (!ds) + { + m_creation.logger.log("Failed to create a scene - failed descriptor set allocation!",ILogger::ELL_ERROR); + return nullptr; + } + params.sceneDS = make_smart_refctd_ptr(std::move(ds)); + } + + constexpr auto RenderModeCount = uint8_t(CSession::RenderMode::Count); + // create the pipelines + { + IGPURayTracingPipeline::SCreationParams creationParams[RenderModeCount] = {}; + using creation_flags_e = IGPURayTracingPipeline::SCreationParams::FLAGS; + auto flags = creation_flags_e::NO_NULL_MISS_SHADERS; + IGPURayTracingPipeline::SShaderSpecInfo missShaders[RenderModeCount] = {}; + { + for (uint8_t m=0; mcreateRayTracingPipelines(nullptr,creationParams,params.pipelines)) + { + m_creation.logger.log("Failed to create Path 
Tracing Pipelines",ILogger::ELL_ERROR); + return nullptr; + } + } + + // new cache if none provided + if (!converter) + converter = CAssetConverter::create({.device=device,.optimizer={}}); + + smart_refctd_ptr ubo; + { + struct Buffers final + { + using render_mode_e = CSession::RenderMode; + inline operator std::span() const {return {&ubo.get(),1+RenderModeCount};} + + smart_refctd_ptr ubo; + smart_refctd_ptr sbts[RenderModeCount]; + } tmpBuffers; + // + using buffer_usage_e = IGPUBuffer::E_USAGE_FLAGS; + constexpr auto BasicBufferUsages = buffer_usage_e::EUF_SHADER_DEVICE_ADDRESS_BIT; + { + tmpBuffers.ubo = ICPUBuffer::create({{.size=sizeof(SSceneUniforms),.usage=BasicBufferUsages|buffer_usage_e::EUF_UNIFORM_BUFFER_BIT},nullptr}); + auto& uniforms = *reinterpret_cast(tmpBuffers.ubo->getPointer()); + uniforms.init = {}; // TODO: fill with stuff + tmpBuffers.ubo->setContentHash(tmpBuffers.ubo->computeContentHash()); + } + // SBT + const auto& limits = device->getPhysicalDevice()->getLimits(); + assert(limits.shaderGroupBaseAlignment>=limits.shaderGroupHandleAlignment); + constexpr auto HandleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; + const auto handleSizeAligned = nbl::core::alignUp(HandleSize,limits.shaderGroupHandleAlignment); + for (uint8_t i=0; igetHitHandles(); + const auto missHandles = pipeline->getMissHandles(); + const auto callableHandles = pipeline->getCallableHandles(); + // + { + class CVectorBacked final : public core::refctd_memory_resource + { + public: + inline CVectorBacked(const size_t reservation) + { + storage.reserve(reservation*HandleSize); + } + + inline void* allocate(size_t bytes, size_t alignment) override + { + assert(bytes==storage.size()); + return storage.data(); + } + inline void deallocate(void* p, size_t bytes, size_t alignment) override {storage = {};} + + core::vector storage; + }; + auto memRsc = core::make_smart_refctd_ptr(hitHandles.size()+missHandles.size()+callableHandles.size()+1); + { + // TODO: move to 
material compiler + core::LinearAddressAllocatorST allocator(nullptr,0,0,limits.shaderGroupBaseAlignment,0x7fff0000u); + auto copyShaderHandles = [&](const std::span handles)->SBufferRange + { + SBufferRange range = {.size=handles.size()*handleSizeAligned}; + range.offset = allocator.alloc_addr(range.size,limits.shaderGroupBaseAlignment); + memRsc->storage.resize(allocator.get_allocated_size()); + uint8_t* out = memRsc->storage.data()+range.offset; + for (const auto& handle : handles) + { + memcpy(out,&handle,HandleSize); + out += handleSizeAligned; + } + return range; + }; + auto& sbt = params.sbts[i]; + sbt.raygen = copyShaderHandles({&pipeline->getRaygen(),1}); + sbt.miss.range = copyShaderHandles(pipeline->getMissHandles()); + sbt.hit.range = copyShaderHandles(pipeline->getHitHandles()); + sbt.callable.range = copyShaderHandles(pipeline->getCallableHandles()); + sbt.miss.stride = sbt.hit.stride = sbt.callable.stride = handleSizeAligned; + } + auto& sbtBuff = tmpBuffers.sbts[i]; + sbtBuff = ICPUBuffer::create({ + { + .size=memRsc->storage.size(),.usage=BasicBufferUsages|buffer_usage_e::EUF_SHADER_BINDING_TABLE_BIT + }, + /*.data = */memRsc->storage.data(), + /*.memoryResource = */memRsc + },core::adopt_memory); + sbtBuff->setContentHash(sbtBuff->computeContentHash()); + } + } + + // customized setup + struct MyInputs : CAssetConverter::SInputs + { + // For the GPU Buffers to be directly writeable and so that we don't need a Transfer Queue submit at all + inline uint32_t constrainMemoryTypeBits(const size_t groupCopyID, const IAsset* canonicalAsset, const blake3_hash_t& contentHash, const IDeviceMemoryBacked* memoryBacked) const override + { + assert(memoryBacked); + return memoryBacked->getObjectType()!=IDeviceMemoryBacked::EOT_BUFFER ? 
(~0u):rebarMemoryTypes; + } + + uint32_t rebarMemoryTypes; + } inputs = {}; + inputs.logger = m_creation.logger.get().get(); + inputs.rebarMemoryTypes = device->getPhysicalDevice()->getDirectVRAMAccessMemoryTypeBits(); + // the allocator needs to be overriden to hand out memory ranges which have already been mapped so that the ReBAR fast-path can kick in + // (multiple buffers can be bound to same memory, but memory can only be mapped once at one place, so Asset Converter can't do it) + struct MyAllocator final : public IDeviceMemoryAllocator + { + ILogicalDevice* getDeviceForAllocations() const override {return device;} + + SAllocation allocate(const SAllocateInfo& info) override + { + auto retval = device->allocate(info); + // map what is mappable by default so ReBAR checks succeed + if (retval.isValid() && retval.memory->isMappable()) + retval.memory->map({.offset=0,.length=info.size}); + return retval; + } + + ILogicalDevice* device; + } myalloc; + myalloc.device = device; + inputs.allocator = &myalloc; + + // TODO: construct the TLASes + core::vector tmpTLASes; + { + std::get>(inputs.assets) = tmpBuffers; + std::get>(inputs.assets) = tmpTLASes; + } + + CAssetConverter::SReserveResult reservation = converter->reserve(inputs); + { + bool success = true; + auto check = [&](const CAssetConverter::SInputs::asset_span_t references)->void + { + auto objects = reservation.getGPUObjects(); + auto referenceIt = references.begin(); + for (auto& object : objects) + { + auto* reference = *(referenceIt++); + if (!reference) + continue; + + success = bool(object.value); + if (!success) + { + inputs.logger.log("Failed to convert a CPU object to GPU of type %s!",ILogger::ELL_ERROR,system::to_string(reference->getAssetType())); + return; + } + } + }; + check.template operator()(tmpBuffers); + if (!success) + return nullptr; + } + + // convert + { + smart_refctd_ptr scratchAlloc; + { + constexpr auto scratchUsages = 
IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT|IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT|IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + + constexpr uint16_t MaxAlignment = 256; + constexpr uint64_t MinAllocationSize = 1024; + const auto scratchSize = core::alignUp(hlsl::max(reservation.getMaxASBuildScratchSize(false),MinAllocationSize),MaxAlignment); + + auto scratchBuffer = device->createBuffer({{.size=scratchSize,.usage=scratchUsages}}); + + auto reqs = scratchBuffer->getMemoryReqs(); + reqs.memoryTypeBits &= device->getPhysicalDevice()->getDirectVRAMAccessMemoryTypeBits(); + + auto allocation = device->allocate(reqs,scratchBuffer.get(),IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + allocation.memory->map({.offset=0,.length=reqs.size}); + + scratchAlloc = make_smart_refctd_ptr( + SBufferRange{0ull,scratchSize,std::move(scratchBuffer)}, + core::allocator(), MaxAlignment, MinAllocationSize + ); + } + + constexpr auto CompBufferCount = 2; + + std::array,CompBufferCount> compBufs = {}; + std::array compBufInfos = {}; + { + constexpr auto RequiredFlags = IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT|IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT; + auto pool = device->createCommandPool(m_creation.computeQueue->getFamilyIndex(),RequiredFlags); + if (!pool || !pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, compBufs)) + { + inputs.logger.log("Failed to create Command Buffers for the Compute Queue!",ILogger::ELL_ERROR); + return nullptr; + } + compBufs.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + for (auto i=0; icreateSemaphore(0u); + + // TODO: `SIntendedSubmitInfo transfer` as well, because of images + SIntendedSubmitInfo compute = {}; + compute.queue = m_creation.computeQueue; + compute.scratchCommandBuffers = compBufInfos; + compute.scratchSemaphore = { + .semaphore = compSema.get(), + .value = 0u, + .stageMask = 
PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT + }; + struct MyParams final : CAssetConverter::SConvertParams + { + inline uint32_t getFinalOwnerQueueFamily(const IGPUBuffer* buffer, const core::blake3_hash_t& createdFrom) override + { + return finalUser; + } + inline uint32_t getFinalOwnerQueueFamily(const IGPUAccelerationStructure* image, const core::blake3_hash_t& createdFrom) override + { + return finalUser; + } + + uint8_t finalUser; + } cvtParam = {}; + cvtParam.utilities = m_creation.utilities.get(); + cvtParam.compute = &compute; + cvtParam.scratchForDeviceASBuild = scratchAlloc.get(); + cvtParam.finalUser = m_creation.graphicsQueue->getFamilyIndex(); + + auto future = reservation.convert(cvtParam); + if (future.copy()!=IQueue::RESULT::SUCCESS) + { + inputs.logger.log("Failed to await `CAssetConverter::SReserveResult::convert(...)` submission semaphore!",ILogger::ELL_ERROR); + return nullptr; + } + + const auto buffers = reservation.getGPUObjects(); + ubo = buffers[0].value; + for (uint8_t i=0; i& stRange)->void + { + stRange.range.buffer = stRange.range.size ? 
buffer:nullptr; + }; + params.sbts[i].raygen.buffer = buffer; + setSBTBuffer(params.sbts[i].miss); + setSBTBuffer(params.sbts[i].hit); + setSBTBuffer(params.sbts[i].callable); + } + } + } + + // write into DS + { + vector infos; + vector writes; + auto* const ds = params.sceneDS->getDescriptorSet(); + auto addWrite = [&](const uint32_t binding, IGPUDescriptorSet::SDescriptorInfo&& info)->void + { + writes.emplace_back() = { + .dstSet = ds, + .binding = binding, + .arrayElement = 0, + .count = 1, + .info = reinterpret_cast(infos.size()) + }; + infos.push_back(std::move(info)); + }; + addWrite(SceneDSBindings::UBO,SBufferRange{.offset=0,.size=sizeof(SSceneUniforms),.buffer=std::move(ubo)}); + // TODO: Envmap + // TODO: TLASes + // TODO: Samplers + // TODO: Sampled Images + // TODO: Envmap PDF + // TODO: Envmap Warp Map + for (auto& write : writes) + write.info = infos.data()+reinterpret_cast(write.info); + device->updateDescriptorSets(writes,{}); + } + +#if 0 + float m_maxAreaLightLuma; + // Resources used for envmap sampling + nbl::core::smart_refctd_ptr m_finalEnvmap; +#endif + + // + if (!params) + { + m_creation.logger.log("Failed to create a scene!",ILogger::ELL_ERROR); + return nullptr; + } + return core::smart_refctd_ptr(new CScene(std::move(params)),core::dont_grab); +} + + +auto CRenderer::render(CSession* session) -> SSubmit +{ + if (!session || !session->isInitialized()) + return {}; + const auto& sessionParams = session->getConstructionParams(); + auto* const device = getDevice(); + + if (m_frameIx>=SCachedConstructionParams::FramesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_construction.semaphore.get(), + .value = m_frameIx+1-SCachedConstructionParams::FramesInFlight + } + }; + if (device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return {}; + } + const auto resourceIx = m_frameIx % SCachedConstructionParams::FramesInFlight; + + auto* const cb = 
m_construction.commandBuffers[resourceIx].get(); + cb->getPool()->reset(); + if (!cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + return {}; + + const auto* const scene = session->getConstructionParams().scene.get(); + const auto mode = sessionParams.mode; + const auto& sessionResources = session->getActiveResources(); + const auto* const pipeline = scene->getPipeline(mode); + + bool success; + // push constants + { + switch (mode) + { + case CSession::RenderMode::Debug: + { + SDebugPushConstants pc = {sessionResources.currentSensorState}; + success = cb->pushConstants(pipeline->getLayout(),hlsl::ShaderStage::ESS_ALL_RAY_TRACING,0,sizeof(pc),&pc); + break; + } + default: + getLogger().log("Unimplemented RenderMode::%s !",ILogger::ELL_ERROR,system::to_string(mode).c_str()); + return {}; + } + } + // bind pipelines + success = success && cb->bindRayTracingPipeline(pipeline); + { + const IGPUDescriptorSet* sets[2] = {sessionParams.scene->getDescriptorSet(),sessionResources.immutables.ds.get()}; + success = success && cb->bindDescriptorSets(EPBP_RAY_TRACING,pipeline->getLayout(),0,2,sets); + } + + const auto renderSize = sessionParams.uniforms.renderSize; + success = success && cb->traceRays(scene->getSBT(mode),renderSize.x,renderSize.y,sessionParams.type!=CSession::sensor_type_e::Env ? 
1:6); + + if (success) + return SSubmit(this,cb); + else + return {}; +} + +IQueue::SSubmitInfo::SSemaphoreInfo CRenderer::SSubmit::operator()(std::span extraWaits) +{ + if (!cb || !cb->end()) + return {}; + + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = renderer->m_construction.semaphore.get(), + .value = ++renderer->m_frameIx, + .stageMask = stageMask + } + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = {{.cmdbuf=cb}}; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = extraWaits, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + if (renderer->getCreationParams().graphicsQueue->submit(infos)!=IQueue::RESULT::SUCCESS) + { + renderer->m_frameIx--; + return {}; + } + return rendered[0]; +} + +} \ No newline at end of file diff --git a/40_PathTracer/src/renderer/CScene.cpp b/40_PathTracer/src/renderer/CScene.cpp new file mode 100644 index 000000000..631d5b9b4 --- /dev/null +++ b/40_PathTracer/src/renderer/CScene.cpp @@ -0,0 +1,78 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/builtin/hlsl/limits.hlsl" + +#include "renderer/CRenderer.h" + +namespace nbl::this_example +{ +using namespace nbl::core; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::hlsl; +using namespace nbl::video; + +// +smart_refctd_ptr CScene::createSession(const CSession::SCreationParams& _params) +{ + if (!_params) + return nullptr; + + const auto& constants = _params.sensor->constants; + const auto& dynDefaults = _params.sensor->dynamicDefaults; + const auto& mutDefaults = _params.sensor->mutableDefaults; + const auto& raygen = mutDefaults.raygen; + + CSession::SConstructionParams params = {std::move(_params)}; + params.scene = smart_refctd_ptr(this); + params.cropOffsets = {mutDefaults.cropOffsetX,mutDefaults.cropOffsetY}; + params.cropResolution = {mutDefaults.cropWidth,mutDefaults.cropHeight}; + params.type = raygen.getType(); + + const uint16_t2 renderSize(constants.width,constants.height); + assert(all(params.cropOffsets(mutDefaults.maxPathDepth,1,0x1u<(mutDefaults.russianRouletteDepth,1,maxPathDepth); + params.uniforms = { + .rcpPixelSize = promote(1.f)/float32_t2(renderSize), + .splatting = {}, // TODO + .renderSize = renderSize, + .lastCascadeIndex = static_cast(constants.cascadeCount-1), + .hideEnvironment = mutDefaults.hideEnvironment, + .lastPathDepth = static_cast(maxPathDepth-1), + .lastNoRussianRouletteDepth = static_cast(russianRouletteDepth-1) + }; + } + + // + params.initDynamics = { + .ndcToRay = {}, // TODO + .tMax = mutDefaults.farClip, + .minSPP = core::min(dynDefaults.samplesNeeded,16), // for later enhancement + .maxSPP = dynDefaults.samplesNeeded + }; + + // + { + const auto reciprocalKappa = 1.f/dynDefaults.kappa; + params.initResolveConstants = { + .rwmc = { + .initialEmin = dynDefaults.Emin, + .reciprocalBase = 1.f/mutDefaults.cascadeLuminanceBase, + .reciprocalKappa = reciprocalKappa, + .colorReliabilityFactor = 
hlsl::mix(mutDefaults.cascadeLuminanceBase,1.f,reciprocalKappa) + }, + .cascadeCount = constants.cascadeCount + }; + } + + return smart_refctd_ptr(new CSession(std::move(params)),dont_grab); +} + +} \ No newline at end of file diff --git a/40_PathTracer/src/renderer/CSession.cpp b/40_PathTracer/src/renderer/CSession.cpp new file mode 100644 index 000000000..a20a8a7ae --- /dev/null +++ b/40_PathTracer/src/renderer/CSession.cpp @@ -0,0 +1,306 @@ +// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "renderer/CRenderer.h" + +namespace nbl::this_example +{ +using namespace nbl::core; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::hlsl; +using namespace nbl::video; + +// +bool CSession::init(video::IGPUCommandBuffer* cb) +{ + auto renderer = m_params.scene->getRenderer(); + auto& logger = renderer->getCreationParams().logger; + auto device = renderer->getDevice(); + + auto& immutables = m_active.immutables; + + // create the descriptors + core::vector infos; + core::vector writes; + { + auto addWrite = [&](const uint32_t binding, IGPUDescriptorSet::SDescriptorInfo&& info)->void + { + writes.emplace_back() = { + .binding = binding, + .arrayElement = 0, + .count = 1, + .info = reinterpret_cast(infos.size()) + }; + infos.push_back(std::move(info)); + }; + + // + auto dedicatedAllocate = [&](IDeviceMemoryBacked* memBacked, const std::string_view debugName)->bool + { + if (!memBacked) + { + logger.log("Failed to create Sensor \"%s\"'s \"%s\" in CSession::init()",ILogger::ELL_ERROR,m_params.name.c_str(),debugName.data()); + return false; + } + memBacked->setObjectDebugName(debugName.data()); + + auto mreqs = memBacked->getMemoryReqs(); + mreqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + if 
(!device->allocate(mreqs,memBacked,IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE).isValid()) + { + logger.log("Could not allocate memory for Sensor \"%s\"'s \"%s\" in CSession::init()",ILogger::ELL_ERROR,m_params.name.c_str(),debugName.data()); + return false; + } + return true; + }; + + // create UBO + { + IGPUBuffer::SCreationParams params = {}; + params.size = sizeof(m_params.uniforms); + using usage_flags_e = IGPUBuffer::E_USAGE_FLAGS; + params.usage = usage_flags_e::EUF_UNIFORM_BUFFER_BIT |usage_flags_e::EUF_TRANSFER_DST_BIT | usage_flags_e::EUF_INLINE_UPDATE_VIA_CMDBUF; + auto ubo = device->createBuffer(std::move(params)); + if (!dedicatedAllocate(ubo.get(),"Sensor UBO")) + return false; + // pipeline barrier in `reset` will take care of sync for this + cb->updateBuffer({.size=sizeof(m_params.uniforms),.buffer=ubo},&m_params.uniforms); + addWrite(SensorDSBindings::UBO,SBufferRange{.offset=0,.size=sizeof(m_params.uniforms),.buffer=ubo}); + } + + const auto allowedFormatUsages = device->getPhysicalDevice()->getImageFormatUsagesOptimalTiling(); + auto createImage = [&]( + const std::string_view debugName, const E_FORMAT format, const uint16_t2 resolution, const uint16_t layers, std::bitset viewFormats={}, + const IGPUImage::E_USAGE_FLAGS extraUsages=IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT|IGPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT + ) -> SImageWithViews + { + SImageWithViews retval = {}; + { + { + IGPUImage::SCreationParams params = {}; + params.type = IGPUImage::E_TYPE::ET_2D; + params.samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + params.format = format; + params.extent.width = resolution[0]; + params.extent.height = resolution[1]; + params.extent.depth = 1; + params.mipLevels = 1; + params.arrayLayers = layers; + using image_usage_e = IGPUImage::E_USAGE_FLAGS; + params.usage = image_usage_e::EUF_TRANSFER_DST_BIT|extraUsages; + if (m_params.type==sensor_type_e::Env) + { + params.arrayLayers *= 6; + params.flags |= 
IGPUImage::E_CREATE_FLAGS::ECF_CUBE_COMPATIBLE_BIT; + } + viewFormats.set(format); + if (viewFormats.count()>1) + { + params.flags |= IGPUImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; + params.flags |= IGPUImage::E_CREATE_FLAGS::ECF_EXTENDED_USAGE_BIT; + } + params.viewFormats = viewFormats; + retval.image = device->createImage(std::move(params)); + if (!dedicatedAllocate(retval.image.get(),debugName)) + return {}; + } + const auto& params = retval.image->getCreationParameters(); + for (uint8_t f=0; f(f); + const auto thisFormatUsages = static_cast>(allowedFormatUsages[viewFormat]); + auto view = device->createImageView({ + .subUsages = retval.image->getCreationParameters().usage & thisFormatUsages, + .image = retval.image, + .viewType = IGPUImageView::E_TYPE::ET_2D_ARRAY, + .format = viewFormat + }); + string viewDebugName = string(debugName)+" "+to_string(viewFormat)+" View"; + if (!view) + { + logger.log("Failed to create Sensor \"%s\"'s \"%s\" in CSession::init()",ILogger::ELL_ERROR,m_params.name.c_str(),viewDebugName.c_str()); + return {}; + } + view->setObjectDebugName(viewDebugName.c_str()); + retval.views[viewFormat] = std::move(view); + } + } + return retval; + }; + auto addImageWrite = [&](const uint32_t binding, const smart_refctd_ptr& view)->void + { + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = view; + info.info.image.imageLayout = IGPUImage::LAYOUT::GENERAL; + addWrite(binding,std::move(info)); + }; + immutables.scrambleKey = createImage("Scramble Key",E_FORMAT::EF_R32G32_UINT,promote(SSensorUniforms::ScrambleKeyTextureSize),1); + auto scrambleKeyView = immutables.scrambleKey.views[E_FORMAT::EF_R32G32_UINT]; + addImageWrite(SensorDSBindings::ScrambleKey,scrambleKeyView); + + // create the render-sized images + auto createScreenSizedImage = [&](const std::string_view debugName, const E_FORMAT format, Args&&... 
args)->SImageWithViews + { + return createImage(debugName,format,m_params.uniforms.renderSize,std::forward(args)...); + }; + immutables.sampleCount = createScreenSizedImage("Current Sample Count",E_FORMAT::EF_R16_UINT,1); + auto sampleCountView = immutables.sampleCount.views[E_FORMAT::EF_R16_UINT]; + addImageWrite(SensorDSBindings::SampleCount,sampleCountView); + immutables.rwmcCascades = createScreenSizedImage("RWMC Cascades",E_FORMAT::EF_R32G32_UINT,m_params.uniforms.lastCascadeIndex+1); + auto rwmcCascadesView = immutables.rwmcCascades.views[E_FORMAT::EF_R32G32_UINT]; + addImageWrite(SensorDSBindings::RWMCCascades,rwmcCascadesView); + immutables.beauty = createScreenSizedImage("Beauty",E_FORMAT::EF_E5B9G9R9_UFLOAT_PACK32,1,std::bitset().set(E_FORMAT::EF_R32_UINT)); + addImageWrite(SensorDSBindings::Beauty,immutables.beauty.views[E_FORMAT::EF_R32_UINT]); + immutables.albedo = createScreenSizedImage("Albedo",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); + auto albedoView = immutables.albedo.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]; + addImageWrite(SensorDSBindings::Albedo,albedoView); + // Normal and Albedo should have used `EF_A2B10G10R10_SNORM_PACK32` but Nvidia doesn't support + immutables.normal = createScreenSizedImage("Normal",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); + auto normalView = immutables.normal.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]; + addImageWrite(SensorDSBindings::Normal,normalView); + immutables.motion = createScreenSizedImage("Motion",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); + auto motionView = immutables.motion.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]; + addImageWrite(SensorDSBindings::Motion,motionView); + immutables.mask = createScreenSizedImage("Mask",E_FORMAT::EF_R16_UNORM,1); + auto maskView = immutables.mask.views[E_FORMAT::EF_R16_UNORM]; + addImageWrite(SensorDSBindings::Mask,maskView); + // shorthand a little bit + addImageWrite(SensorDSBindings::AsSampledImages,scrambleKeyView); + writes.back().count = 
SensorDSBindingCounts::AsSampledImages; + { + const auto oldSize = infos.size(); + infos.resize(oldSize +SensorDSBindingCounts::AsSampledImages,infos.back()); + const auto viewInfos = infos.data()+oldSize-1; + using index_e = SensorDSBindings::SampledImageIndex; + viewInfos[uint8_t(index_e::ScrambleKey)].desc = scrambleKeyView; + viewInfos[uint8_t(index_e::SampleCount)].desc = sampleCountView; + viewInfos[uint8_t(index_e::RWMCCascades)].desc = rwmcCascadesView; + viewInfos[uint8_t(index_e::Beauty)].desc = immutables.beauty.views[E_FORMAT::EF_E5B9G9R9_UFLOAT_PACK32]; + viewInfos[uint8_t(index_e::Albedo)].desc = albedoView; + viewInfos[uint8_t(index_e::Normal)].desc = normalView; + viewInfos[uint8_t(index_e::Motion)].desc = motionView; + viewInfos[uint8_t(index_e::Mask)].desc = maskView; + } + } + + // create descriptor set + { + auto layout = renderer->getConstructionParams().sensorDSLayout; + auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT,{&layout.get(),1}); + immutables.ds = pool->createDescriptorSet(std::move(layout)); + const char* DebugName = "Sensor Descriptor Set"; + if (!immutables.ds) + { + logger.log("Failed to create Sensor \"%s\"'s \"%s\" in CSession::init()",ILogger::ELL_ERROR,m_params.name.c_str(),DebugName); + return false; + } + immutables.ds->setObjectDebugName(DebugName); + for (auto& write : writes) + { + write.dstSet = immutables.ds.get(); + write.info = infos.data()+reinterpret_cast(write.info); + } + if (!device->updateDescriptorSets(writes,{})) + { + logger.log("Failed to write Sensor \"%s\"'s \"%s\" in CSession::init()",ILogger::ELL_ERROR,m_params.name.c_str(),DebugName); + return false; + } + } + + if (!immutables || !reset(m_params.initDynamics,cb)) + { + logger.log("Could not Init Session for sensor \"%s\" failed to reset!",ILogger::ELL_ERROR,m_params.name.c_str()); + deinit(); + return false; + } + +// TODO: fill scramble Key with noise + + return true; +} + +bool 
+CSession::reset(const SSensorDynamics& newVal, IGPUCommandBuffer* cb)
+{
+	if (!isInitialized() || !cb) // nothing to clear, or no command buffer to record the clears into
+		return false;
+
+	// Records into `cb` a zero-clear of the session images enqueued below, then restarts both sensor states from `newVal`
+	const auto& immutables = m_active.immutables;
+
+	bool success = true;
+	// slam the barriers as big as possible, it won't happen frequently
+	using image_barrier_t = IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t;
+	core::vector before;
+	{
+		constexpr image_barrier_t beforeBase = {
+			.barrier = {
+				.dep = {
+					.srcStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS,
+					.srcAccessMask = ACCESS_FLAGS::NONE, // because we don't care about reading previously written values
+					.dstStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT,
+					.dstAccessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS
+				}
+			},
+			.subresourceRange = {},
+			.newLayout = IGPUImage::LAYOUT::GENERAL
+		};
+		before.reserve(SensorDSBindingCounts::AsSampledImages);
+
+		auto enqueueClear = [&before,beforeBase](const SImageWithViews& img)->void // one barrier per image, covering mip 0 and all of its array layers
+		{
+			auto& out = before.emplace_back(beforeBase);
+			out.image = img.image.get();
+			out.subresourceRange = {
+				.aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT,
+				.levelCount = 1,
+				.layerCount = out.image->getCreationParameters().arrayLayers
+			};
+		};
+		enqueueClear(immutables.sampleCount);
+		enqueueClear(immutables.beauty);
+		enqueueClear(immutables.rwmcCascades);
+		enqueueClear(immutables.albedo);
+		enqueueClear(immutables.normal);
+		enqueueClear(immutables.motion);
+		enqueueClear(immutables.mask);
+		success = success && cb->pipelineBarrier(asset::EDF_NONE,{.imgBarriers=before});
+	}
+
+	{
+		IGPUCommandBuffer::SClearColorValue color;
+		memset(&color,0,sizeof(color)); // all-zero bits, so the clear value is zero whichever union member the format uses
+		for (const auto& entry : before) // `before` doubles as the list of images (and ranges) to clear
+		{
+			success = success && cb->clearColorImage(const_cast(entry.image),IGPUImage::LAYOUT::GENERAL,&color,1,&entry.subresourceRange);
+		}
+	}
+
+	const SMemoryBarrier after[] = { // make the clears visible to any later shader reads and writes
+		{
+			.srcStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT,
+			.srcAccessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS,
+			.dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS,
+			.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS
+		}
+	};
+	success = success && cb->pipelineBarrier(asset::EDF_NONE,{.memBarriers=after});
+
+	if (success) // only adopt the new state if every command above was recorded
+		m_active.prevSensorState = m_active.currentSensorState = newVal;
+	return success;
+}
+
+bool CSession::update(const SSensorDynamics& newVal) // shifts current -> previous and adopts `newVal`; records no GPU work
+{
+	if (!isInitialized())
+		return false;
+
+	m_active.prevSensorState = m_active.currentSensorState;
+	m_active.currentSensorState = newVal;
+	return true;
+}
+
+}
\ No newline at end of file
diff --git a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp
new file mode 100644
index 000000000..cca4bad40
--- /dev/null
+++ b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp
@@ -0,0 +1,300 @@
+// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#include "renderer/present/CWindowPresenter.h"
+#include "renderer/shaders/session.hlsl"
+
+namespace nbl::this_example
+{
+using namespace nbl::core;
+using namespace nbl::system;
+using namespace nbl::asset;
+using namespace nbl::hlsl;
+using namespace nbl::ui;
+using namespace nbl::video;
+
+constexpr auto SessionImageWritingStages = PIPELINE_STAGE_FLAGS::CLEAR_BIT|PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT;
+
+constexpr IGPURenderpass::SCreationParams::SSubpassDependency CWindowPresenter::Dependencies[3] =
+{
+	{
+		.srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+		.dstSubpass = 0,
+		.memoryBarrier =
+		{
+			.srcStageMask = SessionImageWritingStages,
+			.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT|ACCESS_FLAGS::STORAGE_WRITE_BIT,
+			// fragment shader that draws them
+			.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,
+			.dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT
+		}
+	},
+	{
+		.srcSubpass = 0,
+		.dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+		.memoryBarrier =
+		{
+			// the output to swapchain image
+			.srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+			.srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,
+			// we only worry about next compute dispatch not overwriting our presented image
+			.dstStageMask = SessionImageWritingStages,
+			// but there are no writes from present to make available to it
+			.dstAccessMask = ACCESS_FLAGS::NONE
+			// swapchain present of image index I synchronises with the next acquire of image I so no need to worry about the reuse of that
+			// note that there's no extra destination stages or accesses because they're not needed for a swapchain present
+		}
+	},
+	IGPURenderpass::SCreationParams::DependenciesEnd
+};
+
+// Validates the params, derives the resolution limits from the primary display, then creates a hidden window and a surface for it; nullptr on any failure
+smart_refctd_ptr CWindowPresenter::create(SCreationParams&& _params)
+{
+	if (!_params)
+	{
+		_params.logger.log("`CWindowPresenter::SCreationParams` are invalid!",ILogger::ELL_ERROR);
+		return nullptr;
+	}
+	CWindowPresenter::SConstructorParams params = {std::move(_params),std::move(_params)}; // NOTE(review): `_params` members are still read below - presumably each move only takes a base sub-object; confirm
+
+	{
+		const auto& primDpyInfo = params.winMgr->getPrimaryDisplayInfo();
+		// subtract window border/decoration elements
+		params.maxResolution = hlsl::max(int32_t2(primDpyInfo.resX,primDpyInfo.resY)-int32_t2(32,32),int32_t2(0,0));
+		// we add an additional constraint that any dimension of maxResolution cannot be less than any dimension of minResolution
+		// e.g. max resolution Height cannot be less than min resolution width
+		if (hlsl::any(hlsl::less()(params.maxResolution.xxyy,params.minResolution.xyxy)))
+		{
+			params.logger.log(
+				"`CWindowPresenter::create` desktop resolution must allow for at least a %d x %d window!",
+				ILogger::ELL_ERROR,params.minResolution.x,params.minResolution.y
+			);
+			return nullptr;
+		}
+		params.aspectRatioRange[0] = float64_t(params.minResolution.x)/float64_t(params.maxResolution.y); // narrowest allowed: min width over max height
+		params.aspectRatioRange[1] = float64_t(params.maxResolution.x)/float64_t(params.minResolution.y); // widest allowed: max width over min height
+	}
+
+	// create the window
+	smart_refctd_ptr window;
+	{
+		IWindow::SCreationParams winParams = {};
+		winParams.width = 64;
+		winParams.height = 64;
+		winParams.x = 32;
+		winParams.y = 32;
+		winParams.flags = IWindow::ECF_HIDDEN|IWindow::ECF_BORDERLESS|IWindow::ECF_RESIZABLE; // created hidden; `acquire_impl` shows it
+		winParams.windowCaption = _params.initialWindowCaption;
+		winParams.callback = std::move(_params.callback);
+		window = params.winMgr->createWindow(std::move(winParams));
+	}
+	if (!window)
+	{
+		params.logger.log("`CWindowPresenter::create` failed to create a window!",ILogger::ELL_ERROR);
+		return nullptr;
+	}
+	params.window = window.get(); // raw pointers taken before `window` is moved into the surface below
+	params.cursorControl = window->getCursorControl();
+
+	// create surface
+	{
+		auto surface = CSurfaceVulkanWin32::create(std::move(_params.api),move_and_static_cast(window));
+		params.surface = surface_t::create(std::move(surface));
+	}
+	if (!params.surface)
+	{
+		params.logger.log("`CWindowPresenter::create` failed to create a surface!",ILogger::ELL_ERROR);
+		return nullptr;
+	}
+
+	return smart_refctd_ptr(new CWindowPresenter(std::move(params)),dont_grab);
+}
+
+bool CWindowPresenter::init_impl(CRenderer* renderer) // creates the swapchain resources, the present pipeline layout and the present pipeline
+{
+	auto& logger = IPresenter::getCreationParams().logger;
+	auto* device = renderer->getDevice();
+
+	// create swapchain and its resources (renderpass, etc.)
+	{
+		ISurface* const tmp = getSurface();
+		ISwapchain::SCreationParams swapchainParams = {.surface=smart_refctd_ptr(tmp)};
+		if (!swapchainParams.deduceFormat(device->getPhysicalDevice()))
+		{
+			logger.log("Could not choose a Surface Format for the Swapchain!",ILogger::ELL_ERROR);
+			return false;
+		}
+
+		auto scResources = std::make_unique(device,swapchainParams.surfaceFormat.format,Dependencies,IGPURenderpass::LOAD_OP::DONT_CARE);
+		if (!scResources || !scResources->getRenderpass())
+		{
+			logger.log("Failed to create Renderpass!",ILogger::ELL_ERROR);
+			return false;
+		}
+
+		if (!m_construction.surface->init(renderer->getCreationParams().graphicsQueue,std::move(scResources),swapchainParams.sharedParams))
+		{
+			logger.log("Could not create Window & Surface or initialize the Surface!",ILogger::ELL_ERROR);
+			return false;
+		}
+	}
+
+	// asset manager used to load the precompiled shaders below
+	auto* const assMan = IPresenter::getCreationParams().assMan.get();
+
+	// present pipeline layout
+	smart_refctd_ptr layout;
+	{
+		const SPushConstantRange pcRange[] = {
+			{.stageFlags=ShaderStage::ESS_FRAGMENT,.offset=0,.size=sizeof(m_pushConstants)}
+		};
+		if (!(layout=device->createPipelineLayout(pcRange,renderer->getConstructionParams().sensorDSLayout)))
+		{
+			logger.log("`CWindowPresenter::init_impl` failed to create Pipeline Layout!",ILogger::ELL_ERROR);
+			return false;
+		}
+	}
+
+	// present pipeline
+	if (auto shader=renderer->loadPrecompiledShader<"present_default">(assMan,device,logger.get().get()); shader)
+	{
+		const IGPUPipelineBase::SShaderSpecInfo fragSpec = {
+			.shader = shader.get(),
+			.entryPoint = "present_default"
+		};
+
+		ext::FullScreenTriangle::ProtoPipeline fsTriProtoPln(assMan, device, logger.get().get());
+		if (!fsTriProtoPln) { logger.log("`CWindowPresenter::init_impl` failed to create Full Screen Triangle protopipeline or load its vertex shader!",ILogger::ELL_ERROR); return false; }
+		m_present = fsTriProtoPln.createPipeline(fragSpec, layout.get(), getRenderpass());
+
+		if (!m_present) // no early return needed, the final `return bool(m_present)` reports the failure
+			logger.log("`CWindowPresenter::init_impl` failed to create Graphics Pipeline!",ILogger::ELL_ERROR);
+	}
+	else
+	{
+		logger.log("`CWindowPresenter::init_impl` failed to load shader!",ILogger::ELL_ERROR);
+		return false;
+	}
+
+	return bool(m_present);
+}
+
+auto CWindowPresenter::acquire_impl(const CSession* session, ISemaphore::SWaitInfo* p_currentImageAcquire) -> clock_t::time_point
+{
+	auto expectedPresent = clock_t::time_point::min(); // invalid value
+	if (!session || !p_currentImageAcquire) // the acquire wait info is written through the pointer below, so it must be valid too
+		return expectedPresent;
+	const auto& sessionParams = session->getConstructionParams();
+	m_pushConstants.isCubemap = sessionParams.type==CSession::sensor_type_e::Env;
+
+	const auto maxResolution = m_construction.maxResolution;
+	uint16_t2 targetResolution = m_pushConstants.isCubemap ? maxResolution:sessionParams.uniforms.renderSize;
+	if (m_pushConstants.isCubemap)
+	{
+		// TODO: build default perspective projection matrix given aspect ratio and smaller axis (or diagonal) FOV of the viewer
+//		m_pushConstants.cubemap.invProjView = ;
+	}
+	else
+	{
+		m_pushConstants.regular._min = float32_t2(sessionParams.cropOffsets)*sessionParams.uniforms.rcpPixelSize;
+		m_pushConstants.regular._max = float32_t2(sessionParams.cropResolution+sessionParams.cropOffsets)*sessionParams.uniforms.rcpPixelSize;
+		const double originalAspectRatio = float64_t(targetResolution.x)/float64_t(targetResolution.y);
+		// prevent extreme window size
+		const auto minResolution = m_creation.minResolution;
+		double scaleDown = 1.0;
+		for (uint8_t i=0; i<2; i++) // uniform shrink factor (never above 1) that fits both axes within maxResolution
+			scaleDown = hlsl::min(float64_t(maxResolution[i])/float64_t(targetResolution[i]),scaleDown);
+		targetResolution = float64_t2(targetResolution)*scaleDown;
+		// pad artificially
+		m_pushConstants.regular.scale = {1,1};
+		for (uint8_t i=0; i<2; i++)
+		{
+			const auto tmp = float64_t(minResolution[i])/float64_t(targetResolution[i]);
+			if (tmp>1.0) // this axis fell below the minimum, clamp it up (which changes the aspect ratio)
+				targetResolution[i] = minResolution[i];
+		}
+		// pad with darkness on the dimension that's too big
+		const double newAspectRatio = float64_t(targetResolution.x)/float64_t(targetResolution.y);
+		if (newAspectRatio>originalAspectRatio)
+			m_pushConstants.regular.scale[1] *= newAspectRatio/originalAspectRatio;
+		else
+			m_pushConstants.regular.scale[0] *= originalAspectRatio/newAspectRatio;
+		// `CWindowPresenter::create` aspect ratio ranges and min/max relationships help us stay valid
+		assert(all(minResolution<=targetResolution)&&all(targetResolution<=maxResolution));
+	}
+
+	// handle session resolution change
+	auto& winMgr = m_creation.winMgr;
+	auto* const window = m_construction.window;
+	if (const uint16_t2 currentResolution={window->getWidth(),window->getHeight()}; currentResolution!=targetResolution)
+	{
+		if (!winMgr->setWindowSize(window,targetResolution.x,targetResolution.y))
+			return expectedPresent;
+		m_construction.surface->recreateSwapchain(); // NOTE(review): result (if any) is ignored - confirm a failure surfaces via `acquireNextImage` below
+	}
+	if (window->isHidden())
+		winMgr->show(window);
+
+	m_pushConstants.layer = 0; // TODO: cubemaps and RWMC debug
+	m_pushConstants.imageIndex = uint8_t(SensorDSBindings::SampledImageIndex::Albedo);
+
+	auto acquireResult = m_construction.surface->acquireNextImage();
+	*p_currentImageAcquire = {.semaphore=acquireResult.semaphore,.value=acquireResult.acquireCount};
+	m_currentImageIndex = acquireResult.imageIndex;
+	if (!acquireResult)
+		return expectedPresent;
+
+	// TODO: Do this properly with present timing extension and a better oracle
+	expectedPresent = clock_t::now() + std::chrono::microseconds(16666); // assumes one frame at roughly 60 Hz
+
+	return expectedPresent;
+}
+
+bool CWindowPresenter::beginRenderpass_impl() // records viewport/scissor, begins the renderpass and records the full screen present draw
+{
+	auto* const scRes = getSwapchainResources();
+	auto* const framebuffer = scRes->getFramebuffer(m_currentImageIndex);
+	const uint16_t2 resolution = { framebuffer->getCreationParameters().width,framebuffer->getCreationParameters().height};
+
+	auto* const cb = getCurrentCmdBuffer();
+	bool success = cb->beginDebugMarker("Present");
+	const SViewport viewport[] = {{
+		.x = 0u, .y = 0u,
+		.width = static_cast(resolution.x),
+		.height = static_cast(resolution.y),
+		.minDepth = 1.f, .maxDepth = 0.f
+	}};
+	success = success && cb->setViewport(viewport,0);
+	{
+		const VkRect2D defaultScissors[] = {{
+			.offset = {static_cast(viewport->x), static_cast(viewport->y)},
+			.extent = {resolution.x,resolution.y}
+		}};
+		success = success && cb->setScissor(defaultScissors);
+		const VkRect2D currentRenderArea = {.offset = {0,0}, .extent = defaultScissors->extent};
+		const IGPUCommandBuffer::SRenderpassBeginInfo info =
+		{
+			.framebuffer = framebuffer,
+			.colorClearValues = nullptr,
+			.depthStencilClearValues = nullptr,
+			.renderArea = currentRenderArea
+		};
+		success = success && cb->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE);
+	}
+
+	success = success && cb->bindGraphicsPipeline(m_present.get());
+
+	const auto* layout = m_present->getLayout();
+	{
+		const auto* ds = getCurrentSessionDS();
+		success = success && cb->bindDescriptorSets(EPBP_GRAPHICS,layout,0,1u,&ds);
+	}
+	success = success && cb->pushConstants(layout,ShaderStage::ESS_FRAGMENT,0,sizeof(m_pushConstants),&m_pushConstants);
+	ext::FullScreenTriangle::recordDrawCall(cb); // NOTE(review): recorded even if an earlier step failed, and its result (if any) is not folded into `success`
+
+	success = success && cb->endDebugMarker();
+	return success;
+}
+
+}
diff --git a/40_PathTracer/src/renderer/resolve/CBasicRWMCResolver.cpp b/40_PathTracer/src/renderer/resolve/CBasicRWMCResolver.cpp
new file mode 100644
index 000000000..4646684b1
--- /dev/null
+++ b/40_PathTracer/src/renderer/resolve/CBasicRWMCResolver.cpp
@@ -0,0 +1,76 @@
+// Copyright (C) 2025-2026 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#include "renderer/resolve/CBasicRWMCResolver.h"
+
+namespace nbl::this_example
+{
+using namespace nbl::core;
+using namespace nbl::system;
+using namespace nbl::asset;
+using namespace nbl::hlsl;
+using namespace nbl::ui;
+using namespace nbl::video;
+
+// Validates the params and creates the resolve pipeline layout (the pipelines themselves are still a TODO); nullptr on any failure
+smart_refctd_ptr CBasicRWMCResolver::create(SCreationParams&& _params)
+{
+	auto logger = _params.renderer->getLogger(); // NOTE(review): dereferences `_params.renderer` before `_params` is validated below - confirm a null renderer cannot reach here
+	if (!_params)
+	{
+		logger.log("`CBasicRWMCResolver::SCreationParams` are invalid!",ILogger::ELL_ERROR);
+		return nullptr;
+	}
+	CBasicRWMCResolver::SConstructorParams params = {std::move(_params)};
+
+	auto* const device = _params.renderer->getDevice(); // NOTE(review): `_params` is read after the move above - confirm `renderer` survives it
+	{
+		const SPushConstantRange pcRange[] = {
+			{.stageFlags=ShaderStage::ESS_COMPUTE,.offset=0,.size=sizeof(SResolveConstants)}
+		};
+		if (!(params.layout=device->createPipelineLayout(pcRange,_params.renderer->getConstructionParams().sensorDSLayout)))
+		{
+			logger.log("`CBasicRWMCResolver::create` failed to create Pipeline Layout!",ILogger::ELL_ERROR);
+			return nullptr;
+		}
+	}
+
+	// TODO: create all the pipelines!
+
+	return smart_refctd_ptr(new CBasicRWMCResolver(std::move(params)),dont_grab);
+}
+
+bool CBasicRWMCResolver::changeSession_impl() // no per-session state to rebuild yet
+{
+	return true;
+}
+
+bool CBasicRWMCResolver::resolve(video::IGPUCommandBuffer* cb, video::IGPUBuffer* scratch)
+{
+	if (!cb || !m_activeSession) // the active session is dereferenced right below, so it must be set as well
+		return false;
+
+	switch (m_activeSession->getConstructionParams().mode)
+	{
+		case CSession::RenderMode::Previs: [[fallthrough]];
+		case CSession::RenderMode::Debug:
+			return true; // do nothing
+		case CSession::RenderMode::Beauty:
+			break;
+		default:
+			return false;
+	}
+
+	const auto* const layout = m_construction.layout.get();
+	{
+		constexpr auto raytracingStages = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT;
+		constexpr auto firstResolveStage = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
+		// TODO: pipeline barrier from raytracing pipeline to first resolve pass
+	}
+
+	// compute passes
+
+	return false; // TODO: unimplemented yet
+}
+
+}
\ No newline at end of file
diff --git a/50.IESProfileTest/CMakeLists.txt b/50.IESProfileTest/CMakeLists.txt
deleted file mode 100644
index 52e8e83f2..000000000
--- a/50.IESProfileTest/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-
-include(common RESULT_VARIABLE RES)
-if(NOT RES)
-	message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
-endif()
-
-nbl_create_executable_project("" "" "" nlohmann_json::nlohmann_json)
-
-add_dependencies(${EXECUTABLE_NAME} nlohmann_json::nlohmann_json)
-target_include_directories(${EXECUTABLE_NAME} PUBLIC $)
\ No newline at end of file
diff --git a/50.IESProfileTest/compute/cdc.comp b/50.IESProfileTest/compute/cdc.comp
deleted file mode 100644
index 390d63acb..000000000
--- a/50.IESProfileTest/compute/cdc.comp
+++ /dev/null
@@ -1,260 +0,0 @@
-#version 430 core
-
-// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
-// This file is part of the "Nabla Engine".
-// For conditions of distribution and use, see copyright notice in nabla.h - -#include "common.h" - -layout(local_size_x = WORKGROUP_DIMENSION, local_size_y = WORKGROUP_DIMENSION) in; - -layout(set = 0, binding = 0, r16) restrict uniform image2D outIESCandelaImage; -layout(set = 0, binding = 1, rg32f) restrict uniform image2D outSphericalCoordinatesImage; -layout(set = 0, binding = 2, rgba32f) restrict uniform image2D outOUVProjectionDirectionImage; -layout(set = 0, binding = 3, rg8) restrict uniform image2D outPassTMask; - -layout(std430, set = 0, binding = 4) readonly buffer HorizontalAngles -{ - double hAngles[]; -}; - -layout(std430, set = 0, binding = 5) readonly buffer VerticalAngles -{ - double vAngles[]; -}; - -layout(std430, set = 0, binding = 6) readonly buffer Data -{ - double data[]; -}; - -layout(push_constant) uniform PushConstants -{ - float maxIValue; - float zAngleDegreeRotation; - uint mode; - uint dummy; -} pc; - -vec3 octahedronUVToDir(vec2 uv) -{ - vec3 position = vec3((uv * 2.0 - 1.0).xy, 0.0); - vec2 absP = vec2(abs(position.x), abs(position.y)); - - position.z = 1.0 - absP.x - absP.y; - - if (position.z < 0.0) - { - position.x = sign(position.x) * (1.0 - absP.y); - position.y = sign(position.y) * (1.0 - absP.x); - } - - // rotate position vector around Z-axis with "pc.zAngleDegreeRotation" - if(pc.zAngleDegreeRotation != 0.0) - { - float rDegree = pc.zAngleDegreeRotation; - - const float zAngleRadians = float(rDegree * M_PI / 180.0); - const float cosineV = cos(zAngleRadians); - const float sineV = sin(zAngleRadians); - - position = vec3(cosineV * position.x - cosineV * position.y, sineV * position.x + sineV * position.y, position.z); - //position = vec3((cosineV * position.x) - (sineV * position.y), (cosineV * position.x) + (sineV * position.y), position.z); - } - - return normalize(position); -} - -//! 
Returns spherical coordinates with physics convention in radians -/* - https://en.wikipedia.org/wiki/Spherical_coordinate_system#/media/File:3D_Spherical.svg - Retval.x is "theta" polar angle in range [0, PI] & Retval.y "phi" is azimuthal angle - in range [-PI, PI] range -*/ - -vec2 sphericalDirToRadians(vec3 direction) -{ - double theta = acos(clamp(direction.z/length(direction), -1.0, 1.0)); - double phi = atan(direction.y, direction.x); - - return vec2(theta, phi); -} - -uint implGetVUB(const float angle) -{ - const uint len = vAngles.length(); - - for(uint i = 0; i < len; ++i) - if(vAngles[i] > angle) - return i; - - return len; -} - -uint implGetHUB(const float angle) -{ - const uint len = hAngles.length(); - - for(uint i = 0; i < len; ++i) - if(hAngles[i] > angle) - return i; - - return len; -} - -uint getVLB(const float angle) -{ - return uint(max(int(implGetVUB(angle)) - 1, 0)); -} - -uint getHLB(const float angle) -{ - return uint(max(int(implGetHUB(angle)) - 1, 0)); -} - -uint getVUB(const float angle) -{ - return uint(min(int(implGetVUB(angle)), int(vAngles.length()) - 1)); -} - -uint getHUB(const float angle) -{ - return uint(min(int(implGetHUB(angle)), int(hAngles.length()) - 1)); -} - -double getValue(uint i, uint j) -{ - return data[vAngles.length() * i + j]; -} - -// symmetry -#define ISOTROPIC 0u -#define QUAD_SYMETRIC 1u -#define HALF_SYMETRIC 2u -#define NO_LATERAL_SYMMET 3u - -uint getSymmetry() // TODO: to reduce check time we could pass it with PCs -{ - const uint hALength = hAngles.length(); - if(hALength < 2) // careful here, somebody can break it by feeding us with too much data by mistake - return ISOTROPIC; - - const double hABack = hAngles[hALength - 1]; - - if(hABack == 90) - return QUAD_SYMETRIC; - else if(hABack == 180) // note that OTHER_HALF_SYMMETRIC = HALF_SYMETRIC here - return HALF_SYMETRIC; - else - return NO_LATERAL_SYMMET; -} - -float wrapPhi(const float phi, const uint symmetry) //! 
wrap phi spherical coordinate compoment to range defined by symmetry -{ - switch (symmetry) - { - case ISOTROPIC: - return 0.0; - case QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range - { - float wrapPhi = abs(phi); //! first MIRROR - - if(wrapPhi > M_HALF_PI) //! then REPEAT - wrapPhi = clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0, M_HALF_PI); - - return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 - } - case HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range - return abs(phi); //! eg. maps (in degress) 181 -> 179 or 359 -> 1 - case NO_LATERAL_SYMMET: - { - if(phi < 0) - return phi + 2.0 * M_PI; - else - return phi; - } - } - - return 69; -} - -double sampleI(const vec2 sphericalCoordinates, const uint symmetry) -{ - const float vAngle = degrees(sphericalCoordinates.x), hAngle = degrees(wrapPhi(sphericalCoordinates.y, symmetry)); - - double vABack = vAngles[vAngles.length() - 1]; - double hABack = hAngles[hAngles.length() - 1]; - - if (vAngle > vABack) - return 0.0; - - // bilinear interpolation - uint j0 = getVLB(vAngle); - uint j1 = getVUB(vAngle); - uint i0 = symmetry == ISOTROPIC ? 0 : getHLB(hAngle); - uint i1 = symmetry == ISOTROPIC ? 0 : getHUB(hAngle); - - double uReciprocal = i1 == i0 ? 1.0 : 1.0 / (hAngles[i1] - hAngles[i0]); - double vReciprocal = j1 == j0 ? 1.0 : 1.0 / (vAngles[j1] - vAngles[j0]); - - double u = (hAngle - hAngles[i0]) * uReciprocal; - double v = (vAngle - vAngles[j0]) * vReciprocal; - - double s0 = getValue(i0, j0) * (1.0 - v) + getValue(i0, j1) * (v); - double s1 = getValue(i1, j0) * (1.0 - v) + getValue(i1, j1) * (v); - - return s0 * (1.0 - u) + s1 * u; -} - -//! 
Checks if (x,y) /in [0,PI] x [-PI,PI] product -/* - IES vertical range is [0, 180] degrees - and horizontal range is [0, 360] degrees - but for easier computations (MIRROR & MIRROW_REPEAT operations) - we represent horizontal range as [-180, 180] given spherical coordinates -*/ - -bool isWithinSCDomain(vec2 point) -{ - const vec2 lb = vec2(0, -M_PI); - const vec2 ub = vec2(M_PI, M_PI); - - return all(lessThanEqual(lb, point)) && all(lessThanEqual(point, ub)); -} - -void main() -{ - const ivec2 destinationSize = imageSize(outIESCandelaImage); - const ivec2 pixelCoordinates = ivec2(gl_GlobalInvocationID.xy); - - const float VERTICAL_INVERSE = 1.0f / float(destinationSize.x); - const float HORIZONTAL_INVERSE = 1.0f / float(destinationSize.y); - - if (all(lessThan(pixelCoordinates, destinationSize))) - { - const vec2 uv = vec2((float(pixelCoordinates.x) + 0.5) * VERTICAL_INVERSE, (float(pixelCoordinates.y) + 0.5) * HORIZONTAL_INVERSE); - const vec3 direction = octahedronUVToDir(uv); - const vec2 sphericalCoordinates = sphericalDirToRadians(direction); // third radius spherical compoment is normalized and skipped - - const double intensity = sampleI(sphericalCoordinates, getSymmetry()); - const vec4 value = vec4(intensity / pc.maxIValue, 0, 0, 0); - - const double normD = length(direction); - vec2 mask; - - if(1.0 - QUANT_ERROR_ADMISSIBLE <= normD && normD <= 1.0 + QUANT_ERROR_ADMISSIBLE) - mask.x = 1.0; // pass - else - mask.x = 0; - - if(isWithinSCDomain(sphericalCoordinates)) - mask.y = 1.0; // pass - else - mask.y = 0; - - imageStore(outIESCandelaImage, pixelCoordinates, value); - imageStore(outSphericalCoordinatesImage, pixelCoordinates, vec4(sphericalCoordinates, 0, 1)); - imageStore(outOUVProjectionDirectionImage, pixelCoordinates, vec4(direction.xyz, 1)); - imageStore(outPassTMask, pixelCoordinates, vec4(mask.xy, 1, 1)); - } -} \ No newline at end of file diff --git a/50.IESProfileTest/compute/common.h b/50.IESProfileTest/compute/common.h deleted file mode 
100644 index edbc94104..000000000 --- a/50.IESProfileTest/compute/common.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _COMMON_INCLUDED_ -#define _COMMON_INCLUDED_ - -#ifndef UINT16_MAX -#define UINT16_MAX 65535u // would be cool if we have this define somewhere or GLSL do -#endif -#define M_PI 3.1415926535897932384626433832795f // would be cool if we have this define somewhere or GLSL do -#define M_HALF_PI M_PI/2.0f // would be cool if we have this define somewhere or GLSL do -#define QUANT_ERROR_ADMISSIBLE 1/1024 - -#define WORKGROUP_SIZE 256u -#define WORKGROUP_DIMENSION 16u - -#endif // _COMMON_INCLUDED_ diff --git a/50.IESProfileTest/inputs.json b/50.IESProfileTest/inputs.json deleted file mode 100644 index d6b4ce528..000000000 --- a/50.IESProfileTest/inputs.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "directories": [ - "../media/mitsuba/ies/packages/leomoon-dot-com_ies-lights-pack/ies-lights-pack" - ], - "files": [ - "../media/mitsuba/ies/ISOTROPIC/007cfb11e343e2f42e3b476be4ab684e.ies", - "../media/mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies", - "../media/mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies", - "../media/mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies", - "../media/mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies" - ], - "gui": true, - "writeAssets": false -} \ No newline at end of file diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp deleted file mode 100644 index 7aa640f67..000000000 --- a/50.IESProfileTest/main.cpp +++ /dev/null @@ -1,823 +0,0 @@ -// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#define BENCHMARK_TILL_FIRST_FRAME - -#include -#include -#include -#include "nbl/ext/ScreenShot/ScreenShot.h" -#include "compute/common.h" -#include - -// small hack to compile with the json library -namespace std -{ - int sprintf_s(char* buffer, size_t size, const char* format, ...) { - va_list args; - va_start(args, format); - int result = ::sprintf_s(buffer, size, format, args); - va_end(args); - return result; - } -} - -#include "nlohmann/json.hpp" - -using namespace nbl; -using namespace core; -using json = nlohmann::json; - -#ifdef BENCHMARK_TILL_FIRST_FRAME -const std::chrono::steady_clock::time_point startBenchmark = std::chrono::high_resolution_clock::now(); -bool stopBenchamrkFlag = false; -#endif - -class IESCompute -{ - public: - IESCompute(video::IVideoDriver* _driver, asset::IAssetManager* _assetManager, const std::vector& _assets) - : assets(_assets), driver(_driver), generalPurposeOffset(0), pushConstant({(float)getProfile(0).getMaxCandelaValue(), 0.f}) - { - createGPUEnvironment(_assetManager); - - fbo = createFBO(driver->getScreenSize().Width, driver->getScreenSize().Height); - } - ~IESCompute() {} - - enum E_MODE : uint32_t - { - EM_CDC, //! Candlepower Distribution Curve - EM_IES_C, //! IES Candela - EM_SPERICAL_C, //! Sperical coordinates - EM_DIRECTION, //! Sample direction - EM_PASS_T_MASK, //! Test mask - EM_SIZE - }; - - enum E_BINDINGS - { - EB_IMAGE_IES_C, //! Image with IES Candela data - EB_IMAGE_S, //! Image with spehircal coordinates data - EB_IMAGE_D, //! Image with direction data - EB_IMAGE_T_MASK,//! Image with test mask data - EB_SSBO_HA, //! IES Profile SSBO Horizontal Angles - EB_SSBO_VA, //! IES Profile SSBO Vertical Angles - EB_SSBO_D, //! 
IES Profile SSBO Data - EB_SIZE - }; - - const asset::CIESProfile& getProfile(const size_t& assetIndex) - { - return assets[assetIndex].getMetadata()->selfCast()->profile; - } - - const asset::CIESProfile& getActiveProfile() - { - return getProfile(generalPurposeOffset); - } - - void begin() - { - driver->setRenderTarget(fbo); - const float clear[4]{ 0.f,0.f,0.f,1.f }; - driver->clearColorBuffer(video::EFAP_COLOR_ATTACHMENT0, clear); - driver->beginScene(true, false, video::SColor(255, 0, 0, 0)); - } - - void dispatch() - { - auto& gpue = m_gpue; - - driver->bindComputePipeline(gpue.cPipeline.get()); - driver->bindDescriptorSets(EPBP_COMPUTE, gpue.cPipeline->getLayout(), 0u, 1u, &gpue.cDescriptorSet.get(), nullptr); - driver->pushConstants(gpue.cPipeline->getLayout(), asset::ISpecializedShader::ESS_COMPUTE, 0u, sizeof(PushConstant), &pushConstant); - - const auto xGroups = (getActiveProfile().getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u; - driver->dispatch(xGroups, xGroups, 1u); - - COpenGLExtensionHandler::extGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); - } - - void renderpass() - { - auto& gpue = m_gpue; - - driver->bindGraphicsPipeline(gpue.gPipeline.get()); - driver->bindDescriptorSets(video::EPBP_GRAPHICS, gpue.gPipeline->getLayout(), 3u, 1u, &gpue.gDescriptorSet.get(), nullptr); - driver->pushConstants(gpue.gPipeline->getLayout(), asset::ISpecializedShader::ESS_FRAGMENT, 0u, sizeof(PushConstant), &pushConstant); - driver->drawMeshBuffer(gpue.mBuffer.get()); - } - - void end() - { - driver->blitRenderTargets(fbo, nullptr, false, false); - driver->endScene(); - - #ifdef BENCHMARK_TILL_FIRST_FRAME - if (!stopBenchamrkFlag) - { - const std::chrono::steady_clock::time_point stopBenchmark = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(stopBenchmark - startBenchmark); - std::cout << "Time taken till first render pass: " << duration.count() << " milliseconds" << 
std::endl; - stopBenchamrkFlag = true; - } - #endif - } - - void updateZDegree(const asset::CIESProfile::IES_STORAGE_FORMAT& degreeOffset) - { - const auto& profile = getProfile(generalPurposeOffset); - const auto newDegreeRotation = std::clamp(pushConstant.zAngleDegreeRotation + degreeOffset, profile.getHoriAngles().front(), profile.getHoriAngles().back()); - pushConstant.zAngleDegreeRotation = newDegreeRotation; - } - - void updateGeneralPurposeOffset(const int8_t& offset) - { - const auto newOffset = std::clamp(int64_t(generalPurposeOffset) + int64_t(core::sign(offset)), int64_t(0), int64_t(assets.size() - 1)); - - if (newOffset != generalPurposeOffset) - { - generalPurposeOffset = newOffset; - - // not elegant way to do it here but lets leave it as it is - updateCDescriptorSets(); // flush descriptor set - updateGDescriptorSets(); // flush descriptor set - - const auto& profile = getActiveProfile(); - pushConstant.maxIValue = (float)profile.getMaxCandelaValue(); - } - } - - const asset::CIESProfile::IES_STORAGE_FORMAT getZDegree() - { - const auto& profile = getProfile(generalPurposeOffset); - return pushConstant.zAngleDegreeRotation + (profile.getSymmetry() == asset::CIESProfile::OTHER_HALF_SYMMETRIC ? 
90.0 : 0.0); // real IES horizontal angle has 90.0 degress offset if OTHER_HALF_SYMMETRY, we handle it because of legacy IES 1995 specification case - } - - void updateMode(const E_MODE& mode) - { - pushConstant.mode = static_cast(mode); - } - - const auto& getMode() - { - return pushConstant.mode; - } - - private: - - void createGPUEnvironment(asset::IAssetManager* _assetManager) - { - auto gpuSpecializedShaderFromFile = [&](const char* path) - { - auto bundle = _assetManager->getAsset(path, {}); - auto shader = core::smart_refctd_ptr_static_cast(*bundle.getContents().begin()); - - return driver->getGPUObjectsFromAssets(&shader, &shader + 1u)->operator[](0); // omg - }; - - auto& gpue = m_gpue; - createGPUDescriptors(); - const auto initIdx = generalPurposeOffset; - - // Compute - { - const std::vector bindings = getCBindings(); - { - auto descriptorSetLayout = driver->createGPUDescriptorSetLayout(bindings.data(), bindings.data() + bindings.size()); - asset::SPushConstantRange range = { asset::ISpecializedShader::ESS_COMPUTE, 0u, sizeof(PushConstant) }; - - gpue.cPipeline = driver->createGPUComputePipeline(nullptr, driver->createGPUPipelineLayout(&range, &range + 1u, core::smart_refctd_ptr(descriptorSetLayout)), gpuSpecializedShaderFromFile("../compute/cdc.comp")); - gpue.cDescriptorSet = driver->createGPUDescriptorSet(std::move(descriptorSetLayout)); - } - - { - for (auto i = 0; i < EB_SIZE; i++) - { - gpue.cwrites[i].dstSet = gpue.cDescriptorSet.get(); - gpue.cwrites[i].binding = i; - gpue.cwrites[i].arrayElement = 0u; - gpue.cwrites[i].count = 1u; - gpue.cwrites[i].info = &gpue.cinfos[i]; - } - - gpue.cwrites[EB_IMAGE_IES_C].descriptorType = asset::EDT_STORAGE_IMAGE; - gpue.cwrites[EB_IMAGE_S].descriptorType = asset::EDT_STORAGE_IMAGE; - gpue.cwrites[EB_IMAGE_D].descriptorType = asset::EDT_STORAGE_IMAGE; - gpue.cwrites[EB_IMAGE_T_MASK].descriptorType = asset::EDT_STORAGE_IMAGE; - gpue.cwrites[EB_SSBO_HA].descriptorType = asset::EDT_STORAGE_BUFFER; - 
gpue.cwrites[EB_SSBO_VA].descriptorType = asset::EDT_STORAGE_BUFFER; - gpue.cwrites[EB_SSBO_D].descriptorType = asset::EDT_STORAGE_BUFFER; - - updateCDescriptorSets(); - } - } - - // Graphics - { - const std::vector bindings = getGBindings(); - { - auto descriptorSetLayout = driver->createGPUDescriptorSetLayout(bindings.data(), bindings.data() + bindings.size()); - - auto mesh = _assetManager->getGeometryCreator()->createRectangleMesh(vector2df_SIMD(1.0, 1.0)); - auto cpusphere = core::make_smart_refctd_ptr(nullptr, nullptr, mesh.bindings, std::move(mesh.indexBuffer)); - - cpusphere->setBoundingBox(mesh.bbox); - cpusphere->setIndexType(mesh.indexType); - cpusphere->setIndexCount(mesh.indexCount); - - auto vShader = gpuSpecializedShaderFromFile("../shader.vert"); - auto fShader = gpuSpecializedShaderFromFile("../shader.frag"); - - video::IGPUSpecializedShader* shaders[] = { vShader.get(), fShader.get() }; - asset::SRasterizationParams raster; - - asset::SPushConstantRange range = { asset::ISpecializedShader::ESS_FRAGMENT, 0u, sizeof(PushConstant) }; - gpue.gPipeline = driver->createGPURenderpassIndependentPipeline(nullptr, driver->createGPUPipelineLayout(&range, &range + 1u, nullptr, nullptr, nullptr, core::smart_refctd_ptr(descriptorSetLayout)), shaders, shaders + 2, mesh.inputParams, asset::SBlendParams{}, mesh.assemblyParams, raster); - gpue.gDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr(descriptorSetLayout)); - gpue.mBuffer = driver->getGPUObjectsFromAssets(&cpusphere.get(), &cpusphere.get() + 1)->front(); - } - - auto createSampler = [&]() - { - return driver->createGPUSampler({ asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETBC_FLOAT_OPAQUE_BLACK,asset::ISampler::ETF_LINEAR,asset::ISampler::ETF_LINEAR,asset::ISampler::ESMM_LINEAR,0u,false,asset::ECO_ALWAYS }); - }; - - gpue.sampler = createSampler(); - - for (auto i = 0; i < gpue.NBL_D_IMAGES_AMOUNT; i++) - { - 
gpue.gwrites[i].dstSet = gpue.gDescriptorSet.get(); - gpue.gwrites[i].binding = i; - gpue.gwrites[i].count = 1u; - gpue.gwrites[i].arrayElement = 0u; - gpue.gwrites[i].descriptorType = asset::EDT_COMBINED_IMAGE_SAMPLER; - gpue.gwrites[i].info = gpue.ginfos + i; - } - - updateGDescriptorSets(); - } - } - - void createGPUDescriptors() - { - auto createCPUBuffer = [&](const auto& pInput) - { - core::smart_refctd_ptr buffer = core::make_smart_refctd_ptr(sizeof(asset::CIESProfile::IES_STORAGE_FORMAT) * pInput.size()); - memcpy(buffer->getPointer(), pInput.data(), buffer->getSize()); - - return buffer; - }; - - for(size_t i = 0; i < assets.size(); ++i) - { - const auto& profile = getProfile(i); - auto& cssbod = m_gpue.CSSBOD.emplace_back(); - - auto createGPUBuffer = [&](const auto& cpuBuffer) - { - return driver->createFilledDeviceLocalGPUBufferOnDedMem(cpuBuffer->getSize(), cpuBuffer->getPointer()); - }; - - cssbod.hAngles = createGPUBuffer(createCPUBuffer(profile.getHoriAngles())); - cssbod.vAngles = createGPUBuffer(createCPUBuffer(profile.getVertAngles())); - cssbod.data = createGPUBuffer(createCPUBuffer(profile.getData())); - - const auto optimalResolution = profile.getOptimalIESResolution(); - - cssbod.dImageIESC = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); - cssbod.dImageS = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); - cssbod.dImageD = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); - cssbod.dImageTMask = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); - } - } - - void updateCDescriptorSets() - { - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_IES_C]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_S]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_D]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_T_MASK]); - - fillSSBODescriptorInfo(generalPurposeOffset, 
m_gpue.cinfos[EB_SSBO_HA]); - fillSSBODescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_SSBO_VA]); - fillSSBODescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_SSBO_D]); - - const core::smart_refctd_ptr proxy(m_gpue.cPipeline->getLayout()->getDescriptorSetLayout(0)); - m_gpue.cDescriptorSet = core::smart_refctd_ptr(driver->createGPUDescriptorSet(core::smart_refctd_ptr(proxy))); - - for (auto i = 0; i < EB_SIZE; i++) - m_gpue.cwrites[i].dstSet = m_gpue.cDescriptorSet.get(); - - driver->updateDescriptorSets(EB_SIZE, m_gpue.cwrites, 0u, nullptr); - } - - void updateGDescriptorSets() - { - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_IES_C]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_S]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_D]); - fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_T_MASK]); - - const core::smart_refctd_ptr proxy(m_gpue.gPipeline->getLayout()->getDescriptorSetLayout(3)); - m_gpue.gDescriptorSet = core::smart_refctd_ptr(driver->createGPUDescriptorSet(core::smart_refctd_ptr(proxy))); - - for (auto i = 0; i < m_gpue.NBL_D_IMAGES_AMOUNT; i++) - m_gpue.gwrites[i].dstSet = m_gpue.gDescriptorSet.get(); - - driver->updateDescriptorSets(m_gpue.NBL_D_IMAGES_AMOUNT, m_gpue.gwrites, 0u, nullptr); - } - - template - void fillSSBODescriptorInfo(const size_t assetIndex, IGPUDescriptorSet::SDescriptorInfo& info) - { - static_assert(binding == EB_SSBO_HA || binding == EB_SSBO_VA || binding == EB_SSBO_D); - - const auto& profile = getProfile(assetIndex); - auto& cssbod = m_gpue.CSSBOD[assetIndex]; - - core::smart_refctd_ptr proxy; - - if constexpr (binding == EB_SSBO_HA) - proxy = core::smart_refctd_ptr(cssbod.hAngles); - else if (binding == EB_SSBO_VA) - proxy = core::smart_refctd_ptr(cssbod.vAngles); - else - proxy = core::smart_refctd_ptr(cssbod.data); - - info.desc = core::smart_refctd_ptr(proxy); - info.buffer = { 0, proxy->getSize() }; - } - - template - 
void fillImageDescriptorInfo(const size_t assetIndex, IGPUDescriptorSet::SDescriptorInfo& info) - { - static_assert(binding == EB_IMAGE_IES_C || binding == EB_IMAGE_S || binding == EB_IMAGE_D || binding == EB_IMAGE_T_MASK); - - const auto& profile = getProfile(assetIndex); - auto& cssbod = m_gpue.CSSBOD[assetIndex]; - - core::smart_refctd_ptr proxy; - - if constexpr (binding == EB_IMAGE_IES_C) - proxy = core::smart_refctd_ptr(cssbod.dImageIESC); - else if (binding == EB_IMAGE_S) - proxy = core::smart_refctd_ptr(cssbod.dImageS); - else if (binding == EB_IMAGE_D) - proxy = core::smart_refctd_ptr(cssbod.dImageD); - else - proxy = core::smart_refctd_ptr(cssbod.dImageTMask); - - info.desc = core::smart_refctd_ptr(proxy); - info.image = { core::smart_refctd_ptr(m_gpue.sampler), asset::EIL_SHADER_READ_ONLY_OPTIMAL }; - } - - template - auto createGPUImageView(const size_t& width, const size_t& height) - { - IGPUImage::SCreationParams imageInfo; - imageInfo.format = format; - imageInfo.type = IGPUImage::ET_2D; - imageInfo.extent.width = width; - imageInfo.extent.height = height; - imageInfo.extent.depth = 1u; - - imageInfo.mipLevels = 1u; - imageInfo.arrayLayers = 1u; - imageInfo.samples = asset::ICPUImage::ESCF_1_BIT; - imageInfo.flags = static_cast(0u); - - auto image = driver->createGPUImageOnDedMem(std::move(imageInfo), driver->getDeviceLocalGPUMemoryReqs()); - - IGPUImageView::SCreationParams imgViewInfo; - imgViewInfo.image = std::move(image); - imgViewInfo.format = format; - imgViewInfo.viewType = IGPUImageView::ET_2D; - imgViewInfo.flags = static_cast(0u); - imgViewInfo.subresourceRange.baseArrayLayer = 0u; - imgViewInfo.subresourceRange.baseMipLevel = 0u; - imgViewInfo.subresourceRange.layerCount = 1u; - imgViewInfo.subresourceRange.levelCount = 1u; - - return driver->createGPUImageView(std::move(imgViewInfo)); - } - - std::vector getCBindings() - { - std::vector bindings = - { - { EB_IMAGE_IES_C, asset::EDT_STORAGE_IMAGE, 1, 
asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_IMAGE_S, asset::EDT_STORAGE_IMAGE, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_IMAGE_D, asset::EDT_STORAGE_IMAGE, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_IMAGE_T_MASK, asset::EDT_STORAGE_IMAGE, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_SSBO_HA, asset::EDT_STORAGE_BUFFER, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_SSBO_VA, asset::EDT_STORAGE_BUFFER, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr }, - { EB_SSBO_D, asset::EDT_STORAGE_BUFFER, 1, asset::ISpecializedShader::ESS_COMPUTE, nullptr } - }; - - return bindings; - } - - std::vector getGBindings() - { - const std::vector bindings = - { - { EB_IMAGE_IES_C, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, - { EB_IMAGE_S, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, - { EB_IMAGE_D, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, - { EB_IMAGE_T_MASK, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr } - }; - - return bindings; - } - - template - video::IFrameBuffer* createFBO(const size_t& width, const size_t& height) - { - auto* fbo = driver->addFrameBuffer(); - - bBuffer = createGPUImageView(width, height); - fbo->attach(video::EFAP_COLOR_ATTACHMENT0, core::smart_refctd_ptr(bBuffer)); - - return fbo; - } - - const std::vector assets; - video::IVideoDriver* const driver; - - struct GPUE - { - _NBL_STATIC_INLINE_CONSTEXPR uint8_t NBL_D_IMAGES_AMOUNT = 4u; - - // Compute - core::smart_refctd_ptr cPipeline; - core::smart_refctd_ptr cDescriptorSet; - - IGPUDescriptorSet::SDescriptorInfo cinfos[EB_SIZE]; - IGPUDescriptorSet::SWriteDescriptorSet cwrites[EB_SIZE]; - - struct CSSBODescriptor - { - core::smart_refctd_ptr vAngles, hAngles, data; - core::smart_refctd_ptr dImageIESC, dImageS, dImageD, dImageTMask; - }; - - 
std::vector CSSBOD; - - // Graphics - core::smart_refctd_ptr gPipeline; - core::smart_refctd_ptr gDescriptorSet; - core::smart_refctd_ptr mBuffer; - - IGPUDescriptorSet::SDescriptorInfo ginfos[NBL_D_IMAGES_AMOUNT]; - IGPUDescriptorSet::SWriteDescriptorSet gwrites[NBL_D_IMAGES_AMOUNT]; - - // Shared data - core::smart_refctd_ptr sampler; - } m_gpue; - - #include "nbl/nblpack.h" - struct PushConstant - { - float maxIValue; - float zAngleDegreeRotation; - IESCompute::E_MODE mode = IESCompute::EM_CDC; - } PACK_STRUCT; - #include "nbl/nblunpack.h" - - PushConstant pushConstant; - - video::IFrameBuffer* fbo = nullptr; - core::smart_refctd_ptr bBuffer; - - size_t generalPurposeOffset = 0; -}; - -class IESExampleEventReceiver : public nbl::IEventReceiver -{ -public: - IESExampleEventReceiver() {} - - bool OnEvent(const nbl::SEvent& event) - { - if (event.EventType == nbl::EET_MOUSE_INPUT_EVENT) - { - zDegreeOffset = event.MouseInput.Wheel; - - return true; - } - - if (event.EventType == nbl::EET_KEY_INPUT_EVENT && !event.KeyInput.PressedDown) - { - switch (event.KeyInput.Key) - { - case nbl::KEY_UP: - { - generalPurposeOffset = 1; - return true; - } - case nbl::KEY_DOWN: - { - generalPurposeOffset = -1; - return true; - } - case nbl::KEY_KEY_C: - { - mode = IESCompute::EM_CDC; - return true; - } - case nbl::KEY_KEY_V: - { - mode = IESCompute::EM_IES_C; - return true; - } - case nbl::KEY_KEY_S: - { - mode = IESCompute::EM_SPERICAL_C; - return true; - } - case nbl::KEY_KEY_D: - { - mode = IESCompute::EM_DIRECTION; - return true; - } - case nbl::KEY_KEY_M: - { - mode = IESCompute::EM_PASS_T_MASK; - return true; - } - case nbl::KEY_KEY_Q: - { - running = false; - return true; - } - } - } - - return false; - } - - void reset() { zDegreeOffset = 0; generalPurposeOffset = 0; } - inline const auto& isRunning() const { return running; } - inline const auto& getMode() const { return mode; } - template - inline const auto& getZDegreeOffset() const { return static_cast(zDegreeOffset); 
} - inline const auto& getGeneralPurposeOffset() { return generalPurposeOffset; } -private: - double zDegreeOffset = 0.0; - int8_t generalPurposeOffset = 0; - IESCompute::E_MODE mode = IESCompute::EM_CDC; - bool running = true; -}; - -int main() -{ - nbl::SIrrlichtCreationParameters params; - params.Bits = 24; - params.ZBufferBits = 24; - params.DriverType = video::EDT_OPENGL; - params.WindowSize = dimension2d(640, 640); - params.Fullscreen = false; - params.Vsync = true; - params.Doublebuffer = true; - params.Stencilbuffer = false; - - auto device = createDeviceEx(params); - - if (!device) - return 1; - - auto* driver = device->getVideoDriver(); - auto* am = device->getAssetManager(); - - asset::IAssetLoader::SAssetLoadParams lparams; - lparams.loaderFlags; - - auto readJSON = [](const std::string& filePath) - { - std::ifstream file(filePath.data()); - if (!file.is_open()) { - printf("Invalid input json \"%s\" file! Aborting..", filePath.data()); - exit(0x45); - } - - std::stringstream buffer; - buffer << file.rdbuf(); - - return buffer.str(); - }; - - const auto INPUT_JSON_FILE_PATH_FS = std::filesystem::absolute("../inputs.json"); - const auto INPUT_JSON_FILE_PATH = INPUT_JSON_FILE_PATH_FS.string(); - const auto jsonBuffer = readJSON(INPUT_JSON_FILE_PATH); - if (jsonBuffer.empty()) { - printf("Read input json \"%s\" file is empty! Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); - exit(0x45); - } - - const auto jsonMap = json::parse(jsonBuffer.c_str()); - - if (!jsonMap["directories"].is_array()) - { - printf("Input json \"%s\" file's field \"directories\" is not an array! Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); - exit(0x45); - } - - if (!jsonMap["files"].is_array()) - { - printf("Input json \"%s\" file's field \"files\" is not an array! Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); - exit(0x45); - } - - if (!jsonMap["writeAssets"].is_boolean()) - { - printf("Input json \"%s\" file's field \"writeAssets\" is not a boolean! 
Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); - exit(0x45); - } - - const auto&& IES_INPUTS = [&]() - { - std::vector inputFilePaths; - - auto addFile = [&inputFilePaths, &INPUT_JSON_FILE_PATH_FS](const std::string_view filePath) -> void - { - auto path = std::filesystem::path(filePath); - - if (!path.is_absolute()) - path = std::filesystem::absolute(INPUT_JSON_FILE_PATH_FS.parent_path() / path); - - if (std::filesystem::exists(path) && std::filesystem::is_regular_file(path) && path.extension() == ".ies") - inputFilePaths.push_back(path.string()); - else - { - printf("Invalid input path \"%s\"! Aborting..\n", path.string().c_str()); - exit(0x45); - } - }; - - auto addFiles = [&inputFilePaths, &INPUT_JSON_FILE_PATH_FS, &addFile](const std::string_view directoryPath) -> void - { - auto directory(std::filesystem::absolute(INPUT_JSON_FILE_PATH_FS.parent_path() / directoryPath)); - if (!std::filesystem::exists(directory) || !std::filesystem::is_directory(directory)) { - printf("Invalid input directory \"%s\"! 
Aborting..\n", directoryPath.data()); - exit(0x45); - } - - for (const auto& entry : std::filesystem::directory_iterator(directory)) - addFile(entry.path().string().c_str()); - }; - - // parse json - { - std::vector jDirectories; - jsonMap["directories"].get_to(jDirectories); - - for (const auto& it : jDirectories) - addFiles(it); - - std::vector jFiles; - jsonMap["files"].get_to(jFiles); - - for (const auto& it : jFiles) - addFile(it); - } - - return std::move(inputFilePaths); - }(); - - const bool GUI = [&]() - { - bool b = false; - jsonMap["gui"].get_to(b); - - return b; - }(); - - const bool WRITE_ASSETS = [&]() - { - bool b = false; - jsonMap["writeAssets"].get_to(b); - - return b; - }(); - - const auto ASSETS = [&]() - { - size_t loaded = {}, total = IES_INPUTS.size(); - std::vector assets; - std::vector outStems; - - for (size_t i = 0; i < total; ++i) - { - auto asset = device->getAssetManager()->getAsset(IES_INPUTS[i].c_str(), lparams); - const auto* path = IES_INPUTS[i].c_str(); - const auto stem = std::filesystem::path(IES_INPUTS[i].c_str()).stem().string(); - - if (asset.getMetadata()) - { - assets.emplace_back(std::move(asset)); - outStems.push_back(stem); - ++loaded; - } - else - printf("Could not load metadata from \"%s\" asset! Skipping..\n", path); - } - printf("Loaded [%s/%s] assets! Status: %s\n", std::to_string(loaded).c_str(), std::to_string(total).c_str(), loaded == total ? 
"PASSING" : "FAILING"); - - return std::make_pair(assets, outStems); - }(); - - if (GUI) - printf("GUI Mode: ON\n"); - else - { - printf("GUI Mode: OFF\nExiting..."); - exit(0); - } - - IESCompute iesComputeEnvironment(driver, am, ASSETS.first); - IESExampleEventReceiver receiver; - device->setEventReceiver(&receiver); - - auto getModeRS = [&]() - { - switch (iesComputeEnvironment.getMode()) - { - case IESCompute::EM_CDC: - return "CDC"; - case IESCompute::EM_IES_C: - return "IES Candela"; - case IESCompute::EM_SPERICAL_C: - return "Spherical Coordinates"; - case IESCompute::EM_DIRECTION: - return "Direction sample"; - case IESCompute::EM_PASS_T_MASK: - return "Pass Mask"; - default: - return "ERROR"; - } - }; - - auto getProfileRS = [&](const asset::CIESProfile& profile) - { - switch (profile.getSymmetry()) - { - case asset::CIESProfile::ISOTROPIC: - return "ISOTROPIC"; - case asset::CIESProfile::QUAD_SYMETRIC: - return "QUAD_SYMETRIC"; - case asset::CIESProfile::HALF_SYMETRIC: - return "HALF_SYMETRIC"; - case asset::CIESProfile::OTHER_HALF_SYMMETRIC: - return "OTHER_HALF_SYMMETRIC"; - case asset::CIESProfile::NO_LATERAL_SYMMET: - return "NO_LATERAL_SYMMET"; - default: - return "ERROR"; - } - }; - - while (device->run() && receiver.isRunning()) - { - iesComputeEnvironment.updateGeneralPurposeOffset(receiver.getGeneralPurposeOffset()); - iesComputeEnvironment.updateZDegree(receiver.getZDegreeOffset()); - iesComputeEnvironment.updateMode(receiver.getMode()); - - iesComputeEnvironment.begin(); - iesComputeEnvironment.dispatch(); - iesComputeEnvironment.renderpass(); - iesComputeEnvironment.end(); - - std::wostringstream windowCaption; - { - const auto* const mode = getModeRS(); - const auto* const profile = getProfileRS(iesComputeEnvironment.getActiveProfile()); - - windowCaption << "IES Demo - Nabla Engine - Profile: " << profile << " - Degrees: " << iesComputeEnvironment.getZDegree() << " - Mode: " << mode; - device->setWindowCaption(windowCaption.str()); - } - 
receiver.reset(); - } - - if(WRITE_ASSETS) - for (size_t i = 0; i < ASSETS.first.size(); ++i) - { - const auto& bundle = ASSETS.first[i]; - const auto& stem = ASSETS.second[i]; - - const auto& profile = bundle.getMetadata()->selfCast()->profile; - // const std::string out = std::filesystem::absolute("out/cpu/" + std::string(getProfileRS(profile)) + "/" + stem + ".png").string(); TODO (?): why its not working? ah touch required probably first - const std::string out = std::filesystem::absolute(std::string(getProfileRS(profile)) + "_" + stem + ".png").string(); - - asset::IAssetWriter::SAssetWriteParams wparams(bundle.getContents().begin()->get()); - - if (am->writeAsset(out.c_str(), wparams)) - printf("Saved \"%s\"\n", out.c_str()); - else - printf("Could not write \"%s\"\n", out.c_str()); - } - - return 0; -} \ No newline at end of file diff --git a/50.IESProfileTest/shader.frag b/50.IESProfileTest/shader.frag deleted file mode 100644 index ec7a00b8f..000000000 --- a/50.IESProfileTest/shader.frag +++ /dev/null @@ -1,71 +0,0 @@ -#version 430 core -// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include - -layout (location = 0) in vec3 Pos; - -layout (location = 0) out vec4 outColor; - -layout(set = 3, binding = 0) uniform sampler2D inIESCandelaImage; -layout(set = 3, binding = 1) uniform sampler2D inSphericalCoordinatesImage; -layout(set = 3, binding = 2) uniform sampler2D inOUVProjectionDirectionImage; -layout(set = 3, binding = 3) uniform sampler2D inPassTMask; - -layout(push_constant) uniform PushConstants -{ - float maxIValue; - float zAngleDegreeRotation; - uint mode; - uint dummy; -} pc; - -#define M_PI 3.1415926536 - -float plot(float cand, float pct, float bold){ - return smoothstep( pct-0.005*bold, pct, cand) - - smoothstep( pct, pct+0.005*bold, cand); -} - -// vertical cut of IES (i.e. 
cut by plane x = 0) -float f(vec2 uv) { - return texture(inIESCandelaImage,nbl_glsl_IES_convert_dir_to_uv(normalize(vec3(uv.x, 0.001, uv.y)))).x; - // float vangle = (abs(atan(uv.x,uv.y)))/(M_PI); - // float hangle = uv.x <= 0.0 ? 0.0 : 1.0; - // return texture(inIESCandelaImage,vec2(hangle,vangle)).x; -} - -void main() -{ - vec2 ndc = Pos.xy; - vec2 uv = (ndc + 1) / 2; - - if(pc.mode == 0) - { - float dist = length(ndc)*1.015625; - vec3 col = vec3(plot(dist,1.0,0.75)); - - float normalizedStrength = f(ndc); - if (dist + + +NBL_EXPOSE_NAMESPACES + +namespace nbl::ext::imgui +{ + class UI; +} + +template +concept AppIESByteCount = std::unsigned_integral; + +template +concept AppIESContainer = std::ranges::sized_range && + (std::same_as, float> || + std::same_as, hlsl::ies::IESTextureInfo>); +static_assert(alignof(hlsl::ies::IESTextureInfo) == 4u, "IESTextureInfo must be 4 byte aligned"); + +template +concept AppIESBufferCreationAllowed = AppIESByteCount || AppIESContainer; + +class IESWindowedApplication : public virtual SimpleWindowedApplication +{ + using base_t = SimpleWindowedApplication; + +public: + constexpr static inline uint8_t MaxFramesInFlight = 3; + + template + IESWindowedApplication(const hlsl::uint16_t2 _initialResolution, const asset::E_FORMAT _depthFormat, Args&&... 
args) : + base_t(std::forward(args)...), m_initialResolution(_initialResolution), m_depthFormat(_depthFormat) {} + + using surface_list_t = decltype(std::declval().getSurfaces()); + + inline surface_list_t getSurfaces() const override + { + if (!m_surface) + { + auto windowCallback = make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = make_smart_refctd_ptr(); + params.width = m_initialResolution[0]; + params.height = m_initialResolution[1]; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_CAN_MINIMIZE | IWindow::ECF_CAN_MAXIMIZE | IWindow::ECF_CAN_RESIZE; + params.windowCaption = "IESViewer"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = CSimpleResizeSurface::create(std::move(surface)); + + if (m_surface) + return { {m_surface->getSurface()} }; + + return {}; + } + + inline bool onAppInitialized(core::smart_refctd_ptr&& system) override + { + using namespace nbl::core; + using namespace nbl::video; + if (!MonoSystemMonoLoggerApplication::onAppInitialized(std::move(system))) + return false; + + m_inputSystem = make_smart_refctd_ptr(system::logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + if (!base_t::onAppInitialized(std::move(system))) + return false; + + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + swapchainParams.sharedParams.imageUsage |= IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + auto scResources = std::make_unique(m_device.get(), m_depthFormat, swapchainParams.surfaceFormat.format, getDefaultSubpassDependencies()); + auto* 
renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + + return true; + } + + inline void workLoopBody() override final + { + using namespace nbl::core; + using namespace nbl::video; + if (m_window && m_surface && !m_window->isMinimized()) + { + if (auto* scRes = m_surface->getSwapchainResources()) + { + if (auto* sc = scRes->getSwapchain()) + { + const auto& params = sc->getCreationParameters().sharedParams; + if (params.width != m_window->getWidth() || params.height != m_window->getHeight()) + { + m_surface->recreateSwapchain(); + return; + } + } + } + } + + const uint32_t framesInFlightCount = hlsl::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + if (m_framesInFlight.size() >= framesInFlightCount) + { + const ISemaphore::SWaitInfo framesDone[] = + { + { + .semaphore = m_framesInFlight.front().semaphore.get(), + .value = m_framesInFlight.front().value + } + }; + if (m_device->blockForSemaphores(framesDone) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + m_framesInFlight.pop_front(); + } + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + oracle.reportEndFrameRecord(); + const auto timestamp = oracle.getNextPresentationTimeStamp(); + oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + if (!m_currentImageAcquire) + return; + + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = { renderFrame(nextPresentationTimestamp) }; + onPostRenderFrame(rendered[0]); + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + if (rendered->semaphore) + m_framesInFlight.emplace_back(smart_refctd_ptr(rendered->semaphore), 
rendered->value); + } + + inline bool keepRunning() override final + { + if (m_exitRequested) + return false; + if (m_surface->irrecoverable()) + return false; + + return true; + } + + inline bool onAppTerminated() override + { + m_inputSystem = nullptr; + m_device->waitIdle(); + m_framesInFlight.clear(); + m_surface = nullptr; + m_window = nullptr; + return base_t::onAppTerminated(); + } + +protected: + inline void onAppInitializedFinish() + { + m_winMgr->show(m_window.get()); + oracle.reportBeginFrameRecord(); + } + inline const auto& getCurrentAcquire() const { return m_currentImageAcquire; } + inline void requestExit() { m_exitRequested = true; } + + virtual const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const = 0; + virtual video::IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) = 0; + virtual void onPostRenderFrame(const video::IQueue::SSubmitInfo::SSemaphoreInfo& rendered) {} + + const hlsl::uint16_t2 m_initialResolution; + const asset::E_FORMAT m_depthFormat; + core::smart_refctd_ptr m_inputSystem; + core::smart_refctd_ptr m_window; + core::smart_refctd_ptr> m_surface; + +private: + struct SSubmittedFrame + { + core::smart_refctd_ptr semaphore; + uint64_t value; + }; + core::deque m_framesInFlight; + video::ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + video::CDumbPresentationOracle oracle; + bool m_exitRequested = false; +}; + +class IESViewer final : public IESWindowedApplication, public BuiltinResourcesApplication +{ + using device_base_t = IESWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; + +public: + static constexpr inline uint32_t AppWindowWidth = 669u * 2u; + static constexpr inline uint32_t AppWindowHeight = AppWindowWidth; + static constexpr inline asset::E_FORMAT AppDepthBufferFormat = asset::EF_UNKNOWN; + static constexpr inline const char* MediaEntry = "../../media"; + static constexpr 
inline const char* InputJsonFile = "../inputs.json"; + + IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD); + + bool onAppInitialized(smart_refctd_ptr&& system) override; + IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override; + +protected: + const IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override; + void onPostRenderFrame(const video::IQueue::SSubmitInfo::SSemaphoreInfo& rendered) override; + +private: + smart_refctd_ptr m_graphicsPipeline; + smart_refctd_ptr m_computePipeline; + std::array, IGPUPipelineLayout::DESCRIPTOR_SET_COUNT> m_descriptors; + + bool m_running = true; + std::vector m_assets; + + size_t m_realFrameIx = 0; + smart_refctd_ptr m_semaphore; + std::array, device_base_t::MaxFramesInFlight> m_cmdBuffers; + + std::array, device_base_t::MaxFramesInFlight> m_frameBuffers2D, m_frameBuffers3D; + + smart_refctd_ptr m_scene; + smart_refctd_ptr m_renderer; + Camera camera; + uint32_t m_plot3DWidth = 640u; + uint32_t m_plot3DHeight = 640u; + float m_plotRadius = 100.0f; + bool m_ciMode = false; + bool m_ciScreenshotDone = false; + uint32_t m_ciFrameCounter = 0u; + static constexpr uint32_t CiFramesBeforeCapture = 10u; + system::path m_ciScreenshotPath; + std::vector m_assetLabels; + std::vector m_candelaDirty; + + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + + struct { + smart_refctd_ptr it; + smart_refctd_ptr descriptor; + } ui; + + struct UIState + { + size_t activeAssetIx = 0; + float cameraMoveSpeed = 1.0f; + float cameraRotateSpeed = 1.0f; + float cameraFovDeg = 60.0f; + bool cameraControlEnabled = false; + bool cameraControlApplied = false; + bool wireframeEnabled = false; + bool showOctaMapPreview = true; + bool showHints = true; + bool plot2DRectValid = false; + hlsl::float32_t2 plot2DRectMin = hlsl::float32_t2(0.f, 0.f); + 
hlsl::float32_t2 plot2DRectMax = hlsl::float32_t2(0.f, 0.f); + + struct + { + IES::E_MODE view = IES::EM_CDC; + bitflag sphere = + bitflag(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE) | hlsl::this_example::ies::ESM_FALSE_COLOR; + } mode; + } uiState; + + void processMouse(const IMouseEventChannel::range_t& events); + void processKeyboard(const IKeyboardEventChannel::range_t& events); + + smart_refctd_ptr createImageView(const size_t width, const size_t height, E_FORMAT format, std::string name, + bitflag usage = bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_STORAGE_BIT, + bitflag aspectFlags = bitflag(IImage::EAF_COLOR_BIT)); + bool recreate3DPlotFramebuffers(uint32_t width, uint32_t height); + void applyWindowMode(); + bool parseCommandLine(); + + template + requires AppIESBufferCreationAllowed + smart_refctd_ptr createBuffer(const T& in, std::string name, bool unmap = true) + { + const void* src = nullptr; size_t bytes = {}; + if constexpr (AppIESByteCount) + bytes = static_cast(in); + else if (AppIESContainer) + { + using element_t = std::ranges::range_value_t; + static_assert(alignof(element_t) == 4u, "IESViewer::createBuffer: AppIESContainer's \"T\" must be 4 byte aligned"); + bytes = sizeof(element_t) * in.size(); + src = static_cast(std::data(in)); + } + return implCreateBuffer(src, bytes, name, unmap); + } + smart_refctd_ptr implCreateBuffer(const void* src, size_t bytes, const std::string& name, bool unmap); + + void uiListener(); +}; + +#endif // _THIS_EXAMPLE_APP_HPP_ diff --git a/50.IESViewer/AppEvent.cpp b/50.IESViewer/AppEvent.cpp new file mode 100644 index 000000000..cbd5ba042 --- /dev/null +++ b/50.IESViewer/AppEvent.cpp @@ -0,0 +1,62 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" + +void IESViewer::processMouse(const nbl::ui::IMouseEventChannel::range_t& events) +{ + for (auto it = events.begin(); it != events.end(); it++) + { + auto ev = *it; + + if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) + { + auto* cursorControl = m_window ? m_window->getCursorControl() : nullptr; + if (!cursorControl || !uiState.plot2DRectValid) + continue; + const auto cursor = cursorControl->getPosition(); + const float cursorX = static_cast(cursor.x); + const float cursorY = static_cast(cursor.y); + if (cursorX < uiState.plot2DRectMin.x || cursorX > uiState.plot2DRectMax.x || + cursorY < uiState.plot2DRectMin.y || cursorY > uiState.plot2DRectMax.y) + continue; + + auto& ies = m_assets[uiState.activeAssetIx]; + const auto& accessor = ies.getProfile()->getAccessor(); + + auto impulse = ev.scrollEvent.verticalScroll * 0.02f; + ies.zDegree = std::clamp(ies.zDegree + impulse, accessor.hAngles.front(), accessor.hAngles.back()); + } + } +} + +void IESViewer::processKeyboard(const nbl::ui::IKeyboardEventChannel::range_t& events) +{ + for (auto it = events.begin(); it != events.end(); it++) + { + const auto ev = *it; + + if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) + { + if (ev.keyCode == nbl::ui::EKC_UP_ARROW) + uiState.activeAssetIx = std::clamp(uiState.activeAssetIx + 1, 0, m_assets.size() - 1u); + else if (ev.keyCode == nbl::ui::EKC_DOWN_ARROW) + uiState.activeAssetIx = std::clamp(uiState.activeAssetIx - 1, 0, m_assets.size() - 1u); + + auto& ies = m_assets[uiState.activeAssetIx]; + + if (ev.keyCode == nbl::ui::EKC_C) + uiState.mode.view = IES::EM_CDC; + else if (ev.keyCode == nbl::ui::EKC_V) + uiState.mode.view = IES::EM_OCTAHEDRAL_MAP; + else if (ev.keyCode == nbl::ui::EKC_ESCAPE && uiState.cameraControlEnabled) + uiState.cameraControlEnabled = false; + else if (ev.keyCode == nbl::ui::EKC_SPACE) + uiState.cameraControlEnabled = 
!uiState.cameraControlEnabled; + + if (ev.keyCode == nbl::ui::EKC_Q) + requestExit(); + } + } +} diff --git a/50.IESViewer/AppGPU.cpp b/50.IESViewer/AppGPU.cpp new file mode 100644 index 000000000..c9fa20e76 --- /dev/null +++ b/50.IESViewer/AppGPU.cpp @@ -0,0 +1,102 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" + +core::smart_refctd_ptr IESViewer::createImageView(const size_t width, const size_t height, E_FORMAT format, std::string name, bitflag usage, bitflag aspectFlags) +{ + IGPUImage::SCreationParams imageParams{}; + imageParams.type = IImage::E_TYPE::ET_2D; + imageParams.extent.height = height; + imageParams.extent.width = width; + imageParams.extent.depth = 1u; + imageParams.format = format; + imageParams.mipLevels = 1u; + imageParams.flags = IImage::ECF_NONE; + imageParams.arrayLayers = 1u; + imageParams.samples = IImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + imageParams.usage = usage; + + auto image = m_device->createImage(std::move(imageParams)); + + if (!image) + { + m_logger->log("Failed to create \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + image->setObjectDebugName(name.c_str()); // name the image only after the null check above, a failed creation must reach the error path instead of being dereferenced + + auto allocation = m_device->allocate(image->getMemoryReqs(), image.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); + if (!allocation.isValid()) + { + m_logger->log("Failed to allocate device memory for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + IGPUImageView::SCreationParams viewParams{}; + viewParams.image = std::move(image); + viewParams.format = format; + viewParams.viewType = IGPUImageView::ET_2D; + viewParams.flags = IImageViewBase::ECF_NONE; + viewParams.subresourceRange.baseArrayLayer = 0u; + viewParams.subresourceRange.baseMipLevel = 0u; + viewParams.subresourceRange.layerCount = 1u; + 
viewParams.subresourceRange.levelCount = 1u; + viewParams.subresourceRange.aspectMask = aspectFlags; + + auto imageView = m_device->createImageView(std::move(viewParams)); + + if (not imageView) + m_logger->log("Failed to create image view for \"%s\" image!", system::ILogger::ELL_ERROR, name.c_str()); + + return imageView; +} + +core::smart_refctd_ptr IESViewer::implCreateBuffer(const void* src, size_t bytes, const std::string& name, bool unmap) +{ + IGPUBuffer::SCreationParams bufferParams = {}; + bufferParams.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT; + bufferParams.size = bytes; + + auto buffer = m_device->createBuffer(std::move(bufferParams)); + + if (not buffer) + { + m_logger->log("Failed to create \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + buffer->setObjectDebugName(name.c_str()); // name the buffer only after the null check above, a failed creation must reach the error path instead of being dereferenced + + auto memoryReqs = buffer->getMemoryReqs(); + + if (m_utils) + memoryReqs.memoryTypeBits &= m_utils->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); + + auto allocation = m_device->allocate(memoryReqs, buffer.get(), core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT)); + if (not allocation.isValid()) + { + m_logger->log("Failed to allocate \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + auto* mappedPointer = allocation.memory->map({ 0ull, memoryReqs.size }, IDeviceMemoryAllocation::EMCAF_READ_AND_WRITE); + + if (not mappedPointer) + { + m_logger->log("Failed to map device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + if(src) + memcpy(mappedPointer, src, buffer->getSize()); + + if(unmap) + if (not allocation.memory->unmap()) + { + m_logger->log("Failed to unmap device memory for \"%s\" buffer!", ILogger::ELL_ERROR, name.c_str()); + return nullptr; + } + + return buffer; +} \ No newline at end of file diff --git a/50.IESViewer/AppInit.cpp b/50.IESViewer/AppInit.cpp new
file mode 100644 index 000000000..79338e8ec --- /dev/null +++ b/50.IESViewer/AppInit.cpp @@ -0,0 +1,654 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "argparse/argparse.hpp" +#include "App.hpp" +#include +#include +#include +#include +#include "AppInputParser.hpp" +#include "app_resources/common.hlsl" +#include "app_resources/imgui.opts.hlsl" +#include "nbl/ext/ImGui/ImGui.h" +#include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +bool IESViewer::parseCommandLine() +{ + argparse::ArgumentParser parser("IESViewer"); + parser.add_argument("--ci") + .help("Run in CI mode: capture a screenshot after a few frames and exit.") + .default_value(false) + .implicit_value(true); + + try + { + parser.parse_args({ argv.data(), argv.data() + argv.size() }); + } + catch (const std::exception& e) + { + if (m_logger) + m_logger->log("Failed to parse arguments: %s", system::ILogger::ELL_ERROR, e.what()); + return false; + } + + m_ciMode = parser.get("--ci"); + if (m_ciMode) + m_ciScreenshotPath = localOutputCWD / "iesviewer_ci.png"; + return true; +} + +bool IESViewer::onAppInitialized(smart_refctd_ptr&& system) +{ + if (!parseCommandLine()) + return false; + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + const auto media = absolute(path(MediaEntry)); + + AppInputParser::Output out; + AppInputParser parser(system::logger_opt_ptr(m_logger.get())); + if (!parser.parse(out, InputJsonFile, media.string())) + return false; + + m_logger->log("Loading IES m_assets..", system::ILogger::ELL_INFO); + { + auto start = std::chrono::high_resolution_clock::now(); + size_t loaded = {}, total = out.inputList.size(); + IAssetLoader::SAssetLoadParams lp = {}; + 
lp.loaderFlags = IAssetLoader::E_LOADER_PARAMETER_FLAGS::ELPF_LOAD_METADATA_ONLY; + lp.logger = system::logger_opt_ptr(m_logger.get()); + + for (const auto& in : out.inputList) + { + auto asset = m_assetMgr->getAsset(in.c_str(), lp); + + if (asset.getMetadata()) + { + auto& ies = m_assets.emplace_back(); + ies.bundle = std::move(asset); + ies.key = path(in).lexically_relative(media).string(); + ++loaded; + + m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, in.c_str()); + } + else + m_logger->log("Failed to load metadata for \"%s\"! Skipping..", system::ILogger::ELL_WARNING, in.c_str()); + } + const auto sl = std::to_string(loaded), st = std::to_string(total); + const bool passed = loaded == total; + + if (not passed) + { + auto diff = std::to_string(total - loaded); + m_logger->log("Failed to load [%s/%s] IES m_assets!", system::ILogger::ELL_ERROR, diff.c_str(), st.c_str()); + } + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished loading IES m_assets, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); + } + { + m_assetLabels.clear(); + m_assetLabels.reserve(m_assets.size()); + for (const auto& ies : m_assets) + m_assetLabels.emplace_back(path(ies.key).filename().string()); + } + m_candelaDirty.assign(m_assets.size(), true); + + m_logger->log("Creating GPU IES resources..", system::ILogger::ELL_INFO); + { + auto start = std::chrono::high_resolution_clock::now(); + + auto textureInfos = createBuffer(m_assets.size() * sizeof(hlsl::ies::IESTextureInfo), "IES Textures Info", false); + if(!textureInfos) return false; + auto* textureInfosMapped = static_cast(textureInfos->getBoundMemory().memory->getMappedPointer()); + + for (size_t i = 0u; i < m_assets.size(); ++i) + { + auto& ies = m_assets[i]; + const auto* profile = ies.getProfile(); + const auto& accessor = profile->getAccessor(); + const auto& resolution = 
accessor.properties.optimalIESResolution; + textureInfosMapped[i] = CIESProfile::texture_t::create(accessor.properties.maxCandelaValue, resolution).info; + ies.buffers.textureInfo.buffer = textureInfos; + ies.buffers.textureInfo.offset = i * sizeof(hlsl::ies::IESTextureInfo); + + #define CREATE_VIEW(VIEW, FORMAT, NAME) \ + if (!(VIEW = createImageView(resolution.x, resolution.y, FORMAT, NAME + ies.key) )) return false; + + // Filled later by the compute pass (CdcCS) when candela data is marked dirty. + CREATE_VIEW(ies.views.candelaOctahedralMap, asset::EF_R16_UNORM, "IES Candela Octahedral Map Image: ") + + #define CREATE_BUFFER(BUFFER, DATA, NAME) \ + if (!(BUFFER = createBuffer(DATA, NAME + ies.key) )) return false; + + CREATE_BUFFER(ies.buffers.vAngles, accessor.vAngles, "IES Vertical Angles Buffer: ") + CREATE_BUFFER(ies.buffers.hAngles, accessor.hAngles, "IES Horizontal Angles Buffer: ") + CREATE_BUFFER(ies.buffers.data, accessor.data, "IES Data Buffer: ") + } + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished creating GPU IES resources, took %s seconds.", system::ILogger::ELL_PERFORMANCE, took.c_str()); + } + + auto createShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = system::logger_opt_ptr(m_logger.get()); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key, lp); + const auto m_assets = assetBundle.getContents(); + + if (m_assets.empty()) + { + m_logger->log("Failed to load \"%s\" shader!", system::ILogger::ELL_ERROR, key.data()); + return nullptr; + } + + auto spirvShader = IAsset::castDown(m_assets[0]); + + if (spirvShader) + m_logger->log("Loaded \"%s\".", system::ILogger::ELL_INFO, key.data()); + else + m_logger->log("Failed to cast \"%s\" asset to IShader!", 
system::ILogger::ELL_ERROR, key.data()); + + return spirvShader; + }; + + #define CREATE_SHADER(SHADER, PATH) \ + if (!(SHADER = createShader.template operator()() )) return false; + + m_logger->log("Loading GPU shaders..", system::ILogger::ELL_INFO); + + struct + { + smart_refctd_ptr ies, imgui, fullScreenTriangleVS; + } shaders; + { + auto start = std::chrono::high_resolution_clock::now(); + CREATE_SHADER(shaders.ies, "ies.unified") + CREATE_SHADER(shaders.imgui, "imgui.unified") + shaders.fullScreenTriangleVS = ext::FullScreenTriangle::ProtoPipeline::createDefaultVertexShader(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!shaders.fullScreenTriangleVS) + return logFail("Failed to create FullScreenTriangle vertex shader!"); + auto elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start); + auto took = std::to_string(elapsed.count()); + m_logger->log("Finished loading GPU shaders, took %s seconds!", system::ILogger::ELL_PERFORMANCE, took.c_str()); + } + + // Pipelines & Descriptor Sets + { + using binding_flags_t = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + using stage_flags_t = asset::IShader::E_SHADER_STAGE; + static constexpr auto TexturesCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT) | binding_flags_t::ECF_PARTIALLY_BOUND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT; + static constexpr auto SamplersCreateFlags = core::bitflag(binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT); + static constexpr auto StageFlags = core::bitflag(stage_flags_t::ESS_FRAGMENT) | stage_flags_t::ESS_VERTEX | stage_flags_t::ESS_COMPUTE; + + //! 
single descriptor for both compute & graphics, we will only need to trasition images' layout with a barrier + #define BINDING_TEXTURE(IX, TYPE) { .binding = IX, .type = TYPE, .createFlags = TexturesCreateFlags, .stageFlags = StageFlags, .count = hlsl::this_example::MaxIesImages, .immutableSamplers = nullptr } + #define BINDING_SAMPLER(IX) { .binding = IX, .type = IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = SamplersCreateFlags, .stageFlags = StageFlags, .count = 1u, .immutableSamplers = nullptr } + static constexpr auto bindings = std::to_array + ({ + BINDING_TEXTURE(0u, IDescriptor::E_TYPE::ET_SAMPLED_IMAGE), BINDING_TEXTURE(0u + 10u, IDescriptor::E_TYPE::ET_STORAGE_IMAGE), // candela octahedral map + BINDING_SAMPLER(0u + 100u) + }); + + const uint32_t texturesCount = m_assets.size(); + smart_refctd_ptr generalSampler; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE; + params.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK; + params.MinFilter = ISampler::ETF_LINEAR; + params.MaxFilter = ISampler::ETF_LINEAR; + params.MipmapMode = ISampler::ESMM_LINEAR; + params.AnisotropicFilter = 0u; + params.CompareEnable = false; + params.CompareFunc = ISampler::ECO_ALWAYS; + + generalSampler = m_device->createSampler(params); + + if (not generalSampler) + { + m_logger->log("Failed to create sampler!", system::ILogger::ELL_ERROR); + return false; + } + + generalSampler->setObjectDebugName("General IES sampler"); + } + + auto scRes = static_cast(m_surface->getSwapchainResources()); + scRes->getRenderpass(); // note it also creates rp if nulled + { + auto descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + + if (not descriptorSetLayout) + return logFail("Failed to create descriptor set layout!"); + + auto range = std::to_array({ 
{StageFlags.value, offsetof(hlsl::this_example::ies::PushConstants, cdc), sizeof(hlsl::this_example::ies::CdcPC)} }); + auto pipelineLayout = m_device->createPipelineLayout(range, core::smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); + + if (not pipelineLayout) + return logFail("Failed to create pipeline layout!"); + + // Compute Pipeline + { + auto params = std::to_array({ {} });; + params[0].layout = pipelineLayout.get(); + params[0].shader.shader = shaders.ies.get(); + params[0].shader.entryPoint = "CdcCS"; + + if (!m_device->createComputePipelines(nullptr, params, &m_computePipeline)) + return logFail("Failed to create compute pipeline!"); + } + + // Graphics Pipeline + { + IGPUPipelineBase::SShaderEntryMap specConstants; + const auto orientationAsUint32 = static_cast(SurfaceTransform::FLAG_BITS::IDENTITY_BIT); + specConstants[0] = std::span{ reinterpret_cast(&orientationAsUint32), sizeof(orientationAsUint32) }; + + video::IGPUPipelineBase::SShaderSpecInfo specInfo[] = + { + {.shader = shaders.fullScreenTriangleVS.get(), .entryPoint = "__nbl__hlsl__ext__FullScreenTriangle__vertex_main", .entries = &specConstants }, + {.shader = shaders.ies.get(), .entryPoint = "CdcPS" } + }; + + auto params = std::to_array({ {} }); + params[0].renderpass = scRes->getRenderpass(); + params[0].vertexShader = specInfo[0]; + params[0].fragmentShader = specInfo[1]; + params[0].layout = pipelineLayout.get(); + params[0].cached = + { + .vertexInput = {}, // full screen tri ext, no inputs + .primitiveAssembly = {}, + .rasterization = { + .polygonMode = EPM_FILL, + .faceCullingMode = EFCM_NONE, + .depthWriteEnable = false, + }, + .blend = {}, + .subpassIx = 0u + }; + + if (!m_device->createGraphicsPipelines(nullptr, params, &m_graphicsPipeline)) + return logFail("Failed to create graphics pipeline!"); + } + + const auto dscLayoutPtrs = m_graphicsPipeline->getLayout()->getDescriptorSetLayouts(); + auto pool = 
m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dscLayoutPtrs); + pool->createDescriptorSets(dscLayoutPtrs.size(), dscLayoutPtrs.data(), m_descriptors.data()); + { + constexpr auto ViewsCount = 1u; // used to be 4u with debug maps (counted x2 for RO & RW binding but one descriptor) + std::array, ViewsCount * 2u + 1u> infos; + auto addInfo = [](auto& list, auto desc, IImage::LAYOUT layout) + { + auto& info = list.emplace_back(); + info.desc = desc; + info.info.image.imageLayout = layout; + }; + + for (uint32_t i = 0; i < m_assets.size(); ++i) + { + auto& ies = m_assets[i]; + addInfo(infos[0u], ies.views.candelaOctahedralMap, IImage::LAYOUT::READ_ONLY_OPTIMAL); + addInfo(infos[1u], ies.views.candelaOctahedralMap, IImage::LAYOUT::GENERAL); + } + addInfo(infos.back(), generalSampler, IImage::LAYOUT::READ_ONLY_OPTIMAL); + auto* samplerInfo = infos.back().data(); + + std::array writes = {}; + auto& sampledWrite = writes[0u]; + sampledWrite.count = m_assets.size(); + sampledWrite.info = infos[0u].data(); + sampledWrite.dstSet = m_descriptors[0u].get(); + sampledWrite.arrayElement = 0u; + sampledWrite.binding = 0u; + + auto& storageWrite = writes[1u]; + storageWrite.count = m_assets.size(); + storageWrite.info = infos[1u].data(); + storageWrite.dstSet = m_descriptors[0u].get(); + storageWrite.arrayElement = 0u; + storageWrite.binding = 10u; + + auto& write = writes.back(); + write.count = 1u; + write.info = samplerInfo; + write.dstSet = m_descriptors[0u].get(); + write.arrayElement = 0u; + write.binding = 0u + 100u; + + if (!m_device->updateDescriptorSets(writes, {})) + return logFail("Failed to write descriptor sets"); + } + } + } + + // frame buffers + { + // TODO: I will create my own + auto renderpass = smart_refctd_ptr(static_cast(m_surface->getSwapchainResources())->getRenderpass()); + + for (uint32_t i = 0u; i < m_frameBuffers2D.size(); ++i) + { + auto& fb2D = m_frameBuffers2D[i]; + auto& fb3D = m_frameBuffers3D[i]; + auto ixs 
= std::to_string(i); + + // TODO: may actually change it, temporary hardcoding + constexpr auto WIDTH = 640; + constexpr auto HEIGHT_2D = WIDTH * 2; + constexpr auto HEIGHT_3D = WIDTH; + + { + auto color = createImageView(WIDTH, HEIGHT_2D, EF_R8G8B8A8_SRGB, "[2D Plot]: framebuffer[" + ixs + "].color attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IImage::EAF_COLOR_BIT); + fb2D = m_device->createFramebuffer + ( + { { + .renderpass = renderpass, + .depthStencilAttachments = nullptr, + .colorAttachments = &color.get(), + .width = WIDTH, + .height = HEIGHT_2D + } } + ); + } + + { + auto color = createImageView(WIDTH, HEIGHT_3D, EF_R8G8B8A8_SRGB, "[3D Plot]: framebuffer[" + ixs + "].color attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IImage::EAF_COLOR_BIT); + auto depth = createImageView(WIDTH, HEIGHT_3D, EF_D16_UNORM, "[3D Plot]: framebuffer[" + ixs + "].depth attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IGPUImage::EAF_DEPTH_BIT); + + fb3D = m_device->createFramebuffer + ( + { { + .renderpass = renderpass, + .depthStencilAttachments = &depth.get(), + .colorAttachments = &color.get(), + .width = WIDTH, + .height = HEIGHT_3D + } } + ); + } + } + } + auto scRes = static_cast(m_surface->getSwapchainResources()); + + // geometries for 3D scene + { + struct IESGeometryScene final : public CGeometryCreatorScene + { + explicit IESGeometryScene(const std::vector& assets) : m_assets(&assets) {} + + protected: + core::vector addGeometries(asset::CGeometryCreator* creator) const override + { + core::vector entries; + if (!m_assets) + return entries; + + std::set> seen; + for (const auto& ies : *m_assets) + { + const auto& resolution = ies.getProfile()->getAccessor().properties.optimalIESResolution; + std::pair key{ resolution.x, resolution.y }; + if (!seen.insert(key).second) + continue; + + std::string name = "Grid (" + std::to_string(resolution.x) + " x " + 
std::to_string(resolution.y) + ")"; // (**) used to assign polygons! + entries.push_back({ std::move(name), creator->createGrid({ resolution.x, resolution.y }) }); + } + + return entries; + } + + private: + const std::vector* m_assets = nullptr; + }; + + const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; + m_scene = CGeometryCreatorScene::create( + { + .transferQueue = getTransferUpQueue(), + .utilities = m_utils.get(), + .logger = m_logger.get(), + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies + }, + CSimpleIESRenderer::DefaultPolygonGeometryPatch, + m_assets + ); + + const auto& geoParams = m_scene->getInitParams(); + std::vector> polygons(m_assets.size()); + for (uint32_t i = 0u; i < m_assets.size(); ++i) + { + const auto& resolution = m_assets[i].getProfile()->getAccessor().properties.optimalIESResolution; + + for (uint32_t g = 0u; g < geoParams.geometryNames.size(); ++g) + { + uint32_t w = 0u, h = 0u; + std::sscanf(geoParams.geometryNames[g].c_str(), "Grid (%u x %u)", &w, &h); // (**) + + if (w == resolution.x && h == resolution.y) + { + polygons[i] = geoParams.geometries[g]; + break; + } + } + assert(polygons[i]); + } + + m_renderer = CSimpleIESRenderer::create(shaders.ies, core::smart_refctd_ptr(m_descriptors[0u]->getLayout()), scRes->getRenderpass(), 0, { &polygons.front().get(),polygons.size() }); + if (!m_renderer || m_renderer->getGeometries().size() != polygons.size()) + return logFail("Could not create 3D Plot Renderer!"); + + m_renderer->m_instances.resize(1); + m_renderer->m_instances[0].world = float32_t3x4( + float32_t4(1, 0, 0, 0), + float32_t4(0, 1, 0, 0), + float32_t4(0, 0, 1, 0) + ); + + using core_vec_t = std::remove_cv_t>; + const auto toCoreVec3 = [](const float32_t3& v) -> core_vec_t + { + return core_vec_t(v.x, v.y, v.z); + }; + + float32_t3 cameraPosition(-5.81655884f, 2.58630896f, -4.23974705f); + float32_t3 cameraTarget(-0.349590302f, -0.213266611f, 0.317821503f); + const 
auto cameraOffset = cameraPosition - cameraTarget; + cameraPosition = cameraTarget + cameraOffset * 1.5f; + + const auto& params = m_frameBuffers3D.front()->getCreationParameters(); + const float aspect = float(params.width) / float(params.height); + const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(uiState.cameraFovDeg), aspect, 0.1f, 10000.0f); + camera = Camera(toCoreVec3(cameraPosition), toCoreVec3(cameraTarget), projectionMatrix, 1.069f, 0.4f); + uiState.cameraMoveSpeed = camera.getMoveSpeed(); + uiState.cameraRotateSpeed = camera.getRotateSpeed(); + uiState.cameraControlApplied = !uiState.cameraControlEnabled; + } + + // imGUI + { + ext::imgui::UI::SCreationParameters params = {}; + params.resources.texturesInfo = { .setIx = NBL_TEXTURES_SET_IX, .bindingIx = NBL_TEXTURES_BINDING_IX }; + params.resources.samplersInfo = { .setIx = NBL_SAMPLER_STATES_SET_IX, .bindingIx = NBL_SAMPLER_STATES_BINDING_IX }; + params.utilities = m_utils; + params.transfer = getTransferUpQueue(); + params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, NBL_TEXTURES_COUNT); + params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); + params.subpassIx = 0u; + params.pipelineCache = nullptr; + + using imgui_precompiled_spirv_t = ext::imgui::UI::SCreationParameters::PrecompiledShaders; + params.spirv = std::make_optional(imgui_precompiled_spirv_t{ .vertex = shaders.imgui, .fragment = shaders.imgui }); + + auto imguiPtr = ext::imgui::UI::create(std::move(params)); + auto* imgui = imguiPtr.get(); + ui.it = smart_refctd_ptr_static_cast(imguiPtr); + if (not imgui) + return logFail("Failed to create `nbl::ext::imgui::UI` class"); + + { + const auto* layout = imgui->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + auto pool = 
m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, { &layout,1 }); + auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); + ui.descriptor = make_smart_refctd_ptr(std::move(ds)); + if (!ui.descriptor) + return logFail("Failed to create the descriptor set"); + + { + std::array addresses; + addresses.fill(SubAllocatedDescriptorSet::invalid_value); + ui.descriptor->multi_allocate(0, addresses.size(), addresses.data()); + + bool ok = true; + ok &= addresses.front() == ext::imgui::UI::FontAtlasTexId; + for (auto i = ext::imgui::UI::FontAtlasTexId; i < addresses.size(); ++i) + ok &= addresses[i] == i; + + assert(ok); + + std::array infos; + for (auto& it : infos) it.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + auto* ix = addresses.data(); + infos[*ix].desc = smart_refctd_ptr(imgui->getFontAtlasView()); ++ix; + + for (uint8_t i = 0u; i < MaxFramesInFlight; ++i, ++ix) infos[*ix].desc = m_frameBuffers2D[i]->getCreationParameters().colorAttachments[0u]; + for (uint8_t i = 0u; i < MaxFramesInFlight; ++i, ++ix) infos[*ix].desc = m_frameBuffers3D[i]->getCreationParameters().colorAttachments[0u]; + + auto writes = std::to_array({ IGPUDescriptorSet::SWriteDescriptorSet{ + .dstSet = ui.descriptor->getDescriptorSet(), + .binding = NBL_TEXTURES_BINDING_IX, + .arrayElement = 0u, + .count = infos.size(), + .info = infos.data() + }}); + + if (!m_device->updateDescriptorSets(writes, {})) + return logFail("Failed to write the descriptor set"); + } + } + + imgui->registerListener([this]() + { + uiListener(); + }); + } + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + using pool_flags_t = IGPUCommandPool::CREATE_FLAGS; + + auto createCommandBuffers = [&](auto* queue, const std::span> out, pool_flags_t flags) -> bool + { + auto pool = m_device->createCommandPool(queue->getFamilyIndex(), flags); + if (!pool) + return 
logFail("Couldn't create command pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, out)) + return logFail("Couldn't create command buffer!"); + return true; + }; + + // render loop command buffers + if (not createCommandBuffers(getGraphicsQueue(), m_cmdBuffers, pool_flags_t::RESET_COMMAND_BUFFER_BIT)) + return false; + + // transient command buffer + { + auto* queue = getGraphicsQueue(); + auto cbs = std::to_array({ smart_refctd_ptr() }); + if (not createCommandBuffers(queue, cbs, pool_flags_t::RESET_COMMAND_BUFFER_BIT | pool_flags_t::TRANSIENT_BIT)) + return false; + + std::vector images; + for (uint32_t i = 0; i < m_assets.size(); ++i) + { + auto& ies = m_assets[i]; + + images.emplace_back() = ies.views.candelaOctahedralMap->getCreationParameters().image.get(); + } + + auto* cb = cbs.front().get(); + cb->setObjectDebugName("Transient Command Buffer"); + + if (not cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + return logFail("Couldn't begin command buffer!"); + + if (not IES::barrier(cb, images)) + return logFail("Failed to record pipeline barriers!"); + + if (not cb->end()) + return logFail("Couldn't end command buffer!"); + + core::smart_refctd_ptr semaphore = m_device->createSemaphore(0); + semaphore->setObjectDebugName("Scratch Semaphore"); + { + IQueue::SSubmitInfo::SSemaphoreInfo signal = + { + .semaphore = semaphore.get(), + .value = 1u, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + + const IQueue::SSubmitInfo::SCommandBufferInfo cmds[] = { {.cmdbuf = cb } }; + + const IQueue::SSubmitInfo infos[] = + { { + .waitSemaphores = {}, + .commandBuffers = cmds, + .signalSemaphores = {&signal,1} + } }; + + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + return logFail("Failed to submit queue!"); + } + + { + const ISemaphore::SWaitInfo infos[] = + { { + .semaphore = semaphore.get(), + .value = 1u + } }; + + if (m_device->blockForSemaphores(infos) != ISemaphore::WAIT_RESULT::SUCCESS) + return 
logFail("Couldn't block for scratch semaphore!"); + } + } + + onAppInitializedFinish(); + if (m_window && m_winMgr) + applyWindowMode(); + + return true; +} + +void IESViewer::applyWindowMode() +{ + if (!m_window || !m_winMgr) + return; + + m_winMgr->maximize(m_window.get()); + + if (m_surface) + { + m_surface->recreateSwapchain(); + } +} + diff --git a/50.IESViewer/AppInputParser.cpp b/50.IESViewer/AppInputParser.cpp new file mode 100644 index 000000000..0f236969b --- /dev/null +++ b/50.IESViewer/AppInputParser.cpp @@ -0,0 +1,108 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "AppInputParser.hpp" +#include "nlohmann/json.hpp" + +NBL_EXPOSE_NAMESPACES +using namespace nlohmann; + +bool AppInputParser::parse(Output& out, const std::string input, const std::string cwd) +{ + const auto jInputFile = std::filesystem::absolute(input); + const auto sjInputFile = jInputFile.string(); + + std::ifstream file(sjInputFile.c_str()); + if (!file.is_open()) { + + logger.log("Could not open \"%s\" file.", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + std::stringstream buffer; + buffer << file.rdbuf(); + const auto jsonBuffer = buffer.str(); + + if (jsonBuffer.empty()) + { + logger.log("\"%s\" file is empty!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + const auto jsonMap = json::parse(jsonBuffer.c_str()); + + if (!jsonMap["directories"].is_array()) + { + logger.log("\"%s\" file is empty!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + if (!jsonMap["files"].is_array()) + { + logger.log("\"%s\" file's field \"files\" is not an array!", system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + if (!jsonMap["writeAssets"].is_boolean()) + { + logger.log("\"%s\" file's field \"writeAssets\" is not a boolean!", 
system::ILogger::ELL_ERROR, sjInputFile.c_str()); + return false; + } + + auto addFile = [&](const std::string_view in) -> bool + { + auto path = std::filesystem::absolute(cwd / std::filesystem::path(in)); + + if (std::filesystem::exists(path) && std::filesystem::is_regular_file(path) && path.extension() == ".ies") + out.inputList.push_back(path.string()); + else + { + logger.log("Invalid \"%s\" input!", system::ILogger::ELL_ERROR, path.string().c_str()); + return false; + } + + return true; + }; + + auto addFiles = [&](const std::string_view directoryPath) -> bool + { + auto directory(std::filesystem::absolute(cwd / std::filesystem::path(directoryPath))); + if (!std::filesystem::exists(directory) || !std::filesystem::is_directory(directory)) + { + logger.log("Invalid \"%s\" directory!", system::ILogger::ELL_ERROR, directory.string().c_str()); + return false; + } + + for (const auto& entry : std::filesystem::directory_iterator(directory)) + if (!addFile(entry.path().string().c_str())) + return false; + + return true; + }; + + // parse json + { + std::vector jDirectories; + jsonMap["directories"].get_to(jDirectories); + + for (const auto& it : jDirectories) + if (!addFiles(it)) + return false; + + std::vector jFiles; + jsonMap["files"].get_to(jFiles); + + for (const auto& it : jFiles) + if (!addFile(it)) + return false; + } + + out.withGUI = false; + jsonMap["gui"].get_to(out.withGUI); + + out.writeAssets = false; + jsonMap["writeAssets"].get_to(out.writeAssets); + + return true; +} \ No newline at end of file diff --git a/50.IESViewer/AppInputParser.hpp b/50.IESViewer/AppInputParser.hpp new file mode 100644 index 000000000..18b5e4fe3 --- /dev/null +++ b/50.IESViewer/AppInputParser.hpp @@ -0,0 +1,27 @@ +#ifndef _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ +#define _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ + +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" + +struct AppInputParser +{ +public: + struct Output + { + std::vector inputList; + bool withGUI; + bool writeAssets; + }; + + AppInputParser(nbl::system::logger_opt_ptr _logger = nullptr) : logger(_logger) {} + bool parse(Output& out, const std::string jFilePath, const std::string cwd = "."); + +private: + nbl::system::logger_opt_ptr logger; +}; + +#endif // _THIS_EXAMPLE_APP_INPUT_PARSER_HPP_ \ No newline at end of file diff --git a/50.IESViewer/AppRender.cpp b/50.IESViewer/AppRender.cpp new file mode 100644 index 000000000..a06b2702a --- /dev/null +++ b/50.IESViewer/AppRender.cpp @@ -0,0 +1,557 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" +#include +#include +#include "nbl/ext/ImGui/ImGui.h" +#include "nbl/ext/ScreenShot/ScreenShot.h" +#include "app_resources/common.hlsl" +#include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" + +bool IESViewer::recreate3DPlotFramebuffers(uint32_t width, uint32_t height) +{ + if (width == 0u || height == 0u) + return false; + + if (width == m_plot3DWidth && height == m_plot3DHeight) + return true; + + m_device->waitIdle(); + m_plot3DWidth = width; + m_plot3DHeight = height; + + auto* scRes = static_cast(m_surface->getSwapchainResources()); + auto renderpass = smart_refctd_ptr(scRes->getRenderpass()); + + for (uint32_t i = 0u; i < m_frameBuffers3D.size(); ++i) + { + auto& fb3D = m_frameBuffers3D[i]; + auto ixs = std::to_string(i); + + auto color = createImageView(width, height, EF_R8G8B8A8_SRGB, "[3D Plot]: framebuffer[" + ixs + "].color attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IImage::EAF_COLOR_BIT); + if (!color) + return false; + + auto depth = createImageView(width, height, EF_D16_UNORM, "[3D Plot]: 
framebuffer[" + ixs + "].depth attachement", IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT, IGPUImage::EAF_DEPTH_BIT); + if (!depth) + return false; + + fb3D = m_device->createFramebuffer + ( + { { + .renderpass = renderpass, + .depthStencilAttachments = &depth.get(), + .colorAttachments = &color.get(), + .width = width, + .height = height + } } + ); + if (!fb3D) + return false; + } + + auto* imgui = static_cast(ui.it.get()); + if (imgui && ui.descriptor) + { + std::array infos; + for (auto& it : infos) + it.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + auto* ix = infos.data(); + ix->desc = smart_refctd_ptr(imgui->getFontAtlasView()); + ++ix; + for (uint8_t i = 0u; i < device_base_t::MaxFramesInFlight; ++i, ++ix) + ix->desc = m_frameBuffers2D[i]->getCreationParameters().colorAttachments[0u]; + for (uint8_t i = 0u; i < device_base_t::MaxFramesInFlight; ++i, ++ix) + ix->desc = m_frameBuffers3D[i]->getCreationParameters().colorAttachments[0u]; + + const auto texturesBinding = imgui->getCreationParameters().resources.texturesInfo.bindingIx; + auto writes = std::to_array({ IGPUDescriptorSet::SWriteDescriptorSet{ + .dstSet = ui.descriptor->getDescriptorSet(), + .binding = texturesBinding, + .arrayElement = ext::imgui::UI::FontAtlasTexId, + .count = static_cast(infos.size()), + .info = infos.data() + } }); + + if (!m_device->updateDescriptorSets(writes, {})) + return false; + } + + const float aspect = float(width) / float(height); + const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(uiState.cameraFovDeg), aspect, 0.1f, 10000.0f); + camera.setProjectionMatrix(projectionMatrix); + + return true; +} + +IQueue::SSubmitInfo::SSemaphoreInfo IESViewer::renderFrame(const std::chrono::microseconds nextPresentationTimestamp) +{ + const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; + auto* const cb = m_cmdBuffers.data()[resourceIx].get(); + + auto scRes = 
static_cast(m_surface->getSwapchainResources()); + auto* imgui = static_cast(ui.it.get()); + + const bool windowFocused = m_window->hasInputFocus() || m_window->hasMouseFocus(); + if (!windowFocused && uiState.cameraControlEnabled) + uiState.cameraControlEnabled = false; + const bool wantCameraControl = uiState.cameraControlEnabled && windowFocused; + + uint32_t renderWidth = m_window->getWidth(); + uint32_t renderHeight = m_window->getHeight(); + if (auto* sc = scRes->getSwapchain()) + { + const auto& params = sc->getCreationParameters().sharedParams; + if (params.width && params.height) + { + renderWidth = params.width; + renderHeight = params.height; + } + } + if (renderWidth == 0u || renderHeight == 0u || m_window->isMinimized()) + return {}; + + if (uiState.cameraControlApplied != wantCameraControl) + { + uiState.cameraControlApplied = wantCameraControl; + const float moveSpeed = wantCameraControl ? uiState.cameraMoveSpeed : 0.0f; + const float rotateSpeed = wantCameraControl ? uiState.cameraRotateSpeed : 0.0f; + camera.setMoveSpeed(moveSpeed); + camera.setRotateSpeed(rotateSpeed); + } + + + + const uint32_t desired3DWidth = renderWidth; + const uint32_t desired3DHeight = renderHeight; + if (!recreate3DPlotFramebuffers(desired3DWidth, desired3DHeight)) + return {}; + + auto* const fb2D = m_frameBuffers2D[resourceIx].get(); + auto* const fb3D = m_frameBuffers3D[resourceIx].get(); + + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + { + struct + { + std::vector mouse{}; std::vector keyboard{}; + } captured; + + camera.beginInputProcessing(nextPresentationTimestamp); + if (windowFocused) + { + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (wantCameraControl) + camera.mouseProcess(events); + processMouse(events); + for (const auto& e : events) + 
captured.mouse.emplace_back(e); + }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + camera.keyboardProcess(events); + processKeyboard(events); + for (const auto& e : events) + captured.keyboard.emplace_back(e); + }, m_logger.get()); + } + else + { + mouse.consumeEvents([&](const IMouseEventChannel::range_t&) -> void {}, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t&) -> void {}, m_logger.get()); + } + camera.endInputProcessing(nextPresentationTimestamp); + + { + const float maxRadius = m_plotRadius * 0.98f; + const float clampRadius = maxRadius * 0.999f; + using core_vec_t = std::remove_cv_t>; + const auto toHlslVec3 = [](const core_vec_t& v) + { + return float32_t3(v.x, v.y, v.z); + }; + const auto toCoreVec3 = [](const float32_t3& v) + { + return core_vec_t(v.x, v.y, v.z); + }; + auto pos = toHlslVec3(camera.getPosition()); + const float dist = length(pos); + if (dist > maxRadius) + { + const auto target = toHlslVec3(camera.getTarget()); + const auto forward = target - pos; + pos = normalize(pos) * clampRadius; + camera.setPosition(toCoreVec3(pos)); + camera.setTarget(toCoreVec3(pos + forward)); + } + } + + auto* cursorControl = m_window->getCursorControl(); + const auto cursorPosition = cursorControl->getPosition(); + const int32_t windowX = m_window->getX(); + const int32_t windowY = m_window->getY(); + const int32_t windowW = static_cast(m_window->getWidth()); + const int32_t windowH = static_cast(m_window->getHeight()); + const bool cursorInsideWindow = + cursorPosition.x >= windowX && cursorPosition.x < windowX + windowW && + cursorPosition.y >= windowY && cursorPosition.y < windowY + windowH; + cursorControl->setVisible(!(cursorInsideWindow || uiState.cameraControlApplied)); + ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), + .displaySize = 
{renderWidth,renderHeight}, + .mouseEvents = captured.mouse, + .keyboardEvents = captured.keyboard + }; + + if (imgui) + imgui->update(params); + } + + if (uiState.cameraControlApplied) + { + if (auto* cursor = m_window->getCursorControl()) + cursor->setRelativePosition(m_window.get(), {0.5f, 0.5f}); + } + + auto& ies = m_assets[uiState.activeAssetIx]; + const auto* profile = ies.getProfile(); + const auto& accessor = profile->getAccessor(); + const auto hCount = accessor.hAnglesCount(); + const auto vCount = accessor.vAnglesCount(); + const auto pc = hlsl::this_example::ies::CdcPC + { + .hAnglesBDA = ies.buffers.hAngles->getDeviceAddress(), + .vAnglesBDA = ies.buffers.vAngles->getDeviceAddress(), + .dataBDA = ies.buffers.data->getDeviceAddress(), + .txtInfoBDA = ies.buffers.textureInfo.buffer->getDeviceAddress(), + .mode = uiState.mode.view, + .texIx = static_cast(uiState.activeAssetIx), + .hAnglesCount = hCount, + .vAnglesCount = vCount, + .zAngleDegreeRotation = ies.zDegree, + .properties = accessor.getProperties() + }; + + for (auto& buffer : { ies.buffers.data, ies.buffers.hAngles, ies.buffers.vAngles }) // flush request for sanity + { + auto bound = buffer->getBoundMemory(); + if (bound.memory->haveToMakeVisible()) + { + const ILogicalDevice::MappedMemoryRange range(bound.memory, bound.offset, buffer->getSize()); + m_device->flushMappedMemoryRanges(1, &range); + } + } + + auto* const descriptor = m_descriptors[0].get(); + auto* image = ies.getActiveImage(IES::EM_OCTAHEDRAL_MAP); + + bool needCompute = true; + if (uiState.activeAssetIx < m_candelaDirty.size()) + needCompute = m_candelaDirty[uiState.activeAssetIx]; + + if (needCompute) + { + cb->beginDebugMarker("IES::compute"); + IES::barrier(cb, image); + auto* layout = m_computePipeline->getLayout(); + cb->bindComputePipeline(m_computePipeline.get()); + cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &descriptor); + cb->pushConstants(layout, 
layout->getPushConstantRanges().begin()->stageFlags, offsetof(hlsl::this_example::ies::PushConstants, cdc), sizeof(pc), &pc); + const auto xGroups = (ies.getProfile()->getAccessor().properties.optimalIESResolution.x - 1u) / hlsl::this_example::WorkgroupDimension + 1u; + cb->dispatch(xGroups, xGroups, 1); + IES::barrier(cb, image); + cb->endDebugMarker(); + if (uiState.activeAssetIx < m_candelaDirty.size()) + m_candelaDirty[uiState.activeAssetIx] = false; + } + + // Graphics + { + auto extent = fb2D->getCreationParameters().colorAttachments[0u]->getCreationParameters().image->getCreationParameters().extent; + const uint32_t plotHeight = extent.height / 2u; + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = extent.width; + viewport.height = extent.height; + } + cb->setViewport(0u, 1u, &viewport); + + VkRect2D scissor = + { + .offset = { 0, 0 }, + .extent = { extent.width, extent.height }, + }; + cb->setScissor(0u, 1u, &scissor); + + VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {extent.width,extent.height} + }; + + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; + const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; + IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = fb2D, + .colorClearValues = &clearValue, + .depthStencilClearValues = &depthValue, + .renderArea = currentRenderArea + }; + + cb->beginDebugMarker("IES::graphics 2D plot"); + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + { + auto* layout = m_graphicsPipeline->getLayout(); + cb->bindGraphicsPipeline(m_graphicsPipeline.get()); + cb->bindDescriptorSets(EPBP_GRAPHICS, layout, 0, 1, &descriptor); + asset::SViewport viewport2D = viewport; + viewport2D.width = static_cast(extent.width); + viewport2D.height = static_cast(plotHeight); + VkRect2D scissor2D = scissor; + scissor2D.extent = { 
extent.width, plotHeight }; + + auto pc2D = pc; + pc2D.mode = uiState.mode.view; + cb->setViewport(0u, 1u, &viewport2D); + cb->setScissor(0u, 1u, &scissor2D); + cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc2D), &pc2D); + ext::FullScreenTriangle::recordDrawCall(cb); + + if (uiState.showOctaMapPreview) + { + viewport2D.y = static_cast(plotHeight); + scissor2D.offset.y = static_cast(plotHeight); + pc2D.mode = IES::EM_OCTAHEDRAL_MAP; + cb->setViewport(0u, 1u, &viewport2D); + cb->setScissor(0u, 1u, &scissor2D); + cb->pushConstants(layout, layout->getPushConstantRanges().begin()->stageFlags, 0, sizeof(pc2D), &pc2D); + ext::FullScreenTriangle::recordDrawCall(cb); + } + } + cb->endRenderPass(); + cb->endDebugMarker(); + + const IGPUCommandBuffer::SClearColorValue d3clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; + auto info3D = info; + info3D.colorClearValues = &d3clearValue; // tmp + info3D.depthStencilClearValues = &depthValue; + info3D.framebuffer = fb3D; + auto extent3D = fb3D->getCreationParameters().colorAttachments[0u]->getCreationParameters().image->getCreationParameters().extent; + viewport.width = extent3D.width; + viewport.height = extent3D.height; + cb->setViewport(0u, 1u, &viewport); + scissor.extent = { extent3D.width, extent3D.height }; + cb->setScissor(0u, 1u, &scissor); + currentRenderArea.extent = { extent3D.width, extent3D.height }; + info3D.renderArea = currentRenderArea; + cb->beginDebugMarker("IES::graphics 3D plot"); + cb->beginRenderPass(info3D, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + { + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + // TODO: get rid of legacy matrices + { + viewMatrix = camera.getViewMatrix(); + viewProjMatrix = camera.getConcatenatedMatrix(); + } + const auto viewParams = CSimpleIESRenderer::SViewParams(viewMatrix, viewProjMatrix); + const auto iesParams = CSimpleIESRenderer::SIESParams({ .radius = m_plotRadius, .ds = m_descriptors[0u].get(), .texID = 
static_cast(uiState.activeAssetIx), .mode = uiState.mode.sphere.value, .wireframe = uiState.wireframeEnabled }); + + // tear down scene every frame + m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data() + uiState.activeAssetIx; + m_renderer->render(cb, viewParams, iesParams); + } + cb->endRenderPass(); + cb->endDebugMarker(); + + cb->beginDebugMarker("IES::graphics ImGUI"); + + viewport.width = renderWidth; + viewport.height = renderHeight; + cb->setViewport(0u, 1u, &viewport); + scissor.extent = { renderWidth, renderHeight }; + cb->setScissor(0u, 1u, &scissor); + currentRenderArea.extent = { renderWidth, renderHeight }; + + info.framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); + info.renderArea = currentRenderArea; + + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + if (imgui) + { + auto* pipeline = imgui->getPipeline(); + cb->bindGraphicsPipeline(pipeline); + const auto* ds = ui.descriptor->getDescriptorSet(); + cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); + const ISemaphore::SWaitInfo wait = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + if (!imgui->render(cb, wait)) + { + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); + return {}; + } + } + cb->endRenderPass(); + cb->endDebugMarker(); + cb->end(); + } + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const 
IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + std::string caption = "[Nabla Engine] IES Viewer"; + { + m_window->setCaption(caption); + } + return retval; +} + +void IESViewer::onPostRenderFrame(const video::IQueue::SSubmitInfo::SSemaphoreInfo& rendered) +{ + if (!m_ciMode || m_ciScreenshotDone) + return; + + ++m_ciFrameCounter; + if (m_ciFrameCounter < CiFramesBeforeCapture) + return; + + m_ciScreenshotDone = true; + + if (!m_device || !m_surface || !m_assetMgr) + { + requestExit(); + return; + } + + // Ensure the last submitted frame is finished before we read back. + m_device->waitIdle(); + + auto* scRes = static_cast(m_surface->getSwapchainResources()); + auto* fb = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); + if (!fb) + { + m_logger->log("CI screenshot failed: missing swapchain framebuffer.", system::ILogger::ELL_ERROR); + requestExit(); + return; + } + + auto colorView = fb->getCreationParameters().colorAttachments[0u]; + if (!colorView) + { + m_logger->log("CI screenshot failed: missing swapchain color attachment.", system::ILogger::ELL_ERROR); + requestExit(); + return; + } + + { + const auto usage = colorView->getCreationParameters().image->getCreationParameters().usage; + const bool hasTransferSrc = usage.hasFlags(asset::IImage::EUF_TRANSFER_SRC_BIT); + m_logger->log( + "CI screenshot source usage: 0x%llx (transfer_src=%s).", + system::ILogger::ELL_INFO, + static_cast(usage.value), + hasTransferSrc ? 
"yes" : "no"); + } + + const bool ok = ext::ScreenShot::createScreenShot( + m_device.get(), + getGraphicsQueue(), + nullptr, + colorView.get(), + m_assetMgr.get(), + m_ciScreenshotPath, + asset::IImage::LAYOUT::PRESENT_SRC, + asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT); + + if (ok) + m_logger->log("CI screenshot saved to \"%s\".", system::ILogger::ELL_INFO, m_ciScreenshotPath.string().c_str()); + else + m_logger->log("CI screenshot failed to save.", system::ILogger::ELL_ERROR); + + requestExit(); +} + +const video::IGPURenderpass::SCreationParams::SSubpassDependency* IESViewer::getDefaultSubpassDependencies() const +{ + // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + { + // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, + // don't want any writes to be available, we'll clear + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // color from ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = 
IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + return dependencies; +} + diff --git a/50.IESViewer/AppUI.cpp b/50.IESViewer/AppUI.cpp new file mode 100644 index 000000000..c376f7730 --- /dev/null +++ b/50.IESViewer/AppUI.cpp @@ -0,0 +1,651 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" +#include +#include +#include +#include +#include "imgui/imgui.h" +#include "imgui/imgui_internal.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "app_resources/common.hlsl" +#include "app_resources/false_color.hlsl" +#include "app_resources/imgui.opts.hlsl" +#include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include "nbl/builtin/hlsl/math/octahedral.hlsl" + +void IESViewer::uiListener() +{ + const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; + + SImResourceInfo info; + info.textureID = ext::imgui::UI::FontAtlasTexId + resourceIx + 1u; + info.samplerIx = (uint16_t)ext::imgui::UI::DefaultSamplerIx::USER; + + const ImGuiViewport* vp = ImGui::GetMainViewport(); + const ImVec2 viewportPos = vp->Pos; + const ImVec2 viewportSize = vp->Size; + auto* cursorControl = m_window->getCursorControl(); + const auto cursorPosition = cursorControl ? 
cursorControl->getPosition() : ICursorControl::SPosition{}; + const int32_t windowX = m_window->getX(); + const int32_t windowY = m_window->getY(); + const int32_t windowW = static_cast(m_window->getWidth()); + const int32_t windowH = static_cast(m_window->getHeight()); + const bool cursorInsideWindow = cursorControl && + cursorPosition.x >= windowX && cursorPosition.x < windowX + windowW && + cursorPosition.y >= windowY && cursorPosition.y < windowY + windowH; + ImGui::GetIO().MouseDrawCursor = cursorInsideWindow && !uiState.cameraControlEnabled; + const ImVec2 bottomSize(viewportSize.x, viewportSize.y); + const ImVec2 bottomPos(viewportPos.x, viewportPos.y); + const auto legendColor = [&](float v, bool useFalseColor) -> ImU32 + { + const float clamped = ImClamp(v, 0.0f, 1.0f); + if (useFalseColor) + { + const auto col = hlsl::this_example::ies::falseColor(clamped); + return ImGui::ColorConvertFloat4ToU32(ImVec4(col.x, col.y, col.z, 1.0f)); + } + return ImGui::ColorConvertFloat4ToU32(ImVec4(clamped, clamped, clamped, 1.0f)); + }; + const auto showHint = [&](const char* text) + { + if (!uiState.showHints || !text || text[0] == '\0') + return; + if (!ImGui::IsItemHovered()) + return; + ImGui::BeginTooltip(); + ImGui::TextUnformatted(text); + ImGui::EndTooltip(); + }; + std::vector assetLabelPtrs; + assetLabelPtrs.reserve(m_assetLabels.size()); + for (const auto& label : m_assetLabels) + assetLabelPtrs.push_back(label.c_str()); + + size_t activeIx = uiState.activeAssetIx; + if (activeIx >= m_assets.size()) + activeIx = 0u; + int activeIxUi = static_cast(activeIx); + float candelaValue = 0.0f; + bool candelaValid = false; + ImVec2 plotRectMin(0.f, 0.f); + ImVec2 plotRectMax(0.f, 0.f); + bool plotRectValid = false; + bool plotHovered = false; + uiState.plot2DRectValid = false; + + auto& ies = m_assets[activeIx]; + auto* profile = ies.getProfile(); + const auto& accessor = profile->getAccessor(); + const auto& properties = accessor.getProperties(); + + const float 
lowerBound = accessor.hAngles.front(); + const float upperBound = accessor.hAngles.back(); + const bool singleAngle = (upperBound == lowerBound); + + constexpr size_t kSmallBufSize = 32; + auto angle = ImClamp(ies.zDegree, lowerBound, upperBound); + + auto updateCameraProjection = [&]() + { + if (m_plot3DWidth == 0u || m_plot3DHeight == 0u) + return; + const float aspect = float(m_plot3DWidth) / float(m_plot3DHeight); + const auto projectionMatrix = buildProjectionMatrixPerspectiveFovLH(hlsl::radians(uiState.cameraFovDeg), aspect, 0.1f, 10000.0f); + camera.setProjectionMatrix(projectionMatrix); + }; + + auto draw3DControls = [&]() + { + bool interpolateCandela = uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); + + if (ImGui::Checkbox("interpolate candelas", &interpolateCandela)) + { + if (interpolateCandela) + uiState.mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE; + else + uiState.mode.sphere &= static_cast( + ~hlsl::this_example::ies::E_SPHERE_MODE::ESM_OCTAHEDRAL_UV_INTERPOLATE + ); + } + showHint("Interpolate candela values in the octahedral map."); + + bool falseColor = uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_FALSE_COLOR); + + if (ImGui::Checkbox("false color", &falseColor)) + { + if (falseColor) + uiState.mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR; + else + uiState.mode.sphere &= static_cast( + ~hlsl::this_example::ies::E_SPHERE_MODE::ESM_FALSE_COLOR + ); + } + showHint("Use false color palette for the 3D plot."); + + bool showOctaMap = uiState.showOctaMapPreview; + if (ImGui::Checkbox("octahedral map", &showOctaMap)) + uiState.showOctaMapPreview = showOctaMap; + showHint("Show octahedral map preview under the 2D plot."); + + bool showHints = uiState.showHints; + if (ImGui::Checkbox("show hints", &showHints)) + uiState.showHints = showHints; + showHint("Toggle help tooltips."); + + bool cubePlot = 
uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_CUBE); + + if (ImGui::Checkbox("cube plot", &cubePlot)) + { + if (cubePlot) + uiState.mode.sphere |= hlsl::this_example::ies::E_SPHERE_MODE::ESM_CUBE; + else + uiState.mode.sphere &= static_cast( + ~hlsl::this_example::ies::E_SPHERE_MODE::ESM_CUBE + ); + } + showHint("Render the plot on a cube instead of a sphere."); + + bool wireframe = uiState.wireframeEnabled; + if (ImGui::Checkbox("wireframe", &wireframe)) + uiState.wireframeEnabled = wireframe; + showHint("Show wireframe topology in the 3D plot."); + + bool cameraControl = uiState.cameraControlEnabled; + if (ImGui::Checkbox("camera control (space)", &cameraControl)) + uiState.cameraControlEnabled = cameraControl; + showHint("Enable camera movement with mouse and keyboard."); + + bool speedChanged = false; + bool fovChanged = false; + if (ImGui::BeginTable("##camera_controls", 2, ImGuiTableFlags_SizingStretchProp)) + { + float labelWidth = 0.0f; + labelWidth = ImMax(labelWidth, ImGui::CalcTextSize("move speed").x); + labelWidth = ImMax(labelWidth, ImGui::CalcTextSize("rotate speed").x); + labelWidth = ImMax(labelWidth, ImGui::CalcTextSize("fov").x); + labelWidth += ImGui::GetStyle().CellPadding.x * 2.0f; + labelWidth = ImMin(labelWidth, ImGui::GetContentRegionAvail().x * 0.6f); + ImGui::TableSetupColumn("label", ImGuiTableColumnFlags_WidthFixed, labelWidth); + ImGui::TableSetupColumn("value", ImGuiTableColumnFlags_WidthStretch); + auto sliderRow = [&](const char* label, float* value, float min, float max, const char* fmt, const char* hint) + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::AlignTextToFramePadding(); + ImGui::TextUnformatted(label); + showHint(hint); + ImGui::TableSetColumnIndex(1); + ImGui::SetNextItemWidth(-FLT_MIN); + ImGui::PushID(label); + const bool changed = ImGui::SliderFloat("##value", value, min, max, fmt, ImGuiSliderFlags_AlwaysClamp); + ImGui::PopID(); + showHint(hint); + return changed; + }; + + 
speedChanged |= sliderRow("move speed", &uiState.cameraMoveSpeed, 0.1f, 10.0f, "%.2f", "Camera movement speed."); + speedChanged |= sliderRow("rotate speed", &uiState.cameraRotateSpeed, 0.1f, 5.0f, "%.2f", "Camera rotation speed."); + fovChanged |= sliderRow("fov", &uiState.cameraFovDeg, 30.0f, 120.0f, "%.0f", "Camera field of view."); + + ImGui::EndTable(); + } + + if (speedChanged && uiState.cameraControlEnabled) + { + camera.setMoveSpeed(uiState.cameraMoveSpeed); + camera.setRotateSpeed(uiState.cameraRotateSpeed); + } + + if (fovChanged) + updateCameraProjection(); + + }; + + const float panelMargin = 8.f; + const float panelWidth = ImClamp(viewportSize.x * 0.25f, 260.0f, 420.0f); + const float panelMaxHeight = ImMax(240.0f, viewportSize.y * 0.9f); + ImGui::SetNextWindowPos(ImVec2(viewportPos.x + panelMargin, viewportPos.y + panelMargin), ImGuiCond_Always); + ImGui::SetNextWindowSizeConstraints(ImVec2(panelWidth, 0.0f), ImVec2(panelWidth, panelMaxHeight)); + ImGui::SetNextWindowBgAlpha(0.7f); + ImGuiWindowFlags panelFlags = + ImGuiWindowFlags_NoDecoration | + ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_AlwaysAutoResize | + ImGuiWindowFlags_NoResize; + + if (ImGui::Begin("IES Panel", nullptr, panelFlags)) + { + const auto& resolution = accessor.properties.optimalIESResolution; + + constexpr size_t kInfoBufSize = 64; + std::array bAngle{}; + std::array bAngles{}; + std::array bRes{}; + std::array bMax{}; + std::array bAvg{}; + std::array bAvgFull{}; + const auto hCount = accessor.hAnglesCount(); + const auto vCount = accessor.vAnglesCount(); + std::snprintf(bAngle.data(), bAngle.size(), "%.3f deg", angle); + std::snprintf(bAngles.data(), bAngles.size(), "angles: %u x %u", hCount, vCount); + std::snprintf(bRes.data(), bRes.size(), "resolution: %u x %u", resolution.x, resolution.y); + std::snprintf(bMax.data(), bMax.size(), "max cd: %.3f", properties.maxCandelaValue); + std::snprintf(bAvg.data(), 
bAvg.size(), "avg: %.3f", properties.avgEmmision); + std::snprintf(bAvgFull.data(), bAvgFull.size(), "avg full: %.3f", properties.fullDomainAvgEmission); + const std::string symmetryLabel = nbl::system::to_string(properties.getSymmetry()); + const std::string typeLabel = nbl::system::to_string(properties.getType()); + const std::string versionLabel = nbl::system::to_string(properties.getVersion()); + float leftWidth = 0.0f; + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(symmetryLabel.c_str()).x); + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(versionLabel.c_str()).x); + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(bAngles.data()).x); + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(bMax.data()).x); + leftWidth = ImMax(leftWidth, ImGui::CalcTextSize(bAvgFull.data()).x); + leftWidth += ImGui::GetStyle().CellPadding.x * 2.0f; + leftWidth = ImMin(leftWidth, ImGui::GetContentRegionAvail().x * 0.6f); + if (ImGui::BeginTable("##profile_info", 2, ImGuiTableFlags_SizingFixedFit)) + { + ImGui::TableSetupColumn("left", ImGuiTableColumnFlags_WidthFixed, leftWidth); + ImGui::TableSetupColumn("right", ImGuiTableColumnFlags_WidthStretch); + auto rightText = [&](const char* text, const char* hint) + { + const float avail = ImGui::GetContentRegionAvail().x; + const float textWidth = ImGui::CalcTextSize(text).x; + const char* displayText = text; + std::string clipped; + if (textWidth > avail && avail > 0.0f) + { + const char* ell = "..."; + const float ellW = ImGui::CalcTextSize(ell).x; + const float target = ImMax(0.0f, avail - ellW); + const int len = static_cast(std::strlen(text)); + int lo = 0; + int hi = len; + while (lo < hi) + { + int mid = (lo + hi + 1) / 2; + float w = ImGui::CalcTextSize(text, text + mid).x; + if (w <= target) + lo = mid; + else + hi = mid - 1; + } + clipped.assign(text, text + lo); + clipped.append(ell); + displayText = clipped.c_str(); + } + const float displayWidth = ImGui::CalcTextSize(displayText).x; + if (displayWidth < avail) + 
ImGui::SetCursorPosX(ImGui::GetCursorPosX() + (avail - displayWidth)); + ImGui::TextUnformatted(displayText); + showHint(hint); + }; + auto row = [&](const char* left, const char* right, const char* leftHint, const char* rightHint) + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(left); + showHint(leftHint); + ImGui::TableSetColumnIndex(1); + rightText(right, rightHint); + }; + + row(symmetryLabel.c_str(), typeLabel.c_str(), "IES symmetry mode.", "IES photometric type."); + row(versionLabel.c_str(), assetLabelPtrs.empty() ? ies.key.c_str() : assetLabelPtrs[activeIx], "IES standard/version.", "Active IES profile file."); + row(bAngles.data(), bRes.data(), "Horizontal and vertical angle count.", "Octahedral map resolution."); + row(bMax.data(), bAvg.data(), "Maximum candela value.", "Average candela value."); + row(bAvgFull.data(), bAngle.data(), "Average candela over full domain.", "Current horizontal angle."); + + ImGui::EndTable(); + } + + ImGui::Separator(); + + const ImVec2 avail = ImGui::GetContentRegionAvail(); + ImVec2 plotSize(0.f, 0.f); + float plotSide = ImMax(0.0f, avail.x); + if (plotSide > 0.0f) + { + plotSize = ImVec2(plotSide, plotSide); + ImVec2 plotPos = ImGui::GetCursorScreenPos(); + { + const std::string modeLabel = nbl::system::to_string(uiState.mode.view); + const char* title = modeLabel.c_str(); + const ImVec2 titleSize = ImGui::CalcTextSize(title); + const float titleX = ImMax(0.0f, (ImGui::GetContentRegionAvail().x - titleSize.x) * 0.5f); + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + titleX); + ImGui::TextUnformatted(title); + showHint("2D candlepower distribution curve."); + } + + plotPos = ImGui::GetCursorScreenPos(); + ImGui::Image(info, plotSize, ImVec2(0.f, 0.f), ImVec2(1.f, 0.5f)); + const ImVec2 itemMin = ImGui::GetItemRectMin(); + const ImVec2 itemMax = ImGui::GetItemRectMax(); + uiState.plot2DRectMin = float32_t2(itemMin.x, itemMin.y); + uiState.plot2DRectMax = float32_t2(itemMax.x, itemMax.y); 
+ uiState.plot2DRectValid = true; + showHint("2D candlepower distribution curve."); + + ImDrawList* dl = ImGui::GetWindowDrawList(); + + const float pad = 6.f; + const float barWidth = 16.f; + const float sliderH = ImMax(0.f, plotSize.y - pad * 2.f); + const float sliderX = plotPos.x + plotSize.x - barWidth - pad; + const float sliderY = plotPos.y + pad; + + if (sliderH > 0.0f) + { + ImGui::SetCursorScreenPos(ImVec2(sliderX, sliderY)); + ImGui::InvisibleButton("##angle_slider", ImVec2(barWidth, sliderH)); + showHint("Adjust horizontal angle."); + ImVec2 rmin = ImGui::GetItemRectMin(); + ImVec2 rmax = ImGui::GetItemRectMax(); + ImU32 col = IM_COL32(220, 60, 60, 255); + + float knobR = 7.f; + float trackX = rmax.x - barWidth * 0.5f; + float y0 = rmin.y + knobR + 1.f; + float y1 = rmax.y - knobR - 1.f; + + dl->AddLine(ImVec2(trackX, y0), ImVec2(trackX, y1), col, 3.f); + + if (singleAngle) + { + float y = (y0 + y1) * 0.5f; + dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); + std::array tb{}; + std::snprintf(tb.data(), tb.size(), "%.0f", lowerBound); + ImVec2 ts = ImGui::CalcTextSize(tb.data()); + dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb.data()); + } + else + { + for (int i = 0; i < 5; ++i) + { + float v = lowerBound + (upperBound - lowerBound) * (float(i) / 4.f); + float t = (v - lowerBound) / (upperBound - lowerBound); + float y = y1 - t * (y1 - y0); + dl->AddLine(ImVec2(trackX - 22.f, y), ImVec2(trackX - 8.f, y), ImGui::GetColorU32(ImGuiCol_Text)); + std::array tb{}; + std::snprintf(tb.data(), tb.size(), "%.0f", v); + ImVec2 ts = ImGui::CalcTextSize(tb.data()); + dl->AddText(ImVec2(trackX - 24.f - ts.x, y - ts.y * 0.5f), ImGui::GetColorU32(ImGuiCol_Text), tb.data()); + } + } + + float t = singleAngle ? 
0.5f : (angle - lowerBound) / (upperBound - lowerBound); + float knobY = y1 - t * (y1 - y0); + dl->AddCircleFilled(ImVec2(trackX, knobY), knobR, col); + dl->AddCircle(ImVec2(trackX, knobY), knobR, ImGui::GetColorU32(ImGuiCol_Border)); + + if (!singleAngle && (ImGui::IsItemHovered() || ImGui::IsItemActive()) && ImGui::IsMouseDown(0)) + { + float my = ImClamp(ImGui::GetIO().MousePos.y, y0, y1); + float nt = (y1 - my) / (y1 - y0); + angle = lowerBound + nt * (upperBound - lowerBound); + } + } + } + + if (plotSize.x > 0.0f && plotSize.y > 0.0f && uiState.showOctaMapPreview) + { + ImGui::Spacing(); + { + const char* title = "Octahedral Map"; + const ImVec2 titleSize = ImGui::CalcTextSize(title); + const float titleX = ImMax(0.0f, (ImGui::GetContentRegionAvail().x - titleSize.x) * 0.5f); + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + titleX); + ImGui::TextUnformatted(title); + showHint("Octahedral map preview."); + } + ImGui::Image(info, plotSize, ImVec2(0.f, 0.5f), ImVec2(1.f, 1.f)); + showHint("Octahedral map preview."); + } + + ImGui::Separator(); + draw3DControls(); + ImGui::Separator(); + + if (!assetLabelPtrs.empty()) + { + ImGui::TextUnformatted("profile"); + ImGui::SameLine(); + if (ImGui::ArrowButton("##profile_prev", ImGuiDir_Up)) + { + activeIx = (activeIx + assetLabelPtrs.size() - 1u) % assetLabelPtrs.size(); + activeIxUi = static_cast(activeIx); + } + ImGui::SameLine(); + if (ImGui::ArrowButton("##profile_next", ImGuiDir_Down)) + { + activeIx = (activeIx + 1u) % assetLabelPtrs.size(); + activeIxUi = static_cast(activeIx); + } + showHint("Select active IES profile. 
Use up/down arrows."); + ImGui::NewLine(); + ImGui::SetNextItemWidth(ImGui::GetContentRegionAvail().x); + if (ImGui::Combo("##profile", &activeIxUi, assetLabelPtrs.data(), static_cast(assetLabelPtrs.size()))) + activeIx = static_cast(activeIxUi); + showHint("Select active IES profile."); + } + } + ImGui::End(); + + ies.zDegree = angle; + uiState.activeAssetIx = activeIx; + // 3D plot + { + info.textureID += device_base_t::MaxFramesInFlight; + + { + ImGui::SetNextWindowPos(bottomPos, ImGuiCond_Always); + ImGui::SetNextWindowSize(bottomSize, ImGuiCond_Always); + + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0.f, 0.f)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.f); + + ImGuiWindowFlags imgFlags = + ImGuiWindowFlags_NoDecoration | + ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoBringToFrontOnFocus | + ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_NoScrollbar | + ImGuiWindowFlags_NoScrollWithMouse; + + if (ImGui::Begin("3D Plot", nullptr, imgFlags)) + { + const ImVec2 avail = ImGui::GetContentRegionAvail(); + const ImVec2 plotSize(ImMax(0.0f, avail.x), ImMax(0.0f, avail.y)); + ImVec2 imgPos = ImGui::GetCursorScreenPos(); + ImGui::Image(info, plotSize); + plotRectMin = ImGui::GetItemRectMin(); + plotRectMax = ImGui::GetItemRectMax(); + plotRectValid = true; + plotHovered = ImGui::IsItemHovered(); + + const float margin = 8.0f; + const float barWidth = 16.0f; + const float barHeight = ImMax(80.0f, plotSize.y - margin * 2.0f); + if (plotSize.x > barWidth + margin * 2.0f && plotSize.y > margin * 2.0f) + { + const bool useFalseColorLegend = uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_FALSE_COLOR); + ImVec2 barMin(imgPos.x + plotSize.x - barWidth - margin, imgPos.y + margin); + ImVec2 barMax(barMin.x + barWidth, barMin.y + barHeight); + + ImDrawList* dl = ImGui::GetWindowDrawList(); + const int steps = 64; + for (int i = 0; i < steps; ++i) + { + const float t0 = float(i) / float(steps); + const float t1 
= float(i + 1) / float(steps); + const float y0 = barMin.y + (1.0f - t1) * barHeight; + const float y1 = barMin.y + (1.0f - t0) * barHeight; + const float v = (t0 + t1) * 0.5f; + const ImU32 col = legendColor(v, useFalseColorLegend); + dl->AddRectFilled(ImVec2(barMin.x, y0), ImVec2(barMax.x, y1), col); + } + dl->AddRect(barMin, barMax, ImGui::GetColorU32(ImGuiCol_Border)); + + const ImU32 textCol = ImGui::GetColorU32(ImGuiCol_Text); + for (uint32_t i = 0u; i < hlsl::this_example::ies::FalseColorStopCount; ++i) + { + const float stop = hlsl::this_example::ies::falseColorStop(i); + const float y = barMin.y + (1.0f - stop) * barHeight; + dl->AddLine(ImVec2(barMin.x - 4.0f, y), ImVec2(barMin.x, y), textCol); + const float cdValue = stop * properties.maxCandelaValue; + std::array label{}; + std::snprintf(label.data(), label.size(), "%.0f cd", cdValue); + ImVec2 labelSize = ImGui::CalcTextSize(label.data()); + dl->AddText(ImVec2(barMin.x - labelSize.x - 6.0f, y - labelSize.y * 0.5f), textCol, label.data()); + } + } + } + ImGui::End(); + + ImGui::PopStyleVar(2); + } + } + + if (plotRectValid && plotHovered && activeIx < m_assets.size()) + { + const float plotW = plotRectMax.x - plotRectMin.x; + const float plotH = plotRectMax.y - plotRectMin.y; + const ImVec2 mousePos = ImGui::GetIO().MousePos; + if (plotW > 1.0f && plotH > 1.0f && + mousePos.x >= plotRectMin.x && mousePos.x <= plotRectMax.x && + mousePos.y >= plotRectMin.y && mousePos.y <= plotRectMax.y) + { + const auto& iesCandela = m_assets[activeIx]; + const auto* profileCandela = iesCandela.getProfile(); + const auto& accessorCandela = profileCandela->getAccessor(); + const auto& resolutionCandela = accessorCandela.properties.optimalIESResolution; + + const float u = (mousePos.x - plotRectMin.x) / plotW; + const float v = (mousePos.y - plotRectMin.y) / plotH; + const float ndcX = u * 2.0f - 1.0f; + const float ndcY = v * 2.0f - 1.0f; + + float32_t4x4 viewProj = camera.getConcatenatedMatrix(); + const auto 
invViewProj = inverse(viewProj); + + const float32_t4 nearPoint(ndcX, ndcY, 0.0f, 1.0f); + const float32_t4 farPoint(ndcX, ndcY, 1.0f, 1.0f); + auto nearWorld = mul(invViewProj, nearPoint); + auto farWorld = mul(invViewProj, farPoint); + nearWorld /= nearWorld.w; + farWorld /= farWorld.w; + + using core_vec_t = std::remove_cv_t>; + const auto toHlslVec3 = [](const core_vec_t& v) + { + return float32_t3(v.x, v.y, v.z); + }; + + const float32_t3 origin = toHlslVec3(camera.getPosition()); + const float32_t3 farPos = float32_t3(farWorld); + float32_t3 direction = normalize(farPos - origin); + + float32_t3 hitPos(0.f); + bool hit = false; + const bool cubePlot = uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_CUBE); + if (cubePlot) + { + float tmin = -1.0e20f; + float tmax = 1.0e20f; + auto update = [&](float originAxis, float dirAxis) -> bool + { + const float eps = 1.0e-6f; + if (abs(dirAxis) < eps) + { + if (originAxis < -m_plotRadius || originAxis > m_plotRadius) + return false; + return true; + } + float t1 = (-m_plotRadius - originAxis) / dirAxis; + float t2 = (m_plotRadius - originAxis) / dirAxis; + if (t1 > t2) + { + float tmp = t1; + t1 = t2; + t2 = tmp; + } + tmin = hlsl::max(tmin, t1); + tmax = hlsl::min(tmax, t2); + return tmin <= tmax; + }; + + if (update(origin.x, direction.x) && update(origin.y, direction.y) && update(origin.z, direction.z)) + { + const float t = (tmax < 0.0f) ? tmin : tmax; + if (t >= 0.0f) + { + hitPos = origin + direction * t; + hit = true; + } + } + } + else + { + const float b = dot(origin, direction); + const float c = dot(origin, origin) - m_plotRadius * m_plotRadius; + const float disc = b * b - c; + if (disc >= 0.0f) + { + const float sqrtDisc = sqrt(disc); + const float tFar = -b + sqrtDisc; + const float tNear = -b - sqrtDisc; + const float t = (tFar < 0.0f) ? 
tNear : tFar; + if (t >= 0.0f) + { + hitPos = origin + direction * t; + hit = true; + } + } + } + + if (hit) + { + using octahedral_t = math::OctahedralTransform; + const float32_t3 dir = normalize(hitPos); + const uint32_t resX = resolutionCandela.x; + const uint32_t resY = resolutionCandela.y; + if (resX > 0u && resY > 0u) + { + const float32_t2 res(static_cast(resX), static_cast(resY)); + const float32_t2 halfMinusHalfPixel = float32_t2(0.5f, 0.5f) - float32_t2(0.5f, 0.5f) / res; + float32_t2 uv = octahedral_t::dirToUV(dir, halfMinusHalfPixel); + const bool interpolateCandela = uiState.mode.sphere.hasFlags(hlsl::this_example::ies::ESM_OCTAHEDRAL_UV_INTERPOLATE); + if (!interpolateCandela) + { + const auto pixel = floor(uv * res); + uv = (pixel + float32_t2(0.5f, 0.5f)) / res; + } + + const auto texture = CIESProfile::texture_t::create(accessorCandela.properties.maxCandelaValue, resolutionCandela); + const float normalized = texture.__call(accessorCandela, uv); + candelaValue = texture.info.maxValueRecip > 0.0f ? 
(normalized / texture.info.maxValueRecip) : 0.0f; + candelaValid = true; + } + } + } + } + + if (candelaValid && !uiState.cameraControlEnabled) + { + ImGui::BeginTooltip(); + ImGui::Text("candela: %.3f cd", candelaValue); + ImGui::EndTooltip(); + } +} + + + diff --git a/50.IESViewer/CMakeLists.txt b/50.IESViewer/CMakeLists.txt new file mode 100644 index 000000000..2bf83045f --- /dev/null +++ b/50.IESViewer/CMakeLists.txt @@ -0,0 +1,75 @@ +if(NBL_BUILD_IMGUI) +set(SRCs + AppInit.cpp AppRender.cpp AppGPU.cpp AppUI.cpp AppEvent.cpp AppInputParser.cpp + App.hpp AppInputParser.hpp + IES.cpp IES.hpp + CSimpleIESRenderer.hpp + inputs.json +) + +set(LIBs + imtestengine + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" + Nabla::ext::FullScreenTriangle +) + +nbl_create_executable_project("${SRCs}" "" "" "${LIBs}") +target_link_libraries(${EXECUTABLE_NAME} PRIVATE nlohmann_json::nlohmann_json) +add_dependencies(${EXECUTABLE_NAME} argparse) +target_include_directories(${EXECUTABLE_NAME} PUBLIC $) + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/false_color.hlsl + app_resources/imgui.opts.hlsl + app_resources/ies.unified.hlsl + app_resources/imgui.unified.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/ies.unified.hlsl", + "KEY": "ies.unified" + }, + { + "INPUT": "app_resources/imgui.unified.hlsl", + "KEY": "imgui.unified" + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${NBL_ROOT_PATH}/include" # a workaround due to imgui ext headers which are not part of Nabla builtin archive + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + 
COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) +endif() diff --git a/50.IESViewer/CSimpleIESRenderer.hpp b/50.IESViewer/CSimpleIESRenderer.hpp new file mode 100644 index 000000000..d5614aa7a --- /dev/null +++ b/50.IESViewer/CSimpleIESRenderer.hpp @@ -0,0 +1,427 @@ +#ifndef _NBL_EXAMPLES_C_SIMPLE_IES_RENDERER_H_INCLUDED_ +#define _NBL_EXAMPLES_C_SIMPLE_IES_RENDERER_H_INCLUDED_ + +// NOTE: this is CSimpleDebugRenderer with dirty updates, not meant to be used outside the example + +#include "nbl/examples/examples.hpp" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include "app_resources/common.hlsl" +#include + +namespace nbl::examples +{ + +class CSimpleIESRenderer final : public core::IReferenceCounted +{ +#define EXPOSE_NABLA_NAMESPACES \ + using namespace nbl::core; \ + using namespace nbl::system; \ + using namespace nbl::asset; \ + using namespace nbl::video + + public: + // + constexpr static inline uint16_t VertexAttrubUTBDescBinding = 0; + // + struct SViewParams + { + inline SViewParams(const hlsl::float32_t3x4& _view, const hlsl::float32_t4x4& _viewProj) + { + view = _view; + viewProj = _viewProj; + using namespace nbl::hlsl; + normal = transpose(inverse(float32_t3x3(view))); + } + + inline auto computeForInstance(hlsl::float32_t3x4 world) const + { + using namespace nbl::hlsl; + hlsl::this_example::SInstanceMatrices retval = { + .worldViewProj = float32_t4x4(math::linalg::promoted_mul(float64_t4x4(viewProj),float64_t3x4(world))) + }; + const auto sub3x3 = mul(float64_t3x3(viewProj),float64_t3x3(world)); + retval.normal = float32_t3x3(transpose(inverse(sub3x3))); + return retval; + } + + hlsl::float32_t3x4 view; + 
hlsl::float32_t4x4 viewProj; + hlsl::float32_t3x3 normal; + }; + + struct SIESParams + { + hlsl::float32_t radius = 1.f; + IGPUDescriptorSet* ds = nullptr; + uint16_t texID = 0u; + uint16_t mode = hlsl::this_example::ies::ESM_NONE; + bool wireframe = false; + }; + // + struct SPackedGeometry + { + core::smart_refctd_ptr pipeline = {}; + asset::SBufferBinding indexBuffer = {}; + uint32_t elementCount = 0; + // indices into the descriptor set + constexpr static inline auto MissingView = hlsl::this_example::ies::SpherePC::DescriptorCount; + uint16_t positionView = MissingView; + uint16_t normalView = MissingView; + asset::E_INDEX_TYPE indexType = asset::EIT_UNKNOWN; + }; + // + struct SInstance + { + using SPushConstants = hlsl::this_example::ies::SpherePC; + inline SPushConstants computePushConstants(const SViewParams& viewParams, const SIESParams& iesParams) const + { + using namespace hlsl; + return { + .matrices = viewParams.computeForInstance(world), + .positionView = packedGeo->positionView, + .normalView = packedGeo->normalView, + .radius = iesParams.radius, + .mode = iesParams.mode, + .texIx = iesParams.texID + }; + } + + hlsl::float32_t3x4 world; + const SPackedGeometry* packedGeo; + }; + + // + constexpr static inline auto DefaultPolygonGeometryPatch = []()->video::CAssetConverter::patch_t + { + // we want to use the vertex data through UTBs + using usage_f = video::IGPUBuffer::E_USAGE_FLAGS; + video::CAssetConverter::patch_t patch = {}; + patch.positionBufferUsages = usage_f::EUF_UNIFORM_TEXEL_BUFFER_BIT; + patch.indexBufferUsages = usage_f::EUF_INDEX_BUFFER_BIT; + patch.otherBufferUsages = usage_f::EUF_UNIFORM_TEXEL_BUFFER_BIT; + return patch; + }(); + + // + static inline core::smart_refctd_ptr create(core::smart_refctd_ptr precompiled, core::smart_refctd_ptr iesDSLayout, video::IGPURenderpass* renderpass, const uint32_t subpassIX) + { + EXPOSE_NABLA_NAMESPACES; + + if (!renderpass) + return nullptr; + auto device = 
const_cast(renderpass->getOriginDevice()); + auto logger = device->getLogger(); + + if (not precompiled) + return nullptr; + smart_refctd_ptr shader = precompiled; + + SInitParams init; + + // create descriptor set + { + // create Descriptor Set Layout + smart_refctd_ptr dsLayout; + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + const IGPUDescriptorSetLayout::SBinding bindings[] = + { + { + .binding = VertexAttrubUTBDescBinding, + .type = IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, + // need this trifecta of flags for `SubAllocatedDescriptorSet` to accept the binding as suballocatable + .createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT|binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT |binding_flags_t::ECF_PARTIALLY_BOUND_BIT, + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX|IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = SPackedGeometry::MissingView + } + }; + dsLayout = device->createDescriptorSetLayout(bindings); + if (!dsLayout) + { + logger->log("Could not create descriptor set layout!",ILogger::ELL_ERROR); + return nullptr; + } + } + + // create Descriptor Set + auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT,{&dsLayout.get(),1}); + auto ds = pool->createDescriptorSet(std::move(dsLayout)); + if (!ds) + { + logger->log("Could not descriptor set!",ILogger::ELL_ERROR); + return nullptr; + } + init.subAllocDS = make_smart_refctd_ptr(std::move(ds)); + } + + // create pipeline layout + const SPushConstantRange ranges[] = {{ + .stageFlags = hlsl::ShaderStage::ESS_VERTEX|hlsl::ShaderStage::ESS_FRAGMENT, + .offset = offsetof(hlsl::this_example::ies::PushConstants, sphere), + .size = sizeof(SInstance::SPushConstants), + }}; + init.layout = device->createPipelineLayout(ranges, smart_refctd_ptr(iesDSLayout), smart_refctd_ptr(init.subAllocDS->getDescriptorSet()->getLayout())); + + // create pipelines + using pipeline_e = SInitParams::PipelineType; + { + 
IGPUGraphicsPipeline::SCreationParams params[pipeline_e::Count] = {}; + params[pipeline_e::SphereTriangleStrip].vertexShader = { .shader = shader.get(),.entryPoint = "SphereVS" }; + params[pipeline_e::SphereTriangleStrip].fragmentShader = { .shader = shader.get(),.entryPoint = "SpherePS" }; + params[pipeline_e::SphereTriangleStripWire].vertexShader = { .shader = shader.get(),.entryPoint = "SphereVS" }; + params[pipeline_e::SphereTriangleStripWire].fragmentShader = { .shader = shader.get(),.entryPoint = "SpherePS" }; + for (auto i=0; i(i); + switch (type) + { + case pipeline_e::SphereTriangleStrip: + primitiveAssembly.primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_STRIP; + break; + case pipeline_e::SphereTriangleStripWire: + primitiveAssembly.primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_STRIP; + rasterization.polygonMode = EPM_LINE; + break; + default: + assert(false); + break; + } + primitiveAssembly.primitiveRestartEnable = false; + rasterization.faceCullingMode = EFCM_NONE; + rasterization.depthWriteEnable = true; + rasterization.depthCompareOp = ECO_GREATER; + params[i].cached.subpassIx = subpassIX; + params[i].renderpass = renderpass; + } + if (!device->createGraphicsPipelines(nullptr,params,init.pipelines)) + { + logger->log("Could not create Graphics Pipelines!",ILogger::ELL_ERROR); + return nullptr; + } + } + + return smart_refctd_ptr(new CSimpleIESRenderer(std::move(init)),dont_grab); + } + + // + static inline core::smart_refctd_ptr create(core::smart_refctd_ptr precompiled, core::smart_refctd_ptr iesDSLayout, video::IGPURenderpass* renderpass, const uint32_t subpassIX, const std::span geometries) + { + auto retval = create(precompiled, iesDSLayout, renderpass, subpassIX); + if (retval) + retval->addGeometries(geometries); + return retval; + } + + // + struct SInitParams + { + enum PipelineType : uint8_t + { + SphereTriangleStrip, + SphereTriangleStripWire, + Count + }; + + core::smart_refctd_ptr subAllocDS; + core::smart_refctd_ptr layout; + 
core::smart_refctd_ptr pipelines[PipelineType::Count]; + }; + inline const SInitParams& getInitParams() const {return m_params;} + + // + inline bool addGeometries(const std::span geometries) + { + EXPOSE_NABLA_NAMESPACES; + if (geometries.empty()) + return false; + auto device = const_cast(m_params.layout->getOriginDevice()); + + std::vector writes; + std::vector infos; + bool anyFailed = false; + auto allocateUTB = [&](const IGeometry::SDataView& view)->decltype(SubAllocatedDescriptorSet::invalid_value) + { + if (!view) + return SPackedGeometry::MissingView; + auto index = SubAllocatedDescriptorSet::invalid_value; + if (m_params.subAllocDS->multi_allocate(VertexAttrubUTBDescBinding,1,&index)!=0) + { + anyFailed = true; + return SPackedGeometry::MissingView; + } + const auto infosOffset = infos.size(); + infos.emplace_back().desc = device->createBufferView(view.src,view.composed.format); + writes.emplace_back() = { + .dstSet = m_params.subAllocDS->getDescriptorSet(), + .binding = VertexAttrubUTBDescBinding, + .arrayElement = index, + .count = 1, + .info = reinterpret_cast(infosOffset) + }; + return index; + }; + if (anyFailed) + device->getLogger()->log("Failed to allocate a UTB for some geometries, probably ran out of space in Descriptor Set!",system::ILogger::ELL_ERROR); + + auto sizeToSet = m_geoms.size(); + auto resetGeoms = core::makeRAIIExiter([&]()->void + { + for (auto& write : writes) + immediateDealloc(write.arrayElement); + m_geoms.resize(sizeToSet); + } + ); + for (const auto geom : geometries) + { + // could also check device origin on all buffers + if (!geom->valid()) + return false; + auto& out = m_geoms.emplace_back(); + using pipeline_e = SInitParams::PipelineType; + switch (geom->getIndexingCallback()->knownTopology()) + { + case E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_STRIP: + out.pipeline = m_params.pipelines[pipeline_e::SphereTriangleStrip]; + break; + default: + assert(false); + break; + } + if (const auto& view=geom->getIndexView(); view) + { + 
out.indexBuffer.offset = view.src.offset; + out.indexBuffer.buffer = view.src.buffer; + switch (view.composed.format) + { + case E_FORMAT::EF_R16_UINT: + out.indexType = EIT_16BIT; + break; + case E_FORMAT::EF_R32_UINT: + out.indexType = EIT_32BIT; + break; + default: + return false; + } + } + out.elementCount = geom->getVertexReferenceCount(); + out.positionView = allocateUTB(geom->getPositionView()); + out.normalView = allocateUTB(geom->getNormalView()); + } + + // no geometry + if (infos.empty()) + return false; + + // unbase our pointers + for (auto& write : writes) + write.info = infos.data()+reinterpret_cast(write.info); + if (!device->updateDescriptorSets(writes,{})) + return false; + + // retain + writes.clear(); + sizeToSet = m_geoms.size(); + return true; + } + + // + inline void removeGeometry(const uint32_t ix, const video::ISemaphore::SWaitInfo& info) + { + EXPOSE_NABLA_NAMESPACES; + if (ix>=m_geoms.size()) + return; + + std::vector deferredFree; + deferredFree.reserve(3); + auto deallocate = [&](SubAllocatedDescriptorSet::value_type index)->void + { + if (index>=SPackedGeometry::MissingView) + return; + if (info.semaphore) + deferredFree.push_back(index); + else + immediateDealloc(index); + }; + auto geo = m_geoms.begin() + ix; + deallocate(geo->positionView); + deallocate(geo->normalView); + m_geoms.erase(geo); + + if (deferredFree.empty()) + return; + m_params.subAllocDS->multi_deallocate(VertexAttrubUTBDescBinding,deferredFree.size(),deferredFree.data(),info); + } + + // + inline void clearGeometries(const video::ISemaphore::SWaitInfo& info) + { + // back to front to avoid O(n^2) resize + while (!m_geoms.empty()) + removeGeometry(m_geoms.size()-1,info); + } + + // + inline const auto& getGeometries() const {return m_geoms;} + inline auto& getGeometry(const uint32_t ix) {return m_geoms[ix];} + + // + inline void render(video::IGPUCommandBuffer* cmdbuf, const SViewParams& viewParams, const SIESParams& iesParams) const + { + EXPOSE_NABLA_NAMESPACES; + 
+ cmdbuf->beginDebugMarker("CSimpleIESRenderer::render"); + + const auto* layout = m_params.layout.get(); + + IGPUDescriptorSet* descriptors[] = { iesParams.ds, m_params.subAllocDS->getDescriptorSet() }; + cmdbuf->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS,layout,0,2, descriptors); + + for (const auto& instance : m_instances) + { + const auto* geo = instance.packedGeo; + auto pipeline = geo->pipeline; + if (iesParams.wireframe) + pipeline = m_params.pipelines[SInitParams::PipelineType::SphereTriangleStripWire]; + cmdbuf->bindGraphicsPipeline(pipeline.get()); + const auto pc = instance.computePushConstants(viewParams, iesParams); + cmdbuf->pushConstants(layout,hlsl::ShaderStage::ESS_VERTEX|hlsl::ShaderStage::ESS_FRAGMENT,offsetof(hlsl::this_example::ies::PushConstants, sphere),sizeof(pc),&pc); + if (geo->indexBuffer) + { + cmdbuf->bindIndexBuffer(geo->indexBuffer,geo->indexType); + cmdbuf->drawIndexed(geo->elementCount,1,0,0,0); + } + else + cmdbuf->draw(geo->elementCount,1,0,0); + } + cmdbuf->endDebugMarker(); + } + + std::vector m_instances; + + protected: + inline CSimpleIESRenderer(SInitParams&& _params) : m_params(std::move(_params)) {} + inline ~CSimpleIESRenderer() + { + // clean shutdown, can also make SubAllocatedDescriptorSet resillient against that, and issue `device->waitIdle` if not everything is freed + const_cast(m_params.layout->getOriginDevice())->waitIdle(); + clearGeometries({}); + } + + inline void immediateDealloc(video::SubAllocatedDescriptorSet::value_type index) + { + video::IGPUDescriptorSet::SDropDescriptorSet dummy[1]; + m_params.subAllocDS->multi_deallocate(dummy,VertexAttrubUTBDescBinding,1,&index); + } + + SInitParams m_params; + std::vector m_geoms; +#undef EXPOSE_NABLA_NAMESPACES +}; + +} +#endif // _NBL_EXAMPLES_C_SIMPLE_IES_RENDERER_H_INCLUDED_ diff --git a/50.IESViewer/IES.cpp b/50.IESViewer/IES.cpp new file mode 100644 index 000000000..3c1df172c --- /dev/null +++ b/50.IESViewer/IES.cpp @@ -0,0 +1,27 @@ +// Copyright 
(C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "IES.hpp" + +const asset::CIESProfile* IES::getProfile() const +{ + auto* meta = bundle.getMetadata(); + if (meta) + return &meta->selfCast()->profile; + + return nullptr; +} + +video::IGPUImage* IES::getActiveImage(E_MODE mode) const +{ + switch (mode) + { + case EM_OCTAHEDRAL_MAP: + return views.candelaOctahedralMap->getCreationParameters().image.get(); + + case EM_CDC: + default: + return nullptr; + } +} diff --git a/50.IESViewer/IES.hpp b/50.IESViewer/IES.hpp new file mode 100644 index 000000000..da9f98b3f --- /dev/null +++ b/50.IESViewer/IES.hpp @@ -0,0 +1,193 @@ +#ifndef _THIS_EXAMPLE_IES_HPP_ +#define _THIS_EXAMPLE_IES_HPP_ + +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" +#include "nbl/system/to_string.h" + +NBL_EXPOSE_NAMESPACES + +struct IES +{ + enum E_MODE : uint32_t + { + EM_CDC, //! Candlepower Distribution Curve + EM_OCTAHEDRAL_MAP, //! 
Candela Octahedral Map + + EM_SIZE + }; + + struct + { + smart_refctd_ptr candelaOctahedralMap = nullptr; + } views; + + struct + { + smart_refctd_ptr vAngles = nullptr, hAngles = nullptr, data = nullptr; // allocation per ies + SBufferBinding textureInfo; // shared allocation for all ies + } buffers; + + SAssetBundle bundle; + std::string key; + + float zDegree = 0.f; + + const asset::CIESProfile* getProfile() const; + video::IGPUImage* getActiveImage(E_MODE mode) const; + + template + requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + static inline bool barrier(IGPUCommandBuffer* const cb, const std::span images) + { + if (images.empty()) + return false; + + if (not cb) + return false; + + using image_memory_barrier_t = IGPUCommandBuffer::SImageMemoryBarrier; + const IGPUImage::SSubresourceRange range = + { + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + + std::vector imageBarriers(images.size()); + + for (uint32_t i = 0; i < imageBarriers.size(); ++i) + { + auto& it = imageBarriers[i] = + { + .barrier = {.dep = {}}, + .image = images[i], + .subresourceRange = range, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = newLayout + }; + + if constexpr (newLayout == IImage::LAYOUT::GENERAL) + { + // READ_ONLY_OPTIMAL -> GENERAL, RW + it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS; + it.barrier.dep.srcAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; + it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + it.barrier.dep.dstAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; + it.oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + } + else if (newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + { + // GENERAL -> READ_ONLY_OPTIMAL, RO + it.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + it.barrier.dep.srcAccessMask = ACCESS_FLAGS::STORAGE_WRITE_BIT; + 
it.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS; + it.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; + it.oldLayout = IImage::LAYOUT::GENERAL; + } + + if constexpr (undefined) + it.oldLayout = IImage::LAYOUT::UNDEFINED; // transition for init + } + + return cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {}, .bufBarriers = {}, .imgBarriers = imageBarriers }); + } + + template + requires(newLayout == IImage::LAYOUT::GENERAL or newLayout == IImage::LAYOUT::READ_ONLY_OPTIMAL) + static inline bool barrier(IGPUCommandBuffer* const cb, video::IGPUImage* image) + { + if (not image) + return false; + + auto in = std::to_array({ image }); + return barrier(cb, in); + } +}; + +namespace nbl::system::impl +{ +template<> +struct to_string_helper +{ + static std::string __call(const IES::E_MODE mode) + { + switch (mode) + { + case IES::EM_CDC: + return "Candlepower Distribution Curve"; + case IES::EM_OCTAHEDRAL_MAP: + return "Candela Octahedral Map"; + default: + return "ERROR (mode)"; + } + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const nbl::asset::CIESProfile::properties_t::LuminairePlanesSymmetry symmetry) + { + switch (symmetry) + { + case nbl::asset::CIESProfile::properties_t::ISOTROPIC: + return "ISOTROPIC"; + case nbl::asset::CIESProfile::properties_t::QUAD_SYMETRIC: + return "QUAD_SYMETRIC"; + case nbl::asset::CIESProfile::properties_t::HALF_SYMETRIC: + return "HALF_SYMETRIC"; + case nbl::asset::CIESProfile::properties_t::OTHER_HALF_SYMMETRIC: + return "OTHER_HALF_SYMMETRIC"; + case nbl::asset::CIESProfile::properties_t::NO_LATERAL_SYMMET: + return "NO_LATERAL_SYMMET"; + default: + return "ERROR (symmetry)"; + } + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const nbl::asset::CIESProfile::properties_t::PhotometricType type) + { + switch (type) + { + case nbl::asset::CIESProfile::properties_t::TYPE_C: + return "TYPE_C"; + case 
nbl::asset::CIESProfile::properties_t::TYPE_B: + return "TYPE_B"; + case nbl::asset::CIESProfile::properties_t::TYPE_A: + return "TYPE_A"; + case nbl::asset::CIESProfile::properties_t::TYPE_NONE: + default: + return "TYPE_NONE"; + } + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const nbl::asset::CIESProfile::properties_t::Version version) + { + switch (version) + { + case nbl::asset::CIESProfile::properties_t::V_1995: + return "V_1995"; + case nbl::asset::CIESProfile::properties_t::V_2002: + return "V_2002"; + default: + return "V_UNKNOWN"; + } + } +}; +} + +#endif // _THIS_EXAMPLE_IES_HPP_ diff --git a/50.IESViewer/app_resources/common.hlsl b/50.IESViewer/app_resources/common.hlsl new file mode 100644 index 000000000..54a95b9d0 --- /dev/null +++ b/50.IESViewer/app_resources/common.hlsl @@ -0,0 +1,73 @@ +#ifndef _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ +#define _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/ies/profile.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace this_example +{ + +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float QuantErrorAdmissible = 1.0f / 1024.0f; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = 256u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupDimension = 16u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MaxIesImages = 6969u; + +struct SInstanceMatrices +{ + float32_t4x4 worldViewProj; + float32_t3x3 normal; +}; + +namespace ies +{ + +struct CdcPC +{ + uint64_t hAnglesBDA; + uint64_t vAnglesBDA; + uint64_t dataBDA; + uint64_t txtInfoBDA; + uint32_t mode : 8; + uint32_t texIx : 24; + uint32_t hAnglesCount; + uint32_t vAnglesCount; + float32_t zAngleDegreeRotation; + nbl::hlsl::ies::ProfileProperties properties; + + float32_t pad; +}; + +enum E_SPHERE_MODE : uint16_t +{ + ESM_NONE = 0, + ESM_OCTAHEDRAL_UV_INTERPOLATE = 1u << 0, + ESM_FALSE_COLOR = 1u << 1, + ESM_CUBE = 1u << 2 +}; + +struct SpherePC +{ + 
NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1<<16)-1; + this_example::SInstanceMatrices matrices; + uint32_t positionView : 16; + uint32_t normalView : 16; + float32_t radius; + uint32_t mode : 8; + uint32_t texIx : 24; +}; + +struct PushConstants +{ + CdcPC cdc; + SpherePC sphere; +}; + +} +} +} +} +#endif // _THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ diff --git a/50.IESViewer/app_resources/false_color.hlsl b/50.IESViewer/app_resources/false_color.hlsl new file mode 100644 index 000000000..ffc830ec2 --- /dev/null +++ b/50.IESViewer/app_resources/false_color.hlsl @@ -0,0 +1,74 @@ +#ifndef _THIS_EXAMPLE_FALSE_COLOR_HLSL_INCLUDED_ +#define _THIS_EXAMPLE_FALSE_COLOR_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/tgmath.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace this_example +{ +namespace ies +{ + +NBL_CONSTEXPR_STATIC_INLINE uint32_t FalseColorStopCount = 6u; + +inline float32_t falseColorStop(uint32_t idx) +{ + switch (idx) + { + case 0u: return 0.0f; + case 1u: return 0.15f; + case 2u: return 0.35f; + case 3u: return 0.55f; + case 4u: return 0.75f; + default: return 1.0f; + } +} + +inline float32_t3 falseColor(float32_t v) +{ + v = nbl::hlsl::clamp(v, float32_t(0.0f), float32_t(1.0f)); + v = nbl::hlsl::pow(v, float32_t(0.8f)); + + const float32_t3 c0 = float32_t3(0.0f, 0.0f, 0.0f); + const float32_t3 c1 = float32_t3(0.0f, 0.0f, 0.35f); + const float32_t3 c2 = float32_t3(0.10f, 0.20f, 0.90f); + const float32_t3 c3 = float32_t3(0.70f, 0.05f, 0.80f); + const float32_t3 c4 = float32_t3(1.00f, 0.30f, 1.00f); + const float32_t3 c5 = float32_t3(1.00f, 1.00f, 1.00f); + + if (v < 0.15f) + { + const float32_t t = v / 0.15f; + return c0 + (c1 - c0) * t; + } + else if (v < 0.35f) + { + const float32_t t = (v - 0.15f) / (0.35f - 0.15f); + return c1 + (c2 - c1) * t; + } + else if (v < 0.55f) + { + const float32_t t = (v - 0.35f) / (0.55f - 0.35f); + return c2 + (c3 - c2) * t; + } + else if (v < 0.75f) + { + const float32_t t = (v - 0.55f) / (0.75f - 0.55f); + 
return c3 + (c4 - c3) * t; + } + else + { + const float32_t t = (v - 0.75f) / (1.0f - 0.75f); + return c4 + (c5 - c4) * t; + } +} + +} +} +} +} + +#endif diff --git a/50.IESViewer/app_resources/ies.unified.hlsl b/50.IESViewer/app_resources/ies.unified.hlsl new file mode 100644 index 000000000..fb89b2ed5 --- /dev/null +++ b/50.IESViewer/app_resources/ies.unified.hlsl @@ -0,0 +1,165 @@ +#include "common.hlsl" +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include "nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl" +#include "false_color.hlsl" + +using namespace nbl::hlsl; +using namespace nbl::hlsl::this_example; +using namespace nbl::hlsl::this_example::ies; +using namespace nbl::hlsl::ext::FullScreenTriangle; + +[[vk::binding(0, 0)]] Texture2D inIESCandelaImage[MaxIesImages]; +[[vk::binding(0 + 10, 0)]] RWTexture2D outIESCandelaImage[MaxIesImages]; +[[vk::binding(0 + 100, 0)]] SamplerState generalSampler; + +[[vk::binding(0, 1)]] Buffer utbs[SpherePC::DescriptorCount]; +[[vk::push_constant]] PushConstants pc; + +struct Accessor +{ + using angle_t = float32_t; + using candela_t = float32_t; + + candela_t value(const uint32_t2 ij) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.dataBDA) + pc.cdc.vAnglesCount * ij.x + ij.y).deref().load(); } + angle_t vAngle(const uint32_t idx) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.vAnglesBDA) + idx).deref().load(); } + angle_t hAngle(const uint32_t idx) { return (nbl::hlsl::bda::__ptr::create(pc.cdc.hAnglesBDA) + idx).deref().load(); } + uint32_t vAnglesCount() { return pc.cdc.vAnglesCount; } + uint32_t hAnglesCount() { return pc.cdc.hAnglesCount; } + + nbl::hlsl::ies::ProfileProperties getProperties() { return pc.cdc.properties; } +}; + +#include "nbl/builtin/hlsl/ies/texture.hlsl" + +struct SInterpolants +{ + float32_t4 ndc : SV_Position; + float32_t3 latDir : COLOR1; + float32_t2 uv : TEXCOORD0; +}; + +using octahedral_t = math::OctahedralTransform; 
+using texture_t = nbl::hlsl::ies::SProceduralTexture; + +[shader("vertex")] +SInterpolants SphereVS(uint32_t vIx : SV_VertexID) +{ + uint32_t2 res; + inIESCandelaImage[pc.sphere.texIx].GetDimensions(res.x, res.y); + + const float32_t2 resF = float32_t2(res); + const float32_t2 uv = (float32_t2(vIx % res.x, vIx / res.x) + float32_t2(0.5f, 0.5f)) / resF; + const float32_t2 halfMinusHalfPixel = float32_t2(0.5f, 0.5f) - float32_t2(0.5f, 0.5f) / resF; + + const float32_t3 dir = octahedral_t::uvToDir(uv, halfMinusHalfPixel); + float32_t3 pos = dir; + const bool useCube = (pc.sphere.mode & ESM_CUBE) != 0; + if (useCube) + { + const float32_t3 ad = abs(dir); + const float32_t maxAxis = max(ad.x, max(ad.y, ad.z)); + pos = dir / maxAxis; + } + pos *= pc.sphere.radius; + + SInterpolants o; + o.ndc = math::linalg::promoted_mul(pc.sphere.matrices.worldViewProj, pos); + o.latDir = dir; + o.uv = uv; + + return o; +} + +[shader("pixel")] +float32_t4 SpherePS(SInterpolants input) : SV_Target0 +{ + uint32_t2 res; + inIESCandelaImage[pc.sphere.texIx].GetDimensions(res.x, res.y); + float32_t2 uv = input.uv; + + const bool dontInterpolateUV = (pc.sphere.mode & ESM_OCTAHEDRAL_UV_INTERPOLATE) == 0; + if (dontInterpolateUV) + { + float32_t2 pixel = floor(uv * float32_t2(res)); + uv = (pixel + float32_t2(0.5f, 0.5f)) / float32_t2(res); + } + + float32_t I = inIESCandelaImage[pc.sphere.texIx].SampleLevel(generalSampler, uv, 0.0f).r; + const bool useFalseColor = (pc.sphere.mode & ESM_FALSE_COLOR) != 0; + float32_t3 col = useFalseColor ? 
falseColor(I) : float32_t3(I, I, I); + + return float32_t4(col, 1.0f); +} + +[numthreads(WorkgroupDimension, WorkgroupDimension, 1)] +[shader("compute")] +void CdcCS(uint32_t3 ID : SV_DispatchThreadID) +{ + uint32_t2 destinationSize; + outIESCandelaImage[pc.cdc.texIx].GetDimensions(destinationSize.x, destinationSize.y); + const uint32_t2 pixelCoordinates = uint32_t2(glsl::gl_GlobalInvocationID().x, glsl::gl_GlobalInvocationID().y); + if (all(pixelCoordinates < destinationSize)) + { + Accessor accessor; + texture_t txt; + nbl::hlsl::ies::IESTextureInfo info = (nbl::hlsl::bda::__ptr::create(pc.cdc.txtInfoBDA) + pc.cdc.texIx).deref().load(); + txt.info = info; + outIESCandelaImage[pc.cdc.texIx][pixelCoordinates] = txt.__call(accessor, pixelCoordinates); + } +} + +float32_t plot(float32_t cand, float32_t pct, float32_t bold) +{ + return smoothstep(pct - 0.005f * bold, pct, cand) - smoothstep(pct, pct + 0.005f * bold, cand); +} + +// vertical cut of the IES profile: the sampled direction is built as (uv.x, ~0, uv.y), i.e. it lies in the plane y = 0, and is then rotated about the Z axis by pc.cdc.zAngleDegreeRotation degrees when that value is non-zero +float32_t f(float32_t2 uv) +{ + float32_t3 dir = normalize(float32_t3(uv.x, 0.001f, uv.y)); + if (pc.cdc.zAngleDegreeRotation != 0.f) + { + float32_t rad = radians(pc.cdc.zAngleDegreeRotation); + float32_t s = sin(rad); + float32_t c = cos(rad); + + dir = float32_t3( + c * dir.x - s * dir.y, + s * dir.x + c * dir.y, + dir.z + ); + } + + uint32_t2 res; + inIESCandelaImage[pc.cdc.texIx].GetDimensions(res.x, res.y); + float32_t2 halfMinusHalfPixel = 0.5f - 0.5f / float32_t2(res); + float32_t2 uvOcta = octahedral_t::dirToUV(dir, halfMinusHalfPixel); + + return inIESCandelaImage[pc.cdc.texIx].SampleLevel(generalSampler, uvOcta, 0u).x; +} + +[shader("pixel")] +float32_t4 CdcPS(SVertexAttributes input) : SV_Target0 +{ + switch (pc.cdc.mode) + { + case 0: + { + float32_t2 ndc = input.uv * 2.f - 1.f; + float32_t dist = length(ndc) * 1.015625f; + float32_t p = plot(dist, 1.0f, 0.75f); + float32_t3 col = float32_t3(p, p, p); + + float32_t normalizedStrength = f(ndc); + if (dist < normalizedStrength) + col 
+= float32_t3(1.0f, 0.0f, 0.0f); + + return float32_t4(col, 1.0f); + } + case 1: + return float32_t4(inIESCandelaImage[pc.cdc.texIx].Sample(generalSampler, input.uv).x, 0.f, 0.f, 1.f); + default: + return float32_t4(0.f, 0.f, 0.f, 0.f); + } +} diff --git a/50.IESViewer/app_resources/imgui.opts.hlsl b/50.IESViewer/app_resources/imgui.opts.hlsl new file mode 100644 index 000000000..fc5cf0fb0 --- /dev/null +++ b/50.IESViewer/app_resources/imgui.opts.hlsl @@ -0,0 +1,16 @@ +#ifndef _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ +#define _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ + +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#define NBL_TEXTURES_BINDING_IX 0u +#define NBL_SAMPLER_STATES_BINDING_IX 1u +#define NBL_TEXTURES_SET_IX 0u +#define NBL_SAMPLER_STATES_SET_IX 0u +#define NBL_TEXTURES_COUNT 10u +#define NBL_SAMPLERS_COUNT 2u + +#endif // _THIS_EXAMPLE_IMGUI_OPTS_HLSL_INCLUDED_ + diff --git a/50.IESViewer/app_resources/imgui.unified.hlsl b/50.IESViewer/app_resources/imgui.unified.hlsl new file mode 100644 index 000000000..03e5624b0 --- /dev/null +++ b/50.IESViewer/app_resources/imgui.unified.hlsl @@ -0,0 +1,7 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "imgui.opts.hlsl" // bindings info +#include "nbl/ext/ImGui/builtin/hlsl/fragment.hlsl" // pixel entry point +#include "nbl/ext/ImGui/builtin/hlsl/vertex.hlsl" // vertex entry point diff --git a/50.IESViewer/inputs.json b/50.IESViewer/inputs.json new file mode 100644 index 000000000..fbb833112 --- /dev/null +++ b/50.IESViewer/inputs.json @@ -0,0 +1,14 @@ +{ + "directories": [ + "mitsuba/ies/packages/leomoon-dot-com_ies-lights-pack/ies-lights-pack" + ], + "files": [ + "mitsuba/ies/ISOTROPIC/007cfb11e343e2f42e3b476be4ab684e.ies", + "mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies", + "mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies", + "mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies", + "mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies" + ], + "gui": true, + "writeAssets": false +} \ No newline at end of file diff --git a/50.IESViewer/main.cpp b/50.IESViewer/main.cpp new file mode 100644 index 000000000..ca44888ef --- /dev/null +++ b/50.IESViewer/main.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "App.hpp" + +IESViewer::IESViewer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({ AppWindowWidth, AppWindowHeight }, AppDepthBufferFormat, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) +{ + +} + +NBL_MAIN_FUNC(IESViewer) diff --git a/59_QuaternionTests/CMakeLists.txt b/59_QuaternionTests/CMakeLists.txt new file mode 100644 index 000000000..84152c9b8 --- /dev/null +++ b/59_QuaternionTests/CMakeLists.txt @@ -0,0 +1,68 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() + +if(MSVC) + target_compile_options("${EXECUTABLE_NAME}" 
PUBLIC "/fp:strict") +else() + target_compile_options("${EXECUTABLE_NAME}" PUBLIC -ffloat-store -frounding-math -fsignaling-nans -ftrapping-math) +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/quaternionTest.comp.hlsl", + "KEY": "quaternionTest", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h new file mode 100644 index 000000000..89478d1ad --- /dev/null +++ b/59_QuaternionTests/CQuaternionTester.h @@ -0,0 +1,186 @@ +#ifndef _NBL_EXAMPLES_TESTS_59_QUATERNION_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_59_QUATERNION_TESTER_INCLUDED_ + +#define GLM_FORCE_RADIANS +#include +#include +#define GLM_ENABLE_EXPERIMENTAL +#include +#include + +#include "nbl/examples/examples.hpp" +#include "app_resources/common.hlsl" +#include "nbl/examples/Tester/ITester.h" +#include +#include + +using namespace nbl; + +class CQuaternionTester final : public ITester +{ + using base_t = ITester; + +public: + CQuaternionTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; + +private: + QuaternionInputTestValues generateInputTestValues() override + { + std::uniform_real_distribution realDistribution(-1.0f, 1.0f); + std::uniform_real_distribution realDistribution01(0.0f, 1.0f); 
+ std::uniform_real_distribution realDistributionRad(-numbers::pi, numbers::pi); + + QuaternionInputTestValues testInput; + testInput.axis = hlsl::normalize(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()))); + testInput.angle = realDistributionRad(getRandomEngine()); + testInput.quat0 = math::quaternion::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); + testInput.quat0 = hlsl::normalize(testInput.quat0); + testInput.quat1 = math::quaternion::create(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())), realDistribution(getRandomEngine())); + testInput.quat1 = hlsl::normalize(testInput.quat1); + testInput.quat2 = testInput.quat0 * realDistribution(getRandomEngine()) * 1000.f; + testInput.quat3 = testInput.quat1 * realDistribution(getRandomEngine()) * 1000.f; + testInput.pitch = realDistributionRad(getRandomEngine()); + testInput.yaw = realDistributionRad(getRandomEngine()); + testInput.roll = realDistributionRad(getRandomEngine()); + testInput.rotationMat = float32_t3x3(glm::rotate(realDistributionRad(getRandomEngine()), hlsl::normalize(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()))))); + testInput.scaleFactor = realDistribution01(getRandomEngine()) * 1000.f; + + testInput.scaleRotationMat = testInput.rotationMat; + testInput.scaleRotationMat *= testInput.scaleFactor; + + testInput.interpolationFactor = realDistribution01(getRandomEngine()); + testInput.someVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + + return testInput; + } + + QuaternionTestValues determineExpectedResults(const QuaternionInputTestValues& testInput) override + { + const auto glmquat0 
= glm::quat(testInput.quat0.data.w, testInput.quat0.data.x, testInput.quat0.data.y, testInput.quat0.data.z); + const auto glmquat1 = glm::quat(testInput.quat1.data.w, testInput.quat1.data.x, testInput.quat1.data.y, testInput.quat1.data.z); + const auto glmquat2 = glm::quat(testInput.quat2.data.w, testInput.quat2.data.x, testInput.quat2.data.y, testInput.quat2.data.z); + const auto glmquat3 = glm::quat(testInput.quat3.data.w, testInput.quat3.data.x, testInput.quat3.data.y, testInput.quat3.data.z); + + QuaternionTestValues expected; + { + const auto glmquat = glm::angleAxis(testInput.angle, testInput.axis); + expected.quatFromAngleAxis.data.x = glmquat.data.data[0]; + expected.quatFromAngleAxis.data.y = glmquat.data.data[1]; + expected.quatFromAngleAxis.data.z = glmquat.data.data[2]; + expected.quatFromAngleAxis.data.w = glmquat.data.data[3]; + } + { + const auto rotmat = glm::yawPitchRoll(testInput.yaw, testInput.pitch, testInput.roll); + const auto glmquat = glm::quat_cast(rotmat); + expected.quatFromEulerAngles.data.x = glmquat.data.data[0]; + expected.quatFromEulerAngles.data.y = glmquat.data.data[1]; + expected.quatFromEulerAngles.data.z = glmquat.data.data[2]; + expected.quatFromEulerAngles.data.w = glmquat.data.data[3]; + } + { + glm::mat3x3 rotmat; + rotmat[0] = testInput.rotationMat[0]; + rotmat[1] = testInput.rotationMat[1]; + rotmat[2] = testInput.rotationMat[2]; + const auto glmquat = glm::quat_cast(glm::transpose(rotmat)); + expected.quatFromMat.data.x = glmquat.data.data[0]; + expected.quatFromMat.data.y = glmquat.data.data[1]; + expected.quatFromMat.data.z = glmquat.data.data[2]; + expected.quatFromMat.data.w = glmquat.data.data[3]; + + expected.quatFromScaledMat.data = hlsl::normalize(expected.quatFromMat.data) * testInput.scaleFactor; + } + { + const auto rotmat = glm::transpose(glm::mat3_cast(glmquat0)); + expected.rotationMat[0] = rotmat[0]; + expected.rotationMat[1] = rotmat[1]; + expected.rotationMat[2] = rotmat[2]; + } + { + const auto rotmat = 
transpose(glm::mat3_cast(glmquat2)); + expected.scaleRotationMat[0] = rotmat[0]; + expected.scaleRotationMat[1] = rotmat[1]; + expected.scaleRotationMat[2] = rotmat[2]; + } + { + const auto mult = glmquat0 * glmquat1; + expected.quatMult.data.x = mult.data.data[0]; + expected.quatMult.data.y = mult.data.data[1]; + expected.quatMult.data.z = mult.data.data[2]; + expected.quatMult.data.w = mult.data.data[3]; + } + { + const auto slerped = glm::slerp(glmquat0, glmquat1, testInput.interpolationFactor); + expected.quatSlerp.data.x = slerped.data.data[0]; + expected.quatSlerp.data.y = slerped.data.data[1]; + expected.quatSlerp.data.z = slerped.data.data[2]; + expected.quatSlerp.data.w = slerped.data.data[3]; + + expected.quatFlerp.data = expected.quatSlerp.data; + } + { + const auto mult = glmquat2 * glmquat3; + expected.quatScaledMult.data.x = mult.data.data[0]; + expected.quatScaledMult.data.y = mult.data.data[1]; + expected.quatScaledMult.data.z = mult.data.data[2]; + expected.quatScaledMult.data.w = mult.data.data[3]; + } + expected.transformedVec = glmquat0 * testInput.someVec; + + return expected; + } + + bool verifyTestResults(const QuaternionTestValues& expectedTestValues, const QuaternionTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override + { + bool pass = true; + pass &= verifyVectorTestValue("create from axis angle", expectedTestValues.quatFromAngleAxis.data, testValues.quatFromAngleAxis.data, testIteration, seed, testType, 1e-2, true); + pass &= verifyVectorTestValue("create from Euler angles", expectedTestValues.quatFromEulerAngles.data, testValues.quatFromEulerAngles.data, testIteration, seed, testType, 1e-2, true); + pass &= verifyVectorTestValue("create from rotation matrix", expectedTestValues.quatFromMat.data, testValues.quatFromMat.data, testIteration, seed, testType, 1e-2, true); + pass &= verifyScaledVectorTestValue("create from scale rotation matrix", expectedTestValues.quatFromScaledMat.data, 
testValues.quatFromScaledMat.data, testIteration, seed, testType, 1e-4, 1e-2); + + pass &= verifyTestValue("construct matrix", expectedTestValues.rotationMat, testValues.rotationMat, testIteration, seed, testType, 1e-2); + pass &= verifyTestValue("construct matrix (scaled)", expectedTestValues.scaleRotationMat, testValues.scaleRotationMat, testIteration, seed, testType, 1e-2); + + pass &= verifyVectorTestValue("multiply quat", expectedTestValues.quatMult.data, testValues.quatMult.data, testIteration, seed, testType, 1e-2, true); + pass &= verifyVectorTestValue("slerp quat", expectedTestValues.quatSlerp.data, testValues.quatSlerp.data, testIteration, seed, testType, 1e-2, true); + pass &= verifyVectorTestValue("flerp quat", expectedTestValues.quatFlerp.data, testValues.quatFlerp.data, testIteration, seed, testType, 1e-1, true); + pass &= verifyTestValue("transform vector", expectedTestValues.transformedVec, testValues.transformedVec, testIteration, seed, testType, 1e-2); + + pass &= verifyScaledVectorTestValue("multiply scaled quat", expectedTestValues.quatScaledMult.data, testValues.quatScaledMult.data, testIteration, seed, testType, 1e-4, 1e-2); + return pass; + } + + template + bool verifyScaledVectorTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, + const size_t testIteration, const uint32_t seed, const TestType testType, const float64_t maxAbsoluteDifference, const float64_t maxRelativeDifference) + { + if (nbl::hlsl::testing::orientationCompare(expectedVal, testVal, maxRelativeDifference) && + nbl::hlsl::testing::vectorLengthCompare(expectedVal, testVal, maxAbsoluteDifference, maxRelativeDifference)) + return true; + + printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType); + return false; + } + + template + bool verifyVectorTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, + const size_t testIteration, const uint32_t seed, const TestType testType, const float64_t 
maxAllowedDifference, const bool testOrientation) + { + if (compareVectorTestValues(expectedVal, testVal, maxAllowedDifference, testOrientation)) + return true; + + printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType); + return false; + } + + template requires concepts::FloatingPointLikeVectorial + bool compareVectorTestValues(const T& lhs, const T& rhs, const float64_t maxAllowedDifference, const bool testOrientation) + { + if (testOrientation) + return nbl::hlsl::testing::orientationCompare(lhs, rhs, maxAllowedDifference); + return nbl::hlsl::testing::relativeApproxCompare(lhs, rhs, maxAllowedDifference); + } +}; + +#endif diff --git a/59_QuaternionTests/app_resources/common.hlsl b/59_QuaternionTests/app_resources/common.hlsl new file mode 100644 index 000000000..312b037f4 --- /dev/null +++ b/59_QuaternionTests/app_resources/common.hlsl @@ -0,0 +1,65 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". 
+//// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_EXAMPLES_TESTS_59_QUATERNION_COMMON_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_59_QUATERNION_COMMON_INCLUDED_ + +#include + +using namespace nbl::hlsl; +struct QuaternionInputTestValues +{ + math::quaternion quat0; + math::quaternion quat1; + math::quaternion quat2; + math::quaternion quat3; + float32_t3 axis; + float angle; + float pitch; + float yaw; + float roll; + float32_t3x3 rotationMat; + float scaleFactor; + float32_t3x3 scaleRotationMat; + float interpolationFactor; + float32_t3 someVec; +}; + +struct QuaternionTestValues +{ + math::quaternion quatFromAngleAxis; + math::quaternion quatFromEulerAngles; + math::quaternion quatFromMat; + math::quaternion quatFromScaledMat; + float32_t3x3 rotationMat; + float32_t3x3 scaleRotationMat; + math::quaternion quatMult; + math::quaternion quatSlerp; + math::quaternion quatFlerp; + math::quaternion quatScaledMult; + float32_t3 transformedVec; +}; + +struct QuaternionTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(QuaternionInputTestValues) input, NBL_REF_ARG(QuaternionTestValues) output) + { + output.quatFromAngleAxis = math::quaternion::create(input.axis, input.angle); + output.quatFromEulerAngles = math::quaternion::create(input.pitch, input.yaw, input.roll); + output.quatFromMat = math::quaternion::create(input.rotationMat); + output.quatFromScaledMat = math::quaternion::create(input.scaleRotationMat); + + output.rotationMat = _static_cast(input.quat0); + output.scaleRotationMat = _static_cast(input.quat2); + + output.quatMult = input.quat0 * input.quat1; + output.quatSlerp = math::quaternion::slerp(input.quat0, input.quat1, input.interpolationFactor); + output.quatFlerp = math::quaternion::flerp(input.quat0, input.quat1, input.interpolationFactor); + output.transformedVec = input.quat0.transformVector(input.someVec, true); + + output.quatScaledMult = input.quat2 * input.quat3; + } +}; + +#endif diff --git 
a/59_QuaternionTests/app_resources/quaternionTest.comp.hlsl b/59_QuaternionTests/app_resources/quaternionTest.comp.hlsl new file mode 100644 index 000000000..5d3e6577a --- /dev/null +++ b/59_QuaternionTests/app_resources/quaternionTest.comp.hlsl @@ -0,0 +1,19 @@ +//// Copyright (C) 2023-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) + +#include "common.hlsl" +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(256, 1, 1)] +[shader("compute")] +void main() +{ + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + QuaternionTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +} \ No newline at end of file diff --git a/59_QuaternionTests/main.cpp b/59_QuaternionTests/main.cpp new file mode 100644 index 000000000..00a60aef8 --- /dev/null +++ b/59_QuaternionTests/main.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2018-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +#include "app_resources/common.hlsl" + +#include "CQuaternionTester.h" + +#include +#include +#include + + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +//using namespace glm; + +class QuaternionTest final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication +{ + using device_base_t = application_templates::MonoDeviceApplication; + using asset_base_t = BuiltinResourcesApplication; +public: + QuaternionTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! 
+ if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + { + CQuaternionTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + pplnSetupData.shaderKey = nbl::this_example::builtin::build::get_spirv_key<"quaternionTest">(m_device.get()); + + CQuaternionTester quaternionTester(8); + quaternionTester.setupPipeline(pplnSetupData); + if (!quaternionTester.performTestsAndVerifyResults("QuaternionTestLog.txt")) + return false; + } + + // The tests passed verification; a failure would already have returned false above + return true; + } + + void onAppTerminated_impl() override + { + m_device->waitIdle(); + } + + void workLoopBody() override {} + + bool keepRunning() override { return false; } +}; + +NBL_MAIN_FUNC(QuaternionTest) diff --git a/61_UI/main.cpp b/61_UI/main.cpp index 643cab079..503a2e421 100644 --- a/61_UI/main.cpp +++ b/61_UI/main.cpp @@ -3,6 +3,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" +#include /* Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window. 
@@ -252,14 +253,9 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA } // draw scene { - float32_t3x4 viewMatrix; - float32_t4x4 viewProjMatrix; - // TODO: get rid of legacy matrices - { - const auto& camera = interface.camera; - memcpy(&viewMatrix,camera.getViewMatrix().pointer(),sizeof(viewMatrix)); - memcpy(&viewProjMatrix,camera.getConcatenatedMatrix().pointer(),sizeof(viewProjMatrix)); - } + const auto& camera = interface.camera; + float32_t3x4 viewMatrix = camera.getViewMatrix(); + float32_t4x4 viewProjMatrix = camera.getConcatenatedMatrix(); const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix); // tear down scene every frame @@ -570,21 +566,21 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA // TODO: why is this a lambda and not just an assignment in a scope ? camera.setProjectionMatrix([&]() { - matrix4SIMD projection; + hlsl::float32_t4x4 projection; if (isPerspective) if(isLH) - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); else - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); else { float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; if(isLH) - projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar); + projection = hlsl::math::thin_lens::lhPerspectiveFovMatrix(viewWidth, viewHeight, zNear, zFar); /* NOTE(review): this branch and the RH one below used to call buildProjectionMatrixOrthoLH/RH; they now call the perspective builders, so viewWidth and viewHeight land in the slots the perspective branch above fills with radians(fov) and the aspect ratio - an orthographic builder looks intended, TODO confirm */ else - projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar); + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(viewWidth, 
viewHeight, zNear, zFar); } return projection; @@ -720,33 +716,32 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA * note it also modifies input view matrix but projection matrix is immutable */ -// TODO: do all computation using `hlsl::matrix` and its `hlsl::float32_tNxM` aliases static struct { - core::matrix4SIMD view, projection, model; + hlsl::float32_t4x4 view, projection, model; } imguizmoM16InOut; ImGuizmo::SetID(0u); - imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix())); - imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix()); - imguizmoM16InOut.model = core::transpose(matrix4SIMD(model)); + imguizmoM16InOut.view = hlsl::transpose(hlsl::math::linalg::promote_affine<4,4,3,4>(camera.getViewMatrix())); + imguizmoM16InOut.projection = hlsl::transpose(camera.getProjectionMatrix()); + imguizmoM16InOut.model = hlsl::transpose(hlsl::math::linalg::promote_affine<4,4,3,4>(model)); { if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ - transformParams.editTransformDecomposition = true; - sceneResolution = EditTransform(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams); + transformParams.editTransformDecomposition = true; + sceneResolution = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); } - model = core::transpose(imguizmoM16InOut.model).extractSub3x4(); + model = hlsl::math::linalg::truncate<3,4,4,4>(hlsl::transpose(imguizmoM16InOut.model)); // to Nabla + update camera & model matrices // TODO: make it more nicely, extract: // - Position by computing inverse of the view matrix and grabbing its translation // - Target from 3rd row without W component of view matrix multiplied 
by some arbitrary distance value (can be the length of position from origin) and adding the position // But then set the view matrix this way anyway, because up-vector may not be compatible const auto& view = camera.getViewMatrix(); - const_cast(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) + const_cast(view) = hlsl::math::linalg::truncate<3,4,4,4>(hlsl::transpose(imguizmoM16InOut.view)); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) // update concatanated matrix const auto& projection = camera.getProjectionMatrix(); camera.setProjectionMatrix(projection); @@ -783,9 +778,9 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA ImGui::Separator(); }; - addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, model.pointer()); - addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, view.pointer()); - addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false); + addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, &model[0][0]); + addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, &view[0][0]); + addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, &projection[0][0], false); ImGui::End(); } @@ -867,9 +862,9 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA smart_refctd_ptr subAllocDS; SubAllocatedDescriptorSet::value_type renderColorViewDescIndex = SubAllocatedDescriptorSet::invalid_value; // - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + 
Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); // mutables - core::matrix3x4SIMD model; + hlsl::float32_t3x4 model = hlsl::math::linalg::diagonal(1.0f); std::string_view objectName; TransformRequestParams transformParams; uint16_t2 sceneResolution = {1280,720}; diff --git a/62_CAD/CMakeLists.txt b/62_CAD/CMakeLists.txt index 0928d3b61..7a700b861 100644 --- a/62_CAD/CMakeLists.txt +++ b/62_CAD/CMakeLists.txt @@ -64,20 +64,6 @@ else() endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - shaders/globals.hlsl - shaders/runtimeDeviceConfigCaps.hlsl - shaders/main_pipeline/common.hlsl - shaders/main_pipeline/dtm.hlsl - shaders/main_pipeline/fragment.hlsl - shaders/main_pipeline/fragment_shader.hlsl - shaders/main_pipeline/fragment_shader_debug.hlsl - shaders/main_pipeline/line_style.hlsl - shaders/main_pipeline/resolve_alphas.hlsl - shaders/main_pipeline/vertex_shader.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(REQUIRED_CAPS [=[ @@ -113,7 +99,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -129,4 +114,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/64_EmulatedFloatTest/CMakeLists.txt b/64_EmulatedFloatTest/CMakeLists.txt index af46da896..bd4de23ce 100644 --- a/64_EmulatedFloatTest/CMakeLists.txt +++ b/64_EmulatedFloatTest/CMakeLists.txt @@ -30,14 +30,6 @@ else() endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/test.comp.hlsl - app_resources/benchmark/benchmark.comp.hlsl - 
app_resources/benchmark/common.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -62,7 +54,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -78,4 +69,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index a4f177f16..ea8def7ba 100644 --- a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -42,13 +42,11 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso // since emulated_float64_t rounds to zero std::fesetround(FE_TOWARDZERO); - // Remember to call the base class initialization! 
if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; if (!asset_base_t::onAppInitialized(std::move(system))) return false; - - // In contrast to fences, we just need one semaphore to rule all dispatches + return true; } @@ -97,10 +95,14 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso auto printOnFailure = [this](EmulatedFloatTestDevice device) { + std::string errorMsgPrefix = ""; if (device == EmulatedFloatTestDevice::CPU) - m_logger->log("CPU test fail:", ILogger::ELL_ERROR); + errorMsgPrefix = "CPU test fail:"; else - m_logger->log("GPU test fail:", ILogger::ELL_ERROR); + errorMsgPrefix = "GPU test fail:"; + + m_logger->log("%s", ILogger::ELL_ERROR, errorMsgPrefix.c_str()); + m_logFile << errorMsgPrefix << '\n'; }; auto printOnArithmeticFailure = [this](const char* valName, uint64_t expectedValue, uint64_t testValue, uint64_t a, uint64_t b) @@ -120,9 +122,10 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso ss << std::bitset<64>(expectedValue) << " - expectedValue bit pattern\n"; ss << std::bitset<64>(testValue) << " - testValue bit pattern \n"; - m_logger->log(ss.str().c_str(), ILogger::ELL_ERROR); + m_logger->log("%s", ILogger::ELL_ERROR, ss.str().c_str()); + m_logFile << ss.str() << '\n'; - std::cout << "ULP error: " << std::max(expectedValue, testValue) - std::min(expectedValue, testValue) << "\n\n"; + //std::cout << "ULP error: " << std::max(expectedValue, testValue) - std::min(expectedValue, testValue) << "\n\n"; }; @@ -133,14 +136,18 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso auto printOnComparisonFailure = [this](const char* valName, int expectedValue, int testValue, double a, double b) { - m_logger->log("for input values: A = %f B = %f", ILogger::ELL_ERROR, a, b); + std::string inputValuesStr = std::string("for input values: A = ") + std::to_string(a) + std::string(" B = ") + std::to_string(b); + + m_logger->log("%s", 
ILogger::ELL_ERROR, inputValuesStr.c_str()); + m_logFile << inputValuesStr << '\n'; std::stringstream ss; ss << valName << " not equal!"; ss << "\nexpected value: " << std::boolalpha << bool(expectedValue); ss << "\ntest value: " << std::boolalpha << bool(testValue); - m_logger->log(ss.str().c_str(), ILogger::ELL_ERROR); + m_logger->log("%s", ILogger::ELL_ERROR, ss.str().c_str()); + m_logFile << ss.str() << '\n'; }; if (calcULPError(expectedValues.int32CreateVal, testValues.int32CreateVal) > 1u) @@ -438,6 +445,10 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso m_logger->log("Correct GPU determinated values!", ILogger::ELL_PERFORMANCE); }; + m_logFile.open("EmulatedFloatTestLog.txt", std::ios::out | std::ios::trunc); + if (!m_logFile.is_open()) + m_logger->log("Failed to open log file!", system::ILogger::ELL_ERROR); + printTestOutput("emulatedFloat64RandomValuesTest", emulatedFloat64RandomValuesTest(submitter)); printTestOutput("emulatedFloat64RandomValuesTestContrastingExponents", emulatedFloat64RandomValuesTestContrastingExponents(submitter)); printTestOutput("emulatedFloat64NegAndPosZeroTest", emulatedFloat64NegAndPosZeroTest(submitter)); @@ -450,6 +461,8 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso printTestOutput("emulatedFloat64BNaNTest", emulatedFloat64BNaNTest(submitter)); printTestOutput("emulatedFloat64BInfTest", emulatedFloat64OneValIsZeroTest(submitter)); printTestOutput("emulatedFloat64BNegInfTest", emulatedFloat64OneValIsNegZeroTest(submitter)); + + m_logFile.close(); } template @@ -1171,6 +1184,8 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso m_logger->log(msg, ILogger::ELL_ERROR, std::forward(args)...); return false; } + + std::ofstream m_logFile; }; NBL_MAIN_FUNC(CompatibilityTest) \ No newline at end of file diff --git a/67_RayQueryGeometry/CMakeLists.txt b/67_RayQueryGeometry/CMakeLists.txt index 1fdfc03ce..768379100 100644 --- 
a/67_RayQueryGeometry/CMakeLists.txt +++ b/67_RayQueryGeometry/CMakeLists.txt @@ -28,12 +28,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/render.comp.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -54,7 +48,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -70,4 +63,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/67_RayQueryGeometry/include/common.hpp b/67_RayQueryGeometry/include/common.hpp index 84b0a3dcf..ac774b0df 100644 --- a/67_RayQueryGeometry/include/common.hpp +++ b/67_RayQueryGeometry/include/common.hpp @@ -23,9 +23,9 @@ using GeometryCollectionData = core::smart_refctd_ptr; using GeometryData = std::variant; struct ReferenceObjectCpu { - core::matrix3x4SIMD transform; + hlsl::float32_t3x4 transform; GeometryData data; - uint32_t instanceID; + uint32_t instanceID; }; } diff --git a/67_RayQueryGeometry/main.cpp b/67_RayQueryGeometry/main.cpp index b35000485..464583352 100644 --- a/67_RayQueryGeometry/main.cpp +++ b/67_RayQueryGeometry/main.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" +#include #include "nbl/this_example/builtin/build/spirv/keys.hpp" class RayQueryGeometryApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication @@ -199,7 +200,7 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built { core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(WIN_W) / WIN_H, 0.1, 1000); + hlsl::float32_t4x4 projectionMatrix = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(60.0f), float(WIN_W) / WIN_H, 0.1f, 1000.0f); camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); } @@ -268,13 +269,10 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built const auto projectionMatrix = camera.getProjectionMatrix(); const auto viewProjectionMatrix = camera.getConcatenatedMatrix(); - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - modelMatrix.setRotation(quaternion(0, 0, 0)); + hlsl::float32_t3x4 modelMatrix = hlsl::math::linalg::identity(); - core::matrix4SIMD modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); - core::matrix4SIMD invModelViewProjectionMatrix; - modelViewProjectionMatrix.getInverseTransform(invModelViewProjectionMatrix); + hlsl::float32_t4x4 modelViewProjectionMatrix = nbl::hlsl::math::linalg::promoted_mul(viewProjectionMatrix, modelMatrix); + hlsl::float32_t4x4 invModelViewProjectionMatrix = hlsl::inverse(modelViewProjectionMatrix); auto* queue = getGraphicsQueue(); @@ -307,7 +305,7 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built const core::vector3df camPos = camera.getPosition().getAsVector3df(); 
pc.camPos = { camPos.X, camPos.Y, camPos.Z }; - memcpy(&pc.invMVP, invModelViewProjectionMatrix.pointer(), sizeof(pc.invMVP)); + pc.invMVP = invModelViewProjectionMatrix; pc.scaleNDC = { 2.f / WIN_W, -2.f / WIN_H }; pc.offsetNDC = { -1.f, 1.f }; @@ -496,8 +494,8 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built auto transform_i = 0; auto nextTransform = [&transform_i]() { - core::matrix3x4SIMD transform; - transform.setTranslation(nbl::core::vectorSIMDf(5.f * transform_i, 0, 0, 0)); + hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); + hlsl::math::linalg::setTranslation(transform, hlsl::float32_t3(5.f * transform_i, 0.0f, 0.0f)); transform_i++; return transform; }; @@ -983,7 +981,7 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); video::CDumbPresentationOracle oracle; smart_refctd_ptr geometryInfoBuffer; diff --git a/70_FLIPFluids/CMakeLists.txt b/70_FLIPFluids/CMakeLists.txt index 842492167..96eb752c3 100644 --- a/70_FLIPFluids/CMakeLists.txt +++ b/70_FLIPFluids/CMakeLists.txt @@ -24,26 +24,6 @@ if(NBL_EMBED_BUILTIN_RESOURCES) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/compute/advectParticles.comp.hlsl - app_resources/compute/applyBodyForces.comp.hlsl - app_resources/compute/diffusion.comp.hlsl - app_resources/compute/genParticleVertices.comp.hlsl - app_resources/compute/particlesInit.comp.hlsl - app_resources/compute/prepareCellUpdate.comp.hlsl - app_resources/compute/pressureSolver.comp.hlsl - app_resources/compute/updateFluidCells.comp.hlsl - app_resources/cellUtils.hlsl - app_resources/common.hlsl - app_resources/descriptor_bindings.hlsl - 
app_resources/fluidParticles.fragment.hlsl - app_resources/fluidParticles.vertex.hlsl - app_resources/gridSampling.hlsl - app_resources/gridUtils.hlsl - app_resources/render_common.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -101,7 +81,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} @@ -117,4 +96,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} -) \ No newline at end of file +) diff --git a/70_FLIPFluids/main.cpp b/70_FLIPFluids/main.cpp index a70064245..e7334bff8 100644 --- a/70_FLIPFluids/main.cpp +++ b/70_FLIPFluids/main.cpp @@ -7,6 +7,7 @@ #include "nbl/examples/examples.hpp" // TODO: why is it not in nabla.h ? #include "nbl/asset/metadata/CHLSLMetadata.h" +#include using namespace nbl; using namespace nbl::core; @@ -232,7 +233,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso float zNear = 0.1f, zFar = 10000.f; core::vectorSIMDf cameraPosition(14, 8, 12); core::vectorSIMDf cameraTarget(0, 0, 0); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(WIN_WIDTH) / WIN_HEIGHT, zNear, zFar); + hlsl::float32_t4x4 projectionMatrix = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(60.0f), float(WIN_WIDTH) / WIN_HEIGHT, zNear, zFar); camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); m_pRenderParams.zNear = zNear; @@ -883,22 +884,20 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso const auto projectionMatrix = camera.getProjectionMatrix(); const auto viewProjectionMatrix = camera.getConcatenatedMatrix(); - core::matrix3x4SIMD 
modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - modelMatrix.setRotation(quaternion(0, 0, 0)); + hlsl::float32_t3x4 modelMatrix = hlsl::math::linalg::identity(); - core::matrix3x4SIMD modelViewMatrix = core::concatenateBFollowedByA(viewMatrix, modelMatrix); - core::matrix4SIMD modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); + hlsl::float32_t3x4 modelViewMatrix = viewMatrix; + hlsl::float32_t4x4 modelViewProjectionMatrix = viewProjectionMatrix; - auto modelMat = core::concatenateBFollowedByA(core::matrix4SIMD(), modelMatrix); + auto modelMat = hlsl::math::linalg::promote_affine<4, 4, 3, 4>(modelMatrix); const core::vector3df camPos = camera.getPosition().getAsVector3df(); camPos.getAs4Values(camData.cameraPosition); - memcpy(camData.MVP, modelViewProjectionMatrix.pointer(), sizeof(camData.MVP)); - memcpy(camData.M, modelMat.pointer(), sizeof(camData.M)); - memcpy(camData.V, viewMatrix.pointer(), sizeof(camData.V)); - memcpy(camData.P, projectionMatrix.pointer(), sizeof(camData.P)); + memcpy(camData.MVP, &modelViewProjectionMatrix[0][0], sizeof(camData.MVP)); + memcpy(camData.M, &modelMat[0][0], sizeof(camData.M)); + memcpy(camData.V, &viewMatrix[0][0], sizeof(camData.V)); + memcpy(camData.P, &projectionMatrix[0][0], sizeof(camData.P)); { camDataRange.buffer = cameraBuffer; camDataRange.size = cameraBuffer->getSize(); @@ -1789,7 +1788,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso InputSystem::ChannelReader mouse; InputSystem::ChannelReader keyboard; - Camera camera = Camera(core::vectorSIMDf(0,0,0), core::vectorSIMDf(0,0,0), core::matrix4SIMD()); + Camera camera = Camera(core::vectorSIMDf(0,0,0), core::vectorSIMDf(0,0,0), hlsl::float32_t4x4()); video::CDumbPresentationOracle oracle; bool m_shouldInitParticles = true; diff --git a/71_RayTracingPipeline/CMakeLists.txt b/71_RayTracingPipeline/CMakeLists.txt index d7bb13671..250f7444e 100644 --- 
a/71_RayTracingPipeline/CMakeLists.txt +++ b/71_RayTracingPipeline/CMakeLists.txt @@ -11,6 +11,7 @@ if(NBL_BUILD_IMGUI) list(APPEND NBL_LIBRARIES imtestengine "${NBL_EXT_IMGUI_UI_LIB}" + Nabla::ext::FullScreenTriangle ) nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") @@ -35,23 +36,6 @@ if(NBL_BUILD_IMGUI) endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/light_directional.rcall.hlsl - app_resources/light_point.rcall.hlsl - app_resources/light_spot.rcall.hlsl - app_resources/present.frag.hlsl - app_resources/raytrace.rahit.hlsl - app_resources/raytrace.rchit.hlsl - app_resources/raytrace.rgen.hlsl - app_resources/raytrace.rint.hlsl - app_resources/raytrace.rmiss.hlsl - app_resources/raytrace_procedural.rchit.hlsl - app_resources/raytrace_shadow.rahit.hlsl - app_resources/raytrace_shadow.rmiss.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) set(JSON [=[ @@ -116,7 +100,6 @@ set(COMPILE_OPTIONS NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS ${COMPILE_OPTIONS} diff --git a/71_RayTracingPipeline/include/common.hpp b/71_RayTracingPipeline/include/common.hpp index 6727c879c..e6b538618 100644 --- a/71_RayTracingPipeline/include/common.hpp +++ b/71_RayTracingPipeline/include/common.hpp @@ -26,7 +26,7 @@ struct ReferenceObjectCpu { core::smart_refctd_ptr data; Material material; - core::matrix3x4SIMD transform; + hlsl::float32_t3x4 transform; }; diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index ecaf53b7f..f6b64c5ca 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -9,7 +9,10 @@ #include 
"nbl/builtin/hlsl/indirect_commands.hlsl" #include "nbl/examples/common/BuiltinResourcesApplication.hpp" - +#include +#include +#include +#include class RaytracingPipelineApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { @@ -28,17 +31,6 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui "Spot" }; - struct ShaderBindingTable - { - SBufferRange raygenGroupRange; - SBufferRange hitGroupsRange; - uint32_t hitGroupsStride; - SBufferRange missGroupsRange; - uint32_t missGroupsStride; - SBufferRange callableGroupsRange; - uint32_t callableGroupsStride; - }; - public: inline RaytracingPipelineApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) @@ -476,9 +468,9 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui m_camera.setProjectionMatrix([&]() { - static matrix4SIMD projection; + static hlsl::float32_t4x4 projection; - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix( core::radians(m_cameraSetting.fov), io.DisplaySize.x / io.DisplaySize.y, m_cameraSetting.zNear, @@ -542,9 +534,9 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui // Set Camera { core::vectorSIMDf cameraPosition(0, 5, -10); - matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + hlsl::float32_t4x4 proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix( core::radians(60.0f), - WIN_W / WIN_H, + float(WIN_W) / WIN_H, /* cast before dividing: if WIN_W and WIN_H are integral, dividing first would truncate the aspect ratio */ 0.01f, 500.0f ); @@ -620,18 +612,15 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto projectionMatrix = m_camera.getProjectionMatrix(); const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - modelMatrix.setRotation(quaternion(0, 0, 0)); + 
//hlsl::float32_t3x4 modelMatrix; - core::matrix4SIMD modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); + hlsl::float32_t4x4 modelViewProjectionMatrix = viewProjectionMatrix; if (m_cachedModelViewProjectionMatrix != modelViewProjectionMatrix) { m_frameAccumulationCounter = 0; m_cachedModelViewProjectionMatrix = modelViewProjectionMatrix; } - core::matrix4SIMD invModelViewProjectionMatrix; - modelViewProjectionMatrix.getInverseTransform(invModelViewProjectionMatrix); + hlsl::float32_t4x4 invModelViewProjectionMatrix = hlsl::inverse(modelViewProjectionMatrix); { IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; @@ -665,29 +654,16 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui pc.frameCounter = m_frameAccumulationCounter; const core::vector3df camPos = m_camera.getPosition().getAsVector3df(); pc.camPos = { camPos.X, camPos.Y, camPos.Z }; - memcpy(&pc.invMVP, invModelViewProjectionMatrix.pointer(), sizeof(pc.invMVP)); + pc.invMVP = invModelViewProjectionMatrix; cmdbuf->bindRayTracingPipeline(m_rayTracingPipeline.get()); cmdbuf->setRayTracingPipelineStackSize(m_rayTracingStackSize); cmdbuf->pushConstants(m_rayTracingPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, 0, sizeof(SPushConstants), &pc); cmdbuf->bindDescriptorSets(EPBP_RAY_TRACING, m_rayTracingPipeline->getLayout(), 0, 1, &m_rayTracingDs.get()); if (m_useIndirectCommand) - { - cmdbuf->traceRaysIndirect( - SBufferBinding{ - .offset = 0, - .buffer = m_indirectBuffer, - }); - } + cmdbuf->traceRaysIndirect({.offset=0,.buffer=m_indirectBuffer}); else - { - cmdbuf->traceRays( - m_shaderBindingTable.raygenGroupRange, - m_shaderBindingTable.missGroupsRange, m_shaderBindingTable.missGroupsStride, - m_shaderBindingTable.hitGroupsRange, m_shaderBindingTable.hitGroupsStride, - m_shaderBindingTable.callableGroupsRange, m_shaderBindingTable.callableGroupsStride, - WIN_W, WIN_H, 1); - } + 
cmdbuf->traceRays(m_shaderBindingTable,WIN_W,WIN_H,1); } // pipeline barrier @@ -916,22 +892,22 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui bool createIndirectBuffer() { - const auto getBufferRangeAddress = [](const SBufferRange& range) + const auto getBufferRangeAddress = [](const SBufferRange& range) { return range.buffer->getDeviceAddress() + range.offset; }; const auto command = TraceRaysIndirectCommand_t{ - .raygenShaderRecordAddress = getBufferRangeAddress(m_shaderBindingTable.raygenGroupRange), - .raygenShaderRecordSize = m_shaderBindingTable.raygenGroupRange.size, - .missShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.missGroupsRange), - .missShaderBindingTableSize = m_shaderBindingTable.missGroupsRange.size, - .missShaderBindingTableStride = m_shaderBindingTable.missGroupsStride, - .hitShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.hitGroupsRange), - .hitShaderBindingTableSize = m_shaderBindingTable.hitGroupsRange.size, - .hitShaderBindingTableStride = m_shaderBindingTable.hitGroupsStride, - .callableShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.callableGroupsRange), - .callableShaderBindingTableSize = m_shaderBindingTable.callableGroupsRange.size, - .callableShaderBindingTableStride = m_shaderBindingTable.callableGroupsStride, + .raygenShaderRecordAddress = getBufferRangeAddress(m_shaderBindingTable.raygen), + .raygenShaderRecordSize = m_shaderBindingTable.raygen.size, + .missShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.miss.range), + .missShaderBindingTableSize = m_shaderBindingTable.miss.range.size, + .missShaderBindingTableStride = m_shaderBindingTable.miss.stride, + .hitShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.hit.range), + .hitShaderBindingTableSize = m_shaderBindingTable.hit.range.size, + .hitShaderBindingTableStride = m_shaderBindingTable.hit.stride, + 
.callableShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.callable.range), + .callableShaderBindingTableSize = m_shaderBindingTable.callable.range.size, + .callableShaderBindingTableStride = m_shaderBindingTable.callable.stride, .width = WIN_W, .height = WIN_H, .depth = 1, @@ -972,15 +948,15 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; const auto handleSizeAligned = nbl::core::alignUp(handleSize, limits.shaderGroupHandleAlignment); - auto& raygenRange = m_shaderBindingTable.raygenGroupRange; + auto& raygenRange = m_shaderBindingTable.raygen; - auto& hitRange = m_shaderBindingTable.hitGroupsRange; + auto& hitRange = m_shaderBindingTable.hit.range; const auto hitHandles = pipeline->getHitHandles(); - auto& missRange = m_shaderBindingTable.missGroupsRange; + auto& missRange = m_shaderBindingTable.miss.range; const auto missHandles = pipeline->getMissHandles(); - auto& callableRange = m_shaderBindingTable.callableGroupsRange; + auto& callableRange = m_shaderBindingTable.callable.range; const auto callableHandles = pipeline->getCallableHandles(); raygenRange = { @@ -992,19 +968,19 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui .offset = raygenRange.size, .size = core::alignUp(missHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), }; - m_shaderBindingTable.missGroupsStride = handleSizeAligned; + m_shaderBindingTable.miss.stride = handleSizeAligned; hitRange = { .offset = missRange.offset + missRange.size, .size = core::alignUp(hitHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), }; - m_shaderBindingTable.hitGroupsStride = handleSizeAligned; + m_shaderBindingTable.hit.stride = handleSizeAligned; callableRange = { .offset = hitRange.offset + hitRange.size, .size = core::alignUp(callableHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), }; - 
m_shaderBindingTable.callableGroupsStride = handleSizeAligned; + m_shaderBindingTable.callable.stride = handleSizeAligned; const auto bufferSize = raygenRange.size + missRange.size + hitRange.size + callableRange.size; @@ -1021,7 +997,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui for (const auto& handle : missHandles) { memcpy(pMissData, &handle, handleSize); - pMissData += m_shaderBindingTable.missGroupsStride; + pMissData += m_shaderBindingTable.miss.stride; } // copy hit region @@ -1029,7 +1005,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui for (const auto& handle : hitHandles) { memcpy(pHitData, &handle, handleSize); - pHitData += m_shaderBindingTable.hitGroupsStride; + pHitData += m_shaderBindingTable.hit.stride; /* advance by the hit-group stride, matching the region being filled (was miss.stride) */ } // copy callable region @@ -1037,17 +1013,21 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui for (const auto& handle : callableHandles) { memcpy(pCallableData, &handle, handleSize); - pCallableData += m_shaderBindingTable.callableGroupsStride; + pCallableData += m_shaderBindingTable.callable.stride; } { - IGPUBuffer::SCreationParams params; - params.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT; - params.size = bufferSize; - m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, std::move(params), pData).move_into(raygenRange.buffer); - missRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); - hitRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); - callableRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); + smart_refctd_ptr buffer; + { + IGPUBuffer::SCreationParams params; + params.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT |
IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT; + params.size = bufferSize; + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, std::move(params), pData).move_into(buffer); + } + raygenRange.buffer = smart_refctd_ptr(buffer); + missRange.buffer = smart_refctd_ptr(raygenRange.buffer); + hitRange.buffer = smart_refctd_ptr(raygenRange.buffer); + callableRange.buffer = smart_refctd_ptr(raygenRange.buffer); } return true; @@ -1071,13 +1051,14 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui auto getTranslationMatrix = [](float32_t x, float32_t y, float32_t z) { - core::matrix3x4SIMD transform; - transform.setTranslation(nbl::core::vectorSIMDf(x, y, z, 0)); + hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); + hlsl::math::linalg::setTranslation(transform, float32_t3(x, y, z)); + return transform; }; - core::matrix3x4SIMD planeTransform; - planeTransform.setRotation(quaternion::fromAngleAxis(core::radians(-90.0f), vector3df_SIMD{ 1, 0, 0 })); + const auto planeRotation = hlsl::math::quaternion::create(hlsl::float32_t3(1.f, 0.f, 0.f), core::radians(-90.0f)); + hlsl::float32_t3x4 planeTransform = hlsl::math::linalg::promote_affine<3,4,3,3>(hlsl::_static_cast(planeRotation)); // triangles geometries auto geometryCreator = make_smart_refctd_ptr(); @@ -1228,7 +1209,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui inst.base.instanceCustomIndex = i; inst.base.instanceShaderBindingTableRecordOffset = isProceduralInstance ? 2 : 0; inst.base.mask = 0xFF; - inst.transform = isProceduralInstance ? matrix3x4SIMD() : cpuObjects[i].transform; + inst.transform = isProceduralInstance ? 
hlsl::float32_t3x4() : cpuObjects[i].transform; instance->instance = inst; } @@ -1387,7 +1368,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui auto future = reservation.convert(params); if (future.copy() != IQueue::RESULT::SUCCESS) { - m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); + m_logger->log("Failed to await submission future!", ILogger::ELL_ERROR); return false; } // 2 submits, BLAS build, TLAS build, DO NOT ADD COMPACTIONS IN THIS EXAMPLE! @@ -1467,7 +1448,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui float camXAngle = 32.f / 180.f * 3.14159f; } m_cameraSetting; - Camera m_camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera m_camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), hlsl::float32_t4x4()); Light m_light = { .direction = {-1.0f, -1.0f, -0.4f}, @@ -1510,7 +1491,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui smart_refctd_ptr m_rayTracingDs; smart_refctd_ptr m_rayTracingPipeline; uint64_t m_rayTracingStackSize; - ShaderBindingTable m_shaderBindingTable; + IGPURayTracingPipeline::SShaderBindingTable m_shaderBindingTable; smart_refctd_ptr m_presentDs; smart_refctd_ptr m_presentDsPool; @@ -1519,8 +1500,8 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui smart_refctd_ptr m_converter; - core::matrix4SIMD m_cachedModelViewProjectionMatrix; + hlsl::float32_t4x4 m_cachedModelViewProjectionMatrix; bool m_useIndirectCommand = false; }; -NBL_MAIN_FUNC(RaytracingPipelineApp) \ No newline at end of file +NBL_MAIN_FUNC(RaytracingPipelineApp) diff --git a/73_GeometryInspector/CMakeLists.txt b/73_GeometryInspector/CMakeLists.txt new file mode 100644 index 000000000..8eed20a70 --- /dev/null +++ b/73_GeometryInspector/CMakeLists.txt @@ -0,0 +1,23 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR 
"common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +if(NBL_BUILD_IMGUI AND NBL_BUILD_DEBUG_DRAW) + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${NBL_EXT_MITSUBA_LOADER_INCLUDE_DIRS}" + ) + + list(APPEND NBL_LIBRARIES + imtestengine + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" + "${NBL_EXT_MITSUBA_LOADER_LIB}" + ) + + nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + + target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::DebugDraw) +endif() + diff --git a/73_GeometryInspector/include/common.hpp b/73_GeometryInspector/include/common.hpp new file mode 100644 index 000000000..cc06db2c1 --- /dev/null +++ b/73_GeometryInspector/include/common.hpp @@ -0,0 +1,22 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ + + +#include "nbl/examples/examples.hpp" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace scene; +using namespace nbl::examples; + +#include "transform.hpp" +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" + +#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file diff --git a/73_GeometryInspector/include/transform.hpp b/73_GeometryInspector/include/transform.hpp new file mode 100644 index 000000000..6ac299c4b --- /dev/null +++ b/73_GeometryInspector/include/transform.hpp @@ -0,0 +1,162 @@ +#ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ + + +#include "nbl/ui/ICursorControl.h" + +#include "nbl/ext/ImGui/ImGui.h" + +#include "imgui/imgui_internal.h" +#include "imguizmo/ImGuizmo.h" + + +struct TransformRequestParams +{ + float camDistance = 8.f; + uint8_t sceneTexDescIx = ~0; + bool useWindow = false, 
editTransformDecomposition = false, enableViewManipulate = false; +}; + +nbl::hlsl::uint16_t2 EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) +{ + static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE); + static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL); + static bool useSnap = false; + static float snap[3] = { 1.f, 1.f, 1.f }; + static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f }; + static float boundsSnap[] = { 0.1f, 0.1f, 0.1f }; + static bool boundSizing = false; + static bool boundSizingSnap = false; + + if (params.editTransformDecomposition) + { + if (ImGui::IsKeyPressed(ImGuiKey_T)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + if (ImGui::IsKeyPressed(ImGuiKey_R)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + if (ImGui::IsKeyPressed(ImGuiKey_S)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) + mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; + float matrixTranslation[3], matrixRotation[3], matrixScale[3]; + ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); + ImGui::InputFloat3("Tr", matrixTranslation); + ImGui::InputFloat3("Rt", matrixRotation); + ImGui::InputFloat3("Sc", matrixScale); + ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); + + if (mCurrentGizmoOperation != ImGuizmo::SCALE) + { + if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL)) + 
mCurrentGizmoMode = ImGuizmo::LOCAL; + ImGui::SameLine(); + if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD)) + mCurrentGizmoMode = ImGuizmo::WORLD; + } + if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift)) + useSnap = !useSnap; + ImGui::Checkbox("##UseSnap", &useSnap); + ImGui::SameLine(); + + switch (mCurrentGizmoOperation) + { + case ImGuizmo::TRANSLATE: + ImGui::InputFloat3("Snap", &snap[0]); + break; + case ImGuizmo::ROTATE: + ImGui::InputFloat("Angle Snap", &snap[0]); + break; + case ImGuizmo::SCALE: + ImGui::InputFloat("Scale Snap", &snap[0]); + break; + } + ImGui::Checkbox("Bound Sizing", &boundSizing); + if (boundSizing) + { + ImGui::PushID(3); + ImGui::Checkbox("##BoundSizing", &boundSizingSnap); + ImGui::SameLine(); + ImGui::InputFloat3("Snap", boundsSnap); + ImGui::PopID(); + } + } + + ImGuiIO& io = ImGui::GetIO(); + float viewManipulateRight = io.DisplaySize.x; + float viewManipulateTop = 0; + static ImGuiWindowFlags gizmoWindowFlags = 0; + + /* + for the "useWindow" case we just render to a gui area, + otherwise to fake full screen transparent window + + note that for both cases we make sure gizmo being + rendered is aligned to our texture scene using + imgui "cursor" screen positions + */ +// TODO: this shouldn't be handled here I think + SImResourceInfo info; + info.textureID = params.sceneTexDescIx; + info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + + nbl::hlsl::uint16_t2 retval; + if (params.useWindow) + { + ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing); + ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f)); + ImGui::Begin("Gizmo", 0, gizmoWindowFlags); + ImGuizmo::SetDrawlist(); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 windowPos = ImGui::GetWindowPos(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, 
contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval = {contentRegionSize.x,contentRegionSize.y}; + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + + ImGuiWindow* window = ImGui::GetCurrentWindow(); + gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0); + } + else + { + ImGui::SetNextWindowPos(ImVec2(0, 0)); + ImGui::SetNextWindowSize(io.DisplaySize); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window + ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval = {contentRegionSize.x,contentRegionSize.y}; + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + } + + ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? 
boundsSnap : NULL); + + if(params.enableViewManipulate) + ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + + ImGui::End(); + ImGui::PopStyleColor(); + + return retval; +} + +#endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ \ No newline at end of file diff --git a/73_GeometryInspector/main.cpp b/73_GeometryInspector/main.cpp new file mode 100644 index 000000000..570ce52d2 --- /dev/null +++ b/73_GeometryInspector/main.cpp @@ -0,0 +1,741 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "common.hpp" + +#include "../3rdparty/portable-file-dialogs/portable-file-dialogs.h" +#include + +#ifdef NBL_BUILD_MITSUBA_LOADER +#include "nbl/ext/MitsubaLoader/CSerializedLoader.h" +#endif + +#include "nbl/ext/DebugDraw/CDrawAABB.h" +#include "nbl/ext/ImGui/ImGui.h" + +class GeometryInspectorApp final : public MonoWindowApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; + + enum DrawBoundingBoxMode + { + DBBM_NONE, + DBBM_AABB, + DBBM_OBB, + DBBM_COUNT + }; + + public: + inline GeometryInspectorApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({1280,720}, EF_D32_SFLOAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + #ifdef NBL_BUILD_MITSUBA_LOADER + m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); + #endif + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return 
false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for (auto i=0u; icreateCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_cmdBufs.data()+i,1})) + return logFail("Couldn't create Command Buffer!"); + } + + + auto scRes = static_cast(m_surface->getSwapchainResources()); + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(),scRes->getRenderpass(),0,{}); + if (!m_renderer) + return logFail("Failed to create renderer!"); + + auto* renderpass = scRes->getRenderpass(); + + { + ext::debug_draw::DrawAABB::SCreationParameters params = {}; + params.assetManager = m_assetMgr; + params.transfer = getTransferUpQueue(); + params.drawMode = ext::debug_draw::DrawAABB::ADM_DRAW_BATCH; + params.batchPipelineLayout = ext::debug_draw::DrawAABB::createDefaultPipelineLayout(m_device.get()); + params.renderpass = smart_refctd_ptr(renderpass); + params.utilities = m_utils; + m_bbRenderer = ext::debug_draw::DrawAABB::create(std::move(params)); + } + + // gui descriptor setup + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ETC_REPEAT; + params.TextureWrapV = ETC_REPEAT; + params.TextureWrapW = ETC_REPEAT; + + m_ui.samplers.gui = m_device->createSampler(params); + m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); + } + + std::array, 69u> immutableSamplers; + for (auto& it : immutableSamplers) + it = smart_refctd_ptr(m_ui.samplers.scene); + + immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); + + nbl::ext::imgui::UI::SCreationParameters params; + + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 
1u }; + params.assetManager = m_assetMgr; + params.pipelineCache = nullptr; + params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); + params.renderpass = smart_refctd_ptr(renderpass); + params.streamingBuffer = nullptr; + params.subpassIx = 0u; + params.transfer = getGraphicsQueue(); + params.utilities = m_utils; + { + m_ui.manager = ext::imgui::UI::create(std::move(params)); + + // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + + IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; + descriptorPoolInfo.maxSets = 1u; + descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; + + m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); + assert(m_guiDescriptorSetPool); + + m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); + assert(m_ui.descriptorSet); + } + } + + m_ui.manager->registerListener( + [this]() -> void { + ImGuiIO& io = ImGui::GetIO(); + + m_camera.setProjectionMatrix([&]() + { + static hlsl::float32_t4x4 projection; + + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix( + core::radians(m_cameraSetting.fov), + io.DisplaySize.x / io.DisplaySize.y, + m_cameraSetting.zNear, + m_cameraSetting.zFar); + + return projection; + }()); + + ImGuizmo::SetOrthographic(false); + ImGuizmo::BeginFrame(); + + // create a window and insert the 
inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Controls"); + + ImGui::SameLine(); + + ImGui::Text("Camera"); + + ImGui::SliderFloat("Move speed", &m_cameraSetting.moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &m_cameraSetting.rotateSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Fov", &m_cameraSetting.fov, 20.f, 150.f); + ImGui::SliderFloat("zNear", &m_cameraSetting.zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &m_cameraSetting.zFar, 110.f, 10000.f); + + + ImGui::Text("Inspector"); + ImGui::ListBox("Selected polygon", &m_selectedMesh, + [](void* userData, int index) -> const char* { + auto* meshInstances = reinterpret_cast(userData); + return meshInstances[index].name.data(); + }, + m_meshInstances.data(), + m_meshInstances.size()); + + ImGui::Checkbox("Draw AABB", &m_shouldDrawAABB); + ImGui::Checkbox("Draw OBB", &m_shouldDrawOBB); + if (ImGuizmo::IsUsing()) + { + ImGui::Text("Using gizmo"); + } + else + { + ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : ""); + } + ImGui::Separator(); + + static struct + { + hlsl::float32_t4x4 view, projection, model; + } imguizmoM16InOut; + + ImGuizmo::SetID(0u); + + auto& selectedInstance = m_renderer->getInstance(m_selectedMesh); + + imguizmoM16InOut.view = hlsl::transpose(hlsl::math::linalg::promote_affine<4, 4, 3, 4>(m_camera.getViewMatrix())); + imguizmoM16InOut.projection = hlsl::transpose(m_camera.getProjectionMatrix()); + imguizmoM16InOut.projection[1][1] *= -1.f; // Flip y coordinates. 
https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ + imguizmoM16InOut.model = hlsl::transpose(hlsl::math::linalg::promote_affine<4, 4, 3, 4>(selectedInstance.world)); + { + m_transformParams.enableViewManipulate = true; + EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], m_transformParams); + } + selectedInstance.world = hlsl::float32_t3x4(hlsl::transpose(imguizmoM16InOut.model)); + + ImGui::End(); + }); + // + if (!reloadModel()) + return false; + + m_camera.mapKeysToArrows(); + + onAppInitializedFinish(); + return true; + } + + bool updateGUIDescriptorSet() + { + // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout + static std::array descriptorInfo; + static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; + + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); + + for (uint32_t i = 0; i < descriptorInfo.size(); ++i) + { + writes[i].dstSet = m_ui.descriptorSet.get(); + writes[i].binding = 0u; + writes[i].arrayElement = i; + writes[i].count = 1u; + } + writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; + + return m_device->updateDescriptorSets(writes, {}); + } + + inline void update(const std::chrono::microseconds nextPresentationTimestamp) + { + m_camera.setMoveSpeed(m_cameraSetting.moveSpeed); + m_camera.setRotateSpeed(m_cameraSetting.rotateSpeed); + + static std::chrono::microseconds previousEventTimestamp{}; + + m_inputSystem->getDefaultMouse(&m_mouse); + m_inputSystem->getDefaultKeyboard(&m_keyboard); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } capturedEvents; + + 
m_camera.beginInputProcessing(nextPresentationTimestamp); + { + const auto& io = ImGui::GetIO(); + m_mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (!io.WantCaptureMouse) + m_camera.mouseProcess(events); // don't capture the events, only let m_camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.mouse.emplace_back(e); + + } + }, m_logger.get()); + + bool reload = false; + m_keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + if (!io.WantCaptureKeyboard) + m_camera.keyboardProcess(events); // don't capture the events, only let m_camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + if (e.keyCode == E_KEY_CODE::EKC_R && e.action == SKeyboardEvent::ECA_RELEASED) + reload = true; + + previousEventTimestamp = e.timeStamp; + capturedEvents.keyboard.emplace_back(e); + } + }, m_logger.get()); + if (reload) reloadModel(); + + } + m_camera.endInputProcessing(nextPresentationTimestamp); + + const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); + + const ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), m_window->getHeight() }, + .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; + + m_ui.manager->update(params); + } + + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const 
std::chrono::microseconds nextPresentationTimestamp) override + { + update(nextPresentationTimestamp); + + // + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + auto* const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // clear to black for both things + { + // begin renderpass + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + auto* framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; + const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {framebuffer->getCreationParameters().width,framebuffer->getCreationParameters().height} + }; + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = framebuffer, + .colorClearValues = &clearValue, + .depthStencilClearValues = &depthValue, + .renderArea = currentRenderArea + }; + cb->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + const SViewport viewport = { + .x = static_cast(currentRenderArea.offset.x), + .y = static_cast(currentRenderArea.offset.y), + .width = static_cast(currentRenderArea.extent.width), + .height = static_cast(currentRenderArea.extent.height) + }; + cb->setViewport(0u,1u,&viewport); + + cb->setScissor(0u,1u,&currentRenderArea); + } + + // draw scene + float32_t3x4 viewMatrix = m_camera.getViewMatrix(); + float32_t4x4 viewProjMatrix = m_camera.getConcatenatedMatrix(); + + m_renderer->render(cb,CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix)); + + const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + const auto& renderInstance = m_renderer->getInstance(m_selectedMesh); + const auto& meshInstance = m_meshInstances[m_selectedMesh]; +
core::vector debugDrawInstances; + debugDrawInstances.reserve(2); + const auto world4x4 = float32_t4x4{ + renderInstance.world[0], + renderInstance.world[1], + renderInstance.world[2], + float32_t4(0, 0, 0, 1) + }; + if (m_shouldDrawAABB) + { + const auto aabbTransform = ext::debug_draw::DrawAABB::getTransformFromAABB(meshInstance.aabb); + debugDrawInstances.push_back(ext::debug_draw::InstanceData{ .transform = math::linalg::promoted_mul(world4x4, aabbTransform), .color = float32_t4(1, 1, 1, 1)}); + } + if (m_shouldDrawOBB) + { + debugDrawInstances.push_back(ext::debug_draw::InstanceData{ .transform = math::linalg::promoted_mul(world4x4, meshInstance.obb.transform), .color = float32_t4(0, 0, 1, 1)}); + } + m_bbRenderer->render({ cb, viewProjMatrix }, drawFinished, debugDrawInstances); + + cb->beginDebugMarker("Render ImGui"); + const auto uiParams = m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cb->bindGraphicsPipeline(uiPipeline); + cb->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); + if (!m_ui.manager->render(cb, drawFinished)) + { + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.",ILogger::ELL_ERROR); + return {}; + } + cb->endDebugMarker(); + + cb->endRenderPass(); + } + cb->end(); + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + 
.signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + std::string caption = "[Nabla Engine] Geometry Inspector"; + { + caption += ", displaying ["; + caption += m_modelPath; + caption += "]"; + m_window->setCaption(caption); + } + + updateGUIDescriptorSet(); + return retval; + } + + protected: + const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override + { + // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, + // don't want any writes to be available, we'll clear + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // color from ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // 
last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + return dependencies; + } + + private: + // TODO: standardise this across examples, and take from `argv` + bool m_nonInteractiveTest = false; + + bool reloadModel() + { + if (m_nonInteractiveTest) // TODO: maybe also take from argv and argc + m_modelPath = (sharedInputCWD/"ply/Spanner-ply.ply").string(); + else + { + pfd::open_file file("Choose a supported Model File", sharedInputCWD.string(), + { + "All Supported Formats", "*.ply *.stl *.serialized *.obj", + "TODO (.ply)", "*.ply", + "TODO (.stl)", "*.stl", + "Mitsuba 0.6 Serialized (.serialized)", "*.serialized", + "Wavefront Object (.obj)", "*.obj" + }, + false + ); + if (file.result().empty()) + return false; + m_modelPath = file.result()[0]; + } + + // free up + m_renderer->m_instances.clear(); + m_renderer->clearGeometries({.semaphore=m_semaphore.get(),.value=m_realFrameIx}); + m_assetMgr->clearAllAssetCache(); + + //! 
load the geometry + IAssetLoader::SAssetLoadParams params = {}; + params.logger = m_logger.get(); + auto bundle = m_assetMgr->getAsset(m_modelPath,params); + if (bundle.getContents().empty()) + return false; + + // + core::vector> geometries; + switch (bundle.getAssetType()) + { + case IAsset::E_TYPE::ET_GEOMETRY: + for (const auto& item : bundle.getContents()) + if (auto polyGeo=IAsset::castDown(item); polyGeo) + geometries.push_back(polyGeo); + break; + default: + m_logger->log("Asset loaded but not a supported type (ET_GEOMETRY,ET_GEOMETRY_COLLECTION)",ILogger::ELL_ERROR); + break; + } + if (geometries.empty()) + return false; + + using aabb_t = hlsl::shapes::AABB<3,float32_t>; + auto printAABB = [&](const aabb_t& aabb, const char* extraMsg="")->void + { + m_logger->log("%s AABB is (%f,%f,%f) -> (%f,%f,%f)",ILogger::ELL_INFO,extraMsg,aabb.minVx.x,aabb.minVx.y,aabb.minVx.z,aabb.maxVx.x,aabb.maxVx.y,aabb.maxVx.z); + }; + auto bound = aabb_t::create(); + // convert the geometries + { + smart_refctd_ptr converter = CAssetConverter::create({.device=m_device.get()}); + + const auto transferFamily = getTransferUpQueue()->getFamilyIndex(); + + struct SInputs : CAssetConverter::SInputs + { + virtual inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUBuffer* buffer, const CAssetConverter::patch_t& patch) const + { + return sharedBufferOwnership; + } + + core::vector sharedBufferOwnership; + } inputs = {}; + core::vector> patches(geometries.size(),CSimpleDebugRenderer::DefaultPolygonGeometryPatch); + { + inputs.logger = m_logger.get(); + std::get>(inputs.assets) = {&geometries.front().get(),geometries.size()}; + std::get>(inputs.patches) = patches; + // set up shared ownership so we don't have to + core::unordered_set families; + families.insert(transferFamily); + families.insert(getGraphicsQueue()->getFamilyIndex()); + if (families.size()>1) + for (const auto fam : families) + inputs.sharedBufferOwnership.push_back(fam); + } + + // 
reserve + auto reservation = converter->reserve(inputs); + if (!reservation) + { + m_logger->log("Failed to reserve GPU objects for CPU->GPU conversion!",ILogger::ELL_ERROR); + return false; + } + + // convert + { + auto semaphore = m_device->createSemaphore(0u); + + constexpr auto MultiBuffering = 2; + std::array,MultiBuffering> commandBuffers = {}; + { + auto pool = m_device->createCommandPool(transferFamily,IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT|IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,commandBuffers,smart_refctd_ptr(m_logger)); + } + commandBuffers.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + std::array commandBufferSubmits; + for (auto i=0; ilog("Failed to await submission feature!", ILogger::ELL_ERROR); + return false; + } + } + + auto tmp = hlsl::float32_t4x3( + hlsl::float32_t3(1,0,0), + hlsl::float32_t3(0,1,0), + hlsl::float32_t3(0,0,1), + hlsl::float32_t3(0,0,0) + ); + const auto& converted = reservation.getGPUObjects(); + core::vector meshWorlds; + for (uint32_t i = 0; i < converted.size(); i++) + { + const auto& geom = converted[i]; + const auto aabb = geom.value->getAABB(); + printAABB(aabb,"Geometry"); + tmp[3].x += aabb.getExtent().x; + meshWorlds.emplace_back(hlsl::transpose(tmp)); + const auto transformed = hlsl::shapes::util::transform(meshWorlds.back(), aabb); + bound = hlsl::shapes::util::union_(transformed,bound); + + const auto& cpuGeom = geometries[i].get(); + const auto obb = CPolygonGeometryManipulator::calculateOBB( + cpuGeom->getPositionView().getElementCount(), + [geo = cpuGeom](size_t vertex_i) { + hlsl::float32_t3 pt; + geo->getPositionView().decodeElement(vertex_i, pt); + return pt; + }); + + m_meshInstances.push_back({ .name = std::format("Mesh {}", i), .aabb = aabb, .obb = obb }); + } + + printAABB(bound,"Total"); + if (!m_renderer->addGeometries({ &converted.front().get(),converted.size() })) + return false; + + for 
(auto geom_i = 0u; geom_i < m_renderer->getGeometries().size(); geom_i++) + m_renderer->m_instances.push_back({ + .world = meshWorlds[geom_i], + .packedGeo = &m_renderer->getGeometry(geom_i) + }); + } + + // get scene bounds and reset m_camera + { + const float32_t distance = 0.05; + const auto diagonal = bound.getExtent(); + { + const auto measure = hlsl::length(diagonal); + const auto aspectRatio = float(m_window->getWidth())/float(m_window->getHeight()); + m_camera.setProjectionMatrix(hlsl::math::thin_lens::rhPerspectiveFovMatrix(1.2f,aspectRatio,distance*measure*0.1f,measure*4.0f)); + m_camera.setMoveSpeed(measure*0.04); + } + const auto pos = bound.maxVx+diagonal*distance; + m_camera.setPosition(vectorSIMDf(pos.x,pos.y,pos.z)); + const auto center = (bound.minVx+bound.maxVx)*0.5f; + m_camera.setTarget(vectorSIMDf(center.x,center.y,center.z)); + } + + // TODO: write out the geometry + + return true; + } + + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint32_t MaxFramesInFlight = 3u; + constexpr static inline uint8_t MaxUITextureCount = 1u; + // + smart_refctd_ptr m_renderer; + + struct MeshInstance + { + std::string name; + hlsl::shapes::AABB<3, float32_t> aabb; + hlsl::shapes::OBB<3, float32_t> obb; + }; + core::vector m_meshInstances; + int m_selectedMesh = 0; + // + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array,MaxFramesInFlight> m_cmdBufs; + // + InputSystem::ChannelReader m_mouse; + InputSystem::ChannelReader m_keyboard; + // + struct CameraSetting + { + float fov = 60.f; + float zNear = 0.1f; + float zFar = 10000.f; + float moveSpeed = 1.f; + float rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + + } m_cameraSetting; + Camera m_camera = Camera(core::vectorSIMDf(0,0,0), core::vectorSIMDf(0,0,0), hlsl::float32_t4x4()); + // mutables + 
std::string m_modelPath; + + smart_refctd_ptr m_bbRenderer; + bool m_shouldDrawAABB; + bool m_shouldDrawOBB; + + struct C_UI + { + nbl::core::smart_refctd_ptr manager; + + struct + { + core::smart_refctd_ptr gui, scene; + } samplers; + + core::smart_refctd_ptr descriptorSet; + } m_ui; + core::smart_refctd_ptr m_guiDescriptorSetPool; + + TransformRequestParams m_transformParams; + }; + +NBL_MAIN_FUNC(GeometryInspectorApp) diff --git a/73_SolidAngleVisualizer/CMakeLists.txt b/73_SolidAngleVisualizer/CMakeLists.txt new file mode 100644 index 000000000..6438c8e06 --- /dev/null +++ b/73_SolidAngleVisualizer/CMakeLists.txt @@ -0,0 +1,94 @@ +if(NBL_BUILD_IMGUI) + set(NBL_EXTRA_SOURCES + "${CMAKE_CURRENT_SOURCE_DIR}/src/transform.cpp" + ) + + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + list(APPEND NBL_LIBRARIES + imtestengine + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" + ) + + if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) + endif() + + # TODO; Arek I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here, 
doesn't this macro have 4 arguments anyway !? + nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") + + # TODO: Arek temporarily disabled cause I haven't figured out how to make this target yet + # LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) + set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + set(DEPENDS + app_resources/hlsl/common.hlsl + app_resources/hlsl/gpu_common.hlsl + app_resources/hlsl/Drawing.hlsl + app_resources/hlsl/Sampling.hlsl + app_resources/hlsl/silhouette.hlsl + app_resources/hlsl/utils.hlsl + app_resources/hlsl/parallelogram_sampling.hlsl + + # app_resources/hlsl/test.comp.hlsl + app_resources/hlsl/benchmark/benchmark.comp.hlsl + app_resources/hlsl/benchmark/common.hlsl + ) + target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) + set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + + set(SM 6_8) + set(JSON [=[ + [ + + { + "INPUT": "app_resources/hlsl/benchmark/benchmark.comp.hlsl", + "KEY": "benchmark", + }, + ] + ]=]) + string(CONFIGURE "${JSON}" JSON) + + set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} + ) + + NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} + ) + + NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + ) +endif() \ No newline at end of file diff --git a/73_SolidAngleVisualizer/README.md b/73_SolidAngleVisualizer/README.md new file mode 100644 index 000000000..e69de29bb diff --git 
a/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl new file mode 100644 index 000000000..4338bd958 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl @@ -0,0 +1,594 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_DRAWING_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_DRAWING_HLSL_INCLUDED_ + +#include "common.hlsl" +#include "gpu_common.hlsl" + +// Check if a face on the hemisphere is visible from camera at origin +bool isFaceVisible(float32_t3 faceCenter, float32_t3 faceNormal) +{ + float32_t3 viewVec = normalize(-faceCenter); // Vector from camera to face + return dot(faceNormal, viewVec) > 0.0f; +} + +// doesn't change Z coordinate +float32_t3 sphereToCircle(float32_t3 spherePoint) +{ + if (spherePoint.z >= 0.0f) + { + return float32_t3(spherePoint.xy * CIRCLE_RADIUS, spherePoint.z); + } + else + { + float32_t r2 = (1.0f - spherePoint.z) / (1.0f + spherePoint.z); + float32_t uv2Plus1 = r2 + 1.0f; + return float32_t3((spherePoint.xy * uv2Plus1 / 2.0f) * CIRCLE_RADIUS, spherePoint.z); + } +} + +#if VISUALIZE_SAMPLES + +float32_t drawGreatCircleArc(float32_t3 fragPos, float32_t3 points[2], float32_t aaWidth, float32_t width = 0.01f) +{ + float32_t3 v0 = normalize(points[0]); + float32_t3 v1 = normalize(points[1]); + float32_t3 ndc = normalize(fragPos); + + float32_t3 arcNormal = normalize(cross(v0, v1)); + float32_t dist = abs(dot(ndc, arcNormal)); + + float32_t dotMid = dot(v0, v1); + bool onArc = (dot(ndc, v0) >= dotMid) && (dot(ndc, v1) >= dotMid); + + if (!onArc) + return 0.0f; + + float32_t avgDepth = (length(points[0]) + length(points[1])) * 0.5f; + float32_t depthScale = 3.0f / avgDepth; + + width = min(width * depthScale, 0.02f); + float32_t alpha = 1.0f - smoothstep(width - aaWidth, 
width + aaWidth, dist); + + return alpha; +} + +float32_t drawCross2D(float32_t2 fragPos, float32_t2 center, float32_t size, float32_t thickness) +{ + float32_t2 ndc = abs(fragPos - center); + + // Check if point is inside the cross (horizontal or vertical bar) + bool inHorizontal = (ndc.x <= size && ndc.y <= thickness); + bool inVertical = (ndc.y <= size && ndc.x <= thickness); + + return (inHorizontal || inVertical) ? 1.0f : 0.0f; +} + +float32_t4 drawHiddenEdges(float32_t3x4 modelMatrix, float32_t3 spherePos, uint32_t silEdgeMask, float32_t aaWidth) +{ + float32_t4 color = 0; + float32_t3 hiddenEdgeColor = float32_t3(0.1, 0.1, 0.1); + + NBL_UNROLL + for (uint32_t i = 0; i < 12; i++) + { + // skip silhouette edges + if (silEdgeMask & (1u << i)) + continue; + + uint32_t2 edge = allEdges[i]; + + float32_t3 v0 = normalize(getVertex(modelMatrix, edge.x)); + float32_t3 v1 = normalize(getVertex(modelMatrix, edge.y)); + + bool neg0 = v0.z < 0.0f; + bool neg1 = v1.z < 0.0f; + + // fully hidden + if (neg0 && neg1) + continue; + + float32_t3 p0 = v0; + float32_t3 p1 = v1; + + // clip if needed + if (neg0 ^ neg1) + { + float32_t t = v0.z / (v0.z - v1.z); + float32_t3 clip = normalize(lerp(v0, v1, t)); + + p0 = neg0 ? clip : v0; + p1 = neg1 ? 
clip : v1; + } + + float32_t3 pts[2] = {p0, p1}; + float32_t c = drawGreatCircleArc(spherePos, pts, aaWidth, 0.003f); + color += float32_t4(hiddenEdgeColor * c, c); + } + + return color; +} + +float32_t4 drawCorner(float32_t3 cornerNDCPos, float32_t2 ndc, float32_t aaWidth, float32_t dotSize, float32_t innerDotSize, float32_t3 dotColor) +{ + float32_t4 color = float32_t4(0, 0, 0, 0); + float32_t dist = length(ndc - cornerNDCPos.xy); + + // outer dot + float32_t outerAlpha = 1.0f - smoothstep(dotSize - aaWidth, + dotSize + aaWidth, + dist); + + if (outerAlpha <= 0.0f) + return color; + + color += float32_t4(dotColor * outerAlpha, outerAlpha); + + // ------------------------------------------------- + // inner black dot for hidden corners + // ------------------------------------------------- + if (cornerNDCPos.z < 0.0f && innerDotSize > 0.0) + { + float32_t innerAlpha = 1.0f - smoothstep(innerDotSize - aaWidth, + innerDotSize + aaWidth, + dist); + + // ensure it stays inside the outer dot + innerAlpha *= outerAlpha; + + color -= float32_t4(innerAlpha.xxx, 0.0f); + } + + return color; +} + +// Draw a line segment in NDC space +float32_t lineSegment(float32_t2 ndc, float32_t2 a, float32_t2 b, float32_t thickness) +{ + float32_t2 pa = ndc - a; + float32_t2 ba = b - a; + float32_t h = saturate(dot(pa, ba) / dot(ba, ba)); + float32_t dist = length(pa - ba * h); + return smoothstep(thickness, thickness * 0.5, dist); +} + +// Draw an arrow head (triangle) in NDC space +float32_t arrowHead(float32_t2 ndc, float32_t2 tip, float32_t2 direction, float32_t size) +{ + // Create perpendicular vector + float32_t2 perp = float32_t2(-direction.y, direction.x); + + // Three points of the arrow head triangle + float32_t2 p1 = tip; + float32_t2 p2 = tip - direction * size + perp * size * 0.5; + float32_t2 p3 = tip - direction * size - perp * size * 0.5; + + // Check if point is inside triangle using barycentric coordinates + float32_t2 v0 = p3 - p1; + float32_t2 v1 = p2 - p1; + 
float32_t2 v2 = ndc - p1; + + float32_t dot00 = dot(v0, v0); + float32_t dot01 = dot(v0, v1); + float32_t dot02 = dot(v0, v2); + float32_t dot11 = dot(v1, v1); + float32_t dot12 = dot(v1, v2); + + float32_t invDenom = 1.0 / (dot00 * dot11 - dot01 * dot01); + float32_t u = (dot11 * dot02 - dot01 * dot12) * invDenom; + float32_t v = (dot00 * dot12 - dot01 * dot02) * invDenom; + + bool inside = (u >= 0.0) && (v >= 0.0) && (u + v <= 1.0); + + // Add some antialiasing + float32_t minDist = min(min( + length(ndc - p1), + length(ndc - p2)), + length(ndc - p3)); + + return inside ? 1.0 : smoothstep(0.02, 0.0, minDist); +} + +// Helper to draw an edge with proper color mapping +float32_t4 drawEdge(uint32_t originalEdgeIdx, float32_t3 pts[2], float32_t3 spherePos, float32_t aaWidth, float32_t width = 0.003f) +{ + float32_t4 edgeContribution = drawGreatCircleArc(spherePos, pts, aaWidth, width); + return float32_t4(colorLUT[originalEdgeIdx] * edgeContribution.a, edgeContribution.a); +}; + +float32_t4 drawCorners(float32_t3x4 modelMatrix, float32_t2 ndc, float32_t aaWidth, float32_t dotSize) +{ + float32_t4 color = float32_t4(0, 0, 0, 0); + + float32_t innerDotSize = dotSize * 0.5f; + + for (uint32_t i = 0; i < 8; i++) + { + float32_t3 cornerCirclePos = sphereToCircle(normalize(getVertex(modelMatrix, i))); + color += drawCorner(cornerCirclePos, ndc, aaWidth, dotSize, 0.0, colorLUT[i]); + } + + return color; +} + +#ifdef _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ +float32_t4 drawClippedSilhouetteVertices(float32_t2 ndc, ClippedSilhouette silhouette, float32_t aaWidth) +{ + float32_t4 color = 0; + float32_t dotSize = 0.03f; + + for (uint i = 0; i < silhouette.count; i++) + { + float32_t3 cornerCirclePos = sphereToCircle(normalize(silhouette.vertices[i])); + float32_t dist = length(ndc - cornerCirclePos.xy); + + // Smooth circle for the vertex + float32_t alpha = 1.0f - smoothstep(dotSize * 0.8f, dotSize, dist); + + if (alpha > 0.0f) + { + // Color gradient: Red (index 0) 
to Cyan (last index) + // This helps verify the CCW winding order visually + float32_t t = float32_t(i) / float32_t(max(1u, silhouette.count - 1)); + float32_t3 vertexColor = lerp(float32_t3(1, 0, 0), float32_t3(0, 1, 1), t); + + color += float32_t4(vertexColor * alpha, alpha); + } + } + return color; +} +#endif // _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ + +float32_t4 drawRing(float32_t2 ndc, float32_t aaWidth) +{ + float32_t positionLength = length(ndc); + float32_t ringWidth = 0.003f; + float32_t ringDistance = abs(positionLength - CIRCLE_RADIUS); + float32_t ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); + return ringAlpha * float32_t4(1, 1, 1, 1); +} + +// Returns the number of visible faces and populates the faceIndices array +uint getVisibleFaces(int3 region, out uint faceIndices[3]) +{ + uint count = 0; + + // Check X axis + if (region.x == 0) + faceIndices[count++] = 3; // X+ + else if (region.x == 2) + faceIndices[count++] = 2; // X- + + // Check Y axis + if (region.y == 0) + faceIndices[count++] = 5; // Y+ + else if (region.y == 2) + faceIndices[count++] = 4; // Y- + + // Check Z axis + if (region.z == 0) + faceIndices[count++] = 1; // Z+ + else if (region.z == 2) + faceIndices[count++] = 0; // Z- + + return count; +} + +float32_t4 drawVisibleFaceOverlay(float32_t3x4 modelMatrix, float32_t3 spherePos, int3 region, float32_t aaWidth) +{ + uint faceIndices[3]; + uint count = getVisibleFaces(region, faceIndices); + + float32_t4 color = 0; + + for (uint i = 0; i < count; i++) + { + uint fIdx = faceIndices[i]; + float32_t3 n = localNormals[fIdx]; + + // Transform normal to world space (using the same logic as your corners) + float32_t3 worldNormal = -normalize(mul((float3x3)modelMatrix, n)); + worldNormal.z = -worldNormal.z; // Invert Z for correct orientation + + // Very basic visualization: highlight if the sphere position + // is generally pointing towards that face's normal + float32_t alignment = 
dot(spherePos, worldNormal); + if (alignment > 0.95f) + { + // Use different colors for different face indices + color += float32_t4(colorLUT[fIdx % 24], 0.5f); + } + } + return color; +} + +float32_t4 drawFaces(float32_t3x4 modelMatrix, float32_t3 spherePos, float32_t aaWidth) +{ + float32_t4 color = 0.0f; + float32_t3 ndc = normalize(spherePos); + + float3x3 rotMatrix = (float3x3)modelMatrix; + + // Check each of the 6 faces + for (uint32_t faceIdx = 0; faceIdx < 6; faceIdx++) + { + float32_t3 n_world = mul(rotMatrix, localNormals[faceIdx]); + + // Check if face is visible + if (!isFaceVisible(faceCenters[faceIdx], n_world)) + continue; + + // Get the 4 corners of this face + float32_t3 faceVerts[4]; + for (uint32_t i = 0; i < 4; i++) + { + uint32_t cornerIdx = faceToCorners[faceIdx][i]; + faceVerts[i] = normalize(getVertex(modelMatrix, cornerIdx)); + } + + // Compute face center for winding + float32_t3 faceCenter = float32_t3(0, 0, 0); + for (uint32_t i = 0; i < 4; i++) + faceCenter += faceVerts[i]; + faceCenter = normalize(faceCenter); + + // Check if point is inside this face + bool isInside = true; + float32_t minDist = 1e10; + + for (uint32_t i = 0; i < 4; i++) + { + float32_t3 v0 = faceVerts[i]; + float32_t3 v1 = faceVerts[(i + 1) % 4]; + + // Skip edges behind camera + if (v0.z < 0.0f && v1.z < 0.0f) + { + isInside = false; + break; + } + + // Great circle normal + float32_t3 edgeNormal = normalize(cross(v0, v1)); + + // Ensure normal points inward + if (dot(edgeNormal, faceCenter) < 0.0f) + edgeNormal = -edgeNormal; + + float32_t d = dot(ndc, edgeNormal); + + if (d < -1e-6f) + { + isInside = false; + break; + } + + minDist = min(minDist, abs(d)); + } + + if (isInside) + { + float32_t alpha = smoothstep(0.0f, aaWidth * 2.0f, minDist); + + // Use colorLUT based on face index (0-5) + float32_t3 faceColor = colorLUT[faceIdx]; + + float32_t shading = saturate(ndc.z * 0.8f + 0.2f); + color += float32_t4(faceColor * shading * alpha, alpha); + } + } + + return 
color; +} + +// ============================================================================ +// Spherical geometry drawing helpers (for pyramid visualization) +// ============================================================================ + +// Draw a great circle where dot(p, axis) = 0 +// Used to visualize caliper planes +float32_t4 drawGreatCirclePlane( + float32_t3 axis, + float32_t3 spherePos, + float32_t aaWidth, + float32_t3 color, + float32_t width = 0.005f) +{ + float32_t3 fragDir = normalize(spherePos); + + // Only draw on front hemisphere + if (fragDir.z < 0.0f) + return float32_t4(0, 0, 0, 0); + + // Distance from the great circle plane + float32_t distFromPlane = abs(dot(fragDir, axis)); + + float32_t alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, distFromPlane); + + return float32_t4(color * alpha, alpha); +} + +// Draw lune boundaries - two small circles at dot(p, axis) = offset ± halfWidth +// halfWidth and offset are in sin-space (not radians) +float32_t4 drawLuneBoundary(float32_t3 axis, float32_t halfWidth, float32_t offset, float32_t3 spherePos, float32_t aaWidth, float32_t3 color, float32_t lineWidth = 0.004f) +{ + float32_t3 fragDir = normalize(spherePos); + + // Only draw on front hemisphere + if (fragDir.z < 0.0f) + return float32_t4(0, 0, 0, 0); + + // The lune boundaries are where dot(p, axis) = offset ± halfWidth + float32_t dotWithAxis = dot(fragDir, axis); + + // Draw both boundaries of the lune (accounting for offset) + float32_t upperBound = offset + halfWidth; + float32_t lowerBound = offset - halfWidth; + float32_t distFromUpperBoundary = abs(dotWithAxis - upperBound); + float32_t distFromLowerBoundary = abs(dotWithAxis - lowerBound); + + float32_t alphaUpper = 1.0f - smoothstep(lineWidth - aaWidth, lineWidth + aaWidth, distFromUpperBoundary); + float32_t alphaLower = 1.0f - smoothstep(lineWidth - aaWidth, lineWidth + aaWidth, distFromLowerBoundary); + + float32_t alpha = max(alphaUpper, alphaLower); + + return 
float32_t4(color * alpha, alpha); +} + +// Draw axis direction markers (dots at +/- axis from center) +float32_t4 drawAxisMarkers( + float32_t3 axis, + float32_t3 center, + float32_t2 ndc, + float32_t aaWidth, + float32_t3 color, + float32_t extent = 0.25f) +{ + float32_t4 result = float32_t4(0, 0, 0, 0); + + // Positive axis endpoint + float32_t3 axisEndPos = normalize(center + axis * extent); + float32_t3 axisEndPosCircle = sphereToCircle(axisEndPos); + result += drawCorner(axisEndPosCircle, ndc, aaWidth, 0.025f, 0.0f, color); + + // Negative axis endpoint (smaller, dimmer) + float32_t3 axisEndNeg = normalize(center - axis * extent); + float32_t3 axisEndNegCircle = sphereToCircle(axisEndNeg); + result += drawCorner(axisEndNegCircle, ndc, aaWidth, 0.015f, 0.0f, color * 0.5f); + + return result; +} + +// ============================================================================ +// Visualization +// ============================================================================ + +// Draw half of a great circle (the visible half of a lune boundary) +float32_t4 drawGreatCircleHalf(float32_t3 normal, float32_t3 spherePos, float32_t3 axis3, float32_t aaWidth, float32_t3 color, float32_t thickness) +{ + // Point is on great circle if dot(point, normal) ≈ 0 + // Only draw the half where dot(point, axis3) > 0 (toward silhouette) + float32_t dist = abs(dot(spherePos, normal)); + float32_t sideFade = smoothstep(-0.1f, 0.1f, dot(spherePos, axis3)); + float32_t alpha = (1.0f - smoothstep(thickness - aaWidth, thickness + aaWidth, dist)) * sideFade; + return float32_t4(color * alpha, alpha); +} + +// Visualize the best caliper edge (the edge that determined axis1) +float32_t4 visualizeBestCaliperEdge(const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], uint32_t bestEdgeIdx, uint32_t count, float32_t3 spherePos, float32_t aaWidth) +{ + float32_t4 result = float32_t4(0, 0, 0, 0); + + if (bestEdgeIdx >= count) + return result; + + uint32_t nextIdx = (bestEdgeIdx + 1 < count) ? 
bestEdgeIdx + 1 : 0;
+    float32_t3 v0 = vertices[bestEdgeIdx];
+    float32_t3 v1 = vertices[nextIdx];
+
+    // Draw the best caliper edge with a thicker, gold line
+    float32_t3 pts[2] = {v0, v1};
+    float32_t3 highlightColor = float32_t3(1.0f, 0.8f, 0.0f);
+    float32_t alpha = drawGreatCircleArc(spherePos, pts, aaWidth, 0.008f);
+    result += float32_t4(highlightColor * alpha, alpha);
+
+    return result;
+}
+
+#endif // VISUALIZE_SAMPLES
+
+#if DEBUG_DATA
+
+// Classifies cube edge `edgeIdx` (an index into allEdges) by the visibility of
+// the two faces that share it.
+// Returns 0 when both faces are hidden, 1 when exactly one face is visible
+// (silhouette edge) and 2 when both faces are visible (inner edge).
+uint32_t getEdgeVisibility(float32_t3x4 modelMatrix, uint32_t edgeIdx)
+{
+    // For every entry of allEdges, the pair of face indices sharing that edge.
+    static const uint32_t2 edgeToFaces[12] = {
+        // Edge Index: | allEdges[i] | Shared Faces:
+
+        /* 0 (0-1) */ {4, 0}, // Y- (4) and Z- (0)
+        /* 1 (2-3) */ {5, 0}, // Y+ (5) and Z- (0)
+        /* 2 (4-5) */ {4, 1}, // Y- (4) and Z+ (1)
+        /* 3 (6-7) */ {5, 1}, // Y+ (5) and Z+ (1)
+
+        /* 4 (0-2) */ {2, 0}, // X- (2) and Z- (0)
+        /* 5 (1-3) */ {3, 0}, // X+ (3) and Z- (0)
+        /* 6 (4-6) */ {2, 1}, // X- (2) and Z+ (1)
+        /* 7 (5-7) */ {3, 1}, // X+ (3) and Z+ (1)
+
+        /* 8 (0-4) */ {2, 4}, // X- (2) and Y- (4)
+        /* 9 (1-5) */ {3, 4}, // X+ (3) and Y- (4)
+        /* 10 (2-6) */ {2, 5}, // X- (2) and Y+ (5)
+        /* 11 (3-7) */ {3, 5}  // X+ (3) and Y+ (5)
+    };
+
+    const uint32_t2 adjacentFaces = edgeToFaces[edgeIdx];
+
+    // Only the upper-left 3x3 of the model matrix is applied to the face normals.
+    const float3x3 rotation = (float3x3)modelMatrix;
+    const float32_t3 firstNormal = mul(rotation, localNormals[adjacentFaces.x]);
+    const float32_t3 secondNormal = mul(rotation, localNormals[adjacentFaces.y]);
+
+    const bool firstVisible = isFaceVisible(faceCenters[adjacentFaces.x], firstNormal);
+    const bool secondVisible = isFaceVisible(faceCenters[adjacentFaces.y], secondNormal);
+
+    // Inner edge: both faces visible
+    if (firstVisible && secondVisible)
+        return 2;
+
+    // Silhouette when exactly one face is visible, hidden when neither is
+    return (firstVisible || secondVisible) ? 1 : 0;
+}
+
+// Builds a 12-bit mask with bit j set when allEdges[j] is a silhouette edge
+// according to getEdgeVisibility.
+uint32_t computeGroundTruthEdgeMask(float32_t3x4 modelMatrix)
+{
+    uint32_t silhouetteBits = 0u;
+    NBL_UNROLL
+    for (uint32_t edge = 0; edge < 12; edge++)
+    {
+        const bool onSilhouette = getEdgeVisibility(modelMatrix, edge) == 1;
+        silhouetteBits |= onSilhouette ? (1u << edge) : 0u;
+    }
+    return silhouetteBits;
+}
+
+// Compares `generatedSilMask` against the ground-truth silhouette edge mask and
+// ORs the vertex indices of every disagreeing edge into
+// DebugDataBuffer[0].edgeVisibilityMismatch.
+// `sil` and `vertexCount` are accepted but not read by this function.
+void validateEdgeVisibility(float32_t3x4 modelMatrix, uint32_t sil, uint32_t vertexCount, uint32_t generatedSilMask)
+{
+    // Bits set where the generated mask and the 3D ground truth disagree.
+    const uint32_t disagreeingEdges = computeGroundTruthEdgeMask(modelMatrix) ^ generatedSilMask;
+
+    // One bit per cube vertex touched by a disagreeing edge; stays 0 when the masks match.
+    uint32_t touchedVertices = 0;
+    NBL_UNROLL
+    for (uint32_t edge = 0; edge < 12; edge++)
+    {
+        if ((disagreeingEdges >> edge) & 1u)
+        {
+            const uint32_t2 endpoints = allEdges[edge];
+            touchedVertices |= (1u << endpoints.x) | (1u << endpoints.y);
+        }
+    }
+
+    // Simple write (assuming all fragments calculate the same result)
+    InterlockedOr(DebugDataBuffer[0].edgeVisibilityMismatch, touchedVertices);
+}
+#endif // DEBUG_DATA
+
+#endif // _SOLID_ANGLE_VIS_EXAMPLE_DRAWING_HLSL_INCLUDED_
diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl
new file mode 100644
index 000000000..3b49d17ca
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl
@@ -0,0 +1,128 @@
+//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#pragma shader_stage(compute)
+
+#include "app_resources/hlsl/common.hlsl"
+#include "app_resources/hlsl/benchmark/common.hlsl"
+#include "app_resources/hlsl/silhouette.hlsl"
+#include "app_resources/hlsl/parallelogram_sampling.hlsl"
+#include "app_resources/hlsl/pyramid_sampling.hlsl"
+#include "app_resources/hlsl/triangle_sampling.hlsl"
+
+using namespace nbl::hlsl;
+
+[[vk::binding(0, 0)]] RWByteAddressBuffer outputBuffer;
+[[vk::push_constant]] BenchmarkPushConstants pc;
+
+// NOTE(review): SAMPLING_MODE_CONST is not defined in this file; presumably injected
+// per pipeline as a compile-time define - confirm against the build rules.
+static const SAMPLING_MODE benchmarkMode = (SAMPLING_MODE)SAMPLING_MODE_CONST;
+
+// Deterministic 8x8 stratified sample for loop index `i`.
+// The row index wraps every 64 samples so both coordinates stay inside (0,1);
+// without the wrap xi.y exceeded 1 for i >= 64 and fed the samplers out-of-domain input.
+float32_t2 benchmarkXi(uint32_t i)
+{
+    return float32_t2(
+        (float32_t(i & 7u) + 0.5f) / 8.0f,
+        (float32_t((i >> 3u) & 7u) + 0.5f) / 8.0f);
+}
+
+// Benchmark entry point: each invocation builds the clipped silhouette for a slightly
+// perturbed model matrix, draws pc.sampleCount samples with the sampler selected by
+// benchmarkMode, and stores one uint32 (a sum of all results) at byte offset
+// 4 * invocationID.x so that none of the sampling work can be optimized away.
+[numthreads(BENCHMARK_WORKGROUP_DIMENSION_SIZE_X, 1, 1)]
+[shader("compute")]
+void main(uint32_t3 invocationID : SV_DispatchThreadID)
+{
+    // Perturb model matrix slightly per sample group
+    float32_t3x4 perturbedMatrix = pc.modelMatrix;
+    perturbedMatrix[0][3] += float32_t(invocationID.x) * 1e-6f;
+
+    uint32_t3 region;
+    uint32_t configIndex;
+    uint32_t vertexCount;
+    uint32_t sil = ClippedSilhouette::computeRegionAndConfig(perturbedMatrix, region, configIndex, vertexCount);
+
+    ClippedSilhouette silhouette = (ClippedSilhouette)0;
+    silhouette.compute(perturbedMatrix, vertexCount, sil);
+
+    // All accumulators are initialized: they are read unconditionally by the final
+    // Store, but pdf/sampleValid are only written inside the sample loops (which do
+    // not run when pc.sampleCount == 0) and triIdx only by the triangle modes.
+    float32_t pdf = 0.0f;
+    uint32_t triIdx = 0u;
+    uint32_t validSampleCount = 0;
+    float32_t3 sampleDir = float32_t3(0.0, 0.0, 0.0);
+
+    bool sampleValid = false;
+    if (benchmarkMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE ||
+        benchmarkMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE)
+    {
+        TriangleFanSampler samplingData;
+        samplingData = TriangleFanSampler::create(silhouette, benchmarkMode);
+
+        for (uint32_t i = 0; i < pc.sampleCount; i++)
+        {
+            const float32_t2 xi = benchmarkXi(i);
+
+            // This sampler reports no validity flag; every sample is counted.
+            sampleDir += samplingData.sample(silhouette, xi, pdf, triIdx);
+            validSampleCount++;
+        }
+    }
+    else if (benchmarkMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE)
+    {
+        // Precompute parallelogram for sampling
+        silhouette.normalize();
+        SilEdgeNormals silEdgeNormals;
+        Parallelogram parallelogram = Parallelogram::create(silhouette, silEdgeNormals);
+        for (uint32_t i = 0; i < pc.sampleCount; i++)
+        {
+            const float32_t2 xi = benchmarkXi(i);
+
+            sampleDir += parallelogram.sample(silEdgeNormals, xi, pdf, sampleValid);
+            validSampleCount += sampleValid ? 1u : 0u;
+        }
+    }
+    else if (benchmarkMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE)
+    {
+        // Precompute spherical pyramid and Urena sampler once (edge normals fused)
+        SilEdgeNormals silEdgeNormals;
+        SphericalPyramid pyramid = SphericalPyramid::create(silhouette, silEdgeNormals);
+        UrenaSampler urena = UrenaSampler::create(pyramid);
+
+        for (uint32_t i = 0; i < pc.sampleCount; i++)
+        {
+            const float32_t2 xi = benchmarkXi(i);
+
+            sampleDir += urena.sample(pyramid, silEdgeNormals, xi, pdf, sampleValid);
+            validSampleCount += sampleValid ? 1u : 0u;
+        }
+    }
+    else if (benchmarkMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC)
+    {
+        // Precompute spherical pyramid and biquadratic sampler once (edge normals fused)
+        SilEdgeNormals silEdgeNormals;
+        SphericalPyramid pyramid = SphericalPyramid::create(silhouette, silEdgeNormals);
+        BiquadraticSampler biquad = BiquadraticSampler::create(pyramid);
+
+        for (uint32_t i = 0; i < pc.sampleCount; i++)
+        {
+            const float32_t2 xi = benchmarkXi(i);
+
+            sampleDir += biquad.sample(pyramid, silEdgeNormals, xi, pdf, sampleValid);
+            validSampleCount += sampleValid ? 1u : 0u;
+        }
+    }
+    else if (benchmarkMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR)
+    {
+        // Precompute spherical pyramid and bilinear sampler once (edge normals fused)
+        SilEdgeNormals silEdgeNormals;
+        SphericalPyramid pyramid = SphericalPyramid::create(silhouette, silEdgeNormals);
+        BilinearSampler bilin = BilinearSampler::create(pyramid);
+
+        for (uint32_t i = 0; i < pc.sampleCount; i++)
+        {
+            const float32_t2 xi = benchmarkXi(i);
+
+            sampleDir += bilin.sample(pyramid, silEdgeNormals, xi, pdf, sampleValid);
+            validSampleCount += sampleValid ? 1u : 0u;
+        }
+    }
+
+    // Fold every result into one integer. pdf is bit-cast like the direction
+    // components: adding it as a float promoted the whole sum to float and the
+    // implicit float->uint conversion of that (typically > 2^32) value is not defined.
+    const uint32_t offset = sizeof(uint32_t) * invocationID.x;
+    outputBuffer.Store(offset, asuint(pdf) + validSampleCount + triIdx + asuint(sampleDir.x) + asuint(sampleDir.y) + asuint(sampleDir.z));
+}
diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl
new file mode 100644
index 000000000..3091bc793
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl
@@ -0,0 +1,11 @@
+//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h + +#include + +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_X = 64u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_COUNT = 1000000u; + diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl new file mode 100644 index 000000000..d63ec3c6a --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -0,0 +1,136 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_COMMON_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_COMMON_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +#define FAST 1 + +namespace nbl +{ + namespace hlsl + { + // Sampling mode enum + enum SAMPLING_MODE : uint32_t + { + TRIANGLE_SOLID_ANGLE, + TRIANGLE_PROJECTED_SOLID_ANGLE, + PROJECTED_PARALLELOGRAM_SOLID_ANGLE, + SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE, + SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC, + SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR, + Count + }; + + struct ResultData + { + // Silhouette + uint32_t3 region; + uint32_t silhouetteIndex; + uint32_t silhouetteVertexCount; + uint32_t silhouette; + uint32_t positiveVertCount; + uint32_t edgeVisibilityMismatch; + uint32_t clipMask; + uint32_t clipCount; + uint32_t rotatedSil; + uint32_t wrapAround; + uint32_t rotatedClipMask; + uint32_t rotateAmount; + uint32_t vertices[6]; + uint32_t clippedSilhouetteVertexCount; + float32_t3 clippedSilhouetteVertices[7]; + uint32_t clippedSilhouetteVerticesIndices[7]; + + // Parallelogram + uint32_t parallelogramDoesNotBound; + 
float32_t parallelogramArea; + uint32_t failedVertexIndex; + uint32_t edgeIsConvex[4]; + uint32_t parallelogramVerticesInside; + uint32_t parallelogramEdgesInside; + float32_t2 parallelogramCorners[4]; + + // spherical triangle + uint32_t maxTrianglesExceeded; + uint32_t sphericalLuneDetected; + uint32_t triangleCount; + float32_t solidAngles[5]; + float32_t totalSolidAngles; + + // Sampling ray visualization data + uint32_t sampleCount; + float32_t4 rayData[512]; // xyz = direction, w = PDF + + // Pyramid sampling debug data + float32_t3 pyramidAxis1; // First caliper axis direction + float32_t3 pyramidAxis2; // Second caliper axis direction + float32_t3 pyramidCenter; // Silhouette center direction + float32_t pyramidHalfWidth1; // Half-width along axis1 (sin-space) + float32_t pyramidHalfWidth2; // Half-width along axis2 (sin-space) + float32_t pyramidOffset1; // Center offset along axis1 + float32_t pyramidOffset2; // Center offset along axis2 + float32_t pyramidSolidAngle; // Bounding region solid angle + uint32_t pyramidBestEdge; // Which edge produced best caliper + uint32_t pyramidSpansHemisphere; // Warning: silhouette >= hemisphere + float32_t pyramidMin1; // Min dot product along axis1 + float32_t pyramidMax1; // Max dot product along axis1 + float32_t pyramidMin2; // Min dot product along axis2 + float32_t pyramidMax2; // Max dot product along axis2 + uint32_t axis2BiggerThanAxis1; + + // Sampling stats + uint32_t validSampleCount; + uint32_t threadCount; // Used as a hack for fragment shader, as dividend for validSampleCount + }; + +#ifdef __HLSL_VERSION + [[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; +#endif + + struct PushConstants + { + float32_t3x4 modelMatrix; + float32_t4 viewport; + uint32_t sampleCount; + uint32_t frameIndex; + }; + + struct PushConstantRayVis + { + float32_t4x4 viewProjMatrix; + float32_t3x4 viewMatrix; + float32_t3x4 modelMatrix; + float32_t3x4 invModelMatrix; + float32_t4 viewport; + uint32_t frameIndex; + }; + + 
struct BenchmarkPushConstants + { + float32_t3x4 modelMatrix; + uint32_t sampleCount; + }; + + static const float32_t3 colorLUT[27] = { + float32_t3(0, 0, 0), float32_t3(0.5, 0.5, 0.5), + float32_t3(1, 0, 0), float32_t3(0, 1, 0), float32_t3(0, 0, 1), + float32_t3(1, 1, 0), float32_t3(1, 0, 1), float32_t3(0, 1, 1), + float32_t3(1, 0.5, 0), float32_t3(1, 0.65, 0), float32_t3(0.8, 0.4, 0), + float32_t3(1, 0.4, 0.7), float32_t3(1, 0.75, 0.8), float32_t3(0.7, 0.1, 0.3), + float32_t3(0.5, 0, 0.5), float32_t3(0.6, 0.4, 0.8), float32_t3(0.3, 0, 0.5), + float32_t3(0, 0.5, 0), float32_t3(0.5, 1, 0), float32_t3(0, 0.5, 0.25), + float32_t3(0, 0, 0.5), float32_t3(0.3, 0.7, 1), float32_t3(0, 0.4, 0.6), + float32_t3(0.6, 0.4, 0.2), float32_t3(0.8, 0.7, 0.3), float32_t3(0.4, 0.3, 0.1), float32_t3(1, 1, 1)}; + +#ifndef __HLSL_VERSION + static const char *colorNames[27] = {"Black", "Gray", "Red", "Green", "Blue", "Yellow", "Magenta", "Cyan", + "Orange", "Light Orange", "Dark Orange", "Pink", "Light Pink", "Deep Rose", "Purple", "Light Purple", + "Indigo", "Dark Green", "Lime", "Forest Green", "Navy", "Sky Blue", "Teal", "Brown", + "Tan/Beige", "Dark Brown", "White"}; +#endif // __HLSL_VERSION + } +} +#endif // _SOLID_ANGLE_VIS_EXAMPLE_COMMON_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl new file mode 100644 index 000000000..142471493 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl @@ -0,0 +1,175 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". 
+//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_GPU_COMMON_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_GPU_COMMON_HLSL_INCLUDED_ + +#include "utils.hlsl" + +static const float32_t CIRCLE_RADIUS = 0.5f; +static const float32_t INV_CIRCLE_RADIUS = 1.0f / CIRCLE_RADIUS; + +// --- Geometry Utils --- +#define MAX_SILHOUETTE_VERTICES 7 + +// Special index values for clip points +static const uint32_t CLIP_POINT_A = 23; // Clip point between last positive and first negative +static const uint32_t CLIP_POINT_B = 24; // Clip point between last negative and first positive + +static const float32_t3 constCorners[8] = { + float32_t3(-0.5f, -0.5f, -0.5f), float32_t3(0.5f, -0.5f, -0.5f), float32_t3(-0.5f, 0.5f, -0.5f), float32_t3(0.5f, 0.5f, -0.5f), + float32_t3(-0.5f, -0.5f, 0.5f), float32_t3(0.5f, -0.5f, 0.5f), float32_t3(-0.5f, 0.5f, 0.5f), float32_t3(0.5f, 0.5f, 0.5f)}; + +static const uint32_t2 allEdges[12] = { + {0, 1}, + {2, 3}, + {4, 5}, + {6, 7}, // X axis + {0, 2}, + {1, 3}, + {4, 6}, + {5, 7}, // Y axis + {0, 4}, + {1, 5}, + {2, 6}, + {3, 7}, // Z axis +}; + +// Maps face index (0-5) to its 4 corner indices in CCW order +static const uint32_t faceToCorners[6][4] = { + {0, 2, 3, 1}, // Face 0: Z- + {4, 5, 7, 6}, // Face 1: Z+ + {0, 4, 6, 2}, // Face 2: X- + {1, 3, 7, 5}, // Face 3: X+ + {0, 1, 5, 4}, // Face 4: Y- + {2, 6, 7, 3} // Face 5: Y+ +}; + +static float32_t3 corners[8]; +static float32_t3 faceCenters[6] = { + float32_t3(0, 0, 0), float32_t3(0, 0, 0), float32_t3(0, 0, 0), + float32_t3(0, 0, 0), float32_t3(0, 0, 0), float32_t3(0, 0, 0)}; + +static const float32_t3 localNormals[6] = { + float32_t3(0, 0, -1), // Face 0 (Z-) + float32_t3(0, 0, 1), // Face 1 (Z+) + float32_t3(-1, 0, 0), // Face 2 (X-) + float32_t3(1, 0, 0), // Face 3 (X+) + float32_t3(0, -1, 0), // Face 4 (Y-) + float32_t3(0, 1, 0) // Face 5 (Y+) +}; + +// TODO: unused, remove later +// Vertices are ordered CCW relative to the 
camera view.
+// Per entry: element 0 is the vertex count, elements 1..6 are cube corner indices.
+// NOTE(review): entry 13 declares a count of 4 but lists six corners, and the colour
+// names in the trailing comments do not line up with colorNames[] in common.hlsl from
+// index 1 onwards - confirm before relying on either.
+static const uint32_t silhouettes[27][7] = {
+    {6, 1, 3, 2, 6, 4, 5}, // 0: Black
+    {6, 2, 6, 4, 5, 7, 3}, // 1: White
+    {6, 0, 4, 5, 7, 3, 2}, // 2: Gray
+    {6, 1, 3, 7, 6, 4, 5}, // 3: Red
+    {4, 4, 5, 7, 6, 0, 0}, // 4: Green
+    {6, 0, 4, 5, 7, 6, 2}, // 5: Blue
+    {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow
+    {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta
+    {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan
+    {6, 1, 3, 2, 6, 7, 5}, // 9: Orange
+    {4, 2, 6, 7, 3, 0, 0}, // 10: Light Orange
+    {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange
+    {4, 1, 3, 7, 5, 0, 0}, // 12: Pink
+    {4, 0, 4, 6, 7, 3, 2}, // 13: Light Pink
+    {4, 0, 4, 6, 2, 0, 0}, // 14: Deep Rose
+    {6, 0, 1, 3, 7, 5, 4}, // 15: Purple
+    {4, 0, 1, 5, 4, 0, 0}, // 16: Light Purple
+    {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo
+    {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green
+    {6, 0, 2, 6, 7, 3, 1}, // 19: Lime
+    {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green
+    {6, 0, 2, 3, 7, 5, 1}, // 21: Navy
+    {4, 0, 2, 3, 1, 0, 0}, // 22: Sky Blue
+    {6, 0, 4, 6, 2, 3, 1}, // 23: Teal
+    {6, 0, 2, 3, 7, 5, 4}, // 24: Brown
+    {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige
+    {6, 1, 5, 4, 6, 2, 3}  // 26: Dark Brown
+};
+
+// Binary packed silhouettes
+// Layout read by the accessors below: vertex k occupies bits [3k, 3k+2],
+// the vertex count occupies bits 29..31.
+static const uint32_t binSilhouettes[27] = {
+    0b11000000000000101100110010011001,
+    0b11000000000000011111101100110010,
+    0b11000000000000010011111101100000,
+    0b11000000000000101100110111011001,
+    0b10000000000000000000110111101100,
+    0b11000000000000010110111101100000,
+    0b11000000000000100110111011001000,
+    0b11000000000000100110111101001000,
+    0b11000000000000010110111101001000,
+    0b11000000000000101111110010011001,
+    0b10000000000000000000011111110010,
+    0b11000000000000010011111110100000,
+    0b10000000000000000000101111011001,
+    0b11000000000000010011111110100000,
+    0b10000000000000000000010110100000,
+    0b11000000000000100101111011001000,
+    0b10000000000000000000100101001000,
+    0b11000000000000010110100101001000,
+    0b11000000000000001101111110010000,
+    0b11000000000000001011111110010000,
+    0b11000000000000001011111110100000,
+    0b11000000000000001101111011010000,
+    0b10000000000000000000001011010000,
+    0b11000000000000001011010110100000,
+    0b11000000000000100101111011010000,
+    0b11000000000000100101001011010000,
+    0b11000000000000011010110100101001,
+};
+
+// Extracts the 3-bit cube corner index stored at position `index` of a packed silhouette.
+uint32_t getSilhouetteVertex(uint32_t packedSil, uint32_t index)
+{
+    return (packedSil >> (3u * index)) & 0x7u;
+}
+
+// Get silhouette size (the top three bits of the packed word)
+uint32_t getSilhouetteSize(uint32_t sil)
+{
+    return (sil >> 29u) & 0x7u;
+}
+
+// Check if vertex has negative z after applying modelMatrix
+bool getVertexZNeg(float32_t3x4 modelMatrix, uint32_t vertexIdx)
+{
+#if FAST
+    // Bits 0/1/2 of the index select the +0.5 or -0.5 side along x/y/z.
+    float32_t3 localPos = float32_t3(
+        (vertexIdx & 1) ? 0.5f : -0.5f,
+        (vertexIdx & 2) ? 0.5f : -0.5f,
+        (vertexIdx & 4) ? 0.5f : -0.5f);
+
+    // Only the third matrix row contributes to the transformed z.
+    float32_t transformedZ = nbl::hlsl::dot(modelMatrix[2].xyz, localPos) + modelMatrix[2].w;
+    return transformedZ < 0.0f;
+#else
+    return corners[vertexIdx].z < 0.0f;
+#endif
+}
+
+// Get world position of cube vertex
+float32_t3 getVertex(float32_t3x4 modelMatrix, uint32_t vertexIdx)
+{
+#if FAST
+    // Reconstruct local cube corner from index bits
+    float32_t sx = (vertexIdx & 1) ? 0.5f : -0.5f;
+    float32_t sy = (vertexIdx & 2) ? 0.5f : -0.5f;
+    float32_t sz = (vertexIdx & 4) ? 0.5f : -0.5f;
+
+    float32_t4x3 model = transpose(modelMatrix);
+
+    // Transform to world
+    // Full position, not just Z like getVertexZNeg
+    return model[0].xyz * sx +
+           model[1].xyz * sy +
+           model[2].xyz * sz +
+           model[3].xyz;
+    // return mul(modelMatrix, float32_t4(sx, sy, sz, 1.0f));
+#else
+    return corners[vertexIdx];
+#endif
+}
+
+#endif // _SOLID_ANGLE_VIS_EXAMPLE_GPU_COMMON_HLSL_INCLUDED_
diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl
new file mode 100644
index 000000000..cd02171af
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl
@@ -0,0 +1,418 @@
+//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _SOLID_ANGLE_VIS_EXAMPLE_PARALLELOGRAM_SAMPLING_HLSL_INCLUDED_
+#define _SOLID_ANGLE_VIS_EXAMPLE_PARALLELOGRAM_SAMPLING_HLSL_INCLUDED_
+
+// NOTE(review): the operands of the next two includes are missing in this dump
+// (angle-bracket text was lost) - restore them from the repository.
+#include
+#include
+#include "silhouette.hlsl"
+#include "drawing.hlsl"
+
+#define MAX_CURVE_APEXES 2
+// Projects vertex `i` of the in-scope `silhouette` into circle space.
+// Fully parenthesised so the expansion stays one operand inside larger expressions.
+#define GET_PROJ_VERT(i) (silhouette.vertices[(i)].xy * CIRCLE_RADIUS)
+
+// ============================================================================
+// Minimum bounding rectangle on projected sphere
+// ============================================================================
+struct Parallelogram
+{
+    float16_t2 corner;
+    float16_t2 axisDir;
+    float16_t width;
+    float16_t height;
+
+    // ========================================================================
+    // Projection helpers
+    // ========================================================================
+
+    // Lifts a circle-space point (disc of radius CIRCLE_RADIUS) to the z >= 0 unit
+    // hemisphere. Points outside the disc get z = 0 instead of the NaN that
+    // sqrt of a negative number would produce, so callers testing `z > 0` still
+    // reject them but accumulated directions are not poisoned.
+    static float32_t3 circleToSphere(float32_t2 circlePoint)
+    {
+        float32_t2 xy = circlePoint / CIRCLE_RADIUS;
+        float32_t xy_len_sq = dot(xy, xy);
+        return float32_t3(xy, sqrt(max(0.0f, 1.0f - xy_len_sq)));
+    }
+
+    // 
========================================================================
+    // Curve evaluation helpers
+    // ========================================================================
+
+    // Circle-space position of the point at parameter `t` along the chord S->E,
+    // after renormalizing that chord point to unit length.
+    static float32_t2 evalCurvePoint(float32_t3 S, float32_t3 E, float32_t t)
+    {
+        const float32_t3 chordPoint = S + t * (E - S);
+        const float32_t invLength = rsqrt(dot(chordPoint, chordPoint));
+        return chordPoint.xy * (invLength * CIRCLE_RADIUS);
+    }
+
+    // Unit 2D tangent of the projected curve at parameter `t`.
+    // Falls back to the normalized projected chord direction when the chord point
+    // is (numerically) at the origin or the projected tangent is too short.
+    static float32_t2 evalCurveTangent(float32_t3 S, float32_t3 E, float32_t t)
+    {
+        const float32_t3 chordPoint = S + t * (E - S);
+        const float32_t chordPointLenSq = dot(chordPoint, chordPoint);
+
+        if (chordPointLenSq < 1e-12f)
+            return normalize(E.xy - S.xy);
+
+        // Remove from the chord derivative its component along the unit-length point.
+        const float32_t3 onSphere = chordPoint * rsqrt(chordPointLenSq);
+        const float32_t3 chordDerivative = E - S;
+        const float32_t2 projected = (chordDerivative - onSphere * dot(onSphere, chordDerivative)).xy;
+
+        const float32_t projectedLen = length(projected);
+        if (projectedLen > 1e-7f)
+            return projected / projectedLen;
+
+        return normalize(E.xy - S.xy);
+    }
+
+    // Get both endpoint tangents (shares SdotE computation)
+    // t0 is the unit 2D tangent at S, t1 the unit 2D tangent at E.
+    static void getProjectedTangents(float32_t3 S, float32_t3 E, out float32_t2 t0, out float32_t2 t1)
+    {
+        const float32_t eps = 1e-14f;
+        const float32_t SdotE = dot(S, E);
+
+        const float32_t2 rawStart = (E - S * SdotE).xy;
+        const float32_t2 rawEnd = (E * SdotE - S).xy;
+
+        const float32_t startLenSq = dot(rawStart, rawStart);
+        const float32_t endLenSq = dot(rawEnd, rawEnd);
+
+        const bool startUsable = startLenSq > eps;
+        const bool endUsable = endLenSq > eps;
+
+        if (!(startUsable && endUsable))
+        {
+            // Rare path: at least one tangent is degenerate. Substitute the projected
+            // chord direction, or the +X axis when that is degenerate as well.
+            const float32_t2 chord2D = E.xy - S.xy;
+            const float32_t chordLenSq = dot(chord2D, chord2D);
+            const float32_t2 substitute = chordLenSq > eps ? chord2D * rsqrt(chordLenSq) : float32_t2(1.0f, 0.0f);
+
+            t0 = startUsable ? rawStart * rsqrt(startLenSq) : substitute;
+            t1 = endUsable ? rawEnd * rsqrt(endLenSq) : substitute;
+            return;
+        }
+
+        t0 = rawStart * rsqrt(startLenSq);
+        t1 = rawEnd * rsqrt(endLenSq);
+    }
+
+    // Compute apex with clamping to prevent apex explosion
+    // The apex is the intersection of the line through p0 along t0 with the line
+    // through p1 along t1. Near-parallel tangents yield the segment midpoint, and an
+    // apex farther than 2 * CIRCLE_RADIUS from the midpoint is pulled back to that distance.
+    static void computeApexClamped(float32_t2 p0, float32_t2 p1, float32_t2 t0, float32_t2 t1, out float32_t2 apex)
+    {
+        const float32_t determinant = t0.x * t1.y - t0.y * t1.x;
+        const float32_t2 midpoint = (p0 + p1) * 0.5f;
+
+        if (abs(determinant) < 1e-6f)
+        {
+            apex = midpoint;
+            return;
+        }
+
+        const float32_t2 delta = p1 - p0;
+        const float32_t alongT0 = (delta.x * t1.y - delta.y * t1.x) / determinant;
+        apex = p0 + alongT0 * t0;
+
+        const float32_t2 fromMid = apex - midpoint;
+        const float32_t fromMidSq = dot(fromMid, fromMid);
+        const float32_t limitSq = CIRCLE_RADIUS * CIRCLE_RADIUS * 4.0f;
+
+        if (fromMidSq > limitSq)
+        {
+            apex = midpoint + fromMid * (CIRCLE_RADIUS * 2.0f * rsqrt(fromMidSq));
+        }
+    }
+
+    // ========================================================================
+    // Bounding box computation (rotating calipers)
+    //
+    // testEdgeForAxis and computeBoundsForAxis are
+    // templated on a bool to select between two precision levels:
+    //
+    // Accurate=false (used by tryCaliperDir, O(N^2) total calls):
+    //   Tests vertices + edge midpoints only. Cheap (just dot products) and
+    //   sufficient for *ranking* candidate axes, even though it may
+    //   underestimate the true extent of convex edges.
+    //
+    // Accurate=true (used by buildForAxis, called once):
+    //   Also computes tangent-line apex intersections for convex edges to
+    //   find the true extremum. Great circle arcs that project as convex
+    //   curves can bulge beyond their endpoints; the apex (tangent
+    //   evaluation + line intersection + clamping) captures this but is
+    //   ~4x more expensive per edge.
+    //
+    // The fast path gives the same relative ranking of axes (the
+    // approximation error is consistent across candidates), so the
+    // cheapest axis found by Fast is also the cheapest under Accurate.
+ // ======================================================================== + + static void testPoint(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, float32_t2 pt, float32_t2 dir, float32_t2 perpDir) + { + float32_t projAlong = dot(pt, dir); + float32_t projPerp = dot(pt, perpDir); + + minAlong = min(minAlong, projAlong); + maxAlong = max(maxAlong, projAlong); + minPerp = min(minPerp, projPerp); + maxPerp = max(maxPerp, projPerp); + } + + // Accurate=false (Fast): tests vertex + midpoint only. Used O(N^2) times for axis ranking. + // Accurate=true: also computes tangent-line apex for convex edges. Used once for final rect. + template + static void testEdgeForAxis(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, const ClippedSilhouette silhouette, uint32_t convexMask, uint32_t n3Mask, float32_t2 dir, float32_t2 perpDir) + { + const uint32_t nextIdx = (I + 1 < silhouette.count) ? I + 1 : 0; + const float32_t2 projectedVertex = GET_PROJ_VERT(I); + + testPoint(minAlong, maxAlong, minPerp, maxPerp, projectedVertex, dir, perpDir); + + bool isN3 = (n3Mask & (1u << I)) != 0; + + if (Accurate) + { + bool isConvex = (convexMask & (1u << I)) != 0; + + if (!isN3 && !isConvex) + return; + + float32_t3 S = silhouette.vertices[I]; + float32_t3 E = silhouette.vertices[nextIdx]; + float32_t2 midPoint = evalCurvePoint(S, E, 0.5f); + + if (isN3) + { + testPoint(minAlong, maxAlong, minPerp, maxPerp, midPoint, dir, perpDir); + } + + if (isConvex) + { + float32_t2 t0, endTangent; + getProjectedTangents(S, E, t0, endTangent); + + if (dot(t0, perpDir) > 0.0f) + { + float32_t2 apex0; + if (isN3) + { + float32_t2 tangentAtMid = evalCurveTangent(S, E, 0.5f); + computeApexClamped(projectedVertex, midPoint, t0, tangentAtMid, apex0); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex0, dir, perpDir); + + if (dot(tangentAtMid, perpDir) > 0.0f) + { + float32_t2 apex1; + 
computeApexClamped(midPoint, E.xy * CIRCLE_RADIUS, tangentAtMid, endTangent, apex1); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex1, dir, perpDir); + } + } + else + { + computeApexClamped(projectedVertex, E.xy * CIRCLE_RADIUS, t0, endTangent, apex0); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex0, dir, perpDir); + } + } + } + } + else + { + if (isN3) + { + float32_t2 midPoint = evalCurvePoint(silhouette.vertices[I], silhouette.vertices[nextIdx], 0.5f); + testPoint(minAlong, maxAlong, minPerp, maxPerp, midPoint, dir, perpDir); + } + } + } + + // Unrolled bounding box computation for a given axis direction. + // Accurate=false: fast path for axis ranking during candidate selection. + // Accurate=true: tight bounds with apex computation for the final rectangle. + template + static void computeBoundsForAxis(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, const ClippedSilhouette silhouette, uint32_t convexMask, uint32_t n3Mask, float32_t2 dir, float32_t2 perpDir) + { + testEdgeForAxis<0, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + testEdgeForAxis<1, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + testEdgeForAxis<2, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 3) + { + testEdgeForAxis<3, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 4) + { + testEdgeForAxis<4, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 5) + { + testEdgeForAxis<5, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 6) + { + testEdgeForAxis<6, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + } + } + 
} + } + } + + static void tryCaliperDir(inout float32_t bestArea, inout float32_t2 bestDir, const float32_t2 dir, const ClippedSilhouette silhouette, uint32_t n3Mask) + { + float32_t2 perpDir = float32_t2(-dir.y, dir.x); + + float32_t minAlong = 1e10f; + float32_t maxAlong = -1e10f; + float32_t minPerp = 1e10f; + float32_t maxPerp = -1e10f; + + computeBoundsForAxis(minAlong, maxAlong, minPerp, maxPerp, silhouette, 0, n3Mask, dir, perpDir); + + float32_t area = (maxAlong - minAlong) * (maxPerp - minPerp); + if (area < bestArea) + { + bestArea = area; + bestDir = dir; + } + } + + template + static void processEdge(inout float32_t bestArea, inout float32_t2 bestDir, inout uint32_t convexMask, inout uint32_t n3Mask, const ClippedSilhouette silhouette, inout SilEdgeNormals precompSil) + { + const uint32_t nextIdx = (I + 1 < silhouette.count) ? I + 1 : 0; + float32_t3 S = silhouette.vertices[I]; + float32_t3 E = silhouette.vertices[nextIdx]; + precompSil.edgeNormals[I] = float16_t3(cross(S, E)); + + float32_t2 t0, t1; + getProjectedTangents(S, E, t0, t1); + + tryCaliperDir(bestArea, bestDir, t0, silhouette, n3Mask); + + if (nbl::hlsl::cross2D(S.xy, E.xy) < -1e-6f) + { + convexMask |= (1u << I); + tryCaliperDir(bestArea, bestDir, t1, silhouette, n3Mask); + + if (dot(t0, t1) < 0.5f) + { + n3Mask |= (1u << I); + float32_t2 tangentAtMid = evalCurveTangent(S, E, 0.5f); + tryCaliperDir(bestArea, bestDir, tangentAtMid, silhouette, n3Mask); + } + } + } + + // ======================================================================== + // Factory methods + // ======================================================================== + + static Parallelogram buildForAxis(const ClippedSilhouette silhouette, uint32_t convexMask, uint32_t n3Mask, float32_t2 dir) + { + float32_t2 perpDir = float32_t2(-dir.y, dir.x); + + float32_t minAlong = 1e10f; + float32_t maxAlong = -1e10f; + float32_t minPerp = 1e10f; + float32_t maxPerp = -1e10f; + + computeBoundsForAxis(minAlong, maxAlong, minPerp, 
maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + + Parallelogram result; + result.width = float16_t(maxAlong - minAlong); + result.height = float16_t(maxPerp - minPerp); + result.axisDir = float16_t2(dir); + result.corner = float16_t2(minAlong * dir + minPerp * float16_t2(-dir.y, dir.x)); + + return result; + } + + // Silhouette vertices must be normalized before calling create() + static Parallelogram create(const ClippedSilhouette silhouette, out SilEdgeNormals precompSil +#if VISUALIZE_SAMPLES + , + float32_t2 ndc, float32_t3 spherePos, float32_t aaWidth, + inout float32_t4 color +#endif + ) + { + precompSil = (SilEdgeNormals)0; + precompSil.count = silhouette.count; + + uint32_t convexMask = 0; + uint32_t n3Mask = 0; + float32_t bestArea = 1e10f; + float32_t2 bestDir = float32_t2(1.0f, 0.0f); + + processEdge<0>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + processEdge<1>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + processEdge<2>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + if (silhouette.count > 3) + { + processEdge<3>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + if (silhouette.count > 4) + { + processEdge<4>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + if (silhouette.count > 5) + { + processEdge<5>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + if (silhouette.count > 6) + { + processEdge<6>(bestArea, bestDir, convexMask, n3Mask, silhouette, precompSil); + } + } + } + } + + tryCaliperDir(bestArea, bestDir, float32_t2(1.0f, 0.0f), silhouette, n3Mask); + tryCaliperDir(bestArea, bestDir, float32_t2(0.0f, 1.0f), silhouette, n3Mask); + + Parallelogram best = buildForAxis(silhouette, convexMask, n3Mask, bestDir); + +#if VISUALIZE_SAMPLES + for (uint32_t i = 0; i < silhouette.count; i++) + { + if (convexMask & (1u << i)) + { + uint32_t nextIdx = (i + 1) % silhouette.count; + float32_t2 p0 = GET_PROJ_VERT(i); + float32_t2 p1 = 
GET_PROJ_VERT(nextIdx); + + float32_t2 t0, endTangent; + getProjectedTangents(silhouette.vertices[i], silhouette.vertices[nextIdx], t0, endTangent); + + if (n3Mask & (1u << i)) + { + float32_t2 tangentAtMid = evalCurveTangent(silhouette.vertices[i], silhouette.vertices[nextIdx], 0.5f); + float32_t2 midPoint = evalCurvePoint(silhouette.vertices[i], silhouette.vertices[nextIdx], 0.5f); + + float32_t2 apex0, apex1; + computeApexClamped(p0, midPoint, t0, tangentAtMid, apex0); + computeApexClamped(midPoint, p1, tangentAtMid, endTangent, apex1); + + color += drawCorner(float32_t3(apex0, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0, 1)); + color += drawCorner(float32_t3(midPoint, 0.0f), ndc, aaWidth, 0.02, 0.0f, float32_t3(0, 1, 0)); + color += drawCorner(float32_t3(apex1, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0.5, 0)); + } + else + { + float32_t2 apex; + computeApexClamped(p0, p1, t0, endTangent, apex); + color += drawCorner(float32_t3(apex, 0.0f), ndc, aaWidth, 0.03, 0.0f, float32_t3(1, 0, 1)); + } + } + } +#endif +#if DEBUG_DATA + DebugDataBuffer[0].parallelogramArea = best.width * best.height; +#endif + + return best; + } + + float32_t3 sample(NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid) + { + float16_t2 perpDir = float16_t2(-axisDir.y, axisDir.x); + + float16_t2 circleXY = corner + + float16_t(xi.x) * width * axisDir + + float16_t(xi.y) * height * perpDir; + + float32_t3 direction = circleToSphere(circleXY); + + valid = direction.z > 0.0f && silhouette.isInside(direction); + // PDF in solid angle measure: the rectangle is in circle-space (scaled by CIRCLE_RADIUS), + // and the orthographic projection Jacobian is dA_circle/dω = CIRCLE_RADIUS^2 * z + pdf = valid ? 
(CIRCLE_RADIUS * CIRCLE_RADIUS * direction.z / (float32_t(width) * float32_t(height))) : 0.0f; + + return direction; + } +}; + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_PARALLELOGRAM_SAMPLING_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling.hlsl new file mode 100644 index 000000000..fab111b3e --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling.hlsl @@ -0,0 +1,568 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_PYRAMID_SAMPLING_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_PYRAMID_SAMPLING_HLSL_INCLUDED_ + +#include "gpu_common.hlsl" + +#include +#include +#include +#include + +#include "silhouette.hlsl" +#include "drawing.hlsl" + +// ============================================================================ +// Spherical Rectangle Bound via Rotating Calipers +// +// Bounds the silhouette with a spherical rectangle (intersection of two +// orthogonal lunes). Each lune is defined by two great circles (planes +// through the origin). The rectangle is parameterized for downstream +// samplers (Urena, bilinear, biquadratic) in pyramid_sampling/*.hlsl. +// +// Algorithm: +// 1. Rotating Calipers: Find the edge that minimizes the lune-width proxy +// dot(cross(A, B), C) = sin(edge_len) * sin(angular_dist) +// No per-edge normalization needed, scalar triple product suffices. +// +// 2. Build orthonormal frame from the minimum-width edge: +// - axis1 = normalize(cross(A, B)), pole of the primary lune +// - axis2, axis3 complete the frame via edge-based candidate search +// (tryPrimaryFrameCandidate), oriented toward silhouette center +// +// 3. 
Project vertices onto the frame as (x/z, y/z) +// to find the bounding rectangle extents (rectR0, rectExtents) +// +// 4. Fallback: if the primary frame leaves vertices near the z=0 plane, +// fix axis3 = camera forward (0,0,1) and search axis1/axis2 via +// tryFallbackFrameCandidate +// +// Key property: If all vertices are inside a great circle half-space, +// then all edges (geodesic arcs) are also inside. No edge extremum +// checking needed (unlike parallelogram_sampling which works in +// projected 2D space where arcs can bulge beyond vertices). +// ============================================================================ +// Spherical rectangle bound: stores the orthonormal frame and gnomonic +// projection extents. Consumed by UrenaSampler, BilinearSampler, BiquadraticSampler. +struct SphericalPyramid +{ + // Orthonormal frame for the bounding region + float32_t3 axis1; // Primary axis (from minimum-width edge's great circle normal) + float32_t3 axis2; // Secondary axis (perpendicular to axis1) + float32_t3 axis3; // Forward axis, toward silhouette (primary) or camera forward (fallback) + + // SphericalRectangle parameters (in the local frame where axis3 is Z) + float32_t3 rectR0; // Corner position in local frame + float32_t2 rectExtents; // Width (along axis1) and height (along axis2) + float32_t solidAngle; // Solid angle of the bounding region (steradians) + + // ======================================================================== + // Rotating Calipers - Minimum Width Edge Finding (Scalar Triple Product) + // ======================================================================== + + // Simplified metric: dot(cross(A, B), C) = sin(edge_len) * sin(angular_dist) + // This is a lune-area proxy, no per-edge normalization needed for comparison. + // Per-vertex cost: one dot product with precomputed edge normal. + // Per-edge cost: one cross product (replaces addition + rsqrt). 
+ // + // Triangular column-major traversal (rotating calipers pattern): + // Vertex V_j checks against edges 0..j-2. + // V2 -> edge 0; V3 -> edges 0,1; V4 -> edges 0,1,2; etc. + // Total checks: (N-2)(N-1)/2 instead of N(N-2). + // + // Endpoints: dot(cross(A,B), A) = dot(cross(A,B), B) = 0, never affect max. + static void findMinimumWidthEdge(const ClippedSilhouette silhouette, out uint32_t bestEdge, out float32_t3 bestV0, out float32_t3 bestV1, out float32_t bestWidth, out SilEdgeNormals precompSil) + { + precompSil = (SilEdgeNormals)0; + precompSil.count = silhouette.count; + + // Edge normals: cross(v[i], v[i+1]), inward-facing for CCW-from-origin winding + float32_t3 en0 = cross(silhouette.vertices[0], silhouette.vertices[1]); + precompSil.edgeNormals[0] = float16_t3(en0); + float32_t3 en1 = cross(silhouette.vertices[1], silhouette.vertices[2]); + precompSil.edgeNormals[1] = float16_t3(en1); + + // Per-edge max(dot(en_i, v_j)), positive = inside, maximum = widest vertex + float32_t maxDot0 = dot(silhouette.vertices[2], en0); // V2 vs edge 0 + + float32_t maxDot1 = 1e10f; + float32_t maxDot2 = 1e10f; + float32_t maxDot3 = 1e10f; + float32_t maxDot4 = 1e10f; + + if (silhouette.count > 3) + { + float32_t3 en2 = cross(silhouette.vertices[2], silhouette.vertices[3]); + precompSil.edgeNormals[2] = float16_t3(en2); + + // V3 vs edges 0, 1 + float32_t3 v3 = silhouette.vertices[3]; + maxDot0 = max(maxDot0, dot(v3, en0)); + maxDot1 = dot(v3, en1); + + if (silhouette.count > 4) + { + float32_t3 en3 = cross(silhouette.vertices[3], silhouette.vertices[4]); + precompSil.edgeNormals[3] = float16_t3(en3); + + // V4 vs edges 0, 1, 2 + float32_t3 v4 = silhouette.vertices[4]; + maxDot0 = max(maxDot0, dot(v4, en0)); + maxDot1 = max(maxDot1, dot(v4, en1)); + maxDot2 = dot(v4, en2); + + if (silhouette.count > 5) + { + float32_t3 en4 = cross(silhouette.vertices[4], silhouette.vertices[5]); + precompSil.edgeNormals[4] = float16_t3(en4); + + // V5 vs edges 0, 1, 2, 3 + float32_t3 v5 
= silhouette.vertices[5]; + maxDot0 = max(maxDot0, dot(v5, en0)); + maxDot1 = max(maxDot1, dot(v5, en1)); + maxDot2 = max(maxDot2, dot(v5, en2)); + maxDot3 = dot(v5, en3); + + if (silhouette.count > 6) + { + // V6 vs edges 0, 1, 2, 3, 4 + float32_t3 v6 = silhouette.vertices[6]; + maxDot0 = max(maxDot0, dot(v6, en0)); + maxDot1 = max(maxDot1, dot(v6, en1)); + maxDot2 = max(maxDot2, dot(v6, en2)); + maxDot3 = max(maxDot3, dot(v6, en3)); + maxDot4 = dot(v6, en4); + } + } + } + } + + // Best edge: minimum maxDot, no per-edge normalization needed. + // Relative epsilon prevents tie-breaking flicker when two edges have + // nearly identical widths — the current winner is "sticky" unless a + // new edge is meaningfully better (0.1% narrower). + const float32_t EDGE_SELECT_EPS = 1e-3f; + + bestWidth = maxDot0; + bestEdge = 0; + bestV0 = silhouette.vertices[0]; + bestV1 = silhouette.vertices[1]; + + if (silhouette.count > 3) + { + bool better = maxDot1 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot1 : bestWidth; + bestEdge = better ? 1 : bestEdge; + bestV0 = better ? silhouette.vertices[1] : bestV0; + bestV1 = better ? silhouette.vertices[2] : bestV1; + + if (silhouette.count > 4) + { + better = maxDot2 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot2 : bestWidth; + bestEdge = better ? 2 : bestEdge; + bestV0 = better ? silhouette.vertices[2] : bestV0; + bestV1 = better ? silhouette.vertices[3] : bestV1; + + if (silhouette.count > 5) + { + better = maxDot3 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot3 : bestWidth; + bestEdge = better ? 3 : bestEdge; + bestV0 = better ? silhouette.vertices[3] : bestV0; + bestV1 = better ? silhouette.vertices[4] : bestV1; + + if (silhouette.count > 6) + { + better = maxDot4 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot4 : bestWidth; + bestEdge = better ? 4 : bestEdge; + bestV0 = better ? silhouette.vertices[4] : bestV0; + bestV1 = better ? 
silhouette.vertices[5] : bestV1; + } + } + } + } + + // Check the last 2 edges missed by the triangular traversal: + // Edge count-2: vertices[count-2] -> vertices[count-1], check V0..V[count-3] + // Edge count-1: vertices[count-1] -> vertices[0], check V1..V[count-2] + // Explicit per-count unrolling avoids the generic loop with runtime index comparisons. + { + // Penultimate edge: vertices[count-2] -> vertices[count-1] + const uint32_t penIdx = silhouette.count - 2; + float32_t3 enPen = cross(silhouette.vertices[penIdx], silhouette.vertices[penIdx + 1]); + precompSil.edgeNormals[penIdx] = float16_t3(enPen); + float32_t maxDotPen = dot(silhouette.vertices[0], enPen); + if (silhouette.count > 3) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[1], enPen)); + if (silhouette.count > 4) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[2], enPen)); + if (silhouette.count > 5) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[3], enPen)); + if (silhouette.count > 6) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[4], enPen)); + } + } + } + } + + bool betterPen = maxDotPen < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = betterPen ? maxDotPen : bestWidth; + bestEdge = betterPen ? penIdx : bestEdge; + bestV0 = betterPen ? silhouette.vertices[penIdx] : bestV0; + bestV1 = betterPen ? 
silhouette.vertices[penIdx + 1] : bestV1; + + // Last edge: vertices[count-1] -> vertices[0] (wrap-around) + const uint32_t lastIdx = silhouette.count - 1; + float32_t3 enLast = cross(silhouette.vertices[lastIdx], silhouette.vertices[0]); + precompSil.edgeNormals[lastIdx] = float16_t3(enLast); + float32_t maxDotLast = dot(silhouette.vertices[1], enLast); + if (silhouette.count > 3) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[2], enLast)); + if (silhouette.count > 4) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[3], enLast)); + if (silhouette.count > 5) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[4], enLast)); + if (silhouette.count > 6) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[5], enLast)); + } + } + } + } + + bool betterLast = maxDotLast < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = betterLast ? maxDotLast : bestWidth; + bestEdge = betterLast ? lastIdx : bestEdge; + bestV0 = betterLast ? silhouette.vertices[lastIdx] : bestV0; + bestV1 = betterLast ? 
silhouette.vertices[0] : bestV1; + } + } + + // ======================================================================== + // Template-Unrolled Projection Helpers + // ======================================================================== + + // Project a single vertex onto candidate axes, updating bounds and minZ in one fused pass + template + static void projectAndBound(const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], float32_t3 projAxis1, float32_t3 projAxis2, float32_t3 projAxis3, NBL_REF_ARG(float32_t4) bound, NBL_REF_ARG(float32_t) minZ) + { + float32_t3 v = vertices[I]; + float32_t x = dot(v, projAxis1); + float32_t y = dot(v, projAxis2); + float32_t z = dot(v, projAxis3); + minZ = min(minZ, z); + float32_t rcpZ = rcp(z); + float32_t projX = x * rcpZ; + float32_t projY = y * rcpZ; + bound.x = min(bound.x, projX); + bound.y = min(bound.y, projY); + bound.z = max(bound.z, projX); + bound.w = max(bound.w, projY); + } + + // Project all silhouette vertices (template-unrolled, fused bounds + minZ) + static void projectAllVertices(const ClippedSilhouette silhouette, float32_t3 projAxis1, float32_t3 projAxis2, float32_t3 projAxis3, NBL_REF_ARG(float32_t4) bound, NBL_REF_ARG(float32_t) minZ) + { + bound = float32_t4(1e10f, 1e10f, -1e10f, -1e10f); + minZ = 1e10f; + projectAndBound<0>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + projectAndBound<1>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + projectAndBound<2>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + if (silhouette.count > 3) + { + projectAndBound<3>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + if (silhouette.count > 4) + { + projectAndBound<4>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + if (silhouette.count > 5) + { + projectAndBound<5>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ); + if (silhouette.count > 6) + { + projectAndBound<6>(silhouette.vertices, 
projAxis1, projAxis2, projAxis3, bound, minZ); + } + } + } + } + } + + // ======================================================================== + // Template-Unrolled Frame Candidate Selection + // ======================================================================== + + // Try an edge as frame candidate for the primary path (axis1 fixed, find best axis2/axis3) + template + static void tryPrimaryFrameCandidate(NBL_CONST_REF_ARG(ClippedSilhouette) silhouette, float32_t3 fixedAxis1, float32_t3 axis3Ref, + NBL_REF_ARG(float32_t) bestArea, NBL_REF_ARG(float32_t3) bestAxis2, + NBL_REF_ARG(float32_t3) bestAxis3, NBL_REF_ARG(bool) found, + NBL_REF_ARG(float32_t) bestMinZ, NBL_REF_ARG(float32_t4) bestBound) + { + const uint32_t j = CheckCount ? ((I + 1 < silhouette.count) ? I + 1 : 0) : I + 1; + float32_t3 edge = silhouette.vertices[j] - silhouette.vertices[I]; + + // Candidate axis2: perpendicular to edge, in plane perpendicular to axis1 + float32_t3 axis2Cand = cross(fixedAxis1, edge); + float32_t lenSq = dot(axis2Cand, axis2Cand); + if (lenSq < 1e-14f) + return; + axis2Cand *= rsqrt(lenSq); + + // Candidate axis3: completes the frame + float32_t3 axis3Cand = cross(fixedAxis1, axis2Cand); + + // Ensure axis3 points toward center (same hemisphere as reference) + if (dot(axis3Cand, axis3Ref) < 0.0f) + { + axis2Cand = -axis2Cand; + axis3Cand = -axis3Cand; + } + + // Fused: check all vertices have positive z AND compute bounding rect in one pass + float32_t4 bound; + float32_t minZ; + projectAllVertices(silhouette, fixedAxis1, axis2Cand, axis3Cand, bound, minZ); + + // Skip if any vertex would have z <= 0 + if (minZ <= 1e-6f) + return; + + float32_t rectArea = (bound.z - bound.x) * (bound.w - bound.y); + if (rectArea < bestArea) + { + bestArea = rectArea; + bestAxis2 = axis2Cand; + bestAxis3 = axis3Cand; + bestMinZ = minZ; + bestBound = bound; + found = true; + } + } + + // Try an edge as frame candidate for the fallback path (axis3 fixed, find best axis1/axis2) + 
template + static void tryFallbackFrameCandidate(NBL_CONST_REF_ARG(ClippedSilhouette) silhouette, float32_t3 fixedAxis3, NBL_REF_ARG(float32_t) bestArea, NBL_REF_ARG(float32_t3) bestAxis1, NBL_REF_ARG(float32_t3) bestAxis2, NBL_REF_ARG(uint32_t) bestEdge, NBL_REF_ARG(float32_t4) bestBound) + { + const uint32_t j = CheckCount ? ((I + 1 < silhouette.count) ? I + 1 : 0) : I + 1; + float32_t3 edge = silhouette.vertices[j] - silhouette.vertices[I]; + + float32_t3 edgeInPlane = edge - fixedAxis3 * dot(edge, fixedAxis3); + float32_t lenSq = dot(edgeInPlane, edgeInPlane); + if (lenSq < 1e-14f) + return; + + float32_t3 axis1Cand = edgeInPlane * rsqrt(lenSq); + float32_t3 axis2Cand = cross(fixedAxis3, axis1Cand); + + float32_t4 bound; + float32_t minZ; // written by projectAllVertices; not used for rejection in the fallback path + projectAllVertices(silhouette, axis1Cand, axis2Cand, fixedAxis3, bound, minZ); + + float32_t rectArea = (bound.z - bound.x) * (bound.w - bound.y); + if (rectArea < bestArea) + { + bestArea = rectArea; + bestAxis1 = axis1Cand; + bestAxis2 = axis2Cand; + bestBound = bound; + bestEdge = I; + } + } + + // ======================================================================== + // Visualization + // ======================================================================== + +#if VISUALIZE_SAMPLES + float32_t4 visualize(float32_t3 spherePos, float32_t2 ndc, float32_t aaWidth) + { + float32_t4 color = float32_t4(0, 0, 0, 0); + + // Colors for visualization + float32_t3 boundColor1 = float32_t3(1.0f, 0.5f, 0.5f); // Light red for axis1 bounds + float32_t3 boundColor2 = float32_t3(0.5f, 0.5f, 1.0f); // Light blue for axis2 bounds + float32_t3 centerColor = float32_t3(1.0f, 1.0f, 0.0f); // Yellow for center + + float32_t x0 = rectR0.x; + float32_t x1 = rectR0.x + rectExtents.x; + float32_t y0 = rectR0.y; + float32_t y1 = rectR0.y + rectExtents.y; + float32_t z = rectR0.z; + + // Great circle normals for the 4 edges (in local frame, then transform to world) + float32_t3 bottomNormalLocal = normalize(float32_t3(0, -z, y0)); +
float32_t3 topNormalLocal = normalize(float32_t3(0, z, -y1)); + float32_t3 leftNormalLocal = normalize(float32_t3(-z, 0, x0)); + float32_t3 rightNormalLocal = normalize(float32_t3(z, 0, -x1)); + + // Transform to world space + float32_t3 bottomNormal = bottomNormalLocal.x * axis1 + bottomNormalLocal.y * axis2 + bottomNormalLocal.z * axis3; + float32_t3 topNormal = topNormalLocal.x * axis1 + topNormalLocal.y * axis2 + topNormalLocal.z * axis3; + float32_t3 leftNormal = leftNormalLocal.x * axis1 + leftNormalLocal.y * axis2 + leftNormalLocal.z * axis3; + float32_t3 rightNormal = rightNormalLocal.x * axis1 + rightNormalLocal.y * axis2 + rightNormalLocal.z * axis3; + + // Draw the 4 bounding great circles + color += drawGreatCircleHalf(bottomNormal, spherePos, axis3, aaWidth, boundColor2, 0.004f); + color += drawGreatCircleHalf(topNormal, spherePos, axis3, aaWidth, boundColor2, 0.004f); + color += drawGreatCircleHalf(leftNormal, spherePos, axis3, aaWidth, boundColor1, 0.004f); + color += drawGreatCircleHalf(rightNormal, spherePos, axis3, aaWidth, boundColor1, 0.004f); + + // Draw center point (center of the rectangle projected onto sphere); local->world uses the same +axis1/+axis2/+axis3 combination as the edge normals above + float32_t centerX = (x0 + x1) * 0.5f; + float32_t centerY = (y0 + y1) * 0.5f; + float32_t3 centerLocal = normalize(float32_t3(centerX, centerY, z)); + float32_t3 centerWorld = centerLocal.x * axis1 + centerLocal.y * axis2 + centerLocal.z * axis3; + + float32_t3 centerCircle = sphereToCircle(centerWorld); + color += drawCorner(centerCircle, ndc, aaWidth, 0.025f, 0.0f, centerColor); + + color += drawCorner(axis1, ndc, aaWidth, 0.025f, 0.0f, float32_t3(1.0f, 0.0f, 0.0f)); + color += drawCorner(axis2, ndc, aaWidth, 0.025f, 0.0f, float32_t3(0.0f, 1.0f, 0.0f)); + color += drawCorner(axis3, ndc, aaWidth, 0.025f, 0.0f, float32_t3(0.0f, 0.0f, 1.0f)); + + return color; + } +#endif // VISUALIZE_SAMPLES + + // ======================================================================== + // Factory + //
======================================================================== + + static SphericalPyramid create(NBL_CONST_REF_ARG(ClippedSilhouette) silhouette, NBL_REF_ARG(SilEdgeNormals) silEdgeNormals +#if VISUALIZE_SAMPLES + , + float32_t2 ndc, float32_t3 spherePos, float32_t aaWidth, inout float32_t4 color +#endif + ) + { + SphericalPyramid self = (SphericalPyramid)0; // zero-init: solidAngle is never assigned in create() but is read under DEBUG_DATA and returned to the caller + + // Step 1: Find minimum-width edge using rotating calipers with lune metric + uint32_t bestEdge; + float32_t3 bestV0, bestV1; + float32_t minWidth; + findMinimumWidthEdge(silhouette, bestEdge, bestV0, bestV1, minWidth, silEdgeNormals); + + // Step 2: Build orthonormal frame from best edge + // axis1 = perpendicular to the best edge's great circle (primary caliper direction) + self.axis1 = normalize(cross(bestV0, bestV1)); + + // Compute centroid for reference direction + float32_t3 center = silhouette.getCenter(); + float32_t3 centerInPlane = center - self.axis1 * dot(center, self.axis1); + float32_t3 axis3Ref = normalize(centerInPlane); + + // Step 2b: Try each edge-aligned rotation around axis1 to find the axis2/axis3 + // orientation that keeps all vertices in the positive half-space with minimum + // bounding rectangle area + float32_t bestRectArea = 1e20f; + float32_t3 bestAxis2 = cross(axis3Ref, self.axis1); + float32_t3 bestAxis3 = axis3Ref; + bool foundValidFrame = false; + float32_t bestMinZ = 0.0f; + float32_t4 bounds = float32_t4(-0.1f, -0.1f, 0.1f, 0.1f); + + tryPrimaryFrameCandidate<0>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + tryPrimaryFrameCandidate<1>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + tryPrimaryFrameCandidate<2>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + if (silhouette.count > 3) + { + tryPrimaryFrameCandidate<3, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3,
foundValidFrame, bestMinZ, bounds); + if (silhouette.count > 4) + { + tryPrimaryFrameCandidate<4, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + if (silhouette.count > 5) + { + tryPrimaryFrameCandidate<5, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + if (silhouette.count > 6) + { + tryPrimaryFrameCandidate<6, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds); + } + } + } + } + + self.axis2 = bestAxis2; + self.axis3 = bestAxis3; + + // Fallback: if the primary path failed (no valid frame found, or axis3 leaves + // vertices too close to the z=0 singularity), fix axis3 = camera forward and + // search for the best axis1/axis2 rotation around it. + if (!foundValidFrame || bestMinZ < 0.15f) + { + // Use camera forward as axis3 (all silhouette vertices have z > 0 by construction) + self.axis3 = float32_t3(0.0f, 0.0f, 1.0f); + + // Find optimal axis1/axis2 rotation around axis3 by trying each edge + float32_t bestFallbackArea = 1e20f; + // axis3 = (0,0,1), so cross((0,0,1), (1,0,0)) = (0,1,0), cross((0,0,1), (0,1,0)) = (-1,0,0) + self.axis1 = float32_t3(0.0f, 1.0f, 0.0f); + self.axis2 = float32_t3(-1.0f, 0.0f, 0.0f); + + tryFallbackFrameCandidate<0>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + tryFallbackFrameCandidate<1>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + tryFallbackFrameCandidate<2>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + if (silhouette.count > 3) + { + tryFallbackFrameCandidate<3, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + if (silhouette.count > 4) + { + tryFallbackFrameCandidate<4, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + if
(silhouette.count > 5) + { + tryFallbackFrameCandidate<5, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + if (silhouette.count > 6) + { + tryFallbackFrameCandidate<6, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds); + } + } + } + } + } + + // Degenerate bounds check (single computation, after primary/fallback decision) + if (bounds.x >= bounds.z || bounds.y >= bounds.w) + bounds = float32_t4(-0.1f, -0.1f, 0.1f, 0.1f); + + self.rectR0 = float32_t3(bounds.xy, 1.0f); + self.rectExtents = float32_t2(bounds.zw - bounds.xy); + +#if VISUALIZE_SAMPLES + color += drawCorner(center, ndc, aaWidth, 0.05f, 0.0f, float32_t3(1.0f, 0.0f, 1.0f)); + color += visualizeBestCaliperEdge(silhouette.vertices, bestEdge, silhouette.count, spherePos, aaWidth); + color += self.visualize(spherePos, ndc, aaWidth); +#endif + +#if DEBUG_DATA + DebugDataBuffer[0].pyramidAxis1 = self.axis1; + DebugDataBuffer[0].pyramidAxis2 = self.axis2; + DebugDataBuffer[0].pyramidCenter = center; + DebugDataBuffer[0].pyramidHalfWidth1 = (atan(bounds.z) - atan(bounds.x)) * 0.5f; + DebugDataBuffer[0].pyramidHalfWidth2 = (atan(bounds.w) - atan(bounds.y)) * 0.5f; + DebugDataBuffer[0].pyramidSolidAngle = self.solidAngle; + DebugDataBuffer[0].pyramidBestEdge = bestEdge; + DebugDataBuffer[0].pyramidMin1 = bounds.x; + DebugDataBuffer[0].pyramidMin2 = bounds.y; + DebugDataBuffer[0].pyramidMax1 = bounds.z; + DebugDataBuffer[0].pyramidMax2 = bounds.w; +#endif + + return self; + } +}; + +#include "pyramid_sampling/urena.hlsl" +#include "pyramid_sampling/bilinear.hlsl" +#include "pyramid_sampling/biquadratic.hlsl" + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_PYRAMID_SAMPLING_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/bilinear.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/bilinear.hlsl new file mode 100644 index 000000000..7d3319a7c --- /dev/null +++
b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/bilinear.hlsl @@ -0,0 +1,86 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BILINEAR_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BILINEAR_HLSL_INCLUDED_ +#include + +// ============================================================================ +// Bilinear Approximation Sampling (closed-form, faster than biquadratic) +// ============================================================================ +// +struct BilinearSampler +{ + nbl::hlsl::sampling::Bilinear sampler; + + float32_t rcpTotalIntegral; // 1 / mean of the four corner densities; written by create(), not read by sample() + float32_t rectArea; // rectExtents.x * rectExtents.y of the source pyramid + + // Precompute bilinear sampler from pyramid: evaluates the solid-angle density at the 4 rectangle corners + static BilinearSampler create(NBL_CONST_REF_ARG(SphericalPyramid) pyramid) + { + BilinearSampler self; + + // 4 corner positions on the rectangle + const float32_t x0 = pyramid.rectR0.x; + const float32_t x1 = x0 + pyramid.rectExtents.x; + const float32_t y0 = pyramid.rectR0.y; + const float32_t y1 = y0 + pyramid.rectExtents.y; + + // dSA(x,y) = 1 / (x^2 + y^2 + 1)^(3/2) [z = 1.0 in local frame] + const float32_t xx0 = x0 * x0, xx1 = x1 * x1; + const float32_t yy0 = y0 * y0, yy1 = y1 * y1; + + float32_t d; + d = xx0 + yy0 + 1.0f; + const float32_t v00 = rsqrt(d) / d; // x0y0 + d = xx1 + yy0 + 1.0f; + const float32_t v10 = rsqrt(d) / d; // x1y0 + d = xx0 + yy1 + 1.0f; + const float32_t v01 = rsqrt(d) / d; // x0y1 + d = xx1 + yy1 + 1.0f; + const float32_t v11 = rsqrt(d) / d; // x1y1 + + // Bilinear layout: (x0y0, x0y1, x1y0, x1y1) + self.sampler = nbl::hlsl::sampling::Bilinear::create(float32_t4(v00, v01, v10, v11)); + + // Total integral = average of 4 corners (bilinear integral over unit square); clamped below so the reciprocal stays finite + const float32_t totalIntegral = (v00 + v10 + v01 + v11) * 0.25f; + self.rcpTotalIntegral = 1.0f / max(totalIntegral, 1e-20f); +
self.rectArea = pyramid.rectExtents.x * pyramid.rectExtents.y; + + return self; + } + + // Sample a direction on the spherical pyramid using bilinear importance sampling. + // Returns the world-space direction; outputs pdf in solid-angle space and validity flag (pdf is written even when valid is false). + float32_t3 sample(NBL_CONST_REF_ARG(SphericalPyramid) pyramid, NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid) + { + // Step 1: Sample UV from bilinear distribution (closed-form via quadratic formula) + float32_t rcpPdf; + float32_t2 uv = sampler.generate(rcpPdf, xi); + + // Step 2: UV to direction + // Bilinear sampler convention: u.y = first-sampled axis (X), u.x = second-sampled axis (Y) + const float32_t localX = pyramid.rectR0.x + uv.y * pyramid.rectExtents.x; + const float32_t localY = pyramid.rectR0.y + uv.x * pyramid.rectExtents.y; + + // Compute dist2 and rcpLen once, reuse for both normalization and dSA + const float32_t dist2 = localX * localX + localY * localY + 1.0f; + const float32_t rcpLen = rsqrt(dist2); + float32_t3 direction = (localX * pyramid.axis1 + + localY * pyramid.axis2 + + pyramid.axis3) * rcpLen; + + valid = direction.z > 0.0f && silhouette.isInside(direction); + + // PDF in solid angle space: 1 / (rcpPdf * dSA * rectArea), with the denominator floored at 1e-7 + // rcpPdf already = 1/pdfUV from Bilinear::generate, avoid redundant reciprocal + const float32_t dsa = rcpLen / dist2; + pdf = 1.0f / max(rcpPdf * dsa * rectArea, 1e-7f); + + return direction; + } +}; + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BILINEAR_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/biquadratic.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/biquadratic.hlsl new file mode 100644 index 000000000..e75c89595 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/biquadratic.hlsl @@ -0,0 +1,158 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BIQUADRATIC_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BIQUADRATIC_HLSL_INCLUDED_ + +// ============================================================================ +// Biquadratic Approximation Sampling (Hart et al. 2020) +// ============================================================================ +// +// Precomputed biquadratic sampler for importance sampling solid angle density. +// Build once from a SphericalPyramid, then call sample() per random pair. + +struct BiquadraticSampler +{ + // Column-major: cols[i] = (row0[i], row1[i], row2[i]) for fast sliceAtY via dot + float32_t3x3 cols; + + // Precomputed marginal (Y) polynomial: f(y) = c0 + y*(c1 + y*c2) + float32_t margC0, margC1, margC2, margIntegral; + + float32_t rcpTotalIntegral; + float32_t rcpIntegralTimesRcpArea; // rcpTotalIntegral / rectArea (fused for PDF computation) + + // Newton-Raphson CDF inversion for a quadratic PDF (2 iterations) + // Solves: c0*t + (c1/2)*t^2 + (c2/3)*t^3 = u * integral + // Returns sampled t and the PDF value at t (avoids redundant recomputation by caller). + // 2 iterations give ~4 decimal digits, should be sufficient for importance sampling with rejection? 
+ static float32_t sampleQuadraticCDF(float32_t u, float32_t c0, float32_t c1, float32_t c2, float32_t integral, out float32_t lastPdfVal) + { + const float32_t target = u * integral; + const float32_t c1half = c1 * 0.5f; + const float32_t c2third = c2 * (1.0f / 3.0f); + float32_t t = u; + + // Iteration 1 + float32_t cdfVal = t * (c0 + t * (c1half + t * c2third)); + lastPdfVal = c0 + t * (c1 + t * c2); + t = clamp(t - (cdfVal - target) / lastPdfVal, 0.0f, 1.0f); + + // Iteration 2 + cdfVal = t * (c0 + t * (c1half + t * c2third)); + lastPdfVal = c0 + t * (c1 + t * c2); + t = clamp(t - (cdfVal - target) / lastPdfVal, 0.0f, 1.0f); + + return t; + } + + // Precompute biquadratic sampler from pyramid (call ONCE, reuse for all samples) + static BiquadraticSampler create(NBL_CONST_REF_ARG(SphericalPyramid) pyramid) + { + BiquadraticSampler self; + + // 3x3 grid positions on the rectangle + const float32_t x0 = pyramid.rectR0.x; + const float32_t x1 = x0 + 0.5f * pyramid.rectExtents.x; + const float32_t x2 = x0 + pyramid.rectExtents.x; + const float32_t y0 = pyramid.rectR0.y; + const float32_t y1 = y0 + 0.5f * pyramid.rectExtents.y; + const float32_t y2 = y0 + pyramid.rectExtents.y; + + // dSA(x,y) = rsqrt(x^2+y^2+1) / (x^2+y^2+1) [z = rectR0.z = 1.0] + const float32_t xx0 = x0 * x0, xx1 = x1 * x1, xx2 = x2 * x2; + const float32_t yy0 = y0 * y0, yy1 = y1 * y1, yy2 = y2 * y2; + + float32_t3 row0, row1, row2; + float32_t d; + + d = xx0 + yy0 + 1.0f; + row0.x = rsqrt(d) / d; + d = xx1 + yy0 + 1.0f; + row0.y = rsqrt(d) / d; + d = xx2 + yy0 + 1.0f; + row0.z = rsqrt(d) / d; + + d = xx0 + yy1 + 1.0f; + row1.x = rsqrt(d) / d; + d = xx1 + yy1 + 1.0f; + row1.y = rsqrt(d) / d; + d = xx2 + yy1 + 1.0f; + row1.z = rsqrt(d) / d; + + d = xx0 + yy2 + 1.0f; + row2.x = rsqrt(d) / d; + d = xx1 + yy2 + 1.0f; + row2.y = rsqrt(d) / d; + d = xx2 + yy2 + 1.0f; + row2.z = rsqrt(d) / d; + + // Store column-major for sliceAtY: cols[i] = (row0[i], row1[i], row2[i]) + self.cols[0] = float32_t3(row0.x, 
row1.x, row2.x); + self.cols[1] = float32_t3(row0.y, row1.y, row2.y); + self.cols[2] = float32_t3(row0.z, row1.z, row2.z); + + // Marginal along Y: Simpson's rule integral of each row + const float32_t3 marginal = float32_t3( + (row0.x + 4.0f * row0.y + row0.z) / 6.0f, + (row1.x + 4.0f * row1.y + row1.z) / 6.0f, + (row2.x + 4.0f * row2.y + row2.z) / 6.0f); + + // Precompute marginal polynomial: f(y) = c0 + y*(c1 + y*c2) + self.margC0 = marginal[0]; + self.margC1 = -3.0f * marginal[0] + 4.0f * marginal[1] - marginal[2]; + self.margC2 = 2.0f * (marginal[0] - 2.0f * marginal[1] + marginal[2]); + self.margIntegral = (marginal[0] + 4.0f * marginal[1] + marginal[2]) / 6.0f; + + self.rcpTotalIntegral = 1.0f / max(self.margIntegral, 1e-20f); + const float32_t rectArea = pyramid.rectExtents.x * pyramid.rectExtents.y; + self.rcpIntegralTimesRcpArea = self.rcpTotalIntegral / max(rectArea, 1e-20f); + + return self; + } + + // Sample a direction on the spherical pyramid using biquadratic importance sampling. + // Returns the world-space direction; outputs pdf in solid-angle space and validity flag. 
+    float32_t3 sample(NBL_CONST_REF_ARG(SphericalPyramid) pyramid, NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid)
+    {
+        // 1) Draw the rectangle's second coordinate from the precomputed marginal polynomial
+        float32_t marginalPdf; // written by the solver; not needed afterwards
+        const float32_t v = sampleQuadraticCDF(xi.y, margC0, margC1, margC2, margIntegral, marginalPdf);
+
+        // 2) Quadratic Lagrange weights for the nodes 0, 1/2, 1 evaluated at v, then the
+        //    conditional slice values at the three x nodes
+        const float32_t vv = v * v;
+        const float32_t3 weights = float32_t3(
+            2.0f * vv - 3.0f * v + 1.0f,
+            -4.0f * vv + 4.0f * v,
+            2.0f * vv - v);
+        const float32_t3 slice = float32_t3(dot(cols[0], weights), dot(cols[1], weights), dot(cols[2], weights));
+
+        // 3) Quadratic through the slice values (same construction as the marginal), then draw
+        //    the first coordinate from it
+        const float32_t sliceC0 = slice[0];
+        const float32_t sliceC1 = -3.0f * slice[0] + 4.0f * slice[1] - slice[2];
+        const float32_t sliceC2 = 2.0f * (slice[0] - 2.0f * slice[1] + slice[2]);
+        const float32_t sliceIntegral = (slice[0] + 4.0f * slice[1] + slice[2]) / 6.0f;
+        float32_t slicePdf;
+        const float32_t u = sampleQuadraticCDF(xi.x, sliceC0, sliceC1, sliceC2, sliceIntegral, slicePdf);
+
+        // 4) Unit square -> point on the rectangle -> normalised direction
+        const float32_t px = pyramid.rectR0.x + u * pyramid.rectExtents.x;
+        const float32_t py = pyramid.rectR0.y + v * pyramid.rectExtents.y;
+
+        // Squared length of (px, py, 1); its rsqrt serves both the normalisation and dSA
+        const float32_t lenSq = px * px + py * py + 1.0f;
+        const float32_t invLen = rsqrt(lenSq);
+        float32_t3 direction = (px * pyramid.axis1 +
+                                py * pyramid.axis2 +
+                                pyramid.axis3) *
+                               invLen;
+
+        valid = direction.z > 0.0f && silhouette.isInside(direction);
+
+        // 5) PDF in solid angle space = slicePdf / (totalIntegral * dSA * rectArea).
+        //    slicePdf is the density the solver evaluated in its last Newton step.
+        const float32_t dSA = invLen / lenSq;
+        pdf = slicePdf * rcpIntegralTimesRcpArea / max(dSA, 1e-7f);
+
+        return direction;
+    }
+};
+
+#endif // _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BIQUADRATIC_HLSL_INCLUDED_
diff --git 
a/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/urena.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/urena.hlsl
new file mode 100644
index 000000000..6709bf7da
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/urena.hlsl
@@ -0,0 +1,87 @@
+//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_URENA_HLSL_INCLUDED_
+#define _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_URENA_HLSL_INCLUDED_
+
+// ============================================================================
+// Sampling using Urena 2003 (SphericalRectangle)
+// ============================================================================
+
+// Uniform solid-angle sampler for the spherical rectangle spanned by a SphericalPyramid.
+// create() caches everything that depends only on the pyramid; sample() turns one 2D
+// random number into a direction.
+struct UrenaSampler
+{
+    float32_t solidAngle; // Solid angle of the bounding region (steradians)
+    float32_t samplerK;   // = 2*pi - q (angle offset for horizontal sampling)
+    float32_t samplerB0;  // = n_z[0] (normalized edge parameter)
+    float32_t samplerB1;  // = n_z[2] (normalized edge parameter)
+
+    // Computes the solid angle and the sampling intermediates in a single pass, since both
+    // are derived from the same edge-plane terms.
+    // NOTE(review): `sincos_accumulator` and `numbers::pi` carry no template arguments in
+    // this copy of the patch - they look stripped; confirm against the repository.
+    static UrenaSampler create(NBL_CONST_REF_ARG(SphericalPyramid) pyramid)
+    {
+        UrenaSampler self;
+
+        // Un-normalised z components of the four edge-plane normals
+        const float32_t4 rawNz = float32_t4(-pyramid.rectR0.y, pyramid.rectR0.x + pyramid.rectExtents.x, pyramid.rectR0.y + pyramid.rectExtents.y, -pyramid.rectR0.x);
+        const float32_t4 nz = rawNz / sqrt((float32_t4)(pyramid.rectR0.z * pyramid.rectR0.z) + rawNz * rawNz);
+        // Cosines of the four corner angles: -nz[i] * nz[(i+1) % 4]
+        const float32_t4 cornerCos = (-nz) * nz.yzwx;
+
+        // p = acos(cornerCos[0]) + acos(cornerCos[1])
+        nbl::hlsl::math::sincos_accumulator acc = nbl::hlsl::math::sincos_accumulator::create(cornerCos[0]);
+        acc.addCosine(cornerCos[1]);
+        const float32_t p = acc.getSumofArccos();
+        // q = acos(cornerCos[2]) + acos(cornerCos[3])
+        acc = nbl::hlsl::math::sincos_accumulator::create(cornerCos[2]);
+        acc.addCosine(cornerCos[3]);
+        const float32_t q = acc.getSumofArccos();
+
+        self.solidAngle = p + q - 2.0f * nbl::hlsl::numbers::pi;
+        self.samplerK = 2.0f * nbl::hlsl::numbers::pi - q;
+        self.samplerB0 = nz[0];
+        self.samplerB1 = nz[2];
+
+        return self;
+    }
+
+    // Maps `xi` to a direction inside the spherical rectangle. `pdf` is the constant
+    // 1/solidAngle; `valid` is true when the direction has positive z and passes the
+    // silhouette edge test.
+    float32_t3 sample(NBL_CONST_REF_ARG(SphericalPyramid) pyramid, NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid)
+    {
+        // Far corner of the rectangle (rectR0 is the near corner)
+        const float32_t xMax = pyramid.rectR0.x + pyramid.rectExtents.x;
+        const float32_t yMax = pyramid.rectR0.y + pyramid.rectExtents.y;
+
+        // Horizontal CDF inversion
+        const float32_t angle = xi.x * solidAngle + samplerK;
+        float32_t sinAngle, cosAngle;
+        sincos(angle, sinAngle, cosAngle);
+        const float32_t fu = (cosAngle * samplerB0 - samplerB1) / sinAngle;
+
+        // Reference form: cu = sign(fu)/sqrt(cu_2), xu = cu/sqrt(1-cu^2).
+        // Fused here into xu = sign(fu) * rsqrt(cu_2 - 1), which needs a single rsqrt.
+        const float32_t cuSq = max(fu * fu + samplerB0 * samplerB0, 1.0f);
+        float32_t signFu = -1.0f;
+        if (fu >= 0.0f)
+            signFu = 1.0f;
+        const float32_t xu = clamp(signFu * rsqrt(max(cuSq - 1.0f, 1e-10f)), pyramid.rectR0.x, xMax);
+        const float32_t dSq = xu * xu + 1.0f;
+
+        // Vertical sampling in h-space: interpolate between the two edge values
+        const float32_t hLo = pyramid.rectR0.y * rsqrt(dSq + pyramid.rectR0.y * pyramid.rectR0.y);
+        const float32_t hHi = yMax * rsqrt(dSq + yMax * yMax);
+        const float32_t hv = hLo + xi.y * (hHi - hLo);
+
+        // ||(xu,yv,1)||^2 = dSq/(1-hv^2), hence the normalised y component is exactly hv and
+        // the x/z components are (xu, 1) scaled by sqrt(1-hv^2)/sqrt(dSq). No explicit yv or
+        // normalize() is required.
+        const float32_t xzScale = sqrt(max(1.0f - hv * hv, 0.0f)) * rsqrt(dSq);
+        const float32_t3 local = float32_t3(xu * xzScale, hv, xzScale);
+
+        float32_t3 direction = local.x * pyramid.axis1 +
+                               local.y * pyramid.axis2 +
+                               local.z * pyramid.axis3;
+
+        valid = direction.z > 0.0f && silhouette.isInside(direction);
+        pdf = 1.0f / max(solidAngle, 1e-7f);
+
+        return direction;
+    }
+};
+
+#endif // _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_URENA_HLSL_INCLUDED_
diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/ray_vis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/ray_vis.frag.hlsl
new file mode 100644
index 000000000..d01b3a07f
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/ray_vis.frag.hlsl
@@ -0,0 +1,289 @@
+//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#pragma wave shader_stage(fragment)
+
+#include "common.hlsl"
+// NOTE(review): the include target on the next line is missing in this copy of the patch
+#include
+#include "utils.hlsl"
+
+using namespace nbl::hlsl;
+using namespace ext::FullScreenTriangle;
+
+// Fragment output of the ray visualisation pass: colour (rgb), line intensity (a) and an
+// explicit depth value written through SV_Depth.
+struct ArrowResult
+{
+    float32_t4 color : SV_Target0;
+    float32_t depth : SV_Depth;
+};
+
+[[vk::push_constant]] struct PushConstantRayVis pc;
+
+#if VISUALIZE_SAMPLES
+#include "drawing.hlsl"
+
+// Ray-AABB intersection (slab test).
+// Returns the distance to the nearest intersection point, or -1 if there is no hit.
+// When the origin is inside the box the exit distance is returned instead.
+float32_t rayAABBIntersection(float32_t3 rayOrigin, float32_t3 rayDir, float32_t3 aabbMin, float32_t3 aabbMax)
+{
+    float32_t3 invDir = 1.0f / rayDir;
+    float32_t3 t0 = (aabbMin - rayOrigin) * invDir;
+    float32_t3 t1 = (aabbMax - rayOrigin) * invDir;
+
+    float32_t3 tmin = min(t0, t1);
+    float32_t3 tmax = max(t0, t1);
+
+    float32_t tNear = max(max(tmin.x, tmin.y), tmin.z);
+    float32_t tFar = min(min(tmax.x, tmax.y), tmax.z);
+
+    // Slabs do not overlap, or the whole box lies behind the ray
+    if (tNear > tFar || tFar < 0.0)
+        return -1.0;
+
+    // Return the nearest non-negative intersection
+    return tNear >= 0.0 ? tNear : tFar;
+}
+
+// Projects a world-space point and returns its xy after the perspective divide, with x
+// scaled by `aspect`.
+float32_t2 projectToNDC(float32_t3 worldPos, float32_t4x4 viewProj, float32_t aspect)
+{
+    float32_t4 clipPos = mul(viewProj, float32_t4(worldPos, 1.0));
+    clipPos /= clipPos.w;
+
+    // Apply aspect ratio correction
+    clipPos.x *= aspect;
+
+    return clipPos.xy;
+}
+
+// Draws the segment rayOrigin -> rayOrigin + normalize(direction) * arrowLength as a thin
+// line in aspect-corrected NDC space.
+//   directionAndPdf : xyz = ray direction, w = pdf (drives the grey level of the colour)
+//   ndcPos          : position of the shaded pixel in aspect-corrected NDC
+// Returns the grey colour in rgb, line coverage in a, and 1 - z/w of the closest point on
+// the segment in `depth`. All fields are zero when nothing is drawn.
+ArrowResult visualizeRayAsArrow(float32_t3 rayOrigin, float32_t4 directionAndPdf, float32_t arrowLength, float32_t2 ndcPos, float32_t aspect)
+{
+    ArrowResult result;
+    result.color = float32_t4(0, 0, 0, 0);
+    result.depth = 0.0;
+
+    float32_t3 rayDir = normalize(directionAndPdf.xyz);
+    float32_t pdf = directionAndPdf.w;
+
+    // End points of the 3D segment
+    const float32_t3 worldStart = rayOrigin;
+    const float32_t3 worldEnd = rayOrigin + rayDir * arrowLength;
+
+    float32_t4 clipStart = mul(pc.viewProjMatrix, float32_t4(worldStart, 1.0));
+    float32_t4 clipEnd = mul(pc.viewProjMatrix, float32_t4(worldEnd, 1.0));
+
+    // Clipping is done against the w = 0.001 plane in clip space.
+    // If both points are behind the camera there is nothing to draw.
+    if (clipStart.w <= 0.001 && clipEnd.w <= 0.001)
+        return result;
+
+    // Parametric range of the segment that survives clipping
+    float32_t t0 = 0.0;
+    float32_t t1 = 1.0;
+
+    if (clipStart.w <= 0.001)
+    {
+        // Start is behind the camera: move it onto the clip plane
+        float32_t t = (0.001 - clipStart.w) / (clipEnd.w - clipStart.w);
+        t0 = saturate(t);
+        clipStart = lerp(clipStart, clipEnd, t0);
+    }
+
+    if (clipEnd.w <= 0.001)
+    {
+        // End is behind the camera: move it onto the clip plane. At most one of the two
+        // branches runs (the both-behind case returned above), so clipStart is still the
+        // unclipped start point here.
+        float32_t t = (0.001 - clipStart.w) / (clipEnd.w - clipStart.w);
+        t1 = saturate(t);
+        clipEnd = lerp(clipStart, clipEnd, t1);
+    }
+
+    // Reject an empty segment
+    if (t0 >= t1)
+        return result;
+
+    // Perspective divide to NDC
+    float32_t2 ndcStart = clipStart.xy / clipStart.w;
+    float32_t2 ndcEnd = clipEnd.xy / clipEnd.w;
+
+    // Apply aspect ratio correction
+    ndcStart.x *= aspect;
+    ndcEnd.x *= aspect;
+
+    // Arrow direction in NDC
+    float32_t2 arrowVec = ndcEnd - ndcStart;
+    float32_t arrowNDCLength = length(arrowVec);
+
+    // Skip if the arrow is too small on screen
+    if (arrowNDCLength < 0.005)
+        return result;
+
+    // Parameter of the point on the segment closest to the pixel
+    float32_t2 toPixel = ndcPos - ndcStart;
+    float32_t t_ndc = saturate(dot(toPixel, arrowVec) / dot(arrowVec, arrowVec));
+
+    // Draw line shaft
+    float32_t lineThickness = 0.002;
+    float32_t lineIntensity = lineSegment(ndcPos, ndcStart, ndcEnd, lineThickness);
+
+    if (lineIntensity > 0.0)
+    {
+        // Interpolate in clip space, then divide
+        float32_t4 clipPos = lerp(clipStart, clipEnd, t_ndc);
+
+        float32_t depthNDC = clipPos.z / clipPos.w;
+        result.depth = 1.0f - depthNDC;
+
+        // Discard coverage outside the valid depth range
+        if (result.depth < 0.0 || result.depth > 1.0)
+        {
+            lineIntensity = 0.0;
+        }
+    }
+
+    // Modulate by PDF
+    float32_t pdfIntensity = saturate(pdf * 0.5);
+    float32_t3 finalColor = float32_t3(pdfIntensity, pdfIntensity, pdfIntensity);
+
+    result.color = float32_t4(finalColor, lineIntensity);
+    return result;
+}
+
+// Entry and exit distances of a ray against an AABB
+struct AABBIntersection
+{
+    float32_t tMin; // Distance to front face (entry point), clamped to 0 when the origin is inside
+    float32_t tMax; // Distance to back face (exit point)
+    bool hit;       // Whether ray intersects the AABB at all
+};
+
+// Slab test that reports both the entry and the exit distance.
+AABBIntersection rayAABBIntersectionFull(float32_t3 origin, float32_t3 dir, float32_t3 boxMin, float32_t3 boxMax)
+{
+    AABBIntersection result;
+    result.hit = false;
+    result.tMin = 0.0f;
+    result.tMax = 0.0f;
+
+    float32_t3 invDir = 1.0f / dir;
+    float32_t3 t0 = (boxMin - origin) * invDir;
+    float32_t3 t1 = (boxMax - origin) * invDir;
+
+    float32_t3 tmin = min(t0, t1);
+    float32_t3 tmax = max(t0, t1);
+
+    result.tMin = max(max(tmin.x, tmin.y), tmin.z);
+    result.tMax = min(min(tmax.x, tmax.y), tmax.z);
+
+    // Ray intersects if tMax >= tMin and tMax > 0
+    result.hit = (result.tMax >= result.tMin) && (result.tMax > 0.0f);
+
+    // A negative tMin means the origin is inside the box; report the entry at distance 0
+    if (result.tMin < 0.0f)
+        result.tMin = 0.0f;
+
+    return result;
+}
+#endif // VISUALIZE_SAMPLES
+
+// Fragment entry point: draws the clipped silhouette vertices and one arrow per recorded
+// sample ray. Green arrows hit the cube, red arrows miss it.
+// [shader("pixel")]
+[[vk::location(0)]] ArrowResult main(SVertexAttributes vx)
+{
+    // Initialise unconditionally so the shader never returns undefined colour/depth when
+    // VISUALIZE_SAMPLES is disabled.
+    ArrowResult output;
+    output.color = float32_t4(0.0, 0.0, 0.0, 0.0);
+    output.depth = 0.0; // Far plane in reversed-Z (near=1, far=0)
+#if VISUALIZE_SAMPLES
+    float32_t maxDepth = 0.0; // Closest depth drawn so far (largest value in reversed-Z)
+    float32_t aaWidth = length(float32_t2(ddx(vx.uv.x), ddy(vx.uv.y)));
+
+    // Convert to NDC space with aspect ratio correction
+    float32_t2 ndcPos = vx.uv * 2.0f - 1.0f;
+    float32_t aspect = pc.viewport.z / pc.viewport.w;
+    ndcPos.x *= aspect;
+
+    for (uint32_t v = 0; v < DebugDataBuffer[0].clippedSilhouetteVertexCount; v++)
+    {
+        float32_t4 clipPos = mul(pc.viewProjMatrix, float32_t4(DebugDataBuffer[0].clippedSilhouetteVertices[v], 1.0));
+        float32_t3 ndcPosVertex = clipPos.xyz / clipPos.w;
+        // NOTE(review): this compares raw NDC z with maxDepth, which is accumulated below as
+        // 1 - z; the two use different conventions - confirm the intended test.
+        if (ndcPosVertex.z < maxDepth)
+            continue;
+
+        float32_t4 intensity = drawCorner(ndcPosVertex, ndcPos, aaWidth, 0.03, 0.0, colorLUT[DebugDataBuffer[0].clippedSilhouetteVerticesIndices[v]]);
+
+        // Update depth only where we drew something
+        if (any(intensity.rgb > 0.0))
+        {
+            output.color.rgb += intensity.rgb;
+            maxDepth = max(maxDepth, 1.0f - ndcPosVertex.z);
+        }
+    }
+
+    uint32_t sampleCount = DebugDataBuffer[0].sampleCount;
+
+    for (uint32_t i = 0; i < sampleCount; i++)
+    {
+        float32_t3 rayOrigin = float32_t3(0, 0, 0);
+        float32_t4 directionAndPdf = DebugDataBuffer[0].rayData[i];
+        float32_t3 rayDir = normalize(directionAndPdf.xyz);
+
+        // Define cube bounds in local space
+        float32_t3 cubeLocalMin = float32_t3(-0.5, -0.5, -0.5);
+        float32_t3 cubeLocalMax = float32_t3(0.5, 0.5, 0.5);
+
+        // Transform ray to local space of the cube (using precomputed inverse)
+        float32_t3 localRayOrigin = mul(pc.invModelMatrix, float32_t4(rayOrigin, 1.0)).xyz;
+        float32_t3 localRayDir = normalize(mul(pc.invModelMatrix, float32_t4(rayDir, 0.0)).xyz);
+
+        // Get both entry and exit distances
+        AABBIntersection intersection = rayAABBIntersectionFull(localRayOrigin, localRayDir, cubeLocalMin, cubeLocalMax);
+
+        float32_t arrowLength;
+        float32_t3 arrowColor;
+
+        if (intersection.hit)
+        {
+            // Use tMax (exit point at back face) instead of tMin (entry point at front face)
+            float32_t3 localExitPoint = localRayOrigin + localRayDir * intersection.tMax;
+            float32_t3 worldExitPoint = mul(pc.modelMatrix, float32_t4(localExitPoint, 1.0)).xyz;
+            arrowLength = length(worldExitPoint - rayOrigin);
+            arrowColor = float32_t3(0.0, 1.0, 0.0); // Green for valid samples
+        }
+        else
+        {
+            // Ray doesn't intersect - THIS SHOULD NEVER HAPPEN with correct sampling!
+            float32_t3 cubeCenter = mul(pc.modelMatrix, float32_t4(0, 0, 0, 1)).xyz;
+            arrowLength = length(cubeCenter - rayOrigin) + 2.0;
+            arrowColor = float32_t3(1.0, 0.0, 0.0); // Red for BROKEN samples
+        }
+
+        ArrowResult arrow = visualizeRayAsArrow(rayOrigin, directionAndPdf, arrowLength, ndcPos, aspect);
+
+        // Only update depth if arrow was actually drawn
+        if (arrow.color.a > 0.0)
+        {
+            maxDepth = max(maxDepth, arrow.depth);
+        }
+
+        // Modulate arrow color by its alpha (only add where arrow is visible)
+        output.color.rgb += arrowColor * arrow.color.a;
+        output.color.a = max(output.color.a, arrow.color.a);
+    }
+
+    // Clamp to prevent overflow
+    output.color = saturate(output.color);
+    output.color.a = 1.0;
+
+    // Write the closest depth (largest value in reversed-Z)
+    output.depth = maxDepth;
+
+#endif
+    return output;
+}
diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl
new file mode 100644
index 000000000..8213c17fc
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl
@@ -0,0 +1,244 @@
+//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ + +#include "gpu_common.hlsl" + +struct ClippedSilhouette +{ + float32_t3 vertices[MAX_SILHOUETTE_VERTICES]; // Max 7 vertices after clipping, unnormalized + uint32_t count; + + void normalize() + { + vertices[0] = nbl::hlsl::normalize(vertices[0]); + vertices[1] = nbl::hlsl::normalize(vertices[1]); + vertices[2] = nbl::hlsl::normalize(vertices[2]); + if (count > 3) + { + vertices[3] = nbl::hlsl::normalize(vertices[3]); + if (count > 4) + { + vertices[4] = nbl::hlsl::normalize(vertices[4]); + if (count > 5) + { + vertices[5] = nbl::hlsl::normalize(vertices[5]); + if (count > 6) + { + vertices[6] = nbl::hlsl::normalize(vertices[6]); + } + } + } + } + } + + // Compute the silhouette centroid (average direction) + float32_t3 getCenter() + { + float32_t3 sum = float32_t3(0, 0, 0); + + NBL_UNROLL + for (uint32_t i = 0; i < MAX_SILHOUETTE_VERTICES; i++) + { + if (i < count) + sum += vertices[i]; + } + + return nbl::hlsl::normalize(sum); + } + + static uint32_t computeRegionAndConfig(float32_t3x4 modelMatrix, out uint32_t3 region, out uint32_t configIndex, out uint32_t vertexCount) + { + float32_t4x3 columnModel = transpose(modelMatrix); + float32_t3 obbCenter = columnModel[3].xyz; + float32_t3x3 upper3x3 = (float32_t3x3)columnModel; + + float32_t3 rcpSqScales = rcp(float32_t3( + dot(upper3x3[0], upper3x3[0]), + dot(upper3x3[1], upper3x3[1]), + dot(upper3x3[2], upper3x3[2]))); + + float32_t3 normalizedProj = mul(upper3x3, obbCenter) * rcpSqScales; + + region = uint32_t3( + normalizedProj.x < -0.5f ? 0 : (normalizedProj.x > 0.5f ? 2 : 1), + normalizedProj.y < -0.5f ? 0 : (normalizedProj.y > 0.5f ? 2 : 1), + normalizedProj.z < -0.5f ? 0 : (normalizedProj.z > 0.5f ? 
2 : 1)); + + configIndex = region.x + region.y * 3u + region.z * 9u; + + uint32_t sil = binSilhouettes[configIndex]; + vertexCount = getSilhouetteSize(sil); + + return sil; + } + + void compute(float32_t3x4 modelMatrix, uint32_t vertexCount, uint32_t sil) + { + count = 0; + + // Build clip mask (z < 0) + uint32_t clipMask = 0u; + NBL_UNROLL + for (uint32_t i = 0; i < 4; i++) + clipMask |= (getVertexZNeg(modelMatrix, getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; + + if (vertexCount == 6) + { + NBL_UNROLL + for (uint32_t i = 4; i < 6; i++) + clipMask |= (getVertexZNeg(modelMatrix, getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; + } + + uint32_t clipCount = countbits(clipMask); + + // Invert clip mask to find first positive vertex + uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); + + // Check if wrap-around is needed (first and last bits negative) + bool wrapAround = ((clipMask & 1u) != 0u) && ((clipMask & (1u << (vertexCount - 1))) != 0u); + + // Compute rotation amount + uint32_t rotateAmount = wrapAround + ? 
firstbitlow(invertedMask) // first positive + : firstbithigh(clipMask) + 1; // first vertex after last negative + + // Rotate masks + uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); + uint32_t rotatedSil = rotr(sil, rotateAmount * 3, vertexCount * 3); + uint32_t positiveCount = vertexCount - clipCount; + + // ALWAYS compute both clip points + uint32_t lastPosIdx = positiveCount - 1; + uint32_t firstNegIdx = positiveCount; + + float32_t3 vLastPos = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, lastPosIdx)); + float32_t3 vFirstNeg = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, firstNegIdx)); + float32_t t = vLastPos.z / (vLastPos.z - vFirstNeg.z); + float32_t3 clipA = lerp(vLastPos, vFirstNeg, t); + + float32_t3 vLastNeg = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, vertexCount - 1)); + float32_t3 vFirstPos = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, 0)); + t = vLastNeg.z / (vLastNeg.z - vFirstPos.z); + float32_t3 clipB = lerp(vLastNeg, vFirstPos, t); + + NBL_UNROLL + for (uint32_t i = 0; i < positiveCount; i++) + { + float32_t3 v0 = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, i)); + +#if DEBUG_DATA + uint32_t originalIndex = (i + rotateAmount) % vertexCount; + DebugDataBuffer[0].clippedSilhouetteVertices[count] = v0; + DebugDataBuffer[0].clippedSilhouetteVerticesIndices[count] = originalIndex; +#endif + vertices[count++] = v0; + } + + if (clipCount > 0 && clipCount < vertexCount) + { +#if DEBUG_DATA + DebugDataBuffer[0].clippedSilhouetteVertices[count] = clipA; + DebugDataBuffer[0].clippedSilhouetteVerticesIndices[count] = CLIP_POINT_A; +#endif + vertices[count++] = clipA; + +#if DEBUG_DATA + DebugDataBuffer[0].clippedSilhouetteVertices[count] = clipB; + DebugDataBuffer[0].clippedSilhouetteVerticesIndices[count] = CLIP_POINT_B; +#endif + vertices[count++] = clipB; + } + +#if DEBUG_DATA + DebugDataBuffer[0].clippedSilhouetteVertexCount = count; + DebugDataBuffer[0].clipMask = clipMask; + 
DebugDataBuffer[0].clipCount = clipCount; + DebugDataBuffer[0].rotatedClipMask = rotatedClipMask; + DebugDataBuffer[0].rotateAmount = rotateAmount; + DebugDataBuffer[0].positiveVertCount = positiveCount; + DebugDataBuffer[0].wrapAround = (uint32_t)wrapAround; + DebugDataBuffer[0].rotatedSil = rotatedSil; +#endif + } +}; + +struct SilEdgeNormals +{ + float16_t3 edgeNormals[MAX_SILHOUETTE_VERTICES]; // 10.5 floats instead of 21 + uint32_t count; + + // Better not use and calculate it while creating the sampler + static SilEdgeNormals create(NBL_CONST_REF_ARG(ClippedSilhouette) sil) + { + SilEdgeNormals result = (SilEdgeNormals)0; + result.count = sil.count; + + float32_t3 v0 = sil.vertices[0]; + float32_t3 v1 = sil.vertices[1]; + float32_t3 v2 = sil.vertices[2]; + + result.edgeNormals[0] = float16_t3(cross(v0, v1)); + result.edgeNormals[1] = float16_t3(cross(v1, v2)); + + if (sil.count > 3) + { + float32_t3 v3 = sil.vertices[3]; + result.edgeNormals[2] = float16_t3(cross(v2, v3)); + + if (sil.count > 4) + { + float32_t3 v4 = sil.vertices[4]; + result.edgeNormals[3] = float16_t3(cross(v3, v4)); + + if (sil.count > 5) + { + float32_t3 v5 = sil.vertices[5]; + result.edgeNormals[4] = float16_t3(cross(v4, v5)); + + if (sil.count > 6) + { + float32_t3 v6 = sil.vertices[6]; + result.edgeNormals[5] = float16_t3(cross(v5, v6)); + result.edgeNormals[6] = float16_t3(cross(v6, v0)); + } + else + { + result.edgeNormals[5] = float16_t3(cross(v5, v0)); + } + } + else + { + result.edgeNormals[4] = float16_t3(cross(v4, v0)); + } + } + else + { + result.edgeNormals[3] = float16_t3(cross(v3, v0)); + } + } + else + { + result.edgeNormals[2] = float16_t3(cross(v2, v0)); + } + + return result; + } + + bool isInside(float32_t3 dir) + { + float16_t3 d = float16_t3(dir); + half maxDot = dot(d, edgeNormals[0]); + maxDot = max(maxDot, dot(d, edgeNormals[1])); + maxDot = max(maxDot, dot(d, edgeNormals[2])); + maxDot = max(maxDot, dot(d, edgeNormals[3])); + maxDot = max(maxDot, dot(d, 
edgeNormals[4])); + maxDot = max(maxDot, dot(d, edgeNormals[5])); + maxDot = max(maxDot, dot(d, edgeNormals[6])); + return maxDot <= float16_t(0.0f); + } +}; + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/solid_angle_vis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/solid_angle_vis.frag.hlsl new file mode 100644 index 000000000..bba9aba75 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/solid_angle_vis.frag.hlsl @@ -0,0 +1,305 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#pragma wave shader_stage(fragment) + +#include "common.hlsl" +#include + +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +#include "drawing.hlsl" +#include "utils.hlsl" +#include "silhouette.hlsl" +#include "triangle_sampling.hlsl" +#include "pyramid_sampling.hlsl" +#include "parallelogram_sampling.hlsl" + +[[vk::push_constant]] struct PushConstants pc; + +static const SAMPLING_MODE samplingMode = (SAMPLING_MODE)SAMPLING_MODE_CONST; + +void computeCubeGeo() +{ + for (uint32_t i = 0; i < 8; i++) + corners[i] = mul(pc.modelMatrix, float32_t4(constCorners[i], 1.0f)).xyz; + + for (uint32_t f = 0; f < 6; f++) + { + faceCenters[f] = float32_t3(0, 0, 0); + for (uint32_t v = 0; v < 4; v++) + faceCenters[f] += corners[faceToCorners[f][v]]; + faceCenters[f] /= 4.0f; + } +} + +void validateSilhouetteEdges(uint32_t sil, uint32_t vertexCount, inout uint32_t silEdgeMask) +{ +#if DEBUG_DATA + { + for (uint32_t i = 0; i < vertexCount; i++) + { + uint32_t vIdx = i % vertexCount; + uint32_t v1Idx = (i + 1) % vertexCount; + + uint32_t v0Corner = getSilhouetteVertex(sil, vIdx); + uint32_t v1Corner = getSilhouetteVertex(sil, v1Idx); + // Mark edge as part of silhouette + for (uint32_t e = 0; e < 12; e++) + { + uint32_t2 edge = allEdges[e]; + if 
((edge.x == v0Corner && edge.y == v1Corner) || + (edge.x == v1Corner && edge.y == v0Corner)) + { + silEdgeMask |= (1u << e); + } + } + } + validateEdgeVisibility(pc.modelMatrix, sil, vertexCount, silEdgeMask); + } +#endif +} + +void computeSpherePos(SVertexAttributes vx, out float32_t2 ndc, out float32_t3 spherePos) +{ + ndc = vx.uv * 2.0f - 1.0f; + float32_t aspect = pc.viewport.z / pc.viewport.w; + ndc.x *= aspect; + + float32_t2 normalized = ndc / CIRCLE_RADIUS; + float32_t r2 = dot(normalized, normalized); + + if (r2 <= 1.0f) + { + spherePos = float32_t3(normalized.x, normalized.y, sqrt(1.0f - r2)); + } + else + { + float32_t uv2Plus1 = r2 + 1.0f; + spherePos = float32_t3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; + } + spherePos = normalize(spherePos); +} + +#if VISUALIZE_SAMPLES +float32_t4 visualizeSample(float32_t3 sampleDir, float32_t2 xi, uint32_t index, float32_t2 screenUV, float32_t3 spherePos, float32_t2 ndc, float32_t aaWidth +#if DEBUG_DATA + , + inout RWStructuredBuffer DebugDataBuffer +#endif +) +{ + float32_t4 accumColor = 0; + + float32_t2 pssSize = float32_t2(0.3, 0.3); // 30% of screen + float32_t2 pssPos = float32_t2(0.01, 0.01); // Offset from corner + bool isInsidePSS = all(and(screenUV >= pssPos, screenUV <= (pssPos + pssSize))); + + float32_t dist3D = distance(sampleDir, normalize(spherePos)); + float32_t alpha3D = 1.0f - smoothstep(0.0f, 0.02f, dist3D); + + if (alpha3D > 0.0f /* && !isInsidePSS*/) + { + float32_t3 sampleColor = colorLUT[index].rgb; + accumColor += float32_t4(sampleColor * alpha3D, alpha3D); + } + + // if (isInsidePSS) + // { + // // Map the raw xi to the PSS square dimensions + // float32_t2 xiPixelPos = pssPos + xi * pssSize; + // float32_t dist2D = distance(screenUV, xiPixelPos); + + // float32_t alpha2D = drawCross2D(screenUV, xiPixelPos, 0.005f, 0.001f); + // if (alpha2D > 0.0f) + // { + // float32_t3 sampleColor = colorLUT[index].rgb; + // accumColor += float32_t4(sampleColor * alpha2D, 
alpha2D); + // } + // } + + // // just the outline of the PSS + // if (isInsidePSS && accumColor.a < 0.1) + // accumColor = float32_t4(0.1, 0.1, 0.1, 1.0); + + return accumColor; +} +#endif // VISUALIZE_SAMPLES + +// [shader("pixel")] +[[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 +{ + float32_t4 color = float32_t4(0, 0, 0, 0); + for (uint32_t i = 0; i < 1; i++) + { + float32_t aaWidth = length(float32_t2(ddx(vx.uv.x), ddy(vx.uv.y))); + float32_t3 spherePos; + float32_t2 ndc; + computeSpherePos(vx, ndc, spherePos); +#if !FAST || DEBUG_DATA + computeCubeGeo(); +#endif + uint32_t3 region; + uint32_t configIndex; + uint32_t vertexCount; + uint32_t sil = ClippedSilhouette::computeRegionAndConfig(pc.modelMatrix, region, configIndex, vertexCount); + + uint32_t silEdgeMask = 0; // TODO: take from 'fast' compute() +#if DEBUG_DATA + validateSilhouetteEdges(sil, vertexCount, silEdgeMask); +#endif + ClippedSilhouette silhouette; + silhouette.compute(pc.modelMatrix, vertexCount, sil); + +#if VISUALIZE_SAMPLES + // Draw silhouette edges on the sphere + for (uint32_t ei = 0; ei < silhouette.count; ei++) + { + float32_t3 v0 = normalize(silhouette.vertices[ei]); + float32_t3 v1 = normalize(silhouette.vertices[(ei + 1) % silhouette.count]); + float32_t3 pts[2] = {v0, v1}; + color += drawEdge(0, pts, spherePos, aaWidth); + } +#endif + + TriangleFanSampler samplingData; + Parallelogram parallelogram; + SphericalPyramid pyramid; + UrenaSampler urena; + BiquadraticSampler biquad; + BilinearSampler bilin; + + SilEdgeNormals silEdgeNormals; + //===================================================================== + // Building + //===================================================================== + if (samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || + samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + { + samplingData = TriangleFanSampler::create(silhouette, samplingMode); + } + else if (samplingMode == 
SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) + { + silhouette.normalize(); + parallelogram = Parallelogram::create(silhouette, silEdgeNormals +#if VISUALIZE_SAMPLES + , + ndc, spherePos, aaWidth, color +#endif + ); + } + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE || + samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC || + samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) + { + pyramid = SphericalPyramid::create(silhouette, silEdgeNormals +#if VISUALIZE_SAMPLES + , + ndc, spherePos, aaWidth, color +#endif + ); + + if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE) + urena = UrenaSampler::create(pyramid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC) + biquad = BiquadraticSampler::create(pyramid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) + bilin = BilinearSampler::create(pyramid); + } + +#if DEBUG_DATA + uint32_t validSampleCount = 0u; + DebugDataBuffer[0].sampleCount = pc.sampleCount; +#endif + //===================================================================== + // Sampling + //===================================================================== + for (uint32_t i = 0; i < pc.sampleCount; i++) + { + // Hash the invocation to offset the grid + float32_t2 xi = float32_t2( + (float32_t(i & 7u) + 0.5) / 8.0f, + (float32_t(i >> 3u) + 0.5) / 8.0f); + + float32_t pdf; + uint32_t index = 0; + float32_t3 sampleDir; + bool valid; + + if (samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + sampleDir = samplingData.sample(silhouette, xi, pdf, index); + else if (samplingMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) + sampleDir = parallelogram.sample(silEdgeNormals, xi, pdf, valid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE) + sampleDir = urena.sample(pyramid, 
silEdgeNormals, xi, pdf, valid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC) + sampleDir = biquad.sample(pyramid, silEdgeNormals, xi, pdf, valid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) + sampleDir = bilin.sample(pyramid, silEdgeNormals, xi, pdf, valid); + + if (!valid) + { + pdf = 0.0f; + // sampleDir = float32_t3(0, 0, 1); + } +#if DEBUG_DATA + else + { + validSampleCount++; + } + + DebugDataBuffer[0].rayData[i] = float32_t4(sampleDir, pdf); +#endif + +#if VISUALIZE_SAMPLES + // Draw samples on sphere + color += visualizeSample(sampleDir, xi, index, vx.uv, spherePos, ndc, aaWidth +#if DEBUG_DATA + , + DebugDataBuffer +#endif + ); +#else + if (pdf > 0.0f) + color += float4(sampleDir * 0.02f / pdf, 1.0f); +#endif // VISUALIZE_SAMPLES + } + +#if VISUALIZE_SAMPLES + + // For debugging: Draw a small indicator of which faces are found + // color += drawVisibleFaceOverlay(pc.modelMatrix, spherePos, region, aaWidth); + + // color += drawFaces(pc.modelMatrix, spherePos, aaWidth); + + // Draw clipped silhouette vertices + // color += drawClippedSilhouetteVertices(ndc, silhouette, aaWidth); + // color += drawHiddenEdges(pc.modelMatrix, spherePos, silEdgeMask, aaWidth); + // color += drawCorners(pc.modelMatrix, ndc, aaWidth, 0.05f); + color += drawRing(ndc, aaWidth); + + if (all(vx.uv >= float32_t2(0.f, 0.97f)) && all(vx.uv <= float32_t2(0.03f, 1.0f))) + { + return float32_t4(colorLUT[configIndex], 1.0f); + } +#else +#endif // VISUALIZE_SAMPLES + +#if DEBUG_DATA + InterlockedAdd(DebugDataBuffer[0].validSampleCount, validSampleCount); + InterlockedAdd(DebugDataBuffer[0].threadCount, 1u); + DebugDataBuffer[0].region = uint32_t3(region); + DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); + DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); + for (uint32_t i = 0; i < 6; i++) + { + DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); + } + 
DebugDataBuffer[0].silhouette = sil;
+
+#endif
+    }
+
+    return color;
+}
diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/triangle_sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/triangle_sampling.hlsl
new file mode 100644
index 000000000..46277ca27
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/triangle_sampling.hlsl
@@ -0,0 +1,254 @@
+//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _SOLID_ANGLE_VIS_EXAMPLE_TRIANGLE_SAMPLING_HLSL_INCLUDED_
+#define _SOLID_ANGLE_VIS_EXAMPLE_TRIANGLE_SAMPLING_HLSL_INCLUDED_
+
+// Include the spherical triangle utilities
+#include "gpu_common.hlsl"
+#include
+#include
+#include
+#include
+#include
+#include "silhouette.hlsl"
+
+using namespace nbl::hlsl;
+
+// Maximum number of triangles we can have after clipping
+// Without clipping, max 3 faces can be visible at once so 3 faces * 2 triangles = 6 edges, forming max 4 triangles
+// With clipping, one more edge. 7 - 2 = 5 max triangles because fanning from one vertex
+#define MAX_TRIANGLES 5
+
+// Samples directions over a silhouette by fanning it into spherical triangles around vertex 0.
+// create() caches one selection weight per triangle; sample() picks a triangle by weight and
+// rebuilds its geometry from the silhouette on demand.
+struct TriangleFanSampler
+{
+    uint32_t count;                               // Number of valid triangles
+    uint32_t samplingMode;                        // Mode used during build
+    float32_t totalWeight;                        // Sum of all triangle weights
+    float32_t3 faceNormal;                        // Face normal (only used for projected mode)
+    float32_t triangleSolidAngles[MAX_TRIANGLES]; // Weight per triangle (for selection)
+    uint32_t triangleIndices[MAX_TRIANGLES];      // Vertex index i (forms triangle with v0, vi, vi+1)
+
+    // Projected solid angle of the spherical triangle (v0, v1, v2) with respect to normal N,
+    // computed as half the sum of (arc length * dot(edge normal, N)) over the three edges.
+    // Returns 0 when any two of the vertices are (anti)parallel within 1e-7.
+    float32_t computeProjectedSolidAngleFallback(float32_t3 v0, float32_t3 v1, float32_t3 v2, float32_t3 N)
+    {
+        // 1. Get edge normals (unit vectors)
+        // We use the cross product of the vertices (unit vectors on sphere)
+        float32_t3 n0 = cross(v0, v1);
+        float32_t3 n1 = cross(v1, v2);
+        float32_t3 n2 = cross(v2, v0);
+
+        // 2. Normalize edge normals (magnitude is sin of the arc length)
+        float32_t l0 = length(n0);
+        float32_t l1 = length(n1);
+        float32_t l2 = length(n2);
+
+        // Guard against degenerate triangles
+        if (l0 < 1e-7 || l1 < 1e-7 || l2 < 1e-7)
+            return 0.0f;
+
+        n0 /= l0;
+        n1 /= l1;
+        n2 /= l2;
+
+        // 3. Get arc lengths (angles in radians)
+        float32_t a = asin(clamp(l0, -1.0f, 1.0f)); // side v0-v1
+        float32_t b = asin(clamp(l1, -1.0f, 1.0f)); // side v1-v2
+        float32_t c = asin(clamp(l2, -1.0f, 1.0f)); // side v2-v0
+
+        // Handle acos/asin quadrant if dot product is negative
+        if (dot(v0, v1) < 0)
+            a = 3.14159265 - a;
+        if (dot(v1, v2) < 0)
+            b = 3.14159265 - b;
+        if (dot(v2, v0) < 0)
+            c = 3.14159265 - c;
+
+        // 4. Compute projected solid angle
+        float32_t Gamma = 0.5f * (a * dot(n0, N) + b * dot(n1, N) + c * dot(n2, N));
+
+        // Return the absolute value of the total
+        return abs(Gamma);
+    }
+
+    // Build fan triangulation, cache weights for triangle selection
+    // `mode` picks the weight: projected solid angle for TRIANGLE_PROJECTED_SOLID_ANGLE, plain solid angle otherwise.
+    // Silhouettes with fewer than 3 vertices yield an empty sampler (count == 0, totalWeight == 0).
+    static TriangleFanSampler create(ClippedSilhouette silhouette, uint32_t mode)
+    {
+        TriangleFanSampler self;
+        self.count = 0;
+        self.totalWeight = 0.0f;
+        self.samplingMode = mode;
+        self.faceNormal = float32_t3(0, 0, 0);
+
+        if (silhouette.count < 3)
+            return self;
+
+        const float32_t3 v0 = silhouette.vertices[0];
+        const float32_t3 origin = float32_t3(0, 0, 0);
+
+        // Compute face normal ONCE before the loop - silhouette is planar!
+        if (mode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE)
+        {
+            float32_t3 v1 = silhouette.vertices[1];
+            float32_t3 v2 = silhouette.vertices[2];
+            self.faceNormal = normalize(cross(v1 - v0, v2 - v0));
+        }
+
+        // Build fan triangulation from v0
+        NBL_UNROLL
+        for (uint32_t i = 1; i < silhouette.count - 1; i++)
+        {
+            float32_t3 v1 = silhouette.vertices[i];
+            float32_t3 v2 = silhouette.vertices[i + 1];
+
+            shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin);
+
+            // Skip degenerate triangles
+            if (shapeTri.pyramidAngles())
+                continue;
+
+            // Calculate triangle solid angle
+            float32_t solidAngle;
+            if (mode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE)
+            {
+                float32_t3 cos_vertices = clamp(
+                    (shapeTri.cos_sides - shapeTri.cos_sides.yzx * shapeTri.cos_sides.zxy) *
+                        shapeTri.csc_sides.yzx * shapeTri.csc_sides.zxy,
+                    float32_t3(-1.0f, -1.0f, -1.0f),
+                    float32_t3(1.0f, 1.0f, 1.0f));
+                solidAngle = shapeTri.projectedSolidAngleOfTriangle(self.faceNormal, shapeTri.cos_sides, shapeTri.csc_sides, cos_vertices);
+            }
+            else
+            {
+                solidAngle = shapeTri.solidAngleOfTriangle();
+            }
+
+            if (solidAngle <= 0.0f)
+                continue;
+
+            // Store only what's needed for weighted selection
+            self.triangleSolidAngles[self.count] = solidAngle;
+            self.triangleIndices[self.count] = i;
+            self.totalWeight += solidAngle;
+            self.count++;
+        }
+
+#if DEBUG_DATA
+        // Validate no antipodal edges exist (would create spherical lune)
+        for (uint32_t i = 0; i < silhouette.count; i++)
+        {
+            uint32_t j = (i + 1) % silhouette.count;
+            float32_t3 n1 = normalize(silhouette.vertices[i]);
+            float32_t3 n2 = normalize(silhouette.vertices[j]);
+
+            if (dot(n1, n2) < -0.99f)
+            {
+                DebugDataBuffer[0].sphericalLuneDetected = 1;
+                assert(false && "Spherical lune detected: antipodal silhouette edge");
+            }
+        }
+        DebugDataBuffer[0].maxTrianglesExceeded = (self.count > MAX_TRIANGLES);
+        DebugDataBuffer[0].triangleCount = self.count;
+        DebugDataBuffer[0].totalSolidAngles = self.totalWeight;
+        for (uint32_t tri = 0; tri < self.count; tri++)
+        {
+            DebugDataBuffer[0].solidAngles[tri] = self.triangleSolidAngles[tri];
+        }
+#endif
+
+        return self;
+    }
+
+    // Sample using cached selection weights, recompute geometry on-demand
+    // xi.x selects the triangle and is then rescaled into that triangle's interval; xi.y is passed through unchanged.
+    // An empty sampler writes pdf = 0 and returns +Z; selectedIdx receives the index of the chosen fan triangle.
+    float32_t3 sample(ClippedSilhouette silhouette, float32_t2 xi, out float32_t pdf, out uint32_t selectedIdx)
+    {
+        selectedIdx = 0;
+
+        // Handle empty or invalid data
+        if (count == 0 || totalWeight <= 0.0f)
+        {
+            pdf = 0.0f;
+            return float32_t3(0, 0, 1);
+        }
+
+        // Select triangle using cached weighted random selection
+        float32_t targetWeight = xi.x * totalWeight;
+        float32_t cumulativeWeight = 0.0f;
+        float32_t prevCumulativeWeight = 0.0f;
+
+        NBL_UNROLL
+        for (uint32_t i = 0; i < count; i++)
+        {
+            prevCumulativeWeight = cumulativeWeight;
+            cumulativeWeight += triangleSolidAngles[i];
+
+            if (targetWeight <= cumulativeWeight)
+            {
+                selectedIdx = i;
+                break;
+            }
+        }
+
+        // Remap xi.x to [0,1] within selected triangle's solidAngle interval
+        float32_t triSolidAngle = triangleSolidAngles[selectedIdx];
+        float32_t u = (targetWeight - prevCumulativeWeight) / max(triSolidAngle, 1e-7f);
+
+        // Reconstruct the selected triangle geometry
+        uint32_t vertexIdx = triangleIndices[selectedIdx];
+        float32_t3 v0 = silhouette.vertices[0];
+        float32_t3 v1 = silhouette.vertices[vertexIdx];
+        float32_t3 v2 = silhouette.vertices[vertexIdx + 1];
+
+        float32_t3 fn = normalize(cross(v1 - v0, v2 - v0));
+
+        float32_t3 origin = float32_t3(0, 0, 0);
+
+        shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin);
+
+        // Compute vertex angles once
+        float32_t3 cos_vertices = clamp(
+            (shapeTri.cos_sides - shapeTri.cos_sides.yzx * shapeTri.cos_sides.zxy) *
+                shapeTri.csc_sides.yzx * shapeTri.csc_sides.zxy,
+            float32_t3(-1.0f, -1.0f, -1.0f),
+            float32_t3(1.0f, 1.0f, 1.0f));
+        float32_t3 sin_vertices = sqrt(float32_t3(1.0f, 1.0f, 1.0f) - cos_vertices * cos_vertices);
+
+        // Sample based on mode
+        float32_t3 direction;
+        float32_t rcpPdf;
+
+        if (samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE)
+        {
+            sampling::ProjectedSphericalTriangle samplingTri = sampling::ProjectedSphericalTriangle::create(shapeTri);
+
+            // NOTE(review): triSolidAngle is the cached selection weight, which in this mode is a projected
+            // solid angle - confirm that this is the quantity generate() expects for this argument.
+            direction = samplingTri.generate(rcpPdf, triSolidAngle, cos_vertices, sin_vertices, shapeTri.cos_sides[0], shapeTri.cos_sides[2], shapeTri.csc_sides[1], shapeTri.csc_sides[2], fn, false, float32_t2(u, xi.y));
+            triSolidAngle = rcpPdf; // projected solid angle returned as rcpPdf
+        }
+        else
+        {
+            sampling::SphericalTriangle samplingTri = sampling::SphericalTriangle::create(shapeTri);
+            direction = samplingTri.generate(triSolidAngle, cos_vertices, sin_vertices, shapeTri.cos_sides[0], shapeTri.cos_sides[2], shapeTri.csc_sides[1], shapeTri.csc_sides[2], float32_t2(u, xi.y));
+        }
+
+        // Calculate PDF
+        // NOTE(review): the two factors below cancel, so pdf always equals 1 / totalWeight - confirm this is intended in projected mode.
+        float32_t trianglePdf = 1.0f / triSolidAngle;
+        float32_t selectionProb = triSolidAngle / totalWeight;
+        pdf = trianglePdf * selectionProb;
+
+        return normalize(direction);
+    }
+};
+
+#endif // _SOLID_ANGLE_VIS_EXAMPLE_TRIANGLE_SAMPLING_HLSL_INCLUDED_
diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl
new file mode 100644
index 000000000..832204cf2
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl
@@ -0,0 +1,76 @@
+//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _SOLID_ANGLE_VIS_EXAMPLE_UTILS_HLSL_INCLUDED_
+#define _SOLID_ANGLE_VIS_EXAMPLE_UTILS_HLSL_INCLUDED_
+#include
+#include
+
+// TODO: implemented somewhere else?
+// Bit rotation helpers
+// Rotates the low `width` bits of `value` left by `bits`; a rotation of `width` or more is treated as 0.
+uint32_t rotl(uint32_t value, uint32_t bits, uint32_t width)
+{
+    // mask for the width
+    uint32_t mask = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);
+    value &= mask;
+
+    // Map bits >= width -> 0
+    bits &= -(bits < width);
+
+    return ((value << bits) | (value >> (width - bits))) & mask;
+}
+
+// Rotates the low `width` bits of `value` right by `bits`; a rotation of `width` or more is treated as 0.
+uint32_t rotr(uint32_t value, uint32_t bits, uint32_t width)
+{
+    // mask for the width; a shift by 32 cannot build the full mask, so special-case it exactly like rotl
+    uint32_t mask = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);
+    value &= mask;
+
+    // Map bits >= width -> 0
+    bits &= -(bits < width);
+
+    return ((value >> bits) | (value << (width - bits))) & mask;
+}
+
+// Packs a silhouette into 32 bits: s[0] is the vertex count (3 bits, stored in bits 29-31),
+// s[1..6] are vertex indices (3 bits each, s[1] in the lowest bits).
+uint32_t packSilhouette(const uint32_t s[7])
+{
+    uint32_t packed = 0;
+    uint32_t size = s[0] & 0x7; // 3 bits for size
+
+    // Pack vertices LSB-first (vertex1 in lowest 3 bits above size)
+    for (uint32_t i = 1; i <= 6; ++i)
+    {
+        uint32_t v = s[i];
+        // v is unsigned, so a "-1" style sentinel arrives as a value above 7 (the largest 3-bit index)
+        if (v > 7u)
+            v = 0;                            // replace unused vertices with 0
+        packed |= (v & 0x7) << (3 * (i - 1)); // vertex i-1 shifted by 3*(i-1)
+    }
+
+    // Put size in the MSB (bits 29-31 for a 32-bit uint32_t, leaving 29 bits for vertices)
+    packed |= (size & 0x7) << 29;
+
+    return packed;
+}
+
+// Returns (i / numSamples, bit-reversed i divided by 4294967295); numSamples is not checked against 0.
+float32_t2 hammersleySample(uint32_t i, uint32_t numSamples)
+{
+    return float32_t2(
+        float32_t(i) / float32_t(numSamples),
+        float32_t(reversebits(i)) / 4294967295.0f);
+}
+
+// Draws two values from rnd and scales each by 2^-32 (2.3283064365386963e-10).
+float32_t2 nextRandomUnorm2(inout nbl::hlsl::Xoroshiro64StarStar rnd)
+{
+    return float32_t2(
+        float32_t(rnd()) * 2.3283064365386963e-10,
+        float32_t(rnd()) * 2.3283064365386963e-10);
+}
+
+#endif // _SOLID_ANGLE_VIS_EXAMPLE_UTILS_HLSL_INCLUDED_
diff --git a/12_MeshLoaders/config.json.template b/73_SolidAngleVisualizer/config.json.template
similarity index 100%
rename from 12_MeshLoaders/config.json.template
rename to 73_SolidAngleVisualizer/config.json.template
diff --git a/73_SolidAngleVisualizer/include/common.hpp b/73_SolidAngleVisualizer/include/common.hpp
new file mode 100644
index 000000000..fe7d086dd
--- /dev/null
+++ b/73_SolidAngleVisualizer/include/common.hpp
@@ -0,0 +1,19 @@
+#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_
+#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_
+
+
+#include "nbl/examples/examples.hpp"
+
+// the example's headers
+#include "transform.hpp" + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/73_SolidAngleVisualizer/include/transform.hpp b/73_SolidAngleVisualizer/include/transform.hpp new file mode 100644 index 000000000..e1ffcd764 --- /dev/null +++ b/73_SolidAngleVisualizer/include/transform.hpp @@ -0,0 +1,213 @@ +#ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ + +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" +#include "imguizmo/ImGuizmo.h" + +struct TransformRequestParams +{ + uint8_t sceneTexDescIx = ~0; + bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = true; +}; + +struct TransformReturnInfo +{ + nbl::hlsl::uint16_t2 sceneResolution = { 1, 1 }; + bool allowCameraMovement = false; +}; + +TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) +{ + static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE); + static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL); + static bool useSnap = false; + static float snap[3] = { 1.f, 1.f, 1.f }; + static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f }; + static float boundsSnap[] = { 0.1f, 0.1f, 0.1f }; + static bool boundSizing = false; + static bool boundSizingSnap = false; + + ImGui::Text("Use gizmo (T/R/G) or ViewManipulate widget to transform the cube"); + + if (params.editTransformDecomposition) + { + if (ImGui::IsKeyPressed(ImGuiKey_T)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + if (ImGui::IsKeyPressed(ImGuiKey_R)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + if (ImGui::IsKeyPressed(ImGuiKey_G)) + 
mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) + mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; + + // For UI editing, decompose temporarily + float matrixTranslation[3], matrixRotation[3], matrixScale[3]; + ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); + ImGui::DragFloat3("Tr", matrixTranslation, 0.01f); + ImGui::DragFloat3("Rt", matrixRotation, 0.01f); + ImGui::DragFloat3("Sc", matrixScale, 0.01f); + ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); + + if (mCurrentGizmoOperation != ImGuizmo::SCALE) + { + if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL)) + mCurrentGizmoMode = ImGuizmo::LOCAL; + ImGui::SameLine(); + if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD)) + mCurrentGizmoMode = ImGuizmo::WORLD; + } + if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift)) + useSnap = !useSnap; + ImGui::Checkbox("##UseSnap", &useSnap); + ImGui::SameLine(); + + switch (mCurrentGizmoOperation) + { + case ImGuizmo::TRANSLATE: + ImGui::InputFloat3("Snap", &snap[0]); + break; + case ImGuizmo::ROTATE: + ImGui::InputFloat("Angle Snap", &snap[0]); + break; + case ImGuizmo::SCALE: + ImGui::InputFloat("Scale Snap", &snap[0]); + break; + } + ImGui::Checkbox("Bound Sizing", &boundSizing); + if (boundSizing) + { + ImGui::PushID(3); + ImGui::Checkbox("##BoundSizing", &boundSizingSnap); + ImGui::SameLine(); + ImGui::InputFloat3("Snap", 
boundsSnap); + ImGui::PopID(); + } + } + + ImGuiIO& io = ImGui::GetIO(); + float viewManipulateRight = io.DisplaySize.x; + float viewManipulateTop = 0; + bool isWindowHovered = false; + static ImGuiWindowFlags gizmoWindowFlags = 0; + + /* + for the "useWindow" case we just render to a gui area, + otherwise to fake full screen transparent window + + note that for both cases we make sure gizmo being + rendered is aligned to our texture scene using + imgui "cursor" screen positions + */ + // TODO: this shouldn't be handled here I think + SImResourceInfo info; + info.textureID = params.sceneTexDescIx; + info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + + TransformReturnInfo retval; + if (params.useWindow) + { + ImGui::SetNextWindowSize(ImVec2(800, 800), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing); + ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f)); + ImGui::Begin("Gizmo", 0, gizmoWindowFlags); + ImGuizmo::SetDrawlist(); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 windowPos = ImGui::GetWindowPos(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + isWindowHovered = ImGui::IsWindowHovered(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval.sceneResolution = { contentRegionSize.x,contentRegionSize.y }; + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + + ImGuiWindow* window = ImGui::GetCurrentWindow(); + gizmoWindowFlags = (isWindowHovered && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? 
ImGuiWindowFlags_NoMove : 0); + } + else + { + ImGui::SetNextWindowPos(ImVec2(0, 0)); + ImGui::SetNextWindowSize(io.DisplaySize); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window + ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + isWindowHovered = ImGui::IsWindowHovered(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval.sceneResolution = { contentRegionSize.x,contentRegionSize.y }; + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + } + + // Standard Manipulate gizmo - let ImGuizmo modify the matrix directly + ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? 
boundsSnap : NULL); + + retval.allowCameraMovement = isWindowHovered && !ImGuizmo::IsUsing(); + + // ViewManipulate for rotating the view + if (params.enableViewManipulate) + { + // Store original translation and scale before ViewManipulate + // Decompose original matrix + nbl::hlsl::float32_t3 translation, rotation, scale; + ImGuizmo::DecomposeMatrixToComponents(matrix, &translation.x, &rotation.x, &scale.x); + // Create rotation-only matrix + nbl::hlsl::float32_t4x4 temp; + nbl::hlsl::float32_t3 baseTranslation(0.0f); + nbl::hlsl::float32_t3 baseScale(1.0f); + ImGuizmo::RecomposeMatrixFromComponents(&baseTranslation.x, &rotation.x, &baseScale.x, &temp[0][0]); + temp = nbl::hlsl::transpose(temp); + + // Invert to make it "view-like" + nbl::hlsl::float32_t4x4 tempInv = nbl::hlsl::inverse(temp); + + // Create flip matrix (flip X to fix left/right) + nbl::hlsl::float32_t4x4 flip(1.0f); + flip[0][0] = -1.0f; // Flip X axis + + // Apply flip to the inverted matrix + tempInv = nbl::hlsl::mul(nbl::hlsl::mul(flip, tempInv), flip); + + // Manipulate + ImGuizmo::ViewManipulate(&tempInv[0][0], 1.0f, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + + // Undo flip (flip is its own inverse, so multiply by flip again) + tempInv = nbl::hlsl::mul(nbl::hlsl::mul(flip, tempInv), flip); + + // Invert back to model space + temp = nbl::hlsl::inverse(tempInv); + temp = nbl::hlsl::transpose(temp); + + // Extract rotation + nbl::hlsl::float32_t3 newRot; + ImGuizmo::DecomposeMatrixToComponents(&temp[0][0], &baseTranslation.x, &newRot.x, &baseScale.x); + // Recompose original matrix with new rotation but keep translation & scale + ImGuizmo::RecomposeMatrixFromComponents(&translation.x, &newRot.x, &scale.x, matrix); + + retval.allowCameraMovement &= isWindowHovered && !ImGuizmo::IsUsingViewManipulate(); + } + + ImGui::End(); + ImGui::PopStyleColor(); + + return retval; +} + +#endif // _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ \ No newline at end of file 
diff --git a/73_SolidAngleVisualizer/main.cpp b/73_SolidAngleVisualizer/main.cpp new file mode 100644 index 000000000..c60952394 --- /dev/null +++ b/73_SolidAngleVisualizer/main.cpp @@ -0,0 +1,1777 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +#include "common.hpp" +#include +#include +#include "app_resources/hlsl/common.hlsl" +#include "app_resources/hlsl/benchmark/common.hlsl" +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + +/* +Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window. + +Written with Nabla's UI extension and got integrated with ImGuizmo to handle scene's object translations. +*/ +class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; + +public: + inline SolidAngleVisualizer(const path &_localInputCWD, const path &_localOutputCWD, const path &_sharedInputCWD, const path &_sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({2048, 1024}, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) + { + } + + inline bool onAppInitialized(smart_refctd_ptr &&system) override + { + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + interface.m_visualizer = this; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for 
(auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!pool) + return logFail("Couldn't create Command Pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, {m_cmdBufs.data() + i, 1})) + return logFail("Couldn't create Command Buffer!"); + } + + const uint32_t addtionalBufferOwnershipFamilies[] = {getGraphicsQueue()->getFamilyIndex()}; + m_scene = CGeometryCreatorScene::create( + {.transferQueue = getTransferUpQueue(), + .utilities = m_utils.get(), + .logger = m_logger.get(), + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies}, + CSimpleDebugRenderer::DefaultPolygonGeometryPatch); + + // for the scene drawing pass + { + IGPURenderpass::SCreationParams params = {}; + const IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { + {{{.format = sceneRenderDepthFormat, + .samples = IGPUImage::ESCF_1_BIT, + .mayAlias = false}, + /*.loadOp =*/{IGPURenderpass::LOAD_OP::CLEAR}, + /*.storeOp =*/{IGPURenderpass::STORE_OP::STORE}, + /*.initialLayout =*/{IGPUImage::LAYOUT::UNDEFINED}, + /*.finalLayout =*/{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}, + IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd}; + params.depthStencilAttachments = depthAttachments; + const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = { + {{ + {.format = finalSceneRenderFormat, + .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, + .mayAlias = false}, + /*.loadOp =*/IGPURenderpass::LOAD_OP::CLEAR, + /*.storeOp =*/IGPURenderpass::STORE_OP::STORE, + /*.initialLayout =*/IGPUImage::LAYOUT::UNDEFINED, + /*.finalLayout =*/IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read + }}, + IGPURenderpass::SCreationParams::ColorAttachmentsEnd}; + params.colorAttachments = colorAttachments; + IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { + {}, + IGPURenderpass::SCreationParams::SubpassesEnd}; + subpasses[0].depthStencilAttachment = {{.render = {.attachmentIndex = 0, 
.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}; + subpasses[0].colorAttachments[0] = {.render = {.attachmentIndex = 0, .layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}; + params.subpasses = subpasses; + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + // while color is sampled by ImGUI + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + // don't want any writes to be available, as we are clearing both attachments + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT} + // leave view offsets and flags default + }, + { + .srcSubpass = 0, .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, .memoryBarrier = {// last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available, also won't be using depth so don't care about it being visible to anyone else + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT, + // the ImGUI will sample the color, then next frame we overwrite both attachments + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT | 
PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, + // but we only care about the availability-visibility chain between renderpass and imgui + .dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT} + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd}; + params.dependencies = dependencies; + auto solidAngleRenderpassParams = params; + m_mainRenderpass = m_device->createRenderpass(std::move(params)); + if (!m_mainRenderpass) + return logFail("Failed to create Main Renderpass!"); + + m_solidAngleRenderpass = m_device->createRenderpass(std::move(solidAngleRenderpassParams)); + if (!m_solidAngleRenderpass) + return logFail("Failed to create Solid Angle Renderpass!"); + } + + const auto &geometries = m_scene->getInitParams().geometries; + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), m_solidAngleRenderpass.get(), 0, {&geometries.front().get(), geometries.size()}); + // special case + { + const auto &pipelines = m_renderer->getInitParams().pipelines; + auto ix = 0u; + for (const auto &name : m_scene->getInitParams().geometryNames) + { + if (name == "Cone") + m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone]; + ix++; + } + } + // we'll only display one thing at a time + m_renderer->m_instances.resize(1); + + // Create graphics pipeline + { + auto loadPrecompiledShader = [&](auto key) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load precompiled shader!", ILogger::ELL_ERROR); + std::exit(-1); + } + assert(assets.size() == 1); + auto shader = IAsset::castDown(assets[0]); + if (!shader) + { + m_logger->log("Failed to load precompiled shader!", ILogger::ELL_ERROR); + std::exit(-1); + } + return shader; + }; + + 
ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load pre-compiled fragment shaders (6 modes x 2 debug = 12 SolidAngleVis + 2 RayVis) + // Can't use string literal template args in a loop, so unroll manually + // Index: mode * 2 + debugFlag (0=release, 1=debug) + smart_refctd_ptr saVisShaders[SAMPLING_MODE::Count * DebugPermutations]; + saVisShaders[0] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_sa">(m_device.get())); + saVisShaders[1] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_sa_dbg">(m_device.get())); + saVisShaders[2] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_psa">(m_device.get())); + saVisShaders[3] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_psa_dbg">(m_device.get())); + saVisShaders[4] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_para">(m_device.get())); + saVisShaders[5] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_para_dbg">(m_device.get())); + saVisShaders[6] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_rectangle">(m_device.get())); + saVisShaders[7] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_rectangle_dbg">(m_device.get())); + saVisShaders[8] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_biquad">(m_device.get())); + saVisShaders[9] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_biquad_dbg">(m_device.get())); + saVisShaders[10] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_bilinear">(m_device.get())); + saVisShaders[11] = 
loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_bilinear_dbg">(m_device.get())); + + smart_refctd_ptr rayVisShaders[DebugPermutations]; + rayVisShaders[0] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"ray_vis">(m_device.get())); + rayVisShaders[1] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"ray_vis_dbg">(m_device.get())); + + smart_refctd_ptr solidAngleVisLayout, rayVisLayout; + nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = + { + {.binding = 0, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_FRAGMENT, + .count = 1}}; + smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); + + const asset::SPushConstantRange saRanges[] = {{.stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, .offset = 0, .size = sizeof(PushConstants)}}; + const asset::SPushConstantRange rayRanges[] = {{.stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, .offset = 0, .size = sizeof(PushConstantRayVis)}}; + + if (!dsLayout) + logFail("Failed to create a Descriptor Layout!\n"); + + solidAngleVisLayout = m_device->createPipelineLayout(saRanges, dsLayout); + + rayVisLayout = m_device->createPipelineLayout(rayRanges, dsLayout); + + { + // Create all SolidAngleVis pipeline variants + for (uint32_t i = 0; i < SAMPLING_MODE::Count * DebugPermutations; i++) + { + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = saVisShaders[i].get(), + .entryPoint = "main"}; + m_solidAngleVisPipelines[i] = fsTriProtoPPln.createPipeline(fragSpec, solidAngleVisLayout.get(), m_solidAngleRenderpass.get()); + if (!m_solidAngleVisPipelines[i]) + return logFail("Could not create SolidAngleVis Graphics Pipeline variant %d!", i); + } + + asset::SRasterizationParams rasterParams = ext::FullScreenTriangle::ProtoPipeline::DefaultRasterParams; + rasterParams.depthWriteEnable = true; + 
rasterParams.depthCompareOp = asset::E_COMPARE_OP::ECO_GREATER; + + // Create all RayVis pipeline variants + for (uint32_t i = 0; i < DebugPermutations; i++) + { + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = rayVisShaders[i].get(), + .entryPoint = "main"}; + m_rayVisPipelines[i] = fsTriProtoPPln.createPipeline(fragSpec, rayVisLayout.get(), m_mainRenderpass.get(), 0, {}, rasterParams); + if (!m_rayVisPipelines[i]) + return logFail("Could not create RayVis Graphics Pipeline variant %d!", i); + } + } + // Allocate the memory + { + constexpr size_t BufferSize = sizeof(ResultData); + + nbl::video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + m_outputStorageBuffer = m_device->createBuffer(std::move(params)); + if (!m_outputStorageBuffer) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + m_outputStorageBuffer->setObjectDebugName("ResultData output buffer"); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = m_outputStorageBuffer->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_allocation = m_device->allocate(reqs, m_outputStorageBuffer.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_allocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(m_outputStorageBuffer->getBoundMemory().memory == m_allocation.memory.get()); + smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, {&dsLayout.get(), 1}); + + m_ds = pool->createDescriptorSet(std::move(dsLayout)); + { + IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = smart_refctd_ptr(m_outputStorageBuffer); + info[0].info.buffer = {.offset = 0, .size = BufferSize}; + IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 
1, .info = info}}; + m_device->updateDescriptorSets(writes, {}); + } + } + + if (!m_allocation.memory->map({0ull, m_allocation.memory->getAllocationSize()}, IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches + const ILogicalDevice::MappedMemoryRange memoryRange(m_allocation.memory.get(), 0ull, m_allocation.memory->getAllocationSize()); + if (!m_allocation.memory->getMemoryPropertyFlags().hasFlags(IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + } + + // Create ImGUI + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::imgui::UI::SCreationParameters params = {}; + params.resources.texturesInfo = {.setIx = 0u, .bindingIx = TexturesImGUIBindingIndex}; + params.resources.samplersInfo = {.setIx = 0u, .bindingIx = 1u}; + params.utilities = m_utils; + params.transfer = getTransferUpQueue(); + params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxImGUITextures); + params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); + params.subpassIx = 0u; + params.pipelineCache = nullptr; + interface.imGUI = ext::imgui::UI::create(std::move(params)); + if (!interface.imGUI) + return logFail("Failed to create `nbl::ext::imgui::UI` class"); + } + + // create rest of User Interface + { + auto *imgui = interface.imGUI.get(); + // create the suballocated descriptor set + { + // note that we use default layout provided by our extension, but you are free to create your own by filling ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto *layout = interface.imGUI->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + auto pool = 
m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, {&layout, 1}); + auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); + interface.subAllocDS = make_smart_refctd_ptr(std::move(ds)); + if (!interface.subAllocDS) + return logFail("Failed to create the descriptor set"); + // make sure Texture Atlas slot is taken for eternity + { + auto dummy = SubAllocatedDescriptorSet::invalid_value; + interface.subAllocDS->multi_allocate(0, 1, &dummy); + assert(dummy == ext::imgui::UI::FontAtlasTexId); + } + // write constant descriptors, note we don't create info & write pair for the samplers because UI extension's are immutable and baked into DS layout + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = smart_refctd_ptr(interface.imGUI->getFontAtlasView()); + info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write = { + .dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = ext::imgui::UI::FontAtlasTexId, + .count = 1, + .info = &info}; + if (!m_device->updateDescriptorSets({&write, 1}, {})) + return logFail("Failed to write the descriptor set"); + } + imgui->registerListener([this]() + { interface(); }); + } + + interface.camera.mapKeysToWASD(); + + onAppInitializedFinish(); + return true; + } + + // + virtual inline bool onAppTerminated() + { + SubAllocatedDescriptorSet::value_type fontAtlasDescIx = ext::imgui::UI::FontAtlasTexId; + IGPUDescriptorSet::SDropDescriptorSet dummy[1]; + interface.subAllocDS->multi_deallocate(dummy, TexturesImGUIBindingIndex, 1, &fontAtlasDescIx); + return device_base_t::onAppTerminated(); + } + + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override + { + // CPU events + update(nextPresentationTimestamp); + + { + const auto &virtualSolidAngleWindowRes = 
interface.solidAngleViewTransformReturnInfo.sceneResolution; + const auto &virtualMainWindowRes = interface.mainViewTransformReturnInfo.sceneResolution; + if (!m_solidAngleViewFramebuffer || m_solidAngleViewFramebuffer->getCreationParameters().width != virtualSolidAngleWindowRes[0] || m_solidAngleViewFramebuffer->getCreationParameters().height != virtualSolidAngleWindowRes[1] || + !m_mainViewFramebuffer || m_mainViewFramebuffer->getCreationParameters().width != virtualMainWindowRes[0] || m_mainViewFramebuffer->getCreationParameters().height != virtualMainWindowRes[1]) + recreateFramebuffers(); + } + + // + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + auto *const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + if (m_solidAngleViewFramebuffer) + { + asset::SBufferRange range{ + .offset = 0, + .size = m_outputStorageBuffer->getSize(), + .buffer = m_outputStorageBuffer}; + cb->fillBuffer(range, 0u); + { + + const auto &creationParams = m_solidAngleViewFramebuffer->getCreationParameters(); + cb->beginDebugMarker("Draw Circle View Frame"); + { + const IGPUCommandBuffer::SClearDepthStencilValue farValue = {.depth = 0.f}; + const IGPUCommandBuffer::SClearColorValue clearValue = {.float32 = {0.f, 0.f, 0.f, 1.f}}; + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = m_solidAngleViewFramebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0, 0}, + .extent = {creationParams.width, creationParams.height}}}; + beginRenderpass(cb, renderpassInfo); + } + // draw scene + { + static uint32_t lastFrameSeed = 0u; + lastFrameSeed = m_frameSeeding ? 
static_cast(m_realFrameIx) : lastFrameSeed; + PushConstants pc{ + .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), + .viewport = {0.f, 0.f, static_cast(creationParams.width), static_cast(creationParams.height)}, + .sampleCount = static_cast(m_SampleCount), + .frameIndex = lastFrameSeed}; + const uint32_t debugIdx = m_debugVisualization ? 1u : 0u; + auto pipeline = m_solidAngleVisPipelines[m_samplingMode * DebugPermutations + debugIdx]; + cb->bindGraphicsPipeline(pipeline.get()); + cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc); + cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 0, 1, &m_ds.get()); + ext::FullScreenTriangle::recordDrawCall(cb); + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + + if (m_debugVisualization) + { + m_device->waitIdle(); + std::memcpy(&m_GPUOutResulData, static_cast(m_allocation.memory->getMappedPointer()), sizeof(ResultData)); + m_device->waitIdle(); + } + } + // draw main view + if (m_mainViewFramebuffer) + { + { + auto creationParams = m_mainViewFramebuffer->getCreationParameters(); + const IGPUCommandBuffer::SClearDepthStencilValue farValue = {.depth = 0.f}; + const IGPUCommandBuffer::SClearColorValue clearValue = {.float32 = {0.1f, 0.1f, 0.1f, 1.f}}; + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = m_mainViewFramebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0, 0}, + .extent = {creationParams.width, creationParams.height}}}; + beginRenderpass(cb, renderpassInfo); + } + { // draw rays visualization + auto creationParams = m_mainViewFramebuffer->getCreationParameters(); + + cb->beginDebugMarker("Draw Rays visualization"); + // draw scene + { + float32_t4x4 viewProj = *reinterpret_cast(&interface.camera.getConcatenatedMatrix()); + float32_t3x4 view = *reinterpret_cast(&interface.camera.getViewMatrix()); + 
PushConstantRayVis pc{ + .viewProjMatrix = viewProj, + .viewMatrix = view, + .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), + .invModelMatrix = hlsl::float32_t3x4(hlsl::transpose(hlsl::inverse(interface.m_OBBModelMatrix))), + .viewport = {0.f, 0.f, static_cast(creationParams.width), static_cast(creationParams.height)}, + .frameIndex = m_frameSeeding ? static_cast(m_realFrameIx) : 0u}; + auto pipeline = m_rayVisPipelines[m_debugVisualization ? 1u : 0u]; + cb->bindGraphicsPipeline(pipeline.get()); + cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc); + cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 0, 1, &m_ds.get()); + ext::FullScreenTriangle::recordDrawCall(cb); + } + cb->endDebugMarker(); + } + // draw scene + { + cb->beginDebugMarker("Main Scene Frame"); + + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + // TODO: get rid of legacy matrices + { + const auto &camera = interface.camera; + memcpy(&viewMatrix, &camera.getViewMatrix(), sizeof(viewMatrix)); + memcpy(&viewProjMatrix, &camera.getConcatenatedMatrix(), sizeof(viewProjMatrix)); + } + const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix); + + // tear down scene every frame + auto &instance = m_renderer->m_instances[0]; + instance.world = float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)); + instance.packedGeo = m_renderer->getGeometries().data(); // cube // +interface.gcIndex; + m_renderer->render(cb, viewParams); // draw the cube/OBB + + instance.world = float32_t3x4(1.0f); + instance.packedGeo = m_renderer->getGeometries().data() + 2; // disk + m_renderer->render(cb, viewParams); + } + + cb->endDebugMarker(); + cb->endRenderPass(); + } + + { + cb->beginDebugMarker("SolidAngleVisualizer IMGUI Frame"); + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SClearColorValue clearValue = {.float32 = {0.f, 0.f, 0.f, 1.f}}; 
+ const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = { + .offset = {0, 0}, + .extent = {m_window->getWidth(), m_window->getHeight()}}}; + beginRenderpass(cb, renderpassInfo); + } + // draw ImGUI + { + auto *imgui = interface.imGUI.get(); + auto *pipeline = imgui->getPipeline(); + cb->bindGraphicsPipeline(pipeline); + // note that we use default UI pipeline layout where uiParams.resources.textures.setIx == uiParams.resources.samplers.setIx + const auto *ds = interface.subAllocDS->getDescriptorSet(); + cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); + // a timepoint in the future to release streaming resources for geometry + const ISemaphore::SWaitInfo drawFinished = {.semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u}; + if (!imgui->render(cb, drawFinished)) + { + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); + return {}; + } + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + cb->end(); + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS}; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb}}; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { + {.semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE}}; + const IQueue::SSubmitInfo infos[] = + { + {.waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval, 1}}}; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = 
nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + m_window->setCaption("[Nabla Engine] UI App Test Demo"); + return retval; + } + +protected: + const video::IGPURenderpass::SCreationParams::SSubpassDependency *getDefaultSubpassDependencies() const override + { + // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // don't want any writes to be available, we'll clear, only thing to worry about is the layout transition + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway + .srcAccessMask = ACCESS_FLAGS::NONE, + // layout transition needs to finish before the color write + .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT} + // leave view offsets and flags default + }, + // want layout transition to begin after all color output is done + { + .srcSubpass = 0, .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd}; + return dependencies; + } + +private: + inline void update(const std::chrono::microseconds nextPresentationTimestamp) + { + auto &camera = interface.camera; + camera.setMoveSpeed(interface.moveSpeed); + camera.setRotateSpeed(interface.rotateSpeed); + + 
m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } uiEvents; + + // TODO: should be a member really + static std::chrono::microseconds previousEventTimestamp{}; + + // I think begin/end should always be called on camera, just events shouldn't be fed, why? + // If you stop begin/end, whatever keys were up/down get their up/down values frozen leading to + // `perActionDt` becoming obnoxiously large the first time the even processing resumes due to + // `timeDiff` being computed since `lastVirtualUpTimeStamp` + camera.beginInputProcessing(nextPresentationTimestamp); + { + mouse.consumeEvents([&](const IMouseEventChannel::range_t &events) -> void + { + if (interface.move) + camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + else + camera.mouseKeysUp(); + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + uiEvents.mouse.emplace_back(e); + + //if (e.type == nbl::ui::SMouseEvent::EET_SCROLL && m_renderer) + //{ + // interface.gcIndex += int16_t(core::sign(e.scrollEvent.verticalScroll)); + // interface.gcIndex = core::clamp(interface.gcIndex, 0ull, m_renderer->getGeometries().size() - 1); + //} + } }, + m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t &events) -> void + { + if (interface.move) + camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + uiEvents.keyboard.emplace_back(e); + } }, + m_logger.get()); + } + camera.endInputProcessing(nextPresentationTimestamp); + + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + + ext::imgui::UI::SUpdateParameters params = + 
{ + .mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()), + .displaySize = {m_window->getWidth(), m_window->getHeight()}, + .mouseEvents = uiEvents.mouse, + .keyboardEvents = uiEvents.keyboard}; + + // interface.objectName = m_scene->getInitParams().geometryNames[interface.gcIndex]; + interface.imGUI->update(params); + } + + void recreateFramebuffers() + { + + auto createImageAndView = [&](const uint16_t2 resolution, E_FORMAT format) -> smart_refctd_ptr + { + auto image = m_device->createImage({{.type = IGPUImage::ET_2D, + .samples = IGPUImage::ESCF_1_BIT, + .format = format, + .extent = {resolution.x, resolution.y, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT}}); + if (!m_device->allocate(image->getMemoryReqs(), image.get()).isValid()) + return nullptr; + IGPUImageView::SCreationParams params = { + .image = std::move(image), + .viewType = IGPUImageView::ET_2D, + .format = format}; + params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? 
IGPUImage::EAF_DEPTH_BIT : IGPUImage::EAF_COLOR_BIT; + return m_device->createImageView(std::move(params)); + }; + + smart_refctd_ptr solidAngleView; + smart_refctd_ptr mainView; + const uint16_t2 solidAngleViewRes = interface.solidAngleViewTransformReturnInfo.sceneResolution; + const uint16_t2 mainViewRes = interface.mainViewTransformReturnInfo.sceneResolution; + + // detect window minimization + if (solidAngleViewRes.x < 0x4000 && solidAngleViewRes.y < 0x4000 || + mainViewRes.x < 0x4000 && mainViewRes.y < 0x4000) + { + solidAngleView = createImageAndView(solidAngleViewRes, finalSceneRenderFormat); + auto solidAngleDepthView = createImageAndView(solidAngleViewRes, sceneRenderDepthFormat); + m_solidAngleViewFramebuffer = m_device->createFramebuffer({{.renderpass = m_solidAngleRenderpass, + .depthStencilAttachments = &solidAngleDepthView.get(), + .colorAttachments = &solidAngleView.get(), + .width = solidAngleViewRes.x, + .height = solidAngleViewRes.y}}); + + mainView = createImageAndView(mainViewRes, finalSceneRenderFormat); + auto mainDepthView = createImageAndView(mainViewRes, sceneRenderDepthFormat); + m_mainViewFramebuffer = m_device->createFramebuffer({{.renderpass = m_mainRenderpass, + .depthStencilAttachments = &mainDepthView.get(), + .colorAttachments = &mainView.get(), + .width = mainViewRes.x, + .height = mainViewRes.y}}); + } + else + { + m_solidAngleViewFramebuffer = nullptr; + m_mainViewFramebuffer = nullptr; + } + + // release previous slot and its image + interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices, {.semaphore = m_semaphore.get(), .value = m_realFrameIx + 1}); + // + if (solidAngleView && mainView) + { + interface.subAllocDS->multi_allocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices); + // update descriptor set + IGPUDescriptorSet::SDescriptorInfo infos[static_cast(CInterface::Count)] = {}; + infos[0].desc = mainView; + infos[0].info.image.imageLayout = 
IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; + infos[1].desc = solidAngleView; + infos[1].info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write[static_cast(CInterface::Count)] = { + {.dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_MAIN_VIEW)], + .count = 1, + .info = &infos[static_cast(CInterface::ERV_MAIN_VIEW)]}, + {.dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)], + .count = 1, + .info = &infos[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)]}}; + m_device->updateDescriptorSets({write, static_cast(CInterface::Count)}, {}); + } + interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndices[CInterface::ERV_MAIN_VIEW]; + } + + inline void beginRenderpass(IGPUCommandBuffer *cb, const IGPUCommandBuffer::SRenderpassBeginInfo &info) + { + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + cb->setScissor(0, 1, &info.renderArea); + const SViewport viewport = { + .x = 0, + .y = 0, + .width = static_cast(info.renderArea.extent.width), + .height = static_cast(info.renderArea.extent.height)}; + cb->setViewport(0u, 1u, &viewport); + } + + ~SolidAngleVisualizer() override + { + m_allocation.memory->unmap(); + } + + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint32_t MaxFramesInFlight = 3u; + constexpr static inline auto sceneRenderDepthFormat = EF_D32_SFLOAT; + constexpr static inline auto finalSceneRenderFormat = EF_R8G8B8A8_SRGB; + constexpr static inline auto TexturesImGUIBindingIndex = 0u; + // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever 
ImGUI virtual window resizes + constexpr static inline auto MaxImGUITextures = 2u + MaxFramesInFlight; + + static inline SAMPLING_MODE m_samplingMode = SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE; + static inline bool m_debugVisualization = true; + static inline int m_SampleCount = 64; + static inline bool m_frameSeeding = true; + static inline ResultData m_GPUOutResulData; + // + smart_refctd_ptr m_scene; + smart_refctd_ptr m_solidAngleRenderpass; + smart_refctd_ptr m_mainRenderpass; + smart_refctd_ptr m_renderer; + smart_refctd_ptr m_solidAngleViewFramebuffer; + smart_refctd_ptr m_mainViewFramebuffer; + // Pipeline variants: SolidAngleVis indexed by [mode * 2 + debugFlag], RayVis by [debugFlag] + static constexpr uint32_t DebugPermutations = 2; + smart_refctd_ptr m_solidAngleVisPipelines[SAMPLING_MODE::Count * DebugPermutations]; + smart_refctd_ptr m_rayVisPipelines[DebugPermutations]; + // + nbl::video::IDeviceMemoryAllocator::SAllocation m_allocation = {}; + smart_refctd_ptr m_outputStorageBuffer; + smart_refctd_ptr m_ds = nullptr; + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + // + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + // UI stuff + struct CInterface + { + void operator()() + { + ImGuiIO &io = ImGui::GetIO(); + + // TODO: why is this a lambda and not just an assignment in a scope ? + camera.setProjectionMatrix([&]() + { + hlsl::float32_t4x4 projection; + + if (isPerspective) + if (isLH) + projection = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y * 0.5f, zNear, zFar); // TODO: why do I need to divide aspect ratio by 2? 
+ else + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y * 0.5f, zNear, zFar); + else + { + float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; + + if (isLH) + projection = hlsl::math::thin_lens::lhPerspectiveFovMatrix(viewWidth, viewHeight, zNear, zFar); + else + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(viewWidth, viewHeight, zNear, zFar); + } + + return projection; }()); + + ImGuizmo::SetOrthographic(!isPerspective); + ImGuizmo::BeginFrame(); + + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Editor"); + + ImGui::Text("Benchmarking Solid Angle Visualizer"); + + if (ImGui::Button("Run Benchmark")) + { + SolidAngleVisualizer::SamplingBenchmark benchmark(*m_visualizer); + benchmark.run(); + } + ImGui::Separator(); + + ImGui::Text("Sampling Mode:"); + ImGui::SameLine(); + + const char *samplingModes[] = + { + "Triangle Solid Angle", + "Triangle Projected Solid Angle", + "Parallelogram Projected Solid Angle", + "Rectangle Pyramid Solid Angle", + "Biquadratic pyramid solid angle", + "Bilinear pyramid solid angle"}; + + int currentMode = static_cast(m_samplingMode); + + if (ImGui::Combo("##SamplingMode", ¤tMode, samplingModes, IM_ARRAYSIZE(samplingModes))) + { + m_samplingMode = static_cast(currentMode); + } + + ImGui::Checkbox("Debug Visualization", &m_debugVisualization); + ImGui::Text("Pipeline idx: SA=%d, Ray=%d", + static_cast(m_samplingMode) * DebugPermutations + (m_debugVisualization ? 1 : 0), + m_debugVisualization ? 
1 : 0); + ImGui::Checkbox("Frame seeding", &m_frameSeeding); + + ImGui::SliderInt("Sample Count", &m_SampleCount, 0, 512); + + ImGui::Separator(); + + ImGui::Text("Camera"); + + if (ImGui::RadioButton("LH", isLH)) + isLH = true; + + ImGui::SameLine(); + + if (ImGui::RadioButton("RH", !isLH)) + isLH = false; + + if (ImGui::RadioButton("Perspective", isPerspective)) + isPerspective = true; + + ImGui::SameLine(); + + if (ImGui::RadioButton("Orthographic", !isPerspective)) + isPerspective = false; + + ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate); + // ImGui::Checkbox("Enable camera movement", &move); + ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); + + // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case + + if (isPerspective) + ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); + else + ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20); + + ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); + + if (firstFrame) + { + camera.setPosition(cameraIntialPosition); + camera.setTarget(cameraInitialTarget); + camera.setUpVector(cameraInitialUp); + + camera.recomputeViewMatrix(); + } + firstFrame = false; + + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + if (ImGuizmo::IsUsing()) + { + ImGui::Text("Using gizmo"); + } + else + { + ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? 
"Over scale gizmo" : ""); + } + ImGui::Separator(); + + /* + * ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout + * and Nabla uses row major matricies - 3x4 matrix for view & 4x4 for projection + + - VIEW: + + ImGuizmo + + | X[0] Y[0] Z[0] 0.0f | + | X[1] Y[1] Z[1] 0.0f | + | X[2] Y[2] Z[2] 0.0f | + | -Dot(X, eye) -Dot(Y, eye) -Dot(Z, eye) 1.0f | + + Nabla + + | X[0] X[1] X[2] -Dot(X, eye) | + | Y[0] Y[1] Y[2] -Dot(Y, eye) | + | Z[0] Z[1] Z[2] -Dot(Z, eye) | + + = transpose(nbl::core::matrix4SIMD()) + + - PERSPECTIVE [PROJECTION CASE]: + + ImGuizmo + + | (temp / temp2) (0.0) (0.0) (0.0) | + | (0.0) (temp / temp3) (0.0) (0.0) | + | ((right + left) / temp2) ((top + bottom) / temp3) ((-zfar - znear) / temp4) (-1.0f) | + | (0.0) (0.0) ((-temp * zfar) / temp4) (0.0) | + + Nabla + + | w (0.0) (0.0) (0.0) | + | (0.0) -h (0.0) (0.0) | + | (0.0) (0.0) (-zFar/(zFar-zNear)) (-zNear*zFar/(zFar-zNear)) | + | (0.0) (0.0) (-1.0) (0.0) | + + = transpose() + + * + * the ViewManipulate final call (inside EditTransform) returns world space column major matrix for an object, + * note it also modifies input view matrix but projection matrix is immutable + */ + + if (ImGui::IsKeyPressed(ImGuiKey_End)) + { + m_TRS = TRS{}; + } + + { + static struct + { + float32_t4x4 view, projection, model; + } imguizmoM16InOut; + + ImGuizmo::SetID(0u); + + // TODO: camera will return hlsl::float32_tMxN + auto view = camera.getViewMatrix(); + imguizmoM16InOut.view = hlsl::transpose(hlsl::math::linalg::promote_affine<4, 4>(view)); + + // TODO: camera will return hlsl::float32_tMxN + imguizmoM16InOut.projection = hlsl::transpose(camera.getProjectionMatrix()); + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &imguizmoM16InOut.model[0][0]); + + if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates + imguizmoM16InOut.projection[1][1] *= -1.f; // 
https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ + + transformParams.editTransformDecomposition = true; + mainViewTransformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); + move = mainViewTransformReturnInfo.allowCameraMovement; + + ImGuizmo::DecomposeMatrixToComponents(&imguizmoM16InOut.model[0][0], &m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x); + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &imguizmoM16InOut.model[0][0]); + } + // object meta display + //{ + // ImGui::Begin("Object"); + // ImGui::Text("type: \"%s\"", objectName.data()); + // ImGui::End(); + //} + + // solid angle view window + { + ImGui::SetNextWindowSize(ImVec2(800, 800), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(1240, 20), ImGuiCond_Appearing); + static bool isOpen = true; + ImGui::Begin("Projected Solid Angle View", &isOpen, 0); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + solidAngleViewTransformReturnInfo.sceneResolution = uint16_t2(static_cast(contentRegionSize.x), static_cast(contentRegionSize.y)); + solidAngleViewTransformReturnInfo.allowCameraMovement = false; // not used in this view + ImGui::Image({renderColorViewDescIndices[ERV_SOLID_ANGLE_VIEW]}, contentRegionSize); + ImGui::End(); + } + + // Show data coming from GPU + if (m_debugVisualization) + { + if (ImGui::Begin("Result Data")) + { + auto drawColorField = [&](const char *fieldName, uint32_t index) + { + ImGui::Text("%s: %u", fieldName, index); + + if (index >= 27) + { + ImGui::SameLine(); + ImGui::Text(""); + return; + } + + const auto &c = colorLUT[index]; // uses the combined LUT we made earlier + + ImGui::SameLine(); + + // Color preview button + ImGui::ColorButton( + fieldName, + ImVec4(c.r, c.g, c.b, 1.0f), + 0, + ImVec2(20, 20)); + + ImGui::SameLine(); + ImGui::Text("%s", colorNames[index]); + }; 
+ + // Vertices + if (ImGui::CollapsingHeader("Vertices", ImGuiTreeNodeFlags_DefaultOpen)) + { + for (uint32_t i = 0; i < 6; ++i) + { + if (i < m_GPUOutResulData.silhouetteVertexCount) + { + ImGui::Text("corners[%u]", i); + ImGui::SameLine(); + drawColorField(":", m_GPUOutResulData.vertices[i]); + ImGui::SameLine(); + static const float32_t3 constCorners[8] = { + float32_t3(-1, -1, -1), float32_t3(1, -1, -1), float32_t3(-1, 1, -1), float32_t3(1, 1, -1), + float32_t3(-1, -1, 1), float32_t3(1, -1, 1), float32_t3(-1, 1, 1), float32_t3(1, 1, 1)}; + // vertices[] is data coming from the GPU: validate it before indexing the 8-entry corner table + const uint32_t cornerIndex = m_GPUOutResulData.vertices[i]; + if (cornerIndex < 8u) + { + const float32_t3 vertexLocation = constCorners[cornerIndex]; + ImGui::Text(" : (%.3f, %.3f, %.3f)", vertexLocation.x, vertexLocation.y, vertexLocation.z); + } + else + ImGui::Text(" : (invalid corner index)"); + } + else + { + ImGui::Text("corners[%u] :: ", i); + ImGui::SameLine(); + ImGui::ColorButton( + "", + ImVec4(0.0f, 0.0f, 0.0f, 0.0f), + 0, + ImVec2(20, 20)); + ImGui::SameLine(); + ImGui::Text(""); + } + } + } + + if (ImGui::CollapsingHeader("Color LUT Map")) + { + for (int i = 0; i < 27; i++) + drawColorField(" ", i); + } + + ImGui::Separator(); + ImGui::Text("Valid Samples: %u / %u", m_GPUOutResulData.validSampleCount / hlsl::max(m_GPUOutResulData.threadCount, 1u), m_GPUOutResulData.sampleCount); + ImGui::ProgressBar(static_cast(m_GPUOutResulData.validSampleCount / hlsl::max(m_GPUOutResulData.threadCount, 1u)) / static_cast(m_GPUOutResulData.sampleCount)); + ImGui::Separator(); + + // Silhouette + if (ImGui::CollapsingHeader("Silhouette")) + { + drawColorField("silhouetteIndex", m_GPUOutResulData.silhouetteIndex); + ImGui::Text("Region: (%u, %u, %u)", m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); + ImGui::Text("Silhouette Vertex Count: %u", m_GPUOutResulData.silhouetteVertexCount); + ImGui::Text("Positive Vertex Count: %u", m_GPUOutResulData.positiveVertCount); + ImGui::Text("Edge Visibility Mismatch: %s", m_GPUOutResulData.edgeVisibilityMismatch ? 
"true" : "false"); + ImGui::Text("Max Triangles Exceeded: %s", m_GPUOutResulData.maxTrianglesExceeded ? "true" : "false"); + for (uint32_t i = 0; i < 6; i++) + ImGui::Text("Vertex[%u]: %u", i, m_GPUOutResulData.vertices[i]); + ImGui::Text("Clipped Silhouette Vertex Count: %u", m_GPUOutResulData.clippedSilhouetteVertexCount); + for (uint32_t i = 0; i < 7; i++) + ImGui::Text("Clipped Vertex[%u]: (%.3f, %.3f, %.3f) Index: %u", i, + m_GPUOutResulData.clippedSilhouetteVertices[i].x, + m_GPUOutResulData.clippedSilhouetteVertices[i].y, + m_GPUOutResulData.clippedSilhouetteVertices[i].z, + m_GPUOutResulData.clippedSilhouetteVerticesIndices[i]); + + // Silhouette mask printed in binary + auto printBin = [](uint32_t bin, const char *name) + { + char buf[33]; + for (int i = 0; i < 32; i++) + buf[i] = (bin & (1u << (31 - i))) ? '1' : '0'; + buf[32] = '\0'; + ImGui::Text("%s: 0x%08X", name, bin); + ImGui::Text("binary: 0b%s", buf); + ImGui::Separator(); + }; + printBin(m_GPUOutResulData.silhouette, "Silhouette"); + printBin(m_GPUOutResulData.rotatedSil, "rotatedSilhouette"); + + printBin(m_GPUOutResulData.clipCount, "clipCount"); + printBin(m_GPUOutResulData.clipMask, "clipMask"); + printBin(m_GPUOutResulData.rotatedClipMask, "rotatedClipMask"); + printBin(m_GPUOutResulData.rotateAmount, "rotateAmount"); + printBin(m_GPUOutResulData.wrapAround, "wrapAround"); + } + + // Parallelogram + if (m_samplingMode == PROJECTED_PARALLELOGRAM_SOLID_ANGLE && ImGui::CollapsingHeader("Projected Parallelogram", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Does Not Bound: %s", m_GPUOutResulData.parallelogramDoesNotBound ? "true" : "false"); + ImGui::Text("Area: %.3f", m_GPUOutResulData.parallelogramArea); + ImGui::Text("Failed Vertex Index: %u", m_GPUOutResulData.failedVertexIndex); + for (uint32_t i = 0; i < 4; i++) + ImGui::Text("Edge Is Convex[%u]: %s", i, m_GPUOutResulData.edgeIsConvex[i] ? 
"true" : "false"); + ImGui::Text("Vertices Inside: %s", m_GPUOutResulData.parallelogramVerticesInside ? "true" : "false"); + ImGui::Text("Edges Inside: %s", m_GPUOutResulData.parallelogramEdgesInside ? "true" : "false"); + for (uint32_t i = 0; i < 4; i++) + ImGui::Text("Corner[%u]: (%.3f, %.3f)", i, m_GPUOutResulData.parallelogramCorners[i].x, m_GPUOutResulData.parallelogramCorners[i].y); + } + else if ((m_samplingMode == SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE || m_samplingMode == SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC ||m_samplingMode == SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) && ImGui::CollapsingHeader("Spherical Pyramid", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Spans Hemisphere: %s", m_GPUOutResulData.pyramidSpansHemisphere ? "YES (warning)" : "no"); + ImGui::Text("Best Caliper Edge: %u", m_GPUOutResulData.pyramidBestEdge); + ImGui::Separator(); + + ImGui::Text("Axis 1: (%.4f, %.4f, %.4f)", + m_GPUOutResulData.pyramidAxis1.x, m_GPUOutResulData.pyramidAxis1.y, m_GPUOutResulData.pyramidAxis1.z); + ImGui::Text(" Half-Width: %.4f Offset: %.4f", + m_GPUOutResulData.pyramidHalfWidth1, m_GPUOutResulData.pyramidOffset1); + ImGui::Text(" Bounds: [%.4f, %.4f]", + m_GPUOutResulData.pyramidMin1, m_GPUOutResulData.pyramidMax1); + + ImGui::Text("Axis 2: (%.4f, %.4f, %.4f)", + m_GPUOutResulData.pyramidAxis2.x, m_GPUOutResulData.pyramidAxis2.y, m_GPUOutResulData.pyramidAxis2.z); + ImGui::Text(" Half-Width: %.4f Offset: %.4f", + m_GPUOutResulData.pyramidHalfWidth2, m_GPUOutResulData.pyramidOffset2); + ImGui::Text(" Bounds: [%.4f, %.4f]", + m_GPUOutResulData.pyramidMin2, m_GPUOutResulData.pyramidMax2); + + ImGui::Separator(); + ImGui::Text("Center: (%.4f, %.4f, %.4f)", + m_GPUOutResulData.pyramidCenter.x, m_GPUOutResulData.pyramidCenter.y, m_GPUOutResulData.pyramidCenter.z); + ImGui::Text("Solid Angle (bound): %.6f sr", m_GPUOutResulData.pyramidSolidAngle); + } + // Mode test is parenthesised: && binds tighter than ||, so without the group TRIANGLE_SOLID_ANGLE bypassed the collapsing header + else if ((m_samplingMode == TRIANGLE_SOLID_ANGLE || m_samplingMode == TRIANGLE_PROJECTED_SOLID_ANGLE) && 
ImGui::CollapsingHeader("Spherical Triangle", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Spherical Lune Detected: %s", m_GPUOutResulData.sphericalLuneDetected ? "true" : "false"); + ImGui::Text("Triangle Count: %u", m_GPUOutResulData.triangleCount); + // print solidAngles for each triangle + { + ImGui::Text("Solid Angles per Triangle:"); + // Table calls (including EndTable) are only valid when BeginTable returned true + if (ImGui::BeginTable("SolidAnglesTable", 2)) + { + ImGui::TableSetupColumn("Triangle Index"); + ImGui::TableSetupColumn("Solid Angle"); + ImGui::TableHeadersRow(); + for (uint32_t i = 0; i < m_GPUOutResulData.triangleCount; ++i) + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::Text("%u", i); + ImGui::TableSetColumnIndex(1); + ImGui::Text("%.6f", m_GPUOutResulData.solidAngles[i]); + } + ImGui::Text("Total: %.6f", m_GPUOutResulData.totalSolidAngles); + ImGui::EndTable(); + } + } + } + + { + float32_t3 xAxis = m_OBBModelMatrix[0].xyz; + float32_t3 yAxis = m_OBBModelMatrix[1].xyz; + float32_t3 zAxis = m_OBBModelMatrix[2].xyz; + + float32_t3 nx = normalize(xAxis); + float32_t3 ny = normalize(yAxis); + float32_t3 nz = normalize(zAxis); + + const float epsilon = 1e-4; + bool hasSkew = false; + if (abs(dot(nx, ny)) > epsilon || abs(dot(nx, nz)) > epsilon || abs(dot(ny, nz)) > epsilon) + hasSkew = true; + ImGui::Separator(); + ImGui::Text("Matrix Has Skew: %s", hasSkew ? 
"true" : "false"); + } + + static bool modalShown = false; + static bool modalDismissed = false; + static uint32_t lastSilhouetteIndex = ~0u; + + // Reset modal flags if silhouette configuration changed + if (m_GPUOutResulData.silhouetteIndex != lastSilhouetteIndex) + { + modalShown = false; + modalDismissed = false; // Allow modal to show again for new configuration + lastSilhouetteIndex = m_GPUOutResulData.silhouetteIndex; + } + + // Reset flags when mismatch is cleared + if (!m_GPUOutResulData.edgeVisibilityMismatch && !m_GPUOutResulData.maxTrianglesExceeded && !m_GPUOutResulData.sphericalLuneDetected) + { + modalShown = false; + modalDismissed = false; + } + + // Open modal only if not already shown/dismissed + if ((m_GPUOutResulData.edgeVisibilityMismatch || m_GPUOutResulData.maxTrianglesExceeded || m_GPUOutResulData.sphericalLuneDetected) && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown && !modalDismissed) // Don't reopen if user dismissed it + { + ImGui::OpenPopup("Edge Visibility Mismatch Warning"); + modalShown = true; + } + + // Modal popup + if (ImGui::BeginPopupModal("Edge Visibility Mismatch Warning", NULL, ImGuiWindowFlags_AlwaysAutoResize)) + { + ImGui::TextColored(ImVec4(1.0f, 0.5f, 0.0f, 1.0f), "Warning: Edge Visibility Mismatch Detected!"); + ImGui::Separator(); + ImGui::Text("The silhouette lookup table (LUT) does not match the computed edge visibility."); + ImGui::Text("This indicates the pre-computed silhouette data may be incorrect."); + ImGui::Spacing(); + ImGui::TextWrapped("Configuration Index: %u", m_GPUOutResulData.silhouetteIndex); + ImGui::TextWrapped("Region: (%u, %u, %u)", m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); + ImGui::Spacing(); + ImGui::Text("Mismatched Vertices (bitmask): 0x%08X", m_GPUOutResulData.edgeVisibilityMismatch); + ImGui::Text("Vertices involved in mismatched edges:"); + ImGui::Indent(); + for (int i = 0; i < 8; i++) + { + if (m_GPUOutResulData.edgeVisibilityMismatch 
& (1u << i)) + { + ImGui::BulletText("Vertex %d", i); + } + } + ImGui::Unindent(); + ImGui::Spacing(); + if (ImGui::Button("OK", ImVec2(120, 0))) + { + ImGui::CloseCurrentPopup(); + modalShown = false; + modalDismissed = true; // Mark as dismissed to prevent reopening + } + ImGui::EndPopup(); + } + } + ImGui::End(); + } + + // view matrices editor + { + ImGui::Begin("Matrices"); + + // Draws a caption followed by a rows x columns table of the floats at `pointer` (read row by row), optionally followed by a separator + auto addMatrixTable = [&](const char *topText, const char *tableName, const int rows, const int columns, const float *pointer, const bool withSeparator = true) + { + // The caption is data, not a format string: print it through "%s" + ImGui::Text("%s", topText); + if (ImGui::BeginTable(tableName, columns)) + { + for (int y = 0; y < rows; ++y) + { + ImGui::TableNextRow(); + for (int x = 0; x < columns; ++x) + { + ImGui::TableSetColumnIndex(x); + ImGui::Text("%.3f", *(pointer + (y * columns) + x)); + } + } + ImGui::EndTable(); + } + + if (withSeparator) + ImGui::Separator(); + }; + + static RandomSampler rng(0x45); // Initialize RNG with seed + + // Helper function to check if cube intersects unit sphere at origin + auto isCubeOutsideUnitSphere = [](const float32_t3 &translation, const float32_t3 &scale) -> bool + { + float cubeRadius = glm::length(scale) * 0.5f; + float distanceToCenter = glm::length(translation); + return (distanceToCenter - cubeRadius) > 1.0f; + }; + + static TRS lastTRS = {}; + if (ImGui::Button("Randomize Translation")) + { + lastTRS = m_TRS; // Backup before randomizing + int attempts = 0; + do + { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); + } + ImGui::SameLine(); + if (ImGui::Button("Randomize Rotation")) + { + lastTRS = m_TRS; // Backup before randomizing + m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); + } + ImGui::SameLine(); + if (ImGui::Button("Randomize Scale")) + { + lastTRS = m_TRS; // 
Backup before randomizing + int attempts = 0; + do + { + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); + } + // ImGui::SameLine(); + if (ImGui::Button("Randomize All")) + { + lastTRS = m_TRS; // Backup before randomizing + int attempts = 0; + do + { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); + } + ImGui::SameLine(); + if (ImGui::Button("Revert to Last")) + { + m_TRS = lastTRS; // Restore backed-up TRS + } + + addMatrixTable("Model Matrix", "ModelMatrixTable", 4, 4, &m_OBBModelMatrix[0][0]); + addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, &camera.getViewMatrix()[0].x); + // NOTE(review): the table is labelled "View Projection", so it shows the concatenated matrix rather than the bare projection - confirm this is the intended content + addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, &camera.getConcatenatedMatrix()[0].x, false); + + ImGui::End(); + } + + // Nabla Imgui backend MDI buffer info + // To be 100% accurate and not overly conservative we'd have to explicitly `cull_frees` and defragment each time, + // so unless you do that, don't use this basic info to optimize the size of your IMGUI buffer. 
+ { + auto *streaminingBuffer = imGUI->getStreamingBuffer(); + + const size_t total = streaminingBuffer->get_total_size(); // total memory range size for which allocation can be requested + const size_t freeSize = streaminingBuffer->getAddressAllocator().get_free_size(); // max total free bloock memory size we can still allocate from total memory available + const size_t consumedMemory = total - freeSize; // memory currently consumed by streaming buffer + + float freePercentage = 100.0f * (float)(freeSize) / (float)total; + float allocatedPercentage = (float)(consumedMemory) / (float)total; + + ImVec2 barSize = ImVec2(400, 30); + float windowPadding = 10.0f; + float verticalPadding = ImGui::GetStyle().FramePadding.y; + + ImGui::SetNextWindowSize(ImVec2(barSize.x + 2 * windowPadding, 110 + verticalPadding), ImGuiCond_Always); + ImGui::Begin("Nabla Imgui MDI Buffer Info", nullptr, ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoScrollbar); + + ImGui::Text("Total Allocated Size: %zu bytes", total); + ImGui::Text("In use: %zu bytes", consumedMemory); + ImGui::Text("Buffer Usage:"); + + ImGui::SetCursorPosX(windowPadding); + + if (freePercentage > 70.0f) + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(0.0f, 1.0f, 0.0f, 0.4f)); // Green + else if (freePercentage > 30.0f) + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 1.0f, 0.0f, 0.4f)); // Yellow + else + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 0.0f, 0.0f, 0.4f)); // Red + + ImGui::ProgressBar(allocatedPercentage, barSize, ""); + + ImGui::PopStyleColor(); + + ImDrawList *drawList = ImGui::GetWindowDrawList(); + + ImVec2 progressBarPos = ImGui::GetItemRectMin(); + ImVec2 progressBarSize = ImGui::GetItemRectSize(); + + const char *text = "%.2f%% free"; + char textBuffer[64]; + snprintf(textBuffer, sizeof(textBuffer), text, freePercentage); + + ImVec2 textSize = ImGui::CalcTextSize(textBuffer); + ImVec2 textPos = ImVec2( + progressBarPos.x + (progressBarSize.x - textSize.x) * 0.5f, + 
progressBarPos.y + (progressBarSize.y - textSize.y) * 0.5f); + + ImVec4 bgColor = ImGui::GetStyleColorVec4(ImGuiCol_WindowBg); + drawList->AddRectFilled( + ImVec2(textPos.x - 5, textPos.y - 2), + ImVec2(textPos.x + textSize.x + 5, textPos.y + textSize.y + 2), + ImGui::GetColorU32(bgColor)); + + ImGui::SetCursorScreenPos(textPos); + ImGui::Text("%s", textBuffer); + + ImGui::Dummy(ImVec2(0.0f, verticalPadding)); + + ImGui::End(); + } + ImGui::End(); + + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &m_OBBModelMatrix[0][0]); + } + + smart_refctd_ptr imGUI; + + // descriptor set + smart_refctd_ptr subAllocDS; + enum E_RENDER_VIEWS : uint8_t + { + ERV_MAIN_VIEW, + ERV_SOLID_ANGLE_VIEW, + Count + }; + SubAllocatedDescriptorSet::value_type renderColorViewDescIndices[E_RENDER_VIEWS::Count] = {SubAllocatedDescriptorSet::invalid_value, SubAllocatedDescriptorSet::invalid_value}; + // + Camera camera = Camera(cameraIntialPosition, cameraInitialTarget, {}, 1, 1, nbl::core::vectorSIMDf(0.0f, 0.0f, 1.0f)); + // mutables + struct TRS // Source of truth + { + float32_t3 translation{0.0f, 0.0f, 1.5f}; + float32_t3 rotation{0.0f}; // MUST stay orthonormal + float32_t3 scale{1.0f}; + } m_TRS; + float32_t4x4 m_OBBModelMatrix; // always overwritten from TRS + + // std::string_view objectName; + TransformRequestParams transformParams; + TransformReturnInfo mainViewTransformReturnInfo; + TransformReturnInfo solidAngleViewTransformReturnInfo; + + const static inline core::vectorSIMDf cameraIntialPosition{-3.0f, 6.0f, 3.0f}; + const static inline core::vectorSIMDf cameraInitialTarget{0.f, 0.0f, 3.f}; + const static inline core::vectorSIMDf cameraInitialUp{0.f, 0.f, 1.f}; + + float fov = 90.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + // uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to 
support multiple objects this needs to be changed + bool isPerspective = true, isLH = true, flipGizmoY = true, move = true; + bool firstFrame = true; + + SolidAngleVisualizer *m_visualizer; + } interface; + + class SamplingBenchmark final + { + public: + SamplingBenchmark(SolidAngleVisualizer &base) + : m_api(base.m_api), m_device(base.m_device), m_logger(base.m_logger), m_visualizer(&base) + { + + // setting up pipeline in the constructor + m_queueFamily = base.getComputeQueue()->getFamilyIndex(); + m_cmdpool = base.m_device->createCommandPool(m_queueFamily, IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + // core::smart_refctd_ptr* cmdBuffs[] = { &m_cmdbuf, &m_timestampBeforeCmdBuff, &m_timestampAfterCmdBuff }; + if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) + base.logFail("Failed to create Command Buffers!\n"); + if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_timestampBeforeCmdBuff)) + base.logFail("Failed to create Command Buffers!\n"); + if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_timestampAfterCmdBuff)) + base.logFail("Failed to create Command Buffers!\n"); + + // Load shaders, set up pipelines (one per sampling mode) + { + // Loads the precompiled shader stored under `key`; returns null (after logging) when it cannot be loaded + auto loadShader = [&](auto key) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = base.m_logger.get(); + lp.workingDirectory = "app_resources"; + auto assetBundle = base.m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + base.logFail("Could not load shader!"); + assert(0); + // With asserts compiled out we must not fall through to assets[0] on an empty bundle + return nullptr; + } + assert(assets.size() == 1); + auto shader = IAsset::castDown(assets[0]); + if (!shader) + base.logFail("Failed to load precompiled benchmark shader!\n"); + return shader; + }; + + smart_refctd_ptr shaders[SAMPLING_MODE::Count] = { + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_tri_sa">(m_device.get())), + 
loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_tri_psa">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_para">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_rectangle">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_biquad">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_bilinear">(m_device.get())), + }; + + nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { + {.binding = 0, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1}}; + smart_refctd_ptr dsLayout = base.m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + base.logFail("Failed to create a Descriptor Layout!\n"); + + SPushConstantRange pushConstantRanges[] = { + {.stageFlags = ShaderStage::ESS_COMPUTE, + .offset = 0, + .size = sizeof(BenchmarkPushConstants)}}; + m_pplnLayout = base.m_device->createPipelineLayout(pushConstantRanges, smart_refctd_ptr(dsLayout)); + if (!m_pplnLayout) + base.logFail("Failed to create a Pipeline Layout!\n"); + + for (uint32_t i = 0; i < SAMPLING_MODE::Count; i++) + { + IGPUComputePipeline::SCreationParams params = {}; + params.layout = m_pplnLayout.get(); + params.shader.entryPoint = "main"; + params.shader.shader = shaders[i].get(); + if (!base.m_device->createComputePipelines(nullptr, {¶ms, 1}, &m_pipelines[i])) + base.logFail("Failed to create pipelines (compile & link shaders)!\n"); + } + + // Allocate the memory + { + constexpr size_t BufferSize = BENCHMARK_WORKGROUP_COUNT * BENCHMARK_WORKGROUP_DIMENSION_SIZE_X * + BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y * BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z * sizeof(uint32_t); + + nbl::video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = 
IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + smart_refctd_ptr dummyBuff = base.m_device->createBuffer(std::move(params)); + if (!dummyBuff) + // `params` was moved from above and the size is a size_t: report the constant with %zu instead + base.logFail("Failed to create a GPU Buffer of size %zu!\n", BufferSize); + + dummyBuff->setObjectDebugName("benchmark buffer"); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = dummyBuff->getMemoryReqs(); + + m_allocation = base.m_device->allocate(reqs, dummyBuff.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_allocation.isValid()) + base.logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(dummyBuff->getBoundMemory().memory == m_allocation.memory.get()); + smart_refctd_ptr pool = base.m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, {&dsLayout.get(), 1}); + + m_ds = pool->createDescriptorSet(std::move(dsLayout)); + { + IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = smart_refctd_ptr(dummyBuff); + info[0].info.buffer = {.offset = 0, .size = BufferSize}; + IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = info}}; + base.m_device->updateDescriptorSets(writes, {}); + } + } + } + + IQueryPool::SCreationParams queryPoolCreationParams{}; + queryPoolCreationParams.queryType = IQueryPool::TYPE::TIMESTAMP; + queryPoolCreationParams.queryCount = 2; + queryPoolCreationParams.pipelineStatisticsFlags = IQueryPool::PIPELINE_STATISTICS_FLAGS::NONE; + m_queryPool = m_device->createQueryPool(queryPoolCreationParams); + + m_computeQueue = m_device->getQueue(m_queueFamily, 0); + } + + void run() + { + m_logger->log("\n\nsampling benchmark result:", ILogger::ELL_PERFORMANCE); + + m_logger->log("sampling benchmark, SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE); + + m_logger->log("sampling benchmark, SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC result:", 
ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC); + + m_logger->log("sampling benchmark, SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR); + + m_logger->log("sampling benchmark, PROJECTED_PARALLELOGRAM_SOLID_ANGLE result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE); + + m_logger->log("sampling benchmark, TRIANGLE_SOLID_ANGLE result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::TRIANGLE_SOLID_ANGLE); + + // m_logger->log("sampling benchmark, triangle projected solid angle result:", ILogger::ELL_PERFORMANCE); + // performBenchmark(SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE); + } + + private: + void performBenchmark(SAMPLING_MODE mode) + { + m_device->waitIdle(); + + recordTimestampQueryCmdBuffers(); + + uint64_t semaphoreCounter = 0; + smart_refctd_ptr semaphore = m_device->createSemaphore(semaphoreCounter); + + IQueue::SSubmitInfo::SSemaphoreInfo signals[] = {{.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT}}; + IQueue::SSubmitInfo::SSemaphoreInfo waits[] = {{.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT}}; + + IQueue::SSubmitInfo beforeTimestapSubmitInfo[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufsBegin[] = {{.cmdbuf = m_timestampBeforeCmdBuff.get()}}; + beforeTimestapSubmitInfo[0].commandBuffers = cmdbufsBegin; + beforeTimestapSubmitInfo[0].signalSemaphores = signals; + beforeTimestapSubmitInfo[0].waitSemaphores = waits; + + IQueue::SSubmitInfo afterTimestapSubmitInfo[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufsEnd[] = {{.cmdbuf = m_timestampAfterCmdBuff.get()}}; + afterTimestapSubmitInfo[0].commandBuffers = cmdbufsEnd; + afterTimestapSubmitInfo[0].signalSemaphores = signals; + 
afterTimestapSubmitInfo[0].waitSemaphores = waits; + + IQueue::SSubmitInfo benchmarkSubmitInfos[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = {{.cmdbuf = m_cmdbuf.get()}}; + benchmarkSubmitInfos[0].commandBuffers = cmdbufs; + benchmarkSubmitInfos[0].signalSemaphores = signals; + benchmarkSubmitInfos[0].waitSemaphores = waits; + + m_pushConstants.modelMatrix = float32_t3x4(transpose(m_visualizer->interface.m_OBBModelMatrix)); + m_pushConstants.sampleCount = m_SampleCount; + recordCmdBuff(mode); + + // warmup runs + for (int i = 0; i < WarmupIterations; ++i) + { + if (i == 0) + m_api->startCapture(); + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(benchmarkSubmitInfos); + if (i == 0) + m_api->endCapture(); + } + + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(beforeTimestapSubmitInfo); + + // actual benchmark runs + for (int i = 0; i < Iterations; ++i) + { + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(benchmarkSubmitInfos); + } + + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(afterTimestapSubmitInfo); + + m_device->waitIdle(); + + const uint64_t nativeBenchmarkTimeElapsedNanoseconds = calcTimeElapsed(); + const float nativeBenchmarkTimeElapsedSeconds = double(nativeBenchmarkTimeElapsedNanoseconds) / 1000000000.0; + + m_logger->log("%llu ns, %f s", ILogger::ELL_PERFORMANCE, nativeBenchmarkTimeElapsedNanoseconds, nativeBenchmarkTimeElapsedSeconds); + } + + void recordCmdBuff(SAMPLING_MODE mode) + { + m_cmdbuf->begin(IGPUCommandBuffer::USAGE::SIMULTANEOUS_USE_BIT); + m_cmdbuf->beginDebugMarker("sampling compute dispatch", vectorSIMDf(0, 1, 0, 1)); + m_cmdbuf->bindComputePipeline(m_pipelines[mode].get()); + m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); + 
m_cmdbuf->pushConstants(m_pplnLayout.get(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(BenchmarkPushConstants), &m_pushConstants); + m_cmdbuf->dispatch(BENCHMARK_WORKGROUP_COUNT, 1, 1); + m_cmdbuf->endDebugMarker(); + m_cmdbuf->end(); + } + + // Records the two one-time-submit command buffers that write timestamp queries 0 (before) and 1 (after) + void recordTimestampQueryCmdBuffers() + { + static bool firstInvocation = true; + + if (!firstInvocation) + { + // Both command buffers are re-recorded below, so each of them has to be reset + m_timestampBeforeCmdBuff->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + m_timestampAfterCmdBuff->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + } + + m_timestampBeforeCmdBuff->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + m_timestampBeforeCmdBuff->resetQueryPool(m_queryPool.get(), 0, 2); + m_timestampBeforeCmdBuff->writeTimestamp(PIPELINE_STAGE_FLAGS::NONE, m_queryPool.get(), 0); + m_timestampBeforeCmdBuff->end(); + + m_timestampAfterCmdBuff->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + m_timestampAfterCmdBuff->writeTimestamp(PIPELINE_STAGE_FLAGS::NONE, m_queryPool.get(), 1); + m_timestampAfterCmdBuff->end(); + + firstInvocation = false; + } + + // Reads both timestamp queries (64-bit, waiting for availability) and returns their difference + uint64_t calcTimeElapsed() + { + uint64_t timestamps[2]; + const core::bitflag flags = core::bitflag(IQueryPool::RESULTS_FLAGS::_64_BIT) | core::bitflag(IQueryPool::RESULTS_FLAGS::WAIT_BIT); + m_device->getQueryPoolResults(m_queryPool.get(), 0, 2, &timestamps, sizeof(uint64_t), flags); + return timestamps[1] - timestamps[0]; + } + + private: + core::smart_refctd_ptr m_api; + smart_refctd_ptr m_device; + smart_refctd_ptr m_logger; + SolidAngleVisualizer *m_visualizer; + + nbl::video::IDeviceMemoryAllocator::SAllocation m_allocation = {}; + smart_refctd_ptr m_cmdpool = nullptr; + smart_refctd_ptr m_cmdbuf = nullptr; + smart_refctd_ptr m_ds = nullptr; + smart_refctd_ptr m_pplnLayout = nullptr; + BenchmarkPushConstants m_pushConstants; + smart_refctd_ptr m_pipelines[SAMPLING_MODE::Count]; + + smart_refctd_ptr m_timestampBeforeCmdBuff = nullptr; + smart_refctd_ptr m_timestampAfterCmdBuff = nullptr; + smart_refctd_ptr m_queryPool = nullptr; + + uint32_t 
m_queueFamily; + IQueue *m_computeQueue; + static constexpr int WarmupIterations = 50; + static constexpr int Iterations = 1; + }; + + template + inline bool logFail(const char *msg, Args &&...args) + { + m_logger->log(msg, ILogger::ELL_ERROR, std::forward(args)...); + return false; + } + + std::ofstream m_logFile; +}; + +NBL_MAIN_FUNC(SolidAngleVisualizer) \ No newline at end of file diff --git a/12_MeshLoaders/pipeline.groovy b/73_SolidAngleVisualizer/pipeline.groovy similarity index 100% rename from 12_MeshLoaders/pipeline.groovy rename to 73_SolidAngleVisualizer/pipeline.groovy diff --git a/73_SolidAngleVisualizer/src/transform.cpp b/73_SolidAngleVisualizer/src/transform.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/74_QuantizedSequenceTests/CMakeLists.txt b/74_QuantizedSequenceTests/CMakeLists.txt new file mode 100644 index 000000000..a8dfb6781 --- /dev/null +++ b/74_QuantizedSequenceTests/CMakeLists.txt @@ -0,0 +1,50 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(MSVC) + target_compile_options("${EXECUTABLE_NAME}" PUBLIC "/fp:strict") +else() + target_compile_options("${EXECUTABLE_NAME}" PUBLIC -ffloat-store -frounding-math -fsignaling-nans -ftrapping-math) +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/quantizedSequenceTest.comp.hlsl", + "KEY": "quantizedSequenceTest", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/74_QuantizedSequenceTests/CQuantizedSequenceTester.h b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h new file mode 100644 index 000000000..baeb928b0 --- /dev/null +++ b/74_QuantizedSequenceTests/CQuantizedSequenceTester.h @@ -0,0 +1,304 @@ +#ifndef _NBL_EXAMPLES_TESTS_74_QUANTIZED_SEQUENCE_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_74_QUANTIZED_SEQUENCE_TESTER_INCLUDED_ + +#define GLM_FORCE_RADIANS +#include +#include +#define GLM_ENABLE_EXPERIMENTAL +#include +#include + +#include "nbl/examples/examples.hpp" +#include "app_resources/common.hlsl" +#include "nbl/examples/Tester/ITester.h" +#include +#include + +using namespace nbl; + +class CQuantizedSequenceTester final : public ITester +{ + using base_t = ITester; + +public: + 
CQuantizedSequenceTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; + +private: + QuantizedSequenceInputTestValues generateInputTestValues() override + { + std::uniform_real_distribution realDistribution(0.0f, 1.0f); + std::uniform_int_distribution uint32Distribution(0, std::numeric_limits::max()); + std::uniform_int_distribution uint16Distribution(0, std::numeric_limits::max()); + + QuantizedSequenceInputTestValues testInput; + testInput.scalar = uint16Distribution(getRandomEngine()); + testInput.u16vec2 = uint32_t2(uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine())); + testInput.u16vec3 = uint32_t3(uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine())); + testInput.u16vec4 = uint32_t4(uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine()), uint16Distribution(getRandomEngine())); + + testInput.scalar16 = uint32Distribution(getRandomEngine()); + testInput.uvec2 = uint32_t2(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); + testInput.uvec3 = uint32_t3(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); + testInput.uvec4 = uint32_t4(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); + + testInput.unorm1 = float32_t1(realDistribution(getRandomEngine())); + testInput.unorm2 = float32_t2(realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.unorm3 = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.unorm4 = float32_t4(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + + testInput.scrambleKey1 
= uint32_t1(uint32Distribution(getRandomEngine())); + testInput.scrambleKey2 = uint32_t2(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); + testInput.scrambleKey3 = uint32_t3(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); + testInput.scrambleKey4 = uint32_t4(uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine()), uint32Distribution(getRandomEngine())); + + return testInput; + } + + QuantizedSequenceTestValues determineExpectedResults(const QuantizedSequenceInputTestValues& testInput) override + { + QuantizedSequenceTestValues expected; + // test create/set/get + expected.uintDim1 = testInput.scalar; + { + for (uint32_t i = 0; i < 2; i++) + expected.uintDim2[i] = testInput.uvec2[i] >> 16u; + } + { + for (uint32_t i = 0; i < 3; i++) + expected.uintDim3[i] = testInput.uvec3[i] >> 22u; + } + { + for (uint32_t i = 0; i < 4; i++) + expected.uintDim4[i] = testInput.uvec4[i] >> 24u; + } + + expected.uintVec2_Dim2 = testInput.uvec2; + { + for (uint32_t i = 0; i < 3; i++) + expected.uintVec2_Dim3[i] = testInput.uvec3[i] >> 11u; + } + + expected.uintVec3_Dim3 = testInput.uvec3; + expected.uintVec4_Dim4 = testInput.uvec4; + + expected.u16Dim1 = testInput.scalar16; + { + for (uint32_t i = 0; i < 2; i++) + expected.u16Dim2[i] = testInput.u16vec2[i] >> 8u; + } + { + for (uint32_t i = 0; i < 3; i++) + expected.u16Dim3[i] = testInput.u16vec3[i] >> 11u; + } + { + for (uint32_t i = 0; i < 4; i++) + expected.u16Dim4[i] = testInput.u16vec4[i] >> 12u; + } + + expected.u16Vec2_Dim2 = testInput.u16vec2; + { + for (uint32_t i = 0; i < 4; i++) + expected.u16Vec2_Dim4[i] = testInput.u16vec4[i] >> 8u; + } + + expected.u16Vec3_Dim3 = testInput.u16vec3; + expected.u16Vec4_Dim4 = testInput.u16vec4; + + // test encode/decode uint32, dim 1..4 + { + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t1 stored; + stored[0] = 
uint32_t(testInput.unorm1[0] * fullWidthMultiplier); + expected.unorm1_pre_u32 = float32_t1(stored ^ testInput.scrambleKey1) * bit_cast(0x2f800004u); + } + { + const uint32_t multiplier = (1u << 31u) - 1u; + uint32_t1 stored; + stored[0] = uint32_t(testInput.unorm1[0] * multiplier); + expected.unorm1_post_u32 = float32_t1(stored ^ testInput.scrambleKey1) * bit_cast(0x2f800004u); + } + { + const uint32_t bitsPerComponent = 16u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t2 stored; + for (uint32_t i = 0; i < 2; i++) + stored[i] = uint32_t(testInput.unorm2[i] * fullWidthMultiplier) >> discardBits; + expected.unorm2_pre_u32 = float32_t2(stored ^ testInput.scrambleKey2) * bit_cast(0x2f800004u); + } + { + const uint32_t bitsPerComponent = 16u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t multiplier = (1u << bitsPerComponent) - 1u; + uint32_t2 stored, scrambleKey; + for (uint32_t i = 0; i < 2; i++) + { + stored[i] = uint32_t(testInput.unorm2[i] * multiplier) >> discardBits; + scrambleKey[i] = testInput.scrambleKey2[i] >> discardBits; + } + expected.unorm2_post_u32 = float32_t2(stored ^ scrambleKey) * bit_cast(0x37800080u); + } + { + const uint32_t bitsPerComponent = 10u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t3 stored; + for (uint32_t i = 0; i < 3; i++) + stored[i] = uint32_t(testInput.unorm3[i] * fullWidthMultiplier) >> discardBits; + expected.unorm3_pre_u32 = float32_t3(stored ^ testInput.scrambleKey3) * bit_cast(0x2f800004u); + } + { + const uint32_t bitsPerComponent = 10u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t multiplier = (1u << bitsPerComponent) - 1u; + uint32_t3 stored, scrambleKey; + for (uint32_t i = 0; i < 3; i++) + { + stored[i] = uint32_t(testInput.unorm3[i] * multiplier) >> discardBits; + scrambleKey[i] = testInput.scrambleKey3[i] >> 
discardBits; + } + expected.unorm3_post_u32 = float32_t3(stored ^ scrambleKey) * bit_cast(0x3a802008u); + } + { + const uint32_t bitsPerComponent = 8u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t4 stored; + for (uint32_t i = 0; i < 4; i++) + stored[i] = uint32_t(testInput.unorm4[i] * fullWidthMultiplier) >> discardBits; + expected.unorm4_pre_u32 = float32_t4(stored ^ testInput.scrambleKey4) * bit_cast(0x2f800004u); + } + { + const uint32_t bitsPerComponent = 8u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t multiplier = (1u << bitsPerComponent) - 1u; + uint32_t4 stored, scrambleKey; + for (uint32_t i = 0; i < 4; i++) + { + stored[i] = uint32_t(testInput.unorm4[i] * multiplier) >> discardBits; + scrambleKey[i] = testInput.scrambleKey4[i] >> discardBits; + } + expected.unorm4_post_u32 = float32_t4(stored ^ scrambleKey) * bit_cast(0x3b808081u); + } + + // test encode/decode uint32_tN storage, dim == N + { + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t2 stored; + for (uint32_t i = 0; i < 2; i++) + stored[i] = uint32_t(testInput.unorm2[i] * fullWidthMultiplier); + expected.unorm2_pre_u32t2 = float32_t2(stored ^ testInput.scrambleKey2) * bit_cast(0x2f800004u); + } + { + const uint32_t multiplier = (1u << 31u) - 1u; + uint32_t2 stored; + for (uint32_t i = 0; i < 2; i++) + stored[i] = uint32_t(testInput.unorm2[i] * multiplier); + expected.unorm2_post_u32t2 = float32_t2(stored ^ testInput.scrambleKey2) * bit_cast(0x2f800004u); + } + { + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t3 stored; + for (uint32_t i = 0; i < 3; i++) + stored[i] = uint32_t(testInput.unorm3[i] * fullWidthMultiplier); + expected.unorm3_pre_u32t3 = float32_t3(stored ^ testInput.scrambleKey3) * bit_cast(0x2f800004u); + } + { + const uint32_t multiplier = (1u << 31u) - 1u; + uint32_t3 stored; + for (uint32_t i = 0; i < 3; i++) + stored[i] = 
uint32_t(testInput.unorm3[i] * multiplier); + expected.unorm3_post_u32t3 = float32_t3(stored ^ testInput.scrambleKey3) * bit_cast(0x2f800004u); + } + { + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t4 stored; + for (uint32_t i = 0; i < 4; i++) + stored[i] = uint32_t(testInput.unorm4[i] * fullWidthMultiplier); + expected.unorm4_pre_u32t4 = float32_t4(stored ^ testInput.scrambleKey4) * bit_cast(0x2f800004u); + } + { + const uint32_t multiplier = (1u << 31u) - 1u; + uint32_t4 stored; + for (uint32_t i = 0; i < 4; i++) + stored[i] = uint32_t(testInput.unorm4[i] * multiplier); + expected.unorm4_post_u32t4 = float32_t4(stored ^ testInput.scrambleKey4) * bit_cast(0x2f800004u); + } + + // test encode/decode uint32_t2 storage, dim 3 + { + const uint32_t bitsPerComponent = 21u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t fullWidthMultiplier = (1u << 31u) - 1u; + uint32_t3 stored; + for (uint32_t i = 0; i < 3; i++) + stored[i] = uint32_t(testInput.unorm3[i] * fullWidthMultiplier) >> discardBits; + expected.unorm3_pre_u32t2 = float32_t3(stored ^ testInput.scrambleKey3) * bit_cast(0x2f800004u); + } + { + const uint32_t bitsPerComponent = 21u; + const uint32_t discardBits = 32u - bitsPerComponent; + const uint32_t multiplier = (1u << bitsPerComponent) - 1u; + uint32_t3 stored, scrambleKey; + for (uint32_t i = 0; i < 3; i++) + { + stored[i] = uint32_t(testInput.unorm3[i] * multiplier) >> discardBits; + scrambleKey[i] = testInput.scrambleKey3[i] >> discardBits; + } + expected.unorm3_post_u32t2 = float32_t3(stored ^ scrambleKey) * bit_cast(0x35000004u); + } + + return expected; + } + + bool verifyTestResults(const QuantizedSequenceTestValues& expectedTestValues, const QuantizedSequenceTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override + { + bool pass = true; + pass &= verifyTestValue("get uint from u32", expectedTestValues.uintDim1, testValues.uintDim1, testIteration, seed, testType); 
+ pass &= verifyTestValue("get uint2 from u32", expectedTestValues.uintDim2, testValues.uintDim2, testIteration, seed, testType); + pass &= verifyTestValue("get uint3 from u32", expectedTestValues.uintDim3, testValues.uintDim3, testIteration, seed, testType); + pass &= verifyTestValue("get uint4 from u32", expectedTestValues.uintDim4, testValues.uintDim4, testIteration, seed, testType); + + pass &= verifyTestValue("get uint2 from u32 vec2", expectedTestValues.uintVec2_Dim2, testValues.uintVec2_Dim2, testIteration, seed, testType); + pass &= verifyTestValue("get uint3 from u32 vec2", expectedTestValues.uintVec2_Dim3, testValues.uintVec2_Dim3, testIteration, seed, testType); + + pass &= verifyTestValue("get uint3 from u32 vec3", expectedTestValues.uintVec3_Dim3, testValues.uintVec3_Dim3, testIteration, seed, testType); + pass &= verifyTestValue("get uint4 from u32 vec4", expectedTestValues.uintVec4_Dim4, testValues.uintVec4_Dim4, testIteration, seed, testType); + + pass &= verifyTestValue("get uint from u16", expectedTestValues.u16Dim1, testValues.u16Dim1, testIteration, seed, testType); + pass &= verifyTestValue("get uint2 from u16", expectedTestValues.u16Dim2, testValues.u16Dim2, testIteration, seed, testType); + pass &= verifyTestValue("get uint3 from u16", expectedTestValues.u16Dim3, testValues.u16Dim3, testIteration, seed, testType); + pass &= verifyTestValue("get uint4 from u16", expectedTestValues.u16Dim4, testValues.u16Dim4, testIteration, seed, testType); + + pass &= verifyTestValue("get uint2 from u16 vec2", expectedTestValues.u16Vec2_Dim2, testValues.u16Vec2_Dim2, testIteration, seed, testType); + pass &= verifyTestValue("get uint4 from u16 vec2", expectedTestValues.u16Vec2_Dim4, testValues.u16Vec2_Dim4, testIteration, seed, testType); + + pass &= verifyTestValue("get uint3 from u16 vec3", expectedTestValues.u16Vec3_Dim3, testValues.u16Vec3_Dim3, testIteration, seed, testType); + pass &= verifyTestValue("get uint4 from u16 vec4", 
expectedTestValues.u16Vec4_Dim4, testValues.u16Vec4_Dim4, testIteration, seed, testType); + + pass &= verifyTestValue("encode/decode unorm from u32 (fullwidth)", expectedTestValues.unorm1_pre_u32, testValues.unorm1_pre_u32, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm from u32", expectedTestValues.unorm1_post_u32, testValues.unorm1_post_u32, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm2 from u32 (fullwidth)", expectedTestValues.unorm2_pre_u32, testValues.unorm2_pre_u32, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm2 from u32", expectedTestValues.unorm2_post_u32, testValues.unorm2_post_u32, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm3 from u32 (fullwidth)", expectedTestValues.unorm3_pre_u32, testValues.unorm3_pre_u32, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm3 from u32", expectedTestValues.unorm3_post_u32, testValues.unorm3_post_u32, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm4 from u32 (fullwidth)", expectedTestValues.unorm4_pre_u32, testValues.unorm4_pre_u32, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm4 from u32", expectedTestValues.unorm4_post_u32, testValues.unorm4_post_u32, testIteration, seed, testType); + + pass &= verifyTestValue("encode/decode unorm2 from u32 vec2 (fullwidth)", expectedTestValues.unorm2_pre_u32t2, testValues.unorm2_pre_u32t2, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm2 from u32 vec2", expectedTestValues.unorm2_post_u32t2, testValues.unorm2_post_u32t2, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm3 from u32 vec3 (fullwidth)", expectedTestValues.unorm3_pre_u32t3, testValues.unorm3_pre_u32t3, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm3 from u32 vec3", expectedTestValues.unorm3_post_u32t3, 
testValues.unorm3_post_u32t3, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm4 from u32 vec4 (fullwidth)", expectedTestValues.unorm4_pre_u32t4, testValues.unorm4_pre_u32t4, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm4 from u32 vec4", expectedTestValues.unorm4_post_u32t4, testValues.unorm4_post_u32t4, testIteration, seed, testType); + + pass &= verifyTestValue("encode/decode unorm3 from u32 vec2 (fullwidth)", expectedTestValues.unorm3_pre_u32t2, testValues.unorm3_pre_u32t2, testIteration, seed, testType); + pass &= verifyTestValue("encode/decode unorm3 from u32 vec2", expectedTestValues.unorm3_post_u32t2, testValues.unorm3_post_u32t2, testIteration, seed, testType); + + return pass; + } + +}; + +#endif diff --git a/74_QuantizedSequenceTests/app_resources/common.hlsl b/74_QuantizedSequenceTests/app_resources/common.hlsl new file mode 100644 index 000000000..d19ed0c60 --- /dev/null +++ b/74_QuantizedSequenceTests/app_resources/common.hlsl @@ -0,0 +1,253 @@ +//// Copyright (C) 2023-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". 
+//// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_EXAMPLES_TESTS_74_QUANTIZED_SEQUENCE_COMMON_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_74_QUANTIZED_SEQUENCE_COMMON_INCLUDED_ + +#include + +using namespace nbl::hlsl; +struct QuantizedSequenceInputTestValues +{ + uint32_t scalar; + uint32_t2 uvec2; + uint32_t3 uvec3; + uint32_t4 uvec4; + + uint16_t scalar16; + uint16_t2 u16vec2; + uint16_t3 u16vec3; + uint16_t4 u16vec4; + + float32_t1 unorm1; + float32_t2 unorm2; + float32_t3 unorm3; + float32_t4 unorm4; + + uint32_t1 scrambleKey1; + uint32_t2 scrambleKey2; + uint32_t3 scrambleKey3; + uint32_t4 scrambleKey4; +}; + +struct QuantizedSequenceTestValues +{ + uint32_t uintDim1; + uint32_t2 uintDim2; + uint32_t3 uintDim3; + uint32_t4 uintDim4; + + uint32_t2 uintVec2_Dim2; + uint32_t3 uintVec2_Dim3; + + uint32_t3 uintVec3_Dim3; + uint32_t4 uintVec4_Dim4; + + uint16_t u16Dim1; + uint16_t2 u16Dim2; + uint16_t3 u16Dim3; + uint16_t4 u16Dim4; + + uint16_t2 u16Vec2_Dim2; + uint16_t4 u16Vec2_Dim4; + + uint16_t3 u16Vec3_Dim3; + uint16_t4 u16Vec4_Dim4; + + // pre decode scramble + float32_t1 unorm1_pre_u32; + float32_t2 unorm2_pre_u32; + float32_t3 unorm3_pre_u32; + float32_t4 unorm4_pre_u32; + + float32_t2 unorm2_pre_u32t2; + float32_t3 unorm3_pre_u32t3; + float32_t4 unorm4_pre_u32t4; + + float32_t3 unorm3_pre_u32t2; + + // post decode scramble + float32_t1 unorm1_post_u32; + float32_t2 unorm2_post_u32; + float32_t3 unorm3_post_u32; + float32_t4 unorm4_post_u32; + + float32_t2 unorm2_post_u32t2; + float32_t3 unorm3_post_u32t3; + float32_t4 unorm4_post_u32t4; + + float32_t3 unorm3_post_u32t2; +}; + +struct QuantizedSequenceTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(QuantizedSequenceInputTestValues) input, NBL_REF_ARG(QuantizedSequenceTestValues) output) + { + // test get/set/create + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.scalar); + output.uintDim1 = qs.get(0); + } + { + 
sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec2); + for (uint32_t i = 0; i < 2; i++) + output.uintDim2[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec3); + for (uint32_t i = 0; i < 3; i++) + output.uintDim3[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec4); + for (uint32_t i = 0; i < 4; i++) + output.uintDim4[i] = qs.get(i); + } + + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec2); + for (uint32_t i = 0; i < 2; i++) + output.uintVec2_Dim2[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec3); + for (uint32_t i = 0; i < 3; i++) + output.uintVec2_Dim3[i] = qs.get(i); + } + + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec3); + for (uint32_t i = 0; i < 3; i++) + output.uintVec3_Dim3[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.uvec4); + for (uint32_t i = 0; i < 4; i++) + output.uintVec4_Dim4[i] = qs.get(i); + } + + // u16 + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.scalar16); + output.u16Dim1 = qs.get(0); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec2); + for (uint32_t i = 0; i < 2; i++) + output.u16Dim2[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec3); + for (uint32_t i = 0; i < 3; i++) + output.u16Dim3[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec4); + for (uint32_t i = 0; i < 4; i++) + output.u16Dim4[i] = qs.get(i); + } + + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec2); + for (uint32_t i = 0; i < 2; i++) + output.u16Vec2_Dim2[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = 
sampling::QuantizedSequence::create(input.u16vec4); + for (uint32_t i = 0; i < 4; i++) + output.u16Vec2_Dim4[i] = qs.get(i); + } + + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec3); + for (uint32_t i = 0; i < 3; i++) + output.u16Vec3_Dim3[i] = qs.get(i); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::create(input.u16vec4); + for (uint32_t i = 0; i < 4; i++) + output.u16Vec4_Dim4[i] = qs.get(i); + } + + // test encode/decode uint32, dim 1..4 + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm1); + output.unorm1_pre_u32 = qs.template decode(input.scrambleKey1); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm1); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey1[0]); + output.unorm1_post_u32 = qs.template decode(key); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm2); + output.unorm2_pre_u32 = qs.template decode(input.scrambleKey2); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm2); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey2); + output.unorm2_post_u32 = qs.template decode(key); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + output.unorm3_pre_u32 = qs.template decode(input.scrambleKey3); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey3); + output.unorm3_post_u32 = qs.template decode(key); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm4); + output.unorm4_pre_u32 = qs.template decode(input.scrambleKey4); + } + { + sampling::QuantizedSequence qs = 
sampling::QuantizedSequence::template encode(input.unorm4); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey4); + output.unorm4_post_u32 = qs.template decode(key); + } + + // test encode/decode uint32_tN storage, dim == N + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm2); + output.unorm2_pre_u32t2 = qs.template decode(input.scrambleKey2); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm2); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey2); + output.unorm2_post_u32t2 = qs.template decode(key); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + output.unorm3_pre_u32t3 = qs.template decode(input.scrambleKey3); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey3); + output.unorm3_post_u32t3 = qs.template decode(key); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm4); + output.unorm4_pre_u32t4 = qs.template decode(input.scrambleKey4); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm4); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey4); + output.unorm4_post_u32t4 = qs.template decode(key); + } + + // test encode/decode uint32_t2 storage, dim 3 + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + output.unorm3_pre_u32t2 = qs.template decode(input.scrambleKey3); + } + { + sampling::QuantizedSequence qs = sampling::QuantizedSequence::template encode(input.unorm3); + sampling::QuantizedSequence key = sampling::QuantizedSequence::create(input.scrambleKey3); + output.unorm3_post_u32t2 = qs.template decode(key); + } + 
} +}; + +#endif diff --git a/74_QuantizedSequenceTests/app_resources/quantizedSequenceTest.comp.hlsl b/74_QuantizedSequenceTests/app_resources/quantizedSequenceTest.comp.hlsl new file mode 100644 index 000000000..50a58bdde --- /dev/null +++ b/74_QuantizedSequenceTests/app_resources/quantizedSequenceTest.comp.hlsl @@ -0,0 +1,19 @@ +//// Copyright (C) 2023-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) + +#include "common.hlsl" +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(256, 1, 1)] +[shader("compute")] +void main() +{ + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + QuantizedSequenceTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +} \ No newline at end of file diff --git a/74_QuantizedSequenceTests/main.cpp b/74_QuantizedSequenceTests/main.cpp new file mode 100644 index 000000000..dbba8a35f --- /dev/null +++ b/74_QuantizedSequenceTests/main.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2018-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +#include "app_resources/common.hlsl" + +#include "CQuantizedSequenceTester.h" + +#include +#include +#include + + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +//using namespace glm; + +class QuantizedSequenceTest final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication +{ + using device_base_t = application_templates::MonoDeviceApplication; + using asset_base_t = BuiltinResourcesApplication; +public: + QuantizedSequenceTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! 
+ if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + { + CQuantizedSequenceTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + pplnSetupData.shaderKey = nbl::this_example::builtin::build::get_spirv_key<"quantizedSequenceTest">(m_device.get()); + + CQuantizedSequenceTester quantizedSequenceTester(8); + quantizedSequenceTester.setupPipeline(pplnSetupData); + if (!quantizedSequenceTester.performTestsAndVerifyResults("QuantizedSequenceTestLog.txt")) + return false; + } + + // In contrast to fences, we just need one semaphore to rule all dispatches + return true; + } + + void onAppTerminated_impl() override + { + m_device->waitIdle(); + } + + void workLoopBody() override {} + + bool keepRunning() override { return false; } +}; + +NBL_MAIN_FUNC(QuantizedSequenceTest) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4b1654bd7..7928738d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,14 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(12_MeshLoaders) # add_subdirectory(13_MaterialCompilerTest) - add_subdirectory(14_Mortons EXCLUDE_FROM_ALL) + # + add_subdirectory(14_Mortons) + # + if (NBL_BUILD_MITSUBA_LOADER) + add_subdirectory(15_MitsubaLoader) + endif() + add_subdirectory(16_ZipArchiveLoaderTest) + # Waiting for a refactor #add_subdirectory(27_PLYSTLDemo) @@ -68,16 +75,24 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(30_ComputeShaderPathTracer) add_subdirectory(31_HLSLPathTracer) + add_subdirectory(34_DebugDraw) add_subdirectory(38_EXRSplit) - # if (NBL_BUILD_MITSUBA_LOADER AND NBL_BUILD_OPTIX) - # add_subdirectory(39_DenoiserTonemapper) - # endif() + if (NBL_BUILD_MITSUBA_LOADER) + # if (NBL_BUILD_OPTIX) + 
# add_subdirectory(39_DenoiserTonemapper) + # endif() + if(NBL_BUILD_IMGUI) + add_subdirectory(40_PathTracer) + endif() + endif() #add_subdirectory(43_SumAndCDFFilters) add_subdirectory(47_DerivMapTest EXCLUDE_FROM_ALL) + add_subdirectory(50.IESViewer) add_subdirectory(54_Transformations EXCLUDE_FROM_ALL) add_subdirectory(55_RGB18E7S3 EXCLUDE_FROM_ALL) + add_subdirectory(59_QuaternionTests) add_subdirectory(61_UI) add_subdirectory(62_CAD EXCLUDE_FROM_ALL) # TODO: Erfan, Przemek, Francisco and co. need to resurrect this add_subdirectory(62_SchusslerTest EXCLUDE_FROM_ALL) @@ -91,6 +106,13 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) add_subdirectory(72_CooperativeBinarySearch) + add_subdirectory(73_SolidAngleVisualizer) + + if (NBL_BUILD_MITSUBA_LOADER) + add_subdirectory(73_GeometryInspector) + endif() + + add_subdirectory(74_QuantizedSequenceTests) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) diff --git a/22_CppCompat/ITester.h b/common/include/nbl/examples/Tester/ITester.h similarity index 61% rename from 22_CppCompat/ITester.h rename to common/include/nbl/examples/Tester/ITester.h index 39ceb8141..66cef6888 100644 --- a/22_CppCompat/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -1,337 +1,413 @@ -#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ -#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ - - -#include "nbl/examples/examples.hpp" - -#include "app_resources/common.hlsl" -#include "nbl/asset/metadata/CHLSLMetadata.h" - - -using namespace nbl; - -class ITester -{ -public: - virtual ~ITester() - { - m_outputBufferAllocation.memory->unmap(); - }; - - struct PipelineSetupData - { - std::string testShaderPath; - - core::smart_refctd_ptr device; - core::smart_refctd_ptr api; - core::smart_refctd_ptr assetMgr; - core::smart_refctd_ptr logger; - 
video::IPhysicalDevice* physicalDevice; - uint32_t computeFamilyIndex; - }; - - template - void setupPipeline(const PipelineSetupData& pipleineSetupData) - { - // setting up pipeline in the constructor - m_device = core::smart_refctd_ptr(pipleineSetupData.device); - m_physicalDevice = pipleineSetupData.physicalDevice; - m_api = core::smart_refctd_ptr(pipleineSetupData.api); - m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr); - m_logger = core::smart_refctd_ptr(pipleineSetupData.logger); - m_queueFamily = pipleineSetupData.computeFamilyIndex; - m_semaphoreCounter = 0; - m_semaphore = m_device->createSemaphore(0); - m_cmdpool = m_device->createCommandPool(m_queueFamily, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) - logFail("Failed to create Command Buffers!\n"); - - // Load shaders, set up pipeline - core::smart_refctd_ptr shader; - auto shaderStage = ESS_UNKNOWN; - { - asset::IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty() || assetBundle.getAssetType() != asset::IAsset::ET_SHADER) - { - logFail("Could not load shader!"); - assert(0); - } - - // It would be super weird if loading a shader from a file produced more than 1 asset - assert(assets.size() == 1); - core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - const auto hlslMetadata = static_cast(assetBundle.getMetadata()); - shaderStage = hlslMetadata->shaderStages->front(); - - auto* compilerSet = m_assetMgr->getCompilerSet(); - - asset::IShaderCompiler::SCompilerOptions options = {}; - options.stage = shaderStage; - options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = 
nullptr; - options.debugInfoFlags |= asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = m_logger.get(); - options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); - - shader = compilerSet->compileToSPIRV(source.get(), options); - } - - if (!shader) - logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V we're feeding it!\n"); - - video::IGPUDescriptorSetLayout::SBinding bindings[2] = { - { - .binding = 0, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - }, - { - .binding = 1, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - } - }; - - core::smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); - if (!dsLayout) - logFail("Failed to create a Descriptor Layout!\n"); - - m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout)); - if (!m_pplnLayout) - logFail("Failed to create a Pipeline Layout!\n"); - - { - video::IGPUComputePipeline::SCreationParams params = {}; - params.layout = m_pplnLayout.get(); - params.shader.entryPoint = "main"; - params.shader.shader = shader.get(); - if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) - logFail("Failed to create pipelines (compile & link shaders)!\n"); - } - - // Allocate memory of the input buffer - { - constexpr size_t BufferSize = sizeof(InputStruct); - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr inputBuff = 
m_device->createBuffer(std::move(params)); - if (!inputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - - inputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_inputBufferAllocation.isValid()) - logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); - - assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - m_ds = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(inputBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - // Allocate memory of the output buffer - { - constexpr size_t BufferSize = sizeof(OutputStruct); - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr outputBuff = m_device->createBuffer(std::move(params)); - if (!outputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - - outputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_outputBufferAllocation = m_device->allocate(reqs, 
outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_outputBufferAllocation.isValid()) - logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); - - assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(outputBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 1,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - if (!m_outputBufferAllocation.memory->map({ 0ull,m_outputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize()); - if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - assert(memoryRange.valid() && memoryRange.length >= sizeof(OutputStruct)); - - m_queue = m_device->getQueue(m_queueFamily, 0); - } - - enum class TestType - { - CPU, - GPU - }; - - template - void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) - { - static constexpr float MaxAllowedError = 0.1f; - if (std::abs(double(expectedVal) - double(testVal)) <= MaxAllowedError) - return; - - std::stringstream ss; - switch (testType) - { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; 
- break; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; - } - - ss << "nbl::hlsl::" << memberName << " produced incorrect output! test value: " << testVal << " expected value: " << expectedVal << '\n'; - - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); - } - - template - void verifyTestVector3dValue(const std::string& memberName, const nbl::hlsl::vector& expectedVal, const nbl::hlsl::vector& testVal, const TestType testType) - { - static constexpr float MaxAllowedError = 0.1f; - if (std::abs(double(expectedVal.x) - double(testVal.x)) <= MaxAllowedError && - std::abs(double(expectedVal.y) - double(testVal.y)) <= MaxAllowedError && - std::abs(double(expectedVal.z) - double(testVal.z)) <= MaxAllowedError) - return; - - std::stringstream ss; - switch (testType) - { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; - } - - ss << "nbl::hlsl::" << memberName << " produced incorrect output! test value: " << - testVal.x << ' ' << testVal.y << ' ' << testVal.z << - " expected value: " << expectedVal.x << ' ' << expectedVal.y << ' ' << expectedVal.z << '\n'; - - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); - } - - template - void verifyTestMatrix3x3Value(const std::string& memberName, const nbl::hlsl::matrix& expectedVal, const nbl::hlsl::matrix& testVal, const TestType testType) - { - for (int i = 0; i < 3; ++i) - { - auto expectedValRow = expectedVal[i]; - auto testValRow = testVal[i]; - verifyTestVector3dValue(memberName, expectedValRow, testValRow, testType); - } - } - -protected: - uint32_t m_queueFamily; - core::smart_refctd_ptr m_device; - core::smart_refctd_ptr m_api; - video::IPhysicalDevice* m_physicalDevice; - core::smart_refctd_ptr m_assetMgr; - core::smart_refctd_ptr m_logger; - video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {}; - video::IDeviceMemoryAllocator::SAllocation m_outputBufferAllocation = {}; - core::smart_refctd_ptr m_cmdbuf = nullptr; - 
core::smart_refctd_ptr m_cmdpool = nullptr; - core::smart_refctd_ptr m_ds = nullptr; - core::smart_refctd_ptr m_pplnLayout = nullptr; - core::smart_refctd_ptr m_pipeline; - core::smart_refctd_ptr m_semaphore; - video::IQueue* m_queue; - uint64_t m_semaphoreCounter; - - template - OutputStruct dispatch(const InputStruct& input) - { - // Update input buffer - if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize()); - if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), &input, sizeof(InputStruct)); - - m_inputBufferAllocation.memory->unmap(); - - // record command buffer - m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); - m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); - m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); - m_cmdbuf->bindComputePipeline(m_pipeline.get()); - m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); - m_cmdbuf->dispatch(1, 1, 1); - m_cmdbuf->endDebugMarker(); - m_cmdbuf->end(); - - video::IQueue::SSubmitInfo submitInfos[1] = {}; - const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; - submitInfos[0].commandBuffers = cmdbufs; - const video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; - submitInfos[0].signalSemaphores = signals; - - m_api->startCapture(); - 
m_queue->submit(submitInfos); - m_api->endCapture(); - - m_device->waitIdle(); - OutputStruct output; - std::memcpy(&output, static_cast(m_outputBufferAllocation.memory->getMappedPointer()), sizeof(OutputStruct)); - m_device->waitIdle(); - - return output; - } - -private: - template - inline void logFail(const char* msg, Args&&... args) - { - m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); - exit(-1); - } -}; - +#ifndef _NBL_COMMON_I_TESTER_INCLUDED_ +#define _NBL_COMMON_I_TESTER_INCLUDED_ + +#include +#include +#include +#include + +using namespace nbl; + +#include + +template +class ITester +{ +public: + struct PipelineSetupData + { + std::string shaderKey; + core::smart_refctd_ptr device; + core::smart_refctd_ptr api; + core::smart_refctd_ptr assetMgr; + core::smart_refctd_ptr logger; + video::IPhysicalDevice* physicalDevice; + uint32_t computeFamilyIndex; + }; + + void setupPipeline(const PipelineSetupData& pipleineSetupData) + { + // setting up pipeline in the constructor + m_device = core::smart_refctd_ptr(pipleineSetupData.device); + m_physicalDevice = pipleineSetupData.physicalDevice; + m_api = core::smart_refctd_ptr(pipleineSetupData.api); + m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr); + m_logger = core::smart_refctd_ptr(pipleineSetupData.logger); + m_queueFamily = pipleineSetupData.computeFamilyIndex; + m_semaphoreCounter = 0; + m_semaphore = m_device->createSemaphore(0); + m_cmdpool = m_device->createCommandPool(m_queueFamily, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) + logFail("Failed to create Command Buffers!\n"); + + // Load shaders, set up pipeline + core::smart_refctd_ptr shader; + { + asset::IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; // virtual root + auto assetBundle = 
m_assetMgr->getAsset(pipleineSetupData.shaderKey.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return logFail("Could not load shader!"); + + // It would be super weird if loading a shader from a file produced more than 1 asset + assert(assets.size() == 1); + core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); + + shader = m_device->compileShader({ source.get() }); + } + + video::IGPUDescriptorSetLayout::SBinding bindings[2] = { + { + .binding = 0, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1 + }, + { + .binding = 1, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1 + } + }; + + core::smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + logFail("Failed to create a Descriptor Layout!\n"); + + m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout)); + if (!m_pplnLayout) + logFail("Failed to create a Pipeline Layout!\n"); + + { + video::IGPUComputePipeline::SCreationParams params = {}; + params.layout = m_pplnLayout.get(); + params.shader.entryPoint = "main"; + params.shader.shader = shader.get(); + if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) + logFail("Failed to create pipelines (compile & link shaders)!\n"); + } + + // Allocate memory of the input buffer + { + const size_t BufferSize = sizeof(InputTestValues) * m_testIterationCount; + + video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + core::smart_refctd_ptr inputBuff = m_device->createBuffer(std::move(params)); + if (!inputBuff) + logFail("Failed to create a GPU Buffer of size %d!\n", 
params.size); + + inputBuff->setObjectDebugName("emulated_float64_t output buffer"); + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_inputBufferAllocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get()); + core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + + m_ds = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); + { + video::IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = core::smart_refctd_ptr(inputBuff); + info[0].info.buffer = { .offset = 0,.size = BufferSize }; + video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} + }; + m_device->updateDescriptorSets(writes, {}); + } + } + + // Allocate memory of the output buffer + { + const size_t BufferSize = sizeof(TestResults) * m_testIterationCount; + + video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + core::smart_refctd_ptr outputBuff = m_device->createBuffer(std::move(params)); + if (!outputBuff) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + outputBuff->setObjectDebugName("emulated_float64_t output buffer"); + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_outputBufferAllocation = m_device->allocate(reqs, outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_outputBufferAllocation.isValid()) + 
logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get()); + core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + + { + video::IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = core::smart_refctd_ptr(outputBuff); + info[0].info.buffer = { .offset = 0,.size = BufferSize }; + video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(),.binding = 1,.arrayElement = 0,.count = 1,.info = info} + }; + m_device->updateDescriptorSets(writes, {}); + } + } + + if (!m_outputBufferAllocation.memory->map({ 0ull,m_outputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches + const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize()); + if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + + assert(memoryRange.valid() && memoryRange.length >= sizeof(TestResults)); + + m_queue = m_device->getQueue(m_queueFamily, 0); + } + + bool performTestsAndVerifyResults(const std::string& logFileName) + { + m_logFile.open(logFileName, std::ios::out | std::ios::trunc); + if (!m_logFile.is_open()) + m_logger->log("Failed to open log file!", system::ILogger::ELL_ERROR); + + core::vector inputTestValues; + core::vector exceptedTestResults; + + inputTestValues.reserve(m_testIterationCount); + exceptedTestResults.reserve(m_testIterationCount); + + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); + for (int i = 0; i < m_testIterationCount; 
++i) + { + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput = generateInputTestValues(); + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestResults expected = determineExpectedResults(testInput); + + inputTestValues.push_back(testInput); + exceptedTestResults.push_back(expected); + } + + core::vector cpuTestResults = performCpuTests(inputTestValues); + core::vector gpuTestResults = performGpuTests(inputTestValues); + + bool pass = verifyAllTestResults(cpuTestResults, gpuTestResults, exceptedTestResults); + + m_logger->log("TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + reloadSeed(); + + m_logFile.close(); + return pass; + } + + virtual ~ITester() + { + m_outputBufferAllocation.memory->unmap(); + }; + +protected: + enum class TestType + { + CPU, + GPU + }; + + /** + * @param testBatchCount one test batch is equal to m_WorkgroupSize, so number of tests performed will be m_WorkgroupSize * testbatchCount + */ + ITester(const uint32_t testBatchCount) + : m_testBatchCount(testBatchCount), m_testIterationCount(testBatchCount * m_WorkgroupSize) + { + reloadSeed(); + }; + + virtual bool verifyTestResults(const TestResults& expectedTestValues, const TestResults& testValues, const size_t testIteration, const uint32_t seed, TestType testType) = 0; + + virtual InputTestValues generateInputTestValues() = 0; + + virtual TestResults determineExpectedResults(const InputTestValues& testInput) = 0; + + std::mt19937& getRandomEngine() + { + return m_mersenneTwister; + } + +protected: + uint32_t m_queueFamily; + core::smart_refctd_ptr m_device; + core::smart_refctd_ptr m_api; + video::IPhysicalDevice* m_physicalDevice; + core::smart_refctd_ptr m_assetMgr; + core::smart_refctd_ptr m_logger; + video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {}; + video::IDeviceMemoryAllocator::SAllocation 
m_outputBufferAllocation = {}; + core::smart_refctd_ptr m_cmdbuf = nullptr; + core::smart_refctd_ptr m_cmdpool = nullptr; + core::smart_refctd_ptr m_ds = nullptr; + core::smart_refctd_ptr m_pplnLayout = nullptr; + core::smart_refctd_ptr m_pipeline; + core::smart_refctd_ptr m_semaphore; + video::IQueue* m_queue; + uint64_t m_semaphoreCounter; + + void dispatchGpuTests(const core::vector& input, core::vector& output) + { + // Update input buffer + if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize()); + if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + + assert(m_testIterationCount == input.size()); + const size_t inputDataSize = sizeof(InputTestValues) * m_testIterationCount; + std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), input.data(), inputDataSize); + + m_inputBufferAllocation.memory->unmap(); + + // record command buffer + const uint32_t dispatchSizeX = m_testBatchCount; + m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); + m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); + m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); + m_cmdbuf->bindComputePipeline(m_pipeline.get()); + m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); + m_cmdbuf->dispatch(dispatchSizeX, 1, 1); + m_cmdbuf->endDebugMarker(); + m_cmdbuf->end(); + + video::IQueue::SSubmitInfo submitInfos[1] = {}; + const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; + submitInfos[0].commandBuffers = cmdbufs; + const 
video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; + submitInfos[0].signalSemaphores = signals; + + m_api->startCapture(); + m_queue->submit(submitInfos); + m_api->endCapture(); + + m_device->waitIdle(); + + // save test results + assert(m_testIterationCount == output.size()); + const size_t outputDataSize = sizeof(TestResults) * m_testIterationCount; + std::memcpy(output.data(), static_cast(m_outputBufferAllocation.memory->getMappedPointer()), outputDataSize); + + m_device->waitIdle(); + } + + template + bool verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, + const size_t testIteration, const uint32_t seed, const TestType testType, const float64_t maxAllowedDifference = 0.0) + { + if (compareTestValues(expectedVal, testVal, maxAllowedDifference)) + return true; + + printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType); + return false; + } + + template + void printTestFail(const std::string& memberName, const T& expectedVal, const T& testVal, + const size_t testIteration, const uint32_t seed, const TestType testType) + { + std::stringstream ss; + switch (testType) + { + case TestType::CPU: + ss << "CPU TEST ERROR:\n"; + break; + case TestType::GPU: + ss << "GPU TEST ERROR:\n"; + } + + ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n'; + ss << "TEST ITERATION INDEX: " << testIteration << " SEED: " << seed << '\n'; + ss << "EXPECTED VALUE: " << system::to_string(expectedVal) << " TEST VALUE: " << system::to_string(testVal) << '\n'; + + m_logger->log("%s", system::ILogger::ELL_ERROR, ss.str().c_str()); + m_logFile << ss.str() << '\n'; + } + +private: + template + inline void logFail(const char* msg, Args&&... 
args) + { + m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); + exit(-1); + } + + core::vector performCpuTests(const core::vector& inputTestValues) + { + core::vector output(m_testIterationCount); + TestExecutor testExecutor; + + auto iterations = std::views::iota(0ull, m_testIterationCount); + std::for_each(std::execution::par_unseq, iterations.begin(), iterations.end(), + [&](size_t i) + { + testExecutor(inputTestValues[i], output[i]); + } + ); + + return output; + } + + core::vector performGpuTests(const core::vector& inputTestValues) + { + core::vector output(m_testIterationCount); + dispatchGpuTests(inputTestValues, output); + + return output; + } + + bool verifyAllTestResults(const core::vector& cpuTestReults, const core::vector& gpuTestReults, const core::vector& exceptedTestReults) + { + bool pass = true; + for (int i = 0; i < m_testIterationCount; ++i) + { + pass = verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, m_seed, ITester::TestType::CPU) && pass; + pass = verifyTestResults(exceptedTestReults[i], gpuTestReults[i], i, m_seed, ITester::TestType::GPU) && pass; + } + return pass; + } + + void reloadSeed() + { + std::random_device rd; + m_seed = rd(); + m_mersenneTwister = std::mt19937(m_seed); + } + + template + bool compareTestValues(const T& lhs, const T& rhs, const float64_t maxAllowedDifference) + { + return lhs == rhs; + } + template requires concepts::FloatingPointLikeScalar || concepts::FloatingPointLikeVectorial || (concepts::Matricial && concepts::FloatingPointLikeScalar::scalar_type>) + bool compareTestValues(const T& lhs, const T& rhs, const float64_t maxAllowedDifference) + { + return nbl::hlsl::testing::relativeApproxCompare(lhs, rhs, maxAllowedDifference); + } + + const size_t m_testIterationCount; + const uint32_t m_testBatchCount; + static constexpr size_t m_WorkgroupSize = 256u; + // seed will change after every call to performTestsAndVerifyResults() + std::mt19937 m_mersenneTwister; + uint32_t m_seed; + 
std::ofstream m_logFile; +}; + #endif \ No newline at end of file diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index 3b3cd38d8..8fadbd866 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -12,12 +12,15 @@ #include #include +#include +#include +#include -class Camera -{ +class Camera +{ public: Camera() = default; - Camera(const nbl::core::vectorSIMDf& position, const nbl::core::vectorSIMDf& lookat, const nbl::core::matrix4SIMD& projection, float moveSpeed = 1.0f, float rotateSpeed = 1.0f, const nbl::core::vectorSIMDf& upVec = nbl::core::vectorSIMDf(0.0f, 1.0f, 0.0f), const nbl::core::vectorSIMDf& backupUpVec = nbl::core::vectorSIMDf(0.5f, 1.0f, 0.0f)) + Camera(const nbl::core::vectorSIMDf& position, const nbl::core::vectorSIMDf& lookat, const nbl::hlsl::float32_t4x4& projection, float moveSpeed = 1.0f, float rotateSpeed = 1.0f, const nbl::core::vectorSIMDf& upVec = nbl::core::vectorSIMDf(0.0f, 1.0f, 0.0f), const nbl::core::vectorSIMDf& backupUpVec = nbl::core::vectorSIMDf(0.5f, 1.0f, 0.0f)) : position(position) , initialPosition(position) , target(lookat) @@ -27,6 +30,7 @@ class Camera , rotateSpeed(rotateSpeed) , upVector(upVec) , backupUpVector(backupUpVec) + , viewMatrix(nbl::hlsl::math::linalg::diagonal(1.0f)) { initDefaultKeysMap(); allKeysUp(); @@ -39,6 +43,8 @@ class Camera enum E_CAMERA_MOVE_KEYS : uint8_t { ECMK_MOVE_FORWARD = 0, + ECMK_MOVE_UP, + ECMK_MOVE_DOWN, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT, @@ -47,6 +53,8 @@ class Camera inline void mapKeysToWASD() { + keysMap[ECMK_MOVE_UP] = nbl::ui::EKC_E; + keysMap[ECMK_MOVE_DOWN] = nbl::ui::EKC_Q; keysMap[ECMK_MOVE_FORWARD] = nbl::ui::EKC_W; keysMap[ECMK_MOVE_BACKWARD] = nbl::ui::EKC_S; keysMap[ECMK_MOVE_LEFT] = nbl::ui::EKC_A; @@ -63,30 +71,26 @@ class Camera inline void mapKeysCustom(std::array& map) { keysMap = map; } - inline const nbl::core::matrix4SIMD& 
getProjectionMatrix() const { return projMatrix; } - inline const nbl::core::matrix3x4SIMD& getViewMatrix() const { return viewMatrix; } - inline const nbl::core::matrix4SIMD& getConcatenatedMatrix() const { return concatMatrix; } + inline const nbl::hlsl::float32_t4x4& getProjectionMatrix() const { return projMatrix; } + inline const nbl::hlsl::float32_t3x4& getViewMatrix() const { return viewMatrix; } + inline const nbl::hlsl::float32_t4x4& getConcatenatedMatrix() const { return concatMatrix; } - inline void setProjectionMatrix(const nbl::core::matrix4SIMD& projection) + inline void setProjectionMatrix(const nbl::hlsl::float32_t4x4& projection) { projMatrix = projection; - - const auto hlslMatMap = *reinterpret_cast(&projMatrix); // TEMPORARY TILL THE CAMERA CLASS IS REFACTORED TO WORK WITH HLSL MATRICIES! - { - leftHanded = nbl::hlsl::determinant(hlslMatMap) < 0.f; - } - concatMatrix = nbl::core::matrix4SIMD::concatenateBFollowedByAPrecisely(projMatrix, nbl::core::matrix4SIMD(viewMatrix)); + leftHanded = nbl::hlsl::determinant(projMatrix) < 0.f; + concatMatrix = nbl::hlsl::math::linalg::promoted_mul(projMatrix, viewMatrix); } - + inline void setPosition(const nbl::core::vectorSIMDf& pos) { position.set(pos); recomputeViewMatrix(); } - + inline const nbl::core::vectorSIMDf& getPosition() const { return position; } - inline void setTarget(const nbl::core::vectorSIMDf& pos) + inline void setTarget(const nbl::core::vectorSIMDf& pos) { target.set(pos); recomputeViewMatrix(); @@ -95,11 +99,11 @@ class Camera inline const nbl::core::vectorSIMDf& getTarget() const { return target; } inline void setUpVector(const nbl::core::vectorSIMDf& up) { upVector = up; } - + inline void setBackupUpVector(const nbl::core::vectorSIMDf& up) { backupUpVector = up; } inline const nbl::core::vectorSIMDf& getUpVector() const { return upVector; } - + inline const nbl::core::vectorSIMDf& getBackupUpVector() const { return backupUpVector; } inline const float getMoveSpeed() const { return 
moveSpeed; } @@ -110,24 +114,28 @@ class Camera inline void setRotateSpeed(const float _rotateSpeed) { rotateSpeed = _rotateSpeed; } - inline void recomputeViewMatrix() + inline void recomputeViewMatrix() { - nbl::core::vectorSIMDf pos = position; - nbl::core::vectorSIMDf localTarget = nbl::core::normalize(target - pos); + nbl::hlsl::float32_t3 pos = nbl::core::convertToHLSLVector(position).xyz; + nbl::hlsl::float32_t3 localTarget = nbl::hlsl::normalize(nbl::core::convertToHLSLVector(target).xyz - pos); + // TODO: remove completely when removing vectorSIMD + nbl::hlsl::float32_t3 _target = nbl::core::convertToHLSLVector(target).xyz; // if upvector and vector to the target are the same, we have a // problem. so solve this problem: - nbl::core::vectorSIMDf up = nbl::core::normalize(upVector); - nbl::core::vectorSIMDf cross = nbl::core::cross(localTarget, up); - bool upVectorNeedsChange = nbl::core::lengthsquared(cross)[0] == 0; + nbl::hlsl::float32_t3 up = nbl::core::convertToHLSLVector(nbl::core::normalize(upVector)).xyz; + nbl::hlsl::float32_t3 cross = nbl::hlsl::cross(localTarget, up); + const float squaredLength = dot(cross, cross); + const bool upVectorNeedsChange = squaredLength == 0; if (upVectorNeedsChange) - up = nbl::core::normalize(backupUpVector); + up = nbl::core::convertToHLSLVector(nbl::core::normalize(backupUpVector)); if (leftHanded) - viewMatrix = nbl::core::matrix3x4SIMD::buildCameraLookAtMatrixLH(pos, target, up); + viewMatrix = nbl::hlsl::math::linalg::lhLookAt(pos, _target, up); else - viewMatrix = nbl::core::matrix3x4SIMD::buildCameraLookAtMatrixRH(pos, target, up); - concatMatrix = nbl::core::matrix4SIMD::concatenateBFollowedByAPrecisely(projMatrix, nbl::core::matrix4SIMD(viewMatrix)); + viewMatrix = nbl::hlsl::math::linalg::rhLookAt(pos, _target, up); + + concatMatrix = nbl::hlsl::math::linalg::promoted_mul(projMatrix, viewMatrix); } inline bool getLeftHanded() const { return leftHanded; } @@ -136,58 +144,78 @@ class Camera void 
mouseProcess(const nbl::ui::IMouseEventChannel::range_t& events) { - for (auto eventIt=events.begin(); eventIt!=events.end(); eventIt++) + for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) { auto ev = *eventIt; - if(ev.type == nbl::ui::SMouseEvent::EET_CLICK && ev.clickEvent.mouseButton == nbl::ui::EMB_LEFT_BUTTON) - if(ev.clickEvent.action == nbl::ui::SMouseEvent::SClickEvent::EA_PRESSED) + if (ev.type == nbl::ui::SMouseEvent::EET_CLICK && ev.clickEvent.mouseButton == nbl::ui::EMB_LEFT_BUTTON) + if (ev.clickEvent.action == nbl::ui::SMouseEvent::SClickEvent::EA_PRESSED) mouseDown = true; else if (ev.clickEvent.action == nbl::ui::SMouseEvent::SClickEvent::EA_RELEASED) mouseDown = false; - if(ev.type == nbl::ui::SMouseEvent::EET_MOVEMENT && mouseDown) + if (ev.type == nbl::ui::SMouseEvent::EET_MOVEMENT && mouseDown) { - nbl::core::vectorSIMDf pos = getPosition(); - nbl::core::vectorSIMDf localTarget = getTarget() - pos; - - // Get Relative Rotation for localTarget in Radians - float relativeRotationX, relativeRotationY; - relativeRotationY = atan2(localTarget.X, localTarget.Z); - const double z1 = nbl::core::sqrt(localTarget.X*localTarget.X + localTarget.Z*localTarget.Z); - relativeRotationX = atan2(z1, localTarget.Y) - nbl::core::PI()/2; - - constexpr float RotateSpeedScale = 0.003f; - relativeRotationX -= ev.movementEvent.relativeMovementY * rotateSpeed * RotateSpeedScale * -1.0f; - float tmpYRot = ev.movementEvent.relativeMovementX * rotateSpeed * RotateSpeedScale * -1.0f; - + // --- corrected camera rotation update --- + nbl::hlsl::float32_t3 pos = nbl::core::convertToHLSLVector(getPosition()).xyz; + nbl::hlsl::float32_t3 targetVec = nbl::core::convertToHLSLVector(getTarget()).xyz - pos; // original vector to target + + // preserve distance so we don't collapse to unit length + float targetDistance = nbl::hlsl::length(targetVec); + if (targetDistance < 1e-6f) targetDistance = 1.0f; // avoid div-by-zero + + nbl::hlsl::float32_t3 forward = 
nbl::hlsl::normalize(targetVec); + nbl::hlsl::float32_t3 upVector = nbl::core::convertToHLSLVector(getUpVector()).xyz; + nbl::hlsl::float32_t3 right = nbl::hlsl::normalize(nbl::hlsl::cross(upVector, forward)); + nbl::hlsl::float32_t3 correctedForward = nbl::hlsl::normalize(nbl::hlsl::cross(right, upVector)); + + // horizontal yaw (angle from correctedForward towards right) + float rightDot = nbl::hlsl::dot(targetVec, right); + float forwardDot = nbl::hlsl::dot(targetVec, correctedForward); + float relativeRotationY = atan2(rightDot, forwardDot); + + // pitch: angle above/below horizontal + float upDot = nbl::hlsl::dot(targetVec, upVector); + nbl::hlsl::float32_t3 horizontalComponent = targetVec - upVector * upDot; + float horizontalLength = nbl::hlsl::length(horizontalComponent); + float relativeRotationX = atan2(upDot, horizontalLength); + + // apply mouse/controller deltas (signs simplified) + constexpr float RotateSpeedScale = 0.003f; + relativeRotationX -= ev.movementEvent.relativeMovementY * rotateSpeed * RotateSpeedScale; + float tmpYRot = ev.movementEvent.relativeMovementX * rotateSpeed * RotateSpeedScale; if (leftHanded) - relativeRotationY -= tmpYRot; - else relativeRotationY += tmpYRot; + else + relativeRotationY -= tmpYRot; - const double MaxVerticalAngle = nbl::core::radians(88.0f); + // clamp pitch + const float MaxVerticalAngle = nbl::core::radians(88.0f); + if (relativeRotationX > MaxVerticalAngle) relativeRotationX = MaxVerticalAngle; + if (relativeRotationX < -MaxVerticalAngle) relativeRotationX = -MaxVerticalAngle; - if (relativeRotationX > MaxVerticalAngle*2 && relativeRotationX < 2 * nbl::core::PI()-MaxVerticalAngle) - relativeRotationX = 2 * nbl::core::PI()-MaxVerticalAngle; - else - if (relativeRotationX > MaxVerticalAngle && relativeRotationX < 2 * nbl::core::PI()-MaxVerticalAngle) - relativeRotationX = MaxVerticalAngle; + // build final direction by first yaw-rotating in the horizontal plane, then pitching + float cosYaw = 
cos(relativeRotationY); + float sinYaw = sin(relativeRotationY); + nbl::hlsl::float32_t3 yawForward = correctedForward * cosYaw + right * sinYaw; + yawForward = nbl::hlsl::normalize(yawForward); + + float cosPitch = cos(relativeRotationX); + float sinPitch = sin(relativeRotationX); + nbl::hlsl::float32_t3 finalDir = nbl::hlsl::normalize(yawForward * cosPitch + upVector * sinPitch); - localTarget.set(0,0, nbl::core::max(1.f, nbl::core::length(pos)[0]), 1.f); + // restore original distance and set target + nbl::core::vectorSIMDf finalTarget = nbl::core::constructVecorSIMDFromHLSLVector(pos + finalDir * targetDistance); + finalTarget.w = 1.0f; + setTarget(finalTarget); - nbl::core::matrix3x4SIMD mat; - mat.setRotation(nbl::core::quaternion(relativeRotationX, relativeRotationY, 0)); - mat.transformVect(localTarget); - - setTarget(localTarget + pos); } } } void keyboardProcess(const nbl::ui::IKeyboardEventChannel::range_t& events) { - for(uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) + for (uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) perActionDt[k] = 0.0; /* @@ -196,35 +224,37 @@ class Camera * And If an UP event was sent It will get subtracted it from this value. 
(Currently Disabled Because we Need better Oracle) */ - for(uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) - if(keysDown[k]) + for (uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) + if (keysDown[k]) { auto timeDiff = std::chrono::duration_cast(nextPresentationTimeStamp - lastVirtualUpTimeStamp).count(); - assert(timeDiff >= 0); + if (timeDiff < 0) + timeDiff = 0; perActionDt[k] += timeDiff; } - for (auto eventIt=events.begin(); eventIt!=events.end(); eventIt++) + for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) { const auto ev = *eventIt; - + // accumulate the periods for which a key was down - const auto timeDiff = std::chrono::duration_cast(nextPresentationTimeStamp - ev.timeStamp).count(); - assert(timeDiff >= 0); + auto timeDiff = std::chrono::duration_cast(nextPresentationTimeStamp - ev.timeStamp).count(); + if (timeDiff < 0) + timeDiff = 0; // handle camera movement - for (const auto logicalKey : { ECMK_MOVE_FORWARD, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT }) + for (const auto logicalKey : { ECMK_MOVE_FORWARD, ECMK_MOVE_UP, ECMK_MOVE_DOWN, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT }) { const auto code = keysMap[logicalKey]; if (ev.keyCode == code) { - if (ev.action == nbl::ui::SKeyboardEvent::ECA_PRESSED && !keysDown[logicalKey]) + if (ev.action == nbl::ui::SKeyboardEvent::ECA_PRESSED && !keysDown[logicalKey]) { perActionDt[logicalKey] += timeDiff; keysDown[logicalKey] = true; } - else if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) + else if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) { // perActionDt[logicalKey] -= timeDiff; keysDown[logicalKey] = false; @@ -248,7 +278,7 @@ class Camera nextPresentationTimeStamp = _nextPresentationTimeStamp; return; } - + void endInputProcessing(std::chrono::microseconds _nextPresentationTimeStamp) { nbl::core::vectorSIMDf pos = getPosition(); @@ -260,13 +290,12 @@ class Camera movedir.makeSafe3D(); movedir = nbl::core::normalize(movedir); - 
constexpr float MoveSpeedScale = 0.02f; + constexpr float MoveSpeedScale = 0.02f; pos += movedir * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_FORWARD] * moveSpeed * MoveSpeedScale; pos -= movedir * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_BACKWARD] * moveSpeed * MoveSpeedScale; - // strafing - + // if upvector and vector to the target are the same, we have a // problem. so solve this problem: nbl::core::vectorSIMDf up = nbl::core::normalize(upVector); @@ -277,6 +306,11 @@ class Camera up = nbl::core::normalize(backupUpVector); } + nbl::core::vectorSIMDf currentUp = nbl::core::normalize(nbl::core::cross(localTarget, nbl::core::cross(up, localTarget))); + pos += currentUp * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_UP] * moveSpeed * MoveSpeedScale; + pos -= currentUp * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_DOWN] * moveSpeed * MoveSpeedScale; + + // strafing nbl::core::vectorSIMDf strafevect = localTarget; if (leftHanded) strafevect = nbl::core::cross(strafevect, up); @@ -292,18 +326,23 @@ class Camera firstUpdate = false; setPosition(pos); - setTarget(localTarget+pos); + setTarget(localTarget + pos); lastVirtualUpTimeStamp = nextPresentationTimeStamp; } + // TODO: temporary but a good fix for the camera events when mouse stops dragging gizmo + void mouseKeysUp() + { + mouseDown = false; + } private: inline void initDefaultKeysMap() { mapKeysToWASD(); } - - inline void allKeysUp() + + inline void allKeysUp() { - for (uint32_t i=0; i< E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++i) + for (uint32_t i = 0; i < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++i) keysDown[i] = false; mouseDown = false; @@ -311,12 +350,12 @@ class Camera private: nbl::core::vectorSIMDf initialPosition, initialTarget, position, target, upVector, backupUpVector; // TODO: make first 2 const + add default copy constructor - nbl::core::matrix3x4SIMD viewMatrix; - nbl::core::matrix4SIMD concatMatrix, projMatrix; + nbl::hlsl::float32_t3x4 viewMatrix; + nbl::hlsl::float32_t4x4 concatMatrix, projMatrix; float moveSpeed, 
rotateSpeed; bool leftHanded, firstUpdate = true, mouseDown = false; - + std::array keysMap = { {nbl::ui::EKC_NONE} }; // map camera E_CAMERA_MOVE_KEYS to corresponding Nabla key codes, by default camera uses WSAD to move // TODO: make them use std::array bool keysDown[E_CAMERA_MOVE_KEYS::ECMK_COUNT] = {}; @@ -324,4 +363,4 @@ class Camera std::chrono::microseconds nextPresentationTimeStamp, lastVirtualUpTimeStamp; }; -#endif \ No newline at end of file +#endif diff --git a/common/include/nbl/examples/common/MonoWindowApplication.hpp b/common/include/nbl/examples/common/MonoWindowApplication.hpp index 0f18012c0..a2048b7b0 100644 --- a/common/include/nbl/examples/common/MonoWindowApplication.hpp +++ b/common/include/nbl/examples/common/MonoWindowApplication.hpp @@ -41,7 +41,7 @@ class MonoWindowApplication : public virtual SimpleWindowedApplication params.height = m_initialResolution[1]; params.x = 32; params.y = 32; - params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE | IWindow::ECF_CAN_MINIMIZE; params.windowCaption = "MonoWindowApplication"; params.callback = windowCallback; const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); @@ -186,4 +186,4 @@ class MonoWindowApplication : public virtual SimpleWindowedApplication }; } -#endif \ No newline at end of file +#endif diff --git a/common/include/nbl/examples/examples.hpp b/common/include/nbl/examples/examples.hpp index 1450abc2a..d40950501 100644 --- a/common/include/nbl/examples/examples.hpp +++ b/common/include/nbl/examples/examples.hpp @@ -20,4 +20,15 @@ // cannot be in PCH because depens on definition of `this_example` for Example's builtins #include "nbl/examples/common/BuiltinResourcesApplication.hpp" -#endif // _NBL_EXAMPLES_HPP_ \ No newline at end of file +#define NBL_EXPOSE_NAMESPACES \ +using namespace nbl; \ +using namespace nbl::core; \ +using namespace 
nbl::hlsl; \ +using namespace nbl::system; \ +using namespace nbl::asset; \ +using namespace nbl::ui; \ +using namespace nbl::video; \ +using namespace nbl::scene; \ +using namespace nbl::examples; + +#endif // _NBL_EXAMPLES_HPP_ diff --git a/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp index 2993725a0..1bcbd1fd1 100644 --- a/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp +++ b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp @@ -3,9 +3,9 @@ #include +#include #include "nbl/asset/utils/CGeometryCreator.h" - namespace nbl::examples { @@ -17,7 +17,13 @@ class CGeometryCreatorScene : public core::IReferenceCounted using namespace nbl::asset; \ using namespace nbl::video public: - // + + struct SGeometryEntry + { + std::string name; + core::smart_refctd_ptr geometry; + }; + struct SCreateParams { video::IQueue* transferQueue; @@ -25,7 +31,45 @@ class CGeometryCreatorScene : public core::IReferenceCounted system::ILogger* logger; std::span addtionalBufferOwnershipFamilies = {}; }; - static inline core::smart_refctd_ptr create(SCreateParams&& params, const video::CAssetConverter::patch_t& geometryPatch) + + // Creates and initializes a scene. Override addGeometries() to supply custom meshes. + template + static inline core::smart_refctd_ptr create(SCreateParams&& params, const video::CAssetConverter::patch_t& geometryPatch, Args&&... 
args) + { + static_assert(std::is_base_of_v); + auto scene = core::smart_refctd_ptr(new SceneT(std::forward(args)...), core::dont_grab); + if (!scene->initialize(std::move(params), geometryPatch)) + return nullptr; + return scene; + } + + // + struct SInitParams + { + core::vector> geometries; + core::vector geometryNames; + }; + const SInitParams& getInitParams() const {return m_init;} + + protected: + inline CGeometryCreatorScene() = default; + + // Override to supply custom geometries, names are used as UI labels + virtual core::vector addGeometries(asset::CGeometryCreator* creator) const + { + core::vector entries; + entries.push_back({ "Cube", creator->createCube({ 1.f,1.f,1.f }) }); + entries.push_back({ "Rectangle", creator->createRectangle({ 1.5f,3.f }) }); + entries.push_back({ "Disk", creator->createDisk(2.f, 30) }); + entries.push_back({ "Sphere", creator->createSphere(2, 16, 16) }); + entries.push_back({ "Cylinder", creator->createCylinder(2, 2, 20) }); + entries.push_back({ "Cone", creator->createCone(2, 3, 10) }); + entries.push_back({ "Icosphere", creator->createIcoSphere(1, 4, true) }); + entries.push_back({ "Grid", creator->createGrid({ 32u, 32u }) }); + return entries; + } + + inline bool initialize(SCreateParams&& params, const video::CAssetConverter::patch_t& geometryPatch) { EXPOSE_NABLA_NAMESPACES; auto* logger = params.logger; @@ -33,43 +77,35 @@ class CGeometryCreatorScene : public core::IReferenceCounted if (!params.transferQueue) { logger->log("Pass a non-null `IQueue* transferQueue`!",ILogger::ELL_ERROR); - return nullptr; + return false; } if (!params.utilities) { logger->log("Pass a non-null `IUtilities* utilities`!",ILogger::ELL_ERROR); - return nullptr; + return false; } - SInitParams init = {}; core::vector> geometries; // create out geometries { - auto addGeometry = [&init,&geometries](const std::string_view name, smart_refctd_ptr&& geom)->void + auto creator = core::make_smart_refctd_ptr(); + auto entries = 
addGeometries(creator.get()); + if (entries.empty()) + return false; + + init.geometryNames.reserve(entries.size()); + geometries.reserve(entries.size()); + for (auto& entry : entries) { - init.geometryNames.emplace_back(name); - geometries.push_back(std::move(geom)); - }; + if (!entry.geometry) + continue; + init.geometryNames.emplace_back(entry.name); + geometries.push_back(std::move(entry.geometry)); + } - auto creator = core::make_smart_refctd_ptr(); - /* TODO: others - ReferenceObjectCpu {.meta = {.type = OT_CUBE, .name = "Cube Mesh" }, .shadersType = GP_BASIC, .data = gc->createCubeMesh(nbl::core::vector3df(1.f, 1.f, 1.f)) }, - ReferenceObjectCpu {.meta = {.type = OT_SPHERE, .name = "Sphere Mesh" }, .shadersType = GP_BASIC, .data = gc->createSphereMesh(2, 16, 16) }, - ReferenceObjectCpu {.meta = {.type = OT_CYLINDER, .name = "Cylinder Mesh" }, .shadersType = GP_BASIC, .data = gc->createCylinderMesh(2, 2, 20) }, - ReferenceObjectCpu {.meta = {.type = OT_RECTANGLE, .name = "Rectangle Mesh" }, .shadersType = GP_BASIC, .data = gc->createRectangleMesh(nbl::core::vector2df_SIMD(1.5, 3)) }, - ReferenceObjectCpu {.meta = {.type = OT_DISK, .name = "Disk Mesh" }, .shadersType = GP_BASIC, .data = gc->createDiskMesh(2, 30) }, - ReferenceObjectCpu {.meta = {.type = OT_ARROW, .name = "Arrow Mesh" }, .shadersType = GP_BASIC, .data = gc->createArrowMesh() }, - ReferenceObjectCpu {.meta = {.type = OT_CONE, .name = "Cone Mesh" }, .shadersType = GP_CONE, .data = gc->createConeMesh(2, 3, 10) }, - ReferenceObjectCpu {.meta = {.type = OT_ICOSPHERE, .name = "Icoshpere Mesh" }, .shadersType = GP_ICO, .data = gc->createIcoSphere(1, 3, true) } - */ - addGeometry("Cube",creator->createCube({1.f,1.f,1.f})); - addGeometry("Rectangle",creator->createRectangle({1.5f,3.f})); - addGeometry("Disk",creator->createDisk(2.f,30)); - addGeometry("Sphere", creator->createSphere(2, 16, 16)); - addGeometry("Cylinder", creator->createCylinder(2, 2, 20)); - addGeometry("Cone", creator->createCone(2, 3, 
10)); - addGeometry("Icosphere", creator->createIcoSphere(1, 4, true)); + if (geometries.empty()) + return false; } init.geometries.reserve(init.geometryNames.size()); @@ -78,7 +114,6 @@ class CGeometryCreatorScene : public core::IReferenceCounted auto device = params.utilities->getLogicalDevice(); smart_refctd_ptr converter = CAssetConverter::create({.device=device}); - const auto transferFamily = params.transferQueue->getFamilyIndex(); struct SInputs : CAssetConverter::SInputs @@ -109,7 +144,7 @@ class CGeometryCreatorScene : public core::IReferenceCounted if (!reservation) { logger->log("Failed to reserve GPU objects for CPU->GPU conversion!",ILogger::ELL_ERROR); - return nullptr; + return false; } // convert @@ -146,7 +181,7 @@ class CGeometryCreatorScene : public core::IReferenceCounted if (future.copy()!=IQueue::RESULT::SUCCESS) { logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); - return nullptr; + return false; } } @@ -169,23 +204,13 @@ class CGeometryCreatorScene : public core::IReferenceCounted } } - return smart_refctd_ptr(new CGeometryCreatorScene(std::move(init)),dont_grab); + m_init = std::move(init); + return true; } - // - struct SInitParams - { - core::vector> geometries; - core::vector geometryNames; - }; - const SInitParams& getInitParams() const {return m_init;} - - protected: - inline CGeometryCreatorScene(SInitParams&& _init) : m_init(std::move(_init)) {} - SInitParams m_init; #undef EXPOSE_NABLA_NAMESPACES }; } -#endif \ No newline at end of file +#endif diff --git a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp index 9a9e5c966..6e5c24614 100644 --- a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp +++ b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp @@ -168,6 +168,8 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted params[pipeline_e::BasicTriangleList].fragmentShader = 
{.shader=shader.get(),.entryPoint="BasicFS"}; params[pipeline_e::BasicTriangleFan].vertexShader = {.shader=shader.get(),.entryPoint="BasicVS"}; params[pipeline_e::BasicTriangleFan].fragmentShader = {.shader=shader.get(),.entryPoint="BasicFS"}; + params[pipeline_e::GridSnakeStrip].vertexShader = { .shader = shader.get(),.entryPoint = "BasicVS" }; + params[pipeline_e::GridSnakeStrip].fragmentShader = { .shader = shader.get(),.entryPoint = "BasicFSSnake" }; params[pipeline_e::Cone].vertexShader = {.shader=shader.get(),.entryPoint="ConeVS"}; params[pipeline_e::Cone].fragmentShader = {.shader=shader.get(),.entryPoint="ConeFS"}; for (auto i=0; i(0.5f),1.f); } +// Debug fragment shader for grid triangle-strips ("snake" order). It alternates +// triangle shading to visualize strip winding and connectivity. +[shader("pixel")] +float32_t4 BasicFSSnake(SInterpolants input, uint primID : SV_PrimitiveID) : SV_Target0 +{ + float3 N = normalize(pc.normalView < SPushConstants::DescriptorCount ? input.meta : reconstructGeometricNormal(input.meta)); + float3 base = (primID & 1u) ? float3(0.68,0.68,0.68) : float3(0.88,0.88,0.88); + + float nview = saturate(0.5 + 0.5 * N.z); + float grad = pow(nview, 0.5); + float rim = pow(1.0 - nview, 2.0) * 0.25; + + float3 col = base * (0.2 + 0.8 * grad) + rim; + return float4(col, 1.0); +} + // TODO: do smooth normals on the cone [shader("vertex")] SInterpolants ConeVS(uint32_t VertexIndex : SV_VertexID) @@ -63,4 +79,4 @@ float32_t4 ConeFS(SInterpolants input) : SV_Target0 { const float32_t3 normal = reconstructGeometricNormal(input.meta); return float32_t4(normalize(normal)*0.5f+promote(0.5f),1.f); -} \ No newline at end of file +} diff --git a/media b/media index c24f4e139..0f7ad42b3 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit c24f4e13901554abc9fdf87081108cc7dca1db57 +Subproject commit 0f7ad42b33abe3143a5d69c4d14b26cf3e538c88