diff --git a/23_Autoexposure/CMakeLists.txt b/23_Autoexposure/CMakeLists.txt deleted file mode 100644 index 8604e54c4..000000000 --- a/23_Autoexposure/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ - -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -set(EXAMPLE_SOURCES - ../../src/nbl/ext/LumaMeter/CLumaMeter.cpp - ../../src/nbl/ext/ToneMapper/CToneMapper.cpp -) - -nbl_create_executable_project("${EXAMPLE_SOURCES}" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") \ No newline at end of file diff --git a/23_Autoexposure/main.cpp b/23_Autoexposure/main.cpp deleted file mode 100644 index 83b62c88d..000000000 --- a/23_Autoexposure/main.cpp +++ /dev/null @@ -1,177 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#define _NBL_STATIC_LIB_ -#include -#include -#include - - -#include "nbl/ext/ToneMapper/CToneMapper.h" - -#include "../common/QToQuitEventReceiver.h" - -using namespace nbl; -using namespace nbl::core; -using namespace nbl::asset; -using namespace nbl::video; - - -int main() -{ - nbl::SIrrlichtCreationParameters deviceParams; - deviceParams.Bits = 24; //may have to set to 32bit for some platforms - deviceParams.ZBufferBits = 24; //we'd like 32bit here - deviceParams.DriverType = EDT_OPENGL; //! Only Well functioning driver, software renderer left for sake of 2D image drawing - deviceParams.WindowSize = dimension2d(1280, 720); - deviceParams.Fullscreen = false; - deviceParams.Vsync = true; //! If supported by target platform - deviceParams.Doublebuffer = true; - deviceParams.Stencilbuffer = false; //! This will not even be a choice soon - - auto device = createDeviceEx(deviceParams); - if (!device) - return 1; // could not create selected driver. 
- - QToQuitEventReceiver receiver; - device->setEventReceiver(&receiver); - - IVideoDriver* driver = device->getVideoDriver(); - - nbl::io::IFileSystem* filesystem = device->getFileSystem(); - IAssetManager* am = device->getAssetManager(); - - IAssetLoader::SAssetLoadParams lp; - auto imageBundle = am->getAsset("../../media/noises/spp_benchmark_4k_512.exr", lp); - - E_FORMAT inFormat; - constexpr auto outFormat = EF_R8G8B8A8_SRGB; - smart_refctd_ptr outImg; - smart_refctd_ptr imgToTonemapView,outImgView; - { - auto cpuImg = IAsset::castDown(imageBundle.getContents().begin()[0]); - IGPUImage::SCreationParams imgInfo = cpuImg->getCreationParameters(); - inFormat = imgInfo.format; - - auto gpuImages = driver->getGPUObjectsFromAssets(&cpuImg.get(),&cpuImg.get()+1); - auto gpuImage = gpuImages->operator[](0u); - - IGPUImageView::SCreationParams imgViewInfo; - imgViewInfo.flags = static_cast(0u); - imgViewInfo.image = std::move(gpuImage); - imgViewInfo.viewType = IGPUImageView::ET_2D_ARRAY; - imgViewInfo.format = inFormat; - imgViewInfo.subresourceRange.aspectMask = static_cast(0u); - imgViewInfo.subresourceRange.baseMipLevel = 0; - imgViewInfo.subresourceRange.levelCount = 1; - imgViewInfo.subresourceRange.baseArrayLayer = 0; - imgViewInfo.subresourceRange.layerCount = 1; - imgToTonemapView = driver->createImageView(IGPUImageView::SCreationParams(imgViewInfo)); - - imgInfo.format = outFormat; - outImg = driver->createDeviceLocalGPUImageOnDedMem(std::move(imgInfo)); - - imgViewInfo.image = outImg; - imgViewInfo.format = outFormat; - outImgView = driver->createImageView(IGPUImageView::SCreationParams(imgViewInfo)); - } - - auto glslCompiler = am->getCompilerSet(); - const auto inputColorSpace = std::make_tuple(inFormat,ECP_SRGB,EOTF_IDENTITY); - - using LumaMeterClass = ext::LumaMeter::CLumaMeter; - constexpr auto MeterMode = LumaMeterClass::EMM_MEDIAN; - const float minLuma = 1.f/2048.f; - const float maxLuma = 65536.f; - - auto cpuLumaMeasureSpecializedShader = 
LumaMeterClass::createShader(glslCompiler,inputColorSpace,MeterMode,minLuma,maxLuma); - auto gpuLumaMeasureShader = driver->createShader(smart_refctd_ptr(cpuLumaMeasureSpecializedShader->getUnspecialized())); - auto gpuLumaMeasureSpecializedShader = driver->createSpecializedShader(gpuLumaMeasureShader.get(), cpuLumaMeasureSpecializedShader->getSpecializationInfo()); - - const float meteringMinUV[2] = { 0.1f,0.1f }; - const float meteringMaxUV[2] = { 0.9f,0.9f }; - LumaMeterClass::Uniforms_t uniforms; - auto lumaDispatchInfo = LumaMeterClass::buildParameters(uniforms, outImg->getCreationParameters().extent, meteringMinUV, meteringMaxUV); - - auto uniformBuffer = driver->createFilledDeviceLocalBufferOnDedMem(sizeof(uniforms),&uniforms); - - - using ToneMapperClass = ext::ToneMapper::CToneMapper; - constexpr auto TMO = ToneMapperClass::EO_ACES; - constexpr bool usingLumaMeter = MeterModegetGLSLCompiler(), - inputColorSpace, - std::make_tuple(outFormat,ECP_SRGB,OETF_sRGB), - TMO,usingLumaMeter,MeterMode,minLuma,maxLuma,usingTemporalAdapatation - ); - auto gpuTonemappingShader = driver->createShader(smart_refctd_ptr(cpuTonemappingSpecializedShader->getUnspecialized())); - auto gpuTonemappingSpecializedShader = driver->createSpecializedShader(gpuTonemappingShader.get(),cpuTonemappingSpecializedShader->getSpecializationInfo()); - - auto outImgStorage = ToneMapperClass::createViewForImage(driver,false,core::smart_refctd_ptr(outImg),{static_cast(0u),0,1,0,1}); - - auto parameterBuffer = driver->createDeviceLocalGPUBufferOnDedMem(ToneMapperClass::getParameterBufferSize()); - constexpr float Exposure = 0.f; - constexpr float Key = 0.18; - auto params = ToneMapperClass::Params_t(Exposure, Key, 0.85f); - { - params.setAdaptationFactorFromFrameDelta(0.f); - driver->updateBufferRangeViaStagingBuffer(parameterBuffer.get(),0u,sizeof(params),¶ms); - } - - auto commonPipelineLayout = ToneMapperClass::getDefaultPipelineLayout(driver,usingLumaMeter); - - auto lumaMeteringPipeline = 
driver->createComputePipeline(nullptr,core::smart_refctd_ptr(commonPipelineLayout),std::move(gpuLumaMeasureSpecializedShader)); - auto toneMappingPipeline = driver->createComputePipeline(nullptr,core::smart_refctd_ptr(commonPipelineLayout),std::move(gpuTonemappingSpecializedShader)); - - auto commonDescriptorSet = driver->createDescriptorSet(core::smart_refctd_ptr(commonPipelineLayout->getDescriptorSetLayout(0u))); - ToneMapperClass::updateDescriptorSet(driver,commonDescriptorSet.get(),parameterBuffer,imgToTonemapView,outImgStorage,1u,2u,usingLumaMeter ? 3u:0u,uniformBuffer,0u,usingTemporalAdapatation); - - - constexpr auto dynOffsetArrayLen = usingLumaMeter ? 2u : 1u; - - auto lumaDynamicOffsetArray = core::make_refctd_dynamic_array >(dynOffsetArrayLen,0u); - lumaDynamicOffsetArray->back() = sizeof(ToneMapperClass::Params_t); - - auto toneDynamicOffsetArray = core::make_refctd_dynamic_array >(dynOffsetArrayLen,0u); - - - auto blitFBO = driver->addFrameBuffer(); - blitFBO->attach(video::EFAP_COLOR_ATTACHMENT0, std::move(outImgView)); - - uint32_t outBufferIx = 0u; - auto lastPresentStamp = std::chrono::high_resolution_clock::now(); - while (device->run() && receiver.keepOpen()) - { - driver->beginScene(false, false); - - driver->bindComputePipeline(lumaMeteringPipeline.get()); - driver->bindDescriptorSets(EPBP_COMPUTE,commonPipelineLayout.get(),0u,1u,&commonDescriptorSet.get(),&lumaDynamicOffsetArray); - driver->pushConstants(commonPipelineLayout.get(),IGPUSpecializedShader::ESS_COMPUTE,0u,sizeof(outBufferIx),&outBufferIx); outBufferIx ^= 0x1u; - LumaMeterClass::dispatchHelper(driver,lumaDispatchInfo,true); - - driver->bindComputePipeline(toneMappingPipeline.get()); - driver->bindDescriptorSets(EPBP_COMPUTE,commonPipelineLayout.get(),0u,1u,&commonDescriptorSet.get(),&toneDynamicOffsetArray); - ToneMapperClass::dispatchHelper(driver,outImgStorage.get(),true); - - driver->blitRenderTargets(blitFBO, nullptr, false, false); - - driver->endScene(); - if 
(usingTemporalAdapatation) - { - auto thisPresentStamp = std::chrono::high_resolution_clock::now(); - auto microsecondsElapsedBetweenPresents = std::chrono::duration_cast(thisPresentStamp-lastPresentStamp); - lastPresentStamp = thisPresentStamp; - - params.setAdaptationFactorFromFrameDelta(float(microsecondsElapsedBetweenPresents.count())/1000000.f); - // dont override shader output - constexpr auto offsetPastLumaHistory = offsetof(decltype(params),lastFrameExtraEVAsHalf)+sizeof(decltype(params)::lastFrameExtraEVAsHalf); - auto* paramPtr = reinterpret_cast(¶ms); - driver->updateBufferRangeViaStagingBuffer(parameterBuffer.get(), offsetPastLumaHistory, sizeof(params)-offsetPastLumaHistory, paramPtr+offsetPastLumaHistory); - } - } - - return 0; -} \ No newline at end of file diff --git a/26_Autoexposure/CMakeLists.txt b/26_Autoexposure/CMakeLists.txt new file mode 100644 index 000000000..f1c6d2e0f --- /dev/null +++ b/26_Autoexposure/CMakeLists.txt @@ -0,0 +1,33 @@ + +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" +) + +list(APPEND NBL_LIBRARIES + Nabla::ext::FullScreenTriangle +) + +nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() diff --git a/26_Autoexposure/app_resources/avg_luma_meter.comp.hlsl b/26_Autoexposure/app_resources/avg_luma_meter.comp.hlsl new file mode 100644 index 000000000..9031ccbba --- /dev/null +++ b/26_Autoexposure/app_resources/avg_luma_meter.comp.hlsl @@ -0,0 +1,65 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/builtin/hlsl/luma_meter/geom_mean.hlsl" +#include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "app_resources/common.hlsl" + +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; + +using namespace nbl::hlsl; +using Ptr = bda::__ptr < uint32_t >; +using PtrAccessor = BdaAccessor < uint32_t >; + +[[vk::push_constant]] luma_meter::PushConstants pushData; + +groupshared float32_t sdata[WORKGROUP_SIZE]; // workgroup-shared scratch, only touched through SharedAccessor below +struct SharedAccessor // get/set over the groupshared sdata array plus a workgroup barrier +{ + using type = float32_t; + template + void get(const uint32_t ix, NBL_REF_ARG(AccessType) value) + { + value = sdata[ix]; + } + template + void set(const uint32_t ix, const AccessType value) + { + sdata[ix] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } +}; + +struct TexAccessor // reads the input texture on behalf of the meter +{ + static float32_t toXYZ(float32_t3 srgbColor) { // scalar result: dot with row 1 of sRGBtoXYZ (presumably the Y/luminance row; confirm) + return dot(colorspace::sRGBtoXYZ[1], srgbColor); + } + + float32_t3 get(float32_t2 uv) { // rgb at uv, sampled with an explicit LOD of 0 + return texture.SampleLevel(samplerState, uv, 0.f).rgb; + } +}; + +[numthreads(SUBGROUP_SIZE, SUBGROUP_SIZE, 1)] // NOTE(review): SUBGROUP_SIZE*SUBGROUP_SIZE invocations but sdata holds WORKGROUP_SIZE entries; confirm the two agree +[shader("compute")] +void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) // metering pass entry point; ID and GroupID are not referenced in the body +{ + const Ptr val_ptr = Ptr::create(pushData.pLumaMeterBuf); + PtrAccessor val_accessor = PtrAccessor::create(val_ptr); // accessor over the buffer address carried in the push constants + + SharedAccessor sdata; // local accessor object; shadows the groupshared array of the same name inside main + TexAccessor tex; + + using LumaMeter = luma_meter::geom_meter; + LumaMeter meter = LumaMeter::create(pushData.lumaMin, pushData.lumaMax, pushData.meanParams.rcpFirstPassWGCount); + + meter.sampleLuma(pushData.window, val_accessor, tex, sdata); // the whole metering step is delegated to the meter +} diff --git a/26_Autoexposure/app_resources/avg_luma_tonemap.comp.hlsl b/26_Autoexposure/app_resources/avg_luma_tonemap.comp.hlsl new file mode 100644 index 000000000..d8c35014f --- /dev/null +++
b/26_Autoexposure/app_resources/avg_luma_tonemap.comp.hlsl @@ -0,0 +1,96 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/builtin/hlsl/luma_meter/geom_mean.hlsl" +#include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" +#include "nbl/builtin/hlsl/colorspace/EOTF.hlsl" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/colorspace/decodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/colorspace/OETF.hlsl" +#include "nbl/builtin/hlsl/tonemapper/operators/reinhard.hlsl" +#include "nbl/builtin/hlsl/tonemapper/operators/aces.hlsl" +#include "app_resources/common.hlsl" + +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D textureIn; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerStateIn; +[[vk::binding(0, 3)]] RWTexture2D textureOut; + +using namespace nbl::hlsl; +using Ptr = bda::__ptr < uint32_t >; +using PtrAccessor = BdaAccessor < uint32_t >; + +[[vk::push_constant]] luma_meter::PushConstants pushData; + +groupshared float32_t sdata[WORKGROUP_SIZE]; // workgroup-shared scratch, only touched through SharedAccessor below +struct SharedAccessor // get/set over the groupshared sdata array plus a workgroup barrier +{ + using type = float32_t; + template + void get(const uint32_t ix, NBL_REF_ARG(AccessType) value) + { + value = sdata[ix]; + } + template + void set(const uint32_t ix, const AccessType value) + { + sdata[ix] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } +}; + +struct TexAccessor // reads the input texture +{ + static float32_t toXYZ(float32_t3 srgbColor) { // dot() yields a scalar, so the return type is float32_t, same as in avg_luma_meter.comp.hlsl; not called in this file + return dot(colorspace::sRGBtoXYZ[1], srgbColor); + } + + float32_t3 get(float32_t2 uv) { // rgb at uv, sampled with an explicit LOD of 0 + return textureIn.SampleLevel(samplerStateIn, uv, 0.f).rgb; + } +}; + +[numthreads(SUBGROUP_SIZE, SUBGROUP_SIZE, 1)] // NOTE(review): SUBGROUP_SIZE*SUBGROUP_SIZE invocations but sdata holds WORKGROUP_SIZE entries; confirm the two agree +[shader("compute")] +void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) // tonemapping pass entry point; ID and GroupID are not referenced in the body +{ + const Ptr val_ptr = Ptr::create(pushData.pLumaMeterBuf); + PtrAccessor val_accessor =
PtrAccessor::create(val_ptr); + + SharedAccessor sdata; // local accessor object; shadows the groupshared array of the same name inside main + TexAccessor tex; + + using LumaMeter = luma_meter::geom_meter; + LumaMeter meter = LumaMeter::create(pushData.lumaMin, pushData.lumaMax, pushData.meanParams.rcpFirstPassWGCount); + + float32_t EV = meter.gatherLuma(val_accessor); // value the meter derives from the metering buffer + + const float32_t lumaDiff = vk::RawBufferLoad(pushData.pLastFrameEVBuf) - EV; // last frame's stored EV minus this frame's + EV += lumaDiff * mix(pushData.exposureAdaptationFactors.x, pushData.exposureAdaptationFactors.y, lumaDiff >= 0.0); // pull EV part of the way toward last frame's value; presumably .y is chosen when lumaDiff >= 0 and .x otherwise (confirm mix semantics) + + uint32_t tid = workgroup::SubgroupContiguousIndex(); + if (all(glsl::gl_WorkGroupID() == uint32_t3(0,0,0))) // only workgroup (0,0,0), tid 0 stores the adapted EV + if (tid == 0) + vk::RawBufferStore(pushData.pCurrFrameEVBuf, EV); + + morton::code mc; + mc.value = tid; // tid is treated as a Morton code and converted to a 2D coordinate inside the workgroup tile + uint32_t2 coord = _static_cast(mc); + + uint32_t2 pos = (glsl::gl_WorkGroupID() * SUBGROUP_SIZE).xy + coord; + if (any(pos >= pushData.viewportSize)) // pos is unsigned, so only the upper bound can be violated + return; + + float32_t2 uv = float32_t2(pos) / float32_t2(pushData.viewportSize); // NOTE(review): this addresses the texel corner, not the centre; confirm that is intended + float32_t3 color = tex.get(uv).rgb; + float32_t3 CIEColor = mul(colorspace::sRGBtoXYZ, color); + // tonemapper::Reinhard reinhard = tonemapper::Reinhard::create(EV, 0.18f, 0.85f); + tonemapper::ACES aces = tonemapper::ACES::create(EV, 0.18f, 0.85f); + float32_t3 tonemappedColor = mul(colorspace::decode::XYZtoscRGB, aces(CIEColor)); + + textureOut[pos] = float32_t4(tonemappedColor, 1.0f); // alpha is written as 1 +} diff --git a/26_Autoexposure/app_resources/common.hlsl b/26_Autoexposure/app_resources/common.hlsl new file mode 100644 index 000000000..a5ff5d9be --- /dev/null +++ b/26_Autoexposure/app_resources/common.hlsl @@ -0,0 +1,47 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _AUTOEXPOSURE_COMMON_INCLUDED_ +#define _AUTOEXPOSURE_COMMON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/luma_meter/common.hlsl" +#include "nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +#ifdef __HLSL_VERSION // the checks and declarations below are only compiled when __HLSL_VERSION is defined + +#ifndef WORKGROUP_SIZE // must be supplied as a define; compilation stops otherwise +#error "Define WORKGROUP_SIZE!" +#endif + +#ifndef SUBGROUP_SIZE // must be supplied as a define; compilation stops otherwise +#error "Define SUBGROUP_SIZE!" +#endif + +#ifndef WG_CONFIG_T // must be supplied as a define; compilation stops otherwise +#error "Define WG_CONFIG_T!" +#endif + +using wg_config_t = WG_CONFIG_T; // alias for whatever type the WG_CONFIG_T define names + +struct device_capabilities // single compile-time flag selected by the NATIVE_SUBGROUP_ARITHMETIC define +{ +#ifdef NATIVE_SUBGROUP_ARITHMETIC + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = true; +#else + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = false; +#endif +}; + +#endif // __HLSL_VERSION + +} // namespace hlsl +} // namespace nbl + +#endif \ No newline at end of file diff --git a/26_Autoexposure/app_resources/median_luma_meter.comp.hlsl b/26_Autoexposure/app_resources/median_luma_meter.comp.hlsl new file mode 100644 index 000000000..aefdf4451 --- /dev/null +++ b/26_Autoexposure/app_resources/median_luma_meter.comp.hlsl @@ -0,0 +1,70 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/builtin/hlsl/luma_meter/histogram.hlsl" +#include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "app_resources/common.hlsl" + +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; + +using namespace nbl::hlsl; +using Ptr = bda::__ptr < uint32_t >; +using PtrAccessor = BdaAccessor < uint32_t >; + +[[vk::push_constant]] luma_meter::PushConstants pushData; + +groupshared uint32_t sdata[BIN_COUNT]; // workgroup-shared bins; BIN_COUNT is expected from the compile-time defines (common.hlsl does not check for it) +struct SharedAccessor // get/set/atomicAdd over the groupshared sdata array plus a workgroup barrier +{ + using type = uint32_t; + template + void get(const uint32_t ix, NBL_REF_ARG(AccessType) value) + { + value = sdata[ix]; + } + + template + void set(const uint32_t ix, const AccessType value) + { + sdata[ix] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } + + uint32_t atomicAdd(const uint32_t index, const uint32_t value) { // returns uint32_t like `type` and like the accessor in median_luma_tonemap.comp.hlsl; a float32_t return would round large counts + return glsl::atomicAdd(sdata[index], value); + } +}; + +struct TexAccessor // reads the input texture on behalf of the meter +{ + static float32_t toXYZ(float32_t3 srgbColor) { // scalar result: dot with row 1 of sRGBtoXYZ (presumably the Y/luminance row; confirm) + return dot(colorspace::sRGBtoXYZ[1], srgbColor); + } + + float32_t3 get(float32_t2 uv) { // rgb at uv, sampled with an explicit LOD of 0 + return texture.SampleLevel(samplerState, uv, 0.f).rgb; + } +}; + +[numthreads(SUBGROUP_SIZE, SUBGROUP_SIZE, 1)] +[shader("compute")] +void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) // histogram metering pass entry point; ID and GroupID are not referenced in the body +{ + const Ptr histo_ptr = Ptr::create(pushData.pLumaMeterBuf); + PtrAccessor histo_accessor = PtrAccessor::create(histo_ptr); // accessor over the buffer address carried in the push constants + + SharedAccessor sdata; // local accessor object; shadows the groupshared array of the same name inside main + TexAccessor tex; + + using LumaMeter = luma_meter::median_meter; + LumaMeter meter = LumaMeter::create(pushData.lumaMin, pushData.lumaMax, pushData.histoParams.lowerBoundPercentile, pushData.histoParams.upperBoundPercentile); + + meter.sampleLuma(pushData.window, histo_accessor, tex, sdata); // the whole metering step is delegated to the meter +} diff --git a/26_Autoexposure/app_resources/median_luma_tonemap.comp.hlsl
b/26_Autoexposure/app_resources/median_luma_tonemap.comp.hlsl new file mode 100644 index 000000000..26689605c --- /dev/null +++ b/26_Autoexposure/app_resources/median_luma_tonemap.comp.hlsl @@ -0,0 +1,102 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/builtin/hlsl/luma_meter/histogram.hlsl" +#include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" +#include "nbl/builtin/hlsl/colorspace/EOTF.hlsl" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/colorspace/decodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/colorspace/OETF.hlsl" +#include "nbl/builtin/hlsl/tonemapper/operators/reinhard.hlsl" +#include "nbl/builtin/hlsl/tonemapper/operators/aces.hlsl" +#include "app_resources/common.hlsl" + +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D textureIn; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerStateIn; +[[vk::binding(0, 3)]] RWTexture2D textureOut; + +using namespace nbl::hlsl; +using Ptr = bda::__ptr < uint32_t >; +using PtrAccessor = BdaAccessor < uint32_t >; + +[[vk::push_constant]] luma_meter::PushConstants pushData; + +groupshared uint32_t sdata[BIN_COUNT]; // workgroup-shared bins; BIN_COUNT is expected from the compile-time defines (common.hlsl does not check for it) +struct SharedAccessor // get/set/atomicAdd over the groupshared sdata array plus a workgroup barrier +{ + using type = uint32_t; + template + void get(const uint32_t ix, NBL_REF_ARG(AccessType) value) + { + value = sdata[ix]; + } + + template + void set(const uint32_t ix, const AccessType value) + { + sdata[ix] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } + + uint32_t atomicAdd(const uint32_t index, const uint32_t value) + { + return glsl::atomicAdd(sdata[index], value); + } +}; + +struct TexAccessor // reads the input texture +{ + static float32_t toXYZ(float32_t3 srgbColor) { // dot() yields a scalar, so the return type is float32_t, same as in median_luma_meter.comp.hlsl; not called in this file + return dot(colorspace::sRGBtoXYZ[1], srgbColor); + } + + float32_t3 get(float32_t2 uv) { // rgb at uv, sampled with an explicit LOD of 0 + return textureIn.SampleLevel(samplerStateIn, uv, 0.f).rgb; + } +}; +
+[numthreads(SUBGROUP_SIZE, SUBGROUP_SIZE, 1)] +[shader("compute")] +void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) // tonemapping pass entry point; ID and GroupID are not referenced in the body +{ + const Ptr histo_ptr = Ptr::create(pushData.pLumaMeterBuf); + PtrAccessor histo_accessor = PtrAccessor::create(histo_ptr); // accessor over the buffer address carried in the push constants + + SharedAccessor sdata; // local accessor object; shadows the groupshared array of the same name inside main + TexAccessor tex; + + using LumaMeter = luma_meter::median_meter; + LumaMeter meter = LumaMeter::create(pushData.lumaMin, pushData.lumaMax, pushData.histoParams.lowerBoundPercentile, pushData.histoParams.upperBoundPercentile); + + float32_t EV = meter.gatherLuma(histo_accessor, sdata); // value the meter derives from the histogram buffer, using the shared accessor as scratch + + const float32_t lumaDiff = vk::RawBufferLoad(pushData.pLastFrameEVBuf) - EV; // last frame's stored EV minus this frame's + EV += lumaDiff * mix(pushData.exposureAdaptationFactors.x, pushData.exposureAdaptationFactors.y, lumaDiff >= 0.0); // pull EV part of the way toward last frame's value; presumably .y is chosen when lumaDiff >= 0 and .x otherwise (confirm mix semantics) + + uint32_t tid = workgroup::SubgroupContiguousIndex(); + if (all(glsl::gl_WorkGroupID() == uint32_t3(0,0,0))) // only workgroup (0,0,0), tid 0 stores the adapted EV + if (tid == 0) + vk::RawBufferStore(pushData.pCurrFrameEVBuf, EV); + + morton::code mc; + mc.value = tid; // tid is treated as a Morton code and converted to a 2D coordinate inside the workgroup tile + uint32_t2 coord = _static_cast(mc); + + uint32_t2 pos = (glsl::gl_WorkGroupID() * SUBGROUP_SIZE).xy + coord; + if (any(pos >= pushData.viewportSize)) // pos is unsigned, so only the upper bound can be violated + return; + + float32_t2 uv = float32_t2(pos) / float32_t2(pushData.viewportSize); // explicit conversion, matching avg_luma_tonemap.comp.hlsl; NOTE(review): this addresses the texel corner, not the centre; confirm that is intended + float32_t3 color = tex.get(uv).rgb; + float32_t3 CIEColor = mul(colorspace::sRGBtoXYZ, color); + // tonemapper::Reinhard reinhard = tonemapper::Reinhard::create(EV, 0.18f, 0.85f); + tonemapper::ACES aces = tonemapper::ACES::create(EV, 0.18f, 0.85f); + float32_t3 tonemappedColor = mul(colorspace::decode::XYZtoscRGB, aces(CIEColor)); + + textureOut[pos] = float32_t4(tonemappedColor, 1.0f); // alpha is written as 1 +} diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl new file mode 100644 index 000000000..10ff89375 --- /dev/null +++ b/26_Autoexposure/app_resources/present.frag.hlsl @@ -0,0 +1,18 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#pragma wave shader_stage(fragment) + +// vertex shader is provided by the fullScreenTriangle extension +#include // NOTE(review): no include target is visible on this line; confirm the header that declares SVertexAttributes is named here +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState samplerState; + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 // returns the texel sampled at the interpolated uv, unchanged +{ + return texture.SampleLevel(samplerState, vxAttr.uv, 0.f); // explicit LOD of 0 +} \ No newline at end of file diff --git a/23_Autoexposure/config.json.template b/26_Autoexposure/config.json.template similarity index 100% rename from 23_Autoexposure/config.json.template rename to 26_Autoexposure/config.json.template diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp new file mode 100644 index 000000000..4080af768 --- /dev/null +++ b/26_Autoexposure/main.cpp @@ -0,0 +1,1094 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" + +#include "nbl/video/surface/CSurfaceVulkan.h" +#include "nbl/asset/interchange/IAssetLoader.h" +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + +#include "nbl/builtin/hlsl/luma_meter/common.hlsl" +#include "nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl" +#include "app_resources/common.hlsl" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace nbl::examples; + +class AutoexposureApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication +{ + enum class MeteringMode { + AVERAGE, + MEDIAN + }; + + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; + using clock_t = std::chrono::steady_clock; + + constexpr static inline uint32_t MaxFramesInFlight = 3u; + + static inline std::string DefaultImagePathsFile = "../../media/noises/spp_benchmark_4k_512.exr"; + static inline std::array ShaderPaths = { + "app_resources/avg_luma_meter.comp.hlsl", + "app_resources/avg_luma_tonemap.comp.hlsl", + "app_resources/median_luma_meter.comp.hlsl", + "app_resources/median_luma_tonemap.comp.hlsl", + "app_resources/present.frag.hlsl" + }; + constexpr static inline MeteringMode MeterMode = MeteringMode::MEDIAN; + constexpr static inline uint32_t BinCount = 1024; + constexpr static inline uint32_t2 Dimensions = { 1280, 720 }; + constexpr static inline float32_t2 MeteringMinUV = { 0.1f, 0.1f }; + constexpr static inline float32_t2 MeteringMaxUV = { 0.9f, 0.9f }; + constexpr static inline float32_t SamplingFactor = 2.f; + constexpr static inline float32_t2 LumaRange = { 1.0f / 2048.0f, 65536.f }; + constexpr static inline float32_t2 PercentileRange = { 0.45f, 0.55f }; + constexpr static inline float32_t2 BaseExposureAdaptationFactorsLog2 = {-1.1f, -0.2f}; + 
+public: + // Yay thanks to multiple inheritance we cannot forward ctors anymore + inline AutoexposureApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + // Will get called mid-initialization, via `filterDevices` between when the API Connection is created and Physical Device is chosen + inline core::vector getSurfaces() const override + { + // So let's create our Window and Surface then! + if (!m_surface) + { + { + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = Dimensions[0]; + params.height = Dimensions[1]; + params.x = 32; + params.y = 32; + // Don't want to have a window lingering about before we're ready so create it hidden. + // Only programmatic resize, not regular. + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "AutoexposureApp"; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); + } + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! 
+ if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + + // Create command pool and buffers + { + auto gQueue = getGraphicsQueue(); + m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); + + for (auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) + return logFail("Couldn't create Command Buffer!"); + } + } + + // Create renderpass and init surface + nbl::video::IGPURenderpass* renderpass; + { + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + // We actually need external dependencies to ensure ordering of the Implicit Layout Transitions relative to the semaphore signals + constexpr IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition to ATTACHMENT_OPTIMAL + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // since we're uploading the image data we're about to draw + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because we clear and don't blend + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = 
IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // we can have NONE as the Destinations because the spec says so about presents + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + + renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + } + + // One asset converter to make the cache persist + auto converter = CAssetConverter::create({ .device = m_device.get() }); + + // Create descriptors and pipelines + { + // need to hoist + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + auto convertDSLayoutCPU2GPU = [&](std::span cpuLayouts) + { + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + + std::get>(inputs.assets) = cpuLayouts; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // even though it does nothing when none assets refer in any way (direct or indirect) to memory or need any device operations performed, still need to call to write the cache + reservation.convert(params); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuLayouts = reservation.getGPUObjects(); + std::vector> result; + result.reserve(cpuLayouts.size()); + + for (auto& gpuLayout : gpuLayouts) { + auto layout = gpuLayout.value; + if 
(!layout) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); + std::exit(-1); + } + result.push_back(layout); + } + + return result; + }; + auto convertDSCPU2GPU = [&](std::span cpuDS) + { + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + + std::get>(inputs.assets) = cpuDS; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // even though it does nothing when none assets refer in any way (direct or indirect) to memory or need any device operations performed, still need to call to write the cache + reservation.convert(params); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuDS = reservation.getGPUObjects(); + std::vector> result; + result.reserve(cpuDS.size()); + + for (auto& ds : gpuDS) { + if (!ds.value) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); + std::exit(-1); + } + result.push_back(ds.value); + } + + return result; + }; + + ISampler::SParams samplerParams; + samplerParams.AnisotropicFilter = 0; + auto defaultSampler = make_smart_refctd_ptr(samplerParams); + + std::array gpuImgbindings = {}; + std::array tonemappedImgRWbindings = {}; + std::array tonemappedImgSamplerbindings = {}; + + gpuImgbindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = &defaultSampler + }; + tonemappedImgRWbindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + 
.immutableSamplers = nullptr + }; + tonemappedImgSamplerbindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + .immutableSamplers = &defaultSampler + }; + + auto cpuGpuImgLayout = make_smart_refctd_ptr(gpuImgbindings); + auto cpuTonemappedImgRWLayout = make_smart_refctd_ptr(tonemappedImgRWbindings); + auto cpuTonemappedImgSamplerLayout = make_smart_refctd_ptr(tonemappedImgSamplerbindings); + + std::array cpuLayouts = { + cpuGpuImgLayout.get(), + cpuTonemappedImgRWLayout.get(), + cpuTonemappedImgSamplerLayout.get() + }; + + auto gpuLayouts = convertDSLayoutCPU2GPU(cpuLayouts); + + auto cpuGpuImgDS = make_smart_refctd_ptr(std::move(cpuGpuImgLayout)); + auto cpuTonemappedImgRWDS = make_smart_refctd_ptr(std::move(cpuTonemappedImgRWLayout)); + auto cpuTonemappedImgSamplerDS = make_smart_refctd_ptr(std::move(cpuTonemappedImgSamplerLayout)); + + std::array cpuDS = { + cpuGpuImgDS.get(), + cpuTonemappedImgRWDS.get(), + cpuTonemappedImgSamplerDS.get() + }; + + auto gpuDS = convertDSCPU2GPU(cpuDS); + m_gpuImgDS = gpuDS[0]; + m_gpuImgDS->setObjectDebugName("m_gpuImgDS"); + m_tonemappedImgRWDS = gpuDS[1]; + m_tonemappedImgRWDS->setObjectDebugName("m_tonemappedImgRWDS"); + m_tonemappedImgSamplerDS = gpuDS[2]; + m_tonemappedImgSamplerDS->setObjectDebugName("m_tonemappedImgSamplerDS"); + + // Create Shaders + auto loadAndCompileShader = [&](std::string pathToShader) { + IAssetLoader::SAssetLoadParams lp = {}; + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + auto source = IAsset::castDown(assets[0]); + + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CHLSLCompiler::SOptions 
options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + + auto* includeFinder = compiler->getDefaultIncludeFinder(); + options.preprocessorOptions.includeFinder = includeFinder; + + const uint32_t workgroupSize = m_physicalDevice->getLimits().maxComputeWorkGroupInvocations; + m_subgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + + const uint32_t configItemsPerInvoc = MeterMode == MeteringMode::AVERAGE ? 
1 : BinCount / workgroupSize; + workgroup2::SArithmeticConfiguration wgConfig; + wgConfig.init(hlsl::findMSB(workgroupSize), hlsl::log2(float(m_subgroupSize)), configItemsPerInvoc); + + struct MacroDefines + { + std::string identifier; + std::string definition; + }; + constexpr uint32_t NumBaseDefines = 3; + constexpr uint32_t NumExtraDefines = 2; + const MacroDefines definesBuf[NumBaseDefines+NumExtraDefines] = { + { "WORKGROUP_SIZE", std::to_string(workgroupSize) }, + { "SUBGROUP_SIZE", std::to_string(m_subgroupSize) }, + {"WG_CONFIG_T", wgConfig.getConfigTemplateStructString()}, + {"NATIVE_SUBGROUP_ARITHMETIC", "1"}, + { "BIN_COUNT", std::to_string(BinCount) } + }; + + uint32_t defineCount = NumBaseDefines; + if (m_physicalDevice->getLimits().shaderSubgroupArithmetic) + defineCount++; + if (MeterMode == MeteringMode::MEDIAN) + defineCount++; + std::vector defines; + for (uint32_t i = 0; i < defineCount; i++) + defines.emplace_back(definesBuf[i].identifier, definesBuf[i].definition); + options.preprocessorOptions.extraDefines = defines; + + auto overriddenSource = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + if (!overriddenSource) + { + m_logger->log("Shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader.c_str()); + std::exit(-1); + } + + return overriddenSource; + }; + + // Create compute pipelines + { + IGPUComputePipeline::SCreationParams params; + auto shader = loadAndCompileShader((MeterMode == MeteringMode::AVERAGE) ? 
ShaderPaths[0] : ShaderPaths[2]); + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(luma_meter::PushConstants) + }; + auto pipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + smart_refctd_ptr(gpuLayouts[0]), + nullptr, + nullptr, + nullptr + ); + if (!pipelineLayout) { + return logFail("Failed to create pipeline layout"); + } + + params.layout = pipelineLayout.get(); + params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(hlsl::findMSB(m_subgroupSize)); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_meterPipeline)) { + return logFail("Failed to create meter compute pipeline!\n"); + } + } + { + IGPUComputePipeline::SCreationParams params; + auto shader = loadAndCompileShader((MeterMode == MeteringMode::AVERAGE) ? ShaderPaths[1] : ShaderPaths[3]); + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(luma_meter::PushConstants) + }; + auto pipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + smart_refctd_ptr(gpuLayouts[0]), + nullptr, + nullptr, + smart_refctd_ptr(gpuLayouts[1]) + ); + if (!pipelineLayout) { + return logFail("Failed to create pipeline layout"); + } + + params.layout = pipelineLayout.get(); + params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(hlsl::findMSB(m_subgroupSize)); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_tonemapPipeline)) { + return logFail("Failed to create tonemap compute pipeline!\n"); + } + } + + // Create graphics pipeline + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), 
m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Fragment Shader + auto fragmentShader = loadAndCompileShader(ShaderPaths[4]); + if (!fragmentShader) + return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), + .entryPoint = "main" + }; + + auto presentLayout = m_device->createPipelineLayout( + {}, + nullptr, + nullptr, + nullptr, + smart_refctd_ptr(gpuLayouts[2]) + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + } + } + + // Load exr file into gpu + smart_refctd_ptr gpuImg; + { + auto convertImgCPU2GPU = [&](ICPUImage* cpuImg) + { + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + std::array commandBufferInfo = { cmdbuf }; + core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); + imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); + + // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. 
+ struct SInputs final : CAssetConverter::SInputs + { + // we also need to override this to have concurrent sharing + inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override + { + if (familyIndices.size() > 1) + return familyIndices; + return {}; + } + + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; + } + + std::vector familyIndices; + } inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + { + const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), queue->getFamilyIndex() }; + inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; + } + // scratch command buffers for asset converter transfer commands + SIntendedSubmitInfo transfer = { + .queue = queue, + .waitSemaphores = {}, + .prevCommandBuffers = {}, + .scratchCommandBuffers = commandBufferInfo, + .scratchSemaphore = { + .semaphore = imgFillSemaphore.get(), + .value = 0, + // because of layout transitions + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + } + }; + // as per the `SIntendedSubmitInfo` one commandbuffer must be begun + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the + // compute queue becomes the owner of the buffers and images post-transfer, but in this example we use concurrent sharing + CAssetConverter::SConvertParams params = {}; + params.transfer = &transfer; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuImg, 1 }; + // assert that we don't need to provide patches + 
assert(cpuImg->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuImgs = reservation.getGPUObjects(); + for (auto& gpuImg : gpuImgs) { + if (!gpuImg) { + m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); + std::exit(-1); + } + } + + // and launch the conversions + m_api->startCapture(); + auto result = reservation.convert(params); + m_api->endCapture(); + if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { + m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuImgs[0].value; + }; + + smart_refctd_ptr cpuImg; + { + IAssetLoader::SAssetLoadParams lp; + SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); + if (bundle.getContents().empty()) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + + cpuImg = IAsset::castDown(bundle.getContents()[0]); + if (!cpuImg) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + }; + + gpuImg = convertImgCPU2GPU(cpuImg.get()); + } + + // create views for textures + { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = colorFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = width; + imgInfo.extent.height = height; + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = IGPUImage::ESCF_1_BIT; + imgInfo.flags = static_cast(0u); + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = image->getMemoryReqs(); + 
imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + return image; + }; + auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr { + auto format = img->getCreationParameters().format; + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(img); + imgViewInfo.format = format; + imgViewInfo.viewType = IGPUImageView::ET_2D; + imgViewInfo.flags = static_cast(0u); + imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.subresourceRange.levelCount = 1u; + + return m_device->createImageView(std::move(imgViewInfo)); + }; + + auto params = gpuImg->getCreationParameters(); + auto extent = params.extent; + gpuImg->setObjectDebugName("GPU Img"); + m_gpuImgView = createHDRIImageView(gpuImg); + m_gpuImgView->setObjectDebugName("GPU Img View"); + + const auto gpuImgDims = params.extent; + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R32G32B32A32_SFLOAT, gpuImgDims.width, gpuImgDims.height); + outImg->setObjectDebugName("Tonemapped Image"); + m_tonemappedImgView = createHDRIImageView(outImg); + m_tonemappedImgView->setObjectDebugName("Tonemapped Image View"); + } + + // Allocate and create buffer for Luma Gather + { + // Allocate memory + m_gatherAllocation = {}; + m_histoAllocation = {}; + for (uint32_t i = 0; i < MaxFramesInFlight; i++) + m_lastLumaAllocations[i] = {}; + { + auto build_buffer = [this]( + smart_refctd_ptr m_device, + nbl::video::IDeviceMemoryAllocator::SAllocation *allocation, + smart_refctd_ptr &buffer, + size_t buffer_size, + const char *label) { + IGPUBuffer::SCreationParams params; + params.size = buffer_size; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + buffer = 
m_device->createBuffer(std::move(params)); + if (!buffer) + return logFail("Failed to create GPU buffer of size %d!\n", buffer_size); + + buffer->setObjectDebugName(label); + + auto reqs = buffer->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + *allocation = m_device->allocate(reqs, buffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + if (!allocation->isValid()) + return logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(allocation->memory.get() == buffer->getBoundMemory().memory); + return true; + }; + + build_buffer( + m_device, + &m_gatherAllocation, + m_gatherBuffer, + m_physicalDevice->getLimits().maxSubgroupSize * sizeof(float32_t), + "Luma Gather Buffer" + ); + build_buffer( + m_device, + &m_histoAllocation, + m_histoBuffer, + BinCount * sizeof(uint32_t), + "Luma Histogram Buffer" + ); + for (uint32_t i = 0; i < MaxFramesInFlight; i++) + build_buffer( + m_device, + &m_lastLumaAllocations[i], + m_lastFrameEVBuffers[i], + sizeof(float32_t), + ("Last Luma EV Buffer " + std::to_string(i)).c_str() + ); + } + + m_gatherMemory = m_gatherAllocation.memory->map({ 0ull, m_gatherAllocation.memory->getAllocationSize() }); + m_histoMemory = m_histoAllocation.memory->map({ 0ull, m_histoAllocation.memory->getAllocationSize() }); + + if (!m_gatherMemory || !m_histoMemory) + return logFail("Failed to map the Device Memory!\n"); + + for (uint32_t i = 0; i < MaxFramesInFlight; i++) + { + void* lastLumaMemory = m_lastLumaAllocations[i].memory->map({ 0ull, m_lastLumaAllocations[i].memory->getAllocationSize() }); + if (!lastLumaMemory) + return logFail("Failed to map the Device Memory!\n"); + memset(lastLumaMemory, 0, m_lastFrameEVBuffers[i]->getSize()); + } + } + + // transition m_tonemappedImgView to GENERAL + { + auto transitionSemaphore = m_device->createSemaphore(0); + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + 
cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + + m_api->startCapture(); + + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + } + }, + .image = m_tonemappedImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + cmdbuf->end(); + + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = transitionSemaphore.get(), + .value = 1, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + } + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = {}, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + queue->submit(infos); + const ISemaphore::SWaitInfo waits[] = { + { + .semaphore = transitionSemaphore.get(), + .value = 1 + } + }; + m_device->blockForSemaphores(waits); + m_api->endCapture(); + } + + // Update Descriptors + { + IGPUDescriptorSet::SDescriptorInfo infos[3]; + infos[0].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + infos[0].desc = m_gpuImgView; + infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + infos[1].desc = m_tonemappedImgView; + infos[2].info.combinedImageSampler.imageLayout = IImage::LAYOUT::GENERAL; + infos[2].desc = m_tonemappedImgView; + + IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] = { + 
{ + .dstSet = m_gpuImgDS.get(), + .binding = 0, + .arrayElement = 0, + .count = 1, + .info = infos + }, + { + .dstSet = m_tonemappedImgRWDS.get(), + .binding = 0, + .arrayElement = 0, + .count = 1, + .info = infos + 1 + }, + { + .dstSet = m_tonemappedImgSamplerDS.get(), + .binding = 0, + .arrayElement = 0, + .count = 1, + .info = infos + 2 + } + }; + + m_device->updateDescriptorSets(3, writeDescriptors, 0, nullptr); + } + + m_winMgr->setWindowSize(m_window.get(), Dimensions.x, Dimensions.y); + m_surface->recreateSwapchain(); + m_winMgr->show(m_window.get()); + oracle.reportBeginFrameRecord(); + + m_lastPresentStamp = std::chrono::high_resolution_clock::now(); + + return true; + } + + // Runs one frame: waits on m_semaphore once enough frames are in flight, acquires the next swapchain image, zeroes the mapped gather/histogram memory, then records the luma meter dispatch, the tonemap dispatch and the full-screen present pass before submitting and presenting + inline void workLoopBody() override + { + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + + if (m_realFrameIx >= framesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight + } + }; + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + oracle.reportEndFrameRecord(); + const auto timestamp = oracle.getNextPresentationTimeStamp(); + oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + if (!m_currentImageAcquire) + return; + + memset(m_gatherMemory, 0, m_gatherBuffer->getSize()); + memset(m_histoMemory, 0, m_histoBuffer->getSize()); + + auto gpuImgExtent = m_gpuImgView->getCreationParameters().image->getCreationParameters().extent; + + auto thisPresentStamp = std::chrono::high_resolution_clock::now(); + auto microsecondsElapsedBetweenPresents = 
std::chrono::duration_cast(thisPresentStamp - m_lastPresentStamp); + m_lastPresentStamp = thisPresentStamp; + + auto pc = luma_meter::PushConstants + { + .lumaMin = LumaRange.x, + .lumaMax = LumaRange.y, + .viewportSize = uint32_t2(gpuImgExtent.width, gpuImgExtent.height), + .exposureAdaptationFactors = getAdaptationFactorFromFrameDelta(float(microsecondsElapsedBetweenPresents.count()) * 1e-6f), + .pLumaMeterBuf = (MeterMode == MeteringMode::AVERAGE) ? m_gatherBuffer->getDeviceAddress() : m_histoBuffer->getDeviceAddress(), + .pLastFrameEVBuf = m_lastFrameEVBuffers[resourceIx]->getDeviceAddress(), + }; + pc.pCurrFrameEVBuf = m_lastFrameEVBuffers[(resourceIx+1)%MaxFramesInFlight]->getDeviceAddress(); + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[resourceIx].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->beginDebugMarker("Autoexposure Frame"); + + // Luma Meter + { + auto ds = m_gpuImgDS.get(); + + const float32_t2 meteringUVRange = MeteringMaxUV - MeteringMinUV; + const uint32_t2 dispatchSize = uint32_t2(hlsl::ceil(float32_t2(gpuImgExtent.width, gpuImgExtent.height) * meteringUVRange / (m_subgroupSize * SamplingFactor))); + + pc.window = luma_meter::MeteringWindow::create(meteringUVRange / (float32_t2(dispatchSize) * static_cast(m_subgroupSize)), MeteringMinUV); + pc.meanParams.rcpFirstPassWGCount = 1.f / float(dispatchSize.x * dispatchSize.y); + + uint32_t totalSampleCount = dispatchSize.x * m_subgroupSize * dispatchSize.y * m_subgroupSize; + pc.histoParams.lowerBoundPercentile = uint32_t(PercentileRange.x * totalSampleCount); + pc.histoParams.upperBoundPercentile = uint32_t(PercentileRange.y * totalSampleCount); + + cmdbuf->bindComputePipeline(m_meterPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_meterPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet 
tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers + cmdbuf->pushConstants(m_meterPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y); + } + + // Luma Gather and Tonemapping + { + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; + imageBarriers[0].barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }; + imageBarriers[0].image = m_tonemappedImgView->getCreationParameters().image.get(); + imageBarriers[0].subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + imageBarriers[0].oldLayout = IImage::LAYOUT::UNDEFINED; + imageBarriers[0].newLayout = IImage::LAYOUT::GENERAL; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); + } + + auto ds1 = m_gpuImgDS.get(); + auto ds2 = m_tonemappedImgRWDS.get(); + + const uint32_t2 dispatchSize = { + 1 + ((gpuImgExtent.width) - 1) / m_subgroupSize, + 1 + ((gpuImgExtent.height) - 1) / m_subgroupSize + }; + + cmdbuf->bindComputePipeline(m_tonemapPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_tonemapPipeline->getLayout(), 0, 1, &ds1); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_tonemapPipeline->getLayout(), 3, 1, &ds2); + cmdbuf->pushConstants(m_tonemapPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y); + } + + // Render to swapchain + { + { + 
IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; + imageBarriers[0].barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }; + imageBarriers[0].image = m_tonemappedImgView->getCreationParameters().image.get(); + imageBarriers[0].subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + imageBarriers[0].oldLayout = IImage::LAYOUT::GENERAL; + imageBarriers[0].newLayout = IImage::LAYOUT::GENERAL; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); + } + + auto ds = m_tonemappedImgSamplerDS.get(); + + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = { m_window->getWidth(), m_window->getHeight() } + }; + // set viewport + { + const asset::SViewport viewport = + { + .width = float32_t(m_window->getWidth()), + .height = float32_t(m_window->getHeight()) + }; + cmdbuf->setViewport({ &viewport, 1 }); + } + cmdbuf->setScissor({ ¤tRenderArea, 1 }); + + // begin the renderpass + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; + const IGPUCommandBuffer::SRenderpassBeginInfo beginInfo = + { + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + + cmdbuf->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + } + + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_presentPipeline->getLayout(), 3, 1, &ds); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); + cmdbuf->endRenderPass(); + 
} + + cmdbuf->endDebugMarker(); + cmdbuf->end(); + + { + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS /* NOTE(review): the last work recorded into this command buffer is the present render pass, yet the signal stage mask is ALL_TRANSFER_BITS; confirm this is intended */ + } + }; + { + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + + m_api->startCapture(); + if (queue->submit(infos) == IQueue::RESULT::SUCCESS) + { + const nbl::video::ISemaphore::SWaitInfo waitInfos[] = + { { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + } }; + + m_device->blockForSemaphores(waitInfos); // this is not a real solution, just a quick workaround (block on the frame we just submitted) to avoid validation errors + } + else + --m_realFrameIx; + m_api->endCapture(); + } + } + + std::string caption = "[Nabla Engine] Autoexposure Example"; + m_window->setCaption(caption); + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + } + } + + inline bool keepRunning() override + { + // Keep running as long as we have a surface to present to (usually this means, as long as the window is open) + if (m_surface->irrecoverable()) + return false; + + return true; + } + + // Forwards termination to device_base_t::onAppTerminated() and returns its result + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + +protected: + // Returns exp2(BaseExposureAdaptationFactorsLog2 * frameDeltaSeconds); the result is passed to the shaders as the exposureAdaptationFactors push constant + float32_t2 getAdaptationFactorFromFrameDelta(float frameDeltaSeconds) + { + return hlsl::exp2(BaseExposureAdaptationFactorsLog2 * frameDeltaSeconds); + } + + // window + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + + // Pipelines + smart_refctd_ptr m_meterPipeline, m_tonemapPipeline; + smart_refctd_ptr m_presentPipeline; + + // Descriptor Sets + smart_refctd_ptr 
m_gpuImgDS, m_tonemappedImgRWDS, m_tonemappedImgSamplerDS; + + // Command Buffers + smart_refctd_ptr m_cmdPool; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + + smart_refctd_ptr m_semaphore; + video::CDumbPresentationOracle oracle; + + // example resources + uint32_t m_subgroupSize; + uint32_t m_lastFrameEVIx = 0; + smart_refctd_ptr m_gatherBuffer, m_histoBuffer; + std::array, MaxFramesInFlight> m_lastFrameEVBuffers; + IDeviceMemoryAllocator::SAllocation m_gatherAllocation, m_histoAllocation; + std::array m_lastLumaAllocations; + void *m_gatherMemory, *m_histoMemory; + smart_refctd_ptr m_gpuImgView, m_tonemappedImgView; + std::chrono::high_resolution_clock::time_point m_lastPresentStamp; +}; + +NBL_MAIN_FUNC(AutoexposureApp) diff --git a/23_Autoexposure/pipeline.groovy b/26_Autoexposure/pipeline.groovy similarity index 100% rename from 23_Autoexposure/pipeline.groovy rename to 26_Autoexposure/pipeline.groovy diff --git a/CMakeLists.txt b/CMakeLists.txt index d945c547a..0c4e0723c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,6 +65,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(23_Arithmetic2UnitTest) add_subdirectory(24_ColorSpaceTest) add_subdirectory(25_FilterTest EXCLUDE_FROM_ALL) + add_subdirectory(26_Autoexposure EXCLUDE_FROM_ALL) add_subdirectory(26_Blur) add_subdirectory(27_MPMCScheduler) add_subdirectory(28_FFTBloom)