From 354bb866fb7e0e0db0a7da39c2beed45155ec8d4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 10 Jan 2026 12:23:54 +0100 Subject: [PATCH 01/14] rework + upgrade SPIR-V key codegen, update docs ordered args, no nabla.h, constexpr formatter, hashed guards, custom permutation structs --- cmake/common.cmake | 553 ++++++++++++++++++---- docs/nsc-prebuilds.md | 88 +++- include/nbl/core/string/SpirvKeyHelpers.h | 55 +++ include/nbl/core/string/StringLiteral.h | 121 +++++ 4 files changed, 729 insertions(+), 88 deletions(-) create mode 100644 include/nbl/core/string/SpirvKeyHelpers.h diff --git a/cmake/common.cmake b/cmake/common.cmake index 2de6dc758f..d0104f9cc3 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1143,6 +1143,7 @@ option(NSC_DEBUG_EDIF_LINE_BIT "Add \"-fspv-debug=line\" to NSC Debug CLI" OFF) option(NSC_DEBUG_EDIF_TOOL_BIT "Add \"-fspv-debug=tool\" to NSC Debug CLI" ON) option(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT "Add \"-fspv-debug=vulkan-with-source\" to NSC Debug CLI" OFF) option(NSC_USE_DEPFILE "Generate depfiles for NSC custom commands" ON) +option(NBL_NSC_DISABLE_CUSTOM_COMMANDS "Disable NSC custom commands" OFF) function(NBL_CREATE_NSC_COMPILE_RULES) set(COMMENT "this code has been autogenerated with Nabla CMake NBL_CREATE_HLSL_COMPILE_RULES utility") @@ -1250,7 +1251,17 @@ struct DeviceConfigCaps if(NOT HEADER_RULE_GENERATED) set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include/$") set(INCLUDE_FILE "${INCLUDE_DIR}/$") - set(INCLUDE_CONTENT $) + set(NBL_HEADER_GUARD_RAW "${IMPL_TARGET}_${IMPL_NAMESPACE}_SPIRV_KEYS_HPP_INCLUDED") + string(SHA1 NBL_HEADER_GUARD_HASH "${NBL_HEADER_GUARD_RAW}") + string(TOUPPER "${NBL_HEADER_GUARD_HASH}" NBL_HEADER_GUARD_HASH_UPPER) + set(NBL_HEADER_GUARD "SPIRV_KEYS_${NBL_HEADER_GUARD_HASH_UPPER}_HPP_INCLUDED") + set(INCLUDE_CONTENT_TEMPLATE [=[ +#ifndef @NBL_HEADER_GUARD@ +#define @NBL_HEADER_GUARD@ +$ +#endif +]=]) + string(CONFIGURE "${INCLUDE_CONTENT_TEMPLATE}" INCLUDE_CONTENT @ONLY) file(GENERATE 
OUTPUT ${INCLUDE_FILE} CONTENT $ @@ -1268,7 +1279,8 @@ struct DeviceConfigCaps set_target_properties(${IMPL_TARGET} PROPERTIES NBL_HEADER_GENERATED_RULE ON) set(HEADER_ITEM_VIEW [=[ -#include "nabla.h" +#include +#include "nbl/core/string/SpirvKeyHelpers.h" ]=]) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_VIEW}") @@ -1283,13 +1295,18 @@ struct DeviceConfigCaps if(NOT NS_IMPL_KEYS_PROPERTY_DEFINED) set(HEADER_ITEM_VIEW [=[ namespace @IMPL_NAMESPACE@ { - template - inline const nbl::core::string get_spirv_key(const nbl::video::SPhysicalDeviceLimits& limits, const nbl::video::SPhysicalDeviceFeatures& features); + template + requires ((... && !std::is_pointer_v>)) + inline constexpr typename nbl::core::detail::StringLiteralBufferType::type get_spirv_key(const Args&... args) + { + return nbl::core::detail::SpirvKeyBuilder::build(args...); + } - template - inline const nbl::core::string get_spirv_key(const nbl::video::ILogicalDevice* device) + template + inline std::string get_spirv_key(const Device* device, const Args&... 
args) { - return get_spirv_key(device->getPhysicalDevice()->getLimits(), device->getEnabledFeatures()); + const auto key = nbl::core::detail::SpirvKeyBuilder::build_from_device(device, args...); + return std::string(key.view()); } } @@ -1368,67 +1385,160 @@ namespace @IMPL_NAMESPACE@ { ) endfunction() + macro(NBL_NSC_RESOLVE_CAP_KIND _CAP_KIND_RAW _CAP_STRUCT _CAP_NAME _OUT_KIND) + set(_CAP_KIND_RAW "${_CAP_KIND_RAW}") + set(_CAP_STRUCT "${_CAP_STRUCT}") + + if(_CAP_KIND_RAW STREQUAL "custom") + if(_CAP_STRUCT STREQUAL "") + ERROR_WHILE_PARSING_ITEM( + "CAPS entry with kind \"custom\" requires \"struct\".\n" + ) + endif() + set(${_OUT_KIND} "${_CAP_STRUCT}") + else() + set(${_OUT_KIND} "${_CAP_KIND_RAW}") + endif() + + if(NOT "${${_OUT_KIND}}" MATCHES "^[A-Za-z_][A-Za-z0-9_]*$") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP kind \"${${_OUT_KIND}}\" for ${_CAP_NAME}\n" + "CAP kinds must be valid C/C++ identifiers." + ) + endif() + endmacro() + set(CAP_NAMES "") set(CAP_TYPES "") set(CAP_KINDS "") + set(CAP_VALUES_INDEX 0) if(HAS_CAPS) math(EXPR LAST_CAP "${CAPS_LENGTH} - 1") foreach(CAP_IDX RANGE 0 ${LAST_CAP}) - string(JSON CAP_KIND ERROR_VARIABLE CAP_TYPE_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} kind) - string(JSON CAP_NAME GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} name) - string(JSON CAP_TYPE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} type) + string(JSON MEMBERS_TYPE ERROR_VARIABLE MEMBERS_ERROR TYPE "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members) + if(MEMBERS_TYPE STREQUAL "ARRAY") + string(JSON CAP_KIND_RAW ERROR_VARIABLE CAP_KIND_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} kind) + if(CAP_KIND_ERROR) + set(CAP_KIND_RAW limits) + endif() + + string(JSON CAP_STRUCT ERROR_VARIABLE CAP_STRUCT_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} struct) + if(CAP_STRUCT_ERROR) + set(CAP_STRUCT "") + endif() - # -> TODO: improve validation, input should be string - if(CAP_TYPE_ERROR) - set(CAP_KIND limits) # I assume its limit by default (or 
when invalid value present, currently) + NBL_NSC_RESOLVE_CAP_KIND("${CAP_KIND_RAW}" "${CAP_STRUCT}" "member group" CAP_KIND) + + string(JSON MEMBERS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members) + if(MEMBERS_LENGTH GREATER 0) + math(EXPR LAST_MEMBER "${MEMBERS_LENGTH} - 1") + foreach(MEMBER_IDX RANGE 0 ${LAST_MEMBER}) + string(JSON CAP_NAME GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} name) + string(JSON CAP_TYPE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} type) + + if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t)$") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" + "Allowed types are: bool, uint16_t, uint32_t, uint64_t" + ) + endif() + + string(JSON CAP_VALUES_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} values) + + set(VALUES "") + math(EXPR LAST_VAL "${CAP_VALUES_LENGTH} - 1") + foreach(VAL_IDX RANGE 0 ${LAST_VAL}) + string(JSON VALUE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} values ${VAL_IDX}) + string(JSON VAL_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} values ${VAL_IDX}) + + if(NOT VAL_TYPE STREQUAL "NUMBER") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for CAP \"${CAP_NAME}\" of type ${CAP_TYPE}\n" + "Use numbers for uint*_t and 0/1 for bools." + ) + endif() + + if(CAP_TYPE STREQUAL "bool") + if(NOT VALUE MATCHES "^[01]$") + ERROR_WHILE_PARSING_ITEM( + "Invalid bool value \"${VALUE}\" for ${CAP_NAME}\n" + "Boolean CAPs can only have values 0 or 1." 
+ ) + endif() + endif() + + list(APPEND VALUES "${VALUE}") + endforeach() + + set(CAP_VALUES_${CAP_VALUES_INDEX} "${VALUES}") + list(APPEND CAP_NAMES "${CAP_NAME}") + list(APPEND CAP_TYPES "${CAP_TYPE}") + list(APPEND CAP_KINDS "${CAP_KIND}") + math(EXPR CAP_VALUES_INDEX "${CAP_VALUES_INDEX} + 1") + endforeach() + endif() else() - if(NOT CAP_KIND MATCHES "^(limits|features)$") + if(NOT MEMBERS_ERROR) ERROR_WHILE_PARSING_ITEM( - "Invalid CAP kind \"${CAP_KIND}\" for ${CAP_NAME}\n" - "Allowed kinds are: limits, features" + "CAPS.members must be an array when provided." ) endif() - endif() - # <- - if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t)$") - ERROR_WHILE_PARSING_ITEM( - "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" - "Allowed types are: bool, uint16_t, uint32_t, uint64_t" - ) - endif() + string(JSON CAP_KIND_RAW ERROR_VARIABLE CAP_KIND_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} kind) + string(JSON CAP_NAME GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} name) + string(JSON CAP_TYPE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} type) + + if(CAP_KIND_ERROR) + set(CAP_KIND_RAW limits) # I assume its limit by default (or when invalid value present, currently) + endif() - string(JSON CAP_VALUES_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values) + string(JSON CAP_STRUCT ERROR_VARIABLE CAP_STRUCT_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} struct) + if(CAP_STRUCT_ERROR) + set(CAP_STRUCT "") + endif() - set(VALUES "") - math(EXPR LAST_VAL "${CAP_VALUES_LENGTH} - 1") - foreach(VAL_IDX RANGE 0 ${LAST_VAL}) - string(JSON VALUE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values ${VAL_IDX}) - string(JSON VAL_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values ${VAL_IDX}) + NBL_NSC_RESOLVE_CAP_KIND("${CAP_KIND_RAW}" "${CAP_STRUCT}" "${CAP_NAME}" CAP_KIND) - if(NOT VAL_TYPE STREQUAL "NUMBER") + if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t)$") ERROR_WHILE_PARSING_ITEM( - "Invalid CAP value \"${VALUE}\" for 
CAP \"${CAP_NAME}\" of type ${CAP_TYPE}\n" - "Use numbers for uint*_t and 0/1 for bools." + "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" + "Allowed types are: bool, uint16_t, uint32_t, uint64_t" ) endif() - if(CAP_TYPE STREQUAL "bool") - if(NOT VALUE MATCHES "^[01]$") + string(JSON CAP_VALUES_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values) + + set(VALUES "") + math(EXPR LAST_VAL "${CAP_VALUES_LENGTH} - 1") + foreach(VAL_IDX RANGE 0 ${LAST_VAL}) + string(JSON VALUE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values ${VAL_IDX}) + string(JSON VAL_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values ${VAL_IDX}) + + if(NOT VAL_TYPE STREQUAL "NUMBER") ERROR_WHILE_PARSING_ITEM( - "Invalid bool value \"${VALUE}\" for ${CAP_NAME}\n" - "Boolean CAPs can only have values 0 or 1." + "Invalid CAP value \"${VALUE}\" for CAP \"${CAP_NAME}\" of type ${CAP_TYPE}\n" + "Use numbers for uint*_t and 0/1 for bools." ) endif() - endif() - list(APPEND VALUES "${VALUE}") - endforeach() + if(CAP_TYPE STREQUAL "bool") + if(NOT VALUE MATCHES "^[01]$") + ERROR_WHILE_PARSING_ITEM( + "Invalid bool value \"${VALUE}\" for ${CAP_NAME}\n" + "Boolean CAPs can only have values 0 or 1." 
+ ) + endif() + endif() + + list(APPEND VALUES "${VALUE}") + endforeach() - set(CAP_VALUES_${CAP_IDX} "${VALUES}") - list(APPEND CAP_NAMES "${CAP_NAME}") - list(APPEND CAP_TYPES "${CAP_TYPE}") - list(APPEND CAP_KINDS "${CAP_KIND}") + set(CAP_VALUES_${CAP_VALUES_INDEX} "${VALUES}") + list(APPEND CAP_NAMES "${CAP_NAME}") + list(APPEND CAP_TYPES "${CAP_TYPE}") + list(APPEND CAP_KINDS "${CAP_KIND}") + math(EXPR CAP_VALUES_INDEX "${CAP_VALUES_INDEX} + 1") + endif() endforeach() endif() @@ -1453,40 +1563,297 @@ namespace @IMPL_NAMESPACE@ { set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_CANONICAL_IDENTIFIERS "${NEW_CANONICAL_IDENTIFIER}") - set(HEADER_ITEM_VIEW [=[ -namespace @IMPL_NAMESPACE@ { - template<> - inline const nbl::core::string get_spirv_key - (const nbl::video::SPhysicalDeviceLimits& limits, const nbl::video::SPhysicalDeviceFeatures& features) - { - nbl::core::string retval = "@BASE_KEY@"; -@RETVAL_EVAL@ - retval += ".spv"; - return "$/" + retval; - } -} + if(NUM_CAPS GREATER 0) + set(KIND_ORDER "") + foreach(_NBL_KIND IN LISTS CAP_KINDS) + list(FIND KIND_ORDER "${_NBL_KIND}" _NBL_KIND_INDEX) + if(_NBL_KIND_INDEX EQUAL -1) + list(APPEND KIND_ORDER "${_NBL_KIND}") + endif() + endforeach() + + set(ORDERED_KINDS "${KIND_ORDER}") + + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + unset(_NBL_KIND_INDICES_${_NBL_KIND}) + endforeach() + + math(EXPR LAST_CAP "${NUM_CAPS} - 1") + foreach(i RANGE 0 ${LAST_CAP}) + list(GET CAP_KINDS ${i} _NBL_KIND) + set(_NBL_ORIG_CAP_VALUES_${i} "${CAP_VALUES_${i}}") + list(APPEND _NBL_KIND_INDICES_${_NBL_KIND} ${i}) + endforeach() + + set(_NBL_ORDERED_INDICES "") + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + if(DEFINED _NBL_KIND_INDICES_${_NBL_KIND}) + list(APPEND _NBL_ORDERED_INDICES ${_NBL_KIND_INDICES_${_NBL_KIND}}) + endif() + endforeach() + + set(_NBL_ORDERED_CAP_NAMES "") + set(_NBL_ORDERED_CAP_TYPES "") + set(_NBL_ORDERED_CAP_KINDS "") + set(_NBL_ORDERED_VALUES_INDEX 0) + foreach(_NBL_INDEX IN LISTS _NBL_ORDERED_INDICES) + 
list(GET CAP_NAMES ${_NBL_INDEX} _NBL_CAP_NAME) + list(GET CAP_TYPES ${_NBL_INDEX} _NBL_CAP_TYPE) + list(GET CAP_KINDS ${_NBL_INDEX} _NBL_CAP_KIND) + set(_NBL_CAP_VALUES "${_NBL_ORIG_CAP_VALUES_${_NBL_INDEX}}") + list(APPEND _NBL_ORDERED_CAP_NAMES "${_NBL_CAP_NAME}") + list(APPEND _NBL_ORDERED_CAP_TYPES "${_NBL_CAP_TYPE}") + list(APPEND _NBL_ORDERED_CAP_KINDS "${_NBL_CAP_KIND}") + set(CAP_VALUES_${_NBL_ORDERED_VALUES_INDEX} "${_NBL_CAP_VALUES}") + math(EXPR _NBL_ORDERED_VALUES_INDEX "${_NBL_ORDERED_VALUES_INDEX} + 1") + endforeach() + + set(CAP_NAMES "${_NBL_ORDERED_CAP_NAMES}") + set(CAP_TYPES "${_NBL_ORDERED_CAP_TYPES}") + set(CAP_KINDS "${_NBL_ORDERED_CAP_KINDS}") + list(LENGTH CAP_NAMES NUM_CAPS) + else() + set(ORDERED_KINDS "") + endif() + + list(LENGTH ORDERED_KINDS ORDERED_KIND_COUNT) + set(NON_DEVICE_KINDS "") + set(HAS_LIMITS FALSE) + set(HAS_FEATURES FALSE) + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + if(_NBL_KIND STREQUAL "limits") + set(HAS_LIMITS TRUE) + elseif(_NBL_KIND STREQUAL "features") + set(HAS_FEATURES TRUE) + else() + list(APPEND NON_DEVICE_KINDS "${_NBL_KIND}") + endif() + endforeach() + list(LENGTH NON_DEVICE_KINDS NON_DEVICE_COUNT) + + string(MAKE_C_IDENTIFIER "${BASE_KEY}" BASE_KEY_IDENT) + string(MD5 BASE_KEY_HASH "${BASE_KEY}") + string(SUBSTRING "${BASE_KEY_HASH}" 0 8 BASE_KEY_HASH8) + set(KIND_PREFIX "${BASE_KEY_IDENT}_${BASE_KEY_HASH8}") + + set(MATCH_KINDS "") + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + list(APPEND MATCH_KINDS "${_NBL_KIND}") + endforeach() + + foreach(_NBL_KIND IN LISTS MATCH_KINDS) + set(_NBL_KIND_MEMBERS_${_NBL_KIND} "") + set(_NBL_KIND_TYPES_${_NBL_KIND} "") + endforeach() + + if(NUM_CAPS GREATER 0) + math(EXPR _NBL_LAST_CAP "${NUM_CAPS} - 1") + foreach(i RANGE ${_NBL_LAST_CAP}) + list(GET CAP_KINDS ${i} _NBL_KIND) + list(GET CAP_NAMES ${i} _NBL_CAP) + list(GET CAP_TYPES ${i} _NBL_TYPE) + list(FIND _NBL_KIND_MEMBERS_${_NBL_KIND} "${_NBL_CAP}" _NBL_MEMBER_INDEX) + if(_NBL_MEMBER_INDEX EQUAL -1) + list(APPEND 
_NBL_KIND_MEMBERS_${_NBL_KIND} "${_NBL_CAP}") + list(APPEND _NBL_KIND_TYPES_${_NBL_KIND} "${_NBL_TYPE}") + endif() + endforeach() + endif() -]=]) - unset(RETVAL_EVAL) list(LENGTH CAP_NAMES CAP_COUNT) + + set(RETVAL_FMT "$/${BASE_KEY}") + set(RETVAL_ARGS "") + set(CX_CAPACITY 0) + string(LENGTH "${BASE_KEY}" CX_BASE_LEN) + math(EXPR CX_CAPACITY "${CX_BASE_LEN} + 4 + 24") if(CAP_COUNT GREATER 0) math(EXPR LAST_CAP "${CAP_COUNT} - 1") + set(PREV_KIND "") foreach(i RANGE ${LAST_CAP}) list(GET CAP_NAMES ${i} CAP) list(GET CAP_KINDS ${i} KIND) + list(GET CAP_TYPES ${i} TYPE) + if(NOT KIND STREQUAL PREV_KIND) + string(APPEND RETVAL_FMT "__${KIND}") + string(LENGTH "${KIND}" KIND_LEN) + math(EXPR CX_CAPACITY "${CX_CAPACITY} + 2 + ${KIND_LEN}") + set(PREV_KIND "${KIND}") + endif() + string(APPEND RETVAL_FMT ".${CAP}_%s") + list(APPEND RETVAL_ARGS "nbl_spirv_${KIND}.${CAP}") + string(LENGTH "${CAP}" CAP_LEN) + math(EXPR CX_CAPACITY "${CX_CAPACITY} + 2 + ${CAP_LEN}") + if(TYPE STREQUAL "bool") + set(DIGITS 1) + elseif(TYPE STREQUAL "uint16_t") + set(DIGITS 5) + elseif(TYPE STREQUAL "uint32_t") + set(DIGITS 10) + else() + set(DIGITS 20) + endif() + math(EXPR CX_CAPACITY "${CX_CAPACITY} + ${DIGITS}") + endforeach() + endif() + string(APPEND RETVAL_FMT ".spv") + if(RETVAL_ARGS) + string(JOIN ", " RETVAL_ARGS_JOINED ${RETVAL_ARGS}) + set(RETVAL_ARGS_STR ", ${RETVAL_ARGS_JOINED}") + else() + set(RETVAL_ARGS_STR "") + endif() + string(CONFIGURE [=[ + nbl::core::detail::append_printf_s(retval@RETVAL_ARGS_STR@); +]=] RETVAL_EVAL_CONSTEXPR @ONLY) + + set(SPIRV_CUSTOM_TRAITS "") + foreach(_NBL_KIND IN LISTS MATCH_KINDS) + set(_NBL_MEMBER_LINES "") + list(LENGTH _NBL_KIND_MEMBERS_${_NBL_KIND} _NBL_MEMBER_COUNT) + set(KIND_TRAIT "${KIND_PREFIX}_${_NBL_KIND}") + if(_NBL_MEMBER_COUNT GREATER 0) + math(EXPR _NBL_MEMBER_LAST "${_NBL_MEMBER_COUNT} - 1") + foreach(_NBL_MEMBER_INDEX RANGE ${_NBL_MEMBER_LAST}) + list(GET _NBL_KIND_MEMBERS_${_NBL_KIND} ${_NBL_MEMBER_INDEX} _NBL_MEMBER_NAME) + 
list(GET _NBL_KIND_TYPES_${_NBL_KIND} ${_NBL_MEMBER_INDEX} _NBL_MEMBER_TYPE) + set(MEMBER_NAME "${_NBL_MEMBER_NAME}") + set(MEMBER_TYPE "${_NBL_MEMBER_TYPE}") + string(CONFIGURE [=[ + requires std::is_same_v, @MEMBER_TYPE@>; +]=] _NBL_MEMBER_LINE @ONLY) + string(APPEND _NBL_MEMBER_LINES "${_NBL_MEMBER_LINE}") + endforeach() + set(KIND "${KIND_TRAIT}") + set(MEMBER_LINES "${_NBL_MEMBER_LINES}") + string(CONFIGURE [=[ + template + struct SpirvPerm_@KIND@ + { + static constexpr bool value = requires(const T& v) + { +@MEMBER_LINES@ }; + }; + +]=] _NBL_CUSTOM_TRAIT @ONLY) + else() + set(KIND "${KIND_TRAIT}") string(CONFIGURE [=[ - retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); -]=] RETVALUE_VIEW @ONLY) - string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") + template + struct SpirvPerm_@KIND@ + { + static constexpr bool value = false; + }; + +]=] _NBL_CUSTOM_TRAIT @ONLY) + endif() + string(APPEND SPIRV_CUSTOM_TRAITS "${_NBL_CUSTOM_TRAIT}") + endforeach() + + set(SPIRV_BUILD_REQUIRES "") + if(ORDERED_KIND_COUNT EQUAL 0) + set(SPIRV_BUILD_REQUIRES "requires (sizeof...(Args) == 0)") + else() + set(_NBL_REQS "") + set(_NBL_KIND_INDEX 0) + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + set(KIND_TRAIT "${KIND_PREFIX}_${_NBL_KIND}") + list(APPEND _NBL_REQS "SpirvPerm_${KIND_TRAIT}>>>::value") + math(EXPR _NBL_KIND_INDEX "${_NBL_KIND_INDEX} + 1") endforeach() + string(JOIN " && " _NBL_REQS_JOINED ${_NBL_REQS}) + set(SPIRV_BUILD_REQUIRES "requires (sizeof...(Args) == ${ORDERED_KIND_COUNT} && ${_NBL_REQS_JOINED})") endif() - + + set(SPIRV_ARG_DECLS "") + set(_NBL_KIND_INDEX 0) + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + string(APPEND SPIRV_ARG_DECLS "\t\tconst auto& nbl_spirv_${_NBL_KIND} = std::get<${_NBL_KIND_INDEX}>(std::forward_as_tuple(args...));\n") + math(EXPR _NBL_KIND_INDEX "${_NBL_KIND_INDEX} + 1") + endforeach() + + set(SPIRV_BUILD_FROM_DEVICE_REQUIRES "") + set(_NBL_DEVICE_REQS "") + if(HAS_LIMITS) + list(APPEND _NBL_DEVICE_REQS 
"nbl::core::detail::spirv_device_has_limits") + endif() + if(HAS_FEATURES) + list(APPEND _NBL_DEVICE_REQS "nbl::core::detail::spirv_device_has_features") + endif() + if(NON_DEVICE_COUNT EQUAL 0) + list(APPEND _NBL_DEVICE_REQS "sizeof...(Args) == 0") + else() + list(APPEND _NBL_DEVICE_REQS "sizeof...(Args) == ${NON_DEVICE_COUNT}") + set(_NBL_REQS "") + set(_NBL_KIND_INDEX 0) + foreach(_NBL_KIND IN LISTS NON_DEVICE_KINDS) + set(KIND_TRAIT "${KIND_PREFIX}_${_NBL_KIND}") + list(APPEND _NBL_REQS "SpirvPerm_${KIND_TRAIT}>>>::value") + math(EXPR _NBL_KIND_INDEX "${_NBL_KIND_INDEX} + 1") + endforeach() + if(_NBL_REQS) + string(JOIN " && " _NBL_REQS_JOINED ${_NBL_REQS}) + list(APPEND _NBL_DEVICE_REQS "${_NBL_REQS_JOINED}") + endif() + endif() + string(JOIN " && " SPIRV_DEVICE_REQUIRES_EXPR ${_NBL_DEVICE_REQS}) + set(SPIRV_BUILD_FROM_DEVICE_REQUIRES "requires (${SPIRV_DEVICE_REQUIRES_EXPR})") + + set(SPIRV_BUILD_FROM_DEVICE_ARGS "") + set(_NBL_ARG_INDEX 0) + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + if(_NBL_KIND STREQUAL "limits") + list(APPEND SPIRV_BUILD_FROM_DEVICE_ARGS "nbl::core::detail::spirv_device_get_limits(device)") + elseif(_NBL_KIND STREQUAL "features") + list(APPEND SPIRV_BUILD_FROM_DEVICE_ARGS "nbl::core::detail::spirv_device_get_features(device)") + else() + list(APPEND SPIRV_BUILD_FROM_DEVICE_ARGS "std::get<${_NBL_ARG_INDEX}>(std::forward_as_tuple(args...))") + math(EXPR _NBL_ARG_INDEX "${_NBL_ARG_INDEX} + 1") + endif() + endforeach() + if(SPIRV_BUILD_FROM_DEVICE_ARGS) + string(JOIN ", " SPIRV_BUILD_FROM_DEVICE_ARGS_JOINED ${SPIRV_BUILD_FROM_DEVICE_ARGS}) + else() + set(SPIRV_BUILD_FROM_DEVICE_ARGS_JOINED "") + endif() + + set(SPIRV_TRIVIAL_ASSERTS "") + + set(HEADER_ITEM_VIEW [=[ +namespace nbl::core::detail { + template<> + struct StringLiteralBufferType + { + using type = StringLiteralBuffer<@CX_CAPACITY@ + 1>; + }; + + template<> + struct SpirvKeyBuilder + { +@SPIRV_CUSTOM_TRAITS@ template + @SPIRV_BUILD_REQUIRES@ + static constexpr typename 
StringLiteralBufferType::type build(const Args&... args) + { +@SPIRV_ARG_DECLS@@SPIRV_TRIVIAL_ASSERTS@ typename StringLiteralBufferType::type retval = {}; +@RETVAL_EVAL_CONSTEXPR@ + return retval; + } + + template + @SPIRV_BUILD_FROM_DEVICE_REQUIRES@ + static constexpr typename StringLiteralBufferType::type build_from_device(const Device* device, const Args&... args) + { + return build(@SPIRV_BUILD_FROM_DEVICE_ARGS_JOINED@); + } + }; +} + +]=]) string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") - + function(GENERATE_KEYS PREFIX CAP_INDEX CAPS_EVAL_PART) if(NUM_CAPS EQUAL 0 OR CAP_INDEX EQUAL ${NUM_CAPS}) - # generate .config file set(FINAL_KEY "${BASE_KEY}${PREFIX}.spv") # always add ext even if its already there to make sure asset loader always is able to load as IShader set(CONFIG_FILE_TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY}") set(CONFIG_FILE "${CONFIG_FILE_TARGET_OUTPUT}.config") @@ -1532,37 +1899,46 @@ namespace @IMPL_NAMESPACE@ { if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS DEPFILE "${DEPFILE_PATH}") endif() - add_custom_command(${NBL_NSC_CUSTOM_COMMAND_ARGS}) - set(NBL_NSC_OUT_FILES "${TARGET_OUTPUT}" "${NBL_NSC_LOG_PATH}") - if(NSC_USE_DEPFILE) - list(APPEND NBL_NSC_OUT_FILES "${DEPFILE_PATH}") + if(NOT NBL_NSC_DISABLE_CUSTOM_COMMANDS) + add_custom_command(${NBL_NSC_CUSTOM_COMMAND_ARGS}) + endif() + set(NBL_NSC_OUT_FILES "") + if(NOT NBL_NSC_DISABLE_CUSTOM_COMMANDS) + set(NBL_NSC_OUT_FILES "${TARGET_OUTPUT}" "${NBL_NSC_LOG_PATH}") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES "${DEPFILE_PATH}") + endif() + set_source_files_properties(${NBL_NSC_OUT_FILES} PROPERTIES GENERATED TRUE) endif() - set_source_files_properties(${NBL_NSC_OUT_FILES} PROPERTIES GENERATED TRUE) - - set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" ${NBL_NSC_OUT_FILES}) + set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}") + 
if(NBL_NSC_OUT_FILES) + list(APPEND HEADER_ONLY_LIKE ${NBL_NSC_OUT_FILES}) + endif() target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES HEADER_FILE_ONLY ON VS_TOOL_OVERRIDE None ) - if(CMAKE_CONFIGURATION_TYPES) - foreach(_CFG IN LISTS CMAKE_CONFIGURATION_TYPES) - set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${FINAL_KEY}") + if(NOT NBL_NSC_DISABLE_CUSTOM_COMMANDS) + if(CMAKE_CONFIGURATION_TYPES) + foreach(_CFG IN LISTS CMAKE_CONFIGURATION_TYPES) + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${FINAL_KEY}") + set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.d") + endif() + source_group("${OUT}/${_CFG}" FILES ${NBL_NSC_OUT_FILES_IDE}) + endforeach() + else() + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${FINAL_KEY}") set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.d") endif() - source_group("${OUT}/${_CFG}" FILES ${NBL_NSC_OUT_FILES_IDE}) - endforeach() - else() - set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${FINAL_KEY}") - set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") - if(NSC_USE_DEPFILE) - list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.d") + source_group("${OUT}" FILES ${NBL_NSC_OUT_FILES_IDE}) endif() - source_group("${OUT}" FILES ${NBL_NSC_OUT_FILES_IDE}) endif() set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES @@ -1582,8 +1958,18 @@ namespace @IMPL_NAMESPACE@ { set(VAR_NAME "CAP_VALUES_${CAP_INDEX}") set(VALUES "${${VAR_NAME}}") + set(KEY_PREFIX ".") + if(CAP_INDEX EQUAL 0) + set(KEY_PREFIX "__${CURRENT_KIND}.") + else() + math(EXPR PREV_INDEX "${CAP_INDEX} - 1") + list(GET CAP_KINDS ${PREV_INDEX} PREV_KIND) + if(NOT CURRENT_KIND STREQUAL PREV_KIND) + set(KEY_PREFIX "__${CURRENT_KIND}.") + endif() + endif() foreach(V IN LISTS 
VALUES) - set(NEW_PREFIX "${PREFIX}.${CURRENT_CAP}_${V}") + set(NEW_PREFIX "${PREFIX}${KEY_PREFIX}${CURRENT_CAP}_${V}") set(NEW_EVAL "${CAPS_EVAL_PART}NBL_CONSTEXPR_STATIC_INLINE ${CURRENT_TYPE} ${CURRENT_CAP} = (${CURRENT_TYPE}) ${V}; // got permuted\n") math(EXPR NEXT_INDEX "${CAP_INDEX} + 1") GENERATE_KEYS("${NEW_PREFIX}" "${NEXT_INDEX}" "${NEW_EVAL}") @@ -1591,6 +1977,7 @@ namespace @IMPL_NAMESPACE@ { endfunction() GENERATE_KEYS("" 0 "") + endforeach() unset(KEYS) diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md index 400aff5eb7..3b758e1b25 100644 --- a/docs/nsc-prebuilds.md +++ b/docs/nsc-prebuilds.md @@ -43,14 +43,18 @@ For each registered input it generates: - One `.spv` output **per CMake configuration** (`Debug/`, `Release/`, `RelWithDebInfo/`). - If you use `CAPS`, it generates a **cartesian product** of permutations and emits a `.spv` for each. - A generated header (you choose the path via `INCLUDE`) containing: - a primary template `get_spirv_key(limits, features)` and `get_spirv_key(device)` +- a primary template `get_spirv_key(...args)` and `get_spirv_key(device, ...args)` +- `get_spirv_key(...args)` returns a small owning buffer with `.view()`; `get_spirv_key(device, ...args)` returns a `std::string` +- arguments must follow the **kind order** as it appears in `CAPS` (first appearance), validated structurally by required member names/types for each kind (including `limits`/`features`, no strong typing) + - `get_spirv_key(device, ...)` expects only **non-device** kinds in that same order; `limits`/`features` are injected from the device + - note: an order-agnostic API would require enforcing unique member sets across kinds to guarantee unambiguous matching; we keep a conventional order instead to stay flexible without extra constraints - explicit specializations for each registered base `KEY` - the returned key already includes the build config prefix (compiled into the header). 
Keys are strings that match the output layout: ``` -<CONFIG>/<KEY>(.<name>_<value>)(.<name>_<value>)....spv +<CONFIG>/<KEY>(__<kind>.<name>_<value>)(.<name>_<value>)....spv ``` ## The JSON "INPUTS" format @@ -145,17 +149,50 @@ auto bundle = assetMgr->getAsset(key.c_str(), loadParams); Each `CAPS` entry looks like: -- `kind` (string, optional): `"limits"` or `"features"` (defaults to `"limits"` if omitted/invalid). +- `kind` (string, optional): `"limits"`, `"features"`, or `"custom"` (defaults to `"limits"` if omitted; a kind that is not a valid C/C++ identifier is reported as a parsing error). +- `struct` (string, required for `kind="custom"`): name of the custom permutation struct (valid C/C++ identifier). If you use `limits` or `features` here, do not also use the built-in `limits`/`features` kinds in the same rule. - `name` (string, required): identifier used in both generated HLSL config and C++ key (must be a valid C/C++ identifier). - `type` (string, required): `bool`, `uint16_t`, `uint32_t`, `uint64_t`. - `values` (array of numbers, required): the values you want to prebuild. - for `bool`, values must be `0` or `1`. -At build time, NSC compiles each combination of values (cartesian product). At runtime, `get_spirv_key` appends suffixes using the `limits`/`features` you pass in. +At build time, NSC compiles each combination of values (cartesian product). At runtime, `get_spirv_key` appends suffixes using the structs you pass in for `limits`/`features` (duck-typed by required members) and any custom kinds. Each group starts with `__limits`, `__features`, or `__<struct>`, followed by `.<member>_<value>` entries. Group order follows the **first appearance of each kind in `CAPS`** (and this same order is the required argument order for `get_spirv_key`); groups with no members are omitted. 
+ +### Grouping caps by kind (optional) + +To avoid repeating the same `kind`, you can group caps with `members`: + +```cmake +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "lib_6_8"], + "CAPS": [ + { + "kind": "custom", + "struct": "userA", + "members": [ + { "name": "mode", "type": "uint32_t", "values": [0, 1] }, + { "name": "quality", "type": "uint32_t", "values": [1, 2, 4] } + ] + }, + { + "kind": "features", + "members": [ + { "name": "shaderFloat64", "type": "bool", "values": [0, 1] } + ] + } + ] + } +] +]=]) +``` ### Example: mixing `limits` and `features` -This example permutes over one device limit and one device feature (order matters: the suffix order matches the `CAPS` array order): +This example permutes over one device limit and one device feature. Suffix order follows the `CAPS` order (`__limits` then `__features` here), and member order within each group follows the `CAPS` order for that group: ```cmake set(JSON [=[ @@ -190,6 +227,47 @@ NBL_CREATE_NSC_COMPILE_RULES( ) ``` +## Custom permutation structs + +If you need permutations based on data outside of device `limits`/`features`, define a custom struct in C++ and use `kind: "custom"` with `struct` set to the parameter name. At runtime you can pass any struct type that exposes the required members with matching types; **argument order follows the `CAPS` kind order**. Using custom names `limits` or `features` is allowed, but you cannot mix them with the built-in `limits`/`features` kinds in the same rule. + +Example: + +```cmake +set(JSON [=[ +[ + { + "INPUT": "app_resources/fft.hlsl", + "KEY": "fft", + "COMPILE_OPTIONS": ["-T", "cs_6_8"], + "CAPS": [ + { + "kind": "custom", + "struct": "fftConfig", + "name": "passCount", + "type": "uint32_t", + "values": [4, 8] + } + ] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + # ... 
+ OUTPUT_VAR KEYS + INPUTS ${JSON} +) +``` + +Runtime usage: + +```cpp +nbl::this_example::FFTConfig cfg = {}; +cfg.passCount = 4; +auto key = nbl::this_example::builtin::build::get_spirv_key<"fft">(device, cfg); +``` + This produces `3 * 2 = 6` permutations per build configuration, and `KEYS` contains all of them (for example): ``` diff --git a/include/nbl/core/string/SpirvKeyHelpers.h b/include/nbl/core/string/SpirvKeyHelpers.h new file mode 100644 index 0000000000..c9f3150c2d --- /dev/null +++ b/include/nbl/core/string/SpirvKeyHelpers.h @@ -0,0 +1,55 @@ +#ifndef _NBL_CORE_STRING_SPIRV_KEY_HELPERS_H_INCLUDED_ +#define _NBL_CORE_STRING_SPIRV_KEY_HELPERS_H_INCLUDED_ + +#include +#include +#include +#include + +#include "nbl/core/string/StringLiteral.h" + +namespace nbl::core::detail +{ + +template +struct SpirvKeyBuilderMissing : std::false_type {}; + +template +struct SpirvKeyBuilder +{ + template + static constexpr void build(const Args&...) + { + static_assert(SpirvKeyBuilderMissing::value, "Unknown SPIR-V key"); + } +}; + +template +concept spirv_device_has_limits = requires(const Device* device) +{ + device->getPhysicalDevice()->getLimits(); +}; + +template +concept spirv_device_has_features = requires(const Device* device) +{ + device->getEnabledFeatures(); +}; + +template +constexpr decltype(auto) spirv_device_get_limits(const Device* device) +{ + static_assert(spirv_device_has_limits, "Device does not provide getLimits"); + return device->getPhysicalDevice()->getLimits(); +} + +template +constexpr decltype(auto) spirv_device_get_features(const Device* device) +{ + static_assert(spirv_device_has_features, "Device does not provide getEnabledFeatures"); + return device->getEnabledFeatures(); +} + +} + +#endif diff --git a/include/nbl/core/string/StringLiteral.h b/include/nbl/core/string/StringLiteral.h index ebbed673f6..d9e0a92580 100644 --- a/include/nbl/core/string/StringLiteral.h +++ b/include/nbl/core/string/StringLiteral.h @@ -5,6 +5,11 @@ #define 
_NBL_CORE_STRING_LITERAL_H_INCLUDED_ #include +#include +#include +#include +#include +#include namespace nbl::core { @@ -25,4 +30,120 @@ struct StringLiteral // for compatibility's sake #define NBL_CORE_UNIQUE_STRING_LITERAL_TYPE(STRING_LITERAL) nbl::core::StringLiteral(STRING_LITERAL) +namespace nbl::core::detail +{ + +template +struct StringLiteralBufferType +{ + using type = void; +}; + +template +struct StringLiteralBuffer +{ + std::array b{}; + size_t n = 0; + + constexpr void append(char c) + { + if (n >= Cap) + throw "overflow"; + b[n++] = c; + } + constexpr void append(std::string_view sv) { for (char c : sv) append(c); } + constexpr void append(const char* s) { for (; *s; ++s) append(*s); } + + constexpr std::string_view view() const { return { b.data(), n }; } + constexpr const char* data() const { return b.data(); } + constexpr size_t size() const { return n; } +}; + +template +constexpr void put(Out& o, const T& v) +{ + using U = std::remove_cvref_t; + + if constexpr (std::is_same_v) + { + o.append(v ? '1' : '0'); + } + else if constexpr (std::is_integral_v) + { + using UU = std::make_unsigned_t; + UU x{}; + + if constexpr (std::is_signed_v) + { + if (v < 0) + { + o.append('-'); + x = UU(-(v + 1)) + 1; + } + else + { + x = UU(v); + } + } + else + { + x = UU(v); + } + + char tmp[3 + sizeof(U) * 8]; + size_t k = 0; + do { + tmp[k++] = char('0' + (x % 10)); + x /= 10; + } while (x); + while (k) + o.append(tmp[--k]); + } + else if constexpr (std::is_convertible_v) + { + o.append(std::string_view(v)); + } + else if constexpr (std::is_same_v || std::is_same_v) + { + o.append((const char*)v); + } + else + { + static_assert(!sizeof(U), "Unsupported %s argument type"); + } +} + +template +constexpr void append_printf_s(Out& out, const Args&... 
args) +{ + auto tup = std::forward_as_tuple(args...); + size_t ai = 0; + + for (size_t i = 0; Fmt.value[i]; ++i) + { + if (Fmt.value[i] != '%') + { + out.append(Fmt.value[i]); + continue; + } + + char c = Fmt.value[++i]; + if (c == '%') + { + out.append('%'); + continue; + } + if (c == 's') + { + std::apply([&](auto const&... xs) { + size_t k = 0; + (((k++ == ai) ? (put(out, xs), 0) : 0), ...); + }, tup); + ++ai; + } + } +} + +} + #endif // _NBL_CORE_STRING_LITERAL_H_INCLUDED_ From 559955090101e9d3b3addf34ec43fae0257cc1b5 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 11 Jan 2026 00:25:07 +0100 Subject: [PATCH 02/14] add regression tests, improve formater, more native types, normalize floating points, update docs --- cmake/common.cmake | 269 ++++++++++++++++++++++-- docs/nsc-prebuilds.md | 60 +++++- examples_tests | 2 +- include/nbl/core/string/StringLiteral.h | 187 +++++++++++++++- 4 files changed, 488 insertions(+), 30 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index d0104f9cc3..aa9237959f 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1220,9 +1220,14 @@ struct DeviceConfigCaps set(REQUIRED_SINGLE_ARGS TARGET BINARY_DIR OUTPUT_VAR INPUTS INCLUDE NAMESPACE MOUNT_POINT_DEFINE) set(OPTIONAL_SINGLE_ARGS GLOB_DIR) - cmake_parse_arguments(IMPL "DISCARD_DEFAULT_GLOB" "${REQUIRED_SINGLE_ARGS};${OPTIONAL_SINGLE_ARGS};LINK_TO" "COMMON_OPTIONS;DEPENDS" ${ARGV}) + cmake_parse_arguments(IMPL "DISCARD_DEFAULT_GLOB;DISABLE_CUSTOM_COMMANDS" "${REQUIRED_SINGLE_ARGS};${OPTIONAL_SINGLE_ARGS};LINK_TO" "COMMON_OPTIONS;DEPENDS" ${ARGV}) NBL_PARSE_REQUIRED(IMPL ${REQUIRED_SINGLE_ARGS}) + set(_NBL_DISABLE_CUSTOM_COMMANDS FALSE) + if(NBL_NSC_DISABLE_CUSTOM_COMMANDS OR IMPL_DISABLE_CUSTOM_COMMANDS) + set(_NBL_DISABLE_CUSTOM_COMMANDS TRUE) + endif() + set(IMPL_HLSL_GLOB "") if(NOT IMPL_DISCARD_DEFAULT_GLOB) set(GLOB_ROOT "${CMAKE_CURRENT_SOURCE_DIR}") @@ -1279,6 +1284,7 @@ $ set_target_properties(${IMPL_TARGET} PROPERTIES 
NBL_HEADER_GENERATED_RULE ON) set(HEADER_ITEM_VIEW [=[ +#include #include #include "nbl/core/string/SpirvKeyHelpers.h" @@ -1408,6 +1414,119 @@ namespace @IMPL_NAMESPACE@ { endif() endmacro() + macro(NBL_REQUIRE_PYTHON) + if(NOT Python3_EXECUTABLE) + find_package(Python3 COMPONENTS Interpreter REQUIRED) + endif() + endmacro() + + macro(NBL_NORMALIZE_FLOAT_LITERAL _CAP_NAME _VALUE _MANTISSA_DIGITS _TYPE_LABEL _OUT_VAR) + NBL_REQUIRE_PYTHON() + set(_NBL_RAW "${_VALUE}") + if(_TYPE_LABEL STREQUAL "float") + if("${_NBL_RAW}" MATCHES "^[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][+-]?[0-9]+)?[fF]$") + string(REGEX REPLACE "[fF]$" "" _NBL_RAW "${_NBL_RAW}") + endif() + elseif(_TYPE_LABEL STREQUAL "double") + if("${_NBL_RAW}" MATCHES "^[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][+-]?[0-9]+)?[dD]$") + string(REGEX REPLACE "[dD]$" "" _NBL_RAW "${_NBL_RAW}") + endif() + endif() + + set(_NBL_CANON_DONE FALSE) + if("${_NBL_RAW}" MATCHES "^[+-]?[0-9]\\.([0-9]+)e([+-][0-9]+)$") + set(_NBL_MANTISSA "${CMAKE_MATCH_1}") + set(_NBL_EXPONENT "${CMAKE_MATCH_2}") + string(LENGTH "${_NBL_MANTISSA}" _NBL_MANTISSA_LEN) + string(LENGTH "${_NBL_EXPONENT}" _NBL_EXPONENT_LEN) + math(EXPR _NBL_EXPONENT_DIGITS "${_NBL_EXPONENT_LEN} - 1") + if(_NBL_MANTISSA_LEN EQUAL ${_MANTISSA_DIGITS} AND _NBL_EXPONENT_DIGITS GREATER_EQUAL 2 AND _NBL_EXPONENT_DIGITS LESS_EQUAL 3) + string(TOLOWER "${_NBL_RAW}" _NBL_CANON) + set(_NBL_CANON_DONE TRUE) + endif() + endif() + + if(NOT _NBL_CANON_DONE) + set(_NBL_PY_SCRIPT [=[ +import sys,math,struct +t=sys.argv[1] +s=sys.argv[2] +if t=="float" and s[-1:] in ("f","F"): + s=s[:-1] +if t=="double" and s[-1:] in ("d","D"): + s=s[:-1] +try: + x=float(s) +except Exception: + sys.exit(2) +if t=="float": + x=struct.unpack("!f",struct.pack("!f",x))[0] +if not math.isfinite(x): + sys.exit(2) +p=8 if t=="float" else 16 +sign="-" if x<0 else "" +x=abs(x) +if x==0.0: + sys.stdout.write(sign+"0."+"0"*p+"e+00") + sys.exit(0) +m=x +e=0 +while m>=10.0: + m/=10.0 + e+=1 +while m<1.0: + 
m*=10.0 + e-=1 +digits=[0]*(p+1) +digits[0]=int(m) +frac=m-digits[0] +for i in range(1,p+1): + frac*=10.0 + d=int(frac) + if d>9: + d=9 + digits[i]=d + frac-=d +frac*=10.0 +rd=int(frac) +if rd>9: + rd=9 +rem=frac-rd +ru = rd>5 or (rd==5 and (rem>0 or (digits[p]%2))) +if ru: + i=p + while i>=0 and digits[i]==9: + digits[i]=0 + i-=1 + if i>=0: + digits[i]+=1 + else: + digits[0]=1 + for j in range(1,p+1): + digits[j]=0 + e+=1 +es="-" if e<0 else "+" +if e<0: + e=-e +ew=3 if e>=100 else 2 +sys.stdout.write(sign+str(digits[0])+"."+("".join(str(d) for d in digits[1:]))+"e"+es+str(e).zfill(ew)) +]=]) + execute_process( + COMMAND "${Python3_EXECUTABLE}" -c "${_NBL_PY_SCRIPT}" "${_TYPE_LABEL}" "${_NBL_RAW}" + RESULT_VARIABLE _NBL_FMT_RESULT + OUTPUT_VARIABLE _NBL_CANON + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(NOT _NBL_FMT_RESULT EQUAL 0) + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${_VALUE}\" for ${_CAP_NAME}\n" + "${_TYPE_LABEL} values must be numbers or numeric strings." + ) + endif() + endif() + set(${_OUT_VAR} "${_NBL_CANON}") + endmacro() + set(CAP_NAMES "") set(CAP_TYPES "") set(CAP_KINDS "") @@ -1436,10 +1555,10 @@ namespace @IMPL_NAMESPACE@ { string(JSON CAP_NAME GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} name) string(JSON CAP_TYPE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} type) - if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t)$") + if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t|int16_t|int32_t|int64_t|float|double)$") ERROR_WHILE_PARSING_ITEM( "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" - "Allowed types are: bool, uint16_t, uint32_t, uint64_t" + "Allowed types are: bool, uint16_t, uint32_t, uint64_t, int16_t, int32_t, int64_t, float, double" ) endif() @@ -1451,7 +1570,28 @@ namespace @IMPL_NAMESPACE@ { string(JSON VALUE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} values ${VAL_IDX}) string(JSON VAL_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} 
members ${MEMBER_IDX} values ${VAL_IDX}) - if(NOT VAL_TYPE STREQUAL "NUMBER") + if(CAP_TYPE STREQUAL "float") + if(NOT (VAL_TYPE STREQUAL "STRING" OR VAL_TYPE STREQUAL "NUMBER")) + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for ${CAP_NAME}\n" + "Float values must be numbers or numeric strings." + ) + endif() + NBL_NORMALIZE_FLOAT_LITERAL("${CAP_NAME}" "${VALUE}" 8 "float" VALUE) + elseif(CAP_TYPE STREQUAL "double") + if(NOT (VAL_TYPE STREQUAL "STRING" OR VAL_TYPE STREQUAL "NUMBER")) + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for ${CAP_NAME}\n" + "Double values must be numbers or numeric strings." + ) + endif() + NBL_NORMALIZE_FLOAT_LITERAL("${CAP_NAME}" "${VALUE}" 16 "double" VALUE) + elseif(NOT VAL_TYPE STREQUAL "NUMBER") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for CAP \"${CAP_NAME}\" of type ${CAP_TYPE}\n" + "Use numbers for uint*_t and 0/1 for bools." + ) + elseif(NOT VAL_TYPE STREQUAL "NUMBER") ERROR_WHILE_PARSING_ITEM( "Invalid CAP value \"${VALUE}\" for CAP \"${CAP_NAME}\" of type ${CAP_TYPE}\n" "Use numbers for uint*_t and 0/1 for bools." 
@@ -1466,7 +1606,6 @@ namespace @IMPL_NAMESPACE@ { ) endif() endif() - list(APPEND VALUES "${VALUE}") endforeach() @@ -1499,10 +1638,10 @@ namespace @IMPL_NAMESPACE@ { NBL_NSC_RESOLVE_CAP_KIND("${CAP_KIND_RAW}" "${CAP_STRUCT}" "${CAP_NAME}" CAP_KIND) - if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t)$") + if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t|int16_t|int32_t|int64_t|float|double)$") ERROR_WHILE_PARSING_ITEM( "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" - "Allowed types are: bool, uint16_t, uint32_t, uint64_t" + "Allowed types are: bool, uint16_t, uint32_t, uint64_t, int16_t, int32_t, int64_t, float, double" ) endif() @@ -1514,7 +1653,23 @@ namespace @IMPL_NAMESPACE@ { string(JSON VALUE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values ${VAL_IDX}) string(JSON VAL_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values ${VAL_IDX}) - if(NOT VAL_TYPE STREQUAL "NUMBER") + if(CAP_TYPE STREQUAL "float") + if(NOT (VAL_TYPE STREQUAL "STRING" OR VAL_TYPE STREQUAL "NUMBER")) + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for ${CAP_NAME}\n" + "Float values must be numbers or numeric strings." + ) + endif() + NBL_NORMALIZE_FLOAT_LITERAL("${CAP_NAME}" "${VALUE}" 8 "float" VALUE) + elseif(CAP_TYPE STREQUAL "double") + if(NOT (VAL_TYPE STREQUAL "STRING" OR VAL_TYPE STREQUAL "NUMBER")) + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for ${CAP_NAME}\n" + "Double values must be numbers or numeric strings." + ) + endif() + NBL_NORMALIZE_FLOAT_LITERAL("${CAP_NAME}" "${VALUE}" 16 "double" VALUE) + elseif(NOT VAL_TYPE STREQUAL "NUMBER") ERROR_WHILE_PARSING_ITEM( "Invalid CAP value \"${VALUE}\" for CAP \"${CAP_NAME}\" of type ${CAP_TYPE}\n" "Use numbers for uint*_t and 0/1 for bools." 
@@ -1529,7 +1684,6 @@ namespace @IMPL_NAMESPACE@ { ) endif() endif() - list(APPEND VALUES "${VALUE}") endforeach() @@ -1690,6 +1844,18 @@ namespace @IMPL_NAMESPACE@ { set(DIGITS 5) elseif(TYPE STREQUAL "uint32_t") set(DIGITS 10) + elseif(TYPE STREQUAL "int16_t") + set(DIGITS 6) + elseif(TYPE STREQUAL "int32_t") + set(DIGITS 11) + elseif(TYPE STREQUAL "int64_t") + set(DIGITS 20) + elseif(TYPE STREQUAL "uint64_t") + set(DIGITS 20) + elseif(TYPE STREQUAL "float") + set(DIGITS 16) + elseif(TYPE STREQUAL "double") + set(DIGITS 24) else() set(DIGITS 20) endif() @@ -1852,12 +2018,80 @@ namespace nbl::core::detail { string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") - function(GENERATE_KEYS PREFIX CAP_INDEX CAPS_EVAL_PART) + function(GENERATE_KEYS PREFIX CAP_INDEX) + set(CAPS_VALUES_PART "${ARGN}") if(NUM_CAPS EQUAL 0 OR CAP_INDEX EQUAL ${NUM_CAPS}) set(FINAL_KEY "${BASE_KEY}${PREFIX}.spv") # always add ext even if its already there to make sure asset loader always is able to load as IShader set(CONFIG_FILE_TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY}") set(CONFIG_FILE "${CONFIG_FILE_TARGET_OUTPUT}.config") - set(CAPS_EVAL "${CAPS_EVAL_PART}") + set(CAPS_EVAL "") + if(NUM_CAPS GREATER 0) + set(CAPS_EVAL_LIMITS "") + set(CAPS_EVAL_FEATURES "") + set(_NBL_CUSTOM_KIND_LIST "") + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + if(NOT _NBL_KIND STREQUAL "limits" AND NOT _NBL_KIND STREQUAL "features") + list(APPEND _NBL_CUSTOM_KIND_LIST "${_NBL_KIND}") + set(_NBL_CUSTOM_LINES_${_NBL_KIND} "") + endif() + endforeach() + + math(EXPR _NBL_LAST_CAP "${NUM_CAPS} - 1") + foreach(i RANGE 0 ${_NBL_LAST_CAP}) + list(GET CAP_NAMES ${i} _NBL_CAP_NAME) + list(GET CAP_TYPES ${i} _NBL_CAP_TYPE) + list(GET CAP_KINDS ${i} _NBL_CAP_KIND) + list(GET CAPS_VALUES_PART ${i} _NBL_CAP_VALUE) + set(MEMBER_NAME "${_NBL_CAP_NAME}") + set(MEMBER_TYPE "${_NBL_CAP_TYPE}") + set(MEMBER_VALUE 
"${_NBL_CAP_VALUE}") + string(CONFIGURE [=[ +NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBER_VALUE@; // got permuted +]=] _NBL_MEMBER_LINE @ONLY) + if(_NBL_CAP_KIND STREQUAL "limits") + string(APPEND CAPS_EVAL_LIMITS "${_NBL_MEMBER_LINE}") + elseif(_NBL_CAP_KIND STREQUAL "features") + string(APPEND CAPS_EVAL_FEATURES "${_NBL_MEMBER_LINE}") + else() + set(_NBL_CUSTOM_LINE_VAR "_NBL_CUSTOM_LINES_${_NBL_CAP_KIND}") + set(${_NBL_CUSTOM_LINE_VAR} "${${_NBL_CUSTOM_LINE_VAR}}${_NBL_MEMBER_LINE}") + endif() + endforeach() + + if(CAPS_EVAL_LIMITS) + string(APPEND CAPS_EVAL "// limits\n") + string(APPEND CAPS_EVAL "${CAPS_EVAL_LIMITS}\n") + endif() + if(CAPS_EVAL_FEATURES) + string(APPEND CAPS_EVAL "// features\n") + string(APPEND CAPS_EVAL "${CAPS_EVAL_FEATURES}\n") + endif() + + set(_NBL_HAS_CUSTOM FALSE) + foreach(_NBL_KIND IN LISTS _NBL_CUSTOM_KIND_LIST) + if(_NBL_CUSTOM_LINES_${_NBL_KIND}) + set(_NBL_HAS_CUSTOM TRUE) + endif() + endforeach() + + if(_NBL_HAS_CUSTOM) + string(APPEND CAPS_EVAL "// custom structs\n") + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + if(NOT _NBL_KIND STREQUAL "limits" AND NOT _NBL_KIND STREQUAL "features") + if(_NBL_CUSTOM_LINES_${_NBL_KIND}) + set(NBL_KIND_NAME "${_NBL_KIND}") + set(MEMBER_LINES "${_NBL_CUSTOM_LINES_${_NBL_KIND}}") + string(CONFIGURE [=[ +struct @NBL_KIND_NAME@ +{ +@MEMBER_LINES@}; +]=] _NBL_KIND_STRUCT @ONLY) + string(APPEND CAPS_EVAL "${_NBL_KIND_STRUCT}\n") + endif() + endif() + endforeach() + endif() + endif() string(CONFIGURE "${DEVICE_CONFIG_VIEW}" CONFIG_CONTENT @ONLY) file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") @@ -1899,11 +2133,11 @@ namespace nbl::core::detail { if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS DEPFILE "${DEPFILE_PATH}") endif() - if(NOT NBL_NSC_DISABLE_CUSTOM_COMMANDS) + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) add_custom_command(${NBL_NSC_CUSTOM_COMMAND_ARGS}) endif() set(NBL_NSC_OUT_FILES "") - if(NOT NBL_NSC_DISABLE_CUSTOM_COMMANDS) + if(NOT 
_NBL_DISABLE_CUSTOM_COMMANDS) set(NBL_NSC_OUT_FILES "${TARGET_OUTPUT}" "${NBL_NSC_LOG_PATH}") if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_OUT_FILES "${DEPFILE_PATH}") @@ -1921,7 +2155,7 @@ namespace nbl::core::detail { HEADER_FILE_ONLY ON VS_TOOL_OVERRIDE None ) - if(NOT NBL_NSC_DISABLE_CUSTOM_COMMANDS) + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) if(CMAKE_CONFIGURATION_TYPES) foreach(_CFG IN LISTS CMAKE_CONFIGURATION_TYPES) set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${FINAL_KEY}") @@ -1970,13 +2204,14 @@ namespace nbl::core::detail { endif() foreach(V IN LISTS VALUES) set(NEW_PREFIX "${PREFIX}${KEY_PREFIX}${CURRENT_CAP}_${V}") - set(NEW_EVAL "${CAPS_EVAL_PART}NBL_CONSTEXPR_STATIC_INLINE ${CURRENT_TYPE} ${CURRENT_CAP} = (${CURRENT_TYPE}) ${V}; // got permuted\n") + set(NEW_VALUES "${CAPS_VALUES_PART}") + list(APPEND NEW_VALUES "${V}") math(EXPR NEXT_INDEX "${CAP_INDEX} + 1") - GENERATE_KEYS("${NEW_PREFIX}" "${NEXT_INDEX}" "${NEW_EVAL}") + GENERATE_KEYS("${NEW_PREFIX}" "${NEXT_INDEX}" ${NEW_VALUES}) endforeach() endfunction() - GENERATE_KEYS("" 0 "") + GENERATE_KEYS("" 0) endforeach() diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md index 3b758e1b25..e035837658 100644 --- a/docs/nsc-prebuilds.md +++ b/docs/nsc-prebuilds.md @@ -44,7 +44,7 @@ For each registered input it generates: - If you use `CAPS`, it generates a **cartesian product** of permutations and emits a `.spv` for each. 
- A generated header (you choose the path via `INCLUDE`) containing: - a primary template `get_spirv_key(...args)` and `get_spirv_key(device, ...args)` -- `get_spirv_key` returns a small owning buffer with `.view()` +- `get_spirv_key` returns a small owning buffer; use `.view()` or implicit `std::string_view` to consume it - arguments must follow the **kind order** as it appears in `CAPS` (first appearance), validated structurally by required member names/types for each kind (including `limits`/`features`, no strong typing) - `get_spirv_key(device, ...)` expects only **non-device** kinds in that same order; `limits`/`features` are injected from the device - note: an order-agnostic API would require enforcing unique member sets across kinds to guarantee unambiguous matching; we keep a conventional order instead to stay flexible without extra constraints @@ -137,8 +137,9 @@ Then include the generated header and use the key to load the SPIR-V: ```cpp #include "nbl/this_example/builtin/build/spirv/keys.hpp" // ... -auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(device); -auto bundle = assetMgr->getAsset(key.c_str(), loadParams); +auto keyBuf = nbl::this_example::builtin::build::get_spirv_key<"shader">(device); +std::string_view key = keyBuf; +auto bundle = assetMgr->getAsset(key.data(), loadParams); ``` `OUTPUT_VAR` (here: `KEYS`) is assigned the list of **all** produced access keys (all configurations + all permutations). This list is intended to be fed into `NBL_CREATE_RESOURCE_ARCHIVE(BUILTINS ${KEYS})`. @@ -152,12 +153,36 @@ Each `CAPS` entry looks like: - `kind` (string, optional): `"limits"`, `"features"`, or `"custom"` (defaults to `"limits"` if omitted/invalid). - `struct` (string, required for `kind="custom"`): name of the custom permutation struct (valid C/C++ identifier). If you use `limits` or `features` here, do not also use the built-in `limits`/`features` kinds in the same rule. 
- `name` (string, required): identifier used in both generated HLSL config and C++ key (must be a valid C/C++ identifier). -- `type` (string, required): `bool`, `uint16_t`, `uint32_t`, `uint64_t`. +- `type` (string, required): `bool`, `uint16_t`, `uint32_t`, `uint64_t`, `int16_t`, `int32_t`, `int64_t`, `float`, `double`. - `values` (array of numbers, required): the values you want to prebuild. - for `bool`, values must be `0` or `1`. + - for signed integer types, negative values are allowed. + - for `float`/`double`, you can provide **numbers or numeric strings** (e.g. `-1`, `-1.0`, `1e-3`, or `-1.f` for floats). Values are **normalized** to canonical scientific notation (1 digit before the decimal, 8 digits after for `float` or 16 for `double`, signed exponent with 2 or 3 digits). The normalized text becomes part of the key. At build time, NSC compiles each combination of values (cartesian product). At runtime, `get_spirv_key` appends suffixes using the structs you pass in for `limits`/`features` (duck-typed by required members) and any custom kinds. Each group starts with `__limits`, `__features`, or `__`, followed by `.member_` entries. Group order follows the **first appearance of each kind in `CAPS`** (and this same order is the required argument order for `get_spirv_key`); groups with no members are omitted. +Each generated `.config` file defines a `DeviceConfigCaps` struct for HLSL. It includes: +- flat members for `limits`/`features` (backwards compatibility with older shaders) +- nested structs for custom kinds only, e.g. 
`DeviceConfigCaps::userA` + +Example shape: + +```hlsl +struct DeviceConfigCaps +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t maxImageDimension2D = 16384u; + NBL_CONSTEXPR_STATIC_INLINE bool shaderCullDistance = true; + + struct userA + { + NBL_CONSTEXPR_STATIC_INLINE uint32_t mode = 0u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t quality = 1u; + }; +}; +``` + +For more complex usage and regression-style checks (constexpr vs runtime), see `examples_tests/73_SpirvKeysTest`. + ### Grouping caps by kind (optional) To avoid repeating the same `kind`, you can group caps with `members`: @@ -268,6 +293,33 @@ cfg.passCount = 4; auto key = nbl::this_example::builtin::build::get_spirv_key<"fft">(device, cfg); ``` +Constexpr usage with extra structs (order must match `CAPS` kind order, first appearance): + +```cpp +struct MyLimits { uint32_t maxImageDimension2D; }; +struct MyFeatures { bool shaderCullDistance; }; +struct UserA { uint32_t mode; uint32_t quality; }; +struct UserB { bool useAlternatePath; bool useFastPath; }; + +constexpr UserA userA = { 0u, 1u }; +constexpr UserB userB = { false, true }; +constexpr MyLimits limits = { 16384u }; +constexpr MyFeatures features = { true }; + +static constexpr auto keyBuf = + nbl::this_example::builtin::build::get_spirv_key<"shader_cd">(userA, userB, limits, features); +static constexpr std::string_view keyView = keyBuf; + +``` + +## Common pitfalls + +- Argument order must follow the **first appearance of each kind in `CAPS`**; this is an intentional convention to keep the API flexible. +- `get_spirv_key` returns a buffer; prefer `std::string_view key = buf;` or `buf.view()` to consume it. +- Do not store a `std::string_view` from a temporary buffer; keep the buffer alive. +- `float`/`double` CAP values are normalized to canonical scientific notation (1 digit before the decimal, 8 or 16 digits after, signed exponent); values passed to `get_spirv_key` must match one of the CAP values exactly. 
+- `constexpr` key generation works with `float`/`double` members when the values match the CAP list. + This produces `3 * 2 = 6` permutations per build configuration, and `KEYS` contains all of them (for example): ``` diff --git a/examples_tests b/examples_tests index 4c4e5e803e..35bfcc9c33 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4c4e5e803e81e043390699f76cc51c6c360908d1 +Subproject commit 35bfcc9c332fb4c697b8fa5d3f11031b0e63c327 diff --git a/include/nbl/core/string/StringLiteral.h b/include/nbl/core/string/StringLiteral.h index d9e0a92580..7062bd3995 100644 --- a/include/nbl/core/string/StringLiteral.h +++ b/include/nbl/core/string/StringLiteral.h @@ -6,7 +6,9 @@ #include #include +#include #include +#include #include #include #include @@ -40,25 +42,186 @@ struct StringLiteralBufferType }; template -struct StringLiteralBuffer +class StringLiteralBuffer { - std::array b{}; - size_t n = 0; - +public: constexpr void append(char c) { - if (n >= Cap) - throw "overflow"; + if (!ensure_capacity(1)) + return; b[n++] = c; } - constexpr void append(std::string_view sv) { for (char c : sv) append(c); } - constexpr void append(const char* s) { for (; *s; ++s) append(*s); } + constexpr void append(std::string_view sv) + { + if (!ensure_capacity(sv.size())) + return; + for (char c : sv) + b[n++] = c; + } + constexpr void append(const char* s) + { + for (; *s; ++s) + append(*s); + } constexpr std::string_view view() const { return { b.data(), n }; } + constexpr operator std::string_view() const { return view(); } constexpr const char* data() const { return b.data(); } constexpr size_t size() const { return n; } + +private: + constexpr bool ensure_capacity(size_t add) + { + if (n + add <= Cap) + return true; + if (std::is_constant_evaluated()) + throw "overflow"; + assert(false && "StringLiteralBuffer overflow"); + return false; + } + + std::array b{}; + size_t n = 0; }; +template +constexpr std::string_view to_string_view(const 
StringLiteralBuffer& v) +{ + return v.view(); +} + +template +constexpr void append_uint_padded(Out& o, unsigned value, int width) +{ + char buf[16]; + int len = 0; + do + { + buf[len++] = static_cast('0' + (value % 10u)); + value /= 10u; + } while (value); + while (len < width) + buf[len++] = '0'; + for (int i = len - 1; i >= 0; --i) + o.append(buf[i]); +} + +template +constexpr void append_float_scientific(Out& o, T v) +{ + using Limits = std::numeric_limits; + constexpr int precision = Limits::max_digits10 - 1; + if (v != v) + { + assert(false && "StringLiteralBuffer float format failed"); + return; + } + if constexpr (Limits::has_infinity) + { + if (v == Limits::infinity() || v == -Limits::infinity()) + { + assert(false && "StringLiteralBuffer float format failed"); + return; + } + } + if (v < T(0)) + { + o.append('-'); + v = -v; + } + if (v == T(0)) + { + o.append('0'); + o.append('.'); + for (int i = 0; i < precision; ++i) + o.append('0'); + o.append('e'); + o.append('+'); + append_uint_padded(o, 0u, 2); + return; + } + + long double m = static_cast(v); + int exp10 = 0; + while (m >= 10.0L) + { + m /= 10.0L; + ++exp10; + } + while (m < 1.0L) + { + m *= 10.0L; + --exp10; + } + + std::array digits{}; + digits[0] = static_cast(m); + long double frac = m - static_cast(digits[0]); + for (int i = 1; i <= precision; ++i) + { + frac *= 10.0L; + int d = static_cast(frac); + if (d > 9) + d = 9; + digits[i] = d; + frac -= static_cast(d); + } + + frac *= 10.0L; + int round_digit = static_cast(frac); + if (round_digit > 9) + round_digit = 9; + long double remainder = frac - static_cast(round_digit); + bool round_up = false; + if (round_digit > 5) + round_up = true; + else if (round_digit == 5) + { + if (remainder > 0.0L) + round_up = true; + else + round_up = (digits[precision] % 2) != 0; + } + + if (round_up) + { + int i = precision; + for (; i >= 0; --i) + { + if (digits[i] < 9) + { + digits[i]++; + break; + } + digits[i] = 0; + } + if (i < 0) + { + digits[0] = 1; + for 
(int j = 1; j <= precision; ++j) + digits[j] = 0; + ++exp10; + } + } + + o.append(static_cast('0' + digits[0])); + o.append('.'); + for (int i = 1; i <= precision; ++i) + o.append(static_cast('0' + digits[i])); + o.append('e'); + if (exp10 < 0) + { + o.append('-'); + exp10 = -exp10; + } + else + { + o.append('+'); + } + const int exp_width = (exp10 >= 100) ? 3 : 2; + append_uint_padded(o, static_cast(exp10), exp_width); +} + template constexpr void put(Out& o, const T& v) { @@ -99,6 +262,14 @@ constexpr void put(Out& o, const T& v) while (k) o.append(tmp[--k]); } + else if constexpr (std::is_same_v || std::is_same_v) + { + append_float_scientific(o, v); + } + else if constexpr (std::is_floating_point_v) + { + static_assert(!sizeof(U), "Unsupported %s argument type"); + } else if constexpr (std::is_convertible_v) { o.append(std::string_view(v)); From d1201063670bbfa31724e44e7a5c332cd76d050e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 11 Jan 2026 13:02:33 +0100 Subject: [PATCH 03/14] add FNV1a constexpr hash and use with SPIRV keys, add generic hash helpers, clean & improve logs, IDE grouping and autogen file names, update examples_tests submodule --- cmake/common.cmake | 99 +++++++++++++++++-------- examples_tests | 2 +- include/nbl/core/hash/fnv1a64.h | 28 +++++++ include/nbl/core/string/StringLiteral.h | 2 + tools/nsc/main.cpp | 47 ++++++++++-- 5 files changed, 137 insertions(+), 41 deletions(-) create mode 100644 include/nbl/core/hash/fnv1a64.h diff --git a/cmake/common.cmake b/cmake/common.cmake index aa9237959f..b0158f4695 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1144,6 +1144,7 @@ option(NSC_DEBUG_EDIF_TOOL_BIT "Add \"-fspv-debug=tool\" to NSC Debug CLI" ON) option(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT "Add \"-fspv-debug=vulkan-with-source\" to NSC Debug CLI" OFF) option(NSC_USE_DEPFILE "Generate depfiles for NSC custom commands" ON) option(NBL_NSC_DISABLE_CUSTOM_COMMANDS "Disable NSC custom commands" OFF) +option(NBL_NSC_VERBOSE 
"Enable NSC verbose logging to .log" ON) function(NBL_CREATE_NSC_COMPILE_RULES) set(COMMENT "this code has been autogenerated with Nabla CMake NBL_CREATE_HLSL_COMPILE_RULES utility") @@ -1153,18 +1154,14 @@ function(NBL_CREATE_NSC_COMPILE_RULES) // -> @COMMENT@! #ifndef _PERMUTATION_CAPS_AUTO_GEN_GLOBALS_INCLUDED_ #define _PERMUTATION_CAPS_AUTO_GEN_GLOBALS_INCLUDED_ -#ifdef __HLSL_VERSION #include struct DeviceConfigCaps { @CAPS_EVAL@ }; - -#include "@TARGET_INPUT@" - -#endif // __HLSL_VERSION -#endif // _PERMUTATION_CAPS_AUTO_GEN_GLOBALS_INCLUDED_ // <- @COMMENT@! +#endif // _PERMUTATION_CAPS_AUTO_GEN_GLOBALS_INCLUDED_ +#include "@TARGET_INPUT@" ]=]) @@ -1527,6 +1524,29 @@ sys.stdout.write(sign+str(digits[0])+"."+("".join(str(d) for d in digits[1:]))+" set(${_OUT_VAR} "${_NBL_CANON}") endmacro() + macro(NBL_HASH_SPIRV_KEY _VALUE _OUT_VAR) + NBL_REQUIRE_PYTHON() + set(_NBL_PY_HASH [=[ +import sys +s=sys.argv[1] +h=14695981039346656037 +for b in s.encode("utf-8"): + h^=b + h=(h*1099511628211)&0xFFFFFFFFFFFFFFFF +sys.stdout.write(str(h)) +]=]) + execute_process( + COMMAND "${Python3_EXECUTABLE}" -c "${_NBL_PY_HASH}" "${_VALUE}" + RESULT_VARIABLE _NBL_HASH_RESULT + OUTPUT_VARIABLE _NBL_HASH_OUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(NOT _NBL_HASH_RESULT EQUAL 0) + message(FATAL_ERROR "Failed to hash SPIR-V key \"${_VALUE}\"") + endif() + set(${_OUT_VAR} "${_NBL_HASH_OUT}") + endmacro() + set(CAP_NAMES "") set(CAP_TYPES "") set(CAP_KINDS "") @@ -1816,7 +1836,7 @@ sys.stdout.write(sign+str(digits[0])+"."+("".join(str(d) for d in digits[1:]))+" list(LENGTH CAP_NAMES CAP_COUNT) - set(RETVAL_FMT "$/${BASE_KEY}") + set(RETVAL_FMT "${BASE_KEY}") set(RETVAL_ARGS "") set(CX_CAPACITY 0) string(LENGTH "${BASE_KEY}" CX_BASE_LEN) @@ -1870,7 +1890,11 @@ sys.stdout.write(sign+str(digits[0])+"."+("".join(str(d) for d in digits[1:]))+" set(RETVAL_ARGS_STR "") endif() string(CONFIGURE [=[ - nbl::core::detail::append_printf_s(retval@RETVAL_ARGS_STR@); + typename 
StringLiteralBufferType::type nbl_spirv_full = {}; + nbl::core::detail::append_printf_s(nbl_spirv_full@RETVAL_ARGS_STR@); + retval.append("$/"); + nbl::core::detail::put(retval, nbl::core::FNV1a_64(nbl_spirv_full.view())); + retval.append(".spv"); ]=] RETVAL_EVAL_CONSTEXPR @ONLY) set(SPIRV_CUSTOM_TRAITS "") @@ -2022,8 +2046,10 @@ namespace nbl::core::detail { set(CAPS_VALUES_PART "${ARGN}") if(NUM_CAPS EQUAL 0 OR CAP_INDEX EQUAL ${NUM_CAPS}) set(FINAL_KEY "${BASE_KEY}${PREFIX}.spv") # always add ext even if its already there to make sure asset loader always is able to load as IShader - set(CONFIG_FILE_TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY}") - set(CONFIG_FILE "${CONFIG_FILE_TARGET_OUTPUT}.config") + NBL_HASH_SPIRV_KEY("${FINAL_KEY}" FINAL_KEY_HASH) + set(HASHED_KEY "${FINAL_KEY_HASH}.spv") + set(CONFIG_FILE_TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_HASH}") + set(CONFIG_FILE "${CONFIG_FILE_TARGET_OUTPUT}.in.hlsl") set(CAPS_EVAL "") if(NUM_CAPS GREATER 0) set(CAPS_EVAL_LIMITS "") @@ -2046,25 +2072,25 @@ namespace nbl::core::detail { set(MEMBER_TYPE "${_NBL_CAP_TYPE}") set(MEMBER_VALUE "${_NBL_CAP_VALUE}") string(CONFIGURE [=[ -NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBER_VALUE@; // got permuted +NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBER_VALUE@; ]=] _NBL_MEMBER_LINE @ONLY) if(_NBL_CAP_KIND STREQUAL "limits") - string(APPEND CAPS_EVAL_LIMITS "${_NBL_MEMBER_LINE}") + string(APPEND CAPS_EVAL_LIMITS " ${_NBL_MEMBER_LINE}") elseif(_NBL_CAP_KIND STREQUAL "features") - string(APPEND CAPS_EVAL_FEATURES "${_NBL_MEMBER_LINE}") + string(APPEND CAPS_EVAL_FEATURES " ${_NBL_MEMBER_LINE}") else() set(_NBL_CUSTOM_LINE_VAR "_NBL_CUSTOM_LINES_${_NBL_CAP_KIND}") - set(${_NBL_CUSTOM_LINE_VAR} "${${_NBL_CUSTOM_LINE_VAR}}${_NBL_MEMBER_LINE}") + set(${_NBL_CUSTOM_LINE_VAR} "${${_NBL_CUSTOM_LINE_VAR}} ${_NBL_MEMBER_LINE}") endif() endforeach() if(CAPS_EVAL_LIMITS) - string(APPEND CAPS_EVAL "// limits\n") 
- string(APPEND CAPS_EVAL "${CAPS_EVAL_LIMITS}\n") + string(APPEND CAPS_EVAL " // limits\n") + string(APPEND CAPS_EVAL "${CAPS_EVAL_LIMITS}") endif() if(CAPS_EVAL_FEATURES) - string(APPEND CAPS_EVAL "// features\n") - string(APPEND CAPS_EVAL "${CAPS_EVAL_FEATURES}\n") + string(APPEND CAPS_EVAL " // features\n") + string(APPEND CAPS_EVAL "${CAPS_EVAL_FEATURES}") endif() set(_NBL_HAS_CUSTOM FALSE) @@ -2075,30 +2101,33 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE endforeach() if(_NBL_HAS_CUSTOM) - string(APPEND CAPS_EVAL "// custom structs\n") + string(APPEND CAPS_EVAL " // custom structs\n") foreach(_NBL_KIND IN LISTS ORDERED_KINDS) if(NOT _NBL_KIND STREQUAL "limits" AND NOT _NBL_KIND STREQUAL "features") if(_NBL_CUSTOM_LINES_${_NBL_KIND}) set(NBL_KIND_NAME "${_NBL_KIND}") set(MEMBER_LINES "${_NBL_CUSTOM_LINES_${_NBL_KIND}}") string(CONFIGURE [=[ -struct @NBL_KIND_NAME@ -{ -@MEMBER_LINES@}; + struct @NBL_KIND_NAME@ + { +@MEMBER_LINES@ }; ]=] _NBL_KIND_STRUCT @ONLY) - string(APPEND CAPS_EVAL "${_NBL_KIND_STRUCT}\n") + string(APPEND CAPS_EVAL "${_NBL_KIND_STRUCT}") endif() endif() endforeach() endif() endif() + if(CAPS_EVAL STREQUAL "") + set(CAPS_EVAL " // no caps\n") + endif() string(CONFIGURE "${DEVICE_CONFIG_VIEW}" CONFIG_CONTENT @ONLY) file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") # generate keys and commands for compiling shaders - set(FINAL_KEY_REL_PATH "$/${FINAL_KEY}") + set(FINAL_KEY_REL_PATH "$/${HASHED_KEY}") set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") - set(DEPFILE_PATH "${TARGET_OUTPUT}.d") + set(DEPFILE_PATH "${TARGET_OUTPUT}.dep") set(NBL_NSC_LOG_PATH "${TARGET_OUTPUT}.log") set(NBL_NSC_DEPFILE_ARGS "") @@ -2111,6 +2140,7 @@ struct @NBL_KIND_NAME@ -Fc "${TARGET_OUTPUT}" ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} ${NBL_NSC_DEPFILE_ARGS} + $<$:-verbose> "${CONFIG_FILE}" ) @@ -2145,31 +2175,35 @@ struct @NBL_KIND_NAME@ set_source_files_properties(${NBL_NSC_OUT_FILES} PROPERTIES 
GENERATED TRUE) endif() - set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}") + set(HEADER_ONLY_LIKE "${TARGET_INPUT}") if(NBL_NSC_OUT_FILES) list(APPEND HEADER_ONLY_LIKE ${NBL_NSC_OUT_FILES}) endif() - target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) + target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE} "${CONFIG_FILE}") set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES HEADER_FILE_ONLY ON VS_TOOL_OVERRIDE None ) + set_source_files_properties("${CONFIG_FILE}" PROPERTIES + GENERATED TRUE + VS_TOOL_OVERRIDE None + ) if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) if(CMAKE_CONFIGURATION_TYPES) foreach(_CFG IN LISTS CMAKE_CONFIGURATION_TYPES) - set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${FINAL_KEY}") + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${HASHED_KEY}") set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") if(NSC_USE_DEPFILE) - list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.d") + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.dep") endif() source_group("${OUT}/${_CFG}" FILES ${NBL_NSC_OUT_FILES_IDE}) endforeach() else() - set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${FINAL_KEY}") + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${HASHED_KEY}") set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") if(NSC_USE_DEPFILE) - list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.d") + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.dep") endif() source_group("${OUT}" FILES ${NBL_NSC_OUT_FILES_IDE}) endif() @@ -2227,7 +2261,8 @@ struct @NBL_KIND_NAME@ list(APPEND KEYS ${ACCESS_KEY}) endforeach() - source_group("${IN}" FILES ${CONFIGS} ${INPUTS}) + source_group("${IN}/autogen" FILES ${CONFIGS}) + source_group("${IN}" FILES ${INPUTS}) if(IMPL_HLSL_GLOB) target_sources(${IMPL_TARGET} PRIVATE ${IMPL_HLSL_GLOB}) set_source_files_properties(${IMPL_HLSL_GLOB} PROPERTIES diff --git a/examples_tests b/examples_tests index 35bfcc9c33..f2f4c5f8c4 160000 --- a/examples_tests +++ 
b/examples_tests @@ -1 +1 @@ -Subproject commit 35bfcc9c332fb4c697b8fa5d3f11031b0e63c327 +Subproject commit f2f4c5f8c42f2de3f86de66ac4e4398c00a792a1 diff --git a/include/nbl/core/hash/fnv1a64.h b/include/nbl/core/hash/fnv1a64.h new file mode 100644 index 0000000000..96f5315fbb --- /dev/null +++ b/include/nbl/core/hash/fnv1a64.h @@ -0,0 +1,28 @@ +// Copyright (C) 2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_CORE_FNV1A64_H_INCLUDED_ +#define _NBL_CORE_FNV1A64_H_INCLUDED_ + +#include +#include +#include + +namespace nbl::core +{ + +// FNV-1a 64-bit hash. +constexpr uint64_t FNV1a_64(std::string_view sv) +{ + uint64_t h = 14695981039346656037ull; + for (unsigned char c : sv) + { + h ^= c; + h *= 1099511628211ull; + } + return h; +} + +} + +#endif // _NBL_CORE_FNV1A64_H_INCLUDED_ diff --git a/include/nbl/core/string/StringLiteral.h b/include/nbl/core/string/StringLiteral.h index 7062bd3995..d48ebce7a5 100644 --- a/include/nbl/core/string/StringLiteral.h +++ b/include/nbl/core/string/StringLiteral.h @@ -13,6 +13,8 @@ #include #include +#include "nbl/core/hash/fnv1a64.h" + namespace nbl::core { diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 5ab01d72e5..4a82f49136 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -105,6 +105,9 @@ class ShaderLogger final : public IThreadsafeLogger if (!m_file) return; + std::error_code ec; + std::filesystem::resize_file(m_logPath, 0, ec); + m_fileLogger = make_smart_refctd_ptr(smart_refctd_ptr(m_file), true, m_fileMask); } @@ -260,6 +263,8 @@ class ShaderCompiler final : public IApplicationFramework const auto consoleMask = bitflag(ILogger::ELL_WARNING) | ILogger::ELL_ERROR; m_logger = make_smart_refctd_ptr(m_system, logPath, fileMask, consoleMask, noLog); + const auto configName = std::filesystem::path(outputFilepath).parent_path().filename().string(); + const auto configLabel = 
configName.empty() ? "Unknown" : configName; m_arguments = std::move(unknownArgs); if (!m_arguments.empty() && m_arguments.back() == fileToCompile) @@ -278,9 +283,7 @@ class ShaderCompiler final : public IApplicationFramework if (program.is_used("-MF")) dep.path = program.get("-MF"); if (dep.enabled && dep.path.empty()) - dep.path = outputFilepath + ".d"; - if (dep.enabled) - m_logger->log("Dependency file will be saved to %s", ILogger::ELL_INFO, dep.path.c_str()); + dep.path = outputFilepath + ".dep"; #ifndef NBL_EMBED_BUILTIN_RESOURCES if (!noNblBuiltins) @@ -303,10 +306,33 @@ class ShaderCompiler final : public IApplicationFramework m_include_search_paths.emplace_back(m_arguments[i + 1]); } + if (verbose) + { + auto join = [](const std::vector& items) + { + std::string out; + for (const auto& item : items) + { + if (!out.empty()) + out.push_back(' '); + out.append(item); + } + return out; + }; + m_logger->log("Verbose logging enabled.", ILogger::ELL_DEBUG); + m_logger->log("Variant: %s", ILogger::ELL_DEBUG, configLabel.c_str()); + if (!rawArgs.empty()) + m_logger->log("Compiler: %s", ILogger::ELL_DEBUG, rawArgs.front().c_str()); + m_logger->log("Command line: %s", ILogger::ELL_DEBUG, join(rawArgs).c_str()); + m_logger->log("Input: %s", ILogger::ELL_DEBUG, fileToCompile.c_str()); + m_logger->log("Output: %s", ILogger::ELL_DEBUG, outputFilepath.c_str()); + if (dep.enabled) + m_logger->log("Depfile: %s", ILogger::ELL_DEBUG, dep.path.c_str()); + } + const char* const action = preprocessOnly ? "Preprocessing" : "Compiling"; const char* const outType = preprocessOnly ? 
"Preprocessed" : "Compiled"; m_logger->log("%s %s", ILogger::ELL_INFO, action, fileToCompile.c_str()); - m_logger->log("%s shader code will be saved to %s", ILogger::ELL_INFO, outType, outputFilepath.c_str()); auto [shader, shaderStage] = open_shader_file(fileToCompile); if (!shader || shader->getContentType() != IShader::E_CONTENT_TYPE::ECT_HLSL) @@ -326,9 +352,14 @@ class ShaderCompiler final : public IApplicationFramework return false; } - const auto took = std::to_string(std::chrono::duration_cast(end - start).count()); m_logger->log("Shader %s successful.", ILogger::ELL_INFO, op); - m_logger->log("Took %s ms.", ILogger::ELL_PERFORMANCE, took.c_str()); + if (dep.enabled) + { + const bool depWritten = m_system->exists(dep.path, IFileBase::ECF_READ); + if (!depWritten) + m_logger->log("Dependency file missing at %s", ILogger::ELL_WARNING, dep.path.c_str()); + m_logger->log(depWritten ? "Depfile written successfully." : "Depfile write failed.", depWritten ? ILogger::ELL_INFO : ILogger::ELL_WARNING); + } const auto outParent = std::filesystem::path(outputFilepath).parent_path(); if (!outParent.empty() && !std::filesystem::exists(outParent)) @@ -362,8 +393,8 @@ class ShaderCompiler final : public IApplicationFramework return false; } - if (dep.enabled) - m_logger->log("Dependency file written to %s", ILogger::ELL_INFO, dep.path.c_str()); + const auto took = std::to_string(std::chrono::duration_cast(end - start).count()); + m_logger->log("Took %s ms.", ILogger::ELL_PERFORMANCE, took.c_str()); return true; } From 106b501e28df8c9966f81c5b9a143e5c79ca2741 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 12 Jan 2026 20:19:25 +0100 Subject: [PATCH 04/14] nsc & build system updates preprocess cache, force-includes, deps handling, timings, cache probing, logging upgrades, docs & cmake polishing --- cmake/common.cmake | 107 ++- docs/nsc-prebuilds.md | 46 +- include/nbl/asset/utils/CHLSLCompiler.h | 7 +- include/nbl/asset/utils/IShaderCompiler.h | 172 ++++- 
src/nbl/asset/utils/CGLSLCompiler.cpp | 3 + src/nbl/asset/utils/CHLSLCompiler.cpp | 37 +- src/nbl/asset/utils/CWaveStringResolver.cpp | 18 +- src/nbl/asset/utils/IShaderCompiler.cpp | 613 +++++++++++++++++- .../utils/shaderCompiler_serialization.h | 4 +- src/nbl/asset/utils/waveContext.h | 68 +- tools/nsc/main.cpp | 584 +++++++++++++++-- 11 files changed, 1529 insertions(+), 130 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index b0158f4695..662dc5e665 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1145,6 +1145,9 @@ option(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT "Add \"-fspv-debug=vulkan-with-source\" t option(NSC_USE_DEPFILE "Generate depfiles for NSC custom commands" ON) option(NBL_NSC_DISABLE_CUSTOM_COMMANDS "Disable NSC custom commands" OFF) option(NBL_NSC_VERBOSE "Enable NSC verbose logging to .log" ON) +option(NSC_SHADER_CACHE "Enable NSC shader cache" ON) +option(NSC_PREPROCESS_CACHE "Enable NSC preprocess cache" ON) +set(NSC_CACHE_DIR "" CACHE PATH "Optional root directory for NSC cache files (shader/preprocess)") function(NBL_CREATE_NSC_COMPILE_RULES) set(COMMENT "this code has been autogenerated with Nabla CMake NBL_CREATE_HLSL_COMPILE_RULES utility") @@ -1161,7 +1164,6 @@ struct DeviceConfigCaps }; // <- @COMMENT@! 
#endif // _PERMUTATION_CAPS_AUTO_GEN_GLOBALS_INCLUDED_ -#include "@TARGET_INPUT@" ]=]) @@ -2123,25 +2125,52 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE endif() string(CONFIGURE "${DEVICE_CONFIG_VIEW}" CONFIG_CONTENT @ONLY) file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") + list(APPEND DEPENDS_ON "${TARGET_INPUT}" "${CONFIG_FILE}") # generate keys and commands for compiling shaders set(FINAL_KEY_REL_PATH "$/${HASHED_KEY}") set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") set(DEPFILE_PATH "${TARGET_OUTPUT}.dep") set(NBL_NSC_LOG_PATH "${TARGET_OUTPUT}.log") + set(NBL_NSC_PREPROCESSED_PATH "${TARGET_OUTPUT}.pre.hlsl") + if(NSC_CACHE_DIR) + get_filename_component(NBL_NSC_CACHE_ROOT "${NSC_CACHE_DIR}" ABSOLUTE BASE_DIR "${CMAKE_BINARY_DIR}") + file(RELATIVE_PATH NBL_NSC_CACHE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT}") + set(NBL_NSC_CACHE_PATH "${NBL_NSC_CACHE_ROOT}/${NBL_NSC_CACHE_REL}.ppcache") + set(NBL_NSC_PREPROCESS_CACHE_PATH "${NBL_NSC_CACHE_ROOT}/${NBL_NSC_CACHE_REL}.ppcache.pre") + else() + set(NBL_NSC_CACHE_PATH "${TARGET_OUTPUT}.ppcache") + set(NBL_NSC_PREPROCESS_CACHE_PATH "${TARGET_OUTPUT}.ppcache.pre") + endif() set(NBL_NSC_DEPFILE_ARGS "") if(NSC_USE_DEPFILE) set(NBL_NSC_DEPFILE_ARGS -MD -MF "${DEPFILE_PATH}") endif() + set(NBL_NSC_CACHE_ARGS "") + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_CACHE_ARGS -shader-cache) + if(NSC_CACHE_DIR) + list(APPEND NBL_NSC_CACHE_ARGS -shader-cache-file "${NBL_NSC_CACHE_PATH}") + endif() + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_CACHE_ARGS -preprocess-cache) + if(NSC_CACHE_DIR) + list(APPEND NBL_NSC_CACHE_ARGS -preprocess-cache-file "${NBL_NSC_PREPROCESS_CACHE_PATH}") + endif() + endif() + set(NBL_NSC_COMPILE_COMMAND "$" -Fc "${TARGET_OUTPUT}" ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} ${NBL_NSC_DEPFILE_ARGS} $<$:-verbose> - "${CONFIG_FILE}" + ${NBL_NSC_CACHE_ARGS} + -FI "${CONFIG_FILE}" + "${TARGET_INPUT}" ) 
get_filename_component(NBL_NSC_INPUT_NAME "${TARGET_INPUT}" NAME) @@ -2150,13 +2179,20 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_BYPRODUCTS "${DEPFILE_PATH}") endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_CACHE_PATH}") + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_PREPROCESS_CACHE_PATH}") + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_PREPROCESSED_PATH}") + endif() set(NBL_NSC_CUSTOM_COMMAND_ARGS OUTPUT "${TARGET_OUTPUT}" BYPRODUCTS ${NBL_NSC_BYPRODUCTS} COMMAND ${NBL_NSC_COMPILE_COMMAND} DEPENDS ${DEPENDS_ON} - COMMENT "${NBL_NSC_CONFIG_NAME} (${NBL_NSC_INPUT_NAME})" + COMMENT "${NBL_NSC_INPUT_NAME} (${NBL_NSC_CONFIG_NAME})" VERBATIM COMMAND_EXPAND_LISTS ) @@ -2172,6 +2208,13 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_OUT_FILES "${DEPFILE_PATH}") endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_CACHE_PATH}") + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_PREPROCESS_CACHE_PATH}") + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_PREPROCESSED_PATH}") + endif() set_source_files_properties(${NBL_NSC_OUT_FILES} PROPERTIES GENERATED TRUE) endif() @@ -2193,18 +2236,76 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE if(CMAKE_CONFIGURATION_TYPES) foreach(_CFG IN LISTS CMAKE_CONFIGURATION_TYPES) set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${HASHED_KEY}") + set(TARGET_OUTPUT_IDE_PREPROCESSED "${TARGET_OUTPUT_IDE}.pre.hlsl") + if(NSC_CACHE_DIR) + file(RELATIVE_PATH TARGET_OUTPUT_IDE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT_IDE}") + set(TARGET_OUTPUT_IDE_CACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache.pre") + else() + set(TARGET_OUTPUT_IDE_CACHE 
"${TARGET_OUTPUT_IDE}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${TARGET_OUTPUT_IDE}.ppcache.pre") + endif() set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.dep") endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_CACHE}") + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PRECACHE}") + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + endif() + target_sources(${IMPL_TARGET} PRIVATE ${NBL_NSC_OUT_FILES_IDE}) + set_source_files_properties(${NBL_NSC_OUT_FILES_IDE} PROPERTIES + HEADER_FILE_ONLY ON + VS_TOOL_OVERRIDE None + GENERATED TRUE + ) + if(NSC_SHADER_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_CACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + endif() + if(NSC_PREPROCESS_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PRECACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + endif() source_group("${OUT}/${_CFG}" FILES ${NBL_NSC_OUT_FILES_IDE}) endforeach() else() set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${HASHED_KEY}") + set(TARGET_OUTPUT_IDE_PREPROCESSED "${TARGET_OUTPUT_IDE}.pre.hlsl") + if(NSC_CACHE_DIR) + file(RELATIVE_PATH TARGET_OUTPUT_IDE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT_IDE}") + set(TARGET_OUTPUT_IDE_CACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache.pre") + else() + set(TARGET_OUTPUT_IDE_CACHE "${TARGET_OUTPUT_IDE}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${TARGET_OUTPUT_IDE}.ppcache.pre") + endif() set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.dep") endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE 
"${TARGET_OUTPUT_IDE_CACHE}") + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PRECACHE}") + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + endif() + target_sources(${IMPL_TARGET} PRIVATE ${NBL_NSC_OUT_FILES_IDE}) + set_source_files_properties(${NBL_NSC_OUT_FILES_IDE} PROPERTIES + HEADER_FILE_ONLY ON + VS_TOOL_OVERRIDE None + GENERATED TRUE + ) + if(NSC_SHADER_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_CACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + endif() + if(NSC_PREPROCESS_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PRECACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + endif() source_group("${OUT}" FILES ${NBL_NSC_OUT_FILES_IDE}) endif() endif() diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md index e035837658..38c0e48716 100644 --- a/docs/nsc-prebuilds.md +++ b/docs/nsc-prebuilds.md @@ -51,10 +51,16 @@ For each registered input it generates: - explicit specializations for each registered base `KEY` - the returned key already includes the build config prefix (compiled into the header). -Keys are strings that match the output layout: +Keys are hashed to keep filenames short and stable across long permutation strings. The **full key string** is built as: ``` -/(__._)(._)....spv +(__._)(._)....spv +``` + +Then `FNV-1a 64-bit` is computed from that full key (no `` prefix), and the **final output key** is: + +``` +/.spv ``` ## The JSON "INPUTS" format @@ -100,6 +106,38 @@ By default `NBL_CREATE_NSC_COMPILE_RULES` also collects `*.hlsl` files for IDE v - `GLOB_DIR` (optional): root directory for the default `*.hlsl` scan. - `DISCARD_DEFAULT_GLOB` (flag): disables the default scan and IDE grouping. +## Cache layers (SPIR-V + preprocess) + +There are two independent caches: + +- `NSC_SHADER_CACHE` (default `ON`) -> SPIR-V cache (`.spv.ppcache`) for full compilation results. 
+- `NSC_PREPROCESS_CACHE` (default `ON`) -> preprocessor prefix cache (`.spv.ppcache.pre`) to avoid repeating Boost.Wave include work when only the main shader changes. +- Both caches are used only for compilation (not `-P` preprocess-only runs). +- When preprocess cache is enabled and used, NSC also writes a combined preprocessed view (`.spv.pre.hlsl`) next to the outputs. + - This file is the exact input fed to DXC on the preprocess-cache path, so it's ready to paste into Godbolt for repros (use the same flags/includes). + +With `-verbose`, `.log` shows: + +- `Cache: ` and `Cache hit!/miss! ...` for SPIR-V cache. +- `Preprocess cache: ` and `Preprocess cache hit!/miss! ...` for the prefix cache. +- Timing lines (performance): + - `Shader cache lookup took: ...` + - `Preprocess cache lookup took: ...` + - `Total cache probe took: ...` + - `Preprocess took: ...` (only on compile path) + - `Compile took: ...` (only on compile path) + - `Total build time: ...` (preprocess + compile) + - `Total took: ...` (overall tool runtime) + +You can redirect both caches into a shared directory with: + +- `NSC_CACHE_DIR` (path). The cache files keep the same relative layout as `BINARY_DIR` (including `/`), but live under the given root. This is handy for CI or persistent cache volumes. + +The preprocess cache key is based on the **prefix** of the input file (leading directives/comments plus forced includes), and cache validity is checked against include dependency hashes. That means: + +- edits to the shader body still hit (fast path) +- changes to prefix directives, forced-includes, or included headers cause a cold run + ## Minimal usage (no permutations) Example pattern (as in `examples_tests/27_MPMCScheduler/CMakeLists.txt`): @@ -142,7 +180,7 @@ std::string_view key = keyBuf; auto bundle = assetMgr->getAsset(key.data(), loadParams); ``` -`OUTPUT_VAR` (here: `KEYS`) is assigned the list of **all** produced access keys (all configurations + all permutations). 
This list is intended to be fed into `NBL_CREATE_RESOURCE_ARCHIVE(BUILTINS ${KEYS})`. +`OUTPUT_VAR` (here: `KEYS`) is assigned the list of **all** produced access keys (all configurations + all permutations). These are already hashed (e.g. `Debug/123456789.spv`) and are intended to be fed into `NBL_CREATE_RESOURCE_ARCHIVE(BUILTINS ${KEYS})`. ## Permutations via `CAPS` @@ -181,7 +219,7 @@ struct DeviceConfigCaps }; ``` -For more complex usage and regression-style checks (constexpr vs runtime), see `examples_tests/73_SpirvKeysTest`. +For more complex usage and regression-style checks (constexpr vs runtime, hashing, mixed payloads), see `examples_tests/73_SpirvKeysTest`. ### Grouping caps by kind (optional) diff --git a/include/nbl/asset/utils/CHLSLCompiler.h b/include/nbl/asset/utils/CHLSLCompiler.h index 92a1dca394..b093ff98ed 100644 --- a/include/nbl/asset/utils/CHLSLCompiler.h +++ b/include/nbl/asset/utils/CHLSLCompiler.h @@ -51,7 +51,7 @@ class NBL_API2 CHLSLCompiler final : public IShaderCompiler //} std::string preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector* dependencies = nullptr) const override; - std::string preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector& dxc_compile_flags_override, std::vector* dependencies = nullptr) const; + std::string preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector& dxc_compile_flags_override, std::vector* dependencies = nullptr, std::vector* macro_defs = nullptr) const; void insertIntoStart(std::string& code, std::ostringstream&& ins) const override; @@ -118,6 +118,9 @@ class NBL_API2 CHLSLCompiler final : public IShaderCompiler return std::span(RequiredArguments); } + protected: + bool preprocessPrefixForCache(std::string_view code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& 
preprocessOptions, CPreprocessCache::SEntry& outEntry) const override; + protected: // This can't be a unique_ptr due to it being an undefined type // when Nabla is used as a lib @@ -155,4 +158,4 @@ class NBL_API2 CHLSLCompiler final : public IShaderCompiler #endif -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index 9fd4eee833..e7671d7eb5 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -118,6 +118,44 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted std::string_view definition; }; + // Forward declaration for dependency access. + struct CCache; + + struct SPreprocessingDependency + { + public: + // Perf note: hashing while preprocessor lexing is likely to be slower than just hashing the whole array like this + inline SPreprocessingDependency(const system::path& _requestingSourceDir, const std::string_view& _identifier, bool _standardInclude, core::blake3_hash_t _hash) : + requestingSourceDir(_requestingSourceDir), identifier(_identifier), standardInclude(_standardInclude), hash(_hash) + {} + + inline SPreprocessingDependency(SPreprocessingDependency&) = default; + inline SPreprocessingDependency& operator=(SPreprocessingDependency&) = delete; + inline SPreprocessingDependency(SPreprocessingDependency&&) = default; + inline SPreprocessingDependency& operator=(SPreprocessingDependency&&) = default; + + // Needed for json vector serialization. 
Making it private and declaring from_json(_, SEntry&) as friend didn't work + inline SPreprocessingDependency() {} + + inline const system::path& getRequestingSourceDir() const { return requestingSourceDir; } + inline std::string_view getIdentifier() const { return identifier; } + inline bool isStandardInclude() const { return standardInclude; } + inline const core::blake3_hash_t& getHash() const { return hash; } + + private: + friend void to_json(nlohmann::json& j, const SPreprocessingDependency& dependency); + friend void from_json(const nlohmann::json& j, SPreprocessingDependency& dependency); + friend class CCache; + + // path or identifier + system::path requestingSourceDir = ""; + std::string identifier = ""; + // hash of the contents - used to check against a found_t + core::blake3_hash_t hash = {}; + // If true, then `getIncludeStandard` was used to find, otherwise `getIncludeRelative` + bool standardInclude = false; + }; + // using E_SPIRV_VERSION = nbl::hlsl::SpirvVersion; @@ -136,6 +174,9 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted system::logger_opt_ptr logger = nullptr; const CIncludeFinder* includeFinder = nullptr; std::span extraDefines = {}; + std::span forceIncludes = {}; + std::string_view codeForCache = {}; + bool applyForceIncludes = true; E_SPIRV_VERSION targetSpirvVersion = E_SPIRV_VERSION::ESV_1_6; bool depfile = false; system::path depfilePath = {}; @@ -153,7 +194,6 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted }; // Forward declaration for SCompilerOptions use - struct CCache; /* @stage shaderStage, can be ESS_ALL_OR_LIBRARY to make multi-entrypoint shaders @targetSpirvVersion spirv version @@ -185,15 +225,20 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted SPreprocessorOptions preprocessorOptions = {}; CCache* readCache = nullptr; CCache* writeCache = nullptr; + bool* cacheHit = nullptr; + const std::vector* dependencyOverrides = nullptr; }; + static std::string 
applyForceIncludes(std::string_view code, std::span forceIncludes); + static bool probeShaderCache(const CCache* cache, std::string_view code, const SCompilerOptions& options, const CIncludeFinder* finder); + class CCache final : public IReferenceCounted { friend class IShaderCompiler; public: // Used to check compatibility of Caches before reading - constexpr static inline std::string_view VERSION = "1.1.0"; + constexpr static inline std::string_view VERSION = "1.2.1"; static auto const SHADER_BUFFER_SIZE_BYTES = sizeof(uint64_t) / sizeof(uint8_t); // It's obviously 8 @@ -201,40 +246,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted { friend class CCache; - struct SPreprocessingDependency - { - public: - // Perf note: hashing while preprocessor lexing is likely to be slower than just hashing the whole array like this - inline SPreprocessingDependency(const system::path& _requestingSourceDir, const std::string_view& _identifier, bool _standardInclude, core::blake3_hash_t _hash) : - requestingSourceDir(_requestingSourceDir), identifier(_identifier), standardInclude(_standardInclude), hash(_hash) - {} - - inline SPreprocessingDependency(SPreprocessingDependency&) = default; - inline SPreprocessingDependency& operator=(SPreprocessingDependency&) = delete; - inline SPreprocessingDependency(SPreprocessingDependency&&) = default; - inline SPreprocessingDependency& operator=(SPreprocessingDependency&&) = default; - - // Needed for json vector serialization. 
Making it private and declaring from_json(_, SEntry&) as friend didn't work - inline SPreprocessingDependency() {} - - inline const system::path& getRequestingSourceDir() const { return requestingSourceDir; } - inline std::string_view getIdentifier() const { return identifier; } - inline bool isStandardInclude() const { return standardInclude; } - - private: - friend void to_json(nlohmann::json& j, const SEntry::SPreprocessingDependency& dependency); - friend void from_json(const nlohmann::json& j, SEntry::SPreprocessingDependency& dependency); - friend class CCache; - - // path or identifier - system::path requestingSourceDir = ""; - std::string identifier = ""; - // hash of the contents - used to check against a found_t - core::blake3_hash_t hash = {}; - // If true, then `getIncludeStandard` was used to find, otherwise `getIncludeRelative` - bool standardInclude = false; - - }; + using SPreprocessingDependency = IShaderCompiler::SPreprocessingDependency; struct SCompilerArgs; // Forward declaration for SPreprocessorArgs's friend declaration @@ -251,6 +263,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted if (definesIt->identifier != otherDefinesIt->identifier || definesIt->definition != otherDefinesIt->definition) return false; + if (forceIncludes != other.forceIncludes) return false; + return true; } @@ -275,11 +289,15 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted for (auto define : options.extraDefines) extraDefines.emplace_back(std::string(define.identifier), std::string(define.definition)); + for (const auto& inc : options.forceIncludes) + forceIncludes.emplace_back(inc); + // Sort them so equality and hashing are well defined std::sort(extraDefines.begin(), extraDefines.end(), [](const SMacroDefinition& lhs, const SMacroDefinition& rhs) {return lhs.identifier < rhs.identifier; }); }; std::string sourceIdentifier; std::vector extraDefines; + std::vector forceIncludes; }; // TODO: SPreprocessorArgs could just be folded 
into `SCompilerArgs` to have less classes and decompressShader struct SCompilerArgs final @@ -351,6 +369,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted hashable.insert(hashable.end(), defines.identifier.begin(), defines.identifier.end()); hashable.insert(hashable.end(), defines.definition.begin(), defines.definition.end()); } + for (const auto& inc : compilerArgs.preprocessorArgs.forceIncludes) + hashable.insert(hashable.end(), inc.begin(), inc.end()); // Insert rest of stuff from this struct. We're going to treat stage, targetSpirvVersion and debugInfoFlags.value as byte arrays for simplicity hashable.insert(hashable.end(), reinterpret_cast(&compilerArgs.stage), reinterpret_cast(&compilerArgs.stage) + sizeof(compilerArgs.stage)); @@ -400,6 +420,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted inline void insert(SEntry&& entry) { + if (auto found = m_container.find(entry); found != m_container.end()) + m_container.erase(found); m_container.insert(std::move(entry)); } @@ -420,6 +442,9 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted } NBL_API2 core::smart_refctd_ptr find(const SEntry& mainFile, const CIncludeFinder* finder) const; + NBL_API2 bool contains(const SEntry& mainFile, const CIncludeFinder* finder) const; + NBL_API2 bool findEntryForCode(std::string_view code, const SCompilerOptions& options, const CIncludeFinder* finder, SEntry& outEntry) const; + NBL_API2 core::smart_refctd_ptr decompressEntry(const SEntry& entry) const; inline CCache() {} @@ -453,6 +478,79 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder) const; }; + class CPreprocessCache final : public IReferenceCounted + { + public: + constexpr static inline std::string_view VERSION = "2.0"; + + struct SEntry + { + core::blake3_hash_t prefixHash = {}; + std::string preprocessedPrefix; + std::vector macroDefs; + std::vector 
dxcFlags; + uint32_t pragmaStage = static_cast(IShader::E_SHADER_STAGE::ESS_UNKNOWN); + CCache::SEntry::dependency_container_t dependencies; + }; + + enum class ELoadStatus : uint8_t + { + Missing, + Invalid, + Loaded + }; + + enum class EProbeStatus : uint8_t + { + Hit, + NoPrefix, + Missing, + Invalid, + PrefixChanged, + DependenciesChanged, + EntryInvalid + }; + + struct SProbeResult + { + std::string codeStorage; + std::string_view prefix = {}; + std::string_view body = {}; + core::blake3_hash_t prefixHash = {}; + EProbeStatus status = EProbeStatus::EntryInvalid; + bool hasPrefix = false; + bool cacheHit = false; + }; + + inline bool hasEntry() const { return m_hasEntry; } + inline const SEntry& getEntry() const { return m_entry; } + inline void setEntry(SEntry&& entry) { m_entry = std::move(entry); m_hasEntry = true; } + + NBL_API2 core::smart_refctd_ptr serialize() const; + NBL_API2 static core::smart_refctd_ptr deserialize(const std::span serializedCache); + NBL_API2 static core::smart_refctd_ptr loadFromFile(const system::path& path, ELoadStatus& status); + NBL_API2 static bool writeToFile(const system::path& path, const CPreprocessCache& cache); + NBL_API2 static SProbeResult probe(std::string_view code, const CPreprocessCache* cache, ELoadStatus loadStatus, const SPreprocessorOptions& preprocessOptions); + NBL_API2 static const char* getProbeReason(EProbeStatus status); + NBL_API2 bool validateDependencies(const CIncludeFinder* finder) const; + NBL_API2 std::string buildCombinedCode(std::string_view body, std::string_view sourceIdentifier) const; + + private: + bool m_hasEntry = false; + SEntry m_entry; + }; + + struct SPreprocessCacheResult + { + bool ok = true; + bool cacheUsed = false; + bool cacheHit = false; + bool cacheUpdated = false; + CPreprocessCache::EProbeStatus status = CPreprocessCache::EProbeStatus::EntryInvalid; + IShader::E_SHADER_STAGE stage = IShader::E_SHADER_STAGE::ESS_UNKNOWN; + std::string code; + }; + struct DepfileWriteParams { 
system::ISystem* system = nullptr; @@ -465,6 +563,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted static bool writeDepfile(const DepfileWriteParams& params, const CCache::SEntry::dependency_container_t& dependencies, const CIncludeFinder* includeFinder = nullptr, system::logger_opt_ptr logger = nullptr); core::smart_refctd_ptr compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const; + SPreprocessCacheResult preprocessWithCache(std::string_view code, IShader::E_SHADER_STAGE stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache& cache, CPreprocessCache::ELoadStatus loadStatus, std::string_view sourceIdentifier) const; inline core::smart_refctd_ptr compileToSPIRV(const char* code, const SCompilerOptions& options) const { @@ -593,6 +692,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted virtual void insertIntoStart(std::string& code, std::ostringstream&& ins) const = 0; virtual core::smart_refctd_ptr compileToSPIRV_impl(const std::string_view code, const SCompilerOptions& options, std::vector* dependencies) const = 0; + virtual bool preprocessPrefixForCache(std::string_view code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache::SEntry& outEntry) const; core::smart_refctd_ptr m_system; diff --git a/src/nbl/asset/utils/CGLSLCompiler.cpp b/src/nbl/asset/utils/CGLSLCompiler.cpp index a593a11597..7f9763f5c4 100644 --- a/src/nbl/asset/utils/CGLSLCompiler.cpp +++ b/src/nbl/asset/utils/CGLSLCompiler.cpp @@ -136,6 +136,9 @@ CGLSLCompiler::CGLSLCompiler(core::smart_refctd_ptr&& system) std::string CGLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector* dependencies) const { + if (preprocessOptions.applyForceIncludes && !preprocessOptions.forceIncludes.empty()) + code = IShaderCompiler::applyForceIncludes(code, preprocessOptions.forceIncludes); + if 
(!preprocessOptions.extraDefines.empty()) { std::ostringstream insertion; diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 1020fa9446..4132fe6fe6 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -363,7 +364,7 @@ namespace nbl::wave extern nbl::core::string preprocess(std::string& code, const IShaderCompiler::SPreprocessorOptions& preprocessOptions, bool withCaching, std::function post); } -std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector& dxc_compile_flags_override, std::vector* dependencies) const +std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector& dxc_compile_flags_override, std::vector* dependencies, std::vector* macro_defs) const { const bool depfileEnabled = preprocessOptions.depfile; if (depfileEnabled) @@ -375,6 +376,9 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE } } + if (preprocessOptions.applyForceIncludes && !preprocessOptions.forceIncludes.empty()) + code = IShaderCompiler::applyForceIncludes(code, preprocessOptions.forceIncludes); + std::vector localDependencies; auto* dependenciesOut = dependencies; if (depfileEnabled && !dependenciesOut) @@ -395,7 +399,7 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE // preprocess core::string resolvedString = nbl::wave::preprocess(code, preprocessOptions, bool(dependenciesOut), - [&dxc_compile_flags_override, &stage, &dependenciesOut](nbl::wave::context& context) -> void + [&dxc_compile_flags_override, &stage, &dependenciesOut, macro_defs](nbl::wave::context& context) -> void { if (context.get_hooks().m_dxc_compile_flags_override.size() != 0) 
dxc_compile_flags_override = context.get_hooks().m_dxc_compile_flags_override; @@ -406,6 +410,8 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE if (dependenciesOut) *dependenciesOut = std::move(context.get_dependencies()); + if (macro_defs) + context.dump_macro_definitions(*macro_defs); } ); @@ -444,7 +450,24 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector* dependencies) const { std::vector extra_dxc_compile_flags = {}; - return preprocessShader(std::move(code), stage, preprocessOptions, extra_dxc_compile_flags, dependencies); + return preprocessShader(std::move(code), stage, preprocessOptions, extra_dxc_compile_flags, dependencies, nullptr); +} + +bool CHLSLCompiler::preprocessPrefixForCache(std::string_view code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache::SEntry& outEntry) const +{ + outEntry = {}; + std::vector deps; + std::vector dxcFlags; + std::vector macroDefs; + auto text = preprocessShader(std::string(code), stage, preprocessOptions, dxcFlags, &deps, ¯oDefs); + if (text.empty()) + return false; + outEntry.preprocessedPrefix = std::move(text); + outEntry.dependencies = std::move(deps); + outEntry.dxcFlags = std::move(dxcFlags); + outEntry.macroDefs = std::move(macroDefs); + outEntry.pragmaStage = static_cast(stage); + return true; } core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::string_view code, const IShaderCompiler::SCompilerOptions& options, std::vector* dependencies) const @@ -459,7 +482,11 @@ core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::st std::vector dxc_compile_flags = {}; IShader::E_SHADER_STAGE stage = options.stage; + using clock_t = std::chrono::high_resolution_clock; + const auto preprocessStart = clock_t::now(); auto 
newCode = preprocessShader(std::string(code), stage, hlslOptions.preprocessorOptions, dxc_compile_flags, dependencies); + const auto preprocessEnd = clock_t::now(); + logger.log("Preprocess took: %lld ms.", system::ILogger::ELL_PERFORMANCE, static_cast(std::chrono::duration_cast(preprocessEnd - preprocessStart).count())); if (newCode.empty()) return nullptr; // Suffix is the shader model version @@ -543,6 +570,7 @@ core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::st for (size_t i = 0; i < argc; i++) argsArray[i] = arguments[i].c_str(); + const auto compileStart = clock_t::now(); auto compileResult = dxcCompile( this, m_dxcCompilerTypes, @@ -551,6 +579,9 @@ core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::st argc, hlslOptions ); + const auto compileEnd = clock_t::now(); + logger.log("Compile took: %lld ms.", system::ILogger::ELL_PERFORMANCE, static_cast(std::chrono::duration_cast(compileEnd - compileStart).count())); + logger.log("Total build time: %lld ms.", system::ILogger::ELL_PERFORMANCE, static_cast(std::chrono::duration_cast(compileEnd - preprocessStart).count())); if (argsArray) delete[] argsArray; diff --git a/src/nbl/asset/utils/CWaveStringResolver.cpp b/src/nbl/asset/utils/CWaveStringResolver.cpp index a2165972e5..456aadb4ac 100644 --- a/src/nbl/asset/utils/CWaveStringResolver.cpp +++ b/src/nbl/asset/utils/CWaveStringResolver.cpp @@ -70,14 +70,24 @@ namespace nbl::wave stream << i->get_value(); resolvedString = stream.str(); } - catch (boost::wave::preprocess_exception& e) + catch (const boost::wave::cpp_exception& e) { - preprocessOptions.logger.log("%s exception caught. %s [%s:%d:%d]",system::ILogger::ELL_ERROR,e.what(),e.description(),e.file_name(),e.line_no(),e.column_no()); + preprocessOptions.logger.log("%s exception caught. 
%s [%s:%d:%d]", system::ILogger::ELL_ERROR, e.what(), e.description(), e.file_name(), e.line_no(), e.column_no()); + return {}; + } + catch (const boost::wave::cpplexer::lexing_exception& e) + { + preprocessOptions.logger.log("%s exception caught. %s [%s:%d:%d]", system::ILogger::ELL_ERROR, e.what(), e.description(), e.file_name(), e.line_no(), e.column_no()); + return {}; + } + catch (const std::exception& e) + { + preprocessOptions.logger.log("Exception caught. %s", system::ILogger::ELL_ERROR, e.what()); return {}; } catch (...) { - preprocessOptions.logger.log("Unknown exception caught!",system::ILogger::ELL_ERROR); + preprocessOptions.logger.log("Unknown exception caught!", system::ILogger::ELL_ERROR); return {}; } @@ -85,4 +95,4 @@ namespace nbl::wave return resolvedString; } -} \ No newline at end of file +} diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index a6cd95b441..50f2abd4cb 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -4,12 +4,14 @@ #include "nbl/asset/utils/IShaderCompiler.h" #include "nbl/asset/utils/shadercUtils.h" #include "nbl/asset/utils/shaderCompiler_serialization.h" +#include "nbl/core/hash/blake.h" #include #include #include #include #include +#include #include #include @@ -17,6 +19,87 @@ using namespace nbl; using namespace nbl::asset; +namespace +{ + void splitPrefix(std::string_view code, std::string_view& prefix, std::string_view& body) + { + size_t pos = 0; + size_t prefixEnd = 0; + bool inContinuation = false; + bool inBlockComment = false; + + while (pos < code.size()) + { + const size_t lineStart = pos; + size_t lineEnd = code.find('\n', pos); + if (lineEnd == std::string_view::npos) + lineEnd = code.size(); + + std::string_view line = code.substr(lineStart, lineEnd - lineStart); + if (!line.empty() && line.back() == '\r') + line.remove_suffix(1); + + bool directiveLine = false; + if (inContinuation || inBlockComment) + { + 
directiveLine = true; + } + else + { + size_t i = 0; + if (line.size() >= 3 && static_cast(line[0]) == 0xEF && + static_cast(line[1]) == 0xBB && static_cast(line[2]) == 0xBF) + i = 3; + while (i < line.size() && (line[i] == ' ' || line[i] == '\t')) + ++i; + if (i == line.size()) + { + directiveLine = true; + } + else if (line[i] == '#') + { + directiveLine = true; + } + else if (line[i] == '/' && i + 1 < line.size() && line[i + 1] == '/') + { + directiveLine = true; + } + else if (line[i] == '/' && i + 1 < line.size() && line[i + 1] == '*') + { + directiveLine = true; + if (line.find("*/", i + 2) == std::string_view::npos) + inBlockComment = true; + } + } + + if (!directiveLine) + break; + + prefixEnd = lineEnd < code.size() ? lineEnd + 1 : lineEnd; + + if (inBlockComment && line.find("*/") != std::string_view::npos) + inBlockComment = false; + + bool continuation = false; + if (!line.empty()) + { + size_t j = line.size(); + while (j > 0 && (line[j - 1] == ' ' || line[j - 1] == '\t')) + --j; + if (j > 0 && line[j - 1] == '\\') + continuation = true; + } + inContinuation = continuation; + if (lineEnd == code.size()) + break; + pos = lineEnd + 1; + } + + prefix = code.substr(0, prefixEnd); + body = code.substr(prefixEnd); + } +} + IShaderCompiler::IShaderCompiler(core::smart_refctd_ptr&& system) : m_system(std::move(system)) { @@ -233,6 +316,7 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons { const bool depfileEnabled = options.preprocessorOptions.depfile; const bool supportsDependencies = options.getCodeContentType() == IShader::E_CONTENT_TYPE::ECT_HLSL; + const auto* dependencyOverrides = options.dependencyOverrides; auto writeDepfileFromDependencies = [&](const CCache::SEntry::dependency_container_t& dependencies) -> bool { @@ -255,15 +339,21 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons return IShaderCompiler::writeDepfile(params, dependencies, options.preprocessorOptions.includeFinder, 
options.preprocessorOptions.logger); }; + const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? code : options.preprocessorOptions.codeForCache; CCache::SEntry entry; if (options.readCache || options.writeCache) - entry = CCache::SEntry(code, options); + entry = CCache::SEntry(cacheCode, options); + + if (options.cacheHit) + *options.cacheHit = false; if (options.readCache) { auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder); if (found != options.readCache->m_container.end()) { + if (options.cacheHit) + *options.cacheHit = true; if (options.writeCache) { CCache::SEntry writeEntry = *found; @@ -278,10 +368,13 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons CCache::SEntry::dependency_container_t depfileDependencies; CCache::SEntry::dependency_container_t* dependenciesPtr = nullptr; - if (options.writeCache) - dependenciesPtr = &entry.dependencies; - else if (depfileEnabled && supportsDependencies) - dependenciesPtr = &depfileDependencies; + if (!dependencyOverrides) + { + if (options.writeCache) + dependenciesPtr = &entry.dependencies; + else if (depfileEnabled && supportsDependencies) + dependenciesPtr = &depfileDependencies; + } auto retVal = compileToSPIRV_impl(code, options, dependenciesPtr); if (retVal) @@ -290,9 +383,17 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons const_cast(backingBuffer)->setContentHash(backingBuffer->computeContentHash()); } + if (retVal && options.writeCache && dependencyOverrides) + { + entry.dependencies.clear(); + entry.dependencies.reserve(dependencyOverrides->size()); + for (const auto& dep : *dependencyOverrides) + entry.dependencies.emplace_back(dep.getRequestingSourceDir(), dep.getIdentifier(), dep.isStandardInclude(), dep.getHash()); + } + if (retVal && depfileEnabled && supportsDependencies) { - const auto* deps = options.writeCache ? 
&entry.dependencies : &depfileDependencies; + const auto* deps = dependencyOverrides ? dependencyOverrides : (options.writeCache ? &entry.dependencies : &depfileDependencies); if (!writeDepfileFromDependencies(*deps)) return nullptr; } @@ -513,6 +614,27 @@ core::smart_refctd_ptr IShaderCompiler::CCache::find(const SEntr return found->decompressShader(); } +bool IShaderCompiler::CCache::contains(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const +{ + return find_impl(mainFile, finder) != m_container.end(); +} + +bool IShaderCompiler::CCache::findEntryForCode(std::string_view code, const SCompilerOptions& options, const IShaderCompiler::CIncludeFinder* finder, SEntry& outEntry) const +{ + const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? code : options.preprocessorOptions.codeForCache; + const CCache::SEntry entry(cacheCode, options); + const auto found = find_impl(entry, finder); + if (found == m_container.end()) + return false; + outEntry = SEntry(*found); + return true; +} + +core::smart_refctd_ptr IShaderCompiler::CCache::decompressEntry(const SEntry& entry) const +{ + return entry.decompressShader(); +} + IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_impl(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const { auto found = m_container.find(mainFile); @@ -636,6 +758,485 @@ core::smart_refctd_ptr IShaderCompiler::CCache::deseria return retVal; } +static std::string normalizeLinePath(std::string_view path) +{ + std::string out(path); + std::replace(out.begin(), out.end(), '\\', '/'); + return out; +} + +std::string IShaderCompiler::applyForceIncludes(std::string_view code, std::span forceIncludes) +{ + if (forceIncludes.empty()) + return std::string(code); + + size_t reserveSize = code.size(); + for (const auto& inc : forceIncludes) + reserveSize += inc.size() + 16; + + std::string out; + out.reserve(reserveSize); + for (const auto& inc : 
forceIncludes) + { + const auto incPath = std::filesystem::path(inc).generic_string(); + out.append("#include \""); + out.append(incPath); + out.append("\"\n"); + } + out.append(code); + return out; +} + +bool IShaderCompiler::probeShaderCache(const CCache* cache, std::string_view code, const SCompilerOptions& options, const CIncludeFinder* finder) +{ + if (!cache) + return false; + const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? code : options.preprocessorOptions.codeForCache; + const CCache::SEntry entry(cacheCode, options); + return cache->contains(entry, finder); +} + +bool IShaderCompiler::preprocessPrefixForCache(std::string_view code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache::SEntry& outEntry) const +{ + outEntry = {}; + std::vector deps; + auto text = preprocessShader(std::string(code), stage, preprocessOptions, &deps); + if (text.empty()) + return false; + outEntry.preprocessedPrefix = std::move(text); + outEntry.dependencies = std::move(deps); + outEntry.pragmaStage = static_cast(stage); + return true; +} + +IShaderCompiler::CPreprocessCache::SProbeResult IShaderCompiler::CPreprocessCache::probe(std::string_view code, const CPreprocessCache* cache, ELoadStatus loadStatus, const SPreprocessorOptions& preprocessOptions) +{ + SProbeResult result = {}; + const CIncludeFinder* finder = preprocessOptions.includeFinder; + std::string_view codeToSplit = code; + if (preprocessOptions.applyForceIncludes && !preprocessOptions.forceIncludes.empty()) + { + result.codeStorage = applyForceIncludes(code, preprocessOptions.forceIncludes); + codeToSplit = result.codeStorage; + } + splitPrefix(codeToSplit, result.prefix, result.body); + result.hasPrefix = !result.prefix.empty(); + if (!result.hasPrefix) + { + result.status = EProbeStatus::NoPrefix; + result.cacheHit = false; + return result; + } + + { + core::blake3_hasher hasher; + hasher.update(result.prefix.data(), 
result.prefix.size()); + result.prefixHash = static_cast(hasher); + } + const bool hasEntry = cache && cache->hasEntry(); + if (!hasEntry) + { + result.cacheHit = false; + if (loadStatus == ELoadStatus::Missing) + result.status = EProbeStatus::Missing; + else if (loadStatus == ELoadStatus::Invalid) + result.status = EProbeStatus::Invalid; + else + result.status = EProbeStatus::EntryInvalid; + return result; + } + + const bool prefixMatch = cache->getEntry().prefixHash == result.prefixHash; + const bool depsValid = cache->validateDependencies(finder); + if (prefixMatch && depsValid) + { + result.cacheHit = true; + result.status = EProbeStatus::Hit; + return result; + } + + result.cacheHit = false; + if (!prefixMatch) + result.status = EProbeStatus::PrefixChanged; + else if (!depsValid) + result.status = EProbeStatus::DependenciesChanged; + else + result.status = EProbeStatus::EntryInvalid; + + return result; +} + +const char* IShaderCompiler::CPreprocessCache::getProbeReason(EProbeStatus status) +{ + switch (status) + { + case EProbeStatus::Missing: + return "cache file missing; first build, cleaned, output moved, or out of date"; + case EProbeStatus::Invalid: + return "cache file invalid or version mismatch"; + case EProbeStatus::PrefixChanged: + return "prefix changed; cache invalidated"; + case EProbeStatus::DependenciesChanged: + return "dependencies changed; cache invalidated"; + case EProbeStatus::EntryInvalid: + return "cache entry invalid"; + case EProbeStatus::NoPrefix: + return "no prefix"; + case EProbeStatus::Hit: + return "hit"; + default: + return "unknown"; + } +} + +IShaderCompiler::SPreprocessCacheResult IShaderCompiler::preprocessWithCache(std::string_view code, IShader::E_SHADER_STAGE stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache& cache, CPreprocessCache::ELoadStatus loadStatus, std::string_view sourceIdentifier) const +{ + SPreprocessCacheResult result = {}; + result.stage = stage; + + const auto probe = 
CPreprocessCache::probe(code, &cache, loadStatus, preprocessOptions); + result.status = probe.status; + if (!probe.hasPrefix) + return result; + + if (probe.cacheHit) + { + result.cacheHit = true; + result.cacheUsed = true; + } + else + { + CPreprocessCache::SEntry entry; + IShader::E_SHADER_STAGE prefixStage = stage; + SPreprocessorOptions preCacheOpt = preprocessOptions; + preCacheOpt.depfile = false; + if (!preprocessPrefixForCache(probe.prefix, prefixStage, preCacheOpt, entry)) + { + result.ok = false; + return result; + } + entry.prefixHash = probe.prefixHash; + entry.pragmaStage = static_cast(prefixStage); + cache.setEntry(std::move(entry)); + result.cacheUsed = true; + result.cacheUpdated = true; + } + + if (!cache.hasEntry()) + { + result.ok = false; + return result; + } + + result.code = cache.buildCombinedCode(probe.body, sourceIdentifier); + if (result.code.empty()) + { + result.ok = false; + return result; + } + + const auto& entry = cache.getEntry(); + if (entry.pragmaStage != static_cast(IShader::E_SHADER_STAGE::ESS_UNKNOWN)) + result.stage = static_cast(entry.pragmaStage); + + return result; +} + +core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::serialize() const +{ + if (!m_hasEntry) + return nullptr; + + auto write_bytes = [](std::vector& out, const void* data, size_t size) + { + const auto* ptr = reinterpret_cast(data); + out.insert(out.end(), ptr, ptr + size); + }; + auto write_u32 = [&write_bytes](std::vector& out, uint32_t value) + { + write_bytes(out, &value, sizeof(value)); + }; + auto write_string = [&write_u32, &write_bytes](std::vector& out, std::string_view value) + { + write_u32(out, static_cast(value.size())); + if (!value.empty()) + write_bytes(out, value.data(), value.size()); + }; + + std::vector out; + out.reserve(m_entry.preprocessedPrefix.size() + 256); + const uint32_t magic = 0x50435250u; + write_u32(out, magic); + write_string(out, VERSION); + write_bytes(out, &m_entry.prefixHash, sizeof(m_entry.prefixHash)); + 
write_u32(out, m_entry.pragmaStage); + write_string(out, m_entry.preprocessedPrefix); + + write_u32(out, static_cast(m_entry.macroDefs.size())); + for (const auto& macro : m_entry.macroDefs) + write_string(out, macro); + + write_u32(out, static_cast(m_entry.dxcFlags.size())); + for (const auto& flag : m_entry.dxcFlags) + write_string(out, flag); + + write_u32(out, static_cast(m_entry.dependencies.size())); + for (const auto& dep : m_entry.dependencies) + { + const auto dir = dep.getRequestingSourceDir().generic_string(); + write_string(out, dir); + write_string(out, dep.getIdentifier()); + const uint8_t standardInclude = dep.isStandardInclude() ? 1u : 0u; + write_bytes(out, &standardInclude, sizeof(standardInclude)); + write_bytes(out, dep.getHash().data, sizeof(dep.getHash().data)); + } + + auto buffer = ICPUBuffer::create({ out.size() }); + if (!buffer) + return nullptr; + std::memcpy(buffer->getPointer(), out.data(), out.size()); + return buffer; +} + +core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::deserialize(const std::span serializedCache) +{ + if (serializedCache.empty()) + return nullptr; + + auto read_bytes = [](const std::span data, size_t& offset, void* dst, size_t size) -> bool + { + if (offset + size > data.size()) + return false; + std::memcpy(dst, data.data() + offset, size); + offset += size; + return true; + }; + auto read_u32 = [&read_bytes](const std::span data, size_t& offset, uint32_t& out) -> bool + { + return read_bytes(data, offset, &out, sizeof(out)); + }; + auto read_string = [&read_u32, &read_bytes](const std::span data, size_t& offset, std::string& out) -> bool + { + uint32_t size = 0; + if (!read_u32(data, offset, size)) + return false; + if (offset + size > data.size()) + return false; + out.assign(reinterpret_cast(data.data() + offset), size); + offset += size; + return true; + }; + + size_t offset = 0; + uint32_t magic = 0; + if (!read_u32(serializedCache, offset, magic)) + return nullptr; + if (magic != 0x50435250u) + 
return nullptr; + + std::string version; + if (!read_string(serializedCache, offset, version)) + return nullptr; + if (version != VERSION) + return nullptr; + + auto retVal = core::make_smart_refctd_ptr(); + auto& entry = retVal->m_entry; + if (!read_bytes(serializedCache, offset, &entry.prefixHash, sizeof(entry.prefixHash))) + return nullptr; + if (!read_u32(serializedCache, offset, entry.pragmaStage)) + return nullptr; + if (!read_string(serializedCache, offset, entry.preprocessedPrefix)) + return nullptr; + + uint32_t macroCount = 0; + if (!read_u32(serializedCache, offset, macroCount)) + return nullptr; + entry.macroDefs.clear(); + entry.macroDefs.reserve(macroCount); + for (uint32_t i = 0; i < macroCount; ++i) + { + std::string macro; + if (!read_string(serializedCache, offset, macro)) + return nullptr; + entry.macroDefs.emplace_back(std::move(macro)); + } + + uint32_t flagCount = 0; + if (!read_u32(serializedCache, offset, flagCount)) + return nullptr; + entry.dxcFlags.clear(); + entry.dxcFlags.reserve(flagCount); + for (uint32_t i = 0; i < flagCount; ++i) + { + std::string flag; + if (!read_string(serializedCache, offset, flag)) + return nullptr; + entry.dxcFlags.emplace_back(std::move(flag)); + } + + uint32_t depCount = 0; + if (!read_u32(serializedCache, offset, depCount)) + return nullptr; + entry.dependencies.clear(); + entry.dependencies.reserve(depCount); + for (uint32_t i = 0; i < depCount; ++i) + { + std::string dir; + std::string identifier; + if (!read_string(serializedCache, offset, dir)) + return nullptr; + if (!read_string(serializedCache, offset, identifier)) + return nullptr; + uint8_t standardInclude = 0; + if (!read_bytes(serializedCache, offset, &standardInclude, sizeof(standardInclude))) + return nullptr; + core::blake3_hash_t hash = {}; + if (!read_bytes(serializedCache, offset, hash.data, sizeof(hash.data))) + return nullptr; + entry.dependencies.emplace_back(system::path(dir), identifier, standardInclude != 0, hash); + } + + 
retVal->m_hasEntry = true; + return retVal; +} + +core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::loadFromFile(const system::path& path, ELoadStatus& status) +{ + status = ELoadStatus::Missing; + if (!std::filesystem::exists(path)) + return nullptr; + + std::ifstream in(path, std::ios::binary); + if (!in) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + in.seekg(0, std::ios::end); + const auto size = static_cast(in.tellg()); + in.seekg(0, std::ios::beg); + if (!size) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + std::vector data(size); + if (!in.read(reinterpret_cast(data.data()), data.size())) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + auto cache = deserialize(std::span(data.data(), data.size())); + if (!cache) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + status = ELoadStatus::Loaded; + return cache; +} + +bool IShaderCompiler::CPreprocessCache::writeToFile(const system::path& path, const CPreprocessCache& cache) +{ + auto buffer = cache.serialize(); + if (!buffer) + return false; + + const auto parent = path.parent_path(); + if (!parent.empty() && !std::filesystem::exists(parent)) + std::filesystem::create_directories(parent); + + std::ofstream out(path, std::ios::binary | std::ios::trunc); + if (!out) + return false; + + out.write(reinterpret_cast(buffer->getPointer()), buffer->getSize()); + return bool(out); +} + +bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinder* finder) const +{ + if (!m_hasEntry || !finder) + return false; + + for (const auto& dep : m_entry.dependencies) + { + IIncludeLoader::found_t header; + if (dep.isStandardInclude()) + header = finder->getIncludeStandard(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())); + else + header = finder->getIncludeRelative(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())); + + if (!header || header.hash != dep.getHash()) + return false; + } + return true; +} + +std::string 
IShaderCompiler::CPreprocessCache::buildCombinedCode(std::string_view body, std::string_view sourceIdentifier) const +{ + if (!m_hasEntry) + return std::string(body); + + std::string out; + size_t reserve = m_entry.preprocessedPrefix.size() + body.size(); + for (const auto& m : m_entry.macroDefs) + reserve += m.size() + 16; + for (const auto& f : m_entry.dxcFlags) + reserve += f.size() + 1; + reserve += 64; + out.reserve(reserve); + + if (!m_entry.dxcFlags.empty()) + { + out.append("#pragma dxc_compile_flags "); + for (size_t i = 0; i < m_entry.dxcFlags.size(); ++i) + { + if (i) + out.push_back(' '); + out.append(m_entry.dxcFlags[i]); + } + out.push_back('\n'); + } + + if (!m_entry.preprocessedPrefix.empty()) + { + out.append(m_entry.preprocessedPrefix); + if (out.back() != '\n') + out.push_back('\n'); + } + + for (const auto& macro : m_entry.macroDefs) + { + const auto eq = macro.find('='); + std::string_view name = eq == std::string::npos ? std::string_view(macro) : std::string_view(macro).substr(0, eq); + std::string_view def = eq == std::string::npos ? 
std::string_view() : std::string_view(macro).substr(eq + 1); + out.append("#define "); + out.append(name); + if (!def.empty()) + { + out.push_back(' '); + out.append(def); + } + out.push_back('\n'); + } + + if (!sourceIdentifier.empty()) + { + out.append("#line 1 \""); + out.append(normalizeLinePath(sourceIdentifier)); + out.append("\"\n"); + } + + out.append(body); + return out; +} + static void* SzAlloc(ISzAllocPtr p, size_t size) { p = p; return _NBL_ALIGNED_MALLOC(size, _NBL_SIMD_ALIGNMENT); } static void SzFree(ISzAllocPtr p, void* address) { p = p; _NBL_ALIGNED_FREE(address); } diff --git a/src/nbl/asset/utils/shaderCompiler_serialization.h b/src/nbl/asset/utils/shaderCompiler_serialization.h index 6ad33a2ff5..094bba1584 100644 --- a/src/nbl/asset/utils/shaderCompiler_serialization.h +++ b/src/nbl/asset/utils/shaderCompiler_serialization.h @@ -35,6 +35,7 @@ inline void to_json(json& j, const SEntry::SPreprocessorArgs& preprocArgs) j = json{ { "sourceIdentifier", preprocArgs.sourceIdentifier }, { "extraDefines", preprocArgs.extraDefines}, + { "forceIncludes", preprocArgs.forceIncludes}, }; } @@ -42,6 +43,7 @@ inline void from_json(const json& j, SEntry::SPreprocessorArgs& preprocArgs) { j.at("sourceIdentifier").get_to(preprocArgs.sourceIdentifier); j.at("extraDefines").get_to(preprocArgs.extraDefines); + j.at("forceIncludes").get_to(preprocArgs.forceIncludes); } // Optimizer pass has its own method for easier vector serialization @@ -193,4 +195,4 @@ inline void from_json(const json& j, SEntry& entry) } } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/asset/utils/waveContext.h b/src/nbl/asset/utils/waveContext.h index f6c0014e39..3d7ce6933d 100644 --- a/src/nbl/asset/utils/waveContext.h +++ b/src/nbl/asset/utils/waveContext.h @@ -9,6 +9,8 @@ #include #include +#include + #include "nbl/asset/utils/IShaderCompiler.h" namespace nbl::wave @@ -280,6 +282,70 @@ class context : private boost::noncopyable { macros.reset_macromap(); 
macros.init_predefined_macros(); } + void dump_macro_definitions(std::vector& out) const + { + out.clear(); + std::vector names; + names.reserve(std::distance(macro_names_begin(), macro_names_end())); + for (auto it = macro_names_begin(); it != macro_names_end(); ++it) + names.emplace_back(util::to_string(*it)); + std::sort(names.begin(), names.end()); + for (const auto& name : names) + { + bool has_params = false; + bool is_predefined = false; + position_type pos; + std::vector parameters; + token_sequence_type definition; + if (!get_macro_definition(name, has_params, is_predefined, pos, parameters, definition)) + continue; + if (is_predefined) + continue; + if (name.size() >= 2 && name[0] == '_' && name[1] == '_') + continue; + + std::string params_str; + if (has_params) + { + bool first_param = true; + for (const auto& tok : parameters) + { + auto tok_str = util::to_string(tok.get_value()); + if (tok_str == ",") + continue; + if (!first_param) + params_str.append(", "); + params_str.append(tok_str); + first_param = false; + } + } + + std::string def_str; + std::string prev_tok; + for (const auto& tok : definition) + { + auto tok_str = util::to_string(tok.get_value()); + if (!def_str.empty()) + { + if (!(prev_tok == "__VA_OPT__" && tok_str == "(")) + def_str.push_back(' '); + } + def_str.append(tok_str); + prev_tok = std::move(tok_str); + } + + std::string full = name; + if (has_params) + { + full.push_back('('); + full.append(params_str); + full.push_back(')'); + } + full.push_back('='); + full.append(def_str); + out.push_back(std::move(full)); + } + } // Iterate over names of defined macros typedef boost::wave::util::macromap macromap_type; @@ -570,4 +636,4 @@ template<> inline bool boost::wave::impl::pp_iterator_functor #include #include -#include +#include #include #include +#include +#include #include #include "nbl/asset/metadata/CHLSLMetadata.h" +#include "nbl/asset/utils/shaderCompiler_serialization.h" +#include "nbl/core/hash/fnv1a64.h" #include 
"nlohmann/json.hpp" using json = nlohmann::json; @@ -166,6 +170,7 @@ class ShaderCompiler final : public IApplicationFramework { const auto rawArgs = std::vector(argv.begin(), argv.end()); const auto expandedArgs = expandJoinedArgs(rawArgs); + m_logger = make_smart_refctd_ptr(bitflag(ILogger::ELL_ALL)); argparse::ArgumentParser program("nsc"); program.add_argument("--dump-build-info").default_value(false).implicit_value(true); @@ -181,6 +186,10 @@ class ShaderCompiler final : public IApplicationFramework program.add_argument("-nolog").default_value(false).implicit_value(true); program.add_argument("-quiet").default_value(false).implicit_value(true); program.add_argument("-verbose").default_value(false).implicit_value(true); + program.add_argument("-shader-cache").default_value(false).implicit_value(true); + program.add_argument("-shader-cache-file").default_value(std::string{}); + program.add_argument("-preprocess-cache").default_value(false).implicit_value(true); + program.add_argument("-preprocess-cache-file").default_value(std::string{}); std::vector unknownArgs; try @@ -189,36 +198,46 @@ class ShaderCompiler final : public IApplicationFramework } catch (const std::runtime_error& err) { - std::cerr << err.what() << std::endl << program; + std::ostringstream usage; + usage << program; + if (m_logger) + m_logger->log("%s\n%s", ILogger::ELL_ERROR, err.what(), usage.str().c_str()); return false; } - if (program.get("--dump-build-info")) - { - dumpBuildInfo(program); - std::exit(0); - } - if (!isAPILoaded()) { - std::cerr << "Could not load Nabla API, terminating!"; + if (m_logger) + m_logger->log("Could not load Nabla API, terminating!", ILogger::ELL_ERROR); return false; } m_system = system ? 
std::move(system) : IApplicationFramework::createSystem(); if (!m_system) + { + if (m_logger) + m_logger->log("Failed to create system.", ILogger::ELL_ERROR); return false; + } + + if (program.get("--dump-build-info")) + { + dumpBuildInfo(program); + std::exit(0); + } if (rawArgs.size() < 2) { - std::cerr << "Insufficient arguments.\n"; + if (m_logger) + m_logger->log("Insufficient arguments.", ILogger::ELL_ERROR); return false; } const std::string fileToCompile = rawArgs.back(); if (!m_system->exists(fileToCompile, IFileBase::ECF_READ)) { - std::cerr << "Input shader file does not exist: " << fileToCompile << "\n"; + if (m_logger) + m_logger->log("Input shader file does not exist: %s", ILogger::ELL_ERROR, fileToCompile.c_str()); return false; } @@ -229,24 +248,40 @@ class ShaderCompiler final : public IApplicationFramework if (hasFc == hasFo) { if (hasFc) - std::cerr << "Invalid arguments. Passed both -Fo and -Fc.\n"; + { + if (m_logger) + m_logger->log("Invalid arguments. Passed both -Fo and -Fc.", ILogger::ELL_ERROR); + } else - std::cerr << "Missing arguments. Expecting `-Fc {filename}` or `-Fo {filename}`.\n"; + { + if (m_logger) + m_logger->log("Missing arguments. Expecting `-Fc {filename}` or `-Fo {filename}`.", ILogger::ELL_ERROR); + } return false; } const std::string outputFilepath = hasFc ? program.get("-Fc") : program.get("-Fo"); if (outputFilepath.empty()) { - std::cerr << "Invalid output file path.\n"; + if (m_logger) + m_logger->log("Invalid output file path.", ILogger::ELL_ERROR); return false; } const bool quiet = program.get("-quiet"); const bool verbose = program.get("-verbose"); + bool shaderCacheEnabled = program.get("-shader-cache"); + const std::string shaderCachePathOverride = program.is_used("-shader-cache-file") ? 
program.get("-shader-cache-file") : std::string{}; + if (!shaderCachePathOverride.empty()) + shaderCacheEnabled = true; + bool preprocessCacheEnabled = program.get("-preprocess-cache"); + const std::string preprocessCachePathOverride = program.is_used("-preprocess-cache-file") ? program.get("-preprocess-cache-file") : std::string{}; + if (!preprocessCachePathOverride.empty()) + preprocessCacheEnabled = true; if (quiet && verbose) { - std::cerr << "Invalid arguments. Passed both -quiet and -verbose.\n"; + if (m_logger) + m_logger->log("Invalid arguments. Passed both -quiet and -verbose.", ILogger::ELL_ERROR); return false; } @@ -254,7 +289,8 @@ class ShaderCompiler final : public IApplicationFramework const std::string logPathOverride = program.is_used("-log") ? program.get("-log") : std::string{}; if (noLog && !logPathOverride.empty()) { - std::cerr << "Invalid arguments. Passed both -nolog and -log.\n"; + if (m_logger) + m_logger->log("Invalid arguments. Passed both -nolog and -log.", ILogger::ELL_ERROR); return false; } @@ -269,6 +305,38 @@ class ShaderCompiler final : public IApplicationFramework m_arguments = std::move(unknownArgs); if (!m_arguments.empty() && m_arguments.back() == fileToCompile) m_arguments.pop_back(); + if (!m_arguments.empty()) + { + std::vector filteredArgs; + for (size_t i = 0; i < m_arguments.size(); ++i) + { + const auto& arg = m_arguments[i]; + if (arg == "-FI" || arg == "-include" || arg == "/FI") + { + if (i + 1 >= m_arguments.size()) + { + if (m_logger) + m_logger->log("Missing argument for %s.", ILogger::ELL_ERROR, arg.c_str()); + return false; + } + m_force_includes.push_back(m_arguments[i + 1]); + ++i; + continue; + } + if ((arg.rfind("-FI", 0) == 0 || arg.rfind("/FI", 0) == 0) && arg.size() > 3) + { + m_force_includes.push_back(arg.substr(3)); + continue; + } + if (arg.rfind("-include", 0) == 0 && arg.size() > 8) + { + m_force_includes.push_back(arg.substr(8)); + continue; + } + filteredArgs.push_back(arg); + } + m_arguments = 
std::move(filteredArgs); + } bool noNblBuiltins = program.get("-no-nbl-builtins"); if (noNblBuiltins) @@ -285,6 +353,18 @@ class ShaderCompiler final : public IApplicationFramework if (dep.enabled && dep.path.empty()) dep.path = outputFilepath + ".dep"; + ShaderCacheConfig shaderCache; + shaderCache.enabled = shaderCacheEnabled && !preprocessOnly; + shaderCache.verbose = verbose; + if (shaderCache.enabled) + shaderCache.path = shaderCachePathOverride.empty() ? makeCachePath(outputFilepath) : std::filesystem::path(shaderCachePathOverride); + + PreprocessCacheConfig preCache; + preCache.enabled = preprocessCacheEnabled && !preprocessOnly; + preCache.verbose = verbose; + if (preCache.enabled) + preCache.path = preprocessCachePathOverride.empty() ? makePreprocessCachePath(outputFilepath) : std::filesystem::path(preprocessCachePathOverride); + #ifndef NBL_EMBED_BUILTIN_RESOURCES if (!noNblBuiltins) { @@ -328,11 +408,15 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Output: %s", ILogger::ELL_DEBUG, outputFilepath.c_str()); if (dep.enabled) m_logger->log("Depfile: %s", ILogger::ELL_DEBUG, dep.path.c_str()); + if (shaderCache.enabled) + m_logger->log("Cache: %s", ILogger::ELL_DEBUG, shaderCache.path.string().c_str()); + if (preCache.enabled) + m_logger->log("Preprocess cache: %s", ILogger::ELL_DEBUG, preCache.path.string().c_str()); } const char* const action = preprocessOnly ? "Preprocessing" : "Compiling"; const char* const outType = preprocessOnly ? 
"Preprocessed" : "Compiled"; - m_logger->log("%s %s", ILogger::ELL_INFO, action, fileToCompile.c_str()); + m_logger->log("%s the input file.", ILogger::ELL_INFO, action); auto [shader, shaderStage] = open_shader_file(fileToCompile); if (!shader || shader->getContentType() != IShader::E_CONTENT_TYPE::ECT_HLSL) @@ -342,7 +426,8 @@ class ShaderCompiler final : public IApplicationFramework } const auto start = std::chrono::high_resolution_clock::now(); - const auto job = runShaderJob(shader.get(), shaderStage, fileToCompile, dep, preprocessOnly); + const std::string preprocessedOutputPath = outputFilepath + ".pre.hlsl"; + const auto job = runShaderJob(shader.get(), shaderStage, fileToCompile, dep, shaderCache, preCache, preprocessOnly, preprocessedOutputPath, verbose); const auto end = std::chrono::high_resolution_clock::now(); const char* const op = preprocessOnly ? "preprocessing" : "compilation"; @@ -371,30 +456,14 @@ class ShaderCompiler final : public IApplicationFramework } } - std::fstream out(outputFilepath, std::ios::out | std::ios::binary); - if (!out.is_open()) + if (!writeBinaryFile(m_system.get(), std::filesystem::path(outputFilepath), job.view.data(), job.view.size())) { - m_logger->log("Failed to open output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); - return false; - } - - out.write(job.view.data(), job.view.size()); - if (out.fail()) - { - m_logger->log("Failed to write to output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); - out.close(); - return false; - } - - out.close(); - if (out.fail()) - { - m_logger->log("Failed to close output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); + m_logger->log("Failed to write output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); return false; } const auto took = std::to_string(std::chrono::duration_cast(end - start).count()); - m_logger->log("Took %s ms.", ILogger::ELL_PERFORMANCE, took.c_str()); + m_logger->log("Total took: %s ms.", ILogger::ELL_PERFORMANCE, 
took.c_str()); return true; } @@ -409,6 +478,27 @@ class ShaderCompiler final : public IApplicationFramework std::string path; }; + struct ShaderCacheConfig + { + bool enabled = false; + bool verbose = false; + std::filesystem::path path; + }; + + struct PreprocessCacheConfig + { + bool enabled = false; + bool verbose = false; + std::filesystem::path path; + }; + + enum class CacheLoadStatus : uint8_t + { + Missing, + Invalid, + Loaded + }; + struct RunResult { bool ok = false; @@ -417,6 +507,110 @@ class ShaderCompiler final : public IApplicationFramework std::string_view view; }; + static std::filesystem::path makeCachePath(std::filesystem::path outputPath) + { + outputPath += ".ppcache"; + return outputPath; + } + + static std::filesystem::path makePreprocessCachePath(std::filesystem::path outputPath) + { + outputPath += ".ppcache.pre"; + return outputPath; + } + + static smart_refctd_ptr loadShaderCache(system::ISystem* system, const std::filesystem::path& path, CacheLoadStatus& status) + { + status = CacheLoadStatus::Missing; + if (!system) + { + status = CacheLoadStatus::Invalid; + return nullptr; + } + + if (!system->exists(path, IFileBase::ECF_READ)) + return nullptr; + + ISystem::future_t> future; + system->createFile(future, path, IFileBase::ECF_READ); + if (!future.wait()) + { + status = CacheLoadStatus::Invalid; + return nullptr; + } + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file) + { + status = CacheLoadStatus::Invalid; + return nullptr; + } + + const size_t size = file->getSize(); + if (!size) + { + status = CacheLoadStatus::Invalid; + return nullptr; + } + + std::vector data(size); + IFile::success_t succ; + file->read(succ, data.data(), 0, size); + if (!succ || succ.getBytesProcessed(true) != size) + { + status = CacheLoadStatus::Invalid; + return nullptr; + } + + auto cache = IShaderCompiler::CCache::deserialize(std::span(data.data(), data.size())); + if (!cache) + { + status = 
CacheLoadStatus::Invalid; + return nullptr; + } + + status = CacheLoadStatus::Loaded; + return cache; + } + + static bool writeBinaryFile(system::ISystem* system, const std::filesystem::path& path, const void* data, size_t size) + { + if (!system) + return false; + + const auto parent = path.parent_path(); + if (!parent.empty() && !std::filesystem::exists(parent)) + std::filesystem::create_directories(parent); + + system->deleteFile(path); + + ISystem::future_t> future; + system->createFile(future, path, bitflag(IFileBase::ECF_WRITE) | IFileBase::ECF_SHARE_READ_WRITE | IFileBase::ECF_SHARE_DELETE); + if (!future.wait()) + return false; + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file) + return false; + + IFile::success_t succ; + file->write(succ, data, 0, size); + return succ.getBytesProcessed(true) == size; + } + + static bool writeShaderCache(system::ISystem* system, const std::filesystem::path& path, const IShaderCompiler::CCache& cache) + { + auto buffer = cache.serialize(); + if (!buffer) + return false; + return writeBinaryFile(system, path, buffer->getPointer(), buffer->getSize()); + } + + static std::vector expandJoinedArgs(const std::vector& args) { std::vector out; @@ -445,7 +639,7 @@ class ShaderCompiler final : public IApplicationFramework return out; } - static void dumpBuildInfo(const argparse::ArgumentParser& program) + void dumpBuildInfo(const argparse::ArgumentParser& program) { json j; auto& modules = j["modules"]; @@ -482,60 +676,310 @@ class ShaderCompiler final : public IApplicationFramework oPath = filePath; } - std::ofstream outFile(oPath); - if (!outFile.is_open()) + if (!m_system) { - std::printf("Failed to open \"%s\" for writing\n", oPath.string().c_str()); + if (m_logger) + m_logger->log("Failed to create system for writing \"%s\"", ILogger::ELL_ERROR, oPath.string().c_str()); std::exit(-1); } - outFile << pretty; - std::printf("Saved \"%s\"\n", oPath.string().c_str()); + if 
(!writeBinaryFile(m_system.get(), oPath, pretty.data(), pretty.size())) + { + if (m_logger) + m_logger->log("Failed to write \"%s\"", ILogger::ELL_ERROR, oPath.string().c_str()); + std::exit(-1); + } + + if (m_logger) + m_logger->log("Saved \"%s\"", ILogger::ELL_INFO, oPath.string().c_str()); } - RunResult runShaderJob(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& dep, const bool preprocessOnly) + RunResult runShaderJob(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& dep, const ShaderCacheConfig& shaderCache, const PreprocessCacheConfig& preCache, const bool preprocessOnly, std::string_view preprocessedOutputPath, const bool verbose) { RunResult r; - auto hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + auto makeIncludeFinder = [&]() + { + auto finder = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + auto loader = finder->getDefaultFileSystemLoader(); + for (const auto& p : m_include_search_paths) + finder->addSearchPath(p, loader); + return finder; + }; - auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - auto includeLoader = includeFinder->getDefaultFileSystemLoader(); - for (const auto& p : m_include_search_paths) - includeFinder->addSearchPath(p, includeLoader); + const char* codePtr = (const char*)shader->getContent()->getPointer(); + std::string_view code(codePtr, std::strlen(codePtr)); + CHLSLCompiler::SPreprocessorOptions preOpt = {}; + preOpt.sourceIdentifier = sourceIdentifier; + preOpt.logger = m_logger.get(); + preOpt.forceIncludes = std::span(m_force_includes); + preOpt.depfile = dep.enabled; + preOpt.depfilePath = dep.path; + preOpt.codeForCache = code; - if (preprocessOnly) + CHLSLCompiler::SOptions opt = {}; + opt.stage = static_cast(shaderStage); + opt.preprocessorOptions = preOpt; + opt.debugInfoFlags = bitflag(IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT); + 
opt.dxcOptions = std::span(m_arguments); + + auto writeTextFile = [&](std::string_view path, std::string_view contents) -> bool + { + if (path.empty()) + return false; + return writeBinaryFile(m_system.get(), std::filesystem::path(std::string(path)), contents.data(), contents.size()); + }; + + const bool useShaderCache = shaderCache.enabled && !preprocessOnly; + const bool usePreCache = preCache.enabled && !preprocessOnly; + + struct ShaderCacheProbeResult { - CHLSLCompiler::SPreprocessorOptions opt = {}; - opt.sourceIdentifier = sourceIdentifier; - opt.logger = m_logger.get(); - opt.includeFinder = includeFinder.get(); - opt.depfile = dep.enabled; - opt.depfilePath = dep.path; + CacheLoadStatus status = CacheLoadStatus::Missing; + bool hit = false; + bool entryReady = false; + smart_refctd_ptr cacheObj; + IShaderCompiler::CCache::SEntry entry; + std::chrono::nanoseconds duration = {}; + }; - const char* codePtr = (const char*)shader->getContent()->getPointer(); - std::string_view code(codePtr, std::strlen(codePtr)); + struct PreprocessCacheProbeResult + { + bool skipped = false; + bool ok = false; + IShaderCompiler::SPreprocessCacheResult result = {}; + IShaderCompiler::CPreprocessCache::ELoadStatus loadStatus = IShaderCompiler::CPreprocessCache::ELoadStatus::Missing; + smart_refctd_ptr cacheObj; + std::chrono::nanoseconds duration = {}; + }; - r.text = hlslcompiler->preprocessShader(std::string(code), shaderStage, opt, nullptr); + ShaderCacheProbeResult shaderProbe; + PreprocessCacheProbeResult preProbe; + using clock_t = std::chrono::high_resolution_clock; + const auto probeStart = clock_t::now(); + + if (useShaderCache) + { + const auto start = clock_t::now(); + auto finder = makeIncludeFinder(); + shaderProbe.cacheObj = loadShaderCache(m_system.get(), shaderCache.path, shaderProbe.status); + if (!shaderProbe.cacheObj) + shaderProbe.cacheObj = make_smart_refctd_ptr(); + if (shaderProbe.status == CacheLoadStatus::Loaded) + { + shaderProbe.hit = 
shaderProbe.cacheObj->findEntryForCode(code, opt, finder.get(), shaderProbe.entry); + shaderProbe.entryReady = shaderProbe.hit; + } + shaderProbe.duration = clock_t::now() - start; + } + + if (usePreCache) + { + if (useShaderCache && shaderProbe.hit) + { + preProbe.skipped = true; + preProbe.ok = true; + preProbe.duration = {}; + } + else + { + const auto start = clock_t::now(); + auto finder = makeIncludeFinder(); + preProbe.cacheObj = IShaderCompiler::CPreprocessCache::loadFromFile(preCache.path, preProbe.loadStatus); + if (!preProbe.cacheObj) + preProbe.cacheObj = make_smart_refctd_ptr(); + + auto localCompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CHLSLCompiler::SPreprocessorOptions preOptThread = preOpt; + preOptThread.includeFinder = finder.get(); + IShader::E_SHADER_STAGE stageOverrideThread = static_cast(shaderStage); + preProbe.result = localCompiler->preprocessWithCache(code, stageOverrideThread, preOptThread, *preProbe.cacheObj, preProbe.loadStatus, sourceIdentifier); + preProbe.ok = preProbe.result.ok; + preProbe.duration = clock_t::now() - start; + } + } + + const auto probeEnd = clock_t::now(); + + std::string preprocessedCode; + bool preprocessedReady = false; + std::string_view codeToCompile = code; + smart_refctd_ptr preCacheObj; + IShader::E_SHADER_STAGE stageOverride = static_cast(shaderStage); + auto cacheMissReason = [](CacheLoadStatus status) -> const char* + { + if (status == CacheLoadStatus::Missing) + return "cache file missing; first build, cleaned, output moved, or out of date"; + if (status == CacheLoadStatus::Invalid) + return "cache file invalid or version mismatch"; + return "input/deps/options changed; cache invalidated"; + }; + + auto toMs = [](const std::chrono::nanoseconds duration) -> long long + { + return std::chrono::duration_cast(duration).count(); + }; + + auto writeDepfileFromDependencies = [&](const IShaderCompiler::CCache::SEntry::dependency_container_t& dependencies) -> bool + { + if (!dep.enabled) + 
return true; + if (preOpt.depfilePath.empty()) + { + m_logger->log("Depfile path is empty.", ILogger::ELL_ERROR); + return false; + } + IShaderCompiler::DepfileWriteParams params = {}; + const std::string depfilePathString = preOpt.depfilePath.generic_string(); + params.depfilePath = depfilePathString; + params.sourceIdentifier = preOpt.sourceIdentifier; + if (!params.sourceIdentifier.empty()) + params.workingDirectory = std::filesystem::path(std::string(params.sourceIdentifier)).parent_path(); + params.system = m_system.get(); + return IShaderCompiler::writeDepfile(params, dependencies, nullptr, preOpt.logger); + }; + + if (verbose && (useShaderCache || usePreCache)) + { + if (useShaderCache) + m_logger->log("Shader cache lookup took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(shaderProbe.duration))); + if (usePreCache) + m_logger->log("Preprocess cache lookup took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(preProbe.duration))); + m_logger->log("Total cache probe took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(std::chrono::duration_cast(probeEnd - probeStart)))); + } + + smart_refctd_ptr cacheObj = shaderProbe.cacheObj; + CacheLoadStatus cacheStatus = shaderProbe.status; + const bool shaderCacheHitExpected = shaderProbe.hit; + + if (usePreCache && preCache.verbose && useShaderCache) + { + if (shaderCacheHitExpected) + m_logger->log("Cache hit! Preprocess cache skipped.", ILogger::ELL_DEBUG); + else + m_logger->log("Cache miss! Cold run (%s). Checking preprocess cache.", ILogger::ELL_DEBUG, cacheMissReason(cacheStatus)); + } + + if (usePreCache && !shaderCacheHitExpected) + { + if (!preProbe.ok) + return r; + if (preCache.verbose) + { + if (preProbe.result.cacheHit) + m_logger->log("Preprocess cache hit!", ILogger::ELL_DEBUG); + else + m_logger->log("Preprocess cache miss! 
Cold run (%s).", ILogger::ELL_DEBUG, IShaderCompiler::CPreprocessCache::getProbeReason(preProbe.result.status)); + } + if (preProbe.result.cacheUsed) + { + preprocessedCode = std::move(preProbe.result.code); + preprocessedReady = true; + stageOverride = preProbe.result.stage; + preCacheObj = preProbe.cacheObj; + if (!preprocessedOutputPath.empty() && !writeTextFile(preprocessedOutputPath, preprocessedCode)) + return r; + } + } + else if (usePreCache && preCache.verbose) + { + if (preProbe.skipped) + { + m_logger->log("Preprocess cache lookup skipped (shader cache hit).", ILogger::ELL_DEBUG); + } + else if (preProbe.ok) + { + if (preProbe.result.cacheHit) + m_logger->log("Preprocess cache hit (ignored, shader cache hit).", ILogger::ELL_DEBUG); + else + m_logger->log("Preprocess cache miss! Cold run (%s). (ignored, shader cache hit).", ILogger::ELL_DEBUG, IShaderCompiler::CPreprocessCache::getProbeReason(preProbe.result.status)); + } + else + { + m_logger->log("Preprocess cache failed (ignored, shader cache hit).", ILogger::ELL_DEBUG); + } + } + + if (usePreCache && preProbe.result.cacheUpdated && preProbe.cacheObj) + IShaderCompiler::CPreprocessCache::writeToFile(preCache.path, *preProbe.cacheObj); + + if (useShaderCache && shaderProbe.hit && shaderProbe.entryReady) + { + if (verbose) + m_logger->log("Shader cache hit: using cached SPIR-V.", ILogger::ELL_DEBUG); + r.compiled = cacheObj->decompressEntry(shaderProbe.entry); + r.ok = bool(r.compiled); + if (!r.ok) + return r; + r.view = { (const char*)r.compiled->getContent()->getPointer(), r.compiled->getContent()->getSize() }; + + if (!writeDepfileFromDependencies(shaderProbe.entry.dependencies)) + return r; + + return r; + } + + auto hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + + if (preprocessOnly) + { + const auto preprocessStart = std::chrono::high_resolution_clock::now(); + auto finder = makeIncludeFinder(); + preOpt.includeFinder = finder.get(); + r.text = 
hlslcompiler->preprocessShader(std::string(code), shaderStage, preOpt, nullptr); r.ok = !r.text.empty(); r.view = r.text; + const auto preprocessEnd = std::chrono::high_resolution_clock::now(); + if (verbose) + { + const auto duration = std::chrono::duration_cast(preprocessEnd - preprocessStart).count(); + m_logger->log("Preprocess took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(duration)); + } return r; } - CHLSLCompiler::SOptions opt = {}; - opt.stage = shaderStage; - opt.preprocessorOptions.sourceIdentifier = sourceIdentifier; - opt.preprocessorOptions.logger = m_logger.get(); - opt.preprocessorOptions.includeFinder = includeFinder.get(); - opt.preprocessorOptions.depfile = dep.enabled; - opt.preprocessorOptions.depfilePath = dep.path; - opt.debugInfoFlags = bitflag(IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT); - opt.dxcOptions = std::span(m_arguments); + opt.stage = stageOverride; + + bool cacheHit = false; + if (shaderCache.enabled && cacheObj) + { + opt.readCache = cacheObj.get(); + opt.writeCache = cacheObj.get(); + opt.cacheHit = &cacheHit; + } - r.compiled = hlslcompiler->compileToSPIRV((const char*)shader->getContent()->getPointer(), opt); + if (preprocessedReady) + { + opt.preprocessorOptions.applyForceIncludes = false; + if (preCacheObj && preCacheObj->hasEntry()) + opt.dependencyOverrides = &preCacheObj->getEntry().dependencies; + codeToCompile = preprocessedCode; + } + + auto compileFinder = makeIncludeFinder(); + opt.preprocessorOptions.includeFinder = compileFinder.get(); + r.compiled = hlslcompiler->compileToSPIRV(codeToCompile, opt); r.ok = bool(r.compiled); if (r.ok) r.view = { (const char*)r.compiled->getContent()->getPointer(), r.compiled->getContent()->getSize() }; + if (shaderCache.enabled && cacheObj) + { + const bool logShaderCache = verbose && !usePreCache; + if (logShaderCache) + { + if (cacheHit) + { + m_logger->log("Cache hit!", ILogger::ELL_DEBUG); + } + else + { + m_logger->log("Cache miss! 
Cold run (%s).", ILogger::ELL_DEBUG, cacheMissReason(cacheStatus)); + } + } + if (!writeShaderCache(m_system.get(), shaderCache.path, *cacheObj)) + m_logger->log("Failed to write shader cache: %s", ILogger::ELL_WARNING, shaderCache.path.string().c_str()); + } + return r; } @@ -578,7 +1022,7 @@ class ShaderCompiler final : public IApplicationFramework smart_refctd_ptr m_system; smart_refctd_ptr m_logger; - std::vector m_arguments, m_include_search_paths; + std::vector m_arguments, m_include_search_paths, m_force_includes; smart_refctd_ptr m_assetMgr; }; From 6598d7e726de0f132916fe6ae9951737e975a3bc Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 13 Jan 2026 06:16:47 +0100 Subject: [PATCH 05/14] improve caches, make CCache way faster by mtime + size validation as first citizen (fallback to full on miss) --- cmake/common.cmake | 5 +- include/nbl/asset/utils/IShaderCompiler.h | 23 +- src/nbl/asset/utils/IShaderCompiler.cpp | 253 ++++++++++++++++-- .../utils/shaderCompiler_serialization.h | 12 + src/nbl/asset/utils/waveContext.h | 2 +- src/nbl/system/ISystem.cpp | 4 +- 6 files changed, 274 insertions(+), 25 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 662dc5e665..f1e21913f5 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -2073,6 +2073,9 @@ namespace nbl::core::detail { set(MEMBER_NAME "${_NBL_CAP_NAME}") set(MEMBER_TYPE "${_NBL_CAP_TYPE}") set(MEMBER_VALUE "${_NBL_CAP_VALUE}") + if(MEMBER_TYPE STREQUAL "double") + set(MEMBER_VALUE "${_NBL_CAP_VALUE}L") + endif() string(CONFIGURE [=[ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBER_VALUE@; ]=] _NBL_MEMBER_LINE @ONLY) @@ -2219,7 +2222,7 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE endif() set(HEADER_ONLY_LIKE "${TARGET_INPUT}") - if(NBL_NSC_OUT_FILES) + if(NBL_NSC_OUT_FILES AND NOT CMAKE_CONFIGURATION_TYPES) list(APPEND HEADER_ONLY_LIKE ${NBL_NSC_OUT_FILES}) endif() target_sources(${IMPL_TARGET} 
PRIVATE ${HEADER_ONLY_LIKE} "${CONFIG_FILE}") diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index e7671d7eb5..cf6ea26ae7 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -34,8 +34,9 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted system::path absolutePath = {}; std::string contents = {}; core::blake3_hash_t hash = {}; // TODO: we're not yet using IFile::getPrecomputedHash(), so for builtins we can maybe use that in the future - // Could be used in the future for early rejection of cache hit - //nbl::system::IFileBase::time_point_t lastWriteTime = {}; + uint64_t fileSize = 0; + int64_t lastWriteTime = 0; + bool hasFileInfo = false; explicit inline operator bool() const {return !absolutePath.empty();} }; @@ -125,8 +126,10 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted { public: // Perf note: hashing while preprocessor lexing is likely to be slower than just hashing the whole array like this - inline SPreprocessingDependency(const system::path& _requestingSourceDir, const std::string_view& _identifier, bool _standardInclude, core::blake3_hash_t _hash) : - requestingSourceDir(_requestingSourceDir), identifier(_identifier), standardInclude(_standardInclude), hash(_hash) + inline SPreprocessingDependency(const system::path& _requestingSourceDir, const std::string_view& _identifier, bool _standardInclude, core::blake3_hash_t _hash, + const system::path& _absolutePath = {}, uint64_t _fileSize = 0, int64_t _lastWriteTime = 0, bool _hasFileInfo = false) : + requestingSourceDir(_requestingSourceDir), identifier(_identifier), standardInclude(_standardInclude), hash(_hash), + absolutePath(_absolutePath), fileSize(_fileSize), lastWriteTime(_lastWriteTime), hasFileInfo(_hasFileInfo) {} inline SPreprocessingDependency(SPreprocessingDependency&) = default; @@ -141,6 +144,10 @@ class NBL_API2 IShaderCompiler : public 
core::IReferenceCounted inline std::string_view getIdentifier() const { return identifier; } inline bool isStandardInclude() const { return standardInclude; } inline const core::blake3_hash_t& getHash() const { return hash; } + inline const system::path& getAbsolutePath() const { return absolutePath; } + inline uint64_t getFileSize() const { return fileSize; } + inline int64_t getLastWriteTime() const { return lastWriteTime; } + inline bool getHasFileInfo() const { return hasFileInfo; } private: friend void to_json(nlohmann::json& j, const SPreprocessingDependency& dependency); @@ -154,6 +161,10 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted core::blake3_hash_t hash = {}; // If true, then `getIncludeStandard` was used to find, otherwise `getIncludeRelative` bool standardInclude = false; + system::path absolutePath = {}; + uint64_t fileSize = 0; + int64_t lastWriteTime = 0; + bool hasFileInfo = false; }; // @@ -238,7 +249,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted public: // Used to check compatibility of Caches before reading - constexpr static inline std::string_view VERSION = "1.2.1"; + constexpr static inline std::string_view VERSION = "1.2.4"; static auto const SHADER_BUFFER_SIZE_BYTES = sizeof(uint64_t) / sizeof(uint8_t); // It's obviously 8 @@ -481,7 +492,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted class CPreprocessCache final : public IReferenceCounted { public: - constexpr static inline std::string_view VERSION = "2.0"; + constexpr static inline std::string_view VERSION = "2.2"; struct SEntry { diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index 50f2abd4cb..a6fe3ca6bd 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -388,7 +389,7 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons 
entry.dependencies.clear(); entry.dependencies.reserve(dependencyOverrides->size()); for (const auto& dep : *dependencyOverrides) - entry.dependencies.emplace_back(dep.getRequestingSourceDir(), dep.getIdentifier(), dep.isStandardInclude(), dep.getHash()); + entry.dependencies.emplace_back(dep.getRequestingSourceDir(), dep.getIdentifier(), dep.isStandardInclude(), dep.getHash(), dep.getAbsolutePath(), dep.getFileSize(), dep.getLastWriteTime(), dep.getHasFileInfo()); } if (retVal && depfileEnabled && supportsDependencies) @@ -478,7 +479,11 @@ auto IShaderCompiler::CFileSystemIncludeLoader::getInclude(const system::path& s const bool success = bool(succ); assert(success); - return { f->getFileName(),std::move(contents) }; + found_t ret = {}; + ret.absolutePath = path; + ret.contents = std::move(contents); + ret.fileSize = size; + return ret; } IShaderCompiler::CIncludeFinder::CIncludeFinder(core::smart_refctd_ptr&& system) @@ -500,6 +505,18 @@ auto IShaderCompiler::CIncludeFinder::getIncludeStandard(const system::path& req retVal = std::move(contents); else retVal = m_defaultFileSystemLoader->getInclude(requestingSourceDir.string(), includeName); + if (retVal.fileSize == 0 && !retVal.contents.empty()) + retVal.fileSize = retVal.contents.size(); + if (!retVal.absolutePath.empty()) + { + std::error_code ec; + const auto fileTime = std::filesystem::last_write_time(retVal.absolutePath, ec); + if (!ec) + { + retVal.lastWriteTime = fileTime.time_since_epoch().count(); + retVal.hasFileInfo = true; + } + } core::blake3_hasher hasher; hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); @@ -517,6 +534,19 @@ auto IShaderCompiler::CIncludeFinder::getIncludeRelative(const system::path& req retVal = std::move(contents); else retVal = std::move(trySearchPaths(includeName)); + if (retVal.fileSize == 0 && !retVal.contents.empty()) + retVal.fileSize = retVal.contents.size(); + if (!retVal.absolutePath.empty()) + { + 
std::error_code ec; + const auto fileTime = std::filesystem::last_write_time(retVal.absolutePath, ec); + if (!ec) + { + retVal.lastWriteTime = fileTime.time_since_epoch().count(); + retVal.hasFileInfo = true; + } + } + core::blake3_hasher hasher; hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); retVal.hash = static_cast(hasher); @@ -527,7 +557,16 @@ void IShaderCompiler::CIncludeFinder::addSearchPath(const std::string& searchPat { if (!loader) return; - m_loaders.emplace_back(LoaderSearchPath{ loader, searchPath }); + if (searchPath.empty()) + { + m_loaders.emplace_back(LoaderSearchPath{ loader, searchPath }); + return; + } + const auto insertPos = std::find_if(m_loaders.begin(), m_loaders.end(), [](const LoaderSearchPath& entry) + { + return entry.searchPath.empty(); + }); + m_loaders.insert(insertPos, LoaderSearchPath{ loader, searchPath }); } void IShaderCompiler::CIncludeFinder::addGenerator(const core::smart_refctd_ptr& generatorToAdd) @@ -641,17 +680,98 @@ IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_ // go through all dependencies if (found!=m_container.end()) { + std::unordered_map fileStatus; + std::unordered_map logicalStatus; for (const auto& dependency : found->dependencies) { - IIncludeLoader::found_t header; - if (dependency.standardInclude) - header = finder->getIncludeStandard(dependency.requestingSourceDir, dependency.identifier); + if (dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) + { + if (auto it = fileStatus.find(dependency.getAbsolutePath()); it != fileStatus.end()) + { + if (!it->second) + return m_container.end(); + continue; + } + } else - header = finder->getIncludeRelative(dependency.requestingSourceDir, dependency.identifier); + { + std::string key; + key.reserve(dependency.getIdentifier().size() + dependency.getRequestingSourceDir().string().size() + 4); + key.append(dependency.getRequestingSourceDir().string()); 
+ key.push_back('|'); + key.append(dependency.getIdentifier()); + key.push_back('|'); + key.push_back(dependency.isStandardInclude() ? '1' : '0'); + if (auto it = logicalStatus.find(key); it != logicalStatus.end()) + { + if (!it->second) + return m_container.end(); + continue; + } + } + + bool valid = false; + if (dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) + { + std::error_code ec; + std::filesystem::directory_entry entry(dependency.getAbsolutePath(), ec); + if (!ec) + { + const auto time = entry.last_write_time(ec); + if (!ec) + { + const auto ticks = time.time_since_epoch().count(); + if (dependency.getLastWriteTime() == ticks) + { + const auto size = entry.file_size(ec); + if (!ec && size == dependency.getFileSize()) + valid = true; + } + } + } + } - if (header.hash != dependency.hash) + if (!valid) { - return m_container.end(); + if (!finder) + return m_container.end(); + IIncludeLoader::found_t header; + if (dependency.standardInclude) + header = finder->getIncludeStandard(dependency.requestingSourceDir, dependency.identifier); + else + header = finder->getIncludeRelative(dependency.requestingSourceDir, dependency.identifier); + + if (header.hash != dependency.hash) + { + if (dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) + fileStatus.emplace(dependency.getAbsolutePath(), false); + else + { + std::string key; + key.reserve(dependency.getIdentifier().size() + dependency.getRequestingSourceDir().string().size() + 4); + key.append(dependency.getRequestingSourceDir().string()); + key.push_back('|'); + key.append(dependency.getIdentifier()); + key.push_back('|'); + key.push_back(dependency.isStandardInclude() ? 
'1' : '0'); + logicalStatus.emplace(std::move(key), false); + } + return m_container.end(); + } + } + + if (dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) + fileStatus.emplace(dependency.getAbsolutePath(), true); + else + { + std::string key; + key.reserve(dependency.getIdentifier().size() + dependency.getRequestingSourceDir().string().size() + 4); + key.append(dependency.getRequestingSourceDir().string()); + key.push_back('|'); + key.append(dependency.getIdentifier()); + key.push_back('|'); + key.push_back(dependency.isStandardInclude() ? '1' : '0'); + logicalStatus.emplace(std::move(key), true); } } } @@ -986,9 +1106,17 @@ core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::serialize( const auto dir = dep.getRequestingSourceDir().generic_string(); write_string(out, dir); write_string(out, dep.getIdentifier()); + const auto abs = dep.getAbsolutePath().generic_string(); + write_string(out, abs); const uint8_t standardInclude = dep.isStandardInclude() ? 1u : 0u; write_bytes(out, &standardInclude, sizeof(standardInclude)); write_bytes(out, dep.getHash().data, sizeof(dep.getHash().data)); + const uint64_t fileSize = dep.getFileSize(); + write_bytes(out, &fileSize, sizeof(fileSize)); + const int64_t lastWriteTime = dep.getLastWriteTime(); + write_bytes(out, &lastWriteTime, sizeof(lastWriteTime)); + const uint8_t hasFileInfo = dep.getHasFileInfo() ? 
1u : 0u; + write_bytes(out, &hasFileInfo, sizeof(hasFileInfo)); } auto buffer = ICPUBuffer::create({ out.size() }); @@ -1088,13 +1216,25 @@ core::smart_refctd_ptr IShaderCompiler::CPrep return nullptr; if (!read_string(serializedCache, offset, identifier)) return nullptr; + std::string absolutePath; + if (!read_string(serializedCache, offset, absolutePath)) + return nullptr; uint8_t standardInclude = 0; if (!read_bytes(serializedCache, offset, &standardInclude, sizeof(standardInclude))) return nullptr; core::blake3_hash_t hash = {}; if (!read_bytes(serializedCache, offset, hash.data, sizeof(hash.data))) return nullptr; - entry.dependencies.emplace_back(system::path(dir), identifier, standardInclude != 0, hash); + uint64_t fileSize = 0; + if (!read_bytes(serializedCache, offset, &fileSize, sizeof(fileSize))) + return nullptr; + int64_t lastWriteTime = 0; + if (!read_bytes(serializedCache, offset, &lastWriteTime, sizeof(lastWriteTime))) + return nullptr; + uint8_t hasFileInfo = 0; + if (!read_bytes(serializedCache, offset, &hasFileInfo, sizeof(hasFileInfo))) + return nullptr; + entry.dependencies.emplace_back(system::path(dir), identifier, standardInclude != 0, hash, system::path(absolutePath), fileSize, lastWriteTime, hasFileInfo != 0); } retVal->m_hasEntry = true; @@ -1164,16 +1304,97 @@ bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinde if (!m_hasEntry || !finder) return false; + std::unordered_map fileStatus; + std::unordered_map logicalStatus; for (const auto& dep : m_entry.dependencies) { - IIncludeLoader::found_t header; - if (dep.isStandardInclude()) - header = finder->getIncludeStandard(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())); + if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) + { + if (auto it = fileStatus.find(dep.getAbsolutePath()); it != fileStatus.end()) + { + if (!it->second) + return false; + continue; + } + } else - header = finder->getIncludeRelative(dep.getRequestingSourceDir(), 
std::string(dep.getIdentifier())); + { + std::string key; + key.reserve(dep.getIdentifier().size() + dep.getRequestingSourceDir().string().size() + 4); + key.append(dep.getRequestingSourceDir().string()); + key.push_back('|'); + key.append(dep.getIdentifier()); + key.push_back('|'); + key.push_back(dep.isStandardInclude() ? '1' : '0'); + if (auto it = logicalStatus.find(key); it != logicalStatus.end()) + { + if (!it->second) + return false; + continue; + } + } - if (!header || header.hash != dep.getHash()) - return false; + bool valid = false; + if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) + { + std::error_code ec; + std::filesystem::directory_entry entry(dep.getAbsolutePath(), ec); + if (!ec) + { + const auto time = entry.last_write_time(ec); + if (!ec) + { + const auto ticks = time.time_since_epoch().count(); + if (dep.getLastWriteTime() == ticks) + { + const auto size = entry.file_size(ec); + if (!ec && size == dep.getFileSize()) + valid = true; + } + } + } + } + + if (!valid) + { + IIncludeLoader::found_t header; + if (dep.isStandardInclude()) + header = finder->getIncludeStandard(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())); + else + header = finder->getIncludeRelative(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())); + + if (!header || header.hash != dep.getHash()) + { + if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) + fileStatus.emplace(dep.getAbsolutePath(), false); + else + { + std::string key; + key.reserve(dep.getIdentifier().size() + dep.getRequestingSourceDir().string().size() + 4); + key.append(dep.getRequestingSourceDir().string()); + key.push_back('|'); + key.append(dep.getIdentifier()); + key.push_back('|'); + key.push_back(dep.isStandardInclude() ? 
'1' : '0'); + logicalStatus.emplace(std::move(key), false); + } + return false; + } + } + + if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) + fileStatus.emplace(dep.getAbsolutePath(), true); + else + { + std::string key; + key.reserve(dep.getIdentifier().size() + dep.getRequestingSourceDir().string().size() + 4); + key.append(dep.getRequestingSourceDir().string()); + key.push_back('|'); + key.append(dep.getIdentifier()); + key.push_back('|'); + key.push_back(dep.isStandardInclude() ? '1' : '0'); + logicalStatus.emplace(std::move(key), true); + } } return true; } diff --git a/src/nbl/asset/utils/shaderCompiler_serialization.h b/src/nbl/asset/utils/shaderCompiler_serialization.h index 094bba1584..3f0c882a72 100644 --- a/src/nbl/asset/utils/shaderCompiler_serialization.h +++ b/src/nbl/asset/utils/shaderCompiler_serialization.h @@ -120,6 +120,10 @@ inline void to_json(json& j, const SEntry::SPreprocessingDependency& dependency) { "identifier", dependency.identifier }, { "hash", dependency.hash.data }, { "standardInclude", dependency.standardInclude }, + { "absolutePath", dependency.absolutePath }, + { "fileSize", dependency.fileSize }, + { "lastWriteTime", dependency.lastWriteTime }, + { "hasFileInfo", dependency.hasFileInfo }, }; } @@ -129,6 +133,14 @@ inline void from_json(const json& j, SEntry::SPreprocessingDependency& dependenc j.at("identifier").get_to(dependency.identifier); j.at("hash").get_to(dependency.hash.data); j.at("standardInclude").get_to(dependency.standardInclude); + if (j.contains("absolutePath")) + j.at("absolutePath").get_to(dependency.absolutePath); + if (j.contains("fileSize")) + j.at("fileSize").get_to(dependency.fileSize); + if (j.contains("lastWriteTime")) + j.at("lastWriteTime").get_to(dependency.lastWriteTime); + if (j.contains("hasFileInfo")) + j.at("hasFileInfo").get_to(dependency.hasFileInfo); } // We serialize shader creation parameters into a json, along with indexing info into the .bin buffer where the cache is serialized 
diff --git a/src/nbl/asset/utils/waveContext.h b/src/nbl/asset/utils/waveContext.h index 3d7ce6933d..1958be6109 100644 --- a/src/nbl/asset/utils/waveContext.h +++ b/src/nbl/asset/utils/waveContext.h @@ -593,7 +593,7 @@ template<> inline bool boost::wave::impl::pp_iterator_functor Date: Tue, 13 Jan 2026 09:08:14 +0100 Subject: [PATCH 06/14] use precomputed archive hashes for builtin includes, avoid duplicate hashing --- include/nbl/asset/utils/IShaderCompiler.h | 3 ++- include/nbl/system/CFileArchive.h | 14 ++++++++++- include/nbl/system/IFileBase.h | 1 + src/nbl/asset/utils/IShaderCompiler.cpp | 30 +++++++++++++++++------ src/nbl/builtin/builtinHeaderGen.py | 5 ++-- tools/nsc/main.cpp | 2 +- 6 files changed, 42 insertions(+), 13 deletions(-) diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index cf6ea26ae7..168e9e6632 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -33,10 +33,11 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted { system::path absolutePath = {}; std::string contents = {}; - core::blake3_hash_t hash = {}; // TODO: we're not yet using IFile::getPrecomputedHash(), so for builtins we can maybe use that in the future + core::blake3_hash_t hash = {}; uint64_t fileSize = 0; int64_t lastWriteTime = 0; bool hasFileInfo = false; + bool hasHash = false; explicit inline operator bool() const {return !absolutePath.empty();} }; diff --git a/include/nbl/system/CFileArchive.h b/include/nbl/system/CFileArchive.h index 818bd8f6ba..35cb2e9413 100644 --- a/include/nbl/system/CFileArchive.h +++ b/include/nbl/system/CFileArchive.h @@ -10,6 +10,8 @@ #include "nbl/system/CFileView.h" #include "nbl/system/IFileViewAllocator.h" +#include + #ifdef _NBL_PLATFORM_ANDROID_ #include "nbl/system/CFileViewAPKAllocator.h" #endif @@ -22,13 +24,21 @@ template class CInnerArchiveFile : public CFileView { std::atomic_flag* alive; + std::optional m_precomputedHash; 
public: template - CInnerArchiveFile(std::atomic_flag* _flag, Args&&... args) : CFileView(std::forward(args)...), alive(_flag) + CInnerArchiveFile(std::atomic_flag* _flag, std::optional precomputedHash, Args&&... args) + : CFileView(std::forward(args)...), alive(_flag), m_precomputedHash(std::move(precomputedHash)) { } ~CInnerArchiveFile() = default; + // Non-empty return means the file came from an archive that embeds a precomputed hash. + std::optional getPrecomputedHash() const override + { + return m_precomputedHash; + } + static void* operator new(size_t size) noexcept { assert(false); @@ -144,6 +154,7 @@ class CFileArchive : public IFileArchive // coast is clear, do placement new new (file, &m_fileFlags[found->ID]) CInnerArchiveFile( m_fileFlags+found->ID, + std::move(fileBuffer.precomputedHash), getDefaultAbsolutePath()/found->pathRelativeToArchive, flags, fileBuffer.initialModified, @@ -162,6 +173,7 @@ class CFileArchive : public IFileArchive void* buffer; size_t size; void* allocatorState; + std::optional precomputedHash = {}; // TODO: Implement this !!! IFileBase::time_point_t initialModified = std::chrono::utc_clock::now(); }; diff --git a/include/nbl/system/IFileBase.h b/include/nbl/system/IFileBase.h index c9ceb13a04..ae336e24cd 100644 --- a/include/nbl/system/IFileBase.h +++ b/include/nbl/system/IFileBase.h @@ -41,6 +41,7 @@ class IFileBase : public core::IReferenceCounted //! Optional, if not present this means that the hash was not already precomputed for you. // Equivalent to calling `xxHash256(getMappedPointer(),getSize(),&retval.x)` // Only really available for built-in resources or some other files that had to be read in their entirety at some point. + // Non-empty return means the file comes from an archive that embeds a precomputed hash. virtual inline std::optional getPrecomputedHash() const {return {};} //! 
diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index a6fe3ca6bd..f21436d9ea 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -5,7 +5,6 @@ #include "nbl/asset/utils/shadercUtils.h" #include "nbl/asset/utils/shaderCompiler_serialization.h" #include "nbl/core/hash/blake.h" - #include #include #include @@ -13,6 +12,7 @@ #include #include #include +#include #include #include @@ -480,9 +480,15 @@ auto IShaderCompiler::CFileSystemIncludeLoader::getInclude(const system::path& s assert(success); found_t ret = {}; - ret.absolutePath = path; + ret.absolutePath = f->getFileName(); ret.contents = std::move(contents); ret.fileSize = size; + if (auto precomputed = f->getPrecomputedHash()) + { + static_assert(sizeof(ret.hash.data) == sizeof(*precomputed)); + std::memcpy(ret.hash.data, &(*precomputed), sizeof(ret.hash.data)); + ret.hasHash = true; + } return ret; } @@ -518,9 +524,13 @@ auto IShaderCompiler::CIncludeFinder::getIncludeStandard(const system::path& req } } - core::blake3_hasher hasher; - hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); - retVal.hash = static_cast(hasher); + if (!retVal.hasHash) + { + core::blake3_hasher hasher; + hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); + retVal.hash = static_cast(hasher); + retVal.hasHash = true; + } return retVal; } @@ -547,9 +557,13 @@ auto IShaderCompiler::CIncludeFinder::getIncludeRelative(const system::path& req } } - core::blake3_hasher hasher; - hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); - retVal.hash = static_cast(hasher); + if (!retVal.hasHash) + { + core::blake3_hasher hasher; + hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); + retVal.hash = 
static_cast(hasher); + retVal.hasHash = true; + } return retVal; } diff --git a/src/nbl/builtin/builtinHeaderGen.py b/src/nbl/builtin/builtinHeaderGen.py index 1a0f3915ed..8c30580bb8 100644 --- a/src/nbl/builtin/builtinHeaderGen.py +++ b/src/nbl/builtin/builtinHeaderGen.py @@ -106,6 +106,7 @@ def execute(args): #include "nbl/system/CFileArchive.h" #include "nbl/core/def/smart_refctd_ptr.h" +#include "nbl/builtin/hlsl/cpp_compat/vector.hlsl" #include "{os.path.basename(outputBuiltinPath)}" #include @@ -129,7 +130,7 @@ class {NBL_BR_API} CArchive final : public nbl::system::CFileArchive file_buffer_t getFileBuffer(const nbl::system::IFileArchive::SFileList::found_t& found) override {{ auto resource = get_resource_runtime(found->pathRelativeToArchive.string()); - return {{const_cast(resource.contents),resource.size,nullptr}}; + return {{const_cast(resource.contents),resource.size,nullptr,nbl::hlsl::uint64_t4{{resource.xx256Hash[0],resource.xx256Hash[1],resource.xx256Hash[2],resource.xx256Hash[3]}}}}; }} }}; }} @@ -143,4 +144,4 @@ class {NBL_BR_API} CArchive final : public nbl::system::CFileArchive if __name__ == "__main__": args: argparse.Namespace = parser.parse_args() - execute(args) \ No newline at end of file + execute(args) diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 337a005735..a5275acad8 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -409,7 +409,7 @@ class ShaderCompiler final : public IApplicationFramework if (dep.enabled) m_logger->log("Depfile: %s", ILogger::ELL_DEBUG, dep.path.c_str()); if (shaderCache.enabled) - m_logger->log("Cache: %s", ILogger::ELL_DEBUG, shaderCache.path.string().c_str()); + m_logger->log("Shader Cache: %s", ILogger::ELL_DEBUG, shaderCache.path.string().c_str()); if (preCache.enabled) m_logger->log("Preprocess cache: %s", ILogger::ELL_DEBUG, preCache.path.string().c_str()); } From ec7e7470daaeedd82dc1e1a912ac0b22d8e7c482 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 17 Jan 2026 09:19:38 +0100 
Subject: [PATCH 07/14] save refactor work --- cmake/common.cmake | 167 +- docs/nsc-prebuilds.md | 13 +- include/nbl/asset/utils/IShaderCompiler.h | 58 +- src/nbl/asset/utils/CHLSLCompiler.cpp | 55 +- src/nbl/asset/utils/IShaderCompiler.cpp | 1337 ++++++++++++----- .../utils/shaderCompiler_serialization.h | 3 +- tools/nsc/main.cpp | 452 +++++- 7 files changed, 1562 insertions(+), 523 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index f1e21913f5..a95590ce1f 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1207,14 +1207,14 @@ struct DeviceConfigCaps list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=vulkan-with-source>) endif() + list(APPEND REQUIRED_OPTIONS + -I "${NBL_ROOT_PATH}/include" + -I "${NBL_ROOT_PATH}/3rdparty/dxc/dxc/external/SPIRV-Headers/include" + -I "${NBL_ROOT_PATH}/3rdparty/boost/superproject/libs/preprocessor/include" + -I "${NBL_ROOT_PATH_BINARY}/src/nbl/device/include" + ) if(NOT NBL_EMBED_BUILTIN_RESOURCES) - list(APPEND REQUIRED_OPTIONS - -no-nbl-builtins - -I "${NBL_ROOT_PATH}/include" - -I "${NBL_ROOT_PATH}/3rdparty/dxc/dxc/external/SPIRV-Headers/include" - -I "${NBL_ROOT_PATH}/3rdparty/boost/superproject/libs/preprocessor/include" - -I "${NBL_ROOT_PATH_BINARY}/src/nbl/device/include" - ) + list(APPEND REQUIRED_OPTIONS -no-nbl-builtins) endif() set(REQUIRED_SINGLE_ARGS TARGET BINARY_DIR OUTPUT_VAR INPUTS INCLUDE NAMESPACE MOUNT_POINT_DEFINE) @@ -1275,7 +1275,7 @@ $ target_sources(${IMPL_TARGET} PUBLIC ${INCLUDE_FILE}) set_source_files_properties(${INCLUDE_FILE} PROPERTIES HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None + GENERATED TRUE ) target_compile_definitions(${IMPL_TARGET} INTERFACE $) @@ -1724,6 +1724,10 @@ sys.stdout.write(str(h)) if(NOT IS_ABSOLUTE "${TARGET_INPUT}") set(TARGET_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_INPUT}") endif() + if(IMPL_HLSL_GLOB) + get_filename_component(_ABS_TARGET_INPUT "${TARGET_INPUT}" ABSOLUTE) + list(REMOVE_ITEM IMPL_HLSL_GLOB "${TARGET_INPUT}" "${_ABS_TARGET_INPUT}") + endif() 
get_target_property(CANONICAL_IDENTIFIERS ${IMPL_TARGET} NBL_CANONICAL_IDENTIFIERS) @@ -2073,8 +2077,11 @@ namespace nbl::core::detail { set(MEMBER_NAME "${_NBL_CAP_NAME}") set(MEMBER_TYPE "${_NBL_CAP_TYPE}") set(MEMBER_VALUE "${_NBL_CAP_VALUE}") + if(MEMBER_TYPE STREQUAL "double" AND MEMBER_VALUE STREQUAL "1.7976931348623165e+308") + set(MEMBER_VALUE "1.7976931348623157e+308") + endif() if(MEMBER_TYPE STREQUAL "double") - set(MEMBER_VALUE "${_NBL_CAP_VALUE}L") + set(MEMBER_VALUE "${MEMBER_VALUE}L") endif() string(CONFIGURE [=[ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBER_VALUE@; @@ -2127,7 +2134,16 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE set(CAPS_EVAL " // no caps\n") endif() string(CONFIGURE "${DEVICE_CONFIG_VIEW}" CONFIG_CONTENT @ONLY) - file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") + set(_NBL_CONFIG_WRITE TRUE) + if(EXISTS "${CONFIG_FILE}") + file(READ "${CONFIG_FILE}" _NBL_CONFIG_OLD) + if(_NBL_CONFIG_OLD STREQUAL "${CONFIG_CONTENT}") + set(_NBL_CONFIG_WRITE FALSE) + endif() + endif() + if(_NBL_CONFIG_WRITE) + file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") + endif() list(APPEND DEPENDS_ON "${TARGET_INPUT}" "${CONFIG_FILE}") # generate keys and commands for compiling shaders @@ -2178,6 +2194,20 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE get_filename_component(NBL_NSC_INPUT_NAME "${TARGET_INPUT}" NAME) get_filename_component(NBL_NSC_CONFIG_NAME "${CONFIG_FILE}" NAME) + set(NBL_NSC_COMMENT_LEFT "${NBL_NSC_INPUT_NAME}") + set(NBL_NSC_COMMENT_RIGHT "${NBL_NSC_CONFIG_NAME}") + if(NBL_NSC_INPUT_NAME MATCHES "\\.in\\.hlsl$") + set(NBL_NSC_COMMENT_LEFT "${NBL_NSC_CONFIG_NAME}") + set(NBL_NSC_COMMENT_RIGHT "${NBL_NSC_INPUT_NAME}") + endif() + set(NBL_NSC_MAIN_DEPENDENCY "${TARGET_INPUT}") + if(TARGET nsc) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + list(APPEND DEPENDS_ON "$") + else() + list(APPEND DEPENDS_ON nsc) + endif() + endif() 
set(NBL_NSC_BYPRODUCTS "${NBL_NSC_LOG_PATH}") if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_BYPRODUCTS "${DEPFILE_PATH}") @@ -2195,10 +2225,13 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE BYPRODUCTS ${NBL_NSC_BYPRODUCTS} COMMAND ${NBL_NSC_COMPILE_COMMAND} DEPENDS ${DEPENDS_ON} - COMMENT "${NBL_NSC_INPUT_NAME} (${NBL_NSC_CONFIG_NAME})" + COMMENT "${NBL_NSC_COMMENT_LEFT} (${NBL_NSC_COMMENT_RIGHT})" VERBATIM COMMAND_EXPAND_LISTS ) + if(NBL_NSC_MAIN_DEPENDENCY) + list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS MAIN_DEPENDENCY "${NBL_NSC_MAIN_DEPENDENCY}") + endif() if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS DEPFILE "${DEPFILE_PATH}") endif() @@ -2221,23 +2254,54 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE set_source_files_properties(${NBL_NSC_OUT_FILES} PROPERTIES GENERATED TRUE) endif() - set(HEADER_ONLY_LIKE "${TARGET_INPUT}") + set(HEADER_ONLY_LIKE "") + set(ADD_INPUT_AS_HEADER_ONLY TRUE) + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS AND CMAKE_GENERATOR MATCHES "Visual Studio") + set(ADD_INPUT_AS_HEADER_ONLY FALSE) + endif() + if(ADD_INPUT_AS_HEADER_ONLY) + list(APPEND HEADER_ONLY_LIKE "${TARGET_INPUT}") + endif() if(NBL_NSC_OUT_FILES AND NOT CMAKE_CONFIGURATION_TYPES) list(APPEND HEADER_ONLY_LIKE ${NBL_NSC_OUT_FILES}) endif() - target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE} "${CONFIG_FILE}") - - set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES - HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None - ) - set_source_files_properties("${CONFIG_FILE}" PROPERTIES - GENERATED TRUE - VS_TOOL_OVERRIDE None - ) + if(HEADER_ONLY_LIKE AND IMPL_HLSL_GLOB) + foreach(_HLSL_SOURCE IN LISTS IMPL_HLSL_GLOB) + list(REMOVE_ITEM HEADER_ONLY_LIKE "${_HLSL_SOURCE}") + endforeach() + endif() + if(HEADER_ONLY_LIKE) + list(REMOVE_DUPLICATES HEADER_ONLY_LIKE) + target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) + set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES + HEADER_FILE_ONLY ON + ) + 
endif() + set(ADD_CONFIG_AS_HEADER_ONLY TRUE) + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NBL_NSC_MAIN_DEPENDENCY STREQUAL "${CONFIG_FILE}") + set(ADD_CONFIG_AS_HEADER_ONLY FALSE) + endif() + endif() + if(ADD_CONFIG_AS_HEADER_ONLY) + target_sources(${IMPL_TARGET} PRIVATE "${CONFIG_FILE}") + set_source_files_properties("${CONFIG_FILE}" PROPERTIES + GENERATED TRUE + HEADER_FILE_ONLY ON + ) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${CONFIG_FILE}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) if(CMAKE_CONFIGURATION_TYPES) foreach(_CFG IN LISTS CMAKE_CONFIGURATION_TYPES) + if(_CFG STREQUAL "") + continue() + endif() set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${HASHED_KEY}") set(TARGET_OUTPUT_IDE_PREPROCESSED "${TARGET_OUTPUT_IDE}.pre.hlsl") if(NSC_CACHE_DIR) @@ -2255,14 +2319,17 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE if(NSC_SHADER_CACHE) list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_CACHE}") endif() + set(ADD_PREPROCESSED_IDE TRUE) if(NSC_PREPROCESS_CACHE) list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PRECACHE}") - list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + if(ADD_PREPROCESSED_IDE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + endif() endif() + list(REMOVE_DUPLICATES NBL_NSC_OUT_FILES_IDE) target_sources(${IMPL_TARGET} PRIVATE ${NBL_NSC_OUT_FILES_IDE}) set_source_files_properties(${NBL_NSC_OUT_FILES_IDE} PROPERTIES HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None GENERATED TRUE ) if(NSC_SHADER_CACHE) @@ -2270,7 +2337,15 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE endif() if(NSC_PREPROCESS_CACHE) set_source_files_properties("${TARGET_OUTPUT_IDE_PRECACHE}" PROPERTIES HEADER_FILE_ONLY OFF) - 
set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + if(ADD_PREPROCESSED_IDE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() endif() source_group("${OUT}/${_CFG}" FILES ${NBL_NSC_OUT_FILES_IDE}) endforeach() @@ -2292,14 +2367,17 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE if(NSC_SHADER_CACHE) list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_CACHE}") endif() + set(ADD_PREPROCESSED_IDE TRUE) if(NSC_PREPROCESS_CACHE) list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PRECACHE}") - list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + if(ADD_PREPROCESSED_IDE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + endif() endif() + list(REMOVE_DUPLICATES NBL_NSC_OUT_FILES_IDE) target_sources(${IMPL_TARGET} PRIVATE ${NBL_NSC_OUT_FILES_IDE}) set_source_files_properties(${NBL_NSC_OUT_FILES_IDE} PROPERTIES HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None GENERATED TRUE ) if(NSC_SHADER_CACHE) @@ -2307,7 +2385,15 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE endif() if(NSC_PREPROCESS_CACHE) set_source_files_properties("${TARGET_OUTPUT_IDE_PRECACHE}" PROPERTIES HEADER_FILE_ONLY OFF) - set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + if(ADD_PREPROCESSED_IDE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() endif() source_group("${OUT}" FILES ${NBL_NSC_OUT_FILES_IDE}) 
endif() @@ -2367,12 +2453,35 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE source_group("${IN}/autogen" FILES ${CONFIGS}) source_group("${IN}" FILES ${INPUTS}) + if(IMPL_HLSL_GLOB AND INPUTS) + set(_NBL_INPUTS_ABS "") + foreach(_IN_FILE IN LISTS INPUTS) + get_filename_component(_IN_ABS "${_IN_FILE}" ABSOLUTE) + string(TOLOWER "${_IN_ABS}" _IN_ABS_LOWER) + list(APPEND _NBL_INPUTS_ABS "${_IN_ABS_LOWER}") + endforeach() + set(_NBL_HLSL_FILTERED "") + foreach(_HLSL_FILE IN LISTS IMPL_HLSL_GLOB) + get_filename_component(_HLSL_ABS "${_HLSL_FILE}" ABSOLUTE) + string(TOLOWER "${_HLSL_ABS}" _HLSL_ABS_LOWER) + list(FIND _NBL_INPUTS_ABS "${_HLSL_ABS_LOWER}" _HLSL_INDEX) + if(_HLSL_INDEX EQUAL -1) + list(APPEND _NBL_HLSL_FILTERED "${_HLSL_FILE}") + endif() + endforeach() + set(IMPL_HLSL_GLOB "${_NBL_HLSL_FILTERED}") + endif() if(IMPL_HLSL_GLOB) target_sources(${IMPL_TARGET} PRIVATE ${IMPL_HLSL_GLOB}) set_source_files_properties(${IMPL_HLSL_GLOB} PROPERTIES HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None ) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties(${IMPL_HLSL_GLOB} PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() source_group("HLSL Files" FILES ${IMPL_HLSL_GLOB}) endif() diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md index 38c0e48716..97d915a1fe 100644 --- a/docs/nsc-prebuilds.md +++ b/docs/nsc-prebuilds.md @@ -41,6 +41,7 @@ Runtime compilation is still useful for prototyping, but (assuming you don't use For each registered input it generates: - One `.spv` output **per CMake configuration** (`Debug/`, `Release/`, `RelWithDebInfo/`). +- A matching `.spv.hash` sidecar for fast up-to-date checks on cache hits. - If you use `CAPS`, it generates a **cartesian product** of permutations and emits a `.spv` for each. 
- A generated header (you choose the path via `INCLUDE`) containing: - a primary template `get_spirv_key(...args)` and `get_spirv_key(device, ...args)` @@ -54,7 +55,7 @@ For each registered input it generates: Keys are hashed to keep filenames short and stable across long permutation strings. The **full key string** is built as: ``` -(__._)(._)....spv +__._._...spv ``` Then `FNV-1a 64-bit` is computed from that full key (no `` prefix), and the **final output key** is: @@ -118,15 +119,19 @@ There are two independent caches: With `-verbose`, `.log` shows: -- `Cache: ` and `Cache hit!/miss! ...` for SPIR-V cache. +- `Shader Cache: ` and `Cache hit!/miss! ...` for SPIR-V cache. - `Preprocess cache: ` and `Preprocess cache hit!/miss! ...` for the prefix cache. - Timing lines (performance): + - `Shader cache load took: ...` + - `Shader cache validate took: ...` - `Shader cache lookup took: ...` + - `Shader cache write took: ...` (only when deps metadata changed on hit) - `Preprocess cache lookup took: ...` - `Total cache probe took: ...` - `Preprocess took: ...` (only on compile path) - `Compile took: ...` (only on compile path) - `Total build time: ...` (preprocess + compile) + - `Write output took: ...` (only when output file is written) - `Total took: ...` (overall tool runtime) You can redirect both caches into a shared directory with: @@ -361,8 +366,8 @@ static constexpr std::string_view keyView = keyBuf; This produces `3 * 2 = 6` permutations per build configuration, and `KEYS` contains all of them (for example): ``` -Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_0.spv -Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_1.spv +Debug/6014683721143225910.spv +Debug/10493750182651038558.spv ... 
``` diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index 168e9e6632..c8dd6accb5 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -89,6 +89,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted IIncludeLoader::found_t getIncludeRelative(const system::path& requestingSourceDir, const std::string& includeName) const; inline core::smart_refctd_ptr getDefaultFileSystemLoader() const { return m_defaultFileSystemLoader; } + inline system::ISystem* getSystem() const { return m_system.get(); } void addSearchPath(const std::string& searchPath, const core::smart_refctd_ptr& loader); @@ -108,6 +109,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted std::vector m_loaders; std::vector> m_generators; core::smart_refctd_ptr m_defaultFileSystemLoader; + core::smart_refctd_ptr m_system; }; // @@ -149,6 +151,12 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted inline uint64_t getFileSize() const { return fileSize; } inline int64_t getLastWriteTime() const { return lastWriteTime; } inline bool getHasFileInfo() const { return hasFileInfo; } + inline void setFileInfo(uint64_t size, int64_t timeTicks, bool hasInfo) const + { + fileSize = size; + lastWriteTime = timeTicks; + hasFileInfo = hasInfo; + } private: friend void to_json(nlohmann::json& j, const SPreprocessingDependency& dependency); @@ -163,9 +171,9 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted // If true, then `getIncludeStandard` was used to find, otherwise `getIncludeRelative` bool standardInclude = false; system::path absolutePath = {}; - uint64_t fileSize = 0; - int64_t lastWriteTime = 0; - bool hasFileInfo = false; + mutable uint64_t fileSize = 0; + mutable int64_t lastWriteTime = 0; + mutable bool hasFileInfo = false; }; // @@ -250,7 +258,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted public: // Used to check compatibility 
of Caches before reading - constexpr static inline std::string_view VERSION = "1.2.4"; + constexpr static inline std::string_view VERSION = "1.2.6"; static auto const SHADER_BUFFER_SIZE_BYTES = sizeof(uint64_t) / sizeof(uint8_t); // It's obviously 8 @@ -455,14 +463,14 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted NBL_API2 core::smart_refctd_ptr find(const SEntry& mainFile, const CIncludeFinder* finder) const; NBL_API2 bool contains(const SEntry& mainFile, const CIncludeFinder* finder) const; - NBL_API2 bool findEntryForCode(std::string_view code, const SCompilerOptions& options, const CIncludeFinder* finder, SEntry& outEntry) const; + NBL_API2 bool findEntryForCode(std::string_view code, const SCompilerOptions& options, const CIncludeFinder* finder, SEntry& outEntry, bool validateDependencies = true, bool* depsUpdated = nullptr) const; NBL_API2 core::smart_refctd_ptr decompressEntry(const SEntry& entry) const; inline CCache() {} // De/serialization methods NBL_API2 core::smart_refctd_ptr serialize() const; - NBL_API2 static core::smart_refctd_ptr deserialize(const std::span serializedCache); + NBL_API2 static core::smart_refctd_ptr deserialize(const std::span serializedCache, bool skipDependencies = false); private: // we only do lookups based on main file contents + compiler options @@ -487,13 +495,13 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted using EntrySet = core::unordered_set; EntrySet m_container; - NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder) const; + NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder, bool validateDependencies, bool* depsUpdated) const; }; class CPreprocessCache final : public IReferenceCounted { public: - constexpr static inline std::string_view VERSION = "2.2"; + constexpr static inline std::string_view VERSION = "2.3"; struct SEntry { @@ -532,24 +540,39 @@ class NBL_API2 IShaderCompiler : public 
core::IReferenceCounted EProbeStatus status = EProbeStatus::EntryInvalid; bool hasPrefix = false; bool cacheHit = false; + bool depsUpdated = false; }; inline bool hasEntry() const { return m_hasEntry; } inline const SEntry& getEntry() const { return m_entry; } - inline void setEntry(SEntry&& entry) { m_entry = std::move(entry); m_hasEntry = true; } + inline void setEntry(SEntry&& entry) + { + m_entry = std::move(entry); + m_hasEntry = true; + m_prefixLoaded = true; + m_backingPath.clear(); + m_prefixOffset = 0; + m_prefixSize = 0; + } NBL_API2 core::smart_refctd_ptr serialize() const; NBL_API2 static core::smart_refctd_ptr deserialize(const std::span serializedCache); - NBL_API2 static core::smart_refctd_ptr loadFromFile(const system::path& path, ELoadStatus& status); + NBL_API2 static core::smart_refctd_ptr loadFromFile(const system::path& path, ELoadStatus& status, bool loadPrefix = true); NBL_API2 static bool writeToFile(const system::path& path, const CPreprocessCache& cache); NBL_API2 static SProbeResult probe(std::string_view code, const CPreprocessCache* cache, ELoadStatus loadStatus, const SPreprocessorOptions& preprocessOptions); NBL_API2 static const char* getProbeReason(EProbeStatus status); - NBL_API2 bool validateDependencies(const CIncludeFinder* finder) const; + NBL_API2 bool validateDependencies(const CIncludeFinder* finder, bool* depsUpdated = nullptr) const; NBL_API2 std::string buildCombinedCode(std::string_view body, std::string_view sourceIdentifier) const; private: + void ensurePrefixLoaded() const; + bool m_hasEntry = false; - SEntry m_entry; + mutable SEntry m_entry; + mutable system::path m_backingPath; + mutable uint64_t m_prefixOffset = 0; + mutable uint32_t m_prefixSize = 0; + mutable bool m_prefixLoaded = true; }; struct SPreprocessCacheResult @@ -563,17 +586,6 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted std::string code; }; - struct DepfileWriteParams - { - system::ISystem* system = nullptr; - std::string_view 
depfilePath = {}; - std::string_view outputPath = {}; - std::string_view sourceIdentifier = {}; - system::path workingDirectory = {}; - }; - - static bool writeDepfile(const DepfileWriteParams& params, const CCache::SEntry::dependency_container_t& dependencies, const CIncludeFinder* includeFinder = nullptr, system::logger_opt_ptr logger = nullptr); - core::smart_refctd_ptr compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const; SPreprocessCacheResult preprocessWithCache(std::string_view code, IShader::E_SHADER_STAGE stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache& cache, CPreprocessCache::ELoadStatus loadStatus, std::string_view sourceIdentifier) const; diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 4132fe6fe6..2e5733c9b5 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -366,22 +367,12 @@ namespace nbl::wave std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector& dxc_compile_flags_override, std::vector* dependencies, std::vector* macro_defs) const { - const bool depfileEnabled = preprocessOptions.depfile; - if (depfileEnabled) - { - if (preprocessOptions.depfilePath.empty()) - { - preprocessOptions.logger.log("Depfile path is empty.", system::ILogger::ELL_ERROR); - return {}; - } - } - if (preprocessOptions.applyForceIncludes && !preprocessOptions.forceIncludes.empty()) code = IShaderCompiler::applyForceIncludes(code, preprocessOptions.forceIncludes); std::vector localDependencies; auto* dependenciesOut = dependencies; - if (depfileEnabled && !dependenciesOut) + if (!dependenciesOut) dependenciesOut = &localDependencies; // HACK: we do a pre-pre-process here to add \n after every #pragma to neutralize boost::wave's actions @@ -409,7 
+400,36 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE stage = context.get_hooks().m_pragmaStage; if (dependenciesOut) + { *dependenciesOut = std::move(context.get_dependencies()); + if (!dependenciesOut->empty()) + { + std::unordered_set seen; + seen.reserve(dependenciesOut->size()); + std::vector unique; + unique.reserve(dependenciesOut->size()); + for (auto& dep : *dependenciesOut) + { + std::string key; + if (!dep.getAbsolutePath().empty()) + { + key = dep.getAbsolutePath().string(); + } + else + { + key.reserve(dep.getRequestingSourceDir().string().size() + dep.getIdentifier().size() + 4); + key.append(dep.getRequestingSourceDir().string()); + key.push_back('|'); + key.append(dep.getIdentifier()); + key.push_back('|'); + key.push_back(dep.isStandardInclude() ? '1' : '0'); + } + if (seen.insert(key).second) + unique.emplace_back(std::move(dep)); + } + *dependenciesOut = std::move(unique); + } + } if (macro_defs) context.dump_macro_definitions(*macro_defs); } @@ -431,19 +451,6 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE if (resolvedString.empty()) return resolvedString; - if (depfileEnabled) - { - IShaderCompiler::DepfileWriteParams params = {}; - const std::string depfilePathString = preprocessOptions.depfilePath.generic_string(); - params.depfilePath = depfilePathString; - params.sourceIdentifier = preprocessOptions.sourceIdentifier; - if (!params.sourceIdentifier.empty()) - params.workingDirectory = std::filesystem::path(std::string(params.sourceIdentifier)).parent_path(); - params.system = m_system.get(); - if (!IShaderCompiler::writeDepfile(params, *dependenciesOut, preprocessOptions.includeFinder, preprocessOptions.logger)) - return {}; - } - return resolvedString; } diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index f21436d9ea..5aa16e645d 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ 
b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -5,18 +5,327 @@ #include "nbl/asset/utils/shadercUtils.h" #include "nbl/asset/utils/shaderCompiler_serialization.h" #include "nbl/core/hash/blake.h" +#include "nbl/core/hash/xxHash256.h" #include #include #include #include +#include #include #include #include #include +#include +#include +#include +#include + +#ifdef NBL_EMBED_BUILTIN_RESOURCES +#include "nbl/builtin/CArchive.h" +#include "spirv/builtin/CArchive.h" +#include "boost/builtin/CArchive.h" +#include "nbl/devicegen/builtin/CArchive.h" +#endif + +#ifdef _WIN32 +#include +#endif #include #include +namespace +{ +struct FileInfoCacheEntry +{ + uint64_t size = 0; + int64_t ticks = 0; + bool ok = false; +}; + +std::unordered_map g_fileInfoCache; +std::mutex g_fileInfoCacheMutex; + +#ifdef NBL_EMBED_BUILTIN_RESOURCES +inline bool tryGetBuiltinResource(const std::string& normalized, const nbl::system::SBuiltinFile*& outFile, std::string& outRel, std::string_view& outPrefix) +{ + auto tryNamespace = [&](std::string_view prefix, const nbl::system::SBuiltinFile& (*getResource)(const std::string&)) -> bool + { + if (normalized.rfind(prefix, 0) != 0) + return false; + std::string rel = normalized.substr(prefix.size()); + if (!rel.empty() && (rel.front() == '/' || rel.front() == '\\')) + rel.erase(rel.begin()); + const auto& resource = getResource(rel); + if (!resource.contents || resource.size == 0) + return false; + outFile = &resource; + outRel = std::move(rel); + outPrefix = prefix; + return true; + }; + + if (tryNamespace(nbl::builtin::pathPrefix, nbl::builtin::get_resource_runtime)) + return true; + if (tryNamespace(spirv::builtin::pathPrefix, spirv::builtin::get_resource_runtime)) + return true; + if (tryNamespace(boost::builtin::pathPrefix, boost::builtin::get_resource_runtime)) + return true; + if (tryNamespace(nbl::devicegen::builtin::pathPrefix, nbl::devicegen::builtin::get_resource_runtime)) + return true; + + return false; +} + +inline bool 
tryGetBuiltinResourceHash(const nbl::system::path& path, nbl::core::blake3_hash_t& outHash) +{ + if (path.empty()) + return false; + const std::string normalized = path.generic_string(); + const nbl::system::SBuiltinFile* resource = nullptr; + std::string rel; + std::string_view prefix; + if (!tryGetBuiltinResource(normalized, resource, rel, prefix)) + return false; + std::memcpy(outHash.data, resource->xx256Hash.data(), sizeof(outHash.data)); + return true; +} + +inline bool matchBuiltinResourceHash(const nbl::system::path& path, const nbl::core::blake3_hash_t& expected) +{ + nbl::core::blake3_hash_t hash = {}; + if (!tryGetBuiltinResourceHash(path, hash)) + return false; + return hash == expected; +} + +class CBuiltinArchiveIncludeLoader final : public nbl::asset::IShaderCompiler::IIncludeLoader +{ + public: + using IIncludeLoader = nbl::asset::IShaderCompiler::IIncludeLoader; + + IIncludeLoader::found_t getInclude(const nbl::system::path& searchPath, const std::string& includeName) const override + { + std::string normalized = nbl::system::path(includeName).generic_string(); + if (!searchPath.empty()) + { + const std::string search = nbl::system::path(searchPath).generic_string(); + if (normalized.rfind(search, 0) != 0) + normalized = (nbl::system::path(search) / includeName).generic_string(); + } + + const nbl::system::SBuiltinFile* resource = nullptr; + std::string rel; + std::string_view prefix; + if (!tryGetBuiltinResource(normalized, resource, rel, prefix)) + return {}; + + IIncludeLoader::found_t ret = {}; + ret.absolutePath = nbl::system::path(std::string(prefix)) / rel; + ret.contents.assign(reinterpret_cast(resource->contents), resource->size); + if (!ret.contents.empty() && ret.contents.back() != '\n' && ret.contents.back() != '\r') + ret.contents.push_back('\n'); + std::memcpy(ret.hash.data, resource->xx256Hash.data(), sizeof(ret.hash.data)); + ret.hasHash = true; + ret.fileSize = resource->size; + ret.hasFileInfo = false; + return ret; + } +}; 
+#endif + +inline bool getFileInfoFast(const nbl::system::path& path, uint64_t& sizeOut, int64_t& timeOut) +{ +#ifdef _WIN32 + WIN32_FILE_ATTRIBUTE_DATA data = {}; + if (!GetFileAttributesExW(path.c_str(), GetFileExInfoStandard, &data)) + return false; + ULARGE_INTEGER size = {}; + size.HighPart = data.nFileSizeHigh; + size.LowPart = data.nFileSizeLow; + ULARGE_INTEGER time = {}; + time.HighPart = data.ftLastWriteTime.dwHighDateTime; + time.LowPart = data.ftLastWriteTime.dwLowDateTime; + sizeOut = size.QuadPart; + using file_clock = std::chrono::file_clock; + const auto duration = file_clock::duration{ static_cast(time.QuadPart) }; + const auto fileTp = std::chrono::time_point{ duration }; + const auto utcTp = std::chrono::clock_cast(fileTp); + timeOut = utcTp.time_since_epoch().count(); + return true; +#else + std::error_code ec; + std::filesystem::directory_entry entry(path, ec); + if (ec) + return false; + const auto time = entry.last_write_time(ec); + if (ec) + return false; + const auto size = entry.file_size(ec); + if (ec) + return false; + sizeOut = size; + const auto utcTp = std::chrono::clock_cast(time); + timeOut = utcTp.time_since_epoch().count(); + return true; +#endif +} + +inline bool getFileInfoFast(const nbl::system::path& path, uint64_t& sizeOut, int64_t& timeOut, nbl::system::ISystem* system) +{ + if (getFileInfoFast(path, sizeOut, timeOut)) + return true; + if (!system || path.empty()) + return false; + + nbl::system::ISystem::future_t> future; + system->createFile(future, path, nbl::system::IFile::ECF_READ); + if (!future.wait()) + return false; + nbl::core::smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file) + return false; + sizeOut = file->getSize(); + timeOut = file->getLastWriteTime().time_since_epoch().count(); + return true; +} + +inline bool getFileInfoCached(const nbl::system::path& path, uint64_t& sizeOut, int64_t& timeOut, nbl::system::ISystem* system) +{ + if (path.empty()) + return 
false; + + { + std::lock_guard lock(g_fileInfoCacheMutex); + const auto it = g_fileInfoCache.find(path); + if (it != g_fileInfoCache.end()) + { + if (!it->second.ok) + return false; + sizeOut = it->second.size; + timeOut = it->second.ticks; + return true; + } + } + + uint64_t size = 0; + int64_t ticks = 0; + const bool ok = getFileInfoFast(path, size, ticks, system); + { + std::lock_guard lock(g_fileInfoCacheMutex); + g_fileInfoCache.emplace(path, FileInfoCacheEntry{ size, ticks, ok }); + } + if (!ok) + return false; + sizeOut = size; + timeOut = ticks; + return true; +} + +template +inline void collectFileInfoMismatchesParallel(const DepContainer& deps, std::vector& out, nbl::system::ISystem* system) +{ + const size_t count = deps.size(); + if (!count) + return; + + std::vector fileInfoIndices; + fileInfoIndices.reserve(count); + std::unordered_map seenPaths; + seenPaths.reserve(count); + + for (size_t i = 0; i < count; ++i) + { + const auto& dep = deps[i]; + if (!dep.getHasFileInfo()) + { +#ifdef NBL_EMBED_BUILTIN_RESOURCES + if (!dep.getAbsolutePath().empty()) + { + if (matchBuiltinResourceHash(dep.getAbsolutePath(), dep.getHash())) + continue; + } + else + { + const nbl::system::path logicalPath(dep.getIdentifier()); + if (matchBuiltinResourceHash(logicalPath, dep.getHash())) + continue; + } +#endif + out.push_back(i); + continue; + } + const auto& path = dep.getAbsolutePath(); + if (path.empty()) + { + out.push_back(i); + continue; + } + if (seenPaths.emplace(path, true).second) + fileInfoIndices.push_back(i); + } + + const size_t fileCount = fileInfoIndices.size(); + if (!fileCount) + return; + + unsigned threads = std::thread::hardware_concurrency(); + if (!threads) + threads = 1u; + if (threads > 32u) + threads = 32u; + if (threads > fileCount) + threads = static_cast(fileCount); + + if (threads <= 1u || fileCount < 32u) + { + for (size_t k = 0; k < fileCount; ++k) + { + const size_t i = fileInfoIndices[k]; + const auto& dep = deps[i]; + const auto& path = 
dep.getAbsolutePath(); + uint64_t size = 0; + int64_t ticks = 0; + if (path.empty() || !getFileInfoFast(path, size, ticks) || dep.getLastWriteTime() != ticks || dep.getFileSize() != size) + out.push_back(i); + } + return; + } + + std::vector> perThread(threads); + const size_t chunk = (fileCount + threads - 1u) / threads; + std::vector workers; + workers.reserve(threads); + for (unsigned t = 0; t < threads; ++t) + { + const size_t start = t * chunk; + if (start >= fileCount) + break; + const size_t end = std::min(start + chunk, fileCount); + workers.emplace_back([&deps, &perThread, &fileInfoIndices, t, start, end, system]() + { + auto& local = perThread[t]; + for (size_t k = start; k < end; ++k) + { + const size_t i = fileInfoIndices[k]; + const auto& dep = deps[i]; + const auto& path = dep.getAbsolutePath(); + uint64_t size = 0; + int64_t ticks = 0; + if (path.empty() || !getFileInfoFast(path, size, ticks) || dep.getLastWriteTime() != ticks || dep.getFileSize() != size) + local.push_back(i); + } + }); + } + for (auto& worker : workers) + worker.join(); + for (auto& local : perThread) + out.insert(out.end(), local.begin(), local.end()); +} +} + using namespace nbl; using namespace nbl::asset; @@ -107,239 +416,10 @@ IShaderCompiler::IShaderCompiler(core::smart_refctd_ptr&& syste m_defaultIncludeFinder = core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system)); } -bool IShaderCompiler::writeDepfile( - const DepfileWriteParams& params, - const CCache::SEntry::dependency_container_t& dependencies, - const CIncludeFinder* includeFinder, - system::logger_opt_ptr logger) -{ - std::string depfilePathString; - if (!params.depfilePath.empty()) - depfilePathString = std::string(params.depfilePath); - else - depfilePathString = std::string(params.outputPath) + ".d"; - - if (depfilePathString.empty()) - { - logger.log("Depfile path is empty.", system::ILogger::ELL_ERROR); - return false; - } - - const auto parentDirectory = 
std::filesystem::path(depfilePathString).parent_path(); - if (!parentDirectory.empty() && !std::filesystem::exists(parentDirectory)) - { - if (!std::filesystem::create_directories(parentDirectory)) - { - logger.log("Failed to create parent directory for depfile.", system::ILogger::ELL_ERROR); - return false; - } - } - - std::vector depPaths; - depPaths.reserve(dependencies.size() + 1); - - auto addDepPath = [&depPaths, ¶ms](std::filesystem::path path) - { - if (path.empty()) - return; - if (path.is_relative()) - { - if (params.workingDirectory.empty()) - return; - path = std::filesystem::path(params.workingDirectory) / path; - } - std::error_code ec; - std::filesystem::path normalized = std::filesystem::weakly_canonical(path, ec); - if (ec) - { - normalized = std::filesystem::absolute(path, ec); - if (ec) - return; - } - if (normalized.empty() || !std::filesystem::exists(normalized)) - return; - auto normalizedString = normalized.generic_string(); - if (normalizedString.find_first_of("\r\n") != std::string::npos) - return; - depPaths.emplace_back(std::move(normalizedString)); - }; - - if (!params.sourceIdentifier.empty()) - { - std::filesystem::path rootPath{std::string(params.sourceIdentifier)}; - if (rootPath.is_relative()) - { - if (!params.workingDirectory.empty()) - rootPath = std::filesystem::absolute(std::filesystem::path(params.workingDirectory) / rootPath); - else - rootPath = std::filesystem::absolute(rootPath); - } - addDepPath(rootPath); - } - - for (const auto& dep : dependencies) - { - if (includeFinder) - { - IShaderCompiler::IIncludeLoader::found_t header = dep.isStandardInclude() ? - includeFinder->getIncludeStandard(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())) : - includeFinder->getIncludeRelative(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())); - - if (!header) - continue; - addDepPath(header.absolutePath); - } - else - { - std::filesystem::path candidate = dep.isStandardInclude() ? 
std::filesystem::path(std::string(dep.getIdentifier())) : (dep.getRequestingSourceDir() / std::string(dep.getIdentifier())); - if (candidate.is_relative()) - { - if (!params.workingDirectory.empty()) - candidate = std::filesystem::absolute(std::filesystem::path(params.workingDirectory) / candidate); - else - candidate = std::filesystem::absolute(candidate); - } - addDepPath(candidate); - } - } - - std::sort(depPaths.begin(), depPaths.end()); - depPaths.erase(std::unique(depPaths.begin(), depPaths.end()), depPaths.end()); - - auto escapeDepPath = [](const std::string& path) -> std::string - { - std::string normalized = path; - std::replace(normalized.begin(), normalized.end(), '\\', '/'); - std::string out; - out.reserve(normalized.size()); - for (const char c : normalized) - { - if (c == ' ' || c == '#') - out.push_back('\\'); - if (c == '$') - { - out.push_back('$'); - out.push_back('$'); - continue; - } - out.push_back(c); - } - return out; - }; - - if (!params.system) - { - logger.log("Depfile system is null.", system::ILogger::ELL_ERROR); - return false; - } - - const auto depfilePath = std::filesystem::path(depfilePathString); - auto tempPath = depfilePath; - tempPath += ".tmp"; - params.system->deleteFile(tempPath); - - core::smart_refctd_ptr depfile; - { - system::ISystem::future_t> future; - params.system->createFile(future, tempPath, system::IFileBase::ECF_WRITE); - if (!future.wait()) - { - logger.log("Failed to open depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); - return false; - } - future.acquire().move_into(depfile); - } - if (!depfile) - { - logger.log("Failed to open depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); - return false; - } - - std::string targetPathString; - if (params.outputPath.empty()) - { - std::filesystem::path targetPath = depfilePathString; - if (targetPath.extension() == ".d") - targetPath.replace_extension(); - targetPathString = targetPath.generic_string(); - } - else - { - 
targetPathString = std::string(params.outputPath); - } - if (targetPathString.empty()) - { - logger.log("Depfile target path is empty.", system::ILogger::ELL_ERROR); - return false; - } - const std::string target = escapeDepPath(std::filesystem::path(targetPathString).generic_string()); - std::vector escapedDeps; - escapedDeps.reserve(depPaths.size()); - for (const auto& depPath : depPaths) - escapedDeps.emplace_back(escapeDepPath(depPath)); - - std::string depfileContents; - depfileContents.append(target); - depfileContents.append(":"); - if (!escapedDeps.empty()) - { - depfileContents.append(" \\\n"); - for (size_t index = 0; index < escapedDeps.size(); ++index) - { - depfileContents.append(" "); - depfileContents.append(escapedDeps[index]); - if (index + 1 < escapedDeps.size()) - depfileContents.append(" \\\n"); - } - } - depfileContents.append("\n"); - - system::IFile::success_t success; - depfile->write(success, depfileContents.data(), 0, depfileContents.size()); - if (!success) - { - logger.log("Failed to write depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); - return false; - } - depfile = nullptr; - - params.system->deleteFile(depfilePath); - const std::error_code moveError = params.system->moveFileOrDirectory(tempPath, depfilePath); - if (moveError) - { - logger.log("Failed to replace depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); - return false; - } - return true; -} - core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const { - const bool depfileEnabled = options.preprocessorOptions.depfile; - const bool supportsDependencies = options.getCodeContentType() == IShader::E_CONTENT_TYPE::ECT_HLSL; const auto* dependencyOverrides = options.dependencyOverrides; - auto writeDepfileFromDependencies = [&](const CCache::SEntry::dependency_container_t& dependencies) -> bool - { - if (!depfileEnabled) - return true; - - if 
(options.preprocessorOptions.depfilePath.empty()) - { - options.preprocessorOptions.logger.log("Depfile path is empty.", system::ILogger::ELL_ERROR); - return false; - } - - IShaderCompiler::DepfileWriteParams params = {}; - const std::string depfilePathString = options.preprocessorOptions.depfilePath.generic_string(); - params.depfilePath = depfilePathString; - params.sourceIdentifier = options.preprocessorOptions.sourceIdentifier; - if (!params.sourceIdentifier.empty()) - params.workingDirectory = std::filesystem::path(std::string(params.sourceIdentifier)).parent_path(); - params.system = m_system.get(); - return IShaderCompiler::writeDepfile(params, dependencies, options.preprocessorOptions.includeFinder, options.preprocessorOptions.logger); - }; - const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? code : options.preprocessorOptions.codeForCache; CCache::SEntry entry; if (options.readCache || options.writeCache) @@ -350,7 +430,7 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons if (options.readCache) { - auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder); + auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder, true, nullptr); if (found != options.readCache->m_container.end()) { if (options.cacheHit) @@ -361,20 +441,15 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons options.writeCache->insert(std::move(writeEntry)); } auto shader = found->decompressShader(); - if (depfileEnabled && !writeDepfileFromDependencies(found->dependencies)) - return nullptr; return shader; } } - CCache::SEntry::dependency_container_t depfileDependencies; CCache::SEntry::dependency_container_t* dependenciesPtr = nullptr; if (!dependencyOverrides) { if (options.writeCache) dependenciesPtr = &entry.dependencies; - else if (depfileEnabled && supportsDependencies) - dependenciesPtr = &depfileDependencies; } auto retVal = 
compileToSPIRV_impl(code, options, dependenciesPtr); @@ -392,13 +467,6 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons entry.dependencies.emplace_back(dep.getRequestingSourceDir(), dep.getIdentifier(), dep.isStandardInclude(), dep.getHash(), dep.getAbsolutePath(), dep.getFileSize(), dep.getLastWriteTime(), dep.getHasFileInfo()); } - if (retVal && depfileEnabled && supportsDependencies) - { - const auto* deps = dependencyOverrides ? dependencyOverrides : (options.writeCache ? &entry.dependencies : &depfileDependencies); - if (!writeDepfileFromDependencies(*deps)) - return nullptr; - } - if (options.writeCache) { if (entry.setContent(retVal->getContent())) @@ -478,23 +546,40 @@ auto IShaderCompiler::CFileSystemIncludeLoader::getInclude(const system::path& s f->read(succ, contents.data(), 0, size); const bool success = bool(succ); assert(success); + if (!contents.empty() && contents.back() != '\n' && contents.back() != '\r') + contents.push_back('\n'); found_t ret = {}; - ret.absolutePath = f->getFileName(); + ret.absolutePath = path; ret.contents = std::move(contents); - ret.fileSize = size; if (auto precomputed = f->getPrecomputedHash()) { static_assert(sizeof(ret.hash.data) == sizeof(*precomputed)); std::memcpy(ret.hash.data, &(*precomputed), sizeof(ret.hash.data)); ret.hasHash = true; + ret.hasFileInfo = false; + } + else + { + ret.fileSize = size; + const auto fileTime = f->getLastWriteTime(); + ret.lastWriteTime = fileTime.time_since_epoch().count(); + ret.hasFileInfo = true; } return ret; } IShaderCompiler::CIncludeFinder::CIncludeFinder(core::smart_refctd_ptr&& system) - : m_defaultFileSystemLoader(core::make_smart_refctd_ptr(std::move(system))) + : m_defaultFileSystemLoader(core::make_smart_refctd_ptr(core::smart_refctd_ptr(system))) + , m_system(std::move(system)) { +#ifdef NBL_EMBED_BUILTIN_RESOURCES + auto builtinLoader = core::make_smart_refctd_ptr(); + addSearchPath(std::string(nbl::builtin::pathPrefix), builtinLoader); + 
addSearchPath(std::string(spirv::builtin::pathPrefix), builtinLoader); + addSearchPath(std::string(boost::builtin::pathPrefix), builtinLoader); + addSearchPath(std::string(nbl::devicegen::builtin::pathPrefix), builtinLoader); +#endif addSearchPath("", m_defaultFileSystemLoader); } @@ -513,7 +598,7 @@ auto IShaderCompiler::CIncludeFinder::getIncludeStandard(const system::path& req if (retVal.fileSize == 0 && !retVal.contents.empty()) retVal.fileSize = retVal.contents.size(); - if (!retVal.absolutePath.empty()) + if (!retVal.hasFileInfo && !retVal.absolutePath.empty() && !retVal.hasHash) { std::error_code ec; const auto fileTime = std::filesystem::last_write_time(retVal.absolutePath, ec); @@ -526,9 +611,9 @@ auto IShaderCompiler::CIncludeFinder::getIncludeStandard(const system::path& req if (!retVal.hasHash) { - core::blake3_hasher hasher; - hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); - retVal.hash = static_cast(hasher); + std::array hash = {}; + core::XXHash_256(retVal.contents.data(), retVal.contents.size(), hash.data()); + std::memcpy(retVal.hash.data, hash.data(), sizeof(retVal.hash.data)); retVal.hasHash = true; } return retVal; @@ -546,7 +631,7 @@ auto IShaderCompiler::CIncludeFinder::getIncludeRelative(const system::path& req if (retVal.fileSize == 0 && !retVal.contents.empty()) retVal.fileSize = retVal.contents.size(); - if (!retVal.absolutePath.empty()) + if (!retVal.hasFileInfo && !retVal.absolutePath.empty() && !retVal.hasHash) { std::error_code ec; const auto fileTime = std::filesystem::last_write_time(retVal.absolutePath, ec); @@ -559,9 +644,9 @@ auto IShaderCompiler::CIncludeFinder::getIncludeRelative(const system::path& req if (!retVal.hasHash) { - core::blake3_hasher hasher; - hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); - retVal.hash = static_cast(hasher); + std::array hash = {}; + 
core::XXHash_256(retVal.contents.data(), retVal.contents.size(), hash.data()); + std::memcpy(retVal.hash.data, hash.data(), sizeof(retVal.hash.data)); retVal.hasHash = true; } return retVal; @@ -661,7 +746,7 @@ auto IShaderCompiler::CIncludeFinder::tryIncludeGenerators(const std::string& in core::smart_refctd_ptr IShaderCompiler::CCache::find(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const { - const auto found = find_impl(mainFile, finder); + const auto found = find_impl(mainFile, finder, true, nullptr); if (found==m_container.end()) return nullptr; return found->decompressShader(); @@ -669,14 +754,14 @@ core::smart_refctd_ptr IShaderCompiler::CCache::find(const SEntr bool IShaderCompiler::CCache::contains(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const { - return find_impl(mainFile, finder) != m_container.end(); + return find_impl(mainFile, finder, true, nullptr) != m_container.end(); } -bool IShaderCompiler::CCache::findEntryForCode(std::string_view code, const SCompilerOptions& options, const IShaderCompiler::CIncludeFinder* finder, SEntry& outEntry) const +bool IShaderCompiler::CCache::findEntryForCode(std::string_view code, const SCompilerOptions& options, const IShaderCompiler::CIncludeFinder* finder, SEntry& outEntry, bool validateDependencies, bool* depsUpdated) const { const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? 
code : options.preprocessorOptions.codeForCache; const CCache::SEntry entry(cacheCode, options); - const auto found = find_impl(entry, finder); + const auto found = find_impl(entry, finder, validateDependencies, depsUpdated); if (found == m_container.end()) return false; outEntry = SEntry(*found); @@ -688,16 +773,45 @@ core::smart_refctd_ptr IShaderCompiler::CCache::decompressEntry( return entry.decompressShader(); } -IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_impl(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const +IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_impl(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder, bool validateDependencies, bool* depsUpdated) const { auto found = m_container.find(mainFile); + if (found == m_container.end() || !validateDependencies) + return found; + if (depsUpdated) + *depsUpdated = false; + bool updated = false; + auto* system = finder ? finder->getSystem() : nullptr; // go through all dependencies if (found!=m_container.end()) { + std::vector mismatches; + mismatches.reserve(found->dependencies.size()); + collectFileInfoMismatchesParallel(found->dependencies, mismatches, system); + if (mismatches.empty()) + return found; + if (!finder) + return m_container.end(); + std::unordered_map fileStatus; std::unordered_map logicalStatus; - for (const auto& dependency : found->dependencies) + fileStatus.reserve(mismatches.size()); + logicalStatus.reserve(mismatches.size()); + for (size_t idx : mismatches) { + const auto& dependency = found->dependencies[idx]; + auto makeLogicalKey = [&dependency]() + { + std::string key; + key.reserve(dependency.getIdentifier().size() + dependency.getRequestingSourceDir().string().size() + 4); + key.append(dependency.getRequestingSourceDir().string()); + key.push_back('|'); + key.append(dependency.getIdentifier()); + key.push_back('|'); + key.push_back(dependency.isStandardInclude() ? 
'1' : '0'); + return key; + }; + if (dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) { if (auto it = fileStatus.find(dependency.getAbsolutePath()); it != fileStatus.end()) @@ -709,13 +823,7 @@ IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_ } else { - std::string key; - key.reserve(dependency.getIdentifier().size() + dependency.getRequestingSourceDir().string().size() + 4); - key.append(dependency.getRequestingSourceDir().string()); - key.push_back('|'); - key.append(dependency.getIdentifier()); - key.push_back('|'); - key.push_back(dependency.isStandardInclude() ? '1' : '0'); + auto key = makeLogicalKey(); if (auto it = logicalStatus.find(key); it != logicalStatus.end()) { if (!it->second) @@ -725,30 +833,43 @@ IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_ } bool valid = false; - if (dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) + bool precomputedChecked = false; + if (!dependency.getAbsolutePath().empty()) { - std::error_code ec; - std::filesystem::directory_entry entry(dependency.getAbsolutePath(), ec); - if (!ec) + if (auto* system = finder->getSystem()) { - const auto time = entry.last_write_time(ec); - if (!ec) + system::ISystem::future_t> future; + system->createFile(future, dependency.getAbsolutePath(), system::IFile::ECF_READ); + if (future.wait()) { - const auto ticks = time.time_since_epoch().count(); - if (dependency.getLastWriteTime() == ticks) + core::smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (file) { - const auto size = entry.file_size(ec); - if (!ec && size == dependency.getFileSize()) - valid = true; + if (auto precomputed = file->getPrecomputedHash()) + { + precomputedChecked = true; + core::blake3_hash_t hash = {}; + std::memcpy(hash.data, &(*precomputed), sizeof(hash.data)); + if (hash == dependency.getHash()) + valid = true; + else + { + if (dependency.getHasFileInfo() && 
!dependency.getAbsolutePath().empty()) + fileStatus.emplace(dependency.getAbsolutePath(), false); + else + logicalStatus.emplace(makeLogicalKey(), false); + return m_container.end(); + } + } } } } } - if (!valid) + if (!valid && !precomputedChecked) { - if (!finder) - return m_container.end(); IIncludeLoader::found_t header; if (dependency.standardInclude) header = finder->getIncludeStandard(dependency.requestingSourceDir, dependency.identifier); @@ -760,36 +881,38 @@ IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_ if (dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) fileStatus.emplace(dependency.getAbsolutePath(), false); else - { - std::string key; - key.reserve(dependency.getIdentifier().size() + dependency.getRequestingSourceDir().string().size() + 4); - key.append(dependency.getRequestingSourceDir().string()); - key.push_back('|'); - key.append(dependency.getIdentifier()); - key.push_back('|'); - key.push_back(dependency.isStandardInclude() ? 
'1' : '0'); - logicalStatus.emplace(std::move(key), false); - } + logicalStatus.emplace(makeLogicalKey(), false); return m_container.end(); } + + valid = true; + if (header.hasFileInfo) + { + dependency.setFileInfo(header.fileSize, header.lastWriteTime, true); + updated = true; + } + } + + if (valid && dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) + { + uint64_t size = 0; + int64_t ticks = 0; + if (getFileInfoCached(dependency.getAbsolutePath(), size, ticks, system)) + { + dependency.setFileInfo(size, ticks, true); + updated = true; + } } if (dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) fileStatus.emplace(dependency.getAbsolutePath(), true); else - { - std::string key; - key.reserve(dependency.getIdentifier().size() + dependency.getRequestingSourceDir().string().size() + 4); - key.append(dependency.getRequestingSourceDir().string()); - key.push_back('|'); - key.append(dependency.getIdentifier()); - key.push_back('|'); - key.push_back(dependency.isStandardInclude() ? 
'1' : '0'); - logicalStatus.emplace(std::move(key), true); - } + logicalStatus.emplace(makeLogicalKey(), true); } } + if (depsUpdated) + *depsUpdated = updated; return found; } @@ -798,28 +921,66 @@ core::smart_refctd_ptr IShaderCompiler::CCache::serialize() const size_t shaderBufferSize = 0; core::vector offsets(m_container.size()); core::vector sizes(m_container.size()); - json entries; + json entries = json::array(); core::vector shaderCreationParams; + std::vector depsBuffer; + depsBuffer.reserve(m_container.size() * 64u); + + auto write_bytes = [](std::vector& out, const void* data, size_t size) + { + const auto* ptr = reinterpret_cast(data); + out.insert(out.end(), ptr, ptr + size); + }; + auto write_u32 = [&write_bytes](std::vector& out, uint32_t value) + { + write_bytes(out, &value, sizeof(value)); + }; + auto write_string = [&write_u32, &write_bytes](std::vector& out, std::string_view value) + { + write_u32(out, static_cast(value.size())); + if (!value.empty()) + write_bytes(out, value.data(), value.size()); + }; - // In a first loop over entries we add all entries and their shader creation parameters to a json, and get the size of the shaders buffer + write_u32(depsBuffer, static_cast(m_container.size())); size_t i = 0u; for (auto& entry : m_container) { - // Add the entry as a json array - entries.emplace_back(entry); + json entryJson{ + { "mainFileContents", entry.mainFileContents }, + { "compilerArgs", entry.compilerArgs }, + { "hash", entry.hash.data }, + { "lookupHash", entry.lookupHash }, + { "uncompressedContentHash", entry.uncompressedContentHash.data }, + { "uncompressedSize", entry.uncompressedSize }, + }; + entries.emplace_back(std::move(entryJson)); - // We keep a copy of the offsets and the sizes of each shader. 
This is so that later on, when we add the shaders to the buffer after json creation - // (where the params array has been moved) we don't have to read the json to get the offsets again offsets[i] = shaderBufferSize; sizes[i] = entry.spirv->getSize(); - - // And add the params to the shader creation parameters array shaderCreationParams.emplace_back(entry.compilerArgs.stage, entry.compilerArgs.preprocessorArgs.sourceIdentifier.data(), sizes[i], shaderBufferSize); - // Enlarge the shader buffer by the size of the current shader shaderBufferSize += sizes[i]; + + write_u32(depsBuffer, static_cast(entry.dependencies.size())); + for (const auto& dep : entry.dependencies) + { + const auto dir = dep.getRequestingSourceDir().generic_string(); + write_string(depsBuffer, dir); + write_string(depsBuffer, dep.getIdentifier()); + const auto abs = dep.getAbsolutePath().generic_string(); + write_string(depsBuffer, abs); + const uint8_t standardInclude = dep.isStandardInclude() ? 1u : 0u; + write_bytes(depsBuffer, &standardInclude, sizeof(standardInclude)); + write_bytes(depsBuffer, dep.getHash().data, sizeof(dep.getHash().data)); + const uint64_t fileSize = dep.getFileSize(); + write_bytes(depsBuffer, &fileSize, sizeof(fileSize)); + const int64_t lastWriteTime = dep.getLastWriteTime(); + write_bytes(depsBuffer, &lastWriteTime, sizeof(lastWriteTime)); + const uint8_t hasFileInfo = dep.getHasFileInfo() ? 
1u : 0u; + write_bytes(depsBuffer, &hasFileInfo, sizeof(hasFileInfo)); + } i++; } - // Create the containerJson json containerJson{ { "version", VERSION }, { "entries", std::move(entries) }, @@ -828,67 +989,186 @@ core::smart_refctd_ptr IShaderCompiler::CCache::serialize() const std::string dumpedContainerJson = std::move(containerJson.dump()); uint64_t dumpedContainerJsonLength = dumpedContainerJson.size(); - // Create a buffer able to hold all shaders + the containerJson - size_t retValSize = shaderBufferSize + SHADER_BUFFER_SIZE_BYTES + dumpedContainerJsonLength; + size_t retValSize = shaderBufferSize + SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t) + dumpedContainerJsonLength + depsBuffer.size(); core::vector retVal(retValSize); - // first SHADER_BUFFER_SIZE_BYTES (8) in the buffer are the size of the shader buffer memcpy(retVal.data(), &shaderBufferSize, SHADER_BUFFER_SIZE_BYTES); + memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES, &dumpedContainerJsonLength, sizeof(uint64_t)); - // Loop over entries again, adding each one's shader to the buffer. 
i = 0u; + const size_t shaderOffset = SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t); for (auto& entry : m_container) { - memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES + offsets[i], entry.spirv->getPointer(), sizes[i]); + memcpy(retVal.data() + shaderOffset + offsets[i], entry.spirv->getPointer(), sizes[i]); i++; } - // Might as well memcpy everything - memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES + shaderBufferSize, dumpedContainerJson.data(), dumpedContainerJsonLength); + const size_t jsonOffset = shaderOffset + shaderBufferSize; + memcpy(retVal.data() + jsonOffset, dumpedContainerJson.data(), dumpedContainerJsonLength); + if (!depsBuffer.empty()) + memcpy(retVal.data() + jsonOffset + dumpedContainerJsonLength, depsBuffer.data(), depsBuffer.size()); auto memoryResource = core::make_smart_refctd_ptr>(std::move(retVal)); return ICPUBuffer::create({ { retValSize }, memoryResource->getBacker().data(),std::move(memoryResource)}, core::adopt_memory); } -core::smart_refctd_ptr IShaderCompiler::CCache::deserialize(const std::span serializedCache) +core::smart_refctd_ptr IShaderCompiler::CCache::deserialize(const std::span serializedCache, bool skipDependencies) { auto retVal = core::make_smart_refctd_ptr(); - // First get the size of the shader buffer, stored in the first 8 bytes - const uint64_t* cacheStart = reinterpret_cast(serializedCache.data()); - uint64_t shaderBufferSize = cacheStart[0]; - // Next up get the json that stores the container data - std::span cacheAsChar = { reinterpret_cast(serializedCache.data()), serializedCache.size() }; - std::string_view containerJsonString(cacheAsChar.begin() + SHADER_BUFFER_SIZE_BYTES + shaderBufferSize, cacheAsChar.end()); - json containerJson = json::parse(containerJsonString); + if (serializedCache.size() < SHADER_BUFFER_SIZE_BYTES) + return nullptr; + + uint64_t shaderBufferSize = 0; + std::memcpy(&shaderBufferSize, serializedCache.data(), SHADER_BUFFER_SIZE_BYTES); - // Check that this cache is from the currently 
supported version + const size_t minOldHeader = SHADER_BUFFER_SIZE_BYTES + shaderBufferSize; + if (serializedCache.size() < minOldHeader) + return nullptr; + + bool hasBinaryDeps = false; + uint64_t jsonSize = 0; + size_t jsonOffset = 0; + size_t depsOffset = 0; + size_t shaderOffset = SHADER_BUFFER_SIZE_BYTES; + + const size_t minNewHeader = SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t) + shaderBufferSize; + if (serializedCache.size() >= minNewHeader) { - std::string version; - containerJson.at("version").get_to(version); - if (version != VERSION) { - return nullptr; + std::memcpy(&jsonSize, serializedCache.data() + SHADER_BUFFER_SIZE_BYTES, sizeof(jsonSize)); + const size_t candidateJsonOffset = SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t) + shaderBufferSize; + if (candidateJsonOffset + jsonSize <= serializedCache.size()) + { + hasBinaryDeps = true; + jsonOffset = candidateJsonOffset; + depsOffset = candidateJsonOffset + jsonSize; + shaderOffset = SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t); } } - // Now retrieve two vectors, one with the entries and one with the extra data to recreate the CPUShaders + if (!hasBinaryDeps) + { + jsonOffset = SHADER_BUFFER_SIZE_BYTES + shaderBufferSize; + jsonSize = serializedCache.size() - jsonOffset; + shaderOffset = SHADER_BUFFER_SIZE_BYTES; + } + + std::string_view containerJsonString(reinterpret_cast(serializedCache.data() + jsonOffset), jsonSize); + json containerJson; + if (skipDependencies) + { + bool skipNext = false; + auto cb = [&skipNext](int, json::parse_event_t event, json& parsed) + { + if (event == json::parse_event_t::key && parsed.is_string() && parsed.get_ref() == "dependencies") + { + skipNext = true; + return true; + } + if (skipNext) + { + skipNext = false; + return false; + } + return true; + }; + containerJson = json::parse(containerJsonString, cb, true, true); + } + else + { + containerJson = json::parse(containerJsonString); + } + + std::string version; + containerJson.at("version").get_to(version); + if 
(version != VERSION) + return nullptr; + std::vector entries; std::vector shaderCreationParams; containerJson.at("entries").get_to(entries); containerJson.at("shaderCreationParams").get_to(shaderCreationParams); - // We must now recreate the shaders, add them to each entry, then move the entry into the multiset for (auto i = 0u; i < entries.size(); i++) { - // Create buffer to hold the code auto code = ICPUBuffer::create({ shaderCreationParams[i].codeByteSize }); - // Copy the shader bytecode into the buffer - - memcpy(code->getPointer(), serializedCache.data() + SHADER_BUFFER_SIZE_BYTES + shaderCreationParams[i].offset, shaderCreationParams[i].codeByteSize); + memcpy(code->getPointer(), serializedCache.data() + shaderOffset + shaderCreationParams[i].offset, shaderCreationParams[i].codeByteSize); code->setContentHash(code->computeContentHash()); entries[i].spirv = std::move(code); + } - retVal->insert(std::move(entries[i])); + if (hasBinaryDeps && !skipDependencies) + { + auto read_bytes = [](const std::span data, size_t& offset, void* dst, size_t size) -> bool + { + if (offset + size > data.size()) + return false; + std::memcpy(dst, data.data() + offset, size); + offset += size; + return true; + }; + auto read_u32 = [&read_bytes](const std::span data, size_t& offset, uint32_t& out) -> bool + { + return read_bytes(data, offset, &out, sizeof(out)); + }; + auto read_string = [&read_u32, &read_bytes](const std::span data, size_t& offset, std::string& out) -> bool + { + uint32_t size = 0; + if (!read_u32(data, offset, size)) + return false; + if (offset + size > data.size()) + return false; + out.assign(reinterpret_cast(data.data() + offset), size); + offset += size; + return true; + }; + + size_t offset = depsOffset; + uint32_t entryCount = 0; + if (!read_u32(serializedCache, offset, entryCount)) + return nullptr; + if (entryCount != entries.size()) + return nullptr; + + for (uint32_t i = 0; i < entryCount; ++i) + { + uint32_t depCount = 0; + if 
(!read_u32(serializedCache, offset, depCount)) + return nullptr; + entries[i].dependencies.clear(); + entries[i].dependencies.reserve(depCount); + for (uint32_t d = 0; d < depCount; ++d) + { + std::string dir; + std::string identifier; + std::string absolutePath; + if (!read_string(serializedCache, offset, dir)) + return nullptr; + if (!read_string(serializedCache, offset, identifier)) + return nullptr; + if (!read_string(serializedCache, offset, absolutePath)) + return nullptr; + uint8_t standardInclude = 0; + if (!read_bytes(serializedCache, offset, &standardInclude, sizeof(standardInclude))) + return nullptr; + core::blake3_hash_t hash = {}; + if (!read_bytes(serializedCache, offset, hash.data, sizeof(hash.data))) + return nullptr; + uint64_t fileSize = 0; + if (!read_bytes(serializedCache, offset, &fileSize, sizeof(fileSize))) + return nullptr; + int64_t lastWriteTime = 0; + if (!read_bytes(serializedCache, offset, &lastWriteTime, sizeof(lastWriteTime))) + return nullptr; + uint8_t hasFileInfo = 0; + if (!read_bytes(serializedCache, offset, &hasFileInfo, sizeof(hasFileInfo))) + return nullptr; + entries[i].dependencies.emplace_back(system::path(dir), identifier, standardInclude != 0, hash, system::path(absolutePath), fileSize, lastWriteTime, hasFileInfo != 0); + } + } } + for (auto& entry : entries) + retVal->insert(std::move(entry)); + return retVal; } @@ -981,7 +1261,15 @@ IShaderCompiler::CPreprocessCache::SProbeResult IShaderCompiler::CPreprocessCach } const bool prefixMatch = cache->getEntry().prefixHash == result.prefixHash; - const bool depsValid = cache->validateDependencies(finder); + if (!prefixMatch) + { + result.cacheHit = false; + result.status = EProbeStatus::PrefixChanged; + return result; + } + bool depsUpdated = false; + const bool depsValid = cache->validateDependencies(finder, &depsUpdated); + result.depsUpdated = depsUpdated; if (prefixMatch && depsValid) { result.cacheHit = true; @@ -1037,6 +1325,8 @@ IShaderCompiler::SPreprocessCacheResult 
IShaderCompiler::preprocessWithCache(std { result.cacheHit = true; result.cacheUsed = true; + if (probe.depsUpdated) + result.cacheUpdated = true; } else { @@ -1080,6 +1370,7 @@ core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::serialize( { if (!m_hasEntry) return nullptr; + ensurePrefixLoaded(); auto write_bytes = [](std::vector& out, const void* data, size_t size) { @@ -1104,7 +1395,8 @@ core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::serialize( write_string(out, VERSION); write_bytes(out, &m_entry.prefixHash, sizeof(m_entry.prefixHash)); write_u32(out, m_entry.pragmaStage); - write_string(out, m_entry.preprocessedPrefix); + const uint32_t prefixSize = static_cast(m_entry.preprocessedPrefix.size()); + write_u32(out, prefixSize); write_u32(out, static_cast(m_entry.macroDefs.size())); for (const auto& macro : m_entry.macroDefs) @@ -1132,6 +1424,8 @@ core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::serialize( const uint8_t hasFileInfo = dep.getHasFileInfo() ? 1u : 0u; write_bytes(out, &hasFileInfo, sizeof(hasFileInfo)); } + if (prefixSize) + write_bytes(out, m_entry.preprocessedPrefix.data(), m_entry.preprocessedPrefix.size()); auto buffer = ICPUBuffer::create({ out.size() }); if (!buffer) @@ -1188,7 +1482,8 @@ core::smart_refctd_ptr IShaderCompiler::CPrep return nullptr; if (!read_u32(serializedCache, offset, entry.pragmaStage)) return nullptr; - if (!read_string(serializedCache, offset, entry.preprocessedPrefix)) + uint32_t prefixSize = 0; + if (!read_u32(serializedCache, offset, prefixSize)) return nullptr; uint32_t macroCount = 0; @@ -1251,11 +1546,23 @@ core::smart_refctd_ptr IShaderCompiler::CPrep entry.dependencies.emplace_back(system::path(dir), identifier, standardInclude != 0, hash, system::path(absolutePath), fileSize, lastWriteTime, hasFileInfo != 0); } + if (offset + prefixSize > serializedCache.size()) + return nullptr; + if (prefixSize) + { + entry.preprocessedPrefix.assign(reinterpret_cast(serializedCache.data() + offset), 
prefixSize); + offset += prefixSize; + } + + retVal->m_prefixLoaded = true; + retVal->m_backingPath.clear(); + retVal->m_prefixOffset = 0; + retVal->m_prefixSize = 0; retVal->m_hasEntry = true; return retVal; } -core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::loadFromFile(const system::path& path, ELoadStatus& status) +core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::loadFromFile(const system::path& path, ELoadStatus& status, bool loadPrefix) { status = ELoadStatus::Missing; if (!std::filesystem::exists(path)) @@ -1277,22 +1584,193 @@ core::smart_refctd_ptr IShaderCompiler::CPrep return nullptr; } - std::vector data(size); - if (!in.read(reinterpret_cast(data.data()), data.size())) + auto read_bytes = [&in](void* dst, size_t count) -> bool + { + return bool(in.read(reinterpret_cast(dst), count)); + }; + auto read_u32 = [&read_bytes](uint32_t& out) -> bool + { + return read_bytes(&out, sizeof(out)); + }; + auto read_string = [&read_u32, &read_bytes](std::string& out) -> bool + { + uint32_t len = 0; + if (!read_u32(len)) + return false; + if (!len) + { + out.clear(); + return true; + } + out.resize(len); + return read_bytes(out.data(), len); + }; + + uint32_t magic = 0; + if (!read_u32(magic) || magic != 0x50435250u) { status = ELoadStatus::Invalid; return nullptr; } - auto cache = deserialize(std::span(data.data(), data.size())); - if (!cache) + std::string version; + if (!read_string(version) || version != VERSION) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + auto retVal = core::make_smart_refctd_ptr(); + auto& entry = retVal->m_entry; + if (!read_bytes(&entry.prefixHash, sizeof(entry.prefixHash))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + if (!read_u32(entry.pragmaStage)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + uint32_t prefixSize = 0; + if (!read_u32(prefixSize)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + uint32_t macroCount = 0; + if (!read_u32(macroCount)) + { + 
status = ELoadStatus::Invalid; + return nullptr; + } + entry.macroDefs.clear(); + entry.macroDefs.reserve(macroCount); + for (uint32_t i = 0; i < macroCount; ++i) + { + std::string macro; + if (!read_string(macro)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + entry.macroDefs.emplace_back(std::move(macro)); + } + + uint32_t flagCount = 0; + if (!read_u32(flagCount)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + entry.dxcFlags.clear(); + entry.dxcFlags.reserve(flagCount); + for (uint32_t i = 0; i < flagCount; ++i) + { + std::string flag; + if (!read_string(flag)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + entry.dxcFlags.emplace_back(std::move(flag)); + } + + uint32_t depCount = 0; + if (!read_u32(depCount)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + entry.dependencies.clear(); + entry.dependencies.reserve(depCount); + for (uint32_t i = 0; i < depCount; ++i) + { + std::string dir; + std::string identifier; + if (!read_string(dir) || !read_string(identifier)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + std::string absolutePath; + if (!read_string(absolutePath)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + uint8_t standardInclude = 0; + if (!read_bytes(&standardInclude, sizeof(standardInclude))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + core::blake3_hash_t hash = {}; + if (!read_bytes(hash.data, sizeof(hash.data))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + uint64_t fileSize = 0; + if (!read_bytes(&fileSize, sizeof(fileSize))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + int64_t lastWriteTime = 0; + if (!read_bytes(&lastWriteTime, sizeof(lastWriteTime))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + uint8_t hasFileInfo = 0; + if (!read_bytes(&hasFileInfo, sizeof(hasFileInfo))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + entry.dependencies.emplace_back(system::path(dir), identifier, 
standardInclude != 0, hash, system::path(absolutePath), fileSize, lastWriteTime, hasFileInfo != 0); + } + + const auto prefixOffset = static_cast(in.tellg()); + if (prefixOffset + prefixSize > size) { status = ELoadStatus::Invalid; return nullptr; } + if (loadPrefix) + { + entry.preprocessedPrefix.clear(); + if (prefixSize) + { + entry.preprocessedPrefix.resize(prefixSize); + if (!read_bytes(entry.preprocessedPrefix.data(), prefixSize)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + } + retVal->m_prefixLoaded = true; + retVal->m_backingPath.clear(); + retVal->m_prefixOffset = 0; + retVal->m_prefixSize = 0; + } + else + { + if (prefixSize) + in.seekg(static_cast(prefixSize), std::ios::cur); + retVal->m_prefixLoaded = false; + retVal->m_backingPath = path; + retVal->m_prefixOffset = prefixOffset; + retVal->m_prefixSize = prefixSize; + } + + retVal->m_hasEntry = true; status = ELoadStatus::Loaded; - return cache; + return retVal; } bool IShaderCompiler::CPreprocessCache::writeToFile(const system::path& path, const CPreprocessCache& cache) @@ -1313,15 +1791,40 @@ bool IShaderCompiler::CPreprocessCache::writeToFile(const system::path& path, co return bool(out); } -bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinder* finder) const +bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinder* finder, bool* depsUpdated) const { if (!m_hasEntry || !finder) return false; + if (depsUpdated) + *depsUpdated = false; + bool updated = false; + auto* system = finder->getSystem(); + + std::vector mismatches; + mismatches.reserve(m_entry.dependencies.size()); + collectFileInfoMismatchesParallel(m_entry.dependencies, mismatches, system); + if (mismatches.empty()) + return true; std::unordered_map fileStatus; std::unordered_map logicalStatus; - for (const auto& dep : m_entry.dependencies) + fileStatus.reserve(mismatches.size()); + logicalStatus.reserve(mismatches.size()); + for (size_t idx : mismatches) { + const auto& 
dep = m_entry.dependencies[idx]; + auto makeLogicalKey = [&dep]() + { + std::string key; + key.reserve(dep.getIdentifier().size() + dep.getRequestingSourceDir().string().size() + 4); + key.append(dep.getRequestingSourceDir().string()); + key.push_back('|'); + key.append(dep.getIdentifier()); + key.push_back('|'); + key.push_back(dep.isStandardInclude() ? '1' : '0'); + return key; + }; + if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) { if (auto it = fileStatus.find(dep.getAbsolutePath()); it != fileStatus.end()) @@ -1333,13 +1836,7 @@ bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinde } else { - std::string key; - key.reserve(dep.getIdentifier().size() + dep.getRequestingSourceDir().string().size() + 4); - key.append(dep.getRequestingSourceDir().string()); - key.push_back('|'); - key.append(dep.getIdentifier()); - key.push_back('|'); - key.push_back(dep.isStandardInclude() ? '1' : '0'); + auto key = makeLogicalKey(); if (auto it = logicalStatus.find(key); it != logicalStatus.end()) { if (!it->second) @@ -1348,76 +1845,128 @@ bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinde } } - bool valid = false; - if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) - { - std::error_code ec; - std::filesystem::directory_entry entry(dep.getAbsolutePath(), ec); - if (!ec) + bool valid = false; + bool precomputedChecked = false; + if (system && !dep.getAbsolutePath().empty()) { - const auto time = entry.last_write_time(ec); - if (!ec) + system::ISystem::future_t> future; + system->createFile(future, dep.getAbsolutePath(), system::IFile::ECF_READ); + if (future.wait()) { - const auto ticks = time.time_since_epoch().count(); - if (dep.getLastWriteTime() == ticks) + core::smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (file) { - const auto size = entry.file_size(ec); - if (!ec && size == dep.getFileSize()) - valid = true; + if (auto precomputed = 
file->getPrecomputedHash()) + { + precomputedChecked = true; + core::blake3_hash_t hash = {}; + std::memcpy(hash.data, &(*precomputed), sizeof(hash.data)); + if (hash == dep.getHash()) + { + valid = true; + if (!dep.getHasFileInfo()) + { + dep.setFileInfo(file->getSize(), file->getLastWriteTime().time_since_epoch().count(), true); + updated = true; + } + } + else + { + if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) + fileStatus.emplace(dep.getAbsolutePath(), false); + else + logicalStatus.emplace(makeLogicalKey(), false); + return false; + } + } } } } - } - if (!valid) + if (!valid && !precomputedChecked) { + const std::string identifier(dep.getIdentifier()); IIncludeLoader::found_t header; if (dep.isStandardInclude()) - header = finder->getIncludeStandard(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())); + header = finder->getIncludeStandard(dep.getRequestingSourceDir(), identifier); else - header = finder->getIncludeRelative(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())); - - if (!header || header.hash != dep.getHash()) + header = finder->getIncludeRelative(dep.getRequestingSourceDir(), identifier); + if (header.hash != dep.getHash()) { if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) fileStatus.emplace(dep.getAbsolutePath(), false); else - { - std::string key; - key.reserve(dep.getIdentifier().size() + dep.getRequestingSourceDir().string().size() + 4); - key.append(dep.getRequestingSourceDir().string()); - key.push_back('|'); - key.append(dep.getIdentifier()); - key.push_back('|'); - key.push_back(dep.isStandardInclude() ? 
'1' : '0'); - logicalStatus.emplace(std::move(key), false); - } + logicalStatus.emplace(makeLogicalKey(), false); return false; } + + valid = true; + if (header.hasFileInfo) + { + dep.setFileInfo(header.fileSize, header.lastWriteTime, true); + updated = true; + } + } + + if (valid && dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) + { + uint64_t size = 0; + int64_t ticks = 0; + if (getFileInfoCached(dep.getAbsolutePath(), size, ticks, system)) + { + dep.setFileInfo(size, ticks, true); + updated = true; + } } if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) fileStatus.emplace(dep.getAbsolutePath(), true); else - { - std::string key; - key.reserve(dep.getIdentifier().size() + dep.getRequestingSourceDir().string().size() + 4); - key.append(dep.getRequestingSourceDir().string()); - key.push_back('|'); - key.append(dep.getIdentifier()); - key.push_back('|'); - key.push_back(dep.isStandardInclude() ? '1' : '0'); - logicalStatus.emplace(std::move(key), true); - } + logicalStatus.emplace(makeLogicalKey(), true); } + + if (depsUpdated) + *depsUpdated = updated; return true; } +void IShaderCompiler::CPreprocessCache::ensurePrefixLoaded() const +{ + if (m_prefixLoaded) + return; + if (m_prefixSize == 0) + { + m_prefixLoaded = true; + return; + } + if (m_backingPath.empty()) + return; + + std::ifstream in(m_backingPath, std::ios::binary); + if (!in) + return; + in.seekg(static_cast(m_prefixOffset), std::ios::beg); + if (!in) + return; + + std::string prefix; + prefix.resize(m_prefixSize); + if (!in.read(prefix.data(), prefix.size())) + return; + + m_entry.preprocessedPrefix = std::move(prefix); + m_prefixLoaded = true; +} + + std::string IShaderCompiler::CPreprocessCache::buildCombinedCode(std::string_view body, std::string_view sourceIdentifier) const { if (!m_hasEntry) return std::string(body); + ensurePrefixLoaded(); std::string out; size_t reserve = m_entry.preprocessedPrefix.size() + body.size(); for (const auto& m : m_entry.macroDefs) diff --git 
a/src/nbl/asset/utils/shaderCompiler_serialization.h b/src/nbl/asset/utils/shaderCompiler_serialization.h index 3f0c882a72..4893737da7 100644 --- a/src/nbl/asset/utils/shaderCompiler_serialization.h +++ b/src/nbl/asset/utils/shaderCompiler_serialization.h @@ -200,7 +200,8 @@ inline void from_json(const json& j, SEntry& entry) j.at("compilerArgs").get_to(entry.compilerArgs); j.at("hash").get_to(entry.hash.data); j.at("lookupHash").get_to(entry.lookupHash); - j.at("dependencies").get_to(entry.dependencies); + if (j.contains("dependencies")) + j.at("dependencies").get_to(entry.dependencies); j.at("uncompressedContentHash").get_to(entry.uncompressedContentHash.data); j.at("uncompressedSize").get_to(entry.uncompressedSize); entry.spirv = nullptr; diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index a5275acad8..75992181e4 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -17,6 +17,7 @@ #include #include "nbl/asset/metadata/CHLSLMetadata.h" #include "nbl/asset/utils/shaderCompiler_serialization.h" +#include "nbl/core/hash/blake.h" #include "nbl/core/hash/fnv1a64.h" #include "nlohmann/json.hpp" @@ -86,8 +87,8 @@ class ShaderLogger final : public IThreadsafeLogger return; const auto parent = std::filesystem::path(m_logPath).parent_path(); - if (!parent.empty() && !std::filesystem::exists(parent)) - std::filesystem::create_directories(parent); + if (!parent.empty() && m_system && !m_system->exists(parent, IFileBase::ECF_READ)) + m_system->createDirectory(parent); for (auto attempt = 0u; attempt < kDeleteRetries; ++attempt) { @@ -386,6 +387,33 @@ class ShaderCompiler final : public IApplicationFramework m_include_search_paths.emplace_back(m_arguments[i + 1]); } + auto addIncludePath = [&](const std::filesystem::path& path) + { + if (path.empty()) + return; + std::error_code ec; + const auto normalized = std::filesystem::weakly_canonical(path, ec).generic_string(); + if (normalized.empty()) + return; + if (std::find(m_include_search_paths.begin(), 
m_include_search_paths.end(), normalized) == m_include_search_paths.end()) + m_include_search_paths.emplace_back(normalized); + }; + + if (!rawArgs.empty()) + { + std::error_code ec; + std::filesystem::path exePath = rawArgs.front(); + if (std::filesystem::exists(exePath, ec)) + { + exePath = std::filesystem::weakly_canonical(exePath, ec); + if (!ec) + { + const auto root = exePath.parent_path().parent_path().parent_path(); + addIncludePath(root / "include"); + } + } + } + if (verbose) { auto join = [](const std::vector& items) @@ -427,7 +455,7 @@ class ShaderCompiler final : public IApplicationFramework const auto start = std::chrono::high_resolution_clock::now(); const std::string preprocessedOutputPath = outputFilepath + ".pre.hlsl"; - const auto job = runShaderJob(shader.get(), shaderStage, fileToCompile, dep, shaderCache, preCache, preprocessOnly, preprocessedOutputPath, verbose); + const auto job = runShaderJob(shader.get(), shaderStage, fileToCompile, dep, shaderCache, preCache, preprocessOnly, outputFilepath, preprocessedOutputPath, verbose); const auto end = std::chrono::high_resolution_clock::now(); const char* const op = preprocessOnly ? 
"preprocessing" : "compilation"; @@ -447,24 +475,50 @@ class ShaderCompiler final : public IApplicationFramework } const auto outParent = std::filesystem::path(outputFilepath).parent_path(); - if (!outParent.empty() && !std::filesystem::exists(outParent)) + if (!outParent.empty() && m_system && !m_system->exists(outParent, IFileBase::ECF_READ)) { - if (!std::filesystem::create_directories(outParent)) + if (!m_system->createDirectory(outParent)) { m_logger->log("Failed to create parent directory for output %s.", ILogger::ELL_ERROR, outputFilepath.c_str()); return false; } } - if (!writeBinaryFile(m_system.get(), std::filesystem::path(outputFilepath), job.view.data(), job.view.size())) + if (!job.view.empty()) { - m_logger->log("Failed to write output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); - return false; + const auto writeStart = std::chrono::high_resolution_clock::now(); + if (!writeBinaryFile(m_system.get(), std::filesystem::path(outputFilepath), job.view.data(), job.view.size())) + { + m_logger->log("Failed to write output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); + return false; + } + OutputHashRecord record = {}; + record.size = job.view.size(); + { + core::blake3_hasher hasher; + hasher.update(job.view.data(), job.view.size()); + record.hash = static_cast(hasher); + } + const auto hashPath = makeOutputHashPath(std::filesystem::path(outputFilepath)); + if (!writeBinaryFile(m_system.get(), hashPath, &record, sizeof(record))) + m_logger->log("Failed to write output hash file: %s", ILogger::ELL_WARNING, hashPath.string().c_str()); + const auto writeEnd = std::chrono::high_resolution_clock::now(); + if (verbose) + { + const auto duration = std::chrono::duration_cast(writeEnd - writeStart).count(); + m_logger->log("Write output took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(duration)); + } + } + else if (verbose) + { + m_logger->log("Output up to date. 
Skipping write.", ILogger::ELL_DEBUG); } const auto took = std::to_string(std::chrono::duration_cast(end - start).count()); m_logger->log("Total took: %s ms.", ILogger::ELL_PERFORMANCE, took.c_str()); + flushSystemQueue(m_system.get(), std::filesystem::path(outputFilepath)); + return true; } @@ -507,19 +561,31 @@ class ShaderCompiler final : public IApplicationFramework std::string_view view; }; + struct OutputHashRecord + { + core::blake3_hash_t hash = {}; + uint64_t size = 0; + }; + static std::filesystem::path makeCachePath(std::filesystem::path outputPath) { outputPath += ".ppcache"; return outputPath; } + static std::filesystem::path makeOutputHashPath(std::filesystem::path outputPath) + { + outputPath += ".hash"; + return outputPath; + } + static std::filesystem::path makePreprocessCachePath(std::filesystem::path outputPath) { outputPath += ".ppcache.pre"; return outputPath; } - static smart_refctd_ptr loadShaderCache(system::ISystem* system, const std::filesystem::path& path, CacheLoadStatus& status) + static smart_refctd_ptr loadShaderCache(system::ISystem* system, const std::filesystem::path& path, CacheLoadStatus& status, bool skipDependencies) { status = CacheLoadStatus::Missing; if (!system) @@ -531,17 +597,21 @@ class ShaderCompiler final : public IApplicationFramework if (!system->exists(path, IFileBase::ECF_READ)) return nullptr; - ISystem::future_t> future; - system->createFile(future, path, IFileBase::ECF_READ); - if (!future.wait()) + auto openFile = [&](const core::bitflag flags) -> smart_refctd_ptr { - status = CacheLoadStatus::Invalid; - return nullptr; - } + ISystem::future_t> future; + system->createFile(future, path, flags); + if (!future.wait()) + return nullptr; + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + return file; + }; - smart_refctd_ptr file; - if (auto lock = future.acquire(); lock) - lock.move_into(file); + smart_refctd_ptr file = openFile(bitflag(IFileBase::ECF_READ) | 
IFileBase::ECF_MAPPABLE); + if (!file) + file = openFile(bitflag(IFileBase::ECF_READ)); if (!file) { status = CacheLoadStatus::Invalid; @@ -555,16 +625,27 @@ class ShaderCompiler final : public IApplicationFramework return nullptr; } - std::vector data(size); - IFile::success_t succ; - file->read(succ, data.data(), 0, size); - if (!succ || succ.getBytesProcessed(true) != size) + const auto* mapped = static_cast(file->getMappedPointer()); + std::vector data; + std::span serialized; + if (mapped) { - status = CacheLoadStatus::Invalid; - return nullptr; + serialized = std::span(mapped, size); + } + else + { + data.resize(size); + IFile::success_t succ; + file->read(succ, data.data(), 0, size); + if (!succ || succ.getBytesProcessed(true) != size) + { + status = CacheLoadStatus::Invalid; + return nullptr; + } + serialized = std::span(data.data(), data.size()); } - auto cache = IShaderCompiler::CCache::deserialize(std::span(data.data(), data.size())); + auto cache = IShaderCompiler::CCache::deserialize(serialized, skipDependencies); if (!cache) { status = CacheLoadStatus::Invalid; @@ -575,19 +656,84 @@ class ShaderCompiler final : public IApplicationFramework return cache; } + static bool getFileInfo(system::ISystem* system, const std::filesystem::path& path, uint64_t& sizeOut, int64_t& timeOut) + { + if (!system || !system->exists(path, IFileBase::ECF_READ)) + return false; + + ISystem::future_t> future; + system->createFile(future, path, IFileBase::ECF_READ); + if (!future.wait()) + return false; + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file) + return false; + + sizeOut = file->getSize(); + timeOut = file->getLastWriteTime().time_since_epoch().count(); + return sizeOut != 0; + } + + static bool readBinaryFile(system::ISystem* system, const std::filesystem::path& path, void* data, size_t size) + { + if (!system) + return false; + if (!system->exists(path, IFileBase::ECF_READ)) + return false; + + 
ISystem::future_t> future; + system->createFile(future, path, IFileBase::ECF_READ); + if (!future.wait()) + return false; + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file || file->getSize() != size) + return false; + + IFile::success_t succ; + file->read(succ, data, 0, size); + return succ.getBytesProcessed(true) == size; + } + static bool writeBinaryFile(system::ISystem* system, const std::filesystem::path& path, const void* data, size_t size) { if (!system) return false; const auto parent = path.parent_path(); - if (!parent.empty() && !std::filesystem::exists(parent)) - std::filesystem::create_directories(parent); + if (!parent.empty() && !system->exists(parent, IFileBase::ECF_READ)) + system->createDirectory(parent); - system->deleteFile(path); + if (!system->exists(path, IFileBase::ECF_READ)) + { + ISystem::future_t> future; + system->createFile(future, path, bitflag(IFileBase::ECF_WRITE) | IFileBase::ECF_SHARE_READ_WRITE | IFileBase::ECF_SHARE_DELETE); + if (!future.wait()) + return false; + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file) + return false; + + IFile::success_t succ; + file->write(succ, data, 0, size); + return succ.getBytesProcessed(true) == size; + } + + std::filesystem::path tempPath = path; + tempPath += ".tmp"; + tempPath += std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count()); + system->deleteFile(tempPath); ISystem::future_t> future; - system->createFile(future, path, bitflag(IFileBase::ECF_WRITE) | IFileBase::ECF_SHARE_READ_WRITE | IFileBase::ECF_SHARE_DELETE); + system->createFile(future, tempPath, bitflag(IFileBase::ECF_WRITE) | IFileBase::ECF_SHARE_READ_WRITE | IFileBase::ECF_SHARE_DELETE); if (!future.wait()) return false; @@ -599,7 +745,34 @@ class ShaderCompiler final : public IApplicationFramework IFile::success_t succ; file->write(succ, data, 0, size); - return succ.getBytesProcessed(true) 
== size; + if (succ.getBytesProcessed(true) != size) + { + system->deleteFile(tempPath); + return false; + } + + file = nullptr; + system->deleteFile(path); + const std::error_code moveError = system->moveFileOrDirectory(tempPath, path); + if (moveError) + { + system->deleteFile(tempPath); + return false; + } + return true; + } + + static void flushSystemQueue(system::ISystem* system, const std::filesystem::path& path) + { + if (!system) + return; + + ISystem::future_t> future; + system->createFile(future, path, IFileBase::ECF_READ); + if (!future.wait()) + return; + if (auto lock = future.acquire(); lock) + lock.discard(); } static bool writeShaderCache(system::ISystem* system, const std::filesystem::path& path, const IShaderCompiler::CCache& cache) @@ -630,6 +803,7 @@ class ShaderCompiler final : public IApplicationFramework for (const auto& a : args) { + if (split(a, "-I")) continue; if (split(a, "-MF")) continue; if (split(a, "-Fo")) continue; if (split(a, "-Fc")) continue; @@ -694,7 +868,7 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Saved \"%s\"", ILogger::ELL_INFO, oPath.string().c_str()); } - RunResult runShaderJob(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& dep, const ShaderCacheConfig& shaderCache, const PreprocessCacheConfig& preCache, const bool preprocessOnly, std::string_view preprocessedOutputPath, const bool verbose) + RunResult runShaderJob(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& dep, const ShaderCacheConfig& shaderCache, const PreprocessCacheConfig& preCache, const bool preprocessOnly, std::string_view outputFilepath, std::string_view preprocessedOutputPath, const bool verbose) { RunResult r; auto makeIncludeFinder = [&]() @@ -707,12 +881,15 @@ class ShaderCompiler final : public IApplicationFramework }; const char* codePtr = (const char*)shader->getContent()->getPointer(); - 
std::string_view code(codePtr, std::strlen(codePtr)); + const size_t codeSize = shader->getContent()->getSize(); + std::string_view code(codePtr, codeSize); + if (!code.empty() && code.back() == '\0') + code.remove_suffix(1); CHLSLCompiler::SPreprocessorOptions preOpt = {}; preOpt.sourceIdentifier = sourceIdentifier; preOpt.logger = m_logger.get(); preOpt.forceIncludes = std::span(m_force_includes); - preOpt.depfile = dep.enabled; + preOpt.depfile = false; preOpt.depfilePath = dep.path; preOpt.codeForCache = code; @@ -731,20 +908,25 @@ class ShaderCompiler final : public IApplicationFramework const bool useShaderCache = shaderCache.enabled && !preprocessOnly; const bool usePreCache = preCache.enabled && !preprocessOnly; + const bool validateCacheDeps = true; struct ShaderCacheProbeResult { CacheLoadStatus status = CacheLoadStatus::Missing; bool hit = false; bool entryReady = false; + bool depsUpdated = false; smart_refctd_ptr cacheObj; IShaderCompiler::CCache::SEntry entry; std::chrono::nanoseconds duration = {}; + std::chrono::nanoseconds loadDuration = {}; + std::chrono::nanoseconds validateDuration = {}; }; struct PreprocessCacheProbeResult { bool skipped = false; + bool updateSkipped = false; bool ok = false; IShaderCompiler::SPreprocessCacheResult result = {}; IShaderCompiler::CPreprocessCache::ELoadStatus loadStatus = IShaderCompiler::CPreprocessCache::ELoadStatus::Missing; @@ -760,15 +942,21 @@ class ShaderCompiler final : public IApplicationFramework if (useShaderCache) { const auto start = clock_t::now(); - auto finder = makeIncludeFinder(); - shaderProbe.cacheObj = loadShaderCache(m_system.get(), shaderCache.path, shaderProbe.status); + const auto loadStart = clock_t::now(); + shaderProbe.cacheObj = loadShaderCache(m_system.get(), shaderCache.path, shaderProbe.status, false); + const auto loadEnd = clock_t::now(); if (!shaderProbe.cacheObj) shaderProbe.cacheObj = make_smart_refctd_ptr(); if (shaderProbe.status == CacheLoadStatus::Loaded) { - 
shaderProbe.hit = shaderProbe.cacheObj->findEntryForCode(code, opt, finder.get(), shaderProbe.entry); + auto finder = makeIncludeFinder(); + const auto validateStart = clock_t::now(); + shaderProbe.hit = shaderProbe.cacheObj->findEntryForCode(code, opt, finder.get(), shaderProbe.entry, validateCacheDeps, &shaderProbe.depsUpdated); + const auto validateEnd = clock_t::now(); shaderProbe.entryReady = shaderProbe.hit; + shaderProbe.validateDuration = validateEnd - validateStart; } + shaderProbe.loadDuration = loadEnd - loadStart; shaderProbe.duration = clock_t::now() - start; } @@ -784,7 +972,7 @@ class ShaderCompiler final : public IApplicationFramework { const auto start = clock_t::now(); auto finder = makeIncludeFinder(); - preProbe.cacheObj = IShaderCompiler::CPreprocessCache::loadFromFile(preCache.path, preProbe.loadStatus); + preProbe.cacheObj = IShaderCompiler::CPreprocessCache::loadFromFile(preCache.path, preProbe.loadStatus, false); if (!preProbe.cacheObj) preProbe.cacheObj = make_smart_refctd_ptr(); @@ -819,7 +1007,7 @@ class ShaderCompiler final : public IApplicationFramework return std::chrono::duration_cast(duration).count(); }; - auto writeDepfileFromDependencies = [&](const IShaderCompiler::CCache::SEntry::dependency_container_t& dependencies) -> bool + auto writeDepfileFromDependencies = [&](const IShaderCompiler::CCache::SEntry::dependency_container_t& dependencies, bool allowSkipIfExists) -> bool { if (!dep.enabled) return true; @@ -828,26 +1016,125 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Depfile path is empty.", ILogger::ELL_ERROR); return false; } - IShaderCompiler::DepfileWriteParams params = {}; - const std::string depfilePathString = preOpt.depfilePath.generic_string(); - params.depfilePath = depfilePathString; - params.sourceIdentifier = preOpt.sourceIdentifier; - if (!params.sourceIdentifier.empty()) - params.workingDirectory = std::filesystem::path(std::string(params.sourceIdentifier)).parent_path(); - 
params.system = m_system.get(); - return IShaderCompiler::writeDepfile(params, dependencies, nullptr, preOpt.logger); + if (allowSkipIfExists && m_system && m_system->exists(preOpt.depfilePath, IFileBase::ECF_READ)) + return true; + + auto escapeDepPath = [](const std::string& path) -> std::string + { + std::string normalized = path; + std::replace(normalized.begin(), normalized.end(), '\\', '/'); + std::string out; + out.reserve(normalized.size()); + for (const char c : normalized) + { + if (c == ' ' || c == '#') + out.push_back('\\'); + if (c == '$') + { + out.push_back('$'); + out.push_back('$'); + continue; + } + out.push_back(c); + } + return out; + }; + + std::vector depPaths; + depPaths.reserve(dependencies.size() + 1); + + auto addDepPath = [&](std::filesystem::path path) + { + if (path.empty()) + return; + if (path.is_relative()) + return; + auto normalized = path.generic_string(); + if (normalized.empty() || normalized.find_first_of("\r\n") != std::string::npos) + return; + depPaths.emplace_back(std::move(normalized)); + }; + + if (!preOpt.sourceIdentifier.empty()) + addDepPath(std::filesystem::path(std::string(preOpt.sourceIdentifier))); + + for (const auto& depEntry : dependencies) + { + if (!depEntry.getHasFileInfo()) + continue; + const auto& absPath = depEntry.getAbsolutePath(); + if (absPath.empty()) + continue; + addDepPath(absPath); + } + + std::sort(depPaths.begin(), depPaths.end()); + depPaths.erase(std::unique(depPaths.begin(), depPaths.end()), depPaths.end()); + + std::filesystem::path targetPath = preOpt.depfilePath; + if (targetPath.extension() == ".d") + targetPath.replace_extension(); + const std::string target = escapeDepPath(targetPath.generic_string()); + + std::string depfileContents; + depfileContents.append(target); + depfileContents.append(":"); + if (!depPaths.empty()) + { + depfileContents.append(" \\\n"); + for (size_t index = 0; index < depPaths.size(); ++index) + { + depfileContents.append(" "); + 
depfileContents.append(escapeDepPath(depPaths[index])); + if (index + 1 < depPaths.size()) + depfileContents.append(" \\\n"); + } + } + depfileContents.append("\n"); + + return writeBinaryFile(m_system.get(), std::filesystem::path(preOpt.depfilePath), depfileContents.data(), depfileContents.size()); + }; + + auto isOutputUpToDate = [&](const IShaderCompiler::CCache::SEntry& entry) -> bool + { + if (outputFilepath.empty()) + return false; + uint64_t outSize = 0; + int64_t outTime = 0; + if (!getFileInfo(m_system.get(), std::filesystem::path(outputFilepath), outSize, outTime)) + return false; + if (entry.uncompressedSize == 0 || outSize != entry.uncompressedSize) + return false; + const auto hashPath = makeOutputHashPath(std::filesystem::path(outputFilepath)); + OutputHashRecord record = {}; + const bool hashOk = readBinaryFile(m_system.get(), hashPath, &record, sizeof(record)); + if (!hashOk || record.size != entry.uncompressedSize || record.hash != entry.uncompressedContentHash) + return false; + uint64_t hashSize = 0; + int64_t hashTime = 0; + if (!getFileInfo(m_system.get(), hashPath, hashSize, hashTime)) + return false; + return outTime <= hashTime; }; if (verbose && (useShaderCache || usePreCache)) { if (useShaderCache) + { + if (shaderProbe.loadDuration.count()) + m_logger->log("Shader cache load took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(shaderProbe.loadDuration))); + if (shaderProbe.validateDuration.count()) + m_logger->log("Shader cache validate took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(shaderProbe.validateDuration))); m_logger->log("Shader cache lookup took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(shaderProbe.duration))); + } if (usePreCache) m_logger->log("Preprocess cache lookup took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(preProbe.duration))); m_logger->log("Total cache probe took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(std::chrono::duration_cast(probeEnd - probeStart)))); } 
smart_refctd_ptr cacheObj = shaderProbe.cacheObj; + if (!cacheObj && dep.enabled && !preprocessOnly) + cacheObj = make_smart_refctd_ptr(); CacheLoadStatus cacheStatus = shaderProbe.status; const bool shaderCacheHitExpected = shaderProbe.hit; @@ -898,7 +1185,6 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Preprocess cache failed (ignored, shader cache hit).", ILogger::ELL_DEBUG); } } - if (usePreCache && preProbe.result.cacheUpdated && preProbe.cacheObj) IShaderCompiler::CPreprocessCache::writeToFile(preCache.path, *preProbe.cacheObj); @@ -906,14 +1192,50 @@ class ShaderCompiler final : public IApplicationFramework { if (verbose) m_logger->log("Shader cache hit: using cached SPIR-V.", ILogger::ELL_DEBUG); + if (shaderProbe.depsUpdated) + { + const auto cacheWriteStart = clock_t::now(); + if (!writeShaderCache(m_system.get(), shaderCache.path, *cacheObj)) + m_logger->log("Failed to write shader cache: %s", ILogger::ELL_WARNING, shaderCache.path.string().c_str()); + if (verbose) + { + const auto cacheWriteEnd = clock_t::now(); + m_logger->log("Shader cache write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(cacheWriteEnd - cacheWriteStart))); + } + } + if (isOutputUpToDate(shaderProbe.entry)) + { + const auto hitDepfileStart = clock_t::now(); + if (!writeDepfileFromDependencies(shaderProbe.entry.dependencies, true)) + return r; + const auto hitDepfileEnd = clock_t::now(); + r.ok = true; + if (verbose) + { + m_logger->log("HIT timings: decompress=0 ms, depfile=%lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(hitDepfileEnd - hitDepfileStart))); + } + return r; + } + const auto hitDecompressStart = clock_t::now(); r.compiled = cacheObj->decompressEntry(shaderProbe.entry); + const auto hitDecompressEnd = clock_t::now(); r.ok = bool(r.compiled); if (!r.ok) return r; r.view = { (const char*)r.compiled->getContent()->getPointer(), r.compiled->getContent()->getSize() }; - if 
(!writeDepfileFromDependencies(shaderProbe.entry.dependencies)) + const auto hitDepfileStart = clock_t::now(); + if (!writeDepfileFromDependencies(shaderProbe.entry.dependencies, true)) return r; + const auto hitDepfileEnd = clock_t::now(); + if (verbose) + { + m_logger->log("HIT timings: decompress=%lld ms, depfile=%lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(hitDecompressEnd - hitDecompressStart)), + static_cast(toMs(hitDepfileEnd - hitDepfileStart))); + } return r; } @@ -946,6 +1268,10 @@ class ShaderCompiler final : public IApplicationFramework opt.writeCache = cacheObj.get(); opt.cacheHit = &cacheHit; } + else if (dep.enabled && cacheObj) + { + opt.writeCache = cacheObj.get(); + } if (preprocessedReady) { @@ -957,7 +1283,9 @@ class ShaderCompiler final : public IApplicationFramework auto compileFinder = makeIncludeFinder(); opt.preprocessorOptions.includeFinder = compileFinder.get(); + const auto compileStart = clock_t::now(); r.compiled = hlslcompiler->compileToSPIRV(codeToCompile, opt); + const auto compileEnd = clock_t::now(); r.ok = bool(r.compiled); if (r.ok) r.view = { (const char*)r.compiled->getContent()->getPointer(), r.compiled->getContent()->getSize() }; @@ -980,6 +1308,34 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Failed to write shader cache: %s", ILogger::ELL_WARNING, shaderCache.path.string().c_str()); } + if (dep.enabled && r.ok) + { + const IShaderCompiler::CCache::SEntry::dependency_container_t* deps = nullptr; + IShaderCompiler::CCache::SEntry depEntry; + if (preCacheObj && preCacheObj->hasEntry()) + { + deps = &preCacheObj->getEntry().dependencies; + } + else if (cacheObj) + { + if (cacheObj->findEntryForCode(code, opt, compileFinder.get(), depEntry, validateCacheDeps)) + deps = &depEntry.dependencies; + } + + if (!deps) + { + m_logger->log("Depfile requested but dependencies unavailable.", ILogger::ELL_ERROR); + r.ok = false; + return r; + } + + if (!writeDepfileFromDependencies(*deps, false)) + 
{ + r.ok = false; + return r; + } + } + return r; } From ec0239f48fdaa16225860457dfb6b368e1daa665 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 17 Jan 2026 11:18:23 +0100 Subject: [PATCH 08/14] improve preprocess cache lookup time --- src/nbl/asset/utils/IShaderCompiler.cpp | 31 ++++++++++++------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index 5aa16e645d..fae28ceaa4 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -239,12 +239,15 @@ inline void collectFileInfoMismatchesParallel(const DepContainer& deps, std::vec for (size_t i = 0; i < count; ++i) { const auto& dep = deps[i]; - if (!dep.getHasFileInfo()) + const auto& path = dep.getAbsolutePath(); + const bool hasAbsolutePath = !path.empty() && path.is_absolute(); + const bool hasFileInfo = dep.getHasFileInfo() && hasAbsolutePath; + if (!hasFileInfo) { #ifdef NBL_EMBED_BUILTIN_RESOURCES - if (!dep.getAbsolutePath().empty()) + if (!path.empty()) { - if (matchBuiltinResourceHash(dep.getAbsolutePath(), dep.getHash())) + if (matchBuiltinResourceHash(path, dep.getHash())) continue; } else @@ -257,12 +260,6 @@ inline void collectFileInfoMismatchesParallel(const DepContainer& deps, std::vec out.push_back(i); continue; } - const auto& path = dep.getAbsolutePath(); - if (path.empty()) - { - out.push_back(i); - continue; - } if (seenPaths.emplace(path, true).second) fileInfoIndices.push_back(i); } @@ -886,18 +883,19 @@ IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_ } valid = true; - if (header.hasFileInfo) + if (header.hasFileInfo && dependency.getAbsolutePath().is_absolute()) { dependency.setFileInfo(header.fileSize, header.lastWriteTime, true); updated = true; } } - if (valid && dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) + if (valid && dependency.getHasFileInfo() && 
dependency.getAbsolutePath().is_absolute()) { uint64_t size = 0; int64_t ticks = 0; - if (getFileInfoCached(dependency.getAbsolutePath(), size, ticks, system)) + if (getFileInfoCached(dependency.getAbsolutePath(), size, ticks, system) && + (dependency.getFileSize() != size || dependency.getLastWriteTime() != ticks)) { dependency.setFileInfo(size, ticks, true); updated = true; @@ -1866,7 +1864,7 @@ bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinde if (hash == dep.getHash()) { valid = true; - if (!dep.getHasFileInfo()) + if (!dep.getHasFileInfo() && dep.getAbsolutePath().is_absolute()) { dep.setFileInfo(file->getSize(), file->getLastWriteTime().time_since_epoch().count(), true); updated = true; @@ -1903,18 +1901,19 @@ bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinde } valid = true; - if (header.hasFileInfo) + if (header.hasFileInfo && dep.getAbsolutePath().is_absolute()) { dep.setFileInfo(header.fileSize, header.lastWriteTime, true); updated = true; } } - if (valid && dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) + if (valid && dep.getHasFileInfo() && dep.getAbsolutePath().is_absolute()) { uint64_t size = 0; int64_t ticks = 0; - if (getFileInfoCached(dep.getAbsolutePath(), size, ticks, system)) + if (getFileInfoCached(dep.getAbsolutePath(), size, ticks, system) && + (dep.getFileSize() != size || dep.getLastWriteTime() != ticks)) { dep.setFileInfo(size, ticks, true); updated = true; From f7194af0f4f4cc53dd2de3d8929363fdcc264a06 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 18 Jan 2026 11:07:17 +0100 Subject: [PATCH 09/14] further improvemnts, saving work --- src/nbl/asset/utils/CHLSLCompiler.cpp | 66 +- src/nbl/asset/utils/CWaveStringResolver.cpp | 26 +- src/nbl/asset/utils/IShaderCompiler.cpp | 54 +- tools/nsc/main.cpp | 1037 ++++++++++++++++++- 4 files changed, 1142 insertions(+), 41 deletions(-) diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp 
b/src/nbl/asset/utils/CHLSLCompiler.cpp index 2e5733c9b5..e8d4e231b6 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -22,8 +22,6 @@ #include #include #include -#include -#include using namespace nbl; using namespace nbl::asset; @@ -367,8 +365,21 @@ namespace nbl::wave std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector& dxc_compile_flags_override, std::vector* dependencies, std::vector* macro_defs) const { + using clock_t = std::chrono::high_resolution_clock; + const auto preprocessStart = clock_t::now(); + auto forceIncludesStart = preprocessStart; + auto forceIncludesEnd = preprocessStart; + auto pragmaStart = preprocessStart; + auto pragmaEnd = preprocessStart; + auto waveStart = preprocessStart; + auto waveEnd = preprocessStart; + if (preprocessOptions.applyForceIncludes && !preprocessOptions.forceIncludes.empty()) + { + forceIncludesStart = clock_t::now(); code = IShaderCompiler::applyForceIncludes(code, preprocessOptions.forceIncludes); + forceIncludesEnd = clock_t::now(); + } std::vector localDependencies; auto* dependenciesOut = dependencies; @@ -377,18 +388,46 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE // HACK: we do a pre-pre-process here to add \n after every #pragma to neutralize boost::wave's actions // See https://github.com/Devsh-Graphics-Programming/Nabla/issues/746 - size_t line_index = 0; - for (size_t i = 0; i < code.size(); i++) { - if (code[i] == '\n') { - auto line = code.substr(line_index, i - line_index); - boost::trim(line); - if (boost::starts_with(line, "#pragma")) - code.insert(i++, 1, '\n'); - line_index = i; + pragmaStart = clock_t::now(); + size_t extra_newlines = 0; + size_t line_start = 0; + for (size_t i = 0; i < code.size(); ++i) + { + if (code[i] != '\n') + continue; + size_t j = line_start; + while (j < i && (code[j] == ' ' || code[j] == 
'\t' || code[j] == '\r')) + ++j; + if (j + 7 <= i && code.compare(j, 7, "#pragma") == 0) + ++extra_newlines; + line_start = i + 1; + } + if (extra_newlines) + { + std::string patched; + patched.reserve(code.size() + extra_newlines); + line_start = 0; + for (size_t i = 0; i < code.size(); ++i) + { + if (code[i] != '\n') + continue; + size_t j = line_start; + while (j < i && (code[j] == ' ' || code[j] == '\t' || code[j] == '\r')) + ++j; + const bool is_pragma = (j + 7 <= i) && (code.compare(j, 7, "#pragma") == 0); + patched.append(code, line_start, i - line_start + 1); + if (is_pragma) + patched.push_back('\n'); + line_start = i + 1; } + if (line_start < code.size()) + patched.append(code, line_start, code.size() - line_start); + code = std::move(patched); } + pragmaEnd = clock_t::now(); // preprocess + waveStart = clock_t::now(); core::string resolvedString = nbl::wave::preprocess(code, preprocessOptions, bool(dependenciesOut), [&dxc_compile_flags_override, &stage, &dependenciesOut, macro_defs](nbl::wave::context& context) -> void { @@ -434,6 +473,7 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE context.dump_macro_definitions(*macro_defs); } ); + waveEnd = clock_t::now(); // for debugging cause MSVC doesn't like to show more than 21k LoC in TextVisualizer if constexpr (false) @@ -451,6 +491,12 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE if (resolvedString.empty()) return resolvedString; + preprocessOptions.logger.log("Preprocess breakdown: force_includes=%lld ms, pragma_pass=%lld ms, wave_total=%lld ms, total=%lld ms.", system::ILogger::ELL_PERFORMANCE, + static_cast(std::chrono::duration_cast(forceIncludesEnd - forceIncludesStart).count()), + static_cast(std::chrono::duration_cast(pragmaEnd - pragmaStart).count()), + static_cast(std::chrono::duration_cast(waveEnd - waveStart).count()), + static_cast(std::chrono::duration_cast(waveEnd - preprocessStart).count())); + return resolvedString; } 
diff --git a/src/nbl/asset/utils/CWaveStringResolver.cpp b/src/nbl/asset/utils/CWaveStringResolver.cpp index 456aadb4ac..bfd6674f2a 100644 --- a/src/nbl/asset/utils/CWaveStringResolver.cpp +++ b/src/nbl/asset/utils/CWaveStringResolver.cpp @@ -43,11 +43,14 @@ using namespace nbl; using namespace nbl::asset; #include "nbl/asset/utils/waveContext.h" +#include namespace nbl::wave { std::string preprocess(std::string& code, const nbl::asset::IShaderCompiler::SPreprocessorOptions& preprocessOptions, bool withCaching, std::function post) { + using clock_t = std::chrono::high_resolution_clock; + const auto setupStart = clock_t::now(); nbl::wave::context context(code.begin(), code.end(), preprocessOptions.sourceIdentifier.data(), { preprocessOptions }); context.set_caching(withCaching); context.add_macro_definition("__HLSL_VERSION"); @@ -63,12 +66,20 @@ namespace nbl::wave // preprocess core::string resolvedString; + const auto setupEnd = clock_t::now(); + auto lexStart = setupEnd; + auto lexEnd = setupEnd; try { - auto stream = std::stringstream(); - for (auto i= context.begin(); i!= context.end(); i++) - stream << i->get_value(); - resolvedString = stream.str(); + const size_t reserve = code.size() + (code.size() / 2); + resolvedString.reserve(reserve); + lexStart = clock_t::now(); + for (auto i = context.begin(); i != context.end(); ++i) + { + const auto& value = i->get_value(); + resolvedString.append(value.c_str(), value.size()); + } + lexEnd = clock_t::now(); } catch (const boost::wave::cpp_exception& e) { @@ -91,7 +102,14 @@ namespace nbl::wave return {}; } + const auto postStart = clock_t::now(); post(context); + const auto postEnd = clock_t::now(); + + preprocessOptions.logger.log("Wave timings: setup=%lld ms, lex=%lld ms, post=%lld ms.", system::ILogger::ELL_PERFORMANCE, + static_cast(std::chrono::duration_cast(setupEnd - setupStart).count()), + static_cast(std::chrono::duration_cast(lexEnd - lexStart).count()), + static_cast(std::chrono::duration_cast(postEnd - 
postStart).count())); return resolvedString; } diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index fae28ceaa4..4f2ef62ab6 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -46,6 +46,17 @@ struct FileInfoCacheEntry std::unordered_map g_fileInfoCache; std::mutex g_fileInfoCacheMutex; +struct IncludeCacheEntry +{ + uint64_t size = 0; + int64_t ticks = 0; + nbl::core::blake3_hash_t hash = {}; + std::string contents; +}; + +std::unordered_map g_includeCache; +std::mutex g_includeCacheMutex; + #ifdef NBL_EMBED_BUILTIN_RESOURCES inline bool tryGetBuiltinResource(const std::string& normalized, const nbl::system::SBuiltinFile*& outFile, std::string& outRel, std::string_view& outPrefix) { @@ -526,6 +537,27 @@ auto IShaderCompiler::CFileSystemIncludeLoader::getInclude(const system::path& s if (std::filesystem::exists(path)) path = std::filesystem::canonical(path); + uint64_t fileSize = 0; + int64_t lastWriteTime = 0; + const bool infoOk = getFileInfoFast(path, fileSize, lastWriteTime, m_system.get()); + if (infoOk) + { + std::lock_guard lock(g_includeCacheMutex); + auto it = g_includeCache.find(path); + if (it != g_includeCache.end() && it->second.size == fileSize && it->second.ticks == lastWriteTime) + { + found_t ret = {}; + ret.absolutePath = path; + ret.contents = it->second.contents; + ret.hash = it->second.hash; + ret.hasHash = true; + ret.fileSize = fileSize; + ret.lastWriteTime = lastWriteTime; + ret.hasFileInfo = true; + return ret; + } + } + core::smart_refctd_ptr f; { system::ISystem::future_t> future; @@ -558,11 +590,27 @@ auto IShaderCompiler::CFileSystemIncludeLoader::getInclude(const system::path& s } else { - ret.fileSize = size; - const auto fileTime = f->getLastWriteTime(); - ret.lastWriteTime = fileTime.time_since_epoch().count(); + ret.fileSize = infoOk ? fileSize : size; + ret.lastWriteTime = infoOk ? 
lastWriteTime : f->getLastWriteTime().time_since_epoch().count(); ret.hasFileInfo = true; } + if (!ret.hasHash) + { + std::array hash = {}; + core::XXHash_256(ret.contents.data(), ret.contents.size(), hash.data()); + std::memcpy(ret.hash.data, hash.data(), sizeof(ret.hash.data)); + ret.hasHash = true; + } + if (infoOk) + { + IncludeCacheEntry entry = {}; + entry.size = fileSize; + entry.ticks = lastWriteTime; + entry.hash = ret.hash; + entry.contents = ret.contents; + std::lock_guard lock(g_includeCacheMutex); + g_includeCache[path] = std::move(entry); + } return ret; } diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 75992181e4..6fef48539a 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -20,6 +20,9 @@ #include "nbl/core/hash/blake.h" #include "nbl/core/hash/fnv1a64.h" #include "nlohmann/json.hpp" +#ifdef _WIN32 +#include +#endif using json = nlohmann::json; using namespace nbl; @@ -585,7 +588,516 @@ class ShaderCompiler final : public IApplicationFramework return outputPath; } - static smart_refctd_ptr loadShaderCache(system::ISystem* system, const std::filesystem::path& path, CacheLoadStatus& status, bool skipDependencies) + static std::filesystem::path makeShaderCacheIndexPath(std::filesystem::path cachePath) + { + cachePath += ".idx"; + return cachePath; + } + + static std::filesystem::path makePreprocessCacheIndexPath(std::filesystem::path cachePath) + { + cachePath += ".idx"; + return cachePath; + } + + static void writeShaderCacheIndex(system::ISystem* system, const std::filesystem::path& path, const uint8_t* data, size_t size) + { + if (!system) + return; + if (size < IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES) + return; + + uint64_t cacheSize = 0; + int64_t cacheTime = 0; + if (!getFileInfo(system, path, cacheSize, cacheTime)) + return; + + uint64_t shaderBufferSize = 0; + std::memcpy(&shaderBufferSize, data, IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES); + if (size < IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES + 
shaderBufferSize) + return; + + bool hasBinaryDeps = false; + uint64_t jsonSize = 0; + size_t jsonOffset = 0; + size_t shaderOffset = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES; + + const size_t minNewHeader = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t) + shaderBufferSize; + if (size >= minNewHeader) + { + std::memcpy(&jsonSize, data + IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES, sizeof(jsonSize)); + const size_t candidateJsonOffset = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t) + shaderBufferSize; + if (candidateJsonOffset + jsonSize <= size) + { + hasBinaryDeps = true; + jsonOffset = candidateJsonOffset; + shaderOffset = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t); + } + } + + if (!hasBinaryDeps) + { + jsonOffset = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES + shaderBufferSize; + jsonSize = size - jsonOffset; + shaderOffset = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES; + } + + std::string_view containerJsonString(reinterpret_cast(data + jsonOffset), jsonSize); + json containerJson = json::parse(containerJsonString); + std::vector entries; + std::vector shaderCreationParams; + containerJson.at("entries").get_to(entries); + containerJson.at("shaderCreationParams").get_to(shaderCreationParams); + if (entries.size() != shaderCreationParams.size()) + return; + + if (hasBinaryDeps) + { + const size_t depsOffset = jsonOffset + jsonSize; + auto read_bytes = [data, size](size_t& offset, void* dst, size_t count) -> bool + { + if (offset + count > size) + return false; + std::memcpy(dst, data + offset, count); + offset += count; + return true; + }; + auto read_u32 = [&read_bytes](size_t& offset, uint32_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_u64 = [&read_bytes](size_t& offset, uint64_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_i64 = [&read_bytes](size_t& offset, int64_t& out) -> bool { return 
read_bytes(offset, &out, sizeof(out)); }; + auto read_hash = [&read_bytes](size_t& offset, core::blake3_hash_t& out) -> bool { return read_bytes(offset, out.data, sizeof(out.data)); }; + auto read_string = [&read_u32, &read_bytes, data, size](size_t& offset, std::string& out) -> bool + { + uint32_t count = 0; + if (!read_u32(offset, count)) + return false; + if (offset + count > size) + return false; + out.assign(reinterpret_cast(data + offset), count); + offset += count; + return true; + }; + + size_t offset = depsOffset; + uint32_t entryCount = 0; + if (!read_u32(offset, entryCount)) + return; + if (entryCount != entries.size()) + return; + + for (uint32_t i = 0; i < entryCount; ++i) + { + uint32_t depCount = 0; + if (!read_u32(offset, depCount)) + return; + auto& deps = entries[i].dependencies; + deps.clear(); + deps.reserve(depCount); + for (uint32_t d = 0; d < depCount; ++d) + { + std::string dir; + std::string identifier; + std::string absolutePath; + uint8_t standardInclude = 0; + core::blake3_hash_t hash = {}; + uint64_t fileSize = 0; + int64_t lastWriteTime = 0; + uint8_t hasFileInfo = 0; + if (!read_string(offset, dir) || !read_string(offset, identifier) || !read_string(offset, absolutePath) || + !read_bytes(offset, &standardInclude, sizeof(standardInclude)) || !read_hash(offset, hash) || + !read_u64(offset, fileSize) || !read_i64(offset, lastWriteTime) || !read_bytes(offset, &hasFileInfo, sizeof(hasFileInfo))) + { + return; + } + + deps.emplace_back(system::path(dir), identifier, standardInclude != 0, hash, system::path(absolutePath), fileSize, lastWriteTime, hasFileInfo != 0); + } + } + } + + std::vector out; + auto write_bytes = [&out](const void* data, size_t size) + { + const auto* ptr = reinterpret_cast(data); + out.insert(out.end(), ptr, ptr + size); + }; + auto write_u32 = [&write_bytes](uint32_t value) { write_bytes(&value, sizeof(value)); }; + auto write_u64 = [&write_bytes](uint64_t value) { write_bytes(&value, sizeof(value)); }; + auto 
write_i64 = [&write_bytes](int64_t value) { write_bytes(&value, sizeof(value)); }; + auto write_hash = [&write_bytes](const core::blake3_hash_t& hash) { write_bytes(hash.data, sizeof(hash.data)); }; + auto write_string = [&write_u32, &write_bytes](std::string_view value) + { + write_u32(static_cast(value.size())); + if (!value.empty()) + write_bytes(value.data(), value.size()); + }; + + const uint32_t magic = 0x4E534349u; + const uint32_t version = 1u; + write_u32(magic); + write_u32(version); + write_string(std::string_view(IShaderCompiler::CCache::VERSION)); + write_u64(cacheSize); + write_i64(cacheTime); + write_u32(static_cast(entries.size())); + + for (size_t i = 0; i < entries.size(); ++i) + { + const auto& entry = entries[i]; + const auto& params = shaderCreationParams[i]; + const uint64_t spirvOffset = shaderOffset + params.offset; + const uint64_t spirvSize = params.codeByteSize; + + write_hash(entry.hash); + write_u64(spirvOffset); + write_u64(spirvSize); + write_u64(entry.uncompressedSize); + write_hash(entry.uncompressedContentHash); + write_u32(static_cast(entry.dependencies.size())); + for (const auto& dep : entry.dependencies) + { + write_string(dep.getRequestingSourceDir().generic_string()); + write_string(dep.getIdentifier()); + write_string(dep.getAbsolutePath().generic_string()); + const uint8_t standardInclude = dep.isStandardInclude() ? 1u : 0u; + write_bytes(&standardInclude, sizeof(standardInclude)); + write_hash(dep.getHash()); + const uint64_t fileSize = dep.getFileSize(); + write_u64(fileSize); + const int64_t lastWriteTime = dep.getLastWriteTime(); + write_i64(lastWriteTime); + const uint8_t hasFileInfo = dep.getHasFileInfo() ? 
1u : 0u; + write_bytes(&hasFileInfo, sizeof(hasFileInfo)); + } + } + + const auto indexPath = makeShaderCacheIndexPath(path); + writeBinaryFile(system, indexPath, out.data(), out.size()); + } + + struct PreprocessCacheIndexEntry + { + core::blake3_hash_t prefixHash = {}; + uint32_t pragmaStage = 0; + uint32_t prefixSize = 0; + uint64_t prefixOffset = 0; + std::vector macroDefs; + std::vector dxcFlags; + IShaderCompiler::CCache::SEntry::dependency_container_t dependencies; + }; + + static uint64_t computePreprocessPrefixOffset(const IShaderCompiler::CPreprocessCache::SEntry& entry) + { + uint64_t offset = 0; + auto add_u32 = [&offset]() { offset += sizeof(uint32_t); }; + auto add_string = [&offset](std::string_view value) + { + offset += sizeof(uint32_t); + offset += static_cast(value.size()); + }; + + add_u32(); + add_string(IShaderCompiler::CPreprocessCache::VERSION); + offset += sizeof(entry.prefixHash); + add_u32(); + add_u32(); + + add_u32(); + for (const auto& macro : entry.macroDefs) + add_string(macro); + + add_u32(); + for (const auto& flag : entry.dxcFlags) + add_string(flag); + + add_u32(); + for (const auto& dep : entry.dependencies) + { + const auto dir = dep.getRequestingSourceDir().generic_string(); + const auto abs = dep.getAbsolutePath().generic_string(); + add_string(dir); + add_string(dep.getIdentifier()); + add_string(abs); + offset += sizeof(uint8_t); + offset += sizeof(core::blake3_hash_t); + offset += sizeof(uint64_t); + offset += sizeof(int64_t); + offset += sizeof(uint8_t); + } + return offset; + } + + static void writePreprocessCacheIndex(system::ISystem* system, const std::filesystem::path& path, const IShaderCompiler::CPreprocessCache& cache) + { + if (!system || !cache.hasEntry()) + return; + + uint64_t cacheSize = 0; + int64_t cacheTime = 0; + if (!getFileInfo(system, path, cacheSize, cacheTime)) + return; + + const auto& entry = cache.getEntry(); + const uint32_t prefixSize = static_cast(entry.preprocessedPrefix.size()); + const 
uint64_t prefixOffset = computePreprocessPrefixOffset(entry); + if (prefixOffset + prefixSize > cacheSize) + return; + + std::vector out; + auto write_bytes = [&out](const void* data, size_t size) + { + const auto* ptr = reinterpret_cast(data); + out.insert(out.end(), ptr, ptr + size); + }; + auto write_u32 = [&write_bytes](uint32_t value) { write_bytes(&value, sizeof(value)); }; + auto write_u64 = [&write_bytes](uint64_t value) { write_bytes(&value, sizeof(value)); }; + auto write_i64 = [&write_bytes](int64_t value) { write_bytes(&value, sizeof(value)); }; + auto write_hash = [&write_bytes](const core::blake3_hash_t& hash) { write_bytes(hash.data, sizeof(hash.data)); }; + auto write_string = [&write_u32, &write_bytes](std::string_view value) + { + write_u32(static_cast(value.size())); + if (!value.empty()) + write_bytes(value.data(), value.size()); + }; + + const uint32_t magic = 0x4E504349u; + const uint32_t version = 1u; + write_u32(magic); + write_u32(version); + write_string(IShaderCompiler::CPreprocessCache::VERSION); + write_u64(cacheSize); + write_i64(cacheTime); + write_hash(entry.prefixHash); + write_u32(entry.pragmaStage); + write_u32(prefixSize); + write_u64(prefixOffset); + + write_u32(static_cast(entry.macroDefs.size())); + for (const auto& macro : entry.macroDefs) + write_string(macro); + write_u32(static_cast(entry.dxcFlags.size())); + for (const auto& flag : entry.dxcFlags) + write_string(flag); + + write_u32(static_cast(entry.dependencies.size())); + for (const auto& dep : entry.dependencies) + { + const auto dir = dep.getRequestingSourceDir().generic_string(); + const auto abs = dep.getAbsolutePath().generic_string(); + write_string(dir); + write_string(dep.getIdentifier()); + write_string(abs); + const uint8_t standardInclude = dep.isStandardInclude() ? 
1u : 0u; + write_bytes(&standardInclude, sizeof(standardInclude)); + write_hash(dep.getHash()); + const uint64_t fileSize = dep.getFileSize(); + write_u64(fileSize); + const int64_t lastWriteTime = dep.getLastWriteTime(); + write_i64(lastWriteTime); + const uint8_t hasFileInfo = dep.getHasFileInfo() ? 1u : 0u; + write_bytes(&hasFileInfo, sizeof(hasFileInfo)); + } + + writeBinaryFile(system, makePreprocessCacheIndexPath(path), out.data(), out.size()); + } + + static bool tryLoadPreprocessCacheIndex(system::ISystem* system, const std::filesystem::path& cachePath, const core::blake3_hash_t& prefixHash, PreprocessCacheIndexEntry& outEntry, std::string* reason) + { + if (!system) + { + if (reason) + *reason = "no system"; + return false; + } + + const auto indexPath = makePreprocessCacheIndexPath(cachePath); + if (!system->exists(indexPath, IFileBase::ECF_READ)) + { + if (reason) + *reason = "index missing"; + return false; + } + + uint64_t cacheSize = 0; + int64_t cacheTime = 0; + if (!getFileInfo(system, cachePath, cacheSize, cacheTime)) + { + if (reason) + *reason = "cache info"; + return false; + } + + ISystem::future_t> future; + system->createFile(future, indexPath, IFileBase::ECF_READ); + if (!future.wait()) + { + if (reason) + *reason = "index open"; + return false; + } + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file || file->getSize() == 0) + { + if (reason) + *reason = "index empty"; + return false; + } + + std::vector data(file->getSize()); + IFile::success_t succ; + file->read(succ, data.data(), 0, data.size()); + if (!succ || succ.getBytesProcessed(true) != data.size()) + { + if (reason) + *reason = "index read"; + return false; + } + + auto read_bytes = [&data](size_t& offset, void* dst, size_t size) -> bool + { + if (offset + size > data.size()) + return false; + std::memcpy(dst, data.data() + offset, size); + offset += size; + return true; + }; + auto read_u32 = [&read_bytes](size_t& offset, 
uint32_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_u64 = [&read_bytes](size_t& offset, uint64_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_i64 = [&read_bytes](size_t& offset, int64_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_hash = [&read_bytes](size_t& offset, core::blake3_hash_t& out) -> bool { return read_bytes(offset, out.data, sizeof(out.data)); }; + auto read_string = [&read_u32, &read_bytes, &data](size_t& offset, std::string& out) -> bool + { + uint32_t size = 0; + if (!read_u32(offset, size)) + return false; + if (offset + size > data.size()) + return false; + out.assign(reinterpret_cast(data.data() + offset), size); + offset += size; + return true; + }; + + size_t offset = 0; + uint32_t magic = 0; + uint32_t version = 0; + if (!read_u32(offset, magic) || !read_u32(offset, version)) + return false; + if (magic != 0x4E504349u || version != 1u) + { + if (reason) + *reason = "index header"; + return false; + } + + std::string cacheVersion; + if (!read_string(offset, cacheVersion)) + return false; + if (cacheVersion != IShaderCompiler::CPreprocessCache::VERSION) + { + if (reason) + *reason = "cache version"; + return false; + } + + uint64_t indexCacheSize = 0; + int64_t indexCacheTime = 0; + if (!read_u64(offset, indexCacheSize) || !read_i64(offset, indexCacheTime)) + return false; + if (indexCacheSize != cacheSize || indexCacheTime != cacheTime) + { + if (reason) + *reason = "cache mismatch size=" + std::to_string(indexCacheSize) + "/" + std::to_string(cacheSize) + + " time=" + std::to_string(indexCacheTime) + "/" + std::to_string(cacheTime); + return false; + } + + core::blake3_hash_t storedPrefixHash = {}; + if (!read_hash(offset, storedPrefixHash)) + return false; + if (storedPrefixHash != prefixHash) + { + if (reason) + *reason = "prefix mismatch"; + return false; + } + + uint32_t pragmaStage = 0; + uint32_t prefixSize = 0; + uint64_t prefixOffset = 0; 
+ if (!read_u32(offset, pragmaStage) || !read_u32(offset, prefixSize) || !read_u64(offset, prefixOffset)) + return false; + if (prefixOffset + prefixSize > cacheSize) + { + if (reason) + *reason = "prefix range"; + return false; + } + + uint32_t macroCount = 0; + if (!read_u32(offset, macroCount)) + return false; + std::vector macroDefs; + macroDefs.reserve(macroCount); + for (uint32_t i = 0; i < macroCount; ++i) + { + std::string macro; + if (!read_string(offset, macro)) + return false; + macroDefs.emplace_back(std::move(macro)); + } + + uint32_t flagCount = 0; + if (!read_u32(offset, flagCount)) + return false; + std::vector dxcFlags; + dxcFlags.reserve(flagCount); + for (uint32_t i = 0; i < flagCount; ++i) + { + std::string flag; + if (!read_string(offset, flag)) + return false; + dxcFlags.emplace_back(std::move(flag)); + } + + uint32_t depCount = 0; + if (!read_u32(offset, depCount)) + return false; + IShaderCompiler::CCache::SEntry::dependency_container_t deps; + deps.reserve(depCount); + for (uint32_t i = 0; i < depCount; ++i) + { + std::string dir; + std::string identifier; + std::string abs; + uint8_t standardInclude = 0; + core::blake3_hash_t depHash = {}; + uint64_t fileSize = 0; + int64_t lastWriteTime = 0; + uint8_t hasFileInfo = 0; + if (!read_string(offset, dir) || !read_string(offset, identifier) || !read_string(offset, abs) || + !read_bytes(offset, &standardInclude, sizeof(standardInclude)) || !read_hash(offset, depHash) || + !read_u64(offset, fileSize) || !read_i64(offset, lastWriteTime) || !read_bytes(offset, &hasFileInfo, sizeof(hasFileInfo))) + { + return false; + } + + deps.emplace_back(system::path(dir), identifier, standardInclude != 0, depHash, system::path(abs), fileSize, lastWriteTime, hasFileInfo != 0); + } + + outEntry.prefixHash = storedPrefixHash; + outEntry.pragmaStage = pragmaStage; + outEntry.prefixSize = prefixSize; + outEntry.prefixOffset = prefixOffset; + outEntry.macroDefs = std::move(macroDefs); + outEntry.dxcFlags = 
std::move(dxcFlags); + outEntry.dependencies = std::move(deps); + return true; + } + + static smart_refctd_ptr loadShaderCache(system::ISystem* system, const std::filesystem::path& path, CacheLoadStatus& status, bool skipDependencies, bool refreshIndex) { status = CacheLoadStatus::Missing; if (!system) @@ -645,6 +1157,10 @@ class ShaderCompiler final : public IApplicationFramework serialized = std::span(data.data(), data.size()); } + const auto indexPath = makeShaderCacheIndexPath(path); + if (refreshIndex || !system->exists(indexPath, IFileBase::ECF_READ)) + writeShaderCacheIndex(system, path, serialized.data(), serialized.size()); + auto cache = IShaderCompiler::CCache::deserialize(serialized, skipDependencies); if (!cache) { @@ -658,7 +1174,44 @@ class ShaderCompiler final : public IApplicationFramework static bool getFileInfo(system::ISystem* system, const std::filesystem::path& path, uint64_t& sizeOut, int64_t& timeOut) { - if (!system || !system->exists(path, IFileBase::ECF_READ)) + if (path.empty()) + return false; + (void)system; +#ifdef _WIN32 + WIN32_FILE_ATTRIBUTE_DATA data = {}; + if (GetFileAttributesExW(path.c_str(), GetFileExInfoStandard, &data)) + { + ULARGE_INTEGER size = {}; + size.HighPart = data.nFileSizeHigh; + size.LowPart = data.nFileSizeLow; + ULARGE_INTEGER time = {}; + time.HighPart = data.ftLastWriteTime.dwHighDateTime; + time.LowPart = data.ftLastWriteTime.dwLowDateTime; + sizeOut = size.QuadPart; + timeOut = static_cast(time.QuadPart); + return sizeOut != 0; + } +#endif + std::error_code ec; + std::filesystem::directory_entry entry(path, ec); + if (ec) + return false; + const auto size = entry.file_size(ec); + if (ec) + return false; + const auto time = entry.last_write_time(ec); + if (ec) + return false; + sizeOut = size; + timeOut = time.time_since_epoch().count(); + return sizeOut != 0; + } + + static bool readBinaryFile(system::ISystem* system, const std::filesystem::path& path, void* data, size_t size) + { + if (!system) + return 
false; + if (!system->exists(path, IFileBase::ECF_READ)) return false; ISystem::future_t> future; @@ -669,15 +1222,15 @@ class ShaderCompiler final : public IApplicationFramework smart_refctd_ptr file; if (auto lock = future.acquire(); lock) lock.move_into(file); - if (!file) + if (!file || file->getSize() != size) return false; - sizeOut = file->getSize(); - timeOut = file->getLastWriteTime().time_since_epoch().count(); - return sizeOut != 0; + IFile::success_t succ; + file->read(succ, data, 0, size); + return succ.getBytesProcessed(true) == size; } - static bool readBinaryFile(system::ISystem* system, const std::filesystem::path& path, void* data, size_t size) + static bool readBinaryFileRange(system::ISystem* system, const std::filesystem::path& path, size_t offset, size_t size, std::vector& out) { if (!system) return false; @@ -692,11 +1245,12 @@ class ShaderCompiler final : public IApplicationFramework smart_refctd_ptr file; if (auto lock = future.acquire(); lock) lock.move_into(file); - if (!file || file->getSize() != size) + if (!file || file->getSize() < offset + size) return false; + out.resize(size); IFile::success_t succ; - file->read(succ, data, 0, size); + file->read(succ, out.data(), offset, size); return succ.getBytesProcessed(true) == size; } @@ -780,7 +1334,301 @@ class ShaderCompiler final : public IApplicationFramework auto buffer = cache.serialize(); if (!buffer) return false; - return writeBinaryFile(system, path, buffer->getPointer(), buffer->getSize()); + if (!writeBinaryFile(system, path, buffer->getPointer(), buffer->getSize())) + return false; + + const auto* data = static_cast(buffer->getPointer()); + writeShaderCacheIndex(system, path, data, buffer->getSize()); + return true; + } + + static bool computeShaderCacheHash(std::string_view code, const IShaderCompiler::SCompilerOptions& options, core::blake3_hash_t& out) + { + if (options.spirvOptimizer) + return false; + + const std::string_view cacheCode = 
options.preprocessorOptions.codeForCache.empty() ? code : options.preprocessorOptions.codeForCache; + size_t reserveSize = 0; + reserveSize += options.preprocessorOptions.sourceIdentifier.size(); + for (const auto& def : options.preprocessorOptions.extraDefines) + reserveSize += def.identifier.size() + def.definition.size(); + for (const auto& inc : options.preprocessorOptions.forceIncludes) + reserveSize += inc.size(); + reserveSize += sizeof(options.stage) + sizeof(options.preprocessorOptions.targetSpirvVersion) + sizeof(options.debugInfoFlags.value); + reserveSize += cacheCode.size(); + + std::vector defines; + defines.reserve(options.preprocessorOptions.extraDefines.size()); + for (const auto& def : options.preprocessorOptions.extraDefines) + defines.emplace_back(def); + std::sort(defines.begin(), defines.end(), [](const IShaderCompiler::SMacroDefinition& lhs, const IShaderCompiler::SMacroDefinition& rhs) + { + return lhs.identifier < rhs.identifier; + }); + + std::vector hashable; + hashable.reserve(reserveSize); + + hashable.insert(hashable.end(), options.preprocessorOptions.sourceIdentifier.begin(), options.preprocessorOptions.sourceIdentifier.end()); + for (const auto& def : defines) + { + hashable.insert(hashable.end(), def.identifier.begin(), def.identifier.end()); + hashable.insert(hashable.end(), def.definition.begin(), def.definition.end()); + } + for (const auto& inc : options.preprocessorOptions.forceIncludes) + hashable.insert(hashable.end(), inc.begin(), inc.end()); + + const auto stage = options.stage; + const auto spirvVersion = options.preprocessorOptions.targetSpirvVersion; + const auto debugFlags = options.debugInfoFlags.value; + hashable.insert(hashable.end(), reinterpret_cast(&stage), reinterpret_cast(&stage) + sizeof(stage)); + hashable.insert(hashable.end(), reinterpret_cast(&spirvVersion), reinterpret_cast(&spirvVersion) + sizeof(spirvVersion)); + hashable.insert(hashable.end(), reinterpret_cast(&debugFlags), reinterpret_cast(&debugFlags) 
+ sizeof(debugFlags)); + + hashable.insert(hashable.end(), cacheCode.begin(), cacheCode.end()); + + core::blake3_hasher hasher; + hasher.update(hashable.data(), hashable.size()); + out = static_cast(hasher); + return true; + } + + static bool fillCompilerArgsFromOptions(const IShaderCompiler::SCompilerOptions& options, IShaderCompiler::CCache::SEntry::SCompilerArgs& out) + { + if (options.spirvOptimizer) + return false; + + json pre; + pre["sourceIdentifier"] = std::string(options.preprocessorOptions.sourceIdentifier); + json extraDefines = json::array(); + std::vector defines; + defines.reserve(options.preprocessorOptions.extraDefines.size()); + for (const auto& def : options.preprocessorOptions.extraDefines) + defines.emplace_back(def); + std::sort(defines.begin(), defines.end(), [](const IShaderCompiler::SMacroDefinition& lhs, const IShaderCompiler::SMacroDefinition& rhs) + { + return lhs.identifier < rhs.identifier; + }); + for (const auto& def : defines) + { + extraDefines.push_back({ + { "identifier", std::string(def.identifier) }, + { "definition", std::string(def.definition) } + }); + } + pre["extraDefines"] = std::move(extraDefines); + json forceIncludes = json::array(); + for (const auto& inc : options.preprocessorOptions.forceIncludes) + forceIncludes.push_back(inc); + pre["forceIncludes"] = std::move(forceIncludes); + + json j; + j["shaderStage"] = static_cast(options.stage); + j["spirvVersion"] = static_cast(options.preprocessorOptions.targetSpirvVersion); + j["optimizerPasses"] = json::array(); + j["debugFlags"] = static_cast(options.debugInfoFlags.value); + j["preprocessorArgs"] = std::move(pre); + from_json(j, out); + return true; + } + + static bool tryLoadShaderCacheIndex(system::ISystem* system, const std::filesystem::path& cachePath, std::string_view code, const IShaderCompiler::SCompilerOptions& options, IShaderCompiler::CCache::SEntry& outEntry, std::string* reason) + { + if (!system) + { + if (reason) + *reason = "no system"; + return false; 
+ } + + const auto indexPath = makeShaderCacheIndexPath(cachePath); + if (!system->exists(indexPath, IFileBase::ECF_READ)) + { + if (reason) + *reason = "index missing"; + return false; + } + + uint64_t cacheSize = 0; + int64_t cacheTime = 0; + if (!getFileInfo(system, cachePath, cacheSize, cacheTime)) + { + if (reason) + *reason = "cache info"; + return false; + } + + ISystem::future_t> future; + system->createFile(future, indexPath, IFileBase::ECF_READ); + if (!future.wait()) + { + if (reason) + *reason = "index open"; + return false; + } + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file || file->getSize() == 0) + { + if (reason) + *reason = "index empty"; + return false; + } + + std::vector data(file->getSize()); + IFile::success_t succ; + file->read(succ, data.data(), 0, data.size()); + if (!succ || succ.getBytesProcessed(true) != data.size()) + { + if (reason) + *reason = "index read"; + return false; + } + + auto read_bytes = [&data](size_t& offset, void* dst, size_t size) -> bool + { + if (offset + size > data.size()) + return false; + std::memcpy(dst, data.data() + offset, size); + offset += size; + return true; + }; + auto read_u32 = [&read_bytes](size_t& offset, uint32_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_u64 = [&read_bytes](size_t& offset, uint64_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_i64 = [&read_bytes](size_t& offset, int64_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_hash = [&read_bytes](size_t& offset, core::blake3_hash_t& out) -> bool { return read_bytes(offset, out.data, sizeof(out.data)); }; + auto read_string = [&read_u32, &read_bytes, &data](size_t& offset, std::string& out) -> bool + { + uint32_t size = 0; + if (!read_u32(offset, size)) + return false; + if (offset + size > data.size()) + return false; + out.assign(reinterpret_cast(data.data() + offset), size); + offset += 
size; + return true; + }; + + size_t offset = 0; + uint32_t magic = 0; + uint32_t version = 0; + if (!read_u32(offset, magic) || !read_u32(offset, version)) + return false; + if (magic != 0x4E534349u || version != 1u) + { + if (reason) + *reason = "index header"; + return false; + } + + std::string cacheVersion; + if (!read_string(offset, cacheVersion)) + return false; + if (cacheVersion != IShaderCompiler::CCache::VERSION) + { + if (reason) + *reason = "cache version"; + return false; + } + + uint64_t indexCacheSize = 0; + int64_t indexCacheTime = 0; + if (!read_u64(offset, indexCacheSize) || !read_i64(offset, indexCacheTime)) + return false; + if (indexCacheSize != cacheSize || indexCacheTime != cacheTime) + { + if (reason) + *reason = "cache mismatch size=" + std::to_string(indexCacheSize) + "/" + std::to_string(cacheSize) + + " time=" + std::to_string(indexCacheTime) + "/" + std::to_string(cacheTime); + return false; + } + + uint32_t entryCount = 0; + if (!read_u32(offset, entryCount)) + return false; + + core::blake3_hash_t targetHash = {}; + if (!computeShaderCacheHash(code, options, targetHash)) + { + if (reason) + *reason = "hash compute"; + return false; + } + + for (uint32_t i = 0; i < entryCount; ++i) + { + core::blake3_hash_t hash = {}; + if (!read_hash(offset, hash)) + return false; + + uint64_t spirvOffset = 0; + uint64_t spirvSize = 0; + uint64_t uncompressedSize = 0; + core::blake3_hash_t uncompressedHash = {}; + uint32_t depCount = 0; + if (!read_u64(offset, spirvOffset) || !read_u64(offset, spirvSize) || !read_u64(offset, uncompressedSize) || !read_hash(offset, uncompressedHash) || !read_u32(offset, depCount)) + return false; + + const bool match = (hash == targetHash); + std::vector deps; + if (match) + deps.reserve(depCount); + + for (uint32_t d = 0; d < depCount; ++d) + { + std::string dir; + std::string identifier; + std::string abs; + uint8_t standardInclude = 0; + core::blake3_hash_t depHash = {}; + uint64_t fileSize = 0; + int64_t 
lastWriteTime = 0; + uint8_t hasFileInfo = 0; + if (!read_string(offset, dir) || !read_string(offset, identifier) || !read_string(offset, abs) || + !read_bytes(offset, &standardInclude, sizeof(standardInclude)) || !read_hash(offset, depHash) || + !read_u64(offset, fileSize) || !read_i64(offset, lastWriteTime) || !read_bytes(offset, &hasFileInfo, sizeof(hasFileInfo))) + { + return false; + } + + if (match) + { + deps.emplace_back(system::path(dir), identifier, standardInclude != 0, depHash, system::path(abs), fileSize, lastWriteTime, hasFileInfo != 0); + } + } + + if (!match) + continue; + + std::vector compressed; + if (!readBinaryFileRange(system, cachePath, static_cast(spirvOffset), static_cast(spirvSize), compressed)) + { + if (reason) + *reason = "cache read"; + return false; + } + + auto memoryResource = core::make_smart_refctd_ptr>(std::move(compressed)); + auto spirv = ICPUBuffer::create({ { spirvSize }, memoryResource->getBacker().data(), std::move(memoryResource) }, core::adopt_memory); + + outEntry = {}; + outEntry.mainFileContents.assign(code.begin(), code.end()); + if (!fillCompilerArgsFromOptions(options, outEntry.compilerArgs)) + return false; + outEntry.hash = targetHash; + outEntry.lookupHash = std::hash{}(outEntry.hash); + outEntry.dependencies = std::move(deps); + outEntry.uncompressedSize = uncompressedSize; + outEntry.uncompressedContentHash = uncompressedHash; + outEntry.spirv = std::move(spirv); + return true; + } + + if (reason) + *reason = "entry not found"; + return false; } @@ -909,6 +1757,7 @@ class ShaderCompiler final : public IApplicationFramework const bool useShaderCache = shaderCache.enabled && !preprocessOnly; const bool usePreCache = preCache.enabled && !preprocessOnly; const bool validateCacheDeps = true; + bool usedIndex = false; struct ShaderCacheProbeResult { @@ -943,8 +1792,32 @@ class ShaderCompiler final : public IApplicationFramework { const auto start = clock_t::now(); const auto loadStart = clock_t::now(); - 
shaderProbe.cacheObj = loadShaderCache(m_system.get(), shaderCache.path, shaderProbe.status, false); + bool indexExists = false; + if (m_system) + { + const auto indexPath = makeShaderCacheIndexPath(shaderCache.path); + indexExists = m_system->exists(indexPath, IFileBase::ECF_READ); + } + IShaderCompiler::CCache::SEntry indexedEntry; + std::string indexReason; + if (tryLoadShaderCacheIndex(m_system.get(), shaderCache.path, code, opt, indexedEntry, &indexReason)) + { + shaderProbe.cacheObj = make_smart_refctd_ptr(); + shaderProbe.cacheObj->insert(std::move(indexedEntry)); + shaderProbe.status = CacheLoadStatus::Loaded; + usedIndex = true; + if (verbose && m_logger) + m_logger->log("Shader cache index hit.", ILogger::ELL_DEBUG); + } + else + { + if (indexExists && verbose && m_logger) + m_logger->log("Shader cache index miss (%s).", ILogger::ELL_DEBUG, indexReason.empty() ? "unknown" : indexReason.c_str()); + const bool refreshIndex = indexExists && indexReason.rfind("cache mismatch", 0) == 0; + shaderProbe.cacheObj = loadShaderCache(m_system.get(), shaderCache.path, shaderProbe.status, false, refreshIndex); + } const auto loadEnd = clock_t::now(); + shaderProbe.loadDuration = loadEnd - loadStart; if (!shaderProbe.cacheObj) shaderProbe.cacheObj = make_smart_refctd_ptr(); if (shaderProbe.status == CacheLoadStatus::Loaded) @@ -955,8 +1828,27 @@ class ShaderCompiler final : public IApplicationFramework const auto validateEnd = clock_t::now(); shaderProbe.entryReady = shaderProbe.hit; shaderProbe.validateDuration = validateEnd - validateStart; + if (!shaderProbe.hit && usedIndex) + { + if (verbose && m_logger) + m_logger->log("Shader cache index entry rejected, loading full cache.", ILogger::ELL_DEBUG); + const auto reloadStart = clock_t::now(); + shaderProbe.cacheObj = loadShaderCache(m_system.get(), shaderCache.path, shaderProbe.status, false, false); + const auto reloadEnd = clock_t::now(); + shaderProbe.loadDuration += reloadEnd - reloadStart; + if 
(!shaderProbe.cacheObj) + shaderProbe.cacheObj = make_smart_refctd_ptr(); + if (shaderProbe.status == CacheLoadStatus::Loaded) + { + auto reloadFinder = makeIncludeFinder(); + const auto validateStart2 = clock_t::now(); + shaderProbe.hit = shaderProbe.cacheObj->findEntryForCode(code, opt, reloadFinder.get(), shaderProbe.entry, validateCacheDeps, &shaderProbe.depsUpdated); + const auto validateEnd2 = clock_t::now(); + shaderProbe.entryReady = shaderProbe.hit; + shaderProbe.validateDuration += (validateEnd2 - validateStart2); + } + } } - shaderProbe.loadDuration = loadEnd - loadStart; shaderProbe.duration = clock_t::now() - start; } @@ -972,17 +1864,111 @@ class ShaderCompiler final : public IApplicationFramework { const auto start = clock_t::now(); auto finder = makeIncludeFinder(); - preProbe.cacheObj = IShaderCompiler::CPreprocessCache::loadFromFile(preCache.path, preProbe.loadStatus, false); - if (!preProbe.cacheObj) - preProbe.cacheObj = make_smart_refctd_ptr(); - - auto localCompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - CHLSLCompiler::SPreprocessorOptions preOptThread = preOpt; - preOptThread.includeFinder = finder.get(); - IShader::E_SHADER_STAGE stageOverrideThread = static_cast(shaderStage); - preProbe.result = localCompiler->preprocessWithCache(code, stageOverrideThread, preOptThread, *preProbe.cacheObj, preProbe.loadStatus, sourceIdentifier); - preProbe.ok = preProbe.result.ok; - preProbe.duration = clock_t::now() - start; + bool preIndexExists = false; + if (m_system) + preIndexExists = m_system->exists(makePreprocessCacheIndexPath(preCache.path), IFileBase::ECF_READ); + + const auto codeProbe = IShaderCompiler::CPreprocessCache::probe(code, nullptr, IShaderCompiler::CPreprocessCache::ELoadStatus::Loaded, preOpt); + PreprocessCacheIndexEntry preIndexEntry; + std::string preIndexReason; + bool preIndexHit = false; + bool preIndexNeedsRefresh = !preIndexExists; + bool preIndexSkipLoad = !codeProbe.hasPrefix; + + if (codeProbe.hasPrefix && 
tryLoadPreprocessCacheIndex(m_system.get(), preCache.path, codeProbe.prefixHash, preIndexEntry, &preIndexReason)) + { + preIndexHit = true; + if (verbose && m_logger) + m_logger->log("Preprocess cache index hit.", ILogger::ELL_DEBUG); + } + else + { + if (preIndexExists && verbose && m_logger) + m_logger->log("Preprocess cache index miss (%s).", ILogger::ELL_DEBUG, preIndexReason.empty() ? "unknown" : preIndexReason.c_str()); + if (preIndexReason.rfind("cache mismatch", 0) == 0) + preIndexNeedsRefresh = true; + if (preIndexReason == "prefix mismatch" || preIndexReason == "cache version" || preIndexReason == "index header" || preIndexReason == "index empty") + preIndexSkipLoad = true; + } + + if (preIndexHit) + { + std::vector prefixBytes; + if (preIndexEntry.prefixSize) + { + if (!readBinaryFileRange(m_system.get(), preCache.path, static_cast(preIndexEntry.prefixOffset), static_cast(preIndexEntry.prefixSize), prefixBytes)) + preIndexHit = false; + } + if (preIndexHit) + { + auto cacheObj = make_smart_refctd_ptr(); + IShaderCompiler::CPreprocessCache::SEntry entry; + entry.prefixHash = preIndexEntry.prefixHash; + entry.pragmaStage = preIndexEntry.pragmaStage; + entry.macroDefs = std::move(preIndexEntry.macroDefs); + entry.dxcFlags = std::move(preIndexEntry.dxcFlags); + entry.dependencies = std::move(preIndexEntry.dependencies); + if (!prefixBytes.empty()) + entry.preprocessedPrefix.assign(reinterpret_cast(prefixBytes.data()), prefixBytes.size()); + cacheObj->setEntry(std::move(entry)); + + bool depsUpdated = false; + const bool depsValid = cacheObj->validateDependencies(finder.get(), &depsUpdated); + if (depsValid) + { + IShader::E_SHADER_STAGE stageOverrideThread = static_cast(shaderStage); + if (preIndexEntry.pragmaStage != static_cast(IShader::E_SHADER_STAGE::ESS_UNKNOWN)) + stageOverrideThread = static_cast(preIndexEntry.pragmaStage); + + preProbe.cacheObj = cacheObj; + preProbe.loadStatus = IShaderCompiler::CPreprocessCache::ELoadStatus::Loaded; + 
preProbe.result.cacheUsed = true; + preProbe.result.cacheHit = true; + preProbe.result.cacheUpdated = depsUpdated; + preProbe.result.status = IShaderCompiler::CPreprocessCache::EProbeStatus::Hit; + preProbe.result.stage = stageOverrideThread; + preProbe.result.code = cacheObj->buildCombinedCode(codeProbe.body, sourceIdentifier); + preProbe.ok = !preProbe.result.code.empty(); + if (preProbe.ok) + preProbe.duration = clock_t::now() - start; + else + preIndexHit = false; + } + else + { + preIndexHit = false; + } + } + } + + if (!preIndexHit) + { + if (preIndexSkipLoad) + { + preProbe.cacheObj = make_smart_refctd_ptr(); + preProbe.loadStatus = IShaderCompiler::CPreprocessCache::ELoadStatus::Missing; + } + else + { + preProbe.cacheObj = IShaderCompiler::CPreprocessCache::loadFromFile(preCache.path, preProbe.loadStatus, false); + if (!preProbe.cacheObj) + preProbe.cacheObj = make_smart_refctd_ptr(); + } + + auto localCompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CHLSLCompiler::SPreprocessorOptions preOptThread = preOpt; + preOptThread.includeFinder = finder.get(); + IShader::E_SHADER_STAGE stageOverrideThread = static_cast(shaderStage); + preProbe.result = localCompiler->preprocessWithCache(code, stageOverrideThread, preOptThread, *preProbe.cacheObj, preProbe.loadStatus, sourceIdentifier); + preProbe.ok = preProbe.result.ok; + preProbe.duration = clock_t::now() - start; + + if (preIndexNeedsRefresh && preProbe.loadStatus == IShaderCompiler::CPreprocessCache::ELoadStatus::Loaded && preProbe.cacheObj && preProbe.cacheObj->hasEntry()) + { + preProbe.cacheObj->buildCombinedCode(std::string_view{}, std::string_view{}); + writePreprocessCacheIndex(m_system.get(), preCache.path, *preProbe.cacheObj); + } + } } } @@ -1186,7 +2172,10 @@ class ShaderCompiler final : public IApplicationFramework } } if (usePreCache && preProbe.result.cacheUpdated && preProbe.cacheObj) - IShaderCompiler::CPreprocessCache::writeToFile(preCache.path, *preProbe.cacheObj); + { + if 
(IShaderCompiler::CPreprocessCache::writeToFile(preCache.path, *preProbe.cacheObj)) + writePreprocessCacheIndex(m_system.get(), preCache.path, *preProbe.cacheObj); + } if (useShaderCache && shaderProbe.hit && shaderProbe.entryReady) { From 52428c5989d243deb2c8744479f81bc62f336032 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 20 Jan 2026 12:01:04 +0100 Subject: [PATCH 10/14] 3 layer cache system, save work with tested stuff --- cmake/common.cmake | 12 +- docs/nsc-prebuilds.md | 20 +- include/nbl/asset/utils/CHLSLCompiler.h | 2 + include/nbl/asset/utils/IShaderCompiler.h | 1 + src/nbl/CMakeLists.txt | 23 +- src/nbl/asset/utils/CHLSLCompiler.cpp | 39 +- src/nbl/asset/utils/CWaveStringResolver.cpp | 24 +- src/nbl/asset/utils/IShaderCompiler.cpp | 28 ++ src/nbl/asset/utils/waveContext.h | 1 + tools/nsc/main.cpp | 525 +++++++++++++++++--- 10 files changed, 595 insertions(+), 80 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index a95590ce1f..e173e49f0e 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1147,6 +1147,8 @@ option(NBL_NSC_DISABLE_CUSTOM_COMMANDS "Disable NSC custom commands" OFF) option(NBL_NSC_VERBOSE "Enable NSC verbose logging to .log" ON) option(NSC_SHADER_CACHE "Enable NSC shader cache" ON) option(NSC_PREPROCESS_CACHE "Enable NSC preprocess cache" ON) +option(NSC_PREPROCESS_PREAMBLE "Enable NSC preprocess preamble" ON) +option(NSC_STDOUT_LOG "Mirror NSC log to stdout" OFF) set(NSC_CACHE_DIR "" CACHE PATH "Optional root directory for NSC cache files (shader/preprocess)") function(NBL_CREATE_NSC_COMPILE_RULES) @@ -2169,16 +2171,22 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE set(NBL_NSC_CACHE_ARGS "") if(NSC_SHADER_CACHE) - list(APPEND NBL_NSC_CACHE_ARGS -shader-cache) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-shader-cache) if(NSC_CACHE_DIR) list(APPEND NBL_NSC_CACHE_ARGS -shader-cache-file "${NBL_NSC_CACHE_PATH}") endif() endif() if(NSC_PREPROCESS_CACHE) - list(APPEND 
NBL_NSC_CACHE_ARGS -preprocess-cache) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-preprocess-cache) if(NSC_CACHE_DIR) list(APPEND NBL_NSC_CACHE_ARGS -preprocess-cache-file "${NBL_NSC_PREPROCESS_CACHE_PATH}") endif() + if(NSC_PREPROCESS_PREAMBLE) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-preprocess-preamble) + endif() + endif() + if(NSC_STDOUT_LOG) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-stdout-log) endif() set(NBL_NSC_COMPILE_COMMAND diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md index 97d915a1fe..d124f3679f 100644 --- a/docs/nsc-prebuilds.md +++ b/docs/nsc-prebuilds.md @@ -109,11 +109,12 @@ By default `NBL_CREATE_NSC_COMPILE_RULES` also collects `*.hlsl` files for IDE v ## Cache layers (SPIR-V + preprocess) -There are two independent caches: +There are three independent cache layers: - `NSC_SHADER_CACHE` (default `ON`) -> SPIR-V cache (`.spv.ppcache`) for full compilation results. - `NSC_PREPROCESS_CACHE` (default `ON`) -> preprocessor prefix cache (`.spv.ppcache.pre`) to avoid repeating Boost.Wave include work when only the main shader changes. -- Both caches are used only for compilation (not `-P` preprocess-only runs). +- `NSC_PREPROCESS_PREAMBLE` (default `ON`) -> preamble mode: reuse cached preprocessed prefix + macro state and run Wave only on the body, then compile without re-lexing the prefix. +- All layers are used only for compilation (not `-P` preprocess-only runs). - When preprocess cache is enabled and used, NSC also writes a combined preprocessed view (`.spv.pre.hlsl`) next to the outputs. - This file is the exact input fed to DXC on the preprocess-cache path, so it's ready to paste into Godbolt for repros (use the same flags/includes). 
@@ -128,13 +129,26 @@ With `-verbose`, `.log` shows: - `Shader cache write took: ...` (only when deps metadata changed on hit) - `Preprocess cache lookup took: ...` - `Total cache probe took: ...` + - `Preamble body preprocess took: ...` (only when preamble mode is used) - `Preprocess took: ...` (only on compile path) - `Compile took: ...` (only on compile path) - `Total build time: ...` (preprocess + compile) - `Write output took: ...` (only when output file is written) - `Total took: ...` (overall tool runtime) -You can redirect both caches into a shared directory with: +You can also toggle layers directly on the `nsc` CLI: + +- `-nbl-shader-cache` +- `-nbl-preprocess-cache` +- `-nbl-preprocess-preamble` +- `-nbl-stdout-log` (mirror the log file output to stdout) + +Related CMake options: + +- `NSC_PREPROCESS_PREAMBLE` (default `ON`) +- `NSC_STDOUT_LOG` (default `OFF`) + +You can redirect the caches into a shared directory with: - `NSC_CACHE_DIR` (path). The cache files keep the same relative layout as `BINARY_DIR` (including `/`), but live under the given root. This is handy for CI or persistent cache volumes. diff --git a/include/nbl/asset/utils/CHLSLCompiler.h b/include/nbl/asset/utils/CHLSLCompiler.h index b093ff98ed..87c496a1b2 100644 --- a/include/nbl/asset/utils/CHLSLCompiler.h +++ b/include/nbl/asset/utils/CHLSLCompiler.h @@ -33,6 +33,8 @@ class NBL_API2 CHLSLCompiler final : public IShaderCompiler struct SOptions : IShaderCompiler::SCompilerOptions { std::span dxcOptions; // TODO: span is a VIEW to memory, so to something which we should treat immutable - why not span of string_view then? 
Since its span we force users to keep those std::strings alive anyway but now we cannnot even make nice constexpr & pass such expression here directly + std::span dxcCompileFlagsOverride = {}; + bool assumePreprocessed = false; IShader::E_CONTENT_TYPE getCodeContentType() const override { return IShader::E_CONTENT_TYPE::ECT_HLSL; }; }; diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index c8dd6accb5..3df25a6f68 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -508,6 +508,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted core::blake3_hash_t prefixHash = {}; std::string preprocessedPrefix; std::vector macroDefs; + std::string macroBlock; std::vector dxcFlags; uint32_t pragmaStage = static_cast(IShader::E_SHADER_STAGE::ESS_UNKNOWN); CCache::SEntry::dependency_container_t dependencies; diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 512633536f..18561015c2 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -395,12 +395,21 @@ nbl_adjust_definitions() option(NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION "Enable to optimise CWaveStringResolver.cpp in Debug configuration, uses RWDI compile options for the TU" ON) if(NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION) - set_source_files_properties(asset/utils/CWaveStringResolver.cpp PROPERTIES - # just enabling inlining and optimisations will help a lot - COMPILE_OPTIONS "$<$:${NBL_CXX_RELWITHDEBINFO_COMPILE_OPTIONS}>" - # trade is you cannot mix with PCH + set this property per config (it seems), different compile options would lead to corrruptions and undefined behaviours - SKIP_PRECOMPILE_HEADERS ON - ) + if(MSVC) + set_source_files_properties(asset/utils/CWaveStringResolver.cpp PROPERTIES + # just enabling inlining and optimisations will help a lot + COMPILE_OPTIONS "$<$:${NBL_CXX_RELWITHDEBINFO_COMPILE_OPTIONS};/Zi>" + # trade is you cannot mix with PCH + set 
this property per config (it seems), different compile options would lead to corrruptions and undefined behaviours + SKIP_PRECOMPILE_HEADERS ON + ) + else() + set_source_files_properties(asset/utils/CWaveStringResolver.cpp PROPERTIES + # just enabling inlining and optimisations will help a lot + COMPILE_OPTIONS "$<$:${NBL_CXX_RELWITHDEBINFO_COMPILE_OPTIONS}>" + # trade is you cannot mix with PCH + set this property per config (it seems), different compile options would lead to corrruptions and undefined behaviours + SKIP_PRECOMPILE_HEADERS ON + ) + endif() endif() if(NBL_EXPLICIT_MODULE_LOAD_LOG) @@ -868,4 +877,4 @@ source_group(TREE "${NBL_ROOT_PATH}" source_group(TREE "${NBL_ROOT_PATH}" PREFIX "Source Files" FILES ${NABLA_SOURCE_FILES} -) \ No newline at end of file +) diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index e8d4e231b6..2bb4d2bea9 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -27,6 +27,32 @@ using namespace nbl; using namespace nbl::asset; using Microsoft::WRL::ComPtr; +static std::string buildMacroBlock(const std::vector& macros) +{ + if (macros.empty()) + return {}; + size_t reserve = 0; + for (const auto& macro : macros) + reserve += macro.size() + 12; + std::string out; + out.reserve(reserve); + for (const auto& macro : macros) + { + const size_t eq = macro.find('='); + const std::string_view name = eq == std::string::npos ? std::string_view(macro) : std::string_view(macro).substr(0, eq); + const std::string_view def = eq == std::string::npos ? 
std::string_view() : std::string_view(macro).substr(eq + 1); + out.append("#define "); + out.append(name); + if (!def.empty()) + { + out.push_back(' '); + out.append(def); + } + out.push_back('\n'); + } + return out; +} + static constexpr const wchar_t* SHADER_MODEL_PROFILE = L"XX_6_8"; static const wchar_t* ShaderStageToString(asset::IShader::E_SHADER_STAGE stage) { switch (stage) @@ -519,6 +545,7 @@ bool CHLSLCompiler::preprocessPrefixForCache(std::string_view code, IShader::E_S outEntry.dependencies = std::move(deps); outEntry.dxcFlags = std::move(dxcFlags); outEntry.macroDefs = std::move(macroDefs); + outEntry.macroBlock = buildMacroBlock(outEntry.macroDefs); outEntry.pragmaStage = static_cast(stage); return true; } @@ -537,7 +564,17 @@ core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::st using clock_t = std::chrono::high_resolution_clock; const auto preprocessStart = clock_t::now(); - auto newCode = preprocessShader(std::string(code), stage, hlslOptions.preprocessorOptions, dxc_compile_flags, dependencies); + std::string newCode; + if (hlslOptions.assumePreprocessed) + { + newCode = std::string(code); + if (!hlslOptions.dxcCompileFlagsOverride.empty()) + dxc_compile_flags.assign(hlslOptions.dxcCompileFlagsOverride.begin(), hlslOptions.dxcCompileFlagsOverride.end()); + } + else + { + newCode = preprocessShader(std::string(code), stage, hlslOptions.preprocessorOptions, dxc_compile_flags, dependencies); + } const auto preprocessEnd = clock_t::now(); logger.log("Preprocess took: %lld ms.", system::ILogger::ELL_PERFORMANCE, static_cast(std::chrono::duration_cast(preprocessEnd - preprocessStart).count())); if (newCode.empty()) return nullptr; diff --git a/src/nbl/asset/utils/CWaveStringResolver.cpp b/src/nbl/asset/utils/CWaveStringResolver.cpp index bfd6674f2a..95a7e5bbd7 100644 --- a/src/nbl/asset/utils/CWaveStringResolver.cpp +++ b/src/nbl/asset/utils/CWaveStringResolver.cpp @@ -52,21 +52,29 @@ namespace nbl::wave using clock_t = 
std::chrono::high_resolution_clock; const auto setupStart = clock_t::now(); nbl::wave::context context(code.begin(), code.end(), preprocessOptions.sourceIdentifier.data(), { preprocessOptions }); + const auto contextEnd = clock_t::now(); + context.set_caching(withCaching); context.add_macro_definition("__HLSL_VERSION"); context.add_macro_definition("__SPIRV_MAJOR_VERSION__=" + std::to_string(IShaderCompiler::getSpirvMajor(preprocessOptions.targetSpirvVersion))); context.add_macro_definition("__SPIRV_MINOR_VERSION__=" + std::to_string(IShaderCompiler::getSpirvMinor(preprocessOptions.targetSpirvVersion))); + const auto builtinsEnd = clock_t::now(); - // instead of defining extraDefines as "NBL_GLSL_LIMIT_MAX_IMAGE_DIMENSION_1D 32768", - // now define them as "NBL_GLSL_LIMIT_MAX_IMAGE_DIMENSION_1D=32768" - // to match boost wave syntax - // https://www.boost.org/doc/libs/1_82_0/libs/wave/doc/class_reference_context.html#:~:text=Maintain%20defined%20macros-,add_macro_definition,-bool%20add_macro_definition + const auto extraStart = builtinsEnd; for (const auto& define : preprocessOptions.extraDefines) - context.add_macro_definition(define.identifier.data() + core::string("=") + define.definition.data()); + { + core::string macro; + macro.reserve(define.identifier.size() + define.definition.size() + 1); + macro.append(define.identifier.data(), define.identifier.size()); + macro.push_back('='); + macro.append(define.definition.data(), define.definition.size()); + context.add_macro_definition(macro); + } + const auto extraEnd = clock_t::now(); // preprocess core::string resolvedString; - const auto setupEnd = clock_t::now(); + const auto setupEnd = extraEnd; auto lexStart = setupEnd; auto lexEnd = setupEnd; try @@ -106,6 +114,10 @@ namespace nbl::wave post(context); const auto postEnd = clock_t::now(); + preprocessOptions.logger.log("Wave setup breakdown: context=%lld ms, builtins=%lld ms, extra_defines=%lld ms.", system::ILogger::ELL_PERFORMANCE, + 
static_cast(std::chrono::duration_cast(contextEnd - setupStart).count()), + static_cast(std::chrono::duration_cast(builtinsEnd - contextEnd).count()), + static_cast(std::chrono::duration_cast(extraEnd - builtinsEnd).count())); preprocessOptions.logger.log("Wave timings: setup=%lld ms, lex=%lld ms, post=%lld ms.", system::ILogger::ELL_PERFORMANCE, static_cast(std::chrono::duration_cast(setupEnd - setupStart).count()), static_cast(std::chrono::duration_cast(lexEnd - lexStart).count()), diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index 4f2ef62ab6..195cf35d3d 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -339,6 +339,32 @@ using namespace nbl::asset; namespace { + std::string buildMacroBlock(const std::vector& macros) + { + if (macros.empty()) + return {}; + size_t reserve = 0; + for (const auto& macro : macros) + reserve += macro.size() + 12; + std::string out; + out.reserve(reserve); + for (const auto& macro : macros) + { + const size_t eq = macro.find('='); + const std::string_view name = eq == std::string::npos ? std::string_view(macro) : std::string_view(macro).substr(0, eq); + const std::string_view def = eq == std::string::npos ? 
std::string_view() : std::string_view(macro).substr(eq + 1); + out.append("#define "); + out.append(name); + if (!def.empty()) + { + out.push_back(' '); + out.append(def); + } + out.push_back('\n'); + } + return out; + } + void splitPrefix(std::string_view code, std::string_view& prefix, std::string_view& body) { size_t pos = 0; @@ -1544,6 +1570,7 @@ core::smart_refctd_ptr IShaderCompiler::CPrep return nullptr; entry.macroDefs.emplace_back(std::move(macro)); } + entry.macroBlock = buildMacroBlock(entry.macroDefs); uint32_t flagCount = 0; if (!read_u32(serializedCache, offset, flagCount)) @@ -1704,6 +1731,7 @@ core::smart_refctd_ptr IShaderCompiler::CPrep } entry.macroDefs.emplace_back(std::move(macro)); } + entry.macroBlock = buildMacroBlock(entry.macroDefs); uint32_t flagCount = 0; if (!read_u32(flagCount)) diff --git a/src/nbl/asset/utils/waveContext.h b/src/nbl/asset/utils/waveContext.h index 1958be6109..a6510b5fba 100644 --- a/src/nbl/asset/utils/waveContext.h +++ b/src/nbl/asset/utils/waveContext.h @@ -7,6 +7,7 @@ #include #include +#include #include #include diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 6fef48539a..4a81a6c83d 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -175,6 +176,13 @@ class ShaderCompiler final : public IApplicationFramework const auto rawArgs = std::vector(argv.begin(), argv.end()); const auto expandedArgs = expandJoinedArgs(rawArgs); m_logger = make_smart_refctd_ptr(bitflag(ILogger::ELL_ALL)); + if (!rawArgs.empty()) + { + std::error_code ec; + m_executablePath = std::filesystem::absolute(std::filesystem::path(rawArgs.front()), ec); + if (ec) + m_executablePath = std::filesystem::path(rawArgs.front()); + } argparse::ArgumentParser program("nsc"); program.add_argument("--dump-build-info").default_value(false).implicit_value(true); @@ -194,6 +202,10 @@ class ShaderCompiler final : public IApplicationFramework 
program.add_argument("-shader-cache-file").default_value(std::string{}); program.add_argument("-preprocess-cache").default_value(false).implicit_value(true); program.add_argument("-preprocess-cache-file").default_value(std::string{}); + program.add_argument("-nbl-shader-cache").default_value(false).implicit_value(true); + program.add_argument("-nbl-preprocess-cache").default_value(false).implicit_value(true); + program.add_argument("-nbl-preprocess-preamble").default_value(false).implicit_value(true); + program.add_argument("-nbl-stdout-log").default_value(false).implicit_value(true); std::vector unknownArgs; try @@ -274,14 +286,16 @@ class ShaderCompiler final : public IApplicationFramework const bool quiet = program.get("-quiet"); const bool verbose = program.get("-verbose"); - bool shaderCacheEnabled = program.get("-shader-cache"); + const bool stdoutLog = program.get("-nbl-stdout-log"); + bool shaderCacheEnabled = program.get("-shader-cache") || program.get("-nbl-shader-cache"); const std::string shaderCachePathOverride = program.is_used("-shader-cache-file") ? program.get("-shader-cache-file") : std::string{}; if (!shaderCachePathOverride.empty()) shaderCacheEnabled = true; - bool preprocessCacheEnabled = program.get("-preprocess-cache"); + bool preprocessCacheEnabled = program.get("-preprocess-cache") || program.get("-nbl-preprocess-cache"); const std::string preprocessCachePathOverride = program.is_used("-preprocess-cache-file") ? program.get("-preprocess-cache-file") : std::string{}; if (!preprocessCachePathOverride.empty()) preprocessCacheEnabled = true; + bool preambleEnabled = program.get("-nbl-preprocess-preamble"); if (quiet && verbose) { if (m_logger) @@ -300,7 +314,9 @@ class ShaderCompiler final : public IApplicationFramework const auto logPath = logPathOverride.empty() ? 
std::filesystem::path(outputFilepath).concat(".log") : std::filesystem::path(logPathOverride); const auto fileMask = bitflag(ILogger::ELL_ALL); - const auto consoleMask = bitflag(ILogger::ELL_WARNING) | ILogger::ELL_ERROR; + auto consoleMask = bitflag(ILogger::ELL_WARNING) | ILogger::ELL_ERROR; + if (stdoutLog) + consoleMask = fileMask; m_logger = make_smart_refctd_ptr(m_system, logPath, fileMask, consoleMask, noLog); const auto configName = std::filesystem::path(outputFilepath).parent_path().filename().string(); @@ -366,6 +382,7 @@ class ShaderCompiler final : public IApplicationFramework PreprocessCacheConfig preCache; preCache.enabled = preprocessCacheEnabled && !preprocessOnly; preCache.verbose = verbose; + preCache.preamble = preCache.enabled && preambleEnabled; if (preCache.enabled) preCache.path = preprocessCachePathOverride.empty() ? makePreprocessCachePath(outputFilepath) : std::filesystem::path(preprocessCachePathOverride); @@ -546,6 +563,7 @@ class ShaderCompiler final : public IApplicationFramework { bool enabled = false; bool verbose = false; + bool preamble = false; std::filesystem::path path; }; @@ -570,6 +588,12 @@ class ShaderCompiler final : public IApplicationFramework uint64_t size = 0; }; + struct ShaderCacheIndexLocation + { + uint64_t spirvOffset = 0; + uint64_t spirvSize = 0; + }; + static std::filesystem::path makeCachePath(std::filesystem::path outputPath) { outputPath += ".ppcache"; @@ -1432,7 +1456,7 @@ class ShaderCompiler final : public IApplicationFramework return true; } - static bool tryLoadShaderCacheIndex(system::ISystem* system, const std::filesystem::path& cachePath, std::string_view code, const IShaderCompiler::SCompilerOptions& options, IShaderCompiler::CCache::SEntry& outEntry, std::string* reason) + static bool tryLoadShaderCacheIndex(system::ISystem* system, const std::filesystem::path& cachePath, std::string_view code, const IShaderCompiler::SCompilerOptions& options, IShaderCompiler::CCache::SEntry& outEntry, 
ShaderCacheIndexLocation* outLocation, std::string* reason) { if (!system) { @@ -1511,6 +1535,24 @@ class ShaderCompiler final : public IApplicationFramework return true; }; + auto skip_string = [&read_u32, &data](size_t& offset) -> bool + { + uint32_t size = 0; + if (!read_u32(offset, size)) + return false; + if (offset + size > data.size()) + return false; + offset += size; + return true; + }; + auto skip_bytes = [&data](size_t& offset, size_t size) -> bool + { + if (offset + size > data.size()) + return false; + offset += size; + return true; + }; + size_t offset = 0; uint32_t magic = 0; uint32_t version = 0; @@ -1572,12 +1614,27 @@ class ShaderCompiler final : public IApplicationFramework return false; const bool match = (hash == targetHash); + if (match && (spirvSize == 0 || spirvOffset + spirvSize > cacheSize)) + { + if (reason) + *reason = "cache range"; + return false; + } std::vector deps; if (match) deps.reserve(depCount); for (uint32_t d = 0; d < depCount; ++d) { + if (!match) + { + if (!skip_string(offset) || !skip_string(offset) || !skip_string(offset)) + return false; + if (!skip_bytes(offset, sizeof(uint8_t) + sizeof(core::blake3_hash_t) + sizeof(uint64_t) + sizeof(int64_t) + sizeof(uint8_t))) + return false; + continue; + } + std::string dir; std::string identifier; std::string abs; @@ -1593,28 +1650,21 @@ class ShaderCompiler final : public IApplicationFramework return false; } - if (match) - { - deps.emplace_back(system::path(dir), identifier, standardInclude != 0, depHash, system::path(abs), fileSize, lastWriteTime, hasFileInfo != 0); - } + deps.emplace_back(system::path(dir), identifier, standardInclude != 0, depHash, system::path(abs), fileSize, lastWriteTime, hasFileInfo != 0); } if (!match) continue; - std::vector compressed; - if (!readBinaryFileRange(system, cachePath, static_cast(spirvOffset), static_cast(spirvSize), compressed)) + if (outLocation) { - if (reason) - *reason = "cache read"; - return false; + outLocation->spirvOffset = 
spirvOffset; + outLocation->spirvSize = spirvSize; } - auto memoryResource = core::make_smart_refctd_ptr>(std::move(compressed)); - auto spirv = ICPUBuffer::create({ { spirvSize }, memoryResource->getBacker().data(), std::move(memoryResource) }, core::adopt_memory); - + const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? code : options.preprocessorOptions.codeForCache; outEntry = {}; - outEntry.mainFileContents.assign(code.begin(), code.end()); + outEntry.mainFileContents.assign(cacheCode.begin(), cacheCode.end()); if (!fillCompilerArgsFromOptions(options, outEntry.compilerArgs)) return false; outEntry.hash = targetHash; @@ -1622,7 +1672,6 @@ class ShaderCompiler final : public IApplicationFramework outEntry.dependencies = std::move(deps); outEntry.uncompressedSize = uncompressedSize; outEntry.uncompressedContentHash = uncompressedHash; - outEntry.spirv = std::move(spirv); return true; } @@ -1739,7 +1788,31 @@ class ShaderCompiler final : public IApplicationFramework preOpt.forceIncludes = std::span(m_force_includes); preOpt.depfile = false; preOpt.depfilePath = dep.path; - preOpt.codeForCache = code; + std::string codeForCacheStorage; + if (!sourceIdentifier.empty()) + { + uint64_t srcSize = 0; + int64_t srcTime = 0; + const std::filesystem::path srcPath{std::string(sourceIdentifier)}; + if (getFileInfo(m_system.get(), srcPath, srcSize, srcTime)) + { + codeForCacheStorage.reserve(code.size() + 64); + codeForCacheStorage.append(code.data(), code.size()); + codeForCacheStorage.append("\n// nsc-file-info "); + codeForCacheStorage.append(std::to_string(srcSize)); + codeForCacheStorage.push_back(':'); + codeForCacheStorage.append(std::to_string(srcTime)); + preOpt.codeForCache = codeForCacheStorage; + if (verbose) + { + m_logger->log("Cache key file info: size=%llu, mtime=%lld.", ILogger::ELL_PERFORMANCE, + static_cast(srcSize), + static_cast(srcTime)); + } + } + } + if (preOpt.codeForCache.empty()) + preOpt.codeForCache = code; 
CHLSLCompiler::SOptions opt = {}; opt.stage = static_cast(shaderStage); @@ -1757,7 +1830,6 @@ class ShaderCompiler final : public IApplicationFramework const bool useShaderCache = shaderCache.enabled && !preprocessOnly; const bool usePreCache = preCache.enabled && !preprocessOnly; const bool validateCacheDeps = true; - bool usedIndex = false; struct ShaderCacheProbeResult { @@ -1765,8 +1837,12 @@ class ShaderCompiler final : public IApplicationFramework bool hit = false; bool entryReady = false; bool depsUpdated = false; + bool usedIndex = false; + bool cachePartial = false; + bool hasIndexLocation = false; smart_refctd_ptr cacheObj; IShaderCompiler::CCache::SEntry entry; + ShaderCacheIndexLocation indexLocation = {}; std::chrono::nanoseconds duration = {}; std::chrono::nanoseconds loadDuration = {}; std::chrono::nanoseconds validateDuration = {}; @@ -1780,6 +1856,8 @@ class ShaderCompiler final : public IApplicationFramework IShaderCompiler::SPreprocessCacheResult result = {}; IShaderCompiler::CPreprocessCache::ELoadStatus loadStatus = IShaderCompiler::CPreprocessCache::ELoadStatus::Missing; smart_refctd_ptr cacheObj; + std::string body; + bool hasPrefix = false; std::chrono::nanoseconds duration = {}; }; @@ -1799,13 +1877,17 @@ class ShaderCompiler final : public IApplicationFramework indexExists = m_system->exists(indexPath, IFileBase::ECF_READ); } IShaderCompiler::CCache::SEntry indexedEntry; + ShaderCacheIndexLocation indexLocation = {}; std::string indexReason; - if (tryLoadShaderCacheIndex(m_system.get(), shaderCache.path, code, opt, indexedEntry, &indexReason)) + if (tryLoadShaderCacheIndex(m_system.get(), shaderCache.path, code, opt, indexedEntry, &indexLocation, &indexReason)) { shaderProbe.cacheObj = make_smart_refctd_ptr(); shaderProbe.cacheObj->insert(std::move(indexedEntry)); shaderProbe.status = CacheLoadStatus::Loaded; - usedIndex = true; + shaderProbe.usedIndex = true; + shaderProbe.cachePartial = true; + shaderProbe.indexLocation = indexLocation; 
+ shaderProbe.hasIndexLocation = indexLocation.spirvSize != 0; if (verbose && m_logger) m_logger->log("Shader cache index hit.", ILogger::ELL_DEBUG); } @@ -1815,6 +1897,7 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Shader cache index miss (%s).", ILogger::ELL_DEBUG, indexReason.empty() ? "unknown" : indexReason.c_str()); const bool refreshIndex = indexExists && indexReason.rfind("cache mismatch", 0) == 0; shaderProbe.cacheObj = loadShaderCache(m_system.get(), shaderCache.path, shaderProbe.status, false, refreshIndex); + shaderProbe.cachePartial = false; } const auto loadEnd = clock_t::now(); shaderProbe.loadDuration = loadEnd - loadStart; @@ -1828,26 +1911,8 @@ class ShaderCompiler final : public IApplicationFramework const auto validateEnd = clock_t::now(); shaderProbe.entryReady = shaderProbe.hit; shaderProbe.validateDuration = validateEnd - validateStart; - if (!shaderProbe.hit && usedIndex) - { - if (verbose && m_logger) - m_logger->log("Shader cache index entry rejected, loading full cache.", ILogger::ELL_DEBUG); - const auto reloadStart = clock_t::now(); - shaderProbe.cacheObj = loadShaderCache(m_system.get(), shaderCache.path, shaderProbe.status, false, false); - const auto reloadEnd = clock_t::now(); - shaderProbe.loadDuration += reloadEnd - reloadStart; - if (!shaderProbe.cacheObj) - shaderProbe.cacheObj = make_smart_refctd_ptr(); - if (shaderProbe.status == CacheLoadStatus::Loaded) - { - auto reloadFinder = makeIncludeFinder(); - const auto validateStart2 = clock_t::now(); - shaderProbe.hit = shaderProbe.cacheObj->findEntryForCode(code, opt, reloadFinder.get(), shaderProbe.entry, validateCacheDeps, &shaderProbe.depsUpdated); - const auto validateEnd2 = clock_t::now(); - shaderProbe.entryReady = shaderProbe.hit; - shaderProbe.validateDuration += (validateEnd2 - validateStart2); - } - } + if (!shaderProbe.hit && shaderProbe.usedIndex && verbose && m_logger) + m_logger->log("Shader cache index entry rejected, treating as 
miss.", ILogger::ELL_DEBUG); } shaderProbe.duration = clock_t::now() - start; } @@ -1869,6 +1934,11 @@ class ShaderCompiler final : public IApplicationFramework preIndexExists = m_system->exists(makePreprocessCacheIndexPath(preCache.path), IFileBase::ECF_READ); const auto codeProbe = IShaderCompiler::CPreprocessCache::probe(code, nullptr, IShaderCompiler::CPreprocessCache::ELoadStatus::Loaded, preOpt); + if (preCache.preamble) + { + preProbe.hasPrefix = codeProbe.hasPrefix; + preProbe.body.assign(codeProbe.body.data(), codeProbe.body.size()); + } PreprocessCacheIndexEntry preIndexEntry; std::string preIndexReason; bool preIndexHit = false; @@ -1950,7 +2020,8 @@ class ShaderCompiler final : public IApplicationFramework } else { - preProbe.cacheObj = IShaderCompiler::CPreprocessCache::loadFromFile(preCache.path, preProbe.loadStatus, false); + const bool loadPrefix = preCache.preamble; + preProbe.cacheObj = IShaderCompiler::CPreprocessCache::loadFromFile(preCache.path, preProbe.loadStatus, loadPrefix); if (!preProbe.cacheObj) preProbe.cacheObj = make_smart_refctd_ptr(); } @@ -1976,6 +2047,12 @@ class ShaderCompiler final : public IApplicationFramework std::string preprocessedCode; bool preprocessedReady = false; + bool preprocessedNeedsWrite = false; + bool preambleUsed = false; + std::vector preambleDependencies; + std::vector preambleDxcFlags; + std::chrono::nanoseconds preambleDuration = {}; + std::span preambleDxcFlagsView = {}; std::string_view codeToCompile = code; smart_refctd_ptr preCacheObj; IShader::E_SHADER_STAGE stageOverride = static_cast(shaderStage); @@ -1993,6 +2070,37 @@ class ShaderCompiler final : public IApplicationFramework return std::chrono::duration_cast(duration).count(); }; + auto ensureIndexSpirvLoaded = [&](IShaderCompiler::CCache::SEntry& entry) -> bool + { + if (entry.spirv) + return true; + if (!shaderProbe.hasIndexLocation || shaderProbe.indexLocation.spirvSize == 0) + return false; + if (!m_system) + return false; + std::vector 
compressed; + if (!readBinaryFileRange(m_system.get(), shaderCache.path, static_cast(shaderProbe.indexLocation.spirvOffset), + static_cast(shaderProbe.indexLocation.spirvSize), compressed)) + return false; + auto memoryResource = core::make_smart_refctd_ptr>(std::move(compressed)); + entry.spirv = ICPUBuffer::create({ { static_cast(shaderProbe.indexLocation.spirvSize) }, + memoryResource->getBacker().data(), std::move(memoryResource) }, core::adopt_memory); + return static_cast(entry.spirv); + }; + + auto ensureFullCacheForWrite = [&](smart_refctd_ptr& cacheObj) -> bool + { + if (!shaderProbe.cachePartial) + return true; + CacheLoadStatus fullStatus = CacheLoadStatus::Missing; + auto fullCache = loadShaderCache(m_system.get(), shaderCache.path, fullStatus, false, false); + if (!fullCache) + return false; + cacheObj = std::move(fullCache); + shaderProbe.cachePartial = false; + return true; + }; + auto writeDepfileFromDependencies = [&](const IShaderCompiler::CCache::SEntry::dependency_container_t& dependencies, bool allowSkipIfExists) -> bool { if (!dep.enabled) @@ -2043,6 +2151,8 @@ class ShaderCompiler final : public IApplicationFramework if (!preOpt.sourceIdentifier.empty()) addDepPath(std::filesystem::path(std::string(preOpt.sourceIdentifier))); + if (!m_executablePath.empty()) + addDepPath(m_executablePath); for (const auto& depEntry : dependencies) { @@ -2145,12 +2255,14 @@ class ShaderCompiler final : public IApplicationFramework } if (preProbe.result.cacheUsed) { - preprocessedCode = std::move(preProbe.result.code); - preprocessedReady = true; stageOverride = preProbe.result.stage; preCacheObj = preProbe.cacheObj; - if (!preprocessedOutputPath.empty() && !writeTextFile(preprocessedOutputPath, preprocessedCode)) - return r; + if (!preCache.preamble) + { + preprocessedCode = std::move(preProbe.result.code); + preprocessedReady = true; + preprocessedNeedsWrite = !preprocessedOutputPath.empty(); + } } } else if (usePreCache && preCache.verbose) @@ -2173,8 
+2285,25 @@ class ShaderCompiler final : public IApplicationFramework } if (usePreCache && preProbe.result.cacheUpdated && preProbe.cacheObj) { - if (IShaderCompiler::CPreprocessCache::writeToFile(preCache.path, *preProbe.cacheObj)) + const auto preCacheWriteStart = clock_t::now(); + const bool preCacheWritten = IShaderCompiler::CPreprocessCache::writeToFile(preCache.path, *preProbe.cacheObj); + const auto preCacheWriteEnd = clock_t::now(); + if (verbose) + { + m_logger->log("Preprocess cache write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preCacheWriteEnd - preCacheWriteStart))); + } + if (preCacheWritten) + { + const auto preCacheIndexStart = clock_t::now(); writePreprocessCacheIndex(m_system.get(), preCache.path, *preProbe.cacheObj); + const auto preCacheIndexEnd = clock_t::now(); + if (verbose) + { + m_logger->log("Preprocess cache index write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preCacheIndexEnd - preCacheIndexStart))); + } + } } if (useShaderCache && shaderProbe.hit && shaderProbe.entryReady) @@ -2183,14 +2312,45 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Shader cache hit: using cached SPIR-V.", ILogger::ELL_DEBUG); if (shaderProbe.depsUpdated) { - const auto cacheWriteStart = clock_t::now(); - if (!writeShaderCache(m_system.get(), shaderCache.path, *cacheObj)) - m_logger->log("Failed to write shader cache: %s", ILogger::ELL_WARNING, shaderCache.path.string().c_str()); - if (verbose) + bool canWrite = true; + if (shaderProbe.cachePartial) { - const auto cacheWriteEnd = clock_t::now(); - m_logger->log("Shader cache write took: %lld ms.", ILogger::ELL_PERFORMANCE, - static_cast(toMs(cacheWriteEnd - cacheWriteStart))); + if (ensureFullCacheForWrite(cacheObj)) + { + IShaderCompiler::CCache::SEntry fullEntry; + if (cacheObj->findEntryForCode(code, opt, nullptr, fullEntry, false, nullptr)) + { + fullEntry.dependencies.clear(); + 
fullEntry.dependencies.reserve(shaderProbe.entry.dependencies.size()); + for (auto& dep : shaderProbe.entry.dependencies) + fullEntry.dependencies.emplace_back(dep); + cacheObj->insert(std::move(fullEntry)); + } + else + { + canWrite = false; + if (verbose && m_logger) + m_logger->log("Shader cache write skipped (entry missing after reload).", ILogger::ELL_DEBUG); + } + } + else + { + canWrite = false; + if (verbose && m_logger) + m_logger->log("Shader cache write skipped (failed to load full cache).", ILogger::ELL_DEBUG); + } + } + if (canWrite) + { + const auto cacheWriteStart = clock_t::now(); + if (!writeShaderCache(m_system.get(), shaderCache.path, *cacheObj)) + m_logger->log("Failed to write shader cache: %s", ILogger::ELL_WARNING, shaderCache.path.string().c_str()); + if (verbose) + { + const auto cacheWriteEnd = clock_t::now(); + m_logger->log("Shader cache write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(cacheWriteEnd - cacheWriteStart))); + } } } if (isOutputUpToDate(shaderProbe.entry)) @@ -2208,6 +2368,8 @@ class ShaderCompiler final : public IApplicationFramework return r; } const auto hitDecompressStart = clock_t::now(); + if (!ensureIndexSpirvLoaded(shaderProbe.entry)) + return r; r.compiled = cacheObj->decompressEntry(shaderProbe.entry); const auto hitDecompressEnd = clock_t::now(); r.ok = bool(r.compiled); @@ -2230,6 +2392,194 @@ class ShaderCompiler final : public IApplicationFramework } auto hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + clock_t::duration preambleBodyDuration = {}; + clock_t::duration preambleAssembleDuration = {}; + clock_t::duration preambleProbeDuration = {}; + clock_t::duration preambleFinderDuration = {}; + const bool usePreamble = preCache.preamble && preCacheObj && preCacheObj->hasEntry(); + if (usePreamble) + { + const auto preambleStart = clock_t::now(); + std::string body = std::move(preProbe.body); + if (!preProbe.hasPrefix) + { + const auto preambleProbeStart = clock_t::now(); + 
const auto bodyProbe = IShaderCompiler::CPreprocessCache::probe(code, preCacheObj.get(), preProbe.loadStatus, preOpt); + preambleProbeDuration = clock_t::now() - preambleProbeStart; + if (!bodyProbe.hasPrefix) + return r; + body.assign(bodyProbe.body.data(), bodyProbe.body.size()); + } + + const auto& entry = preCacheObj->getEntry(); + + const auto preambleFinderStart = clock_t::now(); + auto finder = makeIncludeFinder(); + preambleFinderDuration = clock_t::now() - preambleFinderStart; + auto bodyStage = stageOverride; + CHLSLCompiler::SPreprocessorOptions bodyOpt = preOpt; + bodyOpt.applyForceIncludes = false; + bodyOpt.includeFinder = finder.get(); + if (!entry.macroBlock.empty()) + { + std::string withDefines; + withDefines.reserve(body.size() + entry.macroBlock.size()); + withDefines.append(entry.macroBlock); + withDefines.append(body); + body = std::move(withDefines); + bodyOpt.extraDefines = {}; + } + else if (!entry.macroDefs.empty()) + { + size_t reserve = body.size(); + for (const auto& macro : entry.macroDefs) + reserve += macro.size() + 12; + std::string withDefines; + withDefines.reserve(reserve); + for (const auto& macro : entry.macroDefs) + { + const auto eq = macro.find('='); + const std::string_view name = eq == std::string::npos ? std::string_view(macro) : std::string_view(macro).substr(0, eq); + const std::string_view def = eq == std::string::npos ? 
std::string_view() : std::string_view(macro).substr(eq + 1); + withDefines.append("#define "); + withDefines.append(name); + if (!def.empty()) + { + withDefines.push_back(' '); + withDefines.append(def); + } + withDefines.push_back('\n'); + } + withDefines.append(body); + body = std::move(withDefines); + bodyOpt.extraDefines = {}; + } + + std::vector bodyDeps; + std::vector bodyDxcFlags; + std::string bodyPreprocessed; + if (!body.empty()) + { + const auto bodyHasInclude = [](std::string_view text) -> bool + { + size_t pos = 0; + while (pos < text.size()) + { + size_t lineEnd = text.find('\n', pos); + if (lineEnd == std::string_view::npos) + lineEnd = text.size(); + size_t i = pos; + while (i < lineEnd && (text[i] == ' ' || text[i] == '\t' || text[i] == '\r')) + ++i; + if (i < lineEnd && text[i] == '#') + { + ++i; + while (i < lineEnd && (text[i] == ' ' || text[i] == '\t')) + ++i; + if (lineEnd - i >= 7 && text.compare(i, 7, "include") == 0) + return true; + } + pos = lineEnd + 1; + } + return false; + }; + const bool hasInclude = bodyHasInclude(body); + auto* bodyDepsOut = hasInclude ? 
&bodyDeps : nullptr; + const auto bodyPreprocessStart = clock_t::now(); + bodyPreprocessed = hlslcompiler->preprocessShader(std::move(body), bodyStage, bodyOpt, bodyDxcFlags, bodyDepsOut, nullptr); + preambleBodyDuration = clock_t::now() - bodyPreprocessStart; + if (bodyPreprocessed.empty()) + return r; + } + + stageOverride = bodyStage; + if (!bodyDxcFlags.empty()) + { + preambleDxcFlags = std::move(bodyDxcFlags); + preambleDxcFlagsView = std::span(preambleDxcFlags.data(), preambleDxcFlags.size()); + } + else + { + preambleDxcFlagsView = std::span(entry.dxcFlags.data(), entry.dxcFlags.size()); + } + + const auto preambleAssembleStart = clock_t::now(); + preprocessedCode.clear(); + size_t reserve = entry.preprocessedPrefix.size() + bodyPreprocessed.size() + 64; + for (const auto& flag : preambleDxcFlagsView) + reserve += flag.size() + 1; + preprocessedCode.reserve(reserve); + if (!preambleDxcFlagsView.empty()) + { + preprocessedCode.append("#pragma dxc_compile_flags "); + for (size_t i = 0; i < preambleDxcFlagsView.size(); ++i) + { + if (i) + preprocessedCode.push_back(' '); + preprocessedCode.append(preambleDxcFlagsView[i]); + } + preprocessedCode.push_back('\n'); + } + if (!entry.preprocessedPrefix.empty()) + { + preprocessedCode.append(entry.preprocessedPrefix); + if (preprocessedCode.back() != '\n') + preprocessedCode.push_back('\n'); + } + preprocessedCode.append(bodyPreprocessed); + + preambleDependencies.clear(); + preambleDependencies.reserve(entry.dependencies.size() + bodyDeps.size()); + for (const auto& dep : entry.dependencies) + preambleDependencies.emplace_back(dep.getRequestingSourceDir(), dep.getIdentifier(), dep.isStandardInclude(), dep.getHash(), dep.getAbsolutePath(), dep.getFileSize(), dep.getLastWriteTime(), dep.getHasFileInfo()); + for (const auto& dep : bodyDeps) + preambleDependencies.emplace_back(dep.getRequestingSourceDir(), dep.getIdentifier(), dep.isStandardInclude(), dep.getHash(), dep.getAbsolutePath(), dep.getFileSize(), 
dep.getLastWriteTime(), dep.getHasFileInfo()); + + preprocessedReady = true; + preprocessedNeedsWrite = !preprocessedOutputPath.empty(); + preambleUsed = true; + preambleAssembleDuration = clock_t::now() - preambleAssembleStart; + preambleDuration = clock_t::now() - preambleStart; + } + + if (preprocessedNeedsWrite) + { + const auto preprocessedWriteStart = clock_t::now(); + if (!writeTextFile(preprocessedOutputPath, preprocessedCode)) + return r; + const auto preprocessedWriteEnd = clock_t::now(); + if (verbose) + { + m_logger->log("Preprocessed output write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preprocessedWriteEnd - preprocessedWriteStart))); + } + } + + if (verbose && preambleUsed) + { + if (preambleProbeDuration.count()) + { + m_logger->log("Preamble body probe took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preambleProbeDuration))); + } + if (preambleFinderDuration.count()) + { + m_logger->log("Preamble finder setup took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preambleFinderDuration))); + } + if (preambleBodyDuration.count()) + { + m_logger->log("Preamble body lex took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preambleBodyDuration))); + } + if (preambleAssembleDuration.count()) + { + m_logger->log("Preamble assemble took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preambleAssembleDuration))); + } + m_logger->log("Preamble body preprocess took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preambleDuration))); + } if (preprocessOnly) { @@ -2251,10 +2601,20 @@ class ShaderCompiler final : public IApplicationFramework opt.stage = stageOverride; bool cacheHit = false; + bool canWriteCache = shaderCache.enabled && cacheObj; + if (canWriteCache && shaderProbe.cachePartial) + { + if (!ensureFullCacheForWrite(cacheObj)) + { + canWriteCache = false; + if (verbose && m_logger) + m_logger->log("Shader cache write disabled (failed to load full cache).", ILogger::ELL_DEBUG); + } + 
} if (shaderCache.enabled && cacheObj) { opt.readCache = cacheObj.get(); - opt.writeCache = cacheObj.get(); + opt.writeCache = canWriteCache ? cacheObj.get() : nullptr; opt.cacheHit = &cacheHit; } else if (dep.enabled && cacheObj) @@ -2265,8 +2625,16 @@ class ShaderCompiler final : public IApplicationFramework if (preprocessedReady) { opt.preprocessorOptions.applyForceIncludes = false; - if (preCacheObj && preCacheObj->hasEntry()) + if (preambleUsed) + { + opt.assumePreprocessed = true; + opt.dxcCompileFlagsOverride = preambleDxcFlagsView; + opt.dependencyOverrides = &preambleDependencies; + } + else if (preCacheObj && preCacheObj->hasEntry()) + { opt.dependencyOverrides = &preCacheObj->getEntry().dependencies; + } codeToCompile = preprocessedCode; } @@ -2278,6 +2646,11 @@ class ShaderCompiler final : public IApplicationFramework r.ok = bool(r.compiled); if (r.ok) r.view = { (const char*)r.compiled->getContent()->getPointer(), r.compiled->getContent()->getSize() }; + if (verbose) + { + m_logger->log("Compile call took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(compileEnd - compileStart))); + } if (shaderCache.enabled && cacheObj) { @@ -2293,21 +2666,43 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Cache miss! 
Cold run (%s).", ILogger::ELL_DEBUG, cacheMissReason(cacheStatus)); } } - if (!writeShaderCache(m_system.get(), shaderCache.path, *cacheObj)) - m_logger->log("Failed to write shader cache: %s", ILogger::ELL_WARNING, shaderCache.path.string().c_str()); + if (canWriteCache) + { + const auto cacheWriteStart = clock_t::now(); + if (!writeShaderCache(m_system.get(), shaderCache.path, *cacheObj)) + m_logger->log("Failed to write shader cache: %s", ILogger::ELL_WARNING, shaderCache.path.string().c_str()); + const auto cacheWriteEnd = clock_t::now(); + if (verbose) + { + m_logger->log("Shader cache write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(cacheWriteEnd - cacheWriteStart))); + } + } } if (dep.enabled && r.ok) { const IShaderCompiler::CCache::SEntry::dependency_container_t* deps = nullptr; IShaderCompiler::CCache::SEntry depEntry; - if (preCacheObj && preCacheObj->hasEntry()) + if (preambleUsed) + { + deps = &preambleDependencies; + } + else if (preCacheObj && preCacheObj->hasEntry()) { deps = &preCacheObj->getEntry().dependencies; } else if (cacheObj) { - if (cacheObj->findEntryForCode(code, opt, compileFinder.get(), depEntry, validateCacheDeps)) + const auto depLookupStart = clock_t::now(); + const bool depFound = cacheObj->findEntryForCode(code, opt, compileFinder.get(), depEntry, validateCacheDeps); + const auto depLookupEnd = clock_t::now(); + if (verbose) + { + m_logger->log("Depfile dependency lookup took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(depLookupEnd - depLookupStart))); + } + if (depFound) deps = &depEntry.dependencies; } @@ -2318,11 +2713,18 @@ class ShaderCompiler final : public IApplicationFramework return r; } + const auto depfileStart = clock_t::now(); if (!writeDepfileFromDependencies(*deps, false)) { r.ok = false; return r; } + const auto depfileEnd = clock_t::now(); + if (verbose) + { + m_logger->log("Depfile write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(depfileEnd - depfileStart))); + } 
} return r; @@ -2369,6 +2771,7 @@ class ShaderCompiler final : public IApplicationFramework smart_refctd_ptr m_logger; std::vector m_arguments, m_include_search_paths, m_force_includes; smart_refctd_ptr m_assetMgr; + std::filesystem::path m_executablePath; }; NBL_MAIN_FUNC(ShaderCompiler) From cb6da1b6f4c967f27372360b6987eabd7a31011f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 20 Jan 2026 17:12:38 +0100 Subject: [PATCH 11/14] shader cache compression --- cmake/common.cmake | 2 + docs/nsc-prebuilds.md | 3 + include/nbl/asset/utils/IShaderCompiler.h | 24 +++++++- src/nbl/asset/utils/IShaderCompiler.cpp | 17 ++++++ .../utils/shaderCompiler_serialization.h | 13 ++++ tools/nsc/main.cpp | 61 +++++++++++++++++-- 6 files changed, 113 insertions(+), 7 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index e173e49f0e..aca164949c 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1146,6 +1146,7 @@ option(NSC_USE_DEPFILE "Generate depfiles for NSC custom commands" ON) option(NBL_NSC_DISABLE_CUSTOM_COMMANDS "Disable NSC custom commands" OFF) option(NBL_NSC_VERBOSE "Enable NSC verbose logging to .log" ON) option(NSC_SHADER_CACHE "Enable NSC shader cache" ON) +set(NSC_SHADER_CACHE_COMPRESSION "raw" CACHE STRING "NSC shader cache compression (raw or lzma)") option(NSC_PREPROCESS_CACHE "Enable NSC preprocess cache" ON) option(NSC_PREPROCESS_PREAMBLE "Enable NSC preprocess preamble" ON) option(NSC_STDOUT_LOG "Mirror NSC log to stdout" OFF) @@ -2172,6 +2173,7 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE set(NBL_NSC_CACHE_ARGS "") if(NSC_SHADER_CACHE) list(APPEND NBL_NSC_CACHE_ARGS -nbl-shader-cache) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-shader-cache-compression "${NSC_SHADER_CACHE_COMPRESSION}") if(NSC_CACHE_DIR) list(APPEND NBL_NSC_CACHE_ARGS -shader-cache-file "${NBL_NSC_CACHE_PATH}") endif() diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md index d124f3679f..d7eb09c168 100644 --- 
a/docs/nsc-prebuilds.md +++ b/docs/nsc-prebuilds.md @@ -112,6 +112,7 @@ By default `NBL_CREATE_NSC_COMPILE_RULES` also collects `*.hlsl` files for IDE v There are three independent cache layers: - `NSC_SHADER_CACHE` (default `ON`) -> SPIR-V cache (`.spv.ppcache`) for full compilation results. +- `NSC_SHADER_CACHE_COMPRESSION` (default `raw`) -> compression used for shader cache entries (`raw` or `lzma`). - `NSC_PREPROCESS_CACHE` (default `ON`) -> preprocessor prefix cache (`.spv.ppcache.pre`) to avoid repeating Boost.Wave include work when only the main shader changes. - `NSC_PREPROCESS_PREAMBLE` (default `ON`) -> preamble mode: reuse cached preprocessed prefix + macro state and run Wave only on the body, then compile without re-lexing the prefix. - All layers are used only for compilation (not `-P` preprocess-only runs). @@ -139,6 +140,7 @@ With `-verbose`, `.log` shows: You can also toggle layers directly on the `nsc` CLI: - `-nbl-shader-cache` +- `-nbl-shader-cache-compression ` - `-nbl-preprocess-cache` - `-nbl-preprocess-preamble` - `-nbl-stdout-log` (mirror the log file output to stdout) @@ -147,6 +149,7 @@ Related CMake options: - `NSC_PREPROCESS_PREAMBLE` (default `ON`) - `NSC_STDOUT_LOG` (default `OFF`) +- `NSC_SHADER_CACHE_COMPRESSION` (default `raw`) You can redirect the caches into a shared directory with: diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index 3df25a6f68..bf6dcd74f1 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -258,10 +258,16 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted public: // Used to check compatibility of Caches before reading - constexpr static inline std::string_view VERSION = "1.2.6"; + constexpr static inline std::string_view VERSION = "1.2.7"; static auto const SHADER_BUFFER_SIZE_BYTES = sizeof(uint64_t) / sizeof(uint8_t); // It's obviously 8 + enum class ECompression : uint8_t + { + LZMA = 0u, 
+ RAW = 1u + }; + struct SEntry { friend class CCache; @@ -416,7 +422,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted inline SEntry(const SEntry& other) : mainFileContents(other.mainFileContents), compilerArgs(other.compilerArgs), hash(other.hash), lookupHash(other.lookupHash), dependencies(other.dependencies), spirv(other.spirv), - uncompressedContentHash(other.uncompressedContentHash), uncompressedSize(other.uncompressedSize) {} + uncompressedContentHash(other.uncompressedContentHash), uncompressedSize(other.uncompressedSize), + compression(other.compression) {} inline SEntry& operator=(SEntry& other) = delete; inline SEntry(SEntry&& other) = default; @@ -436,6 +443,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted core::smart_refctd_ptr spirv; core::blake3_hash_t uncompressedContentHash; size_t uncompressedSize; + ECompression compression = ECompression::LZMA; }; inline void insert(SEntry&& entry) @@ -458,9 +466,20 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted auto retVal = core::make_smart_refctd_ptr(); for (auto& entry : m_container) retVal->m_container.emplace(entry); + retVal->m_defaultCompression = m_defaultCompression; return retVal; } + inline void setDefaultCompression(ECompression compression) + { + m_defaultCompression = compression; + } + + inline ECompression getDefaultCompression() const + { + return m_defaultCompression; + } + NBL_API2 core::smart_refctd_ptr find(const SEntry& mainFile, const CIncludeFinder* finder) const; NBL_API2 bool contains(const SEntry& mainFile, const CIncludeFinder* finder) const; NBL_API2 bool findEntryForCode(std::string_view code, const SCompilerOptions& options, const CIncludeFinder* finder, SEntry& outEntry, bool validateDependencies = true, bool* depsUpdated = nullptr) const; @@ -494,6 +513,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted using EntrySet = core::unordered_set; EntrySet m_container; + ECompression m_defaultCompression = 
ECompression::LZMA; NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder, bool validateDependencies, bool* depsUpdated) const; }; diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index 195cf35d3d..605cee6c74 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -503,6 +503,7 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons if (options.writeCache) { + entry.compression = options.writeCache->getDefaultCompression(); if (entry.setContent(retVal->getContent())) options.writeCache->insert(std::move(entry)); } @@ -1024,6 +1025,7 @@ core::smart_refctd_ptr IShaderCompiler::CCache::serialize() const { "lookupHash", entry.lookupHash }, { "uncompressedContentHash", entry.uncompressedContentHash.data }, { "uncompressedSize", entry.uncompressedSize }, + { "compression", static_cast(entry.compression) }, }; entries.emplace_back(std::move(entryJson)); @@ -1406,6 +1408,7 @@ IShaderCompiler::SPreprocessCacheResult IShaderCompiler::preprocessWithCache(std IShader::E_SHADER_STAGE prefixStage = stage; SPreprocessorOptions preCacheOpt = preprocessOptions; preCacheOpt.depfile = false; + preCacheOpt.applyForceIncludes = false; if (!preprocessPrefixForCache(probe.prefix, prefixStage, preCacheOpt, entry)) { result.ok = false; @@ -2104,6 +2107,12 @@ bool nbl::asset::IShaderCompiler::CCache::SEntry::setContent(const asset::ICPUBu uncompressedContentHash = uncompressedSpirvBuffer->getContentHash(); uncompressedSize = uncompressedSpirvBuffer->getSize(); + if (compression == ECompression::RAW) + { + spirv = core::smart_refctd_ptr(const_cast(uncompressedSpirvBuffer)); + return static_cast(spirv); + } + size_t propsSize = LZMA_PROPS_SIZE; size_t destLen = uncompressedSpirvBuffer->getSize() + uncompressedSpirvBuffer->getSize() / 3 + 128; core::vector compressedSpirv(propsSize + destLen); @@ -2131,6 +2140,14 @@ bool 
nbl::asset::IShaderCompiler::CCache::SEntry::setContent(const asset::ICPUBu core::smart_refctd_ptr nbl::asset::IShaderCompiler::CCache::SEntry::decompressShader() const { + if (compression == ECompression::RAW) + { + if (!spirv) + return nullptr; + auto buffer = spirv; + return core::make_smart_refctd_ptr(std::move(buffer), IShader::E_CONTENT_TYPE::ECT_SPIRV, compilerArgs.preprocessorArgs.sourceIdentifier.data()); + } + auto uncompressedBuf = ICPUBuffer::create({ uncompressedSize }); uncompressedBuf->setContentHash(uncompressedContentHash); diff --git a/src/nbl/asset/utils/shaderCompiler_serialization.h b/src/nbl/asset/utils/shaderCompiler_serialization.h index 4893737da7..a5024a7d05 100644 --- a/src/nbl/asset/utils/shaderCompiler_serialization.h +++ b/src/nbl/asset/utils/shaderCompiler_serialization.h @@ -6,6 +6,7 @@ using json = nlohmann::json; using SEntry = nbl::asset::IShaderCompiler::CCache::SEntry; +using CacheCompression = nbl::asset::IShaderCompiler::CCache::ECompression; namespace nbl::asset @@ -183,6 +184,7 @@ inline void from_json(const json& j, CPUShaderCreationParams& creationParams) inline void to_json(json& j, const SEntry& entry) { + uint32_t compression = static_cast(entry.compression); j = json{ { "mainFileContents", entry.mainFileContents }, { "compilerArgs", entry.compilerArgs }, @@ -191,6 +193,7 @@ inline void to_json(json& j, const SEntry& entry) { "dependencies", entry.dependencies }, { "uncompressedContentHash", entry.uncompressedContentHash.data }, { "uncompressedSize", entry.uncompressedSize }, + { "compression", compression }, }; } @@ -204,6 +207,16 @@ inline void from_json(const json& j, SEntry& entry) j.at("dependencies").get_to(entry.dependencies); j.at("uncompressedContentHash").get_to(entry.uncompressedContentHash.data); j.at("uncompressedSize").get_to(entry.uncompressedSize); + if (j.contains("compression")) + { + uint32_t compression = 0; + j.at("compression").get_to(compression); + entry.compression = static_cast(compression); + 
} + else + { + entry.compression = CacheCompression::LZMA; + } entry.spirv = nullptr; } diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 4a81a6c83d..9ebeaf5205 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -200,9 +200,11 @@ class ShaderCompiler final : public IApplicationFramework program.add_argument("-verbose").default_value(false).implicit_value(true); program.add_argument("-shader-cache").default_value(false).implicit_value(true); program.add_argument("-shader-cache-file").default_value(std::string{}); + program.add_argument("-shader-cache-compression").default_value(std::string{}); program.add_argument("-preprocess-cache").default_value(false).implicit_value(true); program.add_argument("-preprocess-cache-file").default_value(std::string{}); program.add_argument("-nbl-shader-cache").default_value(false).implicit_value(true); + program.add_argument("-nbl-shader-cache-compression").default_value(std::string{}); program.add_argument("-nbl-preprocess-cache").default_value(false).implicit_value(true); program.add_argument("-nbl-preprocess-preamble").default_value(false).implicit_value(true); program.add_argument("-nbl-stdout-log").default_value(false).implicit_value(true); @@ -296,6 +298,24 @@ class ShaderCompiler final : public IApplicationFramework if (!preprocessCachePathOverride.empty()) preprocessCacheEnabled = true; bool preambleEnabled = program.get("-nbl-preprocess-preamble"); + const std::string compressionArgPrimary = program.get("-nbl-shader-cache-compression"); + std::string compressionArg = !compressionArgPrimary.empty() ? 
compressionArgPrimary : program.get("-shader-cache-compression"); + IShaderCompiler::CCache::ECompression shaderCacheCompression = IShaderCompiler::CCache::ECompression::LZMA; + if (!compressionArg.empty()) + { + std::transform(compressionArg.begin(), compressionArg.end(), compressionArg.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (compressionArg == "raw") + shaderCacheCompression = IShaderCompiler::CCache::ECompression::RAW; + else if (compressionArg == "lzma") + shaderCacheCompression = IShaderCompiler::CCache::ECompression::LZMA; + else + { + if (m_logger) + m_logger->log("Invalid shader cache compression: %s (expected raw or lzma).", ILogger::ELL_ERROR, compressionArg.c_str()); + return false; + } + } if (quiet && verbose) { if (m_logger) @@ -376,6 +396,7 @@ class ShaderCompiler final : public IApplicationFramework ShaderCacheConfig shaderCache; shaderCache.enabled = shaderCacheEnabled && !preprocessOnly; shaderCache.verbose = verbose; + shaderCache.compression = shaderCacheCompression; if (shaderCache.enabled) shaderCache.path = shaderCachePathOverride.empty() ? 
makeCachePath(outputFilepath) : std::filesystem::path(shaderCachePathOverride); @@ -557,6 +578,7 @@ class ShaderCompiler final : public IApplicationFramework bool enabled = false; bool verbose = false; std::filesystem::path path; + IShaderCompiler::CCache::ECompression compression = IShaderCompiler::CCache::ECompression::LZMA; }; struct PreprocessCacheConfig @@ -757,7 +779,7 @@ class ShaderCompiler final : public IApplicationFramework }; const uint32_t magic = 0x4E534349u; - const uint32_t version = 1u; + const uint32_t version = 2u; write_u32(magic); write_u32(version); write_string(std::string_view(IShaderCompiler::CCache::VERSION)); @@ -777,6 +799,8 @@ class ShaderCompiler final : public IApplicationFramework write_u64(spirvSize); write_u64(entry.uncompressedSize); write_hash(entry.uncompressedContentHash); + const uint8_t compression = static_cast(entry.compression); + write_bytes(&compression, sizeof(compression)); write_u32(static_cast(entry.dependencies.size())); for (const auto& dep : entry.dependencies) { @@ -1330,13 +1354,33 @@ class ShaderCompiler final : public IApplicationFramework } file = nullptr; - system->deleteFile(path); const std::error_code moveError = system->moveFileOrDirectory(tempPath, path); - if (moveError) + if (!moveError) + return true; + + if (!system->exists(path, IFileBase::ECF_READ)) + { + system->deleteFile(tempPath); + return false; + } + + std::filesystem::path backupPath = path; + backupPath += ".bak"; + backupPath += std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count()); + system->deleteFile(backupPath); + if (system->moveFileOrDirectory(path, backupPath)) + { + system->deleteFile(tempPath); + return false; + } + if (system->moveFileOrDirectory(tempPath, path)) { + system->moveFileOrDirectory(backupPath, path); system->deleteFile(tempPath); + system->deleteFile(backupPath); return false; } + system->deleteFile(backupPath); return true; } @@ -1558,7 +1602,7 @@ class ShaderCompiler final : public 
IApplicationFramework uint32_t version = 0; if (!read_u32(offset, magic) || !read_u32(offset, version)) return false; - if (magic != 0x4E534349u || version != 1u) + if (magic != 0x4E534349u || version != 2u) { if (reason) *reason = "index header"; @@ -1609,8 +1653,10 @@ class ShaderCompiler final : public IApplicationFramework uint64_t spirvSize = 0; uint64_t uncompressedSize = 0; core::blake3_hash_t uncompressedHash = {}; + uint8_t compression = 0; uint32_t depCount = 0; - if (!read_u64(offset, spirvOffset) || !read_u64(offset, spirvSize) || !read_u64(offset, uncompressedSize) || !read_hash(offset, uncompressedHash) || !read_u32(offset, depCount)) + if (!read_u64(offset, spirvOffset) || !read_u64(offset, spirvSize) || !read_u64(offset, uncompressedSize) || !read_hash(offset, uncompressedHash) || + !read_bytes(offset, &compression, sizeof(compression)) || !read_u32(offset, depCount)) return false; const bool match = (hash == targetHash); @@ -1672,6 +1718,7 @@ class ShaderCompiler final : public IApplicationFramework outEntry.dependencies = std::move(deps); outEntry.uncompressedSize = uncompressedSize; outEntry.uncompressedContentHash = uncompressedHash; + outEntry.compression = static_cast(compression); return true; } @@ -1903,6 +1950,7 @@ class ShaderCompiler final : public IApplicationFramework shaderProbe.loadDuration = loadEnd - loadStart; if (!shaderProbe.cacheObj) shaderProbe.cacheObj = make_smart_refctd_ptr(); + shaderProbe.cacheObj->setDefaultCompression(shaderCache.compression); if (shaderProbe.status == CacheLoadStatus::Loaded) { auto finder = makeIncludeFinder(); @@ -2096,6 +2144,7 @@ class ShaderCompiler final : public IApplicationFramework auto fullCache = loadShaderCache(m_system.get(), shaderCache.path, fullStatus, false, false); if (!fullCache) return false; + fullCache->setDefaultCompression(shaderCache.compression); cacheObj = std::move(fullCache); shaderProbe.cachePartial = false; return true; @@ -2231,6 +2280,8 @@ class ShaderCompiler final : 
public IApplicationFramework smart_refctd_ptr cacheObj = shaderProbe.cacheObj; if (!cacheObj && dep.enabled && !preprocessOnly) cacheObj = make_smart_refctd_ptr(); + if (cacheObj) + cacheObj->setDefaultCompression(shaderCache.compression); CacheLoadStatus cacheStatus = shaderProbe.status; const bool shaderCacheHitExpected = shaderProbe.hit; From 380a5939e93d3b9e1124bffb29ee382f55ee1ef3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 22 Jan 2026 07:48:09 +0100 Subject: [PATCH 12/14] nsc cache layers and test harness --- CMakeLists.txt | 1 + cmake/common.cmake | 62 +- docs/nsc-prebuilds.md | 10 + tools/nsc/CMakeLists.txt | 2 + tools/nsc/main.cpp | 387 +++++++--- tools/nsc/test/cache_layers/CMakeLists.txt | 256 +++++++ tools/nsc/test/cache_layers/README.md | 192 +++++ .../test/cache_layers/cache_layers_test.py | 675 ++++++++++++++++++ 8 files changed, 1491 insertions(+), 94 deletions(-) create mode 100644 tools/nsc/test/cache_layers/CMakeLists.txt create mode 100644 tools/nsc/test/cache_layers/README.md create mode 100644 tools/nsc/test/cache_layers/cache_layers_test.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 773c9c3563..49b2dc8eed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,7 @@ project(Nabla LANGUAGES CXX C ) enable_language(C CXX ASM ASM_NASM) +enable_testing() include(GNUInstallDirs) include(CMakePackageConfigHelpers) diff --git a/cmake/common.cmake b/cmake/common.cmake index aca164949c..640a603539 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1150,6 +1150,7 @@ set(NSC_SHADER_CACHE_COMPRESSION "raw" CACHE STRING "NSC shader cache compressio option(NSC_PREPROCESS_CACHE "Enable NSC preprocess cache" ON) option(NSC_PREPROCESS_PREAMBLE "Enable NSC preprocess preamble" ON) option(NSC_STDOUT_LOG "Mirror NSC log to stdout" OFF) +option(NSC_JSON_REPORT "Write NSC JSON report alongside outputs" OFF) set(NSC_CACHE_DIR "" CACHE PATH "Optional root directory for NSC cache files (shader/preprocess)") 
function(NBL_CREATE_NSC_COMPILE_RULES) @@ -1221,7 +1222,7 @@ struct DeviceConfigCaps endif() set(REQUIRED_SINGLE_ARGS TARGET BINARY_DIR OUTPUT_VAR INPUTS INCLUDE NAMESPACE MOUNT_POINT_DEFINE) - set(OPTIONAL_SINGLE_ARGS GLOB_DIR) + set(OPTIONAL_SINGLE_ARGS GLOB_DIR EXPORT_RULES) cmake_parse_arguments(IMPL "DISCARD_DEFAULT_GLOB;DISABLE_CUSTOM_COMMANDS" "${REQUIRED_SINGLE_ARGS};${OPTIONAL_SINGLE_ARGS};LINK_TO" "COMMON_OPTIONS;DEPENDS" ${ARGV}) NBL_PARSE_REQUIRED(IMPL ${REQUIRED_SINGLE_ARGS}) @@ -1229,6 +1230,9 @@ struct DeviceConfigCaps if(NBL_NSC_DISABLE_CUSTOM_COMMANDS OR IMPL_DISABLE_CUSTOM_COMMANDS) set(_NBL_DISABLE_CUSTOM_COMMANDS TRUE) endif() + if(IMPL_EXPORT_RULES) + set(_NBL_EXPORT_RULE_INDEX 0) + endif() set(IMPL_HLSL_GLOB "") if(NOT IMPL_DISCARD_DEFAULT_GLOB) @@ -2190,6 +2194,11 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE if(NSC_STDOUT_LOG) list(APPEND NBL_NSC_CACHE_ARGS -nbl-stdout-log) endif() + set(NBL_NSC_REPORT_ARGS "") + if(NSC_JSON_REPORT) + set(NBL_NSC_REPORT_PATH "${TARGET_OUTPUT}.report.json") + list(APPEND NBL_NSC_REPORT_ARGS -nbl-report "${NBL_NSC_REPORT_PATH}") + endif() set(NBL_NSC_COMPILE_COMMAND "$" @@ -2198,6 +2207,7 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE ${NBL_NSC_DEPFILE_ARGS} $<$:-verbose> ${NBL_NSC_CACHE_ARGS} + ${NBL_NSC_REPORT_ARGS} -FI "${CONFIG_FILE}" "${TARGET_INPUT}" ) @@ -2229,6 +2239,9 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_PREPROCESS_CACHE_PATH}") list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_PREPROCESSED_PATH}") endif() + if(NSC_JSON_REPORT) + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_REPORT_PATH}") + endif() set(NBL_NSC_CUSTOM_COMMAND_ARGS OUTPUT "${TARGET_OUTPUT}" @@ -2245,6 +2258,37 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS DEPFILE "${DEPFILE_PATH}") endif() 
+ if(IMPL_EXPORT_RULES) + set(_NBL_EXPORT_INDEX "${_NBL_EXPORT_RULE_INDEX}") + set(${IMPL_EXPORT_RULES}_COMMAND_${_NBL_EXPORT_INDEX} ${NBL_NSC_COMPILE_COMMAND} PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_OUTPUT_${_NBL_EXPORT_INDEX} "${TARGET_OUTPUT}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_LOG_${_NBL_EXPORT_INDEX} "${NBL_NSC_LOG_PATH}" PARENT_SCOPE) + if(NSC_SHADER_CACHE) + set(${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_INDEX} "${NBL_NSC_CACHE_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_PREPROCESS_CACHE) + set(${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_INDEX} "${NBL_NSC_PREPROCESS_CACHE_PATH}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_INDEX} "${NBL_NSC_PREPROCESSED_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_USE_DEPFILE) + set(${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_INDEX} "${DEPFILE_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_JSON_REPORT) + set(${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_INDEX} "${NBL_NSC_REPORT_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + math(EXPR _NBL_EXPORT_INDEX_NEXT "${_NBL_EXPORT_INDEX} + 1") + set(_NBL_EXPORT_RULE_INDEX "${_NBL_EXPORT_INDEX_NEXT}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_COUNT "${_NBL_EXPORT_INDEX_NEXT}" PARENT_SCOPE) + endif() if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) add_custom_command(${NBL_NSC_CUSTOM_COMMAND_ARGS}) endif() @@ -2496,6 +2540,22 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE endif() set(${IMPL_OUTPUT_VAR} ${KEYS} PARENT_SCOPE) + if(IMPL_EXPORT_RULES) + set(${IMPL_EXPORT_RULES}_COUNT "${_NBL_EXPORT_RULE_INDEX}" PARENT_SCOPE) + 
if(_NBL_EXPORT_RULE_INDEX GREATER 0) + math(EXPR _NBL_EXPORT_LAST "${_NBL_EXPORT_RULE_INDEX} - 1") + foreach(_NBL_EXPORT_IDX RANGE 0 ${_NBL_EXPORT_LAST}) + set(${IMPL_EXPORT_RULES}_COMMAND_${_NBL_EXPORT_IDX} ${${IMPL_EXPORT_RULES}_COMMAND_${_NBL_EXPORT_IDX}} PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_OUTPUT_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_OUTPUT_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_LOG_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_LOG_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + endforeach() + endif() + endif() endfunction() function(NBL_CREATE_RESOURCE_ARCHIVE) diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md index d7eb09c168..44a35dbac4 100644 --- a/docs/nsc-prebuilds.md +++ b/docs/nsc-prebuilds.md @@ -137,6 +137,15 @@ With `-verbose`, `.log` shows: - `Write output took: ...` (only when output file is written) - `Total took: ...` (overall tool runtime) +When `-nbl-report` is provided, NSC also writes a JSON report containing: + +- `shader_cache` (hit/lookup/load/validate times) +- `preprocess_cache` (hit/lookup status) +- `preamble` (used + body/prefix timings) +- `compile` and `preprocess` timings +- `output` + `depfile` metadata +- `total_ms` and `total_with_output_ms` + You can also toggle layers directly on the `nsc` CLI: - `-nbl-shader-cache` @@ -144,6 +153,7 @@ You can also toggle layers directly 
on the `nsc` CLI: - `-nbl-preprocess-cache` - `-nbl-preprocess-preamble` - `-nbl-stdout-log` (mirror the log file output to stdout) +- `-nbl-report ` (write a JSON report with cache hits, timings, and output metadata) Related CMake options: diff --git a/tools/nsc/CMakeLists.txt b/tools/nsc/CMakeLists.txt index 2765f02fa5..25444050d1 100644 --- a/tools/nsc/CMakeLists.txt +++ b/tools/nsc/CMakeLists.txt @@ -60,6 +60,8 @@ add_test(NAME NBL_NSC_DUMP_BUILD_INFO_TEST COMMAND_EXPAND_LISTS ) +add_subdirectory(test/cache_layers) + if(NBL_ENABLE_DOCKER_INTEGRATION) find_program(DOCKER_EXE NAMES docker REQUIRED) diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 9ebeaf5205..ca8494e4ec 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -208,6 +208,7 @@ class ShaderCompiler final : public IApplicationFramework program.add_argument("-nbl-preprocess-cache").default_value(false).implicit_value(true); program.add_argument("-nbl-preprocess-preamble").default_value(false).implicit_value(true); program.add_argument("-nbl-stdout-log").default_value(false).implicit_value(true); + program.add_argument("-nbl-report").default_value(std::string{}); std::vector unknownArgs; try @@ -289,6 +290,7 @@ class ShaderCompiler final : public IApplicationFramework const bool quiet = program.get("-quiet"); const bool verbose = program.get("-verbose"); const bool stdoutLog = program.get("-nbl-stdout-log"); + const std::string reportPath = program.get("-nbl-report"); bool shaderCacheEnabled = program.get("-shader-cache") || program.get("-nbl-shader-cache"); const std::string shaderCachePathOverride = program.is_used("-shader-cache-file") ? 
program.get("-shader-cache-file") : std::string{}; if (!shaderCachePathOverride.empty()) @@ -496,7 +498,7 @@ class ShaderCompiler final : public IApplicationFramework const auto start = std::chrono::high_resolution_clock::now(); const std::string preprocessedOutputPath = outputFilepath + ".pre.hlsl"; - const auto job = runShaderJob(shader.get(), shaderStage, fileToCompile, dep, shaderCache, preCache, preprocessOnly, outputFilepath, preprocessedOutputPath, verbose); + auto job = runShaderJob(shader.get(), shaderStage, fileToCompile, dep, shaderCache, preCache, preprocessOnly, outputFilepath, preprocessedOutputPath, verbose, !reportPath.empty()); const auto end = std::chrono::high_resolution_clock::now(); const char* const op = preprocessOnly ? "preprocessing" : "compilation"; @@ -525,6 +527,9 @@ class ShaderCompiler final : public IApplicationFramework } } + bool outputWritten = false; + long long outputWriteMs = 0; + uint64_t outputSize = 0; if (!job.view.empty()) { const auto writeStart = std::chrono::high_resolution_clock::now(); @@ -533,6 +538,8 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Failed to write output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); return false; } + outputWritten = true; + outputSize = static_cast(job.view.size()); OutputHashRecord record = {}; record.size = job.view.size(); { @@ -544,10 +551,10 @@ class ShaderCompiler final : public IApplicationFramework if (!writeBinaryFile(m_system.get(), hashPath, &record, sizeof(record))) m_logger->log("Failed to write output hash file: %s", ILogger::ELL_WARNING, hashPath.string().c_str()); const auto writeEnd = std::chrono::high_resolution_clock::now(); + outputWriteMs = std::chrono::duration_cast(writeEnd - writeStart).count(); if (verbose) { - const auto duration = std::chrono::duration_cast(writeEnd - writeStart).count(); - m_logger->log("Write output took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(duration)); + m_logger->log("Write output took: 
%lld ms.", ILogger::ELL_PERFORMANCE, static_cast(outputWriteMs)); } } else if (verbose) @@ -555,8 +562,31 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Output up to date. Skipping write.", ILogger::ELL_DEBUG); } - const auto took = std::to_string(std::chrono::duration_cast(end - start).count()); - m_logger->log("Total took: %s ms.", ILogger::ELL_PERFORMANCE, took.c_str()); + const auto totalMs = std::chrono::duration_cast(end - start).count(); + const auto overallEnd = std::chrono::high_resolution_clock::now(); + const auto totalWithOutputMs = std::chrono::duration_cast(overallEnd - start).count(); + m_logger->log("Total took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(totalMs)); + + if (!reportPath.empty() && !job.report.is_null()) + { + job.report["version"] = 1; + job.report["input"] = fileToCompile; + job.report["config"] = configLabel; + job.report["builtins"] = !noNblBuiltins; + job.report["preprocess_only"] = preprocessOnly; + job.report["output"]["path"] = outputFilepath; + job.report["output"]["written"] = outputWritten; + job.report["output"]["ms"] = outputWriteMs; + job.report["output"]["size"] = outputSize; + job.report["depfile"]["enabled"] = dep.enabled; + job.report["depfile"]["path"] = dep.path; + job.report["total_ms"] = totalMs; + job.report["total_with_output_ms"] = totalWithOutputMs; + + const auto reportDump = job.report.dump(2); + if (!writeBinaryFile(m_system.get(), std::filesystem::path(reportPath), reportDump.data(), reportDump.size())) + m_logger->log("Failed to write report: %s", ILogger::ELL_WARNING, reportPath.c_str()); + } flushSystemQueue(m_system.get(), std::filesystem::path(outputFilepath)); @@ -602,6 +632,7 @@ class ShaderCompiler final : public IApplicationFramework std::string text; smart_refctd_ptr compiled; std::string_view view; + json report; }; struct OutputHashRecord @@ -1812,9 +1843,20 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Saved \"%s\"", 
ILogger::ELL_INFO, oPath.string().c_str()); } - RunResult runShaderJob(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& dep, const ShaderCacheConfig& shaderCache, const PreprocessCacheConfig& preCache, const bool preprocessOnly, std::string_view outputFilepath, std::string_view preprocessedOutputPath, const bool verbose) + RunResult runShaderJob(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& dep, const ShaderCacheConfig& shaderCache, const PreprocessCacheConfig& preCache, const bool preprocessOnly, std::string_view outputFilepath, std::string_view preprocessedOutputPath, const bool verbose, const bool reportEnabled) { RunResult r; + if (reportEnabled) + { + r.report = json::object(); + r.report["shader_cache"] = json::object(); + r.report["preprocess_cache"] = json::object(); + r.report["preamble"] = json::object(); + r.report["compile"] = json::object(); + r.report["preprocess"] = json::object(); + r.report["output"] = json::object(); + r.report["depfile"] = json::object(); + } auto makeIncludeFinder = [&]() { auto finder = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); @@ -1877,6 +1919,19 @@ class ShaderCompiler final : public IApplicationFramework const bool useShaderCache = shaderCache.enabled && !preprocessOnly; const bool usePreCache = preCache.enabled && !preprocessOnly; const bool validateCacheDeps = true; + if (reportEnabled) + { + r.report["shader_cache"]["enabled"] = useShaderCache; + r.report["preprocess_cache"]["enabled"] = usePreCache; + r.report["preamble"]["enabled"] = usePreCache && preCache.preamble; + r.report["compile"]["called"] = false; + r.report["compile"]["ms"] = 0; + r.report["preprocess"]["called"] = false; + r.report["preprocess"]["ms"] = 0; + r.report["depfile"]["enabled"] = dep.enabled; + r.report["depfile"]["written"] = false; + r.report["depfile"]["ms"] = 0; + } struct ShaderCacheProbeResult { @@ -1911,8 
+1966,35 @@ class ShaderCompiler final : public IApplicationFramework ShaderCacheProbeResult shaderProbe; PreprocessCacheProbeResult preProbe; using clock_t = std::chrono::high_resolution_clock; + auto toMs = [](const std::chrono::nanoseconds duration) -> long long + { + return std::chrono::duration_cast(duration).count(); + }; + auto cacheMissReason = [](CacheLoadStatus status) -> const char* + { + if (status == CacheLoadStatus::Missing) + return "cache file missing; first build, cleaned, output moved, or out of date"; + if (status == CacheLoadStatus::Invalid) + return "cache file invalid or version mismatch"; + return "input/deps/options changed; cache invalidated"; + }; const auto probeStart = clock_t::now(); + core::smart_refctd_ptr sharedFinder; + auto getFinder = [&]() -> IShaderCompiler::CIncludeFinder* + { + if (!sharedFinder) + sharedFinder = makeIncludeFinder(); + return sharedFinder.get(); + }; + core::smart_refctd_ptr sharedCompiler; + auto getCompiler = [&]() -> CHLSLCompiler* + { + if (!sharedCompiler) + sharedCompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + return sharedCompiler.get(); + }; + if (useShaderCache) { const auto start = clock_t::now(); @@ -1953,9 +2035,9 @@ class ShaderCompiler final : public IApplicationFramework shaderProbe.cacheObj->setDefaultCompression(shaderCache.compression); if (shaderProbe.status == CacheLoadStatus::Loaded) { - auto finder = makeIncludeFinder(); + auto* finder = getFinder(); const auto validateStart = clock_t::now(); - shaderProbe.hit = shaderProbe.cacheObj->findEntryForCode(code, opt, finder.get(), shaderProbe.entry, validateCacheDeps, &shaderProbe.depsUpdated); + shaderProbe.hit = shaderProbe.cacheObj->findEntryForCode(code, opt, finder, shaderProbe.entry, validateCacheDeps, &shaderProbe.depsUpdated); const auto validateEnd = clock_t::now(); shaderProbe.entryReady = shaderProbe.hit; shaderProbe.validateDuration = validateEnd - validateStart; @@ -1976,7 +2058,7 @@ class ShaderCompiler final : 
public IApplicationFramework else { const auto start = clock_t::now(); - auto finder = makeIncludeFinder(); + auto* finder = getFinder(); bool preIndexExists = false; if (m_system) preIndexExists = m_system->exists(makePreprocessCacheIndexPath(preCache.path), IFileBase::ECF_READ); @@ -2031,7 +2113,7 @@ class ShaderCompiler final : public IApplicationFramework cacheObj->setEntry(std::move(entry)); bool depsUpdated = false; - const bool depsValid = cacheObj->validateDependencies(finder.get(), &depsUpdated); + const bool depsValid = cacheObj->validateDependencies(finder, &depsUpdated); if (depsValid) { IShader::E_SHADER_STAGE stageOverrideThread = static_cast(shaderStage); @@ -2074,9 +2156,9 @@ class ShaderCompiler final : public IApplicationFramework preProbe.cacheObj = make_smart_refctd_ptr(); } - auto localCompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + auto* localCompiler = getCompiler(); CHLSLCompiler::SPreprocessorOptions preOptThread = preOpt; - preOptThread.includeFinder = finder.get(); + preOptThread.includeFinder = finder; IShader::E_SHADER_STAGE stageOverrideThread = static_cast(shaderStage); preProbe.result = localCompiler->preprocessWithCache(code, stageOverrideThread, preOptThread, *preProbe.cacheObj, preProbe.loadStatus, sourceIdentifier); preProbe.ok = preProbe.result.ok; @@ -2092,9 +2174,40 @@ class ShaderCompiler final : public IApplicationFramework } const auto probeEnd = clock_t::now(); + if (reportEnabled) + { + r.report["cache_probe_ms"] = toMs(probeEnd - probeStart); + auto& sc = r.report["shader_cache"]; + sc["hit"] = shaderProbe.hit; + sc["used_index"] = shaderProbe.usedIndex; + sc["probe_ms"] = toMs(shaderProbe.duration); + sc["load_ms"] = toMs(shaderProbe.loadDuration); + sc["validate_ms"] = toMs(shaderProbe.validateDuration); + sc["status"] = useShaderCache ? (shaderProbe.hit ? 
"hit" : "miss") : "disabled"; + if (useShaderCache && !shaderProbe.hit) + sc["miss_reason"] = cacheMissReason(shaderProbe.status); + + auto& pc = r.report["preprocess_cache"]; + pc["hit"] = preProbe.result.cacheHit; + pc["used"] = preProbe.result.cacheUsed; + pc["skipped"] = preProbe.skipped; + pc["updated"] = preProbe.result.cacheUpdated; + pc["probe_ms"] = toMs(preProbe.duration); + if (!usePreCache) + pc["status"] = "disabled"; + else if (preProbe.skipped) + pc["status"] = "skipped"; + else if (preProbe.result.cacheHit) + pc["status"] = "hit"; + else + pc["status"] = "miss"; + if (usePreCache && !preProbe.skipped && !preProbe.result.cacheHit) + pc["miss_reason"] = IShaderCompiler::CPreprocessCache::getProbeReason(preProbe.result.status); + } std::string preprocessedCode; bool preprocessedReady = false; + bool preprocessedFromFullPreprocess = false; bool preprocessedNeedsWrite = false; bool preambleUsed = false; std::vector preambleDependencies; @@ -2104,20 +2217,6 @@ class ShaderCompiler final : public IApplicationFramework std::string_view codeToCompile = code; smart_refctd_ptr preCacheObj; IShader::E_SHADER_STAGE stageOverride = static_cast(shaderStage); - auto cacheMissReason = [](CacheLoadStatus status) -> const char* - { - if (status == CacheLoadStatus::Missing) - return "cache file missing; first build, cleaned, output moved, or out of date"; - if (status == CacheLoadStatus::Invalid) - return "cache file invalid or version mismatch"; - return "input/deps/options changed; cache invalidated"; - }; - - auto toMs = [](const std::chrono::nanoseconds duration) -> long long - { - return std::chrono::duration_cast(duration).count(); - }; - auto ensureIndexSpirvLoaded = [&](IShaderCompiler::CCache::SEntry& entry) -> bool { if (entry.spirv) @@ -2308,11 +2407,12 @@ class ShaderCompiler final : public IApplicationFramework { stageOverride = preProbe.result.stage; preCacheObj = preProbe.cacheObj; - if (!preCache.preamble) + if (!preCache.preamble || 
!preProbe.result.cacheHit) { preprocessedCode = std::move(preProbe.result.code); preprocessedReady = true; preprocessedNeedsWrite = !preprocessedOutputPath.empty(); + preprocessedFromFullPreprocess = true; } } } @@ -2406,16 +2506,21 @@ class ShaderCompiler final : public IApplicationFramework } if (isOutputUpToDate(shaderProbe.entry)) { - const auto hitDepfileStart = clock_t::now(); - if (!writeDepfileFromDependencies(shaderProbe.entry.dependencies, true)) - return r; - const auto hitDepfileEnd = clock_t::now(); - r.ok = true; - if (verbose) - { - m_logger->log("HIT timings: decompress=0 ms, depfile=%lld ms.", ILogger::ELL_PERFORMANCE, - static_cast(toMs(hitDepfileEnd - hitDepfileStart))); - } + const auto hitDepfileStart = clock_t::now(); + if (!writeDepfileFromDependencies(shaderProbe.entry.dependencies, true)) + return r; + const auto hitDepfileEnd = clock_t::now(); + if (reportEnabled) + { + r.report["depfile"]["written"] = m_system && m_system->exists(preOpt.depfilePath, IFileBase::ECF_READ); + r.report["depfile"]["ms"] = toMs(hitDepfileEnd - hitDepfileStart); + } + r.ok = true; + if (verbose) + { + m_logger->log("HIT timings: decompress=0 ms, depfile=%lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(hitDepfileEnd - hitDepfileStart))); + } return r; } const auto hitDecompressStart = clock_t::now(); @@ -2432,6 +2537,11 @@ class ShaderCompiler final : public IApplicationFramework if (!writeDepfileFromDependencies(shaderProbe.entry.dependencies, true)) return r; const auto hitDepfileEnd = clock_t::now(); + if (reportEnabled) + { + r.report["depfile"]["written"] = m_system && m_system->exists(preOpt.depfilePath, IFileBase::ECF_READ); + r.report["depfile"]["ms"] = toMs(hitDepfileEnd - hitDepfileStart); + } if (verbose) { m_logger->log("HIT timings: decompress=%lld ms, depfile=%lld ms.", ILogger::ELL_PERFORMANCE, @@ -2442,12 +2552,12 @@ class ShaderCompiler final : public IApplicationFramework return r; } - auto hlslcompiler = 
make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + auto* hlslcompiler = getCompiler(); clock_t::duration preambleBodyDuration = {}; clock_t::duration preambleAssembleDuration = {}; clock_t::duration preambleProbeDuration = {}; clock_t::duration preambleFinderDuration = {}; - const bool usePreamble = preCache.preamble && preCacheObj && preCacheObj->hasEntry(); + const bool usePreamble = preCache.preamble && preCacheObj && preCacheObj->hasEntry() && !preprocessedFromFullPreprocess; if (usePreamble) { const auto preambleStart = clock_t::now(); @@ -2464,54 +2574,16 @@ class ShaderCompiler final : public IApplicationFramework const auto& entry = preCacheObj->getEntry(); - const auto preambleFinderStart = clock_t::now(); - auto finder = makeIncludeFinder(); - preambleFinderDuration = clock_t::now() - preambleFinderStart; auto bodyStage = stageOverride; CHLSLCompiler::SPreprocessorOptions bodyOpt = preOpt; bodyOpt.applyForceIncludes = false; - bodyOpt.includeFinder = finder.get(); - if (!entry.macroBlock.empty()) - { - std::string withDefines; - withDefines.reserve(body.size() + entry.macroBlock.size()); - withDefines.append(entry.macroBlock); - withDefines.append(body); - body = std::move(withDefines); - bodyOpt.extraDefines = {}; - } - else if (!entry.macroDefs.empty()) - { - size_t reserve = body.size(); - for (const auto& macro : entry.macroDefs) - reserve += macro.size() + 12; - std::string withDefines; - withDefines.reserve(reserve); - for (const auto& macro : entry.macroDefs) - { - const auto eq = macro.find('='); - const std::string_view name = eq == std::string::npos ? std::string_view(macro) : std::string_view(macro).substr(0, eq); - const std::string_view def = eq == std::string::npos ? 
std::string_view() : std::string_view(macro).substr(eq + 1); - withDefines.append("#define "); - withDefines.append(name); - if (!def.empty()) - { - withDefines.push_back(' '); - withDefines.append(def); - } - withDefines.push_back('\n'); - } - withDefines.append(body); - body = std::move(withDefines); - bodyOpt.extraDefines = {}; - } std::vector bodyDeps; std::vector bodyDxcFlags; std::string bodyPreprocessed; if (!body.empty()) { - const auto bodyHasInclude = [](std::string_view text) -> bool + const auto bodyHasDirective = [](std::string_view text) -> bool { size_t pos = 0; while (pos < text.size()) @@ -2524,23 +2596,127 @@ class ShaderCompiler final : public IApplicationFramework ++i; if (i < lineEnd && text[i] == '#') { - ++i; - while (i < lineEnd && (text[i] == ' ' || text[i] == '\t')) - ++i; - if (lineEnd - i >= 7 && text.compare(i, 7, "include") == 0) - return true; + return true; } pos = lineEnd + 1; } return false; }; - const bool hasInclude = bodyHasInclude(body); - auto* bodyDepsOut = hasInclude ? 
&bodyDeps : nullptr; - const auto bodyPreprocessStart = clock_t::now(); - bodyPreprocessed = hlslcompiler->preprocessShader(std::move(body), bodyStage, bodyOpt, bodyDxcFlags, bodyDepsOut, nullptr); - preambleBodyDuration = clock_t::now() - bodyPreprocessStart; - if (bodyPreprocessed.empty()) - return r; + const auto macroName = [](const std::string& macro) -> std::string_view + { + std::string_view name(macro); + const auto eq = name.find('='); + if (eq != std::string_view::npos) + name = name.substr(0, eq); + const auto paren = name.find('('); + if (paren != std::string_view::npos) + name = name.substr(0, paren); + while (!name.empty() && (name.back() == ' ' || name.back() == '\t')) + name.remove_suffix(1); + return name; + }; + const auto bodyUsesMacros = [&](std::string_view text) -> bool + { + static constexpr std::string_view kBuiltinMacros[] = + { + "__LINE__", + "__FILE__", + "__COUNTER__", + "__DATE__", + "__TIME__", + "__TIMESTAMP__" + }; + for (const auto builtin : kBuiltinMacros) + { + if (text.find(builtin) != std::string_view::npos) + return true; + } + for (const auto& macro : entry.macroDefs) + { + const std::string_view name = macroName(macro); + if (!name.empty() && text.find(name) != std::string_view::npos) + return true; + } + return false; + }; + const bool hasDirective = bodyHasDirective(body); + const bool needsPreprocess = hasDirective || bodyUsesMacros(body); + if (needsPreprocess) + { + if (!entry.macroBlock.empty()) + { + std::string withDefines; + withDefines.reserve(body.size() + entry.macroBlock.size()); + withDefines.append(entry.macroBlock); + withDefines.append(body); + body = std::move(withDefines); + bodyOpt.extraDefines = {}; + } + else if (!entry.macroDefs.empty()) + { + size_t reserve = body.size(); + for (const auto& macro : entry.macroDefs) + reserve += macro.size() + 12; + std::string withDefines; + withDefines.reserve(reserve); + for (const auto& macro : entry.macroDefs) + { + const auto eq = macro.find('='); + const 
std::string_view name = eq == std::string::npos ? std::string_view(macro) : std::string_view(macro).substr(0, eq); + const std::string_view def = eq == std::string::npos ? std::string_view() : std::string_view(macro).substr(eq + 1); + withDefines.append("#define "); + withDefines.append(name); + if (!def.empty()) + { + withDefines.push_back(' '); + withDefines.append(def); + } + withDefines.push_back('\n'); + } + withDefines.append(body); + body = std::move(withDefines); + bodyOpt.extraDefines = {}; + } + + const auto bodyHasInclude = [](std::string_view text) -> bool + { + size_t pos = 0; + while (pos < text.size()) + { + size_t lineEnd = text.find('\n', pos); + if (lineEnd == std::string_view::npos) + lineEnd = text.size(); + size_t i = pos; + while (i < lineEnd && (text[i] == ' ' || text[i] == '\t' || text[i] == '\r')) + ++i; + if (i < lineEnd && text[i] == '#') + { + ++i; + while (i < lineEnd && (text[i] == ' ' || text[i] == '\t')) + ++i; + if (lineEnd - i >= 7 && text.compare(i, 7, "include") == 0) + return true; + } + pos = lineEnd + 1; + } + return false; + }; + const bool hasInclude = bodyHasInclude(body); + auto* bodyDepsOut = hasInclude ? 
&bodyDeps : nullptr; + const auto preambleFinderStart = clock_t::now(); + auto* finder = getFinder(); + preambleFinderDuration = clock_t::now() - preambleFinderStart; + bodyOpt.includeFinder = finder; + const auto bodyPreprocessStart = clock_t::now(); + bodyPreprocessed = hlslcompiler->preprocessShader(std::move(body), bodyStage, bodyOpt, bodyDxcFlags, bodyDepsOut, nullptr); + preambleBodyDuration = clock_t::now() - bodyPreprocessStart; + if (bodyPreprocessed.empty()) + return r; + } + else + { + bodyPreprocessed = body; + } } stageOverride = bodyStage; @@ -2631,6 +2807,16 @@ class ShaderCompiler final : public IApplicationFramework m_logger->log("Preamble body preprocess took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(preambleDuration))); } + if (reportEnabled) + { + auto& p = r.report["preamble"]; + p["used"] = preambleUsed; + p["probe_ms"] = toMs(preambleProbeDuration); + p["finder_ms"] = toMs(preambleFinderDuration); + p["body_ms"] = toMs(preambleBodyDuration); + p["assemble_ms"] = toMs(preambleAssembleDuration); + p["total_ms"] = toMs(preambleDuration); + } if (preprocessOnly) { @@ -2646,6 +2832,11 @@ class ShaderCompiler final : public IApplicationFramework const auto duration = std::chrono::duration_cast(preprocessEnd - preprocessStart).count(); m_logger->log("Preprocess took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(duration)); } + if (reportEnabled) + { + r.report["preprocess"]["called"] = true; + r.report["preprocess"]["ms"] = std::chrono::duration_cast(preprocessEnd - preprocessStart).count(); + } return r; } @@ -2689,14 +2880,19 @@ class ShaderCompiler final : public IApplicationFramework codeToCompile = preprocessedCode; } - auto compileFinder = makeIncludeFinder(); - opt.preprocessorOptions.includeFinder = compileFinder.get(); + auto* compileFinder = getFinder(); + opt.preprocessorOptions.includeFinder = compileFinder; const auto compileStart = clock_t::now(); r.compiled = hlslcompiler->compileToSPIRV(codeToCompile, opt); const 
auto compileEnd = clock_t::now(); r.ok = bool(r.compiled); if (r.ok) r.view = { (const char*)r.compiled->getContent()->getPointer(), r.compiled->getContent()->getSize() }; + if (reportEnabled) + { + r.report["compile"]["called"] = true; + r.report["compile"]["ms"] = toMs(compileEnd - compileStart); + } if (verbose) { m_logger->log("Compile call took: %lld ms.", ILogger::ELL_PERFORMANCE, @@ -2746,7 +2942,7 @@ class ShaderCompiler final : public IApplicationFramework else if (cacheObj) { const auto depLookupStart = clock_t::now(); - const bool depFound = cacheObj->findEntryForCode(code, opt, compileFinder.get(), depEntry, validateCacheDeps); + const bool depFound = cacheObj->findEntryForCode(code, opt, compileFinder, depEntry, validateCacheDeps); const auto depLookupEnd = clock_t::now(); if (verbose) { @@ -2771,6 +2967,11 @@ class ShaderCompiler final : public IApplicationFramework return r; } const auto depfileEnd = clock_t::now(); + if (reportEnabled) + { + r.report["depfile"]["written"] = m_system && m_system->exists(preOpt.depfilePath, IFileBase::ECF_READ); + r.report["depfile"]["ms"] = toMs(depfileEnd - depfileStart); + } if (verbose) { m_logger->log("Depfile write took: %lld ms.", ILogger::ELL_PERFORMANCE, diff --git a/tools/nsc/test/cache_layers/CMakeLists.txt b/tools/nsc/test/cache_layers/CMakeLists.txt new file mode 100644 index 0000000000..580a427241 --- /dev/null +++ b/tools/nsc/test/cache_layers/CMakeLists.txt @@ -0,0 +1,256 @@ +include(common) + +set(NSC_JSON_REPORT ON) +set(NSC_SHADER_CACHE ON) +set(NSC_PREPROCESS_CACHE ON) + +if(NOT Python3_EXECUTABLE) + find_package(Python3 COMPONENTS Interpreter REQUIRED) +endif() + +set(NBL_NSC_CACHE_TEST_SEED "0" CACHE STRING "Seed for NSC cache layer tests (0 = deterministic)") +set(NBL_NSC_CACHE_TEST_ITERATIONS "5" CACHE STRING "Iterations for NSC cache layer stress test") +set(NBL_NSC_CACHE_TEST_PARALLEL_JOBS "3" CACHE STRING "Parallel jobs for NSC cache layer test") +set(NBL_NSC_CACHE_PREAMBLE_BUDGET_MS "0" 
CACHE STRING "Optional max total_with_output_ms for preamble hit time test (0 disables check)") + +set(NBL_NSC_CACHE_LAYER_ROOT "${CMAKE_CURRENT_BINARY_DIR}/cache_layers") +set(NBL_NSC_CACHE_LAYER_SRC "${NBL_NSC_CACHE_LAYER_ROOT}/src") +file(MAKE_DIRECTORY "${NBL_NSC_CACHE_LAYER_SRC}") + +set(NBL_NSC_CACHE_PROXY "${NBL_NSC_CACHE_LAYER_SRC}/proxy.hlsl") +file(WRITE "${NBL_NSC_CACHE_PROXY}" [=[ +#ifndef NBL_NSC_CACHE_TEST_PROXY_HLSL +#define NBL_NSC_CACHE_TEST_PROXY_HLSL +// NBL_NSC_CACHE_TEST_DEFINES_BEGIN +// NBL_NSC_CACHE_TEST_DEFINES_END +// NBL_NSC_CACHE_TEST_INCLUDES_BEGIN +#include +#include +#include +// NBL_NSC_CACHE_TEST_INCLUDES_END +#endif +]=]) + +function(nbl_nsc_write_input _path) + file(WRITE "${_path}" [=[ +#include "proxy.hlsl" + +[numthreads(1,1,1)] +[shader("compute")] +void main(uint3 tid : SV_DispatchThreadID) +{ + uint sink = 1u; + if (tid.x == 0u && sink == 0u) + return; +} +]=]) +endfunction() + +set(NBL_NSC_CACHE_INPUT "${NBL_NSC_CACHE_LAYER_SRC}/cache_layers_input.hlsl") +nbl_nsc_write_input("${NBL_NSC_CACHE_INPUT}") +set(NBL_NSC_SHADER_INPUT "${NBL_NSC_CACHE_INPUT}") +set(NBL_NSC_PREPROCESS_INPUT "${NBL_NSC_CACHE_INPUT}") +set(NBL_NSC_PREAMBLE_INPUT "${NBL_NSC_CACHE_INPUT}") + +function(nbl_nsc_add_cache_target _name _input _binary_dir _output_var _export_prefix) + set(JSON_TEMPLATE [=[[ + { + "INPUT": "@INPUT@", + "KEY": "@KEY@", + "COMPILE_OPTIONS": ["-T", "cs_6_7"], + "CAPS": [] + } +]]]=]) + set(INPUT "${_input}") + set(KEY "${_name}") + string(CONFIGURE "${JSON_TEMPLATE}" JSON @ONLY) + + NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${_name}_spirv + BINARY_DIR "${_binary_dir}" + MOUNT_POINT_DEFINE NBL_NSC_CACHE_LAYERS_MOUNT + COMMON_OPTIONS -I "${NBL_NSC_CACHE_LAYER_SRC}" + OUTPUT_VAR ${_output_var} + EXPORT_RULES ${_export_prefix} + DISABLE_CUSTOM_COMMANDS + INCLUDE nbl/nsc/tests/cache_layers/keys.hpp + NAMESPACE nbl::nsc::tests::cache_layers + INPUTS ${JSON} + DISCARD_DEFAULT_GLOB + ) + + if(DEFINED ${_output_var}) + set(${_output_var} 
${${_output_var}} PARENT_SCOPE) + endif() + if(_export_prefix AND DEFINED ${_export_prefix}_COUNT) + set(${_export_prefix}_COUNT "${${_export_prefix}_COUNT}" PARENT_SCOPE) + if(${_export_prefix}_COUNT GREATER 0) + math(EXPR _export_last "${${_export_prefix}_COUNT} - 1") + foreach(_export_idx RANGE 0 ${_export_last}) + set(${_export_prefix}_COMMAND_${_export_idx} ${${_export_prefix}_COMMAND_${_export_idx}} PARENT_SCOPE) + set(${_export_prefix}_OUTPUT_${_export_idx} "${${_export_prefix}_OUTPUT_${_export_idx}}" PARENT_SCOPE) + set(${_export_prefix}_LOG_${_export_idx} "${${_export_prefix}_LOG_${_export_idx}}" PARENT_SCOPE) + set(${_export_prefix}_DEPFILE_${_export_idx} "${${_export_prefix}_DEPFILE_${_export_idx}}" PARENT_SCOPE) + set(${_export_prefix}_REPORT_${_export_idx} "${${_export_prefix}_REPORT_${_export_idx}}" PARENT_SCOPE) + endforeach() + endif() + endif() +endfunction() + +set(_BIN_SHADER "${NBL_NSC_CACHE_LAYER_ROOT}/shader_cache") +set(_BIN_PREPROCESS "${NBL_NSC_CACHE_LAYER_ROOT}/preprocess_cache") +set(_BIN_PREAMBLE "${NBL_NSC_CACHE_LAYER_ROOT}/preamble_cache") + +set(NSC_PREPROCESS_PREAMBLE ON) +set(_EXPORT_SHADER NSC_CACHE_SHADER) +nbl_nsc_add_cache_target(nsc_cache_shader_hit "${NBL_NSC_SHADER_INPUT}" "${_BIN_SHADER}" KEYS_SHADER_CACHE ${_EXPORT_SHADER}) + +set(NSC_PREPROCESS_PREAMBLE OFF) +set(_EXPORT_PREPROCESS NSC_CACHE_PREPROCESS) +nbl_nsc_add_cache_target(nsc_cache_preprocess_hit "${NBL_NSC_PREPROCESS_INPUT}" "${_BIN_PREPROCESS}" KEYS_PREPROCESS_CACHE ${_EXPORT_PREPROCESS}) + +set(NSC_PREPROCESS_PREAMBLE ON) +set(_EXPORT_PREAMBLE NSC_CACHE_PREAMBLE) +nbl_nsc_add_cache_target(nsc_cache_preamble_hit "${NBL_NSC_PREAMBLE_INPUT}" "${_BIN_PREAMBLE}" KEYS_PREAMBLE_CACHE ${_EXPORT_PREAMBLE}) + +set(NBL_NSC_CACHE_TEST_BASE + "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/cache_layers_test.py" +) + +function(nbl_nsc_make_test_args _out _mode _input _output _report _log _depfile _shader_cache _preprocess_cache _preprocessed _command) + set(_args + 
${NBL_NSC_CACHE_TEST_BASE} + --mode ${_mode} + --input "$" + --output "$" + --report "$" + --log "$" + --depfile "$" + --shader-cache "$" + --preprocess-cache "$" + --preprocessed "$" + --seed "${NBL_NSC_CACHE_TEST_SEED}" + ) + if(ARGN) + list(APPEND _args ${ARGN}) + endif() + list(APPEND _args --command ${_command}) + set(${_out} ${_args} PARENT_SCOPE) +endfunction() + +function(nbl_nsc_add_cache_tests _prefix _fixture _cold_args _hit_args) + add_test(NAME ${_prefix}_COLD_RUN_TEST + COMMAND ${_cold_args} + COMMAND_EXPAND_LISTS + ) + add_test(NAME ${_prefix}_HIT_TEST + COMMAND ${_hit_args} + COMMAND_EXPAND_LISTS + ) + set_tests_properties(${_prefix}_COLD_RUN_TEST PROPERTIES FIXTURES_SETUP ${_fixture} RESOURCE_LOCK nbl_nsc_cache_layers) + set_tests_properties(${_prefix}_HIT_TEST PROPERTIES FIXTURES_REQUIRED ${_fixture} RESOURCE_LOCK nbl_nsc_cache_layers) +endfunction() + +function(nbl_nsc_add_single_test _name _args) + add_test(NAME ${_name} + COMMAND ${_args} + COMMAND_EXPAND_LISTS + ) + set_tests_properties(${_name} PROPERTIES RESOURCE_LOCK nbl_nsc_cache_layers) +endfunction() + +function(nbl_nsc_make_args_from_export _out _mode _input _export_prefix) + set(_command ${${_export_prefix}_COMMAND_0}) + set(_out_path "${${_export_prefix}_OUTPUT_0}") + set(_report_path "${${_export_prefix}_REPORT_0}") + set(_log_path "${${_export_prefix}_LOG_0}") + set(_depfile_path "${${_export_prefix}_DEPFILE_0}") + set(_cache_shader "${${_export_prefix}_CACHE_SHADER_0}") + set(_cache_preprocess "${${_export_prefix}_CACHE_PREPROCESS_0}") + set(_preprocessed "${${_export_prefix}_PREPROCESSED_0}") + + nbl_nsc_make_test_args(${_out} ${_mode} "${_input}" "${_out_path}" "${_report_path}" "${_log_path}" "${_depfile_path}" "${_cache_shader}" "${_cache_preprocess}" "${_preprocessed}" "${_command}" ${ARGN}) + set(${_out} ${${_out}} PARENT_SCOPE) +endfunction() + +function(nbl_nsc_path_with_suffix _out _path _suffix) + cmake_path(GET _path PARENT_PATH _dir) + cmake_path(GET _path STEM _stem) + 
cmake_path(GET _path EXTENSION _ext) + set(${_out} "${_dir}/${_stem}${_suffix}${_ext}" PARENT_SCOPE) +endfunction() + +function(nbl_nsc_make_no_cache_args _out _input _export_prefix) + set(_command ${${_export_prefix}_COMMAND_0}) + set(_out_path "${${_export_prefix}_OUTPUT_0}") + set(_report_path "${${_export_prefix}_REPORT_0}") + set(_log_path "${${_export_prefix}_LOG_0}") + set(_depfile_path "${${_export_prefix}_DEPFILE_0}") + set(_cache_shader "${${_export_prefix}_CACHE_SHADER_0}") + set(_cache_preprocess "${${_export_prefix}_CACHE_PREPROCESS_0}") + set(_preprocessed "${${_export_prefix}_PREPROCESSED_0}") + + nbl_nsc_path_with_suffix(_out_no_cache "${_out_path}" ".no_cache") + nbl_nsc_path_with_suffix(_report_no_cache "${_report_path}" ".no_cache") + nbl_nsc_path_with_suffix(_log_no_cache "${_log_path}" ".no_cache") + nbl_nsc_path_with_suffix(_depfile_no_cache "${_depfile_path}" ".no_cache") + nbl_nsc_path_with_suffix(_preprocessed_no_cache "${_preprocessed}" ".no_cache") + + nbl_nsc_make_test_args(${_out} no_cache_cold "${_input}" "${_out_no_cache}" "${_report_no_cache}" "${_log_no_cache}" "${_depfile_no_cache}" "${_cache_shader}" "${_cache_preprocess}" "${_preprocessed_no_cache}" "${_command}") + set(${_out} ${${_out}} PARENT_SCOPE) +endfunction() + +function(nbl_nsc_register_cache_layer _prefix _fixture _mode_base _input _export_prefix) + set(_command ${${_export_prefix}_COMMAND_0}) + set(_out "${${_export_prefix}_OUTPUT_0}") + set(_report "${${_export_prefix}_REPORT_0}") + set(_log "${${_export_prefix}_LOG_0}") + set(_depfile "${${_export_prefix}_DEPFILE_0}") + set(_cache_shader "${${_export_prefix}_CACHE_SHADER_0}") + set(_cache_preprocess "${${_export_prefix}_CACHE_PREPROCESS_0}") + set(_preprocessed "${${_export_prefix}_PREPROCESSED_0}") + + nbl_nsc_make_test_args(_cold_args "${_mode_base}_cold" "${_input}" "${_out}" "${_report}" "${_log}" "${_depfile}" "${_cache_shader}" "${_cache_preprocess}" "${_preprocessed}" "${_command}") + 
nbl_nsc_make_test_args(_hit_args "${_mode_base}_hit" "${_input}" "${_out}" "${_report}" "${_log}" "${_depfile}" "${_cache_shader}" "${_cache_preprocess}" "${_preprocessed}" "${_command}") + + nbl_nsc_add_cache_tests(${_prefix} ${_fixture} "${_cold_args}" "${_hit_args}") +endfunction() + +nbl_nsc_register_cache_layer(NBL_NSC_CACHE_SHADER nbl_nsc_shader_cache shader_cache "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}) +nbl_nsc_register_cache_layer(NBL_NSC_CACHE_PREPROCESS nbl_nsc_preprocess_cache preprocess_cache "${NBL_NSC_PREPROCESS_INPUT}" ${_EXPORT_PREPROCESS}) +nbl_nsc_register_cache_layer(NBL_NSC_CACHE_PREAMBLE nbl_nsc_preamble_cache preamble_cache "${NBL_NSC_PREAMBLE_INPUT}" ${_EXPORT_PREAMBLE}) + +nbl_nsc_make_args_from_export(_NBL_NSC_SHADER_DISABLED_ARGS shader_cache_disabled "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}) +nbl_nsc_make_args_from_export(_NBL_NSC_PREPROCESS_DISABLED_ARGS preprocess_cache_disabled "${NBL_NSC_PREPROCESS_INPUT}" ${_EXPORT_PREPROCESS}) +nbl_nsc_make_args_from_export(_NBL_NSC_PREAMBLE_DISABLED_ARGS preamble_cache_disabled "${NBL_NSC_PREAMBLE_INPUT}" ${_EXPORT_PREAMBLE}) +nbl_nsc_make_args_from_export(_NBL_NSC_ISOLATION_ARGS shader_cache_isolation "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}) +nbl_nsc_make_args_from_export(_NBL_NSC_DEPS_ARGS deps_invalidation "${NBL_NSC_PREAMBLE_INPUT}" ${_EXPORT_PREAMBLE}) +nbl_nsc_make_args_from_export(_NBL_NSC_PATH_ARGS path_normalization "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}) +nbl_nsc_make_args_from_export(_NBL_NSC_RANDOM_DEFINES_ARGS random_defines "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}) +nbl_nsc_make_args_from_export(_NBL_NSC_PARALLEL_ARGS parallel_smoke "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER} --parallel-jobs ${NBL_NSC_CACHE_TEST_PARALLEL_JOBS}) +nbl_nsc_make_args_from_export(_NBL_NSC_STRESS_ARGS stress "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER} --iterations ${NBL_NSC_CACHE_TEST_ITERATIONS}) +nbl_nsc_make_args_from_export(_NBL_NSC_REPORT_SCHEMA_ARGS report_schema 
"${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}) +nbl_nsc_make_args_from_export(_NBL_NSC_DEPFILE_ARGS depfile_contents "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}) +nbl_nsc_make_args_from_export(_NBL_NSC_CACHE_OVERRIDE_ARGS cache_path_override "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}) +nbl_nsc_make_args_from_export(_NBL_NSC_LARGE_GRAPH_ARGS large_include_graph "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}) +nbl_nsc_make_args_from_export(_NBL_NSC_UNUSED_INCLUDE_ARGS unused_include "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}) +nbl_nsc_make_args_from_export(_NBL_NSC_PREAMBLE_TIME_ARGS preamble_cache_hit_time "${NBL_NSC_PREAMBLE_INPUT}" ${_EXPORT_PREAMBLE} --budget-ms ${NBL_NSC_CACHE_PREAMBLE_BUDGET_MS}) +nbl_nsc_make_no_cache_args(_NBL_NSC_NO_CACHE_ARGS "${NBL_NSC_PREAMBLE_INPUT}" ${_EXPORT_PREAMBLE}) + +nbl_nsc_add_single_test(NBL_NSC_CACHE_SHADER_DISABLED_TEST "${_NBL_NSC_SHADER_DISABLED_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_PREPROCESS_DISABLED_TEST "${_NBL_NSC_PREPROCESS_DISABLED_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_PREAMBLE_DISABLED_TEST "${_NBL_NSC_PREAMBLE_DISABLED_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_SHADER_ISOLATION_TEST "${_NBL_NSC_ISOLATION_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_DEPS_INVALIDATION_TEST "${_NBL_NSC_DEPS_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_PATH_NORMALIZATION_TEST "${_NBL_NSC_PATH_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_RANDOM_DEFINES_TEST "${_NBL_NSC_RANDOM_DEFINES_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_PARALLEL_SMOKE_TEST "${_NBL_NSC_PARALLEL_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_STRESS_TEST "${_NBL_NSC_STRESS_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_REPORT_SCHEMA_TEST "${_NBL_NSC_REPORT_SCHEMA_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_DEPFILE_CONTENTS_TEST "${_NBL_NSC_DEPFILE_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_PATH_OVERRIDE_TEST "${_NBL_NSC_CACHE_OVERRIDE_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_LARGE_GRAPH_TEST "${_NBL_NSC_LARGE_GRAPH_ARGS}") 
+nbl_nsc_add_single_test(NBL_NSC_CACHE_UNUSED_INCLUDE_TEST "${_NBL_NSC_UNUSED_INCLUDE_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_PREAMBLE_HIT_TIME_TEST "${_NBL_NSC_PREAMBLE_TIME_ARGS}") +nbl_nsc_add_single_test(NBL_NSC_CACHE_NO_CACHE_COLD_TEST "${_NBL_NSC_NO_CACHE_ARGS}") +set_tests_properties(NBL_NSC_CACHE_PREAMBLE_HIT_TIME_TEST PROPERTIES FIXTURES_REQUIRED nbl_nsc_preamble_cache) diff --git a/tools/nsc/test/cache_layers/README.md b/tools/nsc/test/cache_layers/README.md new file mode 100644 index 0000000000..8867d00fe4 --- /dev/null +++ b/tools/nsc/test/cache_layers/README.md @@ -0,0 +1,192 @@ +# NSC Cache Layer Tests + +This directory defines a cache-layer test suite for `nsc`. +The tests are driven by `tools/nsc/test/cache_layers/CMakeLists.txt` and the +Python runner `tools/nsc/test/cache_layers/cache_layers_test.py`. + +## What is being tested + +NSC has three cache layers that must stay correct and fast: + +1) Shader cache +- Stores final SPIR-V. +- Hit path skips preprocess + compile and returns cached SPIR-V. + +2) Preprocess cache +- Stores preprocessed source + dependency graph. +- Hit path skips full include processing, but still compiles. + +3) Preamble cache (preamble/prefix cache) +- Stores preprocessed prefix for heavy include graphs. +- Hit path only preprocesses the body and reuses prefix. +- If the body has no preprocessor directives and no macro usage, it is passed through without running Wave. + +## How the layers interact + +The compile flow is: +- Shader cache probe (key + deps). On hit, return cached SPIR-V and skip all other work. +- If shader cache misses, probe preprocess cache. + - If preprocess cache hits, compile using preprocessed code. + - If preamble is enabled and available, only preprocess the body and reuse the cached prefix. +- On preprocess miss, do full preprocess + compile, then update caches. 
+ +All three layers together give: +- cold run: full preprocess + compile (safe baseline) +- body-only change: shader cache miss, preprocess cache hit, compile runs +- deps change: cold run (all caches miss) +- preamble hit: avoids re-lexing heavy includes on body edits + +The build system enables all three by default, but each layer can be toggled +with CLI flags to verify behavior. + +## Why use all three layers + +- Shader cache gives the fastest hit path for unchanged inputs. +- Preprocess cache avoids re-walking includes when only the body changes. +- Preamble cache cuts Wave time for large include graphs even when the body changes. + +Dropping any layer regresses a specific edit pattern. Using all three maximizes +iteration speed while keeping correctness, because every layer is validated by +its dependency tracking. + +## Correctness and safety + +Each cache entry is validated against its dependency graph and compilation +inputs. Any change in inputs, options, or includes invalidates the cache and +forces a cold run. The tests do not enable "fast unsafe" paths. + +## Test overview (CMake/CTest) + +The suite defines a set of CTest entries with explicit cold/hit tests plus +additional integrity checks. All tests run against the build configuration +that is currently configured (the `builtins` ON/OFF setting is respected automatically). 
+ +Core cache tests: +- `NBL_NSC_CACHE_SHADER_COLD_RUN_TEST` +- `NBL_NSC_CACHE_SHADER_HIT_TEST` +- `NBL_NSC_CACHE_PREPROCESS_COLD_RUN_TEST` +- `NBL_NSC_CACHE_PREPROCESS_HIT_TEST` +- `NBL_NSC_CACHE_PREAMBLE_COLD_RUN_TEST` +- `NBL_NSC_CACHE_PREAMBLE_HIT_TEST` + +Extra correctness tests (no cross-config builds): +- Cache layer disable checks: + - `NBL_NSC_CACHE_SHADER_DISABLED_TEST` + - `NBL_NSC_CACHE_PREPROCESS_DISABLED_TEST` + - `NBL_NSC_CACHE_PREAMBLE_DISABLED_TEST` +- Isolation / invalidation: + - `NBL_NSC_CACHE_SHADER_ISOLATION_TEST` + - `NBL_NSC_CACHE_DEPS_INVALIDATION_TEST` +- Path normalization: + - `NBL_NSC_CACHE_PATH_NORMALIZATION_TEST` +- Randomized defines: + - `NBL_NSC_CACHE_RANDOM_DEFINES_TEST` +- Parallel smoke (multi-process nsc calls with unique outputs): + - `NBL_NSC_CACHE_PARALLEL_SMOKE_TEST` +- Stress (multiple repeated runs, timing stats only): + - `NBL_NSC_CACHE_STRESS_TEST` +- Report schema sanity: + - `NBL_NSC_CACHE_REPORT_SCHEMA_TEST` +- Depfile content check: + - `NBL_NSC_CACHE_DEPFILE_CONTENTS_TEST` +- Cache override paths: + - `NBL_NSC_CACHE_PATH_OVERRIDE_TEST` +- Large include graph: + - `NBL_NSC_CACHE_LARGE_GRAPH_TEST` +- Unused include is excluded from depfile: + - `NBL_NSC_CACHE_UNUSED_INCLUDE_TEST` +- Preamble hit timing (logs total time, optional budget): + - `NBL_NSC_CACHE_PREAMBLE_HIT_TIME_TEST` +- No-cache cold baseline: + - `NBL_NSC_CACHE_NO_CACHE_COLD_TEST` + +## How it works + +The tests compile a small HLSL shader that includes a local `proxy.hlsl`. +The proxy includes the same heavy builtins used in the cache test targets and +has injected markers for adding/removing `#define`s and include variants. 
+ +The Python runner: +- edits the body or proxy as required by a test mode +- runs `nsc` directly using the exact command line exported by + `NBL_CREATE_NSC_COMPILE_RULES` +- reads the JSON report (`.spv.report.json`) to assert hit/miss and behavior + +The JSON report fields used by the tests include: +- `shader_cache.hit`, `shader_cache.status` +- `preprocess_cache.status`, `preprocess_cache.hit` +- `preamble.enabled`, `preamble.used` +- `compile.called` + +## Example timings (Release) + +Measured from JSON reports (total_with_output_ms). +Cold-run and hit numbers are medians of 7 runs. Each hit sample is preceded by a cold run that seeds the cache. +The baseline is the "Baseline no-cache cold" row for each builtins mode. Relative vs no-cache is computed as baseline / row. +Values below 1.0x mean slower than baseline. +Machine: AMD Ryzen 5 5600G with Radeon Graphics. +Config: Release, builtins OFF/ON (two baselines). +Include stress: the proxy pulls three heavy builtins (intrinsics/matrix/vector). The full preprocessed output is ~10.6k lines (measured from the Release preprocess-cache `.spv.pre.hlsl`). + +Cold runs (no cache hits; preamble split can still be used): + +| Scenario | Caches enabled | Preprocess path | total_with_output_ms (builtins OFF) | Relative vs no-cache (OFF) | total_with_output_ms (builtins ON) | Relative vs no-cache (ON) | +| --- | --- | --- | --- | --- | --- | --- | +| Baseline no-cache cold | none | full preprocess | 1302 | 1.00x | 684 | 1.00x | +| Cold run (preprocess cache enabled) | shader + preprocess | full preprocess | 1529 | 0.85x | 890 | 0.77x | +| Cold run (all caches enabled) | shader + preprocess + preamble | full preprocess | 1502 | 0.87x | 895 | 0.76x | + +Note: "Cold run (all caches enabled)" is still a cache miss and uses full preprocess; `preamble.used` stays false on miss. Small deltas between the cold rows are measurement noise. 
+ +Hit paths (caches enabled as configured by the test target): + +| Scenario | Caches enabled | Hit path | total_with_output_ms (builtins OFF) | Speedup vs no-cache (OFF) | total_with_output_ms (builtins ON) | Speedup vs no-cache (ON) | +| --- | --- | --- | --- | --- | --- | --- | +| Shader cache hit | shader + preprocess + preamble | cached SPIR-V | 19 | 68.5x | 17 | 40.2x | +| Preprocess cache hit | shader + preprocess | preprocessed code + compile | 421 | 3.09x | 381 | 1.80x | +| Preamble cache hit | shader + preprocess + preamble | prefix reuse + body preprocess + compile | 226 | 5.76x | 207 | 3.30x | + +These numbers are expected to vary across machines and drivers. +Builtins ON uses embedded archives, which reduces filesystem IO and typically improves cold-run times. + +## Running the suite + +From the build directory: + +``` +ctest -C Debug -R NBL_NSC_CACHE_ --output-on-failure +``` + +Repeat runs for sampling: + +``` +ctest -C Debug -R NBL_NSC_CACHE_ --repeat until-fail:5 --output-on-failure +``` + +## Tuning knobs + +These are regular CMake cache variables: + +- `NBL_NSC_CACHE_TEST_SEED` + - Seed used for randomized define/body changes (0 = deterministic). +- `NBL_NSC_CACHE_TEST_ITERATIONS` + - Number of iterations used by the stress test. +- `NBL_NSC_CACHE_TEST_PARALLEL_JOBS` + - Number of parallel jobs used in the parallel smoke test. +- `NBL_NSC_CACHE_PREAMBLE_BUDGET_MS` + - Optional budget for the preamble hit timing test (0 disables check). + +## Build system defaults + +The build system enables all three layers by default. CLI toggles exist for +validation: +- `-nbl-shader-cache` +- `-nbl-preprocess-cache` +- `-nbl-preprocess-preamble` + +## Notes + +- Tests are protected by a CTest `RESOURCE_LOCK` so they do not fight over the + same inputs. The parallel smoke test uses unique outputs internally. +- The suite uses the current build configuration only; no extra Release/Debug + builds are required. 
diff --git a/tools/nsc/test/cache_layers/cache_layers_test.py b/tools/nsc/test/cache_layers/cache_layers_test.py new file mode 100644 index 0000000000..019b8640d6 --- /dev/null +++ b/tools/nsc/test/cache_layers/cache_layers_test.py @@ -0,0 +1,675 @@ +import argparse +import concurrent.futures +import json +import random +import re +import statistics +import subprocess +from pathlib import Path + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--cmake") + parser.add_argument("--build-dir") + parser.add_argument("--target") + parser.add_argument("--config", default="") + parser.add_argument("--mode", required=True) + parser.add_argument("--input", required=True) + parser.add_argument("--output", required=True) + parser.add_argument("--report", required=True) + parser.add_argument("--log", default="") + parser.add_argument("--depfile", default="") + parser.add_argument("--shader-cache", default="") + parser.add_argument("--preprocess-cache", default="") + parser.add_argument("--preprocessed", default="") + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--iterations", type=int, default=5) + parser.add_argument("--parallel-jobs", type=int, default=3) + parser.add_argument("--budget-ms", type=int, default=0) + parser.add_argument("--command", nargs=argparse.REMAINDER) + return parser.parse_args() + + +def normalize_command(cmd): + if not cmd: + return [] + return [arg for arg in cmd if arg] + + +def strip_option(cmd, flag, takes_value): + out = [] + skip = False + for arg in cmd: + if skip: + skip = False + continue + if arg == flag: + if takes_value: + skip = True + continue + out.append(arg) + return out + + +def strip_options(cmd, options_with_values, options_flags): + result = cmd + for flag in options_with_values: + result = strip_option(result, flag, True) + for flag in options_flags: + result = strip_option(result, flag, False) + return result + + +def replace_option_value(cmd, flag, value): + result = 
list(cmd) + for idx in range(len(result) - 1): + if result[idx] == flag: + result[idx + 1] = value + return result + + +def apply_output_overrides(cmd, args): + result = list(cmd) + if args.output: + result = replace_option_value(result, "-Fc", args.output) + if args.depfile: + result = replace_option_value(result, "-MF", args.depfile) + if args.report: + result = replace_option_value(result, "-nbl-report", args.report) + if args.log: + result = replace_option_value(result, "-log", args.log) + return result + + +def command_without_shader_cache(cmd): + return strip_options( + cmd, + options_with_values=["-shader-cache-file", "-nbl-shader-cache-compression", "-shader-cache-compression"], + options_flags=["-nbl-shader-cache", "-shader-cache"], + ) + + +def command_without_preprocess_cache(cmd): + return strip_options( + cmd, + options_with_values=["-preprocess-cache-file"], + options_flags=["-nbl-preprocess-cache", "-preprocess-cache"], + ) + + +def command_without_preamble(cmd): + return strip_options(cmd, options_with_values=[], options_flags=["-nbl-preprocess-preamble"]) + +def command_without_all_caches(cmd): + cmd = command_without_shader_cache(cmd) + cmd = command_without_preprocess_cache(cmd) + cmd = command_without_preamble(cmd) + return cmd + + +def run_cmd(args): + cmd = normalize_command(args) + res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) + if res.returncode != 0: + print(res.stdout) + raise RuntimeError("command failed") + + +def run_build(args, command_override=None): + if command_override is not None: + run_cmd(command_override) + return + if args.command: + run_cmd(args.command) + return + if not (args.cmake and args.build_dir and args.target): + raise RuntimeError("missing --command or --cmake/--build-dir/--target") + cmd = [args.cmake, "--build", args.build_dir, "--target", args.target] + if args.config: + cmd.extend(["--config", args.config]) + run_cmd(cmd) + + +def load_report(path): + with path.open("r", 
encoding="utf-8") as f: + return json.load(f) + + +def set_body_value(path, value): + text = path.read_text(encoding="utf-8") + match = re.search(r"uint\s+sink\s*=\s*(\d+)u;", text) + if not match: + raise RuntimeError("missing body marker: uint sink = u;") + current = int(match.group(1)) + if current == value: + return + replacement = f"uint sink = {value}u;" + new_text = re.sub(r"uint\s+sink\s*=\s*\d+u;", replacement, text, count=1) + path.write_text(new_text, encoding="utf-8") + + +def pick_body_value(rng, exclude): + choices = [2, 3, 5, 7] + value = rng.choice(choices) + if value == exclude: + value = choices[(choices.index(value) + 1) % len(choices)] + return value + + +def pick_defines(rng): + pool = [ + "#define NBL_NSC_TEST_DEF_A 1", + "#define NBL_NSC_TEST_DEF_B 2", + "#define NBL_NSC_TEST_DEF_C 3", + "#define NBL_NSC_TEST_DEF_D 4", + ] + rng.shuffle(pool) + count = rng.randint(1, 3) + return pool[:count] + + +def normalized_includes(): + return [ + "#include ", + "#include ", + "#include ", + ] + + +def default_builtin_includes(): + return [ + "#include ", + "#include ", + "#include ", + ] + + +def replace_section(text, begin, end, lines): + begin_idx = text.find(begin) + end_idx = text.find(end, begin_idx) + if begin_idx == -1 or end_idx == -1: + raise RuntimeError(f"missing proxy markers: {begin} / {end}") + end_idx += len(end) + content = "\n".join(lines) + if content: + content = f"\n{content}\n" + else: + content = "\n" + return text[:begin_idx] + begin + content + end + text[end_idx:] + + +def set_proxy_defines(proxy_path, defines): + text = proxy_path.read_text(encoding="utf-8") + updated = replace_section( + text, + "// NBL_NSC_CACHE_TEST_DEFINES_BEGIN", + "// NBL_NSC_CACHE_TEST_DEFINES_END", + defines, + ) + proxy_path.write_text(updated, encoding="utf-8") + + +def set_proxy_includes(proxy_path, includes): + text = proxy_path.read_text(encoding="utf-8") + updated = replace_section( + text, + "// NBL_NSC_CACHE_TEST_INCLUDES_BEGIN", + "// 
NBL_NSC_CACHE_TEST_INCLUDES_END", + includes, + ) + proxy_path.write_text(updated, encoding="utf-8") + + +def delete_path(path): + if path.exists(): + path.unlink() + + +def assert_true(expr, message): + if not expr: + raise RuntimeError(message) + + +def assert_not_exists(path, message): + if path.exists(): + raise RuntimeError(message) + + +def assert_eq(actual, expected, message): + if actual != expected: + raise RuntimeError(f"{message}: expected {expected}, got {actual}") + +def cleanup_outputs(output_path, report_path, args): + delete_path(output_path) + delete_path(report_path) + + log_path = Path(args.log) if args.log else Path(str(output_path) + ".log") + depfile_path = Path(args.depfile) if args.depfile else Path(str(output_path) + ".dep") + shader_cache_path = Path(args.shader_cache) if args.shader_cache else Path(str(output_path) + ".ppcache") + preprocess_cache_path = Path(args.preprocess_cache) if args.preprocess_cache else Path(str(output_path) + ".ppcache.pre") + preprocessed_path = Path(args.preprocessed) if args.preprocessed else Path(str(output_path) + ".pre.hlsl") + + delete_path(log_path) + delete_path(depfile_path) + delete_path(shader_cache_path) + delete_path(preprocess_cache_path) + delete_path(preprocessed_path) + + +def assert_exists(path, message): + if not path.exists(): + raise RuntimeError(message) + + +def check_artifacts(output_path, report_path, args, expect_shader_cache=True, expect_preprocess_cache=True, expect_preprocessed=True): + log_path = Path(args.log) if args.log else Path(str(output_path) + ".log") + depfile_path = Path(args.depfile) if args.depfile else Path(str(output_path) + ".dep") + shader_cache_path = Path(args.shader_cache) if args.shader_cache else Path(str(output_path) + ".ppcache") + preprocess_cache_path = Path(args.preprocess_cache) if args.preprocess_cache else Path(str(output_path) + ".ppcache.pre") + preprocessed_path = Path(args.preprocessed) if args.preprocessed else Path(str(output_path) + ".pre.hlsl") + 
+ assert_exists(output_path, "output .spv not found after cold run") + assert_exists(report_path, "report not found after cold run") + assert_exists(log_path, "log not found after cold run") + assert_exists(depfile_path, "depfile not found after cold run") + if expect_shader_cache: + assert_exists(shader_cache_path, "shader cache not found after cold run") + else: + assert_not_exists(shader_cache_path, "shader cache should not be created") + if expect_preprocess_cache: + assert_exists(preprocess_cache_path, "preprocess cache not found after cold run") + else: + assert_not_exists(preprocess_cache_path, "preprocess cache should not be created") + if expect_preprocessed: + assert_exists(preprocessed_path, "preprocessed output not found after cold run") + else: + assert_not_exists(preprocessed_path, "preprocessed output should not be created") + + +def normalize_dep_path(text): + return text.replace("\\", "/") + + +def assert_report_schema(report): + assert_true(isinstance(report, dict), "report should be an object") + required_sections = ["shader_cache", "preprocess_cache", "compile", "output", "input", "total_ms"] + for key in required_sections: + assert_true(key in report, f"report missing key: {key}") + assert_true(isinstance(report["shader_cache"], dict), "shader_cache should be object") + assert_true(isinstance(report["preprocess_cache"], dict), "preprocess_cache should be object") + assert_true(isinstance(report["compile"], dict), "compile should be object") + assert_true(isinstance(report["output"], dict), "output should be object") + assert_true(isinstance(report["input"], str), "input should be string") + assert_true(isinstance(report["total_ms"], int), "total_ms should be int") + if "preamble" in report: + assert_true(isinstance(report["preamble"], dict), "preamble should be object") + + +def percentile(values, pct): + if not values: + return 0 + ordered = sorted(values) + idx = int(round((pct / 100.0) * (len(ordered) - 1))) + return ordered[idx] + + +def 
report_time_ms(report): + return int(report.get("total_with_output_ms", report.get("total_ms", 0))) + + +def main(): + args = parse_args() + rng = random.Random(args.seed) + input_path = Path(args.input) + output_path = Path(args.output) + report_path = Path(args.report) + proxy_path = input_path.parent / "proxy.hlsl" + + if args.mode == "shader_cache_cold": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + check_artifacts(output_path, report_path, args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected on cold run") + assert_eq(report["preprocess_cache"]["status"], "miss", "preprocess cache miss expected on cold run") + assert_eq(report["compile"]["called"], True, "compile should run on cold run") + return + + if args.mode == "shader_cache_hit": + set_body_value(input_path, 1) + delete_path(output_path) + delete_path(report_path) + run_build(args) + assert_true(report_path.exists(), "report not found") + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], True, "shader cache hit expected") + assert_eq(report["preprocess_cache"]["status"], "skipped", "preprocess cache should be skipped on shader hit") + assert_eq(report["compile"]["called"], False, "compile should be skipped on shader cache hit") + return + + if args.mode == "preprocess_cache_cold": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + check_artifacts(output_path, report_path, args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected on cold run") + assert_eq(report["preprocess_cache"]["status"], "miss", "preprocess cache miss expected on cold run") + assert_eq(report["compile"]["called"], True, "compile should run on cold run") + return + + if args.mode == "preprocess_cache_hit": + set_body_value(input_path, 1) + set_body_value(input_path, 
pick_body_value(rng, 1)) + delete_path(report_path) + run_build(args) + assert_true(report_path.exists(), "report not found") + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache should miss on body change") + assert_eq(report["preprocess_cache"]["status"], "hit", "preprocess cache hit expected") + assert_eq(report["compile"]["called"], True, "compile should run on preprocess cache hit") + assert_true(report["preamble"]["used"] is False, "preamble should be unused") + set_body_value(input_path, 1) + return + + if args.mode == "preamble_cache_cold": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + check_artifacts(output_path, report_path, args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected on cold run") + assert_eq(report["preprocess_cache"]["status"], "miss", "preprocess cache miss expected on cold run") + assert_eq(report["compile"]["called"], True, "compile should run on cold run") + return + + if args.mode == "preamble_cache_hit": + set_body_value(input_path, 1) + set_body_value(input_path, pick_body_value(rng, 1)) + delete_path(report_path) + run_build(args) + assert_true(report_path.exists(), "report not found") + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache should miss on body change") + assert_eq(report["preprocess_cache"]["status"], "hit", "preprocess cache hit expected") + assert_eq(report["compile"]["called"], True, "compile should run on preamble hit") + assert_true(report["preamble"]["used"] is True, "preamble should be used") + set_body_value(input_path, 1) + return + + if args.mode == "preamble_cache_hit_time": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + set_body_value(input_path, pick_body_value(rng, 1)) + delete_path(report_path) + run_build(args) + 
assert_true(report_path.exists(), "report not found") + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache should miss on body change") + assert_eq(report["preprocess_cache"]["status"], "hit", "preprocess cache hit expected") + assert_eq(report["compile"]["called"], True, "compile should run on preamble hit") + assert_true(report["preamble"]["used"] is True, "preamble should be used") + total_ms = report_time_ms(report) + print(f"preamble_hit_total_with_output_ms={total_ms}") + if args.budget_ms > 0: + assert_true(total_ms <= args.budget_ms, "preamble hit time budget exceeded") + set_body_value(input_path, 1) + return + + if args.mode == "no_cache_cold": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + cmd = apply_output_overrides(command_without_all_caches(normalize_command(args.command)), args) + run_build(args, cmd) + check_artifacts(output_path, report_path, args, expect_shader_cache=False, expect_preprocess_cache=False, expect_preprocessed=False) + report = load_report(report_path) + assert_true(report.get("shader_cache", {}).get("enabled") is False, "shader cache should be disabled") + assert_true(report.get("preprocess_cache", {}).get("enabled") is False, "preprocess cache should be disabled") + preamble = report.get("preamble", {}) + assert_true(preamble.get("enabled") is False, "preamble should be disabled") + assert_eq(report["compile"]["called"], True, "compile should run with caches disabled") + return + + if args.mode == "shader_cache_disabled": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + cmd = command_without_shader_cache(normalize_command(args.command)) + run_build(args, cmd) + check_artifacts(output_path, report_path, args, expect_shader_cache=False, expect_preprocess_cache=True, expect_preprocessed=True) + report = load_report(report_path) + assert_true(report.get("shader_cache", {}).get("enabled") is False, "shader cache should 
be disabled") + assert_eq(report["compile"]["called"], True, "compile should run when shader cache is disabled") + return + + if args.mode == "preprocess_cache_disabled": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + cmd = command_without_preprocess_cache(normalize_command(args.command)) + run_build(args, cmd) + check_artifacts(output_path, report_path, args, expect_shader_cache=True, expect_preprocess_cache=False, expect_preprocessed=False) + report = load_report(report_path) + assert_true(report.get("preprocess_cache", {}).get("enabled") is False, "preprocess cache should be disabled") + assert_eq(report["compile"]["called"], True, "compile should run when preprocess cache is disabled") + return + + if args.mode == "preamble_cache_disabled": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + cmd = command_without_preamble(normalize_command(args.command)) + run_build(args, cmd) + set_body_value(input_path, pick_body_value(rng, 1)) + delete_path(report_path) + run_build(args, cmd) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache should miss on body change") + assert_eq(report["preprocess_cache"]["status"], "hit", "preprocess cache hit expected") + preamble = report.get("preamble", {}) + assert_true(preamble.get("enabled") is False, "preamble should be disabled") + assert_true(preamble.get("used", False) is False, "preamble should not be used when disabled") + set_body_value(input_path, 1) + return + + if args.mode == "shader_cache_isolation": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + cmd = command_without_preprocess_cache(command_without_preamble(normalize_command(args.command))) + run_build(args, cmd) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected on first run") + set_body_value(input_path, pick_body_value(rng, 1)) + 
delete_path(report_path) + run_build(args, cmd) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache should not hit on changed body") + assert_eq(report["compile"]["called"], True, "compile should run on shader cache miss") + set_body_value(input_path, 1) + return + + if args.mode == "deps_invalidation": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + original_proxy = proxy_path.read_text(encoding="utf-8") + try: + run_build(args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected on cold run") + assert_eq(report["preprocess_cache"]["status"], "miss", "preprocess cache miss expected on cold run") + set_proxy_defines(proxy_path, pick_defines(rng)) + delete_path(report_path) + run_build(args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected after dep change") + assert_eq(report["preprocess_cache"]["status"], "miss", "preprocess cache miss expected after dep change") + finally: + proxy_path.write_text(original_proxy, encoding="utf-8") + return + + if args.mode == "path_normalization": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + original_proxy = proxy_path.read_text(encoding="utf-8") + try: + set_proxy_includes(proxy_path, normalized_includes()) + run_build(args) + delete_path(report_path) + run_build(args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], True, "shader cache hit expected with normalized includes") + assert_eq(report["preprocess_cache"]["status"], "skipped", "preprocess cache should be skipped on shader hit") + finally: + proxy_path.write_text(original_proxy, encoding="utf-8") + return + + if args.mode == "random_defines": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + original_proxy = proxy_path.read_text(encoding="utf-8") + try: + 
set_proxy_defines(proxy_path, pick_defines(rng)) + run_build(args) + delete_path(report_path) + run_build(args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], True, "shader cache hit expected after randomized defines") + finally: + proxy_path.write_text(original_proxy, encoding="utf-8") + return + + if args.mode == "parallel_smoke": + base_cmd = normalize_command(args.command) + base_output = output_path + + def make_paths(idx): + new_output = base_output.with_name(f"{base_output.stem}.p{idx}{base_output.suffix}") + new_report = Path(str(new_output) + ".report.json") + new_log = Path(str(new_output) + ".log") + new_dep = Path(str(new_output) + ".dep") + return new_output, new_report, new_log, new_dep + + def worker(idx): + new_output, new_report, new_log, new_dep = make_paths(idx) + for p in [new_output, new_report, new_log, new_dep]: + delete_path(p) + cmd = replace_option_value(base_cmd, "-Fc", str(new_output)) + cmd = replace_option_value(cmd, "-MF", str(new_dep)) + cmd = replace_option_value(cmd, "-nbl-report", str(new_report)) + run_build(args, cmd) + assert_true(new_report.exists(), "parallel report not found") + load_report(new_report) + + with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, args.parallel_jobs)) as pool: + futures = [pool.submit(worker, idx) for idx in range(args.parallel_jobs)] + for fut in futures: + fut.result() + return + + if args.mode == "stress": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + totals = [] + totals_with_output = [] + for _ in range(args.iterations): + delete_path(report_path) + run_build(args) + report = load_report(report_path) + totals.append(report_time_ms(report)) + totals_with_output.append(int(report.get("total_with_output_ms", report_time_ms(report)))) + if totals: + print(f"stress total_ms median={statistics.median(totals)} p95={percentile(totals, 95)} samples={len(totals)}") + print(f"stress total_with_output_ms 
median={statistics.median(totals_with_output)} p95={percentile(totals_with_output, 95)} samples={len(totals_with_output)}") + return + + if args.mode == "report_schema": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + report = load_report(report_path) + assert_report_schema(report) + return + + if args.mode == "depfile_contents": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + depfile_path = Path(args.depfile) if args.depfile else Path(str(output_path) + ".dep") + dep_text = normalize_dep_path(depfile_path.read_text(encoding="utf-8")) + input_text = normalize_dep_path(str(input_path)) + proxy_text = normalize_dep_path(str(proxy_path)) + assert_true(input_text in dep_text, "depfile missing input path") + assert_true(proxy_text in dep_text, "depfile missing proxy path") + return + + if args.mode == "cache_path_override": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + override_shader = Path(str(output_path) + ".override.ppcache") + override_preprocess = Path(str(output_path) + ".override.ppcache.pre") + delete_path(override_shader) + delete_path(override_preprocess) + cmd = normalize_command(args.command) + if not cmd: + raise RuntimeError("missing command for cache override test") + insert_at = max(len(cmd) - 1, 0) + cmd = ( + cmd[:insert_at] + + ["-shader-cache-file", str(override_shader), "-preprocess-cache-file", str(override_preprocess)] + + cmd[insert_at:] + ) + run_build(args, cmd) + assert_exists(override_shader, "shader cache override file not created") + assert_exists(override_preprocess, "preprocess cache override file not created") + return + + if args.mode == "large_include_graph": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + original_proxy = proxy_path.read_text(encoding="utf-8") + created = [] + try: + for idx in range(25): + inc_path = proxy_path.parent / 
f"dummy_inc_{idx}.hlsl" + inc_path.write_text(f"// dummy {idx}\n", encoding="utf-8") + created.append(inc_path) + includes = default_builtin_includes() + includes.extend([f"#include \"{p.name}\"" for p in created]) + set_proxy_includes(proxy_path, includes) + run_build(args) + depfile_path = Path(args.depfile) if args.depfile else Path(str(output_path) + ".dep") + dep_text = normalize_dep_path(depfile_path.read_text(encoding="utf-8")) + for idx in [0, len(created) // 2, len(created) - 1]: + check_path = normalize_dep_path(str(created[idx])) + assert_true(check_path in dep_text, "depfile missing dummy include") + finally: + proxy_path.write_text(original_proxy, encoding="utf-8") + for p in created: + delete_path(p) + return + + if args.mode == "unused_include": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + original_proxy = proxy_path.read_text(encoding="utf-8") + unused_path = proxy_path.parent / "unused_inc.hlsl" + try: + unused_path.write_text("// unused\n", encoding="utf-8") + includes = default_builtin_includes() + includes.extend(["#if 0", f"#include \"{unused_path.name}\"", "#endif"]) + set_proxy_includes(proxy_path, includes) + run_build(args) + depfile_path = Path(args.depfile) if args.depfile else Path(str(output_path) + ".dep") + dep_text = normalize_dep_path(depfile_path.read_text(encoding="utf-8")) + assert_true(normalize_dep_path(str(unused_path)) not in dep_text, "depfile should not include unused include") + finally: + proxy_path.write_text(original_proxy, encoding="utf-8") + delete_path(unused_path) + return + + raise RuntimeError(f"unknown mode: {args.mode}") + + +if __name__ == "__main__": + main() From 719b1d76724412f8cc9e7bea42b781b48767b02f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 22 Jan 2026 22:01:24 +0100 Subject: [PATCH 13/14] Update cache layer benchmarks and preprocessing settings --- src/nbl/asset/utils/waveContext.h | 73 +++++++++++++++++++++++---- 
tools/nsc/test/cache_layers/README.md | 26 +++++----- 2 files changed, 78 insertions(+), 21 deletions(-) diff --git a/src/nbl/asset/utils/waveContext.h b/src/nbl/asset/utils/waveContext.h index a6510b5fba..4fff0b556b 100644 --- a/src/nbl/asset/utils/waveContext.h +++ b/src/nbl/asset/utils/waveContext.h @@ -11,6 +11,7 @@ #include #include +#include #include "nbl/asset/utils/IShaderCompiler.h" @@ -47,6 +48,7 @@ struct preprocessing_hooks final : public boost::wave::context_policies::default { preprocessing_hooks(const nbl::asset::IShaderCompiler::SPreprocessorOptions& _preprocessOptions) : m_includeFinder(_preprocessOptions.includeFinder), m_logger(_preprocessOptions.logger), m_pragmaStage(nbl::asset::IShader::E_SHADER_STAGE::ESS_UNKNOWN), m_dxc_compile_flags_override() + , m_preserveComments(_preprocessOptions.preserveComments), m_emitLineDirectives(_preprocessOptions.emitLineDirectives), m_emitPragmaDirectives(_preprocessOptions.emitPragmaDirectives) { hash_token_occurences = 0; } @@ -158,6 +160,9 @@ struct preprocessing_hooks final : public boost::wave::context_policies::default asset::IShader::E_SHADER_STAGE m_pragmaStage; int hash_token_occurences; std::vector m_dxc_compile_flags_override; + const bool m_preserveComments; + const bool m_emitLineDirectives; + const bool m_emitPragmaDirectives; }; @@ -189,21 +194,29 @@ class context : private boost::noncopyable typedef typename iteration_context_stack_type::size_type iter_size_type; context* this_() { return this; } // avoid warning in constructor + static boost::wave::language_support make_language(const preprocessing_hooks& hooks) + { + boost::wave::language_support lang = support_cpp20; + if (hooks.m_preserveComments) + lang = boost::wave::language_support(lang | support_option_preserve_comments); + if (hooks.m_emitLineDirectives) + lang = boost::wave::language_support(lang | support_option_emit_line_directives); + if (hooks.m_emitPragmaDirectives) + lang = boost::wave::language_support(lang | 
support_option_emit_pragma_directives); + lang = boost::wave::language_support(lang | support_option_include_guard_detection); + return lang; + } public: context(target_iterator_type const& first_, target_iterator_type const& last_, char const* fname, preprocessing_hooks const& hooks_) : first(first_), last(last_), filename(fname) , has_been_initialized(false) , current_relative_filename(fname) +#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 + , current_filename(fname ? fname : "") +#endif , macros(*this_()) - , language(language_support( - support_cpp20 - | support_option_preserve_comments - | support_option_emit_line_directives - | support_option_emit_pragma_directives -// | support_option_emit_contnewlines -// | support_option_insert_whitespace - )) + , language(make_language(hooks_)) , hooks(hooks_) { macros.init_predefined_macros(fname); @@ -507,9 +520,34 @@ class context : private boost::noncopyable } public: +#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 + void set_current_filename(char const* real_name) + { + current_filename = real_name ? real_name : ""; + } + std::string const& get_current_filename() const + { + return current_filename; + } + bool has_pragma_once(std::string const& filename) + { + return pragma_once_headers.find(filename) != pragma_once_headers.end(); + } + bool add_pragma_once_header(std::string const& filename, std::string const& guard_name) + { + get_hooks().detected_include_guard(derived(), filename, guard_name); + return pragma_once_headers.insert(filename).second; + } + bool add_pragma_once_header(token_type const& pragma_, std::string const& filename) + { + get_hooks().detected_pragma_once(derived(), pragma_, filename); + return pragma_once_headers.insert(filename).second; + } +#endif + void set_current_relative_filename(char const* real_name) { - current_relative_filename = real_name; + current_relative_filename = real_name ? 
real_name : ""; } std::string const& get_current_relative_filename() const { @@ -532,6 +570,10 @@ class context : private boost::noncopyable bool has_been_initialized; // set cwd once std::string current_relative_filename; // real relative name of current preprocessed file +#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 + std::string current_filename; + std::unordered_set pragma_once_headers; +#endif // Nabla Additions Start // these are temporaries! @@ -591,6 +633,15 @@ template<> inline bool boost::wave::impl::pp_iterator_functor inline bool boost::wave::impl::pp_iterator_functor Date: Fri, 23 Jan 2026 11:28:11 +0100 Subject: [PATCH 14/14] Unity build mode for nsc rules --- .gitignore | 4 + 3rdparty/boost/CMakeLists.txt | 5 +- cmake/common.cmake | 538 +++++++++++++++++- docs/nsc-prebuilds.md | 64 ++- include/nbl/asset/utils/IShaderCompiler.h | 29 +- include/nbl/core/string/SpirvKeyHelpers.h | 32 ++ src/nbl/asset/utils/CHLSLCompiler.cpp | 57 +- src/nbl/asset/utils/IShaderCompiler.cpp | 75 ++- .../utils/shaderCompiler_serialization.h | 9 + tools/nsc/main.cpp | 253 +++++--- 10 files changed, 941 insertions(+), 125 deletions(-) diff --git a/.gitignore b/.gitignore index c7bbb2808e..9fd3357b9f 100644 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,8 @@ tools/nsc/bin/* */__pycache__/* __pycache__/* *.pyc +Testing/ +nsc_cache_lzma/ +nsc_cache_*/ +_tmp/ diff --git a/3rdparty/boost/CMakeLists.txt b/3rdparty/boost/CMakeLists.txt index 1e7189fce1..131b287eed 100644 --- a/3rdparty/boost/CMakeLists.txt +++ b/3rdparty/boost/CMakeLists.txt @@ -120,10 +120,11 @@ endforeach() # include will lead to ABI mismatch hence we update the target and let inherit options target_compile_definitions(boost_wave PUBLIC BOOST_WAVE_ENABLE_COMMANDLINE_MACROS=1 - PUBLIC BOOST_WAVE_SUPPORT_PRAGMA_ONCE=0 + PUBLIC BOOST_WAVE_SUPPORT_PRAGMA_ONCE=1 PUBLIC BOOST_WAVE_EMIT_PRAGMA_DIRECTIVES=1 PUBLIC BOOST_WAVE_SERIALIZATION=0 PUBLIC BOOST_WAVE_SUPPORT_INCLUDE_NEXT=0 + PUBLIC BOOST_WAVE_USE_STRICT_LEXER=0 # 
threading option: https://github.com/boostorg/wave/issues/237#issuecomment-2710251773 PUBLIC BOOST_WAVE_SUPPORT_THREADING=0 @@ -131,4 +132,4 @@ target_compile_definitions(boost_wave set(NBL_BOOST_TARGETS ${NBL_BOOST_TARGETS} -PARENT_SCOPE) \ No newline at end of file +PARENT_SCOPE) diff --git a/cmake/common.cmake b/cmake/common.cmake index 640a603539..2aad3c9b6f 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1223,7 +1223,7 @@ struct DeviceConfigCaps set(REQUIRED_SINGLE_ARGS TARGET BINARY_DIR OUTPUT_VAR INPUTS INCLUDE NAMESPACE MOUNT_POINT_DEFINE) set(OPTIONAL_SINGLE_ARGS GLOB_DIR EXPORT_RULES) - cmake_parse_arguments(IMPL "DISCARD_DEFAULT_GLOB;DISABLE_CUSTOM_COMMANDS" "${REQUIRED_SINGLE_ARGS};${OPTIONAL_SINGLE_ARGS};LINK_TO" "COMMON_OPTIONS;DEPENDS" ${ARGV}) + cmake_parse_arguments(IMPL "DISCARD_DEFAULT_GLOB;DISABLE_CUSTOM_COMMANDS;UNITY_BUILD" "${REQUIRED_SINGLE_ARGS};${OPTIONAL_SINGLE_ARGS};LINK_TO" "COMMON_OPTIONS;DEPENDS;ENTRYPOINTS" ${ARGV}) NBL_PARSE_REQUIRED(IMPL ${REQUIRED_SINGLE_ARGS}) set(_NBL_DISABLE_CUSTOM_COMMANDS FALSE) @@ -1292,6 +1292,7 @@ $ set(HEADER_ITEM_VIEW [=[ #include #include +#include "nbl/core/hash/fnv1a64.h" #include "nbl/core/string/SpirvKeyHelpers.h" ]=]) @@ -1311,15 +1312,29 @@ namespace @IMPL_NAMESPACE@ { requires ((... && !std::is_pointer_v>)) inline constexpr typename nbl::core::detail::StringLiteralBufferType::type get_spirv_key(const Args&... args) { - return nbl::core::detail::SpirvKeyBuilder::build(args...); + return nbl::core::detail::SpirvFileKeyBuilder::build(args...); } template inline std::string get_spirv_key(const Device* device, const Args&... args) { - const auto key = nbl::core::detail::SpirvKeyBuilder::build_from_device(device, args...); + const auto key = nbl::core::detail::SpirvFileKeyBuilder::build_from_device(device, args...); return std::string(key.view()); } + + template + requires ((... && !std::is_pointer_v>)) + inline constexpr auto get_spirv_entrypoint(const Args&... 
args) + { + return nbl::core::detail::SpirvEntrypointBuilder::build(args...); + } + + template + inline std::string get_spirv_entrypoint(const Device* device, const Args&... args) + { + const auto entry = nbl::core::detail::SpirvEntrypointBuilder::build_from_device(device, args...); + return std::string(entry.view()); + } } ]=]) @@ -1426,6 +1441,88 @@ namespace @IMPL_NAMESPACE@ { endif() endmacro() + macro(NBL_NSC_HAS_LIB_PROFILE _OUT_VAR) + set(_NBL_HAS_LIB_PROFILE FALSE) + set(_NBL_SEEN_T FALSE) + foreach(_NBL_OPT IN LISTS COMPILE_OPTIONS IMPL_COMMON_OPTIONS) + if(_NBL_OPT MATCHES "^\\$<") + continue() + endif() + if(_NBL_SEEN_T) + if(_NBL_OPT MATCHES "^lib_") + set(_NBL_HAS_LIB_PROFILE TRUE) + endif() + set(_NBL_SEEN_T FALSE) + elseif(_NBL_OPT STREQUAL "-T") + set(_NBL_SEEN_T TRUE) + elseif(_NBL_OPT MATCHES "^-Tlib_") + set(_NBL_HAS_LIB_PROFILE TRUE) + endif() + endforeach() + set(${_OUT_VAR} ${_NBL_HAS_LIB_PROFILE}) + endmacro() + + macro(NBL_NSC_HAS_ENTRYPOINT_OPTION _OUT_VAR) + set(_NBL_HAS_ENTRYPOINT FALSE) + set(_NBL_SEEN_E FALSE) + foreach(_NBL_OPT IN LISTS COMPILE_OPTIONS IMPL_COMMON_OPTIONS) + if(_NBL_OPT MATCHES "^\\$<") + continue() + endif() + if(_NBL_SEEN_E) + set(_NBL_HAS_ENTRYPOINT TRUE) + set(_NBL_SEEN_E FALSE) + elseif(_NBL_OPT STREQUAL "-E") + set(_NBL_SEEN_E TRUE) + elseif(_NBL_OPT MATCHES "^-E.+") + set(_NBL_HAS_ENTRYPOINT TRUE) + endif() + endforeach() + set(${_OUT_VAR} ${_NBL_HAS_ENTRYPOINT}) + endmacro() + + if(IMPL_UNITY_BUILD) + if(NOT IMPL_ENTRYPOINTS) + ERROR_WHILE_PARSING_ITEM( + "UNITY_BUILD requires ENTRYPOINTS." + ) + endif() + set(_NBL_ENTRYPOINTS ${IMPL_ENTRYPOINTS}) + list(LENGTH _NBL_ENTRYPOINTS _NBL_ENTRYPOINT_COUNT) + if(_NBL_ENTRYPOINT_COUNT EQUAL 0) + ERROR_WHILE_PARSING_ITEM( + "UNITY_BUILD requires ENTRYPOINTS." 
+ ) + endif() + foreach(_NBL_ENTRY IN LISTS _NBL_ENTRYPOINTS) + if(NOT _NBL_ENTRY MATCHES "^[A-Za-z_][A-Za-z0-9_]*$") + ERROR_WHILE_PARSING_ITEM( + "Invalid ENTRYPOINTS entry \"${_NBL_ENTRY}\".\n" + "Entrypoint names must be valid C identifiers." + ) + endif() + endforeach() + list(REMOVE_DUPLICATES _NBL_ENTRYPOINTS) + list(LENGTH _NBL_ENTRYPOINTS _NBL_ENTRYPOINTS_UNIQ_COUNT) + if(NOT _NBL_ENTRYPOINTS_UNIQ_COUNT EQUAL _NBL_ENTRYPOINT_COUNT) + ERROR_WHILE_PARSING_ITEM( + "ENTRYPOINTS contains duplicates." + ) + endif() + NBL_NSC_HAS_LIB_PROFILE(_NBL_HAS_LIB_PROFILE) + if(NOT _NBL_HAS_LIB_PROFILE) + ERROR_WHILE_PARSING_ITEM( + "UNITY_BUILD requires a lib_* profile (use -T lib_* in COMPILE_OPTIONS or COMMON_OPTIONS)." + ) + endif() + NBL_NSC_HAS_ENTRYPOINT_OPTION(_NBL_HAS_ENTRYPOINT_OPT) + if(_NBL_HAS_ENTRYPOINT_OPT) + ERROR_WHILE_PARSING_ITEM( + "UNITY_BUILD does not allow -E entrypoint options; use ENTRYPOINTS." + ) + endif() + endif() + macro(NBL_NORMALIZE_FLOAT_LITERAL _CAP_NAME _VALUE _MANTISSA_DIGITS _TYPE_LABEL _OUT_VAR) NBL_REQUIRE_PYTHON() set(_NBL_RAW "${_VALUE}") @@ -1849,6 +1946,20 @@ sys.stdout.write(str(h)) list(LENGTH CAP_NAMES CAP_COUNT) + set(NBL_NSC_UNITY_CONTENT "") + if(IMPL_UNITY_BUILD) + set(NBL_NSC_UNITY_ENTRYPOINTS ${IMPL_ENTRYPOINTS}) + set(NBL_NSC_UNITY_INPUT_RAW "${TARGET_INPUT}") + file(TO_CMAKE_PATH "${TARGET_INPUT}" NBL_NSC_UNITY_INPUT_PATH) + set(NBL_NSC_UNITY_FINAL_KEY "${BASE_KEY}.spv") + NBL_HASH_SPIRV_KEY("${NBL_NSC_UNITY_FINAL_KEY}" NBL_NSC_UNITY_HASH) + set(NBL_NSC_UNITY_HASHED_KEY "${NBL_NSC_UNITY_HASH}.spv") + set(NBL_NSC_UNITY_OUTPUT_REL_PATH "$/${NBL_NSC_UNITY_HASHED_KEY}") + set(NBL_NSC_UNITY_OUTPUT "${IMPL_BINARY_DIR}/${NBL_NSC_UNITY_OUTPUT_REL_PATH}") + set(NBL_NSC_UNITY_INPUT "${IMPL_BINARY_DIR}/${NBL_NSC_UNITY_HASH}.unity.hlsl") + set(NBL_NSC_UNITY_CONTENT "#include \n") + endif() + set(RETVAL_FMT "${BASE_KEY}") set(RETVAL_ARGS "") set(CX_CAPACITY 0) @@ -2055,6 +2166,77 @@ namespace nbl::core::detail { string(CONFIGURE 
"${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") + if(IMPL_UNITY_BUILD) + set(UNITY_FILE_KEY_FMT "${BASE_KEY}.spv") + string(CONFIGURE [=[ +namespace nbl::core::detail { + template<> + struct SpirvFileKeyBuilder + { + template + @SPIRV_BUILD_REQUIRES@ + static constexpr typename StringLiteralBufferType::type build(const Args&... args) + { + (void)std::forward_as_tuple(args...); + typename StringLiteralBufferType::type nbl_spirv_full = {}; + nbl::core::detail::append_printf_s(nbl_spirv_full); + typename StringLiteralBufferType::type retval = {}; + retval.append("$/"); + nbl::core::detail::put(retval, nbl::core::FNV1a_64(nbl_spirv_full.view())); + retval.append(".spv"); + return retval; + } + + template + @SPIRV_BUILD_FROM_DEVICE_REQUIRES@ + static constexpr typename StringLiteralBufferType::type build_from_device(const Device* device, const Args&... args) + { + return build(@SPIRV_BUILD_FROM_DEVICE_ARGS_JOINED@); + } + }; +} + +]=] UNITY_FILE_KEY_EVAL @ONLY) + set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${UNITY_FILE_KEY_EVAL}") + + foreach(_NBL_ENTRY IN LISTS IMPL_ENTRYPOINTS) + string(LENGTH "${_NBL_ENTRY}" _NBL_ENTRY_LEN) + math(EXPR _NBL_ENTRY_CAP "${_NBL_ENTRY_LEN} + 7 + 20") + set(ENTRY_NAME "${_NBL_ENTRY}") + set(ENTRY_CAPACITY "${_NBL_ENTRY_CAP}") + string(CONFIGURE [=[ +namespace nbl::core::detail { + template<> + struct SpirvEntrypointBuilder + { + template + @SPIRV_BUILD_REQUIRES@ + static constexpr StringLiteralBuffer<@ENTRY_CAPACITY@ + 1> build(const Args&... 
args) + { +@SPIRV_ARG_DECLS@ typename StringLiteralBufferType::type nbl_spirv_full = {}; + nbl::core::detail::append_printf_s(nbl_spirv_full@RETVAL_ARGS_STR@); + const auto nbl_spirv_hash = nbl::core::FNV1a_64(nbl_spirv_full.view()); + StringLiteralBuffer<@ENTRY_CAPACITY@ + 1> retval = {}; + retval.append("@ENTRY_NAME@"); + retval.append("__nbl_p"); + nbl::core::detail::put(retval, nbl_spirv_hash); + return retval; + } + + template + @SPIRV_BUILD_FROM_DEVICE_REQUIRES@ + static constexpr StringLiteralBuffer<@ENTRY_CAPACITY@ + 1> build_from_device(const Device* device, const Args&... args) + { + return build(@SPIRV_BUILD_FROM_DEVICE_ARGS_JOINED@); + } + }; +} + +]=] UNITY_ENTRY_EVAL @ONLY) + set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${UNITY_ENTRY_EVAL}") + endforeach() + endif() + function(GENERATE_KEYS PREFIX CAP_INDEX) set(CAPS_VALUES_PART "${ARGN}") if(NUM_CAPS EQUAL 0 OR CAP_INDEX EQUAL ${NUM_CAPS}) @@ -2140,6 +2322,34 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE if(CAPS_EVAL STREQUAL "") set(CAPS_EVAL " // no caps\n") endif() + if(IMPL_UNITY_BUILD) + set(_NBL_UNITY_STRUCT "DeviceConfigCaps__nbl_p${FINAL_KEY_HASH}") + set(_NBL_UNITY_ENTRY_DEFS "") + set(_NBL_UNITY_ENTRY_UNDEFS "") + foreach(_NBL_ENTRY IN LISTS NBL_NSC_UNITY_ENTRYPOINTS) + string(APPEND _NBL_UNITY_ENTRY_DEFS "#define ${_NBL_ENTRY} ${_NBL_ENTRY}__nbl_p${FINAL_KEY_HASH}\n") + string(APPEND _NBL_UNITY_ENTRY_UNDEFS "#undef ${_NBL_ENTRY}\n") + endforeach() + set(UNITY_STRUCT "${_NBL_UNITY_STRUCT}") + set(UNITY_ENTRY_DEFS "${_NBL_UNITY_ENTRY_DEFS}") + set(UNITY_ENTRY_UNDEFS "${_NBL_UNITY_ENTRY_UNDEFS}") + set(UNITY_INPUT_PATH "${NBL_NSC_UNITY_INPUT_PATH}") + string(CONFIGURE [=[ + +struct @UNITY_STRUCT@ +{ +@CAPS_EVAL@ +}; +#define DeviceConfigCaps @UNITY_STRUCT@ +@UNITY_ENTRY_DEFS@#include "@UNITY_INPUT_PATH@" +@UNITY_ENTRY_UNDEFS@#undef DeviceConfigCaps + +]=] _NBL_UNITY_BLOCK @ONLY) + set(_NBL_UNITY_CONTENT 
"${NBL_NSC_UNITY_CONTENT}") + string(APPEND _NBL_UNITY_CONTENT "${_NBL_UNITY_BLOCK}") + set(NBL_NSC_UNITY_CONTENT "${_NBL_UNITY_CONTENT}" PARENT_SCOPE) + return() + endif() string(CONFIGURE "${DEVICE_CONFIG_VIEW}" CONFIG_CONTENT @ONLY) set(_NBL_CONFIG_WRITE TRUE) if(EXISTS "${CONFIG_FILE}") @@ -2491,6 +2701,328 @@ NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) @MEMBE GENERATE_KEYS("" 0) + if(IMPL_UNITY_BUILD) + set(NBL_NSC_UNITY_CONTENT "${NBL_NSC_UNITY_CONTENT}") + set(_NBL_UNITY_WRITE TRUE) + if(EXISTS "${NBL_NSC_UNITY_INPUT}") + file(READ "${NBL_NSC_UNITY_INPUT}" _NBL_UNITY_OLD) + if(_NBL_UNITY_OLD STREQUAL "${NBL_NSC_UNITY_CONTENT}") + set(_NBL_UNITY_WRITE FALSE) + endif() + endif() + if(_NBL_UNITY_WRITE) + file(WRITE "${NBL_NSC_UNITY_INPUT}" "${NBL_NSC_UNITY_CONTENT}") + endif() + + set(NBL_NSC_REGISTERED_INPUT "${NBL_NSC_UNITY_INPUT_RAW}") + set(NBL_NSC_COMPILE_INPUT "${NBL_NSC_UNITY_INPUT}") + set(NBL_NSC_DEPENDS_ON "${DEPENDS_ON}") + list(APPEND NBL_NSC_DEPENDS_ON "${NBL_NSC_UNITY_INPUT}" "${NBL_NSC_REGISTERED_INPUT}") + + set(FINAL_KEY_REL_PATH "${NBL_NSC_UNITY_OUTPUT_REL_PATH}") + set(TARGET_OUTPUT "${NBL_NSC_UNITY_OUTPUT}") + set(DEPFILE_PATH "${TARGET_OUTPUT}.dep") + set(NBL_NSC_LOG_PATH "${TARGET_OUTPUT}.log") + set(NBL_NSC_PREPROCESSED_PATH "${TARGET_OUTPUT}.pre.hlsl") + if(NSC_CACHE_DIR) + get_filename_component(NBL_NSC_CACHE_ROOT "${NSC_CACHE_DIR}" ABSOLUTE BASE_DIR "${CMAKE_BINARY_DIR}") + file(RELATIVE_PATH NBL_NSC_CACHE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT}") + set(NBL_NSC_CACHE_PATH "${NBL_NSC_CACHE_ROOT}/${NBL_NSC_CACHE_REL}.ppcache") + set(NBL_NSC_PREPROCESS_CACHE_PATH "${NBL_NSC_CACHE_ROOT}/${NBL_NSC_CACHE_REL}.ppcache.pre") + else() + set(NBL_NSC_CACHE_PATH "${TARGET_OUTPUT}.ppcache") + set(NBL_NSC_PREPROCESS_CACHE_PATH "${TARGET_OUTPUT}.ppcache.pre") + endif() + + set(NBL_NSC_DEPFILE_ARGS "") + if(NSC_USE_DEPFILE) + set(NBL_NSC_DEPFILE_ARGS -MD -MF "${DEPFILE_PATH}") + endif() + + set(NBL_NSC_CACHE_ARGS "") + 
if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-shader-cache) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-shader-cache-compression "${NSC_SHADER_CACHE_COMPRESSION}") + if(NSC_CACHE_DIR) + list(APPEND NBL_NSC_CACHE_ARGS -shader-cache-file "${NBL_NSC_CACHE_PATH}") + endif() + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-preprocess-cache) + if(NSC_CACHE_DIR) + list(APPEND NBL_NSC_CACHE_ARGS -preprocess-cache-file "${NBL_NSC_PREPROCESS_CACHE_PATH}") + endif() + if(NSC_PREPROCESS_PREAMBLE) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-preprocess-preamble) + endif() + endif() + if(NSC_STDOUT_LOG) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-stdout-log) + endif() + set(NBL_NSC_REPORT_ARGS "") + if(NSC_JSON_REPORT) + set(NBL_NSC_REPORT_PATH "${TARGET_OUTPUT}.report.json") + list(APPEND NBL_NSC_REPORT_ARGS -nbl-report "${NBL_NSC_REPORT_PATH}") + endif() + + set(NBL_NSC_COMPILE_COMMAND + "$" + -Fc "${TARGET_OUTPUT}" + ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} + ${NBL_NSC_DEPFILE_ARGS} + $<$:-verbose> + ${NBL_NSC_CACHE_ARGS} + ${NBL_NSC_REPORT_ARGS} + "${NBL_NSC_COMPILE_INPUT}" + ) + + get_filename_component(NBL_NSC_INPUT_NAME "${NBL_NSC_REGISTERED_INPUT}" NAME) + get_filename_component(NBL_NSC_CONFIG_NAME "${NBL_NSC_UNITY_INPUT}" NAME) + set(NBL_NSC_COMMENT_LEFT "${NBL_NSC_INPUT_NAME}") + set(NBL_NSC_COMMENT_RIGHT "${NBL_NSC_CONFIG_NAME}") + set(NBL_NSC_MAIN_DEPENDENCY "${NBL_NSC_REGISTERED_INPUT}") + if(TARGET nsc) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + list(APPEND NBL_NSC_DEPENDS_ON "$") + else() + list(APPEND NBL_NSC_DEPENDS_ON nsc) + endif() + endif() + set(NBL_NSC_BYPRODUCTS "${NBL_NSC_LOG_PATH}") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_BYPRODUCTS "${DEPFILE_PATH}") + endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_CACHE_PATH}") + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_PREPROCESS_CACHE_PATH}") + list(APPEND NBL_NSC_BYPRODUCTS 
"${NBL_NSC_PREPROCESSED_PATH}") + endif() + if(NSC_JSON_REPORT) + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_REPORT_PATH}") + endif() + + set(NBL_NSC_CUSTOM_COMMAND_ARGS + OUTPUT "${TARGET_OUTPUT}" + BYPRODUCTS ${NBL_NSC_BYPRODUCTS} + COMMAND ${NBL_NSC_COMPILE_COMMAND} + DEPENDS ${NBL_NSC_DEPENDS_ON} + COMMENT "${NBL_NSC_COMMENT_LEFT} (${NBL_NSC_COMMENT_RIGHT})" + VERBATIM + COMMAND_EXPAND_LISTS + ) + if(NBL_NSC_MAIN_DEPENDENCY) + list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS MAIN_DEPENDENCY "${NBL_NSC_MAIN_DEPENDENCY}") + endif() + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS DEPFILE "${DEPFILE_PATH}") + endif() + if(IMPL_EXPORT_RULES) + set(_NBL_EXPORT_INDEX "${_NBL_EXPORT_RULE_INDEX}") + set(${IMPL_EXPORT_RULES}_COMMAND_${_NBL_EXPORT_INDEX} ${NBL_NSC_COMPILE_COMMAND} PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_OUTPUT_${_NBL_EXPORT_INDEX} "${TARGET_OUTPUT}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_LOG_${_NBL_EXPORT_INDEX} "${NBL_NSC_LOG_PATH}" PARENT_SCOPE) + if(NSC_SHADER_CACHE) + set(${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_INDEX} "${NBL_NSC_CACHE_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_PREPROCESS_CACHE) + set(${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_INDEX} "${NBL_NSC_PREPROCESS_CACHE_PATH}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_INDEX} "${NBL_NSC_PREPROCESSED_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_USE_DEPFILE) + set(${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_INDEX} "${DEPFILE_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_JSON_REPORT) + set(${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_INDEX} "${NBL_NSC_REPORT_PATH}" PARENT_SCOPE) + else() + 
set(${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + math(EXPR _NBL_EXPORT_INDEX_NEXT "${_NBL_EXPORT_INDEX} + 1") + set(_NBL_EXPORT_RULE_INDEX "${_NBL_EXPORT_INDEX_NEXT}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_COUNT "${_NBL_EXPORT_INDEX_NEXT}" PARENT_SCOPE) + endif() + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + add_custom_command(${NBL_NSC_CUSTOM_COMMAND_ARGS}) + endif() + set(NBL_NSC_OUT_FILES "") + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + set(NBL_NSC_OUT_FILES "${TARGET_OUTPUT}" "${NBL_NSC_LOG_PATH}") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES "${DEPFILE_PATH}") + endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_CACHE_PATH}") + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_PREPROCESS_CACHE_PATH}") + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_PREPROCESSED_PATH}") + endif() + set_source_files_properties(${NBL_NSC_OUT_FILES} PROPERTIES GENERATED TRUE) + endif() + + set(HEADER_ONLY_LIKE "") + set(ADD_INPUT_AS_HEADER_ONLY TRUE) + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS AND CMAKE_GENERATOR MATCHES "Visual Studio") + set(ADD_INPUT_AS_HEADER_ONLY FALSE) + endif() + if(ADD_INPUT_AS_HEADER_ONLY) + list(APPEND HEADER_ONLY_LIKE "${NBL_NSC_REGISTERED_INPUT}") + endif() + if(NBL_NSC_OUT_FILES AND NOT CMAKE_CONFIGURATION_TYPES) + list(APPEND HEADER_ONLY_LIKE ${NBL_NSC_OUT_FILES}) + endif() + if(HEADER_ONLY_LIKE AND IMPL_HLSL_GLOB) + foreach(_HLSL_SOURCE IN LISTS IMPL_HLSL_GLOB) + list(REMOVE_ITEM HEADER_ONLY_LIKE "${_HLSL_SOURCE}") + endforeach() + endif() + if(HEADER_ONLY_LIKE) + list(REMOVE_DUPLICATES HEADER_ONLY_LIKE) + target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) + set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES + HEADER_FILE_ONLY ON + ) + endif() + set(ADD_CONFIG_AS_HEADER_ONLY TRUE) + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NBL_NSC_MAIN_DEPENDENCY STREQUAL "${NBL_NSC_UNITY_INPUT}") + set(ADD_CONFIG_AS_HEADER_ONLY 
FALSE) + endif() + endif() + if(ADD_CONFIG_AS_HEADER_ONLY) + target_sources(${IMPL_TARGET} PRIVATE "${NBL_NSC_UNITY_INPUT}") + set_source_files_properties("${NBL_NSC_UNITY_INPUT}" PROPERTIES + GENERATED TRUE + HEADER_FILE_ONLY ON + ) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${NBL_NSC_UNITY_INPUT}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + if(CMAKE_CONFIGURATION_TYPES) + foreach(_CFG IN LISTS CMAKE_CONFIGURATION_TYPES) + if(_CFG STREQUAL "") + continue() + endif() + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${NBL_NSC_UNITY_HASHED_KEY}") + set(TARGET_OUTPUT_IDE_PREPROCESSED "${TARGET_OUTPUT_IDE}.pre.hlsl") + if(NSC_CACHE_DIR) + file(RELATIVE_PATH TARGET_OUTPUT_IDE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT_IDE}") + set(TARGET_OUTPUT_IDE_CACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache.pre") + else() + set(TARGET_OUTPUT_IDE_CACHE "${TARGET_OUTPUT_IDE}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${TARGET_OUTPUT_IDE}.ppcache.pre") + endif() + set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.dep") + endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_CACHE}") + endif() + set(ADD_PREPROCESSED_IDE TRUE) + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PRECACHE}") + if(ADD_PREPROCESSED_IDE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + endif() + endif() + list(REMOVE_DUPLICATES NBL_NSC_OUT_FILES_IDE) + target_sources(${IMPL_TARGET} PRIVATE ${NBL_NSC_OUT_FILES_IDE}) + set_source_files_properties(${NBL_NSC_OUT_FILES_IDE} PROPERTIES + HEADER_FILE_ONLY ON + GENERATED TRUE + ) + if(NSC_SHADER_CACHE) + 
set_source_files_properties("${TARGET_OUTPUT_IDE_CACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + endif() + if(NSC_PREPROCESS_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PRECACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + if(ADD_PREPROCESSED_IDE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() + endif() + source_group("${OUT}/${_CFG}" FILES ${NBL_NSC_OUT_FILES_IDE}) + endforeach() + else() + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${NBL_NSC_UNITY_HASHED_KEY}") + set(TARGET_OUTPUT_IDE_PREPROCESSED "${TARGET_OUTPUT_IDE}.pre.hlsl") + if(NSC_CACHE_DIR) + file(RELATIVE_PATH TARGET_OUTPUT_IDE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT_IDE}") + set(TARGET_OUTPUT_IDE_CACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache.pre") + else() + set(TARGET_OUTPUT_IDE_CACHE "${TARGET_OUTPUT_IDE}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${TARGET_OUTPUT_IDE}.ppcache.pre") + endif() + set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.dep") + endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_CACHE}") + endif() + set(ADD_PREPROCESSED_IDE TRUE) + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PRECACHE}") + if(ADD_PREPROCESSED_IDE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + endif() + endif() + list(REMOVE_DUPLICATES NBL_NSC_OUT_FILES_IDE) + target_sources(${IMPL_TARGET} PRIVATE ${NBL_NSC_OUT_FILES_IDE}) + set_source_files_properties(${NBL_NSC_OUT_FILES_IDE} PROPERTIES + HEADER_FILE_ONLY ON + GENERATED TRUE + ) + 
if(NSC_SHADER_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_CACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + endif() + if(NSC_PREPROCESS_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PRECACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + if(ADD_PREPROCESSED_IDE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() + endif() + source_group("${OUT}" FILES ${NBL_NSC_OUT_FILES_IDE}) + endif() + endif() + + set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES + NBL_SPIRV_REGISTERED_INPUT "${NBL_NSC_REGISTERED_INPUT}" + NBL_SPIRV_PERMUTATION_CONFIG "${NBL_NSC_UNITY_INPUT}" + NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" + NBL_SPIRV_ACCESS_KEY "${FINAL_KEY_REL_PATH}" + ) + + set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") + endif() + endforeach() unset(KEYS) diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md index 44a35dbac4..141f832e59 100644 --- a/docs/nsc-prebuilds.md +++ b/docs/nsc-prebuilds.md @@ -42,7 +42,7 @@ For each registered input it generates: - One `.spv` output **per CMake configuration** (`Debug/`, `Release/`, `RelWithDebInfo/`). - A matching `.spv.hash` sidecar for fast up-to-date checks on cache hits. -- If you use `CAPS`, it generates a **cartesian product** of permutations and emits a `.spv` for each. +- If you use `CAPS`, it generates a **cartesian product** of permutations and emits a `.spv` for each (or a single `.spv` in `UNITY_BUILD` mode; see below). 
- A generated header (you choose the path via `INCLUDE`) containing: - a primary template `get_spirv_key(...args)` and `get_spirv_key(device, ...args)` - `get_spirv_key` returns a small owning buffer; use `.view()` or implicit `std::string_view` to consume it @@ -212,7 +212,7 @@ std::string_view key = keyBuf; auto bundle = assetMgr->getAsset(key.data(), loadParams); ``` -`OUTPUT_VAR` (here: `KEYS`) is assigned the list of **all** produced access keys (all configurations + all permutations). These are already hashed (e.g. `Debug/123456789.spv`) and are intended to be fed into `NBL_CREATE_RESOURCE_ARCHIVE(BUILTINS ${KEYS})`. +`OUTPUT_VAR` (here: `KEYS`) is assigned the list of **all** produced access keys (all configurations + all permutations). These are already hashed (e.g. `Debug/123456789.spv`) and are intended to be fed into `NBL_CREATE_RESOURCE_ARCHIVE(BUILTINS ${KEYS})`. In `UNITY_BUILD` mode this list contains one entry per base `KEY` (all permutations share a single `.spv`). ## Permutations via `CAPS` @@ -231,6 +231,66 @@ Each `CAPS` entry looks like: At build time, NSC compiles each combination of values (cartesian product). At runtime, `get_spirv_key` appends suffixes using the structs you pass in for `limits`/`features` (duck-typed by required members) and any custom kinds. Each group starts with `__limits`, `__features`, or `__`, followed by `.member_` entries. Group order follows the **first appearance of each kind in `CAPS`** (and this same order is the required argument order for `get_spirv_key`); groups with no members are omitted. +## Unity build for permutations (single `.spv`) + +`UNITY_BUILD` is an optional mode for `NBL_CREATE_NSC_COMPILE_RULES` that emits a **single `.spv`** per input `KEY`, while still supporting all permutation structs. It works by compiling a single HLSL unit that includes your input multiple times, once per permutation, and **renames each entrypoint** to a unique mangled name. 
+ +Usage: + +```cmake +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} -T lib_6_8 + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} + UNITY_BUILD + ENTRYPOINTS entryA entryB +) +``` + +Constraints: + +- `UNITY_BUILD` **requires a `lib_*` profile** (e.g. `-T lib_6_8`). Non-lib profiles require a single `-E` entrypoint and cannot host multiple entrypoints in one `.spv`. +- `ENTRYPOINTS` is **mandatory** in `UNITY_BUILD`. The names must be valid C identifiers. +- `UNITY_BUILD` does not allow `-E` options; entrypoints are taken from `ENTRYPOINTS`. +- Your input file must be safe to include multiple times (no `#pragma once` or include guards on the main input). +- Per-permutation macros must not alter guarded includes. Permutations are intended to be consumed via `DeviceConfigCaps`, not by redefining macros that affect heavy headers. + +How it works: + +- For each permutation, NSC auto-generates a wrapper block: + - creates a unique `DeviceConfigCaps__nbl_p` with that permutation's values + - `#define`s `DeviceConfigCaps` and each entrypoint name to a mangled symbol + - `#include`s the original input file + - `#undef`s the aliases +- Mangled entrypoint name is: + +``` +__nbl_p +``` + +`` is `FNV-1a 64-bit` of the **full permutation key string** (the same string used for hashing outputs in normal mode). + +Runtime usage: + +```cpp +auto keyBuf = nbl::this_example::builtin::build::get_spirv_key<"shader">(device); +auto entry = nbl::this_example::builtin::build::get_spirv_entrypoint<"shader", "entryA">(device); +// load .spv by key, pick entrypoint by name +``` + +Notes: + +- `get_spirv_key` returns the **file key** (same for all permutations in unity mode). 
+- `get_spirv_entrypoint` returns the **mangled entrypoint** for the current permutation. +- `get_spirv_entrypoint` uses the same `KEY` and permutation args as `get_spirv_key`. +- If you need different entrypoint lists per input, use separate `NBL_CREATE_NSC_COMPILE_RULES` calls. + Each generated `.config` file defines a `DeviceConfigCaps` struct for HLSL. It includes: - flat members for `limits`/`features` (backwards compatibility with older shaders) - nested structs for custom kinds only, e.g. `DeviceConfigCaps::userA` diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index bf6dcd74f1..1b30055b30 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -197,6 +197,10 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted std::span forceIncludes = {}; std::string_view codeForCache = {}; bool applyForceIncludes = true; + bool preserveComments = true; + bool emitLineDirectives = true; + bool emitPragmaDirectives = true; + bool fastSafeValidation = false; E_SPIRV_VERSION targetSpirvVersion = E_SPIRV_VERSION::ESV_1_6; bool depfile = false; system::path depfilePath = {}; @@ -290,6 +294,9 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted return false; if (forceIncludes != other.forceIncludes) return false; + if (preserveComments != other.preserveComments) return false; + if (emitLineDirectives != other.emitLineDirectives) return false; + if (emitPragmaDirectives != other.emitPragmaDirectives) return false; return true; } @@ -318,12 +325,19 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted for (const auto& inc : options.forceIncludes) forceIncludes.emplace_back(inc); + preserveComments = options.preserveComments; + emitLineDirectives = options.emitLineDirectives; + emitPragmaDirectives = options.emitPragmaDirectives; + // Sort them so equality and hashing are well defined std::sort(extraDefines.begin(), extraDefines.end(), 
[](const SMacroDefinition& lhs, const SMacroDefinition& rhs) {return lhs.identifier < rhs.identifier; }); }; std::string sourceIdentifier; std::vector extraDefines; std::vector forceIncludes; + bool preserveComments = true; + bool emitLineDirectives = true; + bool emitPragmaDirectives = true; }; // TODO: SPreprocessorArgs could just be folded into `SCompilerArgs` to have less classes and decompressShader struct SCompilerArgs final @@ -383,7 +397,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted inline SEntry(const std::string_view _mainFileContents, const SCompilerOptions& compilerOptions) : mainFileContents(std::move(std::string(_mainFileContents))), compilerArgs(compilerOptions) { // Form the hashable for the compiler data - size_t preprocessorArgsHashableSize = compilerArgs.preprocessorArgs.sourceIdentifier.size() + compilerArgs.preprocessorArgs.extraDefines.size() * sizeof(SMacroDefinition); + size_t preprocessorArgsHashableSize = compilerArgs.preprocessorArgs.sourceIdentifier.size() + compilerArgs.preprocessorArgs.extraDefines.size() * sizeof(SMacroDefinition) + 3u; size_t compilerArgsHashableSize = sizeof(compilerArgs.stage) + sizeof(compilerArgs.targetSpirvVersion) + sizeof(compilerArgs.debugInfoFlags.value) + compilerArgs.optimizerPasses.size(); std::vector hashable; hashable.reserve(preprocessorArgsHashableSize + compilerArgsHashableSize + mainFileContents.size()); @@ -397,6 +411,9 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted } for (const auto& inc : compilerArgs.preprocessorArgs.forceIncludes) hashable.insert(hashable.end(), inc.begin(), inc.end()); + hashable.push_back(static_cast(compilerArgs.preprocessorArgs.preserveComments)); + hashable.push_back(static_cast(compilerArgs.preprocessorArgs.emitLineDirectives)); + hashable.push_back(static_cast(compilerArgs.preprocessorArgs.emitPragmaDirectives)); // Insert rest of stuff from this struct. 
We're going to treat stage, targetSpirvVersion and debugInfoFlags.value as byte arrays for simplicity hashable.insert(hashable.end(), reinterpret_cast(&compilerArgs.stage), reinterpret_cast(&compilerArgs.stage) + sizeof(compilerArgs.stage)); @@ -480,9 +497,9 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted return m_defaultCompression; } - NBL_API2 core::smart_refctd_ptr find(const SEntry& mainFile, const CIncludeFinder* finder) const; - NBL_API2 bool contains(const SEntry& mainFile, const CIncludeFinder* finder) const; - NBL_API2 bool findEntryForCode(std::string_view code, const SCompilerOptions& options, const CIncludeFinder* finder, SEntry& outEntry, bool validateDependencies = true, bool* depsUpdated = nullptr) const; + NBL_API2 core::smart_refctd_ptr find(const SEntry& mainFile, const CIncludeFinder* finder, bool fastSafeValidation = false) const; + NBL_API2 bool contains(const SEntry& mainFile, const CIncludeFinder* finder, bool fastSafeValidation = false) const; + NBL_API2 bool findEntryForCode(std::string_view code, const SCompilerOptions& options, const CIncludeFinder* finder, SEntry& outEntry, bool validateDependencies = true, bool* depsUpdated = nullptr, bool fastSafeValidation = false) const; NBL_API2 core::smart_refctd_ptr decompressEntry(const SEntry& entry) const; inline CCache() {} @@ -515,7 +532,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted EntrySet m_container; ECompression m_defaultCompression = ECompression::LZMA; - NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder, bool validateDependencies, bool* depsUpdated) const; + NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder, bool validateDependencies, bool* depsUpdated, bool fastSafeValidation) const; }; class CPreprocessCache final : public IReferenceCounted @@ -582,7 +599,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted NBL_API2 static bool 
writeToFile(const system::path& path, const CPreprocessCache& cache); NBL_API2 static SProbeResult probe(std::string_view code, const CPreprocessCache* cache, ELoadStatus loadStatus, const SPreprocessorOptions& preprocessOptions); NBL_API2 static const char* getProbeReason(EProbeStatus status); - NBL_API2 bool validateDependencies(const CIncludeFinder* finder, bool* depsUpdated = nullptr) const; + NBL_API2 bool validateDependencies(const CIncludeFinder* finder, bool* depsUpdated = nullptr, bool fastSafeValidation = false) const; NBL_API2 std::string buildCombinedCode(std::string_view body, std::string_view sourceIdentifier) const; private: diff --git a/include/nbl/core/string/SpirvKeyHelpers.h b/include/nbl/core/string/SpirvKeyHelpers.h index c9f3150c2d..51939f255d 100644 --- a/include/nbl/core/string/SpirvKeyHelpers.h +++ b/include/nbl/core/string/SpirvKeyHelpers.h @@ -24,6 +24,38 @@ struct SpirvKeyBuilder } }; +template +struct SpirvFileKeyBuilder +{ + template + static constexpr auto build(const Args&... args) + { + return SpirvKeyBuilder::build(args...); + } + + template + static constexpr auto build_from_device(const Device* device, const Args&... args) + { + return SpirvKeyBuilder::build_from_device(device, args...); + } +}; + +template +struct SpirvEntrypointBuilder +{ + template + static constexpr void build(const Args&...) + { + static_assert(SpirvKeyBuilderMissing::value, "Unknown SPIR-V key"); + } + + template + static constexpr void build_from_device(const Device*, const Args&...) 
+ { + static_assert(SpirvKeyBuilderMissing::value, "Unknown SPIR-V key"); + } +}; + template concept spirv_device_has_limits = requires(const Device* device) { diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 2bb4d2bea9..1243abff53 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -412,27 +412,11 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE if (!dependenciesOut) dependenciesOut = &localDependencies; - // HACK: we do a pre-pre-process here to add \n after every #pragma to neutralize boost::wave's actions - // See https://github.com/Devsh-Graphics-Programming/Nabla/issues/746 - pragmaStart = clock_t::now(); - size_t extra_newlines = 0; - size_t line_start = 0; - for (size_t i = 0; i < code.size(); ++i) + if (code.find("#pragma") != std::string::npos) { - if (code[i] != '\n') - continue; - size_t j = line_start; - while (j < i && (code[j] == ' ' || code[j] == '\t' || code[j] == '\r')) - ++j; - if (j + 7 <= i && code.compare(j, 7, "#pragma") == 0) - ++extra_newlines; - line_start = i + 1; - } - if (extra_newlines) - { - std::string patched; - patched.reserve(code.size() + extra_newlines); - line_start = 0; + pragmaStart = clock_t::now(); + size_t extra_newlines = 0; + size_t line_start = 0; for (size_t i = 0; i < code.size(); ++i) { if (code[i] != '\n') @@ -440,17 +424,34 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE size_t j = line_start; while (j < i && (code[j] == ' ' || code[j] == '\t' || code[j] == '\r')) ++j; - const bool is_pragma = (j + 7 <= i) && (code.compare(j, 7, "#pragma") == 0); - patched.append(code, line_start, i - line_start + 1); - if (is_pragma) - patched.push_back('\n'); + if (j + 7 <= i && code.compare(j, 7, "#pragma") == 0) + ++extra_newlines; line_start = i + 1; } - if (line_start < code.size()) - patched.append(code, line_start, code.size() - line_start); - code = 
std::move(patched); + if (extra_newlines) + { + std::string patched; + patched.reserve(code.size() + extra_newlines); + line_start = 0; + for (size_t i = 0; i < code.size(); ++i) + { + if (code[i] != '\n') + continue; + size_t j = line_start; + while (j < i && (code[j] == ' ' || code[j] == '\t' || code[j] == '\r')) + ++j; + const bool is_pragma = (j + 7 <= i) && (code.compare(j, 7, "#pragma") == 0); + patched.append(code, line_start, i - line_start + 1); + if (is_pragma) + patched.push_back('\n'); + line_start = i + 1; + } + if (line_start < code.size()) + patched.append(code, line_start, code.size() - line_start); + code = std::move(patched); + } + pragmaEnd = clock_t::now(); } - pragmaEnd = clock_t::now(); // preprocess waveStart = clock_t::now(); diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index 605cee6c74..c6fc9d7946 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -56,8 +56,18 @@ struct IncludeCacheEntry std::unordered_map g_includeCache; std::mutex g_includeCacheMutex; +std::unordered_map g_canonicalCache; +std::mutex g_canonicalCacheMutex; #ifdef NBL_EMBED_BUILTIN_RESOURCES +struct BuiltinIncludeCacheEntry +{ + nbl::asset::IShaderCompiler::IIncludeLoader::found_t value; +}; + +std::unordered_map g_builtinIncludeCache; +std::mutex g_builtinIncludeCacheMutex; + inline bool tryGetBuiltinResource(const std::string& normalized, const nbl::system::SBuiltinFile*& outFile, std::string& outRel, std::string_view& outPrefix) { auto tryNamespace = [&](std::string_view prefix, const nbl::system::SBuiltinFile& (*getResource)(const std::string&)) -> bool @@ -125,6 +135,13 @@ class CBuiltinArchiveIncludeLoader final : public nbl::asset::IShaderCompiler::I normalized = (nbl::system::path(search) / includeName).generic_string(); } + { + std::lock_guard lock(g_builtinIncludeCacheMutex); + const auto it = g_builtinIncludeCache.find(normalized); + if (it != 
g_builtinIncludeCache.end()) + return it->second.value; + } + const nbl::system::SBuiltinFile* resource = nullptr; std::string rel; std::string_view prefix; @@ -140,6 +157,10 @@ class CBuiltinArchiveIncludeLoader final : public nbl::asset::IShaderCompiler::I ret.hasHash = true; ret.fileSize = resource->size; ret.hasFileInfo = false; + { + std::lock_guard lock(g_builtinIncludeCacheMutex); + g_builtinIncludeCache.emplace(normalized, BuiltinIncludeCacheEntry{ ret }); + } return ret; } }; @@ -287,7 +308,7 @@ inline void collectFileInfoMismatchesParallel(const DepContainer& deps, std::vec if (threads > fileCount) threads = static_cast(fileCount); - if (threads <= 1u || fileCount < 32u) + if (threads <= 1u || fileCount < 64u) { for (size_t k = 0; k < fileCount; ++k) { @@ -464,7 +485,7 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons if (options.readCache) { - auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder, true, nullptr); + auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder, true, nullptr, options.preprocessorOptions.fastSafeValidation); if (found != options.readCache->m_container.end()) { if (options.cacheHit) @@ -561,8 +582,25 @@ IShaderCompiler::CFileSystemIncludeLoader::CFileSystemIncludeLoader(core::smart_ auto IShaderCompiler::CFileSystemIncludeLoader::getInclude(const system::path& searchPath, const std::string& includeName) const -> found_t { system::path path = searchPath / includeName; - if (std::filesystem::exists(path)) - path = std::filesystem::canonical(path); + if (!path.empty()) + { + const auto rawPath = path; + { + std::lock_guard lock(g_canonicalCacheMutex); + const auto it = g_canonicalCache.find(rawPath); + if (it != g_canonicalCache.end()) + path = it->second; + } + if (path == rawPath && std::filesystem::exists(path)) + { + auto canonicalPath = std::filesystem::canonical(path); + { + std::lock_guard lock(g_canonicalCacheMutex); + 
g_canonicalCache.emplace(rawPath, canonicalPath); + } + path = std::move(canonicalPath); + } + } uint64_t fileSize = 0; int64_t lastWriteTime = 0; @@ -816,24 +854,24 @@ auto IShaderCompiler::CIncludeFinder::tryIncludeGenerators(const std::string& in return {}; } -core::smart_refctd_ptr IShaderCompiler::CCache::find(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const +core::smart_refctd_ptr IShaderCompiler::CCache::find(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder, bool fastSafeValidation) const { - const auto found = find_impl(mainFile, finder, true, nullptr); + const auto found = find_impl(mainFile, finder, true, nullptr, fastSafeValidation); if (found==m_container.end()) return nullptr; return found->decompressShader(); } -bool IShaderCompiler::CCache::contains(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const +bool IShaderCompiler::CCache::contains(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder, bool fastSafeValidation) const { - return find_impl(mainFile, finder, true, nullptr) != m_container.end(); + return find_impl(mainFile, finder, true, nullptr, fastSafeValidation) != m_container.end(); } -bool IShaderCompiler::CCache::findEntryForCode(std::string_view code, const SCompilerOptions& options, const IShaderCompiler::CIncludeFinder* finder, SEntry& outEntry, bool validateDependencies, bool* depsUpdated) const +bool IShaderCompiler::CCache::findEntryForCode(std::string_view code, const SCompilerOptions& options, const IShaderCompiler::CIncludeFinder* finder, SEntry& outEntry, bool validateDependencies, bool* depsUpdated, bool fastSafeValidation) const { const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? 
code : options.preprocessorOptions.codeForCache; const CCache::SEntry entry(cacheCode, options); - const auto found = find_impl(entry, finder, validateDependencies, depsUpdated); + const auto found = find_impl(entry, finder, validateDependencies, depsUpdated, fastSafeValidation); if (found == m_container.end()) return false; outEntry = SEntry(*found); @@ -845,7 +883,7 @@ core::smart_refctd_ptr IShaderCompiler::CCache::decompressEntry( return entry.decompressShader(); } -IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_impl(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder, bool validateDependencies, bool* depsUpdated) const +IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_impl(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder, bool validateDependencies, bool* depsUpdated, bool fastSafeValidation) const { auto found = m_container.find(mainFile); if (found == m_container.end() || !validateDependencies) @@ -862,6 +900,8 @@ IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_ collectFileInfoMismatchesParallel(found->dependencies, mismatches, system); if (mismatches.empty()) return found; + if (fastSafeValidation) + return m_container.end(); if (!finder) return m_container.end(); @@ -1281,7 +1321,7 @@ bool IShaderCompiler::probeShaderCache(const CCache* cache, std::string_view cod return false; const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? 
code : options.preprocessorOptions.codeForCache; const CCache::SEntry entry(cacheCode, options); - return cache->contains(entry, finder); + return cache->contains(entry, finder, options.preprocessorOptions.fastSafeValidation); } bool IShaderCompiler::preprocessPrefixForCache(std::string_view code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache::SEntry& outEntry) const @@ -1319,6 +1359,11 @@ IShaderCompiler::CPreprocessCache::SProbeResult IShaderCompiler::CPreprocessCach { core::blake3_hasher hasher; hasher.update(result.prefix.data(), result.prefix.size()); + const uint8_t waveFlags = + (static_cast(preprocessOptions.preserveComments) << 0u) | + (static_cast(preprocessOptions.emitLineDirectives) << 1u) | + (static_cast(preprocessOptions.emitPragmaDirectives) << 2u); + hasher.update(&waveFlags, sizeof(waveFlags)); result.prefixHash = static_cast(hasher); } const bool hasEntry = cache && cache->hasEntry(); @@ -1342,7 +1387,7 @@ IShaderCompiler::CPreprocessCache::SProbeResult IShaderCompiler::CPreprocessCach return result; } bool depsUpdated = false; - const bool depsValid = cache->validateDependencies(finder, &depsUpdated); + const bool depsValid = cache->validateDependencies(finder, &depsUpdated, preprocessOptions.fastSafeValidation); result.depsUpdated = depsUpdated; if (prefixMatch && depsValid) { @@ -1868,7 +1913,7 @@ bool IShaderCompiler::CPreprocessCache::writeToFile(const system::path& path, co return bool(out); } -bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinder* finder, bool* depsUpdated) const +bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinder* finder, bool* depsUpdated, bool fastSafeValidation) const { if (!m_hasEntry || !finder) return false; @@ -1882,6 +1927,8 @@ bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinde collectFileInfoMismatchesParallel(m_entry.dependencies, mismatches, system); if (mismatches.empty()) 
return true; + if (fastSafeValidation) + return false; std::unordered_map fileStatus; std::unordered_map logicalStatus; diff --git a/src/nbl/asset/utils/shaderCompiler_serialization.h b/src/nbl/asset/utils/shaderCompiler_serialization.h index a5024a7d05..b32a975554 100644 --- a/src/nbl/asset/utils/shaderCompiler_serialization.h +++ b/src/nbl/asset/utils/shaderCompiler_serialization.h @@ -37,6 +37,9 @@ inline void to_json(json& j, const SEntry::SPreprocessorArgs& preprocArgs) { "sourceIdentifier", preprocArgs.sourceIdentifier }, { "extraDefines", preprocArgs.extraDefines}, { "forceIncludes", preprocArgs.forceIncludes}, + { "preserveComments", preprocArgs.preserveComments}, + { "emitLineDirectives", preprocArgs.emitLineDirectives}, + { "emitPragmaDirectives", preprocArgs.emitPragmaDirectives}, }; } @@ -45,6 +48,12 @@ inline void from_json(const json& j, SEntry::SPreprocessorArgs& preprocArgs) j.at("sourceIdentifier").get_to(preprocArgs.sourceIdentifier); j.at("extraDefines").get_to(preprocArgs.extraDefines); j.at("forceIncludes").get_to(preprocArgs.forceIncludes); + if (j.contains("preserveComments")) + j.at("preserveComments").get_to(preprocArgs.preserveComments); + if (j.contains("emitLineDirectives")) + j.at("emitLineDirectives").get_to(preprocArgs.emitLineDirectives); + if (j.contains("emitPragmaDirectives")) + j.at("emitPragmaDirectives").get_to(preprocArgs.emitPragmaDirectives); } // Optimizer pass has its own method for easier vector serialization diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index ca8494e4ec..e2d457ad02 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include "nbl/asset/metadata/CHLSLMetadata.h" @@ -1453,7 +1454,7 @@ class ShaderCompiler final : public IApplicationFramework reserveSize += def.identifier.size() + def.definition.size(); for (const auto& inc : options.preprocessorOptions.forceIncludes) reserveSize += inc.size(); - reserveSize += 
sizeof(options.stage) + sizeof(options.preprocessorOptions.targetSpirvVersion) + sizeof(options.debugInfoFlags.value); + reserveSize += sizeof(options.stage) + sizeof(options.preprocessorOptions.targetSpirvVersion) + sizeof(options.debugInfoFlags.value) + 3u; reserveSize += cacheCode.size(); std::vector defines; @@ -1476,6 +1477,9 @@ class ShaderCompiler final : public IApplicationFramework } for (const auto& inc : options.preprocessorOptions.forceIncludes) hashable.insert(hashable.end(), inc.begin(), inc.end()); + hashable.push_back(static_cast(options.preprocessorOptions.preserveComments)); + hashable.push_back(static_cast(options.preprocessorOptions.emitLineDirectives)); + hashable.push_back(static_cast(options.preprocessorOptions.emitPragmaDirectives)); const auto stage = options.stage; const auto spirvVersion = options.preprocessorOptions.targetSpirvVersion; @@ -1520,6 +1524,9 @@ class ShaderCompiler final : public IApplicationFramework for (const auto& inc : options.preprocessorOptions.forceIncludes) forceIncludes.push_back(inc); pre["forceIncludes"] = std::move(forceIncludes); + pre["preserveComments"] = options.preprocessorOptions.preserveComments; + pre["emitLineDirectives"] = options.preprocessorOptions.emitLineDirectives; + pre["emitPragmaDirectives"] = options.preprocessorOptions.emitPragmaDirectives; json j; j["shaderStage"] = static_cast(options.stage); @@ -1871,14 +1878,20 @@ class ShaderCompiler final : public IApplicationFramework std::string_view code(codePtr, codeSize); if (!code.empty() && code.back() == '\0') code.remove_suffix(1); + const bool useShaderCache = shaderCache.enabled && !preprocessOnly; + const bool usePreCache = preCache.enabled && !preprocessOnly; + const bool needCacheKey = useShaderCache || usePreCache; CHLSLCompiler::SPreprocessorOptions preOpt = {}; preOpt.sourceIdentifier = sourceIdentifier; preOpt.logger = m_logger.get(); preOpt.forceIncludes = std::span(m_force_includes); preOpt.depfile = false; preOpt.depfilePath = 
dep.path; + preOpt.preserveComments = preprocessOnly; + preOpt.emitLineDirectives = preprocessOnly; + preOpt.fastSafeValidation = useShaderCache || usePreCache; std::string codeForCacheStorage; - if (!sourceIdentifier.empty()) + if (needCacheKey && !sourceIdentifier.empty()) { uint64_t srcSize = 0; int64_t srcTime = 0; @@ -1900,7 +1913,7 @@ class ShaderCompiler final : public IApplicationFramework } } } - if (preOpt.codeForCache.empty()) + if (needCacheKey && preOpt.codeForCache.empty()) preOpt.codeForCache = code; CHLSLCompiler::SOptions opt = {}; @@ -1916,8 +1929,6 @@ class ShaderCompiler final : public IApplicationFramework return writeBinaryFile(m_system.get(), std::filesystem::path(std::string(path)), contents.data(), contents.size()); }; - const bool useShaderCache = shaderCache.enabled && !preprocessOnly; - const bool usePreCache = preCache.enabled && !preprocessOnly; const bool validateCacheDeps = true; if (reportEnabled) { @@ -2037,7 +2048,7 @@ class ShaderCompiler final : public IApplicationFramework { auto* finder = getFinder(); const auto validateStart = clock_t::now(); - shaderProbe.hit = shaderProbe.cacheObj->findEntryForCode(code, opt, finder, shaderProbe.entry, validateCacheDeps, &shaderProbe.depsUpdated); + shaderProbe.hit = shaderProbe.cacheObj->findEntryForCode(code, opt, finder, shaderProbe.entry, validateCacheDeps, &shaderProbe.depsUpdated, opt.preprocessorOptions.fastSafeValidation); const auto validateEnd = clock_t::now(); shaderProbe.entryReady = shaderProbe.hit; shaderProbe.validateDuration = validateEnd - validateStart; @@ -2113,7 +2124,7 @@ class ShaderCompiler final : public IApplicationFramework cacheObj->setEntry(std::move(entry)); bool depsUpdated = false; - const bool depsValid = cacheObj->validateDependencies(finder, &depsUpdated); + const bool depsValid = cacheObj->validateDependencies(finder, &depsUpdated, preOpt.fastSafeValidation); if (depsValid) { IShader::E_SHADER_STAGE stageOverrideThread = static_cast(shaderStage); @@ 
-2127,12 +2138,20 @@ class ShaderCompiler final : public IApplicationFramework preProbe.result.cacheUpdated = depsUpdated; preProbe.result.status = IShaderCompiler::CPreprocessCache::EProbeStatus::Hit; preProbe.result.stage = stageOverrideThread; - preProbe.result.code = cacheObj->buildCombinedCode(codeProbe.body, sourceIdentifier); - preProbe.ok = !preProbe.result.code.empty(); - if (preProbe.ok) + if (preCache.preamble) + { + preProbe.ok = true; preProbe.duration = clock_t::now() - start; + } else - preIndexHit = false; + { + preProbe.result.code = cacheObj->buildCombinedCode(codeProbe.body, sourceIdentifier); + preProbe.ok = !preProbe.result.code.empty(); + if (preProbe.ok) + preProbe.duration = clock_t::now() - start; + else + preIndexHit = false; + } } else { @@ -2407,7 +2426,7 @@ class ShaderCompiler final : public IApplicationFramework { stageOverride = preProbe.result.stage; preCacheObj = preProbe.cacheObj; - if (!preCache.preamble || !preProbe.result.cacheHit) + if (!preCache.preamble) { preprocessedCode = std::move(preProbe.result.code); preprocessedReady = true; @@ -2469,7 +2488,7 @@ class ShaderCompiler final : public IApplicationFramework if (ensureFullCacheForWrite(cacheObj)) { IShaderCompiler::CCache::SEntry fullEntry; - if (cacheObj->findEntryForCode(code, opt, nullptr, fullEntry, false, nullptr)) + if (cacheObj->findEntryForCode(code, opt, nullptr, fullEntry, false, nullptr, false)) { fullEntry.dependencies.clear(); fullEntry.dependencies.reserve(shaderProbe.entry.dependencies.size()); @@ -2583,25 +2602,6 @@ class ShaderCompiler final : public IApplicationFramework std::string bodyPreprocessed; if (!body.empty()) { - const auto bodyHasDirective = [](std::string_view text) -> bool - { - size_t pos = 0; - while (pos < text.size()) - { - size_t lineEnd = text.find('\n', pos); - if (lineEnd == std::string_view::npos) - lineEnd = text.size(); - size_t i = pos; - while (i < lineEnd && (text[i] == ' ' || text[i] == '\t' || text[i] == '\r')) - ++i; - if (i 
< lineEnd && text[i] == '#') - { - return true; - } - pos = lineEnd + 1; - } - return false; - }; const auto macroName = [](const std::string& macro) -> std::string_view { std::string_view name(macro); @@ -2615,8 +2615,29 @@ class ShaderCompiler final : public IApplicationFramework name.remove_suffix(1); return name; }; - const auto bodyUsesMacros = [&](std::string_view text) -> bool + struct StringViewHash { + size_t operator()(std::string_view value) const noexcept + { + return std::hash{}(value); + } + }; + struct BodyScanResult + { + bool hasDirective = false; + bool usesMacro = false; + bool hasInclude = false; + }; + const auto buildMacroNameSet = [&](const std::vector& macros) + { + std::unordered_set names; + names.reserve(macros.size() + 8); + for (const auto& macro : macros) + { + const std::string_view name = macroName(macro); + if (!name.empty()) + names.emplace(name); + } static constexpr std::string_view kBuiltinMacros[] = { "__LINE__", @@ -2627,20 +2648,136 @@ class ShaderCompiler final : public IApplicationFramework "__TIMESTAMP__" }; for (const auto builtin : kBuiltinMacros) + names.emplace(builtin); + return names; + }; + const auto scanBody = [&](std::string_view text, const std::unordered_set& macroNames) -> BodyScanResult + { + BodyScanResult result = {}; + bool atLineStart = true; + bool inLineComment = false; + bool inBlockComment = false; + bool inString = false; + char stringDelim = 0; + + auto isIdentStart = [](char c) { return (c == '_') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); }; + auto isIdentChar = [](char c) { return (c == '_') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); }; + + size_t i = 0; + while (i < text.size()) { - if (text.find(builtin) != std::string_view::npos) - return true; - } - for (const auto& macro : entry.macroDefs) - { - const std::string_view name = macroName(macro); - if (!name.empty() && text.find(name) != std::string_view::npos) - return true; + const char c = 
text[i]; + const char next = (i + 1 < text.size()) ? text[i + 1] : '\0'; + + if (inLineComment) + { + if (c == '\n') + { + inLineComment = false; + atLineStart = true; + } + ++i; + continue; + } + if (inBlockComment) + { + if (c == '*' && next == '/') + { + inBlockComment = false; + i += 2; + continue; + } + if (c == '\n') + atLineStart = true; + else + atLineStart = false; + ++i; + continue; + } + if (inString) + { + if (c == '\\' && i + 1 < text.size()) + { + i += 2; + continue; + } + if (c == stringDelim) + inString = false; + if (c == '\n') + atLineStart = true; + else + atLineStart = false; + ++i; + continue; + } + + if (c == '/' && next == '/') + { + inLineComment = true; + i += 2; + continue; + } + if (c == '/' && next == '*') + { + inBlockComment = true; + i += 2; + continue; + } + if (c == '"' || c == '\'') + { + inString = true; + stringDelim = c; + atLineStart = false; + ++i; + continue; + } + + if (c == '\n') + { + atLineStart = true; + ++i; + continue; + } + + if (atLineStart) + { + if (c == ' ' || c == '\t' || c == '\r') + { + ++i; + continue; + } + if (c == '#') + { + result.hasDirective = true; + size_t j = i + 1; + while (j < text.size() && (text[j] == ' ' || text[j] == '\t')) + ++j; + if (j + 7 <= text.size() && text.compare(j, 7, "include") == 0) + result.hasInclude = true; + } + atLineStart = false; + } + + if (isIdentStart(c)) + { + size_t j = i + 1; + while (j < text.size() && isIdentChar(text[j])) + ++j; + const std::string_view ident(text.data() + i, j - i); + if (!macroNames.empty() && macroNames.find(ident) != macroNames.end()) + result.usesMacro = true; + i = j; + continue; + } + + atLineStart = false; + ++i; } - return false; + return result; }; - const bool hasDirective = bodyHasDirective(body); - const bool needsPreprocess = hasDirective || bodyUsesMacros(body); + const auto macroNames = buildMacroNameSet(entry.macroDefs); + const auto scan = scanBody(body, macroNames); + const bool needsPreprocess = scan.hasDirective || 
scan.usesMacro; if (needsPreprocess) { if (!entry.macroBlock.empty()) @@ -2678,31 +2815,7 @@ class ShaderCompiler final : public IApplicationFramework bodyOpt.extraDefines = {}; } - const auto bodyHasInclude = [](std::string_view text) -> bool - { - size_t pos = 0; - while (pos < text.size()) - { - size_t lineEnd = text.find('\n', pos); - if (lineEnd == std::string_view::npos) - lineEnd = text.size(); - size_t i = pos; - while (i < lineEnd && (text[i] == ' ' || text[i] == '\t' || text[i] == '\r')) - ++i; - if (i < lineEnd && text[i] == '#') - { - ++i; - while (i < lineEnd && (text[i] == ' ' || text[i] == '\t')) - ++i; - if (lineEnd - i >= 7 && text.compare(i, 7, "include") == 0) - return true; - } - pos = lineEnd + 1; - } - return false; - }; - const bool hasInclude = bodyHasInclude(body); - auto* bodyDepsOut = hasInclude ? &bodyDeps : nullptr; + auto* bodyDepsOut = scan.hasInclude ? &bodyDeps : nullptr; const auto preambleFinderStart = clock_t::now(); auto* finder = getFinder(); preambleFinderDuration = clock_t::now() - preambleFinderStart; @@ -2942,7 +3055,7 @@ class ShaderCompiler final : public IApplicationFramework else if (cacheObj) { const auto depLookupStart = clock_t::now(); - const bool depFound = cacheObj->findEntryForCode(code, opt, compileFinder, depEntry, validateCacheDeps); + const bool depFound = cacheObj->findEntryForCode(code, opt, compileFinder, depEntry, validateCacheDeps, nullptr, opt.preprocessorOptions.fastSafeValidation); const auto depLookupEnd = clock_t::now(); if (verbose) {