diff --git a/.gitignore b/.gitignore index c7bbb2808e..9fd3357b9f 100644 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,8 @@ tools/nsc/bin/* */__pycache__/* __pycache__/* *.pyc +Testing/ +nsc_cache_lzma/ +nsc_cache_*/ +_tmp/ diff --git a/3rdparty/boost/CMakeLists.txt b/3rdparty/boost/CMakeLists.txt index 1e7189fce1..131b287eed 100644 --- a/3rdparty/boost/CMakeLists.txt +++ b/3rdparty/boost/CMakeLists.txt @@ -120,10 +120,11 @@ endforeach() # include will lead to ABI mismatch hence we update the target and let inherit options target_compile_definitions(boost_wave PUBLIC BOOST_WAVE_ENABLE_COMMANDLINE_MACROS=1 - PUBLIC BOOST_WAVE_SUPPORT_PRAGMA_ONCE=0 + PUBLIC BOOST_WAVE_SUPPORT_PRAGMA_ONCE=1 PUBLIC BOOST_WAVE_EMIT_PRAGMA_DIRECTIVES=1 PUBLIC BOOST_WAVE_SERIALIZATION=0 PUBLIC BOOST_WAVE_SUPPORT_INCLUDE_NEXT=0 + PUBLIC BOOST_WAVE_USE_STRICT_LEXER=0 # threading option: https://github.com/boostorg/wave/issues/237#issuecomment-2710251773 PUBLIC BOOST_WAVE_SUPPORT_THREADING=0 @@ -131,4 +132,4 @@ target_compile_definitions(boost_wave set(NBL_BOOST_TARGETS ${NBL_BOOST_TARGETS} -PARENT_SCOPE) \ No newline at end of file +PARENT_SCOPE) diff --git a/CMakeLists.txt b/CMakeLists.txt index 773c9c3563..49b2dc8eed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,7 @@ project(Nabla LANGUAGES CXX C ) enable_language(C CXX ASM ASM_NASM) +enable_testing() include(GNUInstallDirs) include(CMakePackageConfigHelpers) diff --git a/cmake/common.cmake b/cmake/common.cmake index 2de6dc758f..2aad3c9b6f 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1143,6 +1143,15 @@ option(NSC_DEBUG_EDIF_LINE_BIT "Add \"-fspv-debug=line\" to NSC Debug CLI" OFF) option(NSC_DEBUG_EDIF_TOOL_BIT "Add \"-fspv-debug=tool\" to NSC Debug CLI" ON) option(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT "Add \"-fspv-debug=vulkan-with-source\" to NSC Debug CLI" OFF) option(NSC_USE_DEPFILE "Generate depfiles for NSC custom commands" ON) +option(NBL_NSC_DISABLE_CUSTOM_COMMANDS "Disable NSC custom commands" OFF) 
+option(NBL_NSC_VERBOSE "Enable NSC verbose logging to .log" ON) +option(NSC_SHADER_CACHE "Enable NSC shader cache" ON) +set(NSC_SHADER_CACHE_COMPRESSION "raw" CACHE STRING "NSC shader cache compression (raw or lzma)") +option(NSC_PREPROCESS_CACHE "Enable NSC preprocess cache" ON) +option(NSC_PREPROCESS_PREAMBLE "Enable NSC preprocess preamble" ON) +option(NSC_STDOUT_LOG "Mirror NSC log to stdout" OFF) +option(NSC_JSON_REPORT "Write NSC JSON report alongside outputs" OFF) +set(NSC_CACHE_DIR "" CACHE PATH "Optional root directory for NSC cache files (shader/preprocess)") function(NBL_CREATE_NSC_COMPILE_RULES) set(COMMENT "this code has been autogenerated with Nabla CMake NBL_CREATE_HLSL_COMPILE_RULES utility") @@ -1152,18 +1161,13 @@ function(NBL_CREATE_NSC_COMPILE_RULES) // -> @COMMENT@! #ifndef _PERMUTATION_CAPS_AUTO_GEN_GLOBALS_INCLUDED_ #define _PERMUTATION_CAPS_AUTO_GEN_GLOBALS_INCLUDED_ -#ifdef __HLSL_VERSION #include struct DeviceConfigCaps { @CAPS_EVAL@ }; - -#include "@TARGET_INPUT@" - -#endif // __HLSL_VERSION -#endif // _PERMUTATION_CAPS_AUTO_GEN_GLOBALS_INCLUDED_ // <- @COMMENT@! 
+#endif // _PERMUTATION_CAPS_AUTO_GEN_GLOBALS_INCLUDED_ ]=]) @@ -1207,21 +1211,29 @@ struct DeviceConfigCaps list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=vulkan-with-source>) endif() + list(APPEND REQUIRED_OPTIONS + -I "${NBL_ROOT_PATH}/include" + -I "${NBL_ROOT_PATH}/3rdparty/dxc/dxc/external/SPIRV-Headers/include" + -I "${NBL_ROOT_PATH}/3rdparty/boost/superproject/libs/preprocessor/include" + -I "${NBL_ROOT_PATH_BINARY}/src/nbl/device/include" + ) if(NOT NBL_EMBED_BUILTIN_RESOURCES) - list(APPEND REQUIRED_OPTIONS - -no-nbl-builtins - -I "${NBL_ROOT_PATH}/include" - -I "${NBL_ROOT_PATH}/3rdparty/dxc/dxc/external/SPIRV-Headers/include" - -I "${NBL_ROOT_PATH}/3rdparty/boost/superproject/libs/preprocessor/include" - -I "${NBL_ROOT_PATH_BINARY}/src/nbl/device/include" - ) + list(APPEND REQUIRED_OPTIONS -no-nbl-builtins) endif() set(REQUIRED_SINGLE_ARGS TARGET BINARY_DIR OUTPUT_VAR INPUTS INCLUDE NAMESPACE MOUNT_POINT_DEFINE) - set(OPTIONAL_SINGLE_ARGS GLOB_DIR) - cmake_parse_arguments(IMPL "DISCARD_DEFAULT_GLOB" "${REQUIRED_SINGLE_ARGS};${OPTIONAL_SINGLE_ARGS};LINK_TO" "COMMON_OPTIONS;DEPENDS" ${ARGV}) + set(OPTIONAL_SINGLE_ARGS GLOB_DIR EXPORT_RULES) + cmake_parse_arguments(IMPL "DISCARD_DEFAULT_GLOB;DISABLE_CUSTOM_COMMANDS;UNITY_BUILD" "${REQUIRED_SINGLE_ARGS};${OPTIONAL_SINGLE_ARGS};LINK_TO" "COMMON_OPTIONS;DEPENDS;ENTRYPOINTS" ${ARGV}) NBL_PARSE_REQUIRED(IMPL ${REQUIRED_SINGLE_ARGS}) + set(_NBL_DISABLE_CUSTOM_COMMANDS FALSE) + if(NBL_NSC_DISABLE_CUSTOM_COMMANDS OR IMPL_DISABLE_CUSTOM_COMMANDS) + set(_NBL_DISABLE_CUSTOM_COMMANDS TRUE) + endif() + if(IMPL_EXPORT_RULES) + set(_NBL_EXPORT_RULE_INDEX 0) + endif() + set(IMPL_HLSL_GLOB "") if(NOT IMPL_DISCARD_DEFAULT_GLOB) set(GLOB_ROOT "${CMAKE_CURRENT_SOURCE_DIR}") @@ -1250,7 +1262,17 @@ struct DeviceConfigCaps if(NOT HEADER_RULE_GENERATED) set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include/$") set(INCLUDE_FILE "${INCLUDE_DIR}/$") - set(INCLUDE_CONTENT $) + set(NBL_HEADER_GUARD_RAW 
"${IMPL_TARGET}_${IMPL_NAMESPACE}_SPIRV_KEYS_HPP_INCLUDED") + string(SHA1 NBL_HEADER_GUARD_HASH "${NBL_HEADER_GUARD_RAW}") + string(TOUPPER "${NBL_HEADER_GUARD_HASH}" NBL_HEADER_GUARD_HASH_UPPER) + set(NBL_HEADER_GUARD "SPIRV_KEYS_${NBL_HEADER_GUARD_HASH_UPPER}_HPP_INCLUDED") + set(INCLUDE_CONTENT_TEMPLATE [=[ +#ifndef @NBL_HEADER_GUARD@ +#define @NBL_HEADER_GUARD@ +$ +#endif +]=]) + string(CONFIGURE "${INCLUDE_CONTENT_TEMPLATE}" INCLUDE_CONTENT @ONLY) file(GENERATE OUTPUT ${INCLUDE_FILE} CONTENT $ @@ -1260,7 +1282,7 @@ struct DeviceConfigCaps target_sources(${IMPL_TARGET} PUBLIC ${INCLUDE_FILE}) set_source_files_properties(${INCLUDE_FILE} PROPERTIES HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None + GENERATED TRUE ) target_compile_definitions(${IMPL_TARGET} INTERFACE $) @@ -1268,7 +1290,10 @@ struct DeviceConfigCaps set_target_properties(${IMPL_TARGET} PROPERTIES NBL_HEADER_GENERATED_RULE ON) set(HEADER_ITEM_VIEW [=[ -#include "nabla.h" +#include +#include +#include "nbl/core/hash/fnv1a64.h" +#include "nbl/core/string/SpirvKeyHelpers.h" ]=]) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_VIEW}") @@ -1283,13 +1308,32 @@ struct DeviceConfigCaps if(NOT NS_IMPL_KEYS_PROPERTY_DEFINED) set(HEADER_ITEM_VIEW [=[ namespace @IMPL_NAMESPACE@ { - template - inline const nbl::core::string get_spirv_key(const nbl::video::SPhysicalDeviceLimits& limits, const nbl::video::SPhysicalDeviceFeatures& features); + template + requires ((... && !std::is_pointer_v>)) + inline constexpr typename nbl::core::detail::StringLiteralBufferType::type get_spirv_key(const Args&... args) + { + return nbl::core::detail::SpirvFileKeyBuilder::build(args...); + } + + template + inline std::string get_spirv_key(const Device* device, const Args&... 
args) + { + const auto key = nbl::core::detail::SpirvFileKeyBuilder::build_from_device(device, args...); + return std::string(key.view()); + } - template - inline const nbl::core::string get_spirv_key(const nbl::video::ILogicalDevice* device) + template + requires ((... && !std::is_pointer_v>)) + inline constexpr auto get_spirv_entrypoint(const Args&... args) { - return get_spirv_key(device->getPhysicalDevice()->getLimits(), device->getEnabledFeatures()); + return nbl::core::detail::SpirvEntrypointBuilder::build(args...); + } + + template + inline std::string get_spirv_entrypoint(const Device* device, const Args&... args) + { + const auto entry = nbl::core::detail::SpirvEntrypointBuilder::build_from_device(device, args...); + return std::string(entry.view()); } } @@ -1368,67 +1412,413 @@ namespace @IMPL_NAMESPACE@ { ) endfunction() + macro(NBL_NSC_RESOLVE_CAP_KIND _CAP_KIND_RAW _CAP_STRUCT _CAP_NAME _OUT_KIND) + set(_CAP_KIND_RAW "${_CAP_KIND_RAW}") + set(_CAP_STRUCT "${_CAP_STRUCT}") + + if(_CAP_KIND_RAW STREQUAL "custom") + if(_CAP_STRUCT STREQUAL "") + ERROR_WHILE_PARSING_ITEM( + "CAPS entry with kind \"custom\" requires \"struct\".\n" + ) + endif() + set(${_OUT_KIND} "${_CAP_STRUCT}") + else() + set(${_OUT_KIND} "${_CAP_KIND_RAW}") + endif() + + if(NOT "${${_OUT_KIND}}" MATCHES "^[A-Za-z_][A-Za-z0-9_]*$") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP kind \"${${_OUT_KIND}}\" for ${_CAP_NAME}\n" + "CAP kinds must be valid C/C++ identifiers." 
+ ) + endif() + endmacro() + + macro(NBL_REQUIRE_PYTHON) + if(NOT Python3_EXECUTABLE) + find_package(Python3 COMPONENTS Interpreter REQUIRED) + endif() + endmacro() + + macro(NBL_NSC_HAS_LIB_PROFILE _OUT_VAR) + set(_NBL_HAS_LIB_PROFILE FALSE) + set(_NBL_SEEN_T FALSE) + foreach(_NBL_OPT IN LISTS COMPILE_OPTIONS IMPL_COMMON_OPTIONS) + if(_NBL_OPT MATCHES "^\\$<") + continue() + endif() + if(_NBL_SEEN_T) + if(_NBL_OPT MATCHES "^lib_") + set(_NBL_HAS_LIB_PROFILE TRUE) + endif() + set(_NBL_SEEN_T FALSE) + elseif(_NBL_OPT STREQUAL "-T") + set(_NBL_SEEN_T TRUE) + elseif(_NBL_OPT MATCHES "^-Tlib_") + set(_NBL_HAS_LIB_PROFILE TRUE) + endif() + endforeach() + set(${_OUT_VAR} ${_NBL_HAS_LIB_PROFILE}) + endmacro() + + macro(NBL_NSC_HAS_ENTRYPOINT_OPTION _OUT_VAR) + set(_NBL_HAS_ENTRYPOINT FALSE) + set(_NBL_SEEN_E FALSE) + foreach(_NBL_OPT IN LISTS COMPILE_OPTIONS IMPL_COMMON_OPTIONS) + if(_NBL_OPT MATCHES "^\\$<") + continue() + endif() + if(_NBL_SEEN_E) + set(_NBL_HAS_ENTRYPOINT TRUE) + set(_NBL_SEEN_E FALSE) + elseif(_NBL_OPT STREQUAL "-E") + set(_NBL_SEEN_E TRUE) + elseif(_NBL_OPT MATCHES "^-E.+") + set(_NBL_HAS_ENTRYPOINT TRUE) + endif() + endforeach() + set(${_OUT_VAR} ${_NBL_HAS_ENTRYPOINT}) + endmacro() + + if(IMPL_UNITY_BUILD) + if(NOT IMPL_ENTRYPOINTS) + ERROR_WHILE_PARSING_ITEM( + "UNITY_BUILD requires ENTRYPOINTS." + ) + endif() + set(_NBL_ENTRYPOINTS ${IMPL_ENTRYPOINTS}) + list(LENGTH _NBL_ENTRYPOINTS _NBL_ENTRYPOINT_COUNT) + if(_NBL_ENTRYPOINT_COUNT EQUAL 0) + ERROR_WHILE_PARSING_ITEM( + "UNITY_BUILD requires ENTRYPOINTS." + ) + endif() + foreach(_NBL_ENTRY IN LISTS _NBL_ENTRYPOINTS) + if(NOT _NBL_ENTRY MATCHES "^[A-Za-z_][A-Za-z0-9_]*$") + ERROR_WHILE_PARSING_ITEM( + "Invalid ENTRYPOINTS entry \"${_NBL_ENTRY}\".\n" + "Entrypoint names must be valid C identifiers." 
+ ) + endif() + endforeach() + list(REMOVE_DUPLICATES _NBL_ENTRYPOINTS) + list(LENGTH _NBL_ENTRYPOINTS _NBL_ENTRYPOINTS_UNIQ_COUNT) + if(NOT _NBL_ENTRYPOINTS_UNIQ_COUNT EQUAL _NBL_ENTRYPOINT_COUNT) + ERROR_WHILE_PARSING_ITEM( + "ENTRYPOINTS contains duplicates." + ) + endif() + NBL_NSC_HAS_LIB_PROFILE(_NBL_HAS_LIB_PROFILE) + if(NOT _NBL_HAS_LIB_PROFILE) + ERROR_WHILE_PARSING_ITEM( + "UNITY_BUILD requires a lib_* profile (use -T lib_* in COMPILE_OPTIONS or COMMON_OPTIONS)." + ) + endif() + NBL_NSC_HAS_ENTRYPOINT_OPTION(_NBL_HAS_ENTRYPOINT_OPT) + if(_NBL_HAS_ENTRYPOINT_OPT) + ERROR_WHILE_PARSING_ITEM( + "UNITY_BUILD does not allow -E entrypoint options; use ENTRYPOINTS." + ) + endif() + endif() + + macro(NBL_NORMALIZE_FLOAT_LITERAL _CAP_NAME _VALUE _MANTISSA_DIGITS _TYPE_LABEL _OUT_VAR) + NBL_REQUIRE_PYTHON() + set(_NBL_RAW "${_VALUE}") + if(_TYPE_LABEL STREQUAL "float") + if("${_NBL_RAW}" MATCHES "^[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][+-]?[0-9]+)?[fF]$") + string(REGEX REPLACE "[fF]$" "" _NBL_RAW "${_NBL_RAW}") + endif() + elseif(_TYPE_LABEL STREQUAL "double") + if("${_NBL_RAW}" MATCHES "^[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][+-]?[0-9]+)?[dD]$") + string(REGEX REPLACE "[dD]$" "" _NBL_RAW "${_NBL_RAW}") + endif() + endif() + + set(_NBL_CANON_DONE FALSE) + if("${_NBL_RAW}" MATCHES "^[+-]?[0-9]\\.([0-9]+)e([+-][0-9]+)$") + set(_NBL_MANTISSA "${CMAKE_MATCH_1}") + set(_NBL_EXPONENT "${CMAKE_MATCH_2}") + string(LENGTH "${_NBL_MANTISSA}" _NBL_MANTISSA_LEN) + string(LENGTH "${_NBL_EXPONENT}" _NBL_EXPONENT_LEN) + math(EXPR _NBL_EXPONENT_DIGITS "${_NBL_EXPONENT_LEN} - 1") + if(_NBL_MANTISSA_LEN EQUAL ${_MANTISSA_DIGITS} AND _NBL_EXPONENT_DIGITS GREATER_EQUAL 2 AND _NBL_EXPONENT_DIGITS LESS_EQUAL 3) + string(TOLOWER "${_NBL_RAW}" _NBL_CANON) + set(_NBL_CANON_DONE TRUE) + endif() + endif() + + if(NOT _NBL_CANON_DONE) + set(_NBL_PY_SCRIPT [=[ +import sys,math,struct +t=sys.argv[1] +s=sys.argv[2] +if t=="float" and s[-1:] in ("f","F"): + s=s[:-1] +if t=="double" and s[-1:] 
in ("d","D"): + s=s[:-1] +try: + x=float(s) +except Exception: + sys.exit(2) +if t=="float": + x=struct.unpack("!f",struct.pack("!f",x))[0] +if not math.isfinite(x): + sys.exit(2) +p=8 if t=="float" else 16 +sign="-" if x<0 else "" +x=abs(x) +if x==0.0: + sys.stdout.write(sign+"0."+"0"*p+"e+00") + sys.exit(0) +m=x +e=0 +while m>=10.0: + m/=10.0 + e+=1 +while m<1.0: + m*=10.0 + e-=1 +digits=[0]*(p+1) +digits[0]=int(m) +frac=m-digits[0] +for i in range(1,p+1): + frac*=10.0 + d=int(frac) + if d>9: + d=9 + digits[i]=d + frac-=d +frac*=10.0 +rd=int(frac) +if rd>9: + rd=9 +rem=frac-rd +ru = rd>5 or (rd==5 and (rem>0 or (digits[p]%2))) +if ru: + i=p + while i>=0 and digits[i]==9: + digits[i]=0 + i-=1 + if i>=0: + digits[i]+=1 + else: + digits[0]=1 + for j in range(1,p+1): + digits[j]=0 + e+=1 +es="-" if e<0 else "+" +if e<0: + e=-e +ew=3 if e>=100 else 2 +sys.stdout.write(sign+str(digits[0])+"."+("".join(str(d) for d in digits[1:]))+"e"+es+str(e).zfill(ew)) +]=]) + execute_process( + COMMAND "${Python3_EXECUTABLE}" -c "${_NBL_PY_SCRIPT}" "${_TYPE_LABEL}" "${_NBL_RAW}" + RESULT_VARIABLE _NBL_FMT_RESULT + OUTPUT_VARIABLE _NBL_CANON + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(NOT _NBL_FMT_RESULT EQUAL 0) + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${_VALUE}\" for ${_CAP_NAME}\n" + "${_TYPE_LABEL} values must be numbers or numeric strings." 
+ ) + endif() + endif() + set(${_OUT_VAR} "${_NBL_CANON}") + endmacro() + + macro(NBL_HASH_SPIRV_KEY _VALUE _OUT_VAR) + NBL_REQUIRE_PYTHON() + set(_NBL_PY_HASH [=[ +import sys +s=sys.argv[1] +h=14695981039346656037 +for b in s.encode("utf-8"): + h^=b + h=(h*1099511628211)&0xFFFFFFFFFFFFFFFF +sys.stdout.write(str(h)) +]=]) + execute_process( + COMMAND "${Python3_EXECUTABLE}" -c "${_NBL_PY_HASH}" "${_VALUE}" + RESULT_VARIABLE _NBL_HASH_RESULT + OUTPUT_VARIABLE _NBL_HASH_OUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(NOT _NBL_HASH_RESULT EQUAL 0) + message(FATAL_ERROR "Failed to hash SPIR-V key \"${_VALUE}\"") + endif() + set(${_OUT_VAR} "${_NBL_HASH_OUT}") + endmacro() + set(CAP_NAMES "") set(CAP_TYPES "") set(CAP_KINDS "") + set(CAP_VALUES_INDEX 0) if(HAS_CAPS) math(EXPR LAST_CAP "${CAPS_LENGTH} - 1") foreach(CAP_IDX RANGE 0 ${LAST_CAP}) - string(JSON CAP_KIND ERROR_VARIABLE CAP_TYPE_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} kind) - string(JSON CAP_NAME GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} name) - string(JSON CAP_TYPE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} type) + string(JSON MEMBERS_TYPE ERROR_VARIABLE MEMBERS_ERROR TYPE "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members) + if(MEMBERS_TYPE STREQUAL "ARRAY") + string(JSON CAP_KIND_RAW ERROR_VARIABLE CAP_KIND_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} kind) + if(CAP_KIND_ERROR) + set(CAP_KIND_RAW limits) + endif() - # -> TODO: improve validation, input should be string - if(CAP_TYPE_ERROR) - set(CAP_KIND limits) # I assume its limit by default (or when invalid value present, currently) + string(JSON CAP_STRUCT ERROR_VARIABLE CAP_STRUCT_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} struct) + if(CAP_STRUCT_ERROR) + set(CAP_STRUCT "") + endif() + + NBL_NSC_RESOLVE_CAP_KIND("${CAP_KIND_RAW}" "${CAP_STRUCT}" "member group" CAP_KIND) + + string(JSON MEMBERS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members) + if(MEMBERS_LENGTH GREATER 0) + math(EXPR LAST_MEMBER 
"${MEMBERS_LENGTH} - 1") + foreach(MEMBER_IDX RANGE 0 ${LAST_MEMBER}) + string(JSON CAP_NAME GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} name) + string(JSON CAP_TYPE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} type) + + if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t|int16_t|int32_t|int64_t|float|double)$") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" + "Allowed types are: bool, uint16_t, uint32_t, uint64_t, int16_t, int32_t, int64_t, float, double" + ) + endif() + + string(JSON CAP_VALUES_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} values) + + set(VALUES "") + math(EXPR LAST_VAL "${CAP_VALUES_LENGTH} - 1") + foreach(VAL_IDX RANGE 0 ${LAST_VAL}) + string(JSON VALUE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} values ${VAL_IDX}) + string(JSON VAL_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} members ${MEMBER_IDX} values ${VAL_IDX}) + + if(CAP_TYPE STREQUAL "float") + if(NOT (VAL_TYPE STREQUAL "STRING" OR VAL_TYPE STREQUAL "NUMBER")) + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for ${CAP_NAME}\n" + "Float values must be numbers or numeric strings." + ) + endif() + NBL_NORMALIZE_FLOAT_LITERAL("${CAP_NAME}" "${VALUE}" 8 "float" VALUE) + elseif(CAP_TYPE STREQUAL "double") + if(NOT (VAL_TYPE STREQUAL "STRING" OR VAL_TYPE STREQUAL "NUMBER")) + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for ${CAP_NAME}\n" + "Double values must be numbers or numeric strings." + ) + endif() + NBL_NORMALIZE_FLOAT_LITERAL("${CAP_NAME}" "${VALUE}" 16 "double" VALUE) + elseif(NOT VAL_TYPE STREQUAL "NUMBER") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for CAP \"${CAP_NAME}\" of type ${CAP_TYPE}\n" + "Use numbers for uint*_t and 0/1 for bools." 
+ ) + elseif(NOT VAL_TYPE STREQUAL "NUMBER") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for CAP \"${CAP_NAME}\" of type ${CAP_TYPE}\n" + "Use numbers for uint*_t and 0/1 for bools." + ) + endif() + + if(CAP_TYPE STREQUAL "bool") + if(NOT VALUE MATCHES "^[01]$") + ERROR_WHILE_PARSING_ITEM( + "Invalid bool value \"${VALUE}\" for ${CAP_NAME}\n" + "Boolean CAPs can only have values 0 or 1." + ) + endif() + endif() + list(APPEND VALUES "${VALUE}") + endforeach() + + set(CAP_VALUES_${CAP_VALUES_INDEX} "${VALUES}") + list(APPEND CAP_NAMES "${CAP_NAME}") + list(APPEND CAP_TYPES "${CAP_TYPE}") + list(APPEND CAP_KINDS "${CAP_KIND}") + math(EXPR CAP_VALUES_INDEX "${CAP_VALUES_INDEX} + 1") + endforeach() + endif() else() - if(NOT CAP_KIND MATCHES "^(limits|features)$") + if(NOT MEMBERS_ERROR) ERROR_WHILE_PARSING_ITEM( - "Invalid CAP kind \"${CAP_KIND}\" for ${CAP_NAME}\n" - "Allowed kinds are: limits, features" + "CAPS.members must be an array when provided." ) endif() - endif() - # <- - if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t)$") - ERROR_WHILE_PARSING_ITEM( - "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" - "Allowed types are: bool, uint16_t, uint32_t, uint64_t" - ) - endif() + string(JSON CAP_KIND_RAW ERROR_VARIABLE CAP_KIND_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} kind) + string(JSON CAP_NAME GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} name) + string(JSON CAP_TYPE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} type) - string(JSON CAP_VALUES_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values) + if(CAP_KIND_ERROR) + set(CAP_KIND_RAW limits) # I assume its limit by default (or when invalid value present, currently) + endif() - set(VALUES "") - math(EXPR LAST_VAL "${CAP_VALUES_LENGTH} - 1") - foreach(VAL_IDX RANGE 0 ${LAST_VAL}) - string(JSON VALUE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values ${VAL_IDX}) - string(JSON VAL_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values ${VAL_IDX}) + 
string(JSON CAP_STRUCT ERROR_VARIABLE CAP_STRUCT_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} struct) + if(CAP_STRUCT_ERROR) + set(CAP_STRUCT "") + endif() - if(NOT VAL_TYPE STREQUAL "NUMBER") + NBL_NSC_RESOLVE_CAP_KIND("${CAP_KIND_RAW}" "${CAP_STRUCT}" "${CAP_NAME}" CAP_KIND) + + if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t|int16_t|int32_t|int64_t|float|double)$") ERROR_WHILE_PARSING_ITEM( - "Invalid CAP value \"${VALUE}\" for CAP \"${CAP_NAME}\" of type ${CAP_TYPE}\n" - "Use numbers for uint*_t and 0/1 for bools." + "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" + "Allowed types are: bool, uint16_t, uint32_t, uint64_t, int16_t, int32_t, int64_t, float, double" ) endif() - if(CAP_TYPE STREQUAL "bool") - if(NOT VALUE MATCHES "^[01]$") + string(JSON CAP_VALUES_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values) + + set(VALUES "") + math(EXPR LAST_VAL "${CAP_VALUES_LENGTH} - 1") + foreach(VAL_IDX RANGE 0 ${LAST_VAL}) + string(JSON VALUE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values ${VAL_IDX}) + string(JSON VAL_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} values ${VAL_IDX}) + + if(CAP_TYPE STREQUAL "float") + if(NOT (VAL_TYPE STREQUAL "STRING" OR VAL_TYPE STREQUAL "NUMBER")) + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for ${CAP_NAME}\n" + "Float values must be numbers or numeric strings." + ) + endif() + NBL_NORMALIZE_FLOAT_LITERAL("${CAP_NAME}" "${VALUE}" 8 "float" VALUE) + elseif(CAP_TYPE STREQUAL "double") + if(NOT (VAL_TYPE STREQUAL "STRING" OR VAL_TYPE STREQUAL "NUMBER")) + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP value \"${VALUE}\" for ${CAP_NAME}\n" + "Double values must be numbers or numeric strings." + ) + endif() + NBL_NORMALIZE_FLOAT_LITERAL("${CAP_NAME}" "${VALUE}" 16 "double" VALUE) + elseif(NOT VAL_TYPE STREQUAL "NUMBER") ERROR_WHILE_PARSING_ITEM( - "Invalid bool value \"${VALUE}\" for ${CAP_NAME}\n" - "Boolean CAPs can only have values 0 or 1." 
+ "Invalid CAP value \"${VALUE}\" for CAP \"${CAP_NAME}\" of type ${CAP_TYPE}\n" + "Use numbers for uint*_t and 0/1 for bools." ) endif() - endif() - list(APPEND VALUES "${VALUE}") - endforeach() + if(CAP_TYPE STREQUAL "bool") + if(NOT VALUE MATCHES "^[01]$") + ERROR_WHILE_PARSING_ITEM( + "Invalid bool value \"${VALUE}\" for ${CAP_NAME}\n" + "Boolean CAPs can only have values 0 or 1." + ) + endif() + endif() + list(APPEND VALUES "${VALUE}") + endforeach() - set(CAP_VALUES_${CAP_IDX} "${VALUES}") - list(APPEND CAP_NAMES "${CAP_NAME}") - list(APPEND CAP_TYPES "${CAP_TYPE}") - list(APPEND CAP_KINDS "${CAP_KIND}") + set(CAP_VALUES_${CAP_VALUES_INDEX} "${VALUES}") + list(APPEND CAP_NAMES "${CAP_NAME}") + list(APPEND CAP_TYPES "${CAP_TYPE}") + list(APPEND CAP_KINDS "${CAP_KIND}") + math(EXPR CAP_VALUES_INDEX "${CAP_VALUES_INDEX} + 1") + endif() endforeach() endif() @@ -1438,6 +1828,10 @@ namespace @IMPL_NAMESPACE@ { if(NOT IS_ABSOLUTE "${TARGET_INPUT}") set(TARGET_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_INPUT}") endif() + if(IMPL_HLSL_GLOB) + get_filename_component(_ABS_TARGET_INPUT "${TARGET_INPUT}" ABSOLUTE) + list(REMOVE_ITEM IMPL_HLSL_GLOB "${TARGET_INPUT}" "${_ABS_TARGET_INPUT}") + endif() get_target_property(CANONICAL_IDENTIFIERS ${IMPL_TARGET} NBL_CANONICAL_IDENTIFIERS) @@ -1453,116 +1847,820 @@ namespace @IMPL_NAMESPACE@ { set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_CANONICAL_IDENTIFIERS "${NEW_CANONICAL_IDENTIFIER}") - set(HEADER_ITEM_VIEW [=[ -namespace @IMPL_NAMESPACE@ { - template<> - inline const nbl::core::string get_spirv_key - (const nbl::video::SPhysicalDeviceLimits& limits, const nbl::video::SPhysicalDeviceFeatures& features) - { - nbl::core::string retval = "@BASE_KEY@"; -@RETVAL_EVAL@ - retval += ".spv"; - return "$/" + retval; - } -} + if(NUM_CAPS GREATER 0) + set(KIND_ORDER "") + foreach(_NBL_KIND IN LISTS CAP_KINDS) + list(FIND KIND_ORDER "${_NBL_KIND}" _NBL_KIND_INDEX) + if(_NBL_KIND_INDEX EQUAL -1) + list(APPEND KIND_ORDER 
"${_NBL_KIND}") + endif() + endforeach() + + set(ORDERED_KINDS "${KIND_ORDER}") + + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + unset(_NBL_KIND_INDICES_${_NBL_KIND}) + endforeach() + + math(EXPR LAST_CAP "${NUM_CAPS} - 1") + foreach(i RANGE 0 ${LAST_CAP}) + list(GET CAP_KINDS ${i} _NBL_KIND) + set(_NBL_ORIG_CAP_VALUES_${i} "${CAP_VALUES_${i}}") + list(APPEND _NBL_KIND_INDICES_${_NBL_KIND} ${i}) + endforeach() + + set(_NBL_ORDERED_INDICES "") + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + if(DEFINED _NBL_KIND_INDICES_${_NBL_KIND}) + list(APPEND _NBL_ORDERED_INDICES ${_NBL_KIND_INDICES_${_NBL_KIND}}) + endif() + endforeach() + + set(_NBL_ORDERED_CAP_NAMES "") + set(_NBL_ORDERED_CAP_TYPES "") + set(_NBL_ORDERED_CAP_KINDS "") + set(_NBL_ORDERED_VALUES_INDEX 0) + foreach(_NBL_INDEX IN LISTS _NBL_ORDERED_INDICES) + list(GET CAP_NAMES ${_NBL_INDEX} _NBL_CAP_NAME) + list(GET CAP_TYPES ${_NBL_INDEX} _NBL_CAP_TYPE) + list(GET CAP_KINDS ${_NBL_INDEX} _NBL_CAP_KIND) + set(_NBL_CAP_VALUES "${_NBL_ORIG_CAP_VALUES_${_NBL_INDEX}}") + list(APPEND _NBL_ORDERED_CAP_NAMES "${_NBL_CAP_NAME}") + list(APPEND _NBL_ORDERED_CAP_TYPES "${_NBL_CAP_TYPE}") + list(APPEND _NBL_ORDERED_CAP_KINDS "${_NBL_CAP_KIND}") + set(CAP_VALUES_${_NBL_ORDERED_VALUES_INDEX} "${_NBL_CAP_VALUES}") + math(EXPR _NBL_ORDERED_VALUES_INDEX "${_NBL_ORDERED_VALUES_INDEX} + 1") + endforeach() + + set(CAP_NAMES "${_NBL_ORDERED_CAP_NAMES}") + set(CAP_TYPES "${_NBL_ORDERED_CAP_TYPES}") + set(CAP_KINDS "${_NBL_ORDERED_CAP_KINDS}") + list(LENGTH CAP_NAMES NUM_CAPS) + else() + set(ORDERED_KINDS "") + endif() + + list(LENGTH ORDERED_KINDS ORDERED_KIND_COUNT) + set(NON_DEVICE_KINDS "") + set(HAS_LIMITS FALSE) + set(HAS_FEATURES FALSE) + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + if(_NBL_KIND STREQUAL "limits") + set(HAS_LIMITS TRUE) + elseif(_NBL_KIND STREQUAL "features") + set(HAS_FEATURES TRUE) + else() + list(APPEND NON_DEVICE_KINDS "${_NBL_KIND}") + endif() + endforeach() + list(LENGTH NON_DEVICE_KINDS NON_DEVICE_COUNT) + + 
string(MAKE_C_IDENTIFIER "${BASE_KEY}" BASE_KEY_IDENT) + string(MD5 BASE_KEY_HASH "${BASE_KEY}") + string(SUBSTRING "${BASE_KEY_HASH}" 0 8 BASE_KEY_HASH8) + set(KIND_PREFIX "${BASE_KEY_IDENT}_${BASE_KEY_HASH8}") + + set(MATCH_KINDS "") + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + list(APPEND MATCH_KINDS "${_NBL_KIND}") + endforeach() + + foreach(_NBL_KIND IN LISTS MATCH_KINDS) + set(_NBL_KIND_MEMBERS_${_NBL_KIND} "") + set(_NBL_KIND_TYPES_${_NBL_KIND} "") + endforeach() + + if(NUM_CAPS GREATER 0) + math(EXPR _NBL_LAST_CAP "${NUM_CAPS} - 1") + foreach(i RANGE ${_NBL_LAST_CAP}) + list(GET CAP_KINDS ${i} _NBL_KIND) + list(GET CAP_NAMES ${i} _NBL_CAP) + list(GET CAP_TYPES ${i} _NBL_TYPE) + list(FIND _NBL_KIND_MEMBERS_${_NBL_KIND} "${_NBL_CAP}" _NBL_MEMBER_INDEX) + if(_NBL_MEMBER_INDEX EQUAL -1) + list(APPEND _NBL_KIND_MEMBERS_${_NBL_KIND} "${_NBL_CAP}") + list(APPEND _NBL_KIND_TYPES_${_NBL_KIND} "${_NBL_TYPE}") + endif() + endforeach() + endif() -]=]) - unset(RETVAL_EVAL) list(LENGTH CAP_NAMES CAP_COUNT) + + set(NBL_NSC_UNITY_CONTENT "") + if(IMPL_UNITY_BUILD) + set(NBL_NSC_UNITY_ENTRYPOINTS ${IMPL_ENTRYPOINTS}) + set(NBL_NSC_UNITY_INPUT_RAW "${TARGET_INPUT}") + file(TO_CMAKE_PATH "${TARGET_INPUT}" NBL_NSC_UNITY_INPUT_PATH) + set(NBL_NSC_UNITY_FINAL_KEY "${BASE_KEY}.spv") + NBL_HASH_SPIRV_KEY("${NBL_NSC_UNITY_FINAL_KEY}" NBL_NSC_UNITY_HASH) + set(NBL_NSC_UNITY_HASHED_KEY "${NBL_NSC_UNITY_HASH}.spv") + set(NBL_NSC_UNITY_OUTPUT_REL_PATH "$/${NBL_NSC_UNITY_HASHED_KEY}") + set(NBL_NSC_UNITY_OUTPUT "${IMPL_BINARY_DIR}/${NBL_NSC_UNITY_OUTPUT_REL_PATH}") + set(NBL_NSC_UNITY_INPUT "${IMPL_BINARY_DIR}/${NBL_NSC_UNITY_HASH}.unity.hlsl") + set(NBL_NSC_UNITY_CONTENT "#include \n") + endif() + + set(RETVAL_FMT "${BASE_KEY}") + set(RETVAL_ARGS "") + set(CX_CAPACITY 0) + string(LENGTH "${BASE_KEY}" CX_BASE_LEN) + math(EXPR CX_CAPACITY "${CX_BASE_LEN} + 4 + 24") if(CAP_COUNT GREATER 0) math(EXPR LAST_CAP "${CAP_COUNT} - 1") + set(PREV_KIND "") foreach(i RANGE ${LAST_CAP}) list(GET 
CAP_NAMES ${i} CAP) list(GET CAP_KINDS ${i} KIND) + list(GET CAP_TYPES ${i} TYPE) + if(NOT KIND STREQUAL PREV_KIND) + string(APPEND RETVAL_FMT "__${KIND}") + string(LENGTH "${KIND}" KIND_LEN) + math(EXPR CX_CAPACITY "${CX_CAPACITY} + 2 + ${KIND_LEN}") + set(PREV_KIND "${KIND}") + endif() + string(APPEND RETVAL_FMT ".${CAP}_%s") + list(APPEND RETVAL_ARGS "nbl_spirv_${KIND}.${CAP}") + string(LENGTH "${CAP}" CAP_LEN) + math(EXPR CX_CAPACITY "${CX_CAPACITY} + 2 + ${CAP_LEN}") + if(TYPE STREQUAL "bool") + set(DIGITS 1) + elseif(TYPE STREQUAL "uint16_t") + set(DIGITS 5) + elseif(TYPE STREQUAL "uint32_t") + set(DIGITS 10) + elseif(TYPE STREQUAL "int16_t") + set(DIGITS 6) + elseif(TYPE STREQUAL "int32_t") + set(DIGITS 11) + elseif(TYPE STREQUAL "int64_t") + set(DIGITS 20) + elseif(TYPE STREQUAL "uint64_t") + set(DIGITS 20) + elseif(TYPE STREQUAL "float") + set(DIGITS 16) + elseif(TYPE STREQUAL "double") + set(DIGITS 24) + else() + set(DIGITS 20) + endif() + math(EXPR CX_CAPACITY "${CX_CAPACITY} + ${DIGITS}") + endforeach() + endif() + string(APPEND RETVAL_FMT ".spv") + if(RETVAL_ARGS) + string(JOIN ", " RETVAL_ARGS_JOINED ${RETVAL_ARGS}) + set(RETVAL_ARGS_STR ", ${RETVAL_ARGS_JOINED}") + else() + set(RETVAL_ARGS_STR "") + endif() + string(CONFIGURE [=[ + typename StringLiteralBufferType::type nbl_spirv_full = {}; + nbl::core::detail::append_printf_s(nbl_spirv_full@RETVAL_ARGS_STR@); + retval.append("$/"); + nbl::core::detail::put(retval, nbl::core::FNV1a_64(nbl_spirv_full.view())); + retval.append(".spv"); +]=] RETVAL_EVAL_CONSTEXPR @ONLY) + + set(SPIRV_CUSTOM_TRAITS "") + foreach(_NBL_KIND IN LISTS MATCH_KINDS) + set(_NBL_MEMBER_LINES "") + list(LENGTH _NBL_KIND_MEMBERS_${_NBL_KIND} _NBL_MEMBER_COUNT) + set(KIND_TRAIT "${KIND_PREFIX}_${_NBL_KIND}") + if(_NBL_MEMBER_COUNT GREATER 0) + math(EXPR _NBL_MEMBER_LAST "${_NBL_MEMBER_COUNT} - 1") + foreach(_NBL_MEMBER_INDEX RANGE ${_NBL_MEMBER_LAST}) + list(GET _NBL_KIND_MEMBERS_${_NBL_KIND} ${_NBL_MEMBER_INDEX} _NBL_MEMBER_NAME) 
+ list(GET _NBL_KIND_TYPES_${_NBL_KIND} ${_NBL_MEMBER_INDEX} _NBL_MEMBER_TYPE) + set(MEMBER_NAME "${_NBL_MEMBER_NAME}") + set(MEMBER_TYPE "${_NBL_MEMBER_TYPE}") + string(CONFIGURE [=[ + requires std::is_same_v, @MEMBER_TYPE@>; +]=] _NBL_MEMBER_LINE @ONLY) + string(APPEND _NBL_MEMBER_LINES "${_NBL_MEMBER_LINE}") + endforeach() + set(KIND "${KIND_TRAIT}") + set(MEMBER_LINES "${_NBL_MEMBER_LINES}") + string(CONFIGURE [=[ + template + struct SpirvPerm_@KIND@ + { + static constexpr bool value = requires(const T& v) + { +@MEMBER_LINES@ }; + }; + +]=] _NBL_CUSTOM_TRAIT @ONLY) + else() + set(KIND "${KIND_TRAIT}") string(CONFIGURE [=[ - retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); -]=] RETVALUE_VIEW @ONLY) - string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") + template + struct SpirvPerm_@KIND@ + { + static constexpr bool value = false; + }; + +]=] _NBL_CUSTOM_TRAIT @ONLY) + endif() + string(APPEND SPIRV_CUSTOM_TRAITS "${_NBL_CUSTOM_TRAIT}") + endforeach() + + set(SPIRV_BUILD_REQUIRES "") + if(ORDERED_KIND_COUNT EQUAL 0) + set(SPIRV_BUILD_REQUIRES "requires (sizeof...(Args) == 0)") + else() + set(_NBL_REQS "") + set(_NBL_KIND_INDEX 0) + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + set(KIND_TRAIT "${KIND_PREFIX}_${_NBL_KIND}") + list(APPEND _NBL_REQS "SpirvPerm_${KIND_TRAIT}>>>::value") + math(EXPR _NBL_KIND_INDEX "${_NBL_KIND_INDEX} + 1") endforeach() + string(JOIN " && " _NBL_REQS_JOINED ${_NBL_REQS}) + set(SPIRV_BUILD_REQUIRES "requires (sizeof...(Args) == ${ORDERED_KIND_COUNT} && ${_NBL_REQS_JOINED})") endif() - + + set(SPIRV_ARG_DECLS "") + set(_NBL_KIND_INDEX 0) + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + string(APPEND SPIRV_ARG_DECLS "\t\tconst auto& nbl_spirv_${_NBL_KIND} = std::get<${_NBL_KIND_INDEX}>(std::forward_as_tuple(args...));\n") + math(EXPR _NBL_KIND_INDEX "${_NBL_KIND_INDEX} + 1") + endforeach() + + set(SPIRV_BUILD_FROM_DEVICE_REQUIRES "") + set(_NBL_DEVICE_REQS "") + if(HAS_LIMITS) + list(APPEND _NBL_DEVICE_REQS 
"nbl::core::detail::spirv_device_has_limits") + endif() + if(HAS_FEATURES) + list(APPEND _NBL_DEVICE_REQS "nbl::core::detail::spirv_device_has_features") + endif() + if(NON_DEVICE_COUNT EQUAL 0) + list(APPEND _NBL_DEVICE_REQS "sizeof...(Args) == 0") + else() + list(APPEND _NBL_DEVICE_REQS "sizeof...(Args) == ${NON_DEVICE_COUNT}") + set(_NBL_REQS "") + set(_NBL_KIND_INDEX 0) + foreach(_NBL_KIND IN LISTS NON_DEVICE_KINDS) + set(KIND_TRAIT "${KIND_PREFIX}_${_NBL_KIND}") + list(APPEND _NBL_REQS "SpirvPerm_${KIND_TRAIT}>>>::value") + math(EXPR _NBL_KIND_INDEX "${_NBL_KIND_INDEX} + 1") + endforeach() + if(_NBL_REQS) + string(JOIN " && " _NBL_REQS_JOINED ${_NBL_REQS}) + list(APPEND _NBL_DEVICE_REQS "${_NBL_REQS_JOINED}") + endif() + endif() + string(JOIN " && " SPIRV_DEVICE_REQUIRES_EXPR ${_NBL_DEVICE_REQS}) + set(SPIRV_BUILD_FROM_DEVICE_REQUIRES "requires (${SPIRV_DEVICE_REQUIRES_EXPR})") + + set(SPIRV_BUILD_FROM_DEVICE_ARGS "") + set(_NBL_ARG_INDEX 0) + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + if(_NBL_KIND STREQUAL "limits") + list(APPEND SPIRV_BUILD_FROM_DEVICE_ARGS "nbl::core::detail::spirv_device_get_limits(device)") + elseif(_NBL_KIND STREQUAL "features") + list(APPEND SPIRV_BUILD_FROM_DEVICE_ARGS "nbl::core::detail::spirv_device_get_features(device)") + else() + list(APPEND SPIRV_BUILD_FROM_DEVICE_ARGS "std::get<${_NBL_ARG_INDEX}>(std::forward_as_tuple(args...))") + math(EXPR _NBL_ARG_INDEX "${_NBL_ARG_INDEX} + 1") + endif() + endforeach() + if(SPIRV_BUILD_FROM_DEVICE_ARGS) + string(JOIN ", " SPIRV_BUILD_FROM_DEVICE_ARGS_JOINED ${SPIRV_BUILD_FROM_DEVICE_ARGS}) + else() + set(SPIRV_BUILD_FROM_DEVICE_ARGS_JOINED "") + endif() + + set(SPIRV_TRIVIAL_ASSERTS "") + + set(HEADER_ITEM_VIEW [=[ +namespace nbl::core::detail { + template<> + struct StringLiteralBufferType + { + using type = StringLiteralBuffer<@CX_CAPACITY@ + 1>; + }; + + template<> + struct SpirvKeyBuilder + { +@SPIRV_CUSTOM_TRAITS@ template + @SPIRV_BUILD_REQUIRES@ + static constexpr typename 
StringLiteralBufferType::type build(const Args&... args) + { +@SPIRV_ARG_DECLS@@SPIRV_TRIVIAL_ASSERTS@ typename StringLiteralBufferType::type retval = {}; +@RETVAL_EVAL_CONSTEXPR@ + return retval; + } + + template + @SPIRV_BUILD_FROM_DEVICE_REQUIRES@ + static constexpr typename StringLiteralBufferType::type build_from_device(const Device* device, const Args&... args) + { + return build(@SPIRV_BUILD_FROM_DEVICE_ARGS_JOINED@); + } + }; +} + +]=]) string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") - - function(GENERATE_KEYS PREFIX CAP_INDEX CAPS_EVAL_PART) + + if(IMPL_UNITY_BUILD) + set(UNITY_FILE_KEY_FMT "${BASE_KEY}.spv") + string(CONFIGURE [=[ +namespace nbl::core::detail { + template<> + struct SpirvFileKeyBuilder + { + template + @SPIRV_BUILD_REQUIRES@ + static constexpr typename StringLiteralBufferType::type build(const Args&... args) + { + (void)std::forward_as_tuple(args...); + typename StringLiteralBufferType::type nbl_spirv_full = {}; + nbl::core::detail::append_printf_s(nbl_spirv_full); + typename StringLiteralBufferType::type retval = {}; + retval.append("$/"); + nbl::core::detail::put(retval, nbl::core::FNV1a_64(nbl_spirv_full.view())); + retval.append(".spv"); + return retval; + } + + template + @SPIRV_BUILD_FROM_DEVICE_REQUIRES@ + static constexpr typename StringLiteralBufferType::type build_from_device(const Device* device, const Args&... 
args) + { + return build(@SPIRV_BUILD_FROM_DEVICE_ARGS_JOINED@); + } + }; +} + +]=] UNITY_FILE_KEY_EVAL @ONLY) + set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${UNITY_FILE_KEY_EVAL}") + + foreach(_NBL_ENTRY IN LISTS IMPL_ENTRYPOINTS) + string(LENGTH "${_NBL_ENTRY}" _NBL_ENTRY_LEN) + math(EXPR _NBL_ENTRY_CAP "${_NBL_ENTRY_LEN} + 7 + 20") + set(ENTRY_NAME "${_NBL_ENTRY}") + set(ENTRY_CAPACITY "${_NBL_ENTRY_CAP}") + string(CONFIGURE [=[ +namespace nbl::core::detail { + template<> + struct SpirvEntrypointBuilder + { + template + @SPIRV_BUILD_REQUIRES@ + static constexpr StringLiteralBuffer<@ENTRY_CAPACITY@ + 1> build(const Args&... args) + { +@SPIRV_ARG_DECLS@ typename StringLiteralBufferType::type nbl_spirv_full = {}; + nbl::core::detail::append_printf_s(nbl_spirv_full@RETVAL_ARGS_STR@); + const auto nbl_spirv_hash = nbl::core::FNV1a_64(nbl_spirv_full.view()); + StringLiteralBuffer<@ENTRY_CAPACITY@ + 1> retval = {}; + retval.append("@ENTRY_NAME@"); + retval.append("__nbl_p"); + nbl::core::detail::put(retval, nbl_spirv_hash); + return retval; + } + + template + @SPIRV_BUILD_FROM_DEVICE_REQUIRES@ + static constexpr StringLiteralBuffer<@ENTRY_CAPACITY@ + 1> build_from_device(const Device* device, const Args&... 
args) + { + return build(@SPIRV_BUILD_FROM_DEVICE_ARGS_JOINED@); + } + }; +} + +]=] UNITY_ENTRY_EVAL @ONLY) + set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${UNITY_ENTRY_EVAL}") + endforeach() + endif() + + function(GENERATE_KEYS PREFIX CAP_INDEX) + set(CAPS_VALUES_PART "${ARGN}") if(NUM_CAPS EQUAL 0 OR CAP_INDEX EQUAL ${NUM_CAPS}) - # generate .config file set(FINAL_KEY "${BASE_KEY}${PREFIX}.spv") # always add ext even if its already there to make sure asset loader always is able to load as IShader - set(CONFIG_FILE_TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY}") - set(CONFIG_FILE "${CONFIG_FILE_TARGET_OUTPUT}.config") - set(CAPS_EVAL "${CAPS_EVAL_PART}") + NBL_HASH_SPIRV_KEY("${FINAL_KEY}" FINAL_KEY_HASH) + set(HASHED_KEY "${FINAL_KEY_HASH}.spv") + set(CONFIG_FILE_TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_HASH}") + set(CONFIG_FILE "${CONFIG_FILE_TARGET_OUTPUT}.in.hlsl") + set(CAPS_EVAL "") + if(NUM_CAPS GREATER 0) + set(CAPS_EVAL_LIMITS "") + set(CAPS_EVAL_FEATURES "") + set(_NBL_CUSTOM_KIND_LIST "") + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + if(NOT _NBL_KIND STREQUAL "limits" AND NOT _NBL_KIND STREQUAL "features") + list(APPEND _NBL_CUSTOM_KIND_LIST "${_NBL_KIND}") + set(_NBL_CUSTOM_LINES_${_NBL_KIND} "") + endif() + endforeach() + + math(EXPR _NBL_LAST_CAP "${NUM_CAPS} - 1") + foreach(i RANGE 0 ${_NBL_LAST_CAP}) + list(GET CAP_NAMES ${i} _NBL_CAP_NAME) + list(GET CAP_TYPES ${i} _NBL_CAP_TYPE) + list(GET CAP_KINDS ${i} _NBL_CAP_KIND) + list(GET CAPS_VALUES_PART ${i} _NBL_CAP_VALUE) + set(MEMBER_NAME "${_NBL_CAP_NAME}") + set(MEMBER_TYPE "${_NBL_CAP_TYPE}") + set(MEMBER_VALUE "${_NBL_CAP_VALUE}") + if(MEMBER_TYPE STREQUAL "double" AND MEMBER_VALUE STREQUAL "1.7976931348623165e+308") + set(MEMBER_VALUE "1.7976931348623157e+308") + endif() + if(MEMBER_TYPE STREQUAL "double") + set(MEMBER_VALUE "${MEMBER_VALUE}L") + endif() + string(CONFIGURE [=[ +NBL_CONSTEXPR_STATIC_INLINE @MEMBER_TYPE@ @MEMBER_NAME@ = (@MEMBER_TYPE@) 
@MEMBER_VALUE@; +]=] _NBL_MEMBER_LINE @ONLY) + if(_NBL_CAP_KIND STREQUAL "limits") + string(APPEND CAPS_EVAL_LIMITS " ${_NBL_MEMBER_LINE}") + elseif(_NBL_CAP_KIND STREQUAL "features") + string(APPEND CAPS_EVAL_FEATURES " ${_NBL_MEMBER_LINE}") + else() + set(_NBL_CUSTOM_LINE_VAR "_NBL_CUSTOM_LINES_${_NBL_CAP_KIND}") + set(${_NBL_CUSTOM_LINE_VAR} "${${_NBL_CUSTOM_LINE_VAR}} ${_NBL_MEMBER_LINE}") + endif() + endforeach() + + if(CAPS_EVAL_LIMITS) + string(APPEND CAPS_EVAL " // limits\n") + string(APPEND CAPS_EVAL "${CAPS_EVAL_LIMITS}") + endif() + if(CAPS_EVAL_FEATURES) + string(APPEND CAPS_EVAL " // features\n") + string(APPEND CAPS_EVAL "${CAPS_EVAL_FEATURES}") + endif() + + set(_NBL_HAS_CUSTOM FALSE) + foreach(_NBL_KIND IN LISTS _NBL_CUSTOM_KIND_LIST) + if(_NBL_CUSTOM_LINES_${_NBL_KIND}) + set(_NBL_HAS_CUSTOM TRUE) + endif() + endforeach() + + if(_NBL_HAS_CUSTOM) + string(APPEND CAPS_EVAL " // custom structs\n") + foreach(_NBL_KIND IN LISTS ORDERED_KINDS) + if(NOT _NBL_KIND STREQUAL "limits" AND NOT _NBL_KIND STREQUAL "features") + if(_NBL_CUSTOM_LINES_${_NBL_KIND}) + set(NBL_KIND_NAME "${_NBL_KIND}") + set(MEMBER_LINES "${_NBL_CUSTOM_LINES_${_NBL_KIND}}") + string(CONFIGURE [=[ + struct @NBL_KIND_NAME@ + { +@MEMBER_LINES@ }; +]=] _NBL_KIND_STRUCT @ONLY) + string(APPEND CAPS_EVAL "${_NBL_KIND_STRUCT}") + endif() + endif() + endforeach() + endif() + endif() + if(CAPS_EVAL STREQUAL "") + set(CAPS_EVAL " // no caps\n") + endif() + if(IMPL_UNITY_BUILD) + set(_NBL_UNITY_STRUCT "DeviceConfigCaps__nbl_p${FINAL_KEY_HASH}") + set(_NBL_UNITY_ENTRY_DEFS "") + set(_NBL_UNITY_ENTRY_UNDEFS "") + foreach(_NBL_ENTRY IN LISTS NBL_NSC_UNITY_ENTRYPOINTS) + string(APPEND _NBL_UNITY_ENTRY_DEFS "#define ${_NBL_ENTRY} ${_NBL_ENTRY}__nbl_p${FINAL_KEY_HASH}\n") + string(APPEND _NBL_UNITY_ENTRY_UNDEFS "#undef ${_NBL_ENTRY}\n") + endforeach() + set(UNITY_STRUCT "${_NBL_UNITY_STRUCT}") + set(UNITY_ENTRY_DEFS "${_NBL_UNITY_ENTRY_DEFS}") + set(UNITY_ENTRY_UNDEFS "${_NBL_UNITY_ENTRY_UNDEFS}") + 
set(UNITY_INPUT_PATH "${NBL_NSC_UNITY_INPUT_PATH}") + string(CONFIGURE [=[ + +struct @UNITY_STRUCT@ +{ +@CAPS_EVAL@ +}; +#define DeviceConfigCaps @UNITY_STRUCT@ +@UNITY_ENTRY_DEFS@#include "@UNITY_INPUT_PATH@" +@UNITY_ENTRY_UNDEFS@#undef DeviceConfigCaps + +]=] _NBL_UNITY_BLOCK @ONLY) + set(_NBL_UNITY_CONTENT "${NBL_NSC_UNITY_CONTENT}") + string(APPEND _NBL_UNITY_CONTENT "${_NBL_UNITY_BLOCK}") + set(NBL_NSC_UNITY_CONTENT "${_NBL_UNITY_CONTENT}" PARENT_SCOPE) + return() + endif() string(CONFIGURE "${DEVICE_CONFIG_VIEW}" CONFIG_CONTENT @ONLY) - file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") + set(_NBL_CONFIG_WRITE TRUE) + if(EXISTS "${CONFIG_FILE}") + file(READ "${CONFIG_FILE}" _NBL_CONFIG_OLD) + if(_NBL_CONFIG_OLD STREQUAL "${CONFIG_CONTENT}") + set(_NBL_CONFIG_WRITE FALSE) + endif() + endif() + if(_NBL_CONFIG_WRITE) + file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") + endif() + list(APPEND DEPENDS_ON "${TARGET_INPUT}" "${CONFIG_FILE}") # generate keys and commands for compiling shaders - set(FINAL_KEY_REL_PATH "$/${FINAL_KEY}") + set(FINAL_KEY_REL_PATH "$/${HASHED_KEY}") set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") - set(DEPFILE_PATH "${TARGET_OUTPUT}.d") + set(DEPFILE_PATH "${TARGET_OUTPUT}.dep") set(NBL_NSC_LOG_PATH "${TARGET_OUTPUT}.log") + set(NBL_NSC_PREPROCESSED_PATH "${TARGET_OUTPUT}.pre.hlsl") + if(NSC_CACHE_DIR) + get_filename_component(NBL_NSC_CACHE_ROOT "${NSC_CACHE_DIR}" ABSOLUTE BASE_DIR "${CMAKE_BINARY_DIR}") + file(RELATIVE_PATH NBL_NSC_CACHE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT}") + set(NBL_NSC_CACHE_PATH "${NBL_NSC_CACHE_ROOT}/${NBL_NSC_CACHE_REL}.ppcache") + set(NBL_NSC_PREPROCESS_CACHE_PATH "${NBL_NSC_CACHE_ROOT}/${NBL_NSC_CACHE_REL}.ppcache.pre") + else() + set(NBL_NSC_CACHE_PATH "${TARGET_OUTPUT}.ppcache") + set(NBL_NSC_PREPROCESS_CACHE_PATH "${TARGET_OUTPUT}.ppcache.pre") + endif() set(NBL_NSC_DEPFILE_ARGS "") if(NSC_USE_DEPFILE) set(NBL_NSC_DEPFILE_ARGS -MD -MF "${DEPFILE_PATH}") endif() + set(NBL_NSC_CACHE_ARGS "") + 
if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-shader-cache) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-shader-cache-compression "${NSC_SHADER_CACHE_COMPRESSION}") + if(NSC_CACHE_DIR) + list(APPEND NBL_NSC_CACHE_ARGS -shader-cache-file "${NBL_NSC_CACHE_PATH}") + endif() + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-preprocess-cache) + if(NSC_CACHE_DIR) + list(APPEND NBL_NSC_CACHE_ARGS -preprocess-cache-file "${NBL_NSC_PREPROCESS_CACHE_PATH}") + endif() + if(NSC_PREPROCESS_PREAMBLE) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-preprocess-preamble) + endif() + endif() + if(NSC_STDOUT_LOG) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-stdout-log) + endif() + set(NBL_NSC_REPORT_ARGS "") + if(NSC_JSON_REPORT) + set(NBL_NSC_REPORT_PATH "${TARGET_OUTPUT}.report.json") + list(APPEND NBL_NSC_REPORT_ARGS -nbl-report "${NBL_NSC_REPORT_PATH}") + endif() + set(NBL_NSC_COMPILE_COMMAND "$" -Fc "${TARGET_OUTPUT}" ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} ${NBL_NSC_DEPFILE_ARGS} - "${CONFIG_FILE}" + $<$:-verbose> + ${NBL_NSC_CACHE_ARGS} + ${NBL_NSC_REPORT_ARGS} + -FI "${CONFIG_FILE}" + "${TARGET_INPUT}" ) get_filename_component(NBL_NSC_INPUT_NAME "${TARGET_INPUT}" NAME) get_filename_component(NBL_NSC_CONFIG_NAME "${CONFIG_FILE}" NAME) + set(NBL_NSC_COMMENT_LEFT "${NBL_NSC_INPUT_NAME}") + set(NBL_NSC_COMMENT_RIGHT "${NBL_NSC_CONFIG_NAME}") + if(NBL_NSC_INPUT_NAME MATCHES "\\.in\\.hlsl$") + set(NBL_NSC_COMMENT_LEFT "${NBL_NSC_CONFIG_NAME}") + set(NBL_NSC_COMMENT_RIGHT "${NBL_NSC_INPUT_NAME}") + endif() + set(NBL_NSC_MAIN_DEPENDENCY "${TARGET_INPUT}") + if(TARGET nsc) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + list(APPEND DEPENDS_ON "$") + else() + list(APPEND DEPENDS_ON nsc) + endif() + endif() set(NBL_NSC_BYPRODUCTS "${NBL_NSC_LOG_PATH}") if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_BYPRODUCTS "${DEPFILE_PATH}") endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_CACHE_PATH}") + endif() + if(NSC_PREPROCESS_CACHE) + 
list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_PREPROCESS_CACHE_PATH}") + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_PREPROCESSED_PATH}") + endif() + if(NSC_JSON_REPORT) + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_REPORT_PATH}") + endif() set(NBL_NSC_CUSTOM_COMMAND_ARGS OUTPUT "${TARGET_OUTPUT}" BYPRODUCTS ${NBL_NSC_BYPRODUCTS} COMMAND ${NBL_NSC_COMPILE_COMMAND} DEPENDS ${DEPENDS_ON} - COMMENT "${NBL_NSC_CONFIG_NAME} (${NBL_NSC_INPUT_NAME})" + COMMENT "${NBL_NSC_COMMENT_LEFT} (${NBL_NSC_COMMENT_RIGHT})" VERBATIM COMMAND_EXPAND_LISTS ) + if(NBL_NSC_MAIN_DEPENDENCY) + list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS MAIN_DEPENDENCY "${NBL_NSC_MAIN_DEPENDENCY}") + endif() if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS DEPFILE "${DEPFILE_PATH}") endif() - add_custom_command(${NBL_NSC_CUSTOM_COMMAND_ARGS}) - set(NBL_NSC_OUT_FILES "${TARGET_OUTPUT}" "${NBL_NSC_LOG_PATH}") - if(NSC_USE_DEPFILE) - list(APPEND NBL_NSC_OUT_FILES "${DEPFILE_PATH}") + if(IMPL_EXPORT_RULES) + set(_NBL_EXPORT_INDEX "${_NBL_EXPORT_RULE_INDEX}") + set(${IMPL_EXPORT_RULES}_COMMAND_${_NBL_EXPORT_INDEX} ${NBL_NSC_COMPILE_COMMAND} PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_OUTPUT_${_NBL_EXPORT_INDEX} "${TARGET_OUTPUT}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_LOG_${_NBL_EXPORT_INDEX} "${NBL_NSC_LOG_PATH}" PARENT_SCOPE) + if(NSC_SHADER_CACHE) + set(${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_INDEX} "${NBL_NSC_CACHE_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_PREPROCESS_CACHE) + set(${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_INDEX} "${NBL_NSC_PREPROCESS_CACHE_PATH}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_INDEX} "${NBL_NSC_PREPROCESSED_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_USE_DEPFILE) + 
set(${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_INDEX} "${DEPFILE_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_JSON_REPORT) + set(${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_INDEX} "${NBL_NSC_REPORT_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + math(EXPR _NBL_EXPORT_INDEX_NEXT "${_NBL_EXPORT_INDEX} + 1") + set(_NBL_EXPORT_RULE_INDEX "${_NBL_EXPORT_INDEX_NEXT}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_COUNT "${_NBL_EXPORT_INDEX_NEXT}" PARENT_SCOPE) + endif() + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + add_custom_command(${NBL_NSC_CUSTOM_COMMAND_ARGS}) + endif() + set(NBL_NSC_OUT_FILES "") + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + set(NBL_NSC_OUT_FILES "${TARGET_OUTPUT}" "${NBL_NSC_LOG_PATH}") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES "${DEPFILE_PATH}") + endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_CACHE_PATH}") + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_PREPROCESS_CACHE_PATH}") + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_PREPROCESSED_PATH}") + endif() + set_source_files_properties(${NBL_NSC_OUT_FILES} PROPERTIES GENERATED TRUE) endif() - set_source_files_properties(${NBL_NSC_OUT_FILES} PROPERTIES GENERATED TRUE) - - set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" ${NBL_NSC_OUT_FILES}) - target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) - - set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES - HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None - ) - if(CMAKE_CONFIGURATION_TYPES) - foreach(_CFG IN LISTS CMAKE_CONFIGURATION_TYPES) - set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${FINAL_KEY}") + set(HEADER_ONLY_LIKE "") + set(ADD_INPUT_AS_HEADER_ONLY TRUE) + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS AND CMAKE_GENERATOR MATCHES "Visual Studio") + set(ADD_INPUT_AS_HEADER_ONLY FALSE) + endif() + if(ADD_INPUT_AS_HEADER_ONLY) + list(APPEND 
HEADER_ONLY_LIKE "${TARGET_INPUT}") + endif() + if(NBL_NSC_OUT_FILES AND NOT CMAKE_CONFIGURATION_TYPES) + list(APPEND HEADER_ONLY_LIKE ${NBL_NSC_OUT_FILES}) + endif() + if(HEADER_ONLY_LIKE AND IMPL_HLSL_GLOB) + foreach(_HLSL_SOURCE IN LISTS IMPL_HLSL_GLOB) + list(REMOVE_ITEM HEADER_ONLY_LIKE "${_HLSL_SOURCE}") + endforeach() + endif() + if(HEADER_ONLY_LIKE) + list(REMOVE_DUPLICATES HEADER_ONLY_LIKE) + target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) + set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES + HEADER_FILE_ONLY ON + ) + endif() + set(ADD_CONFIG_AS_HEADER_ONLY TRUE) + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NBL_NSC_MAIN_DEPENDENCY STREQUAL "${CONFIG_FILE}") + set(ADD_CONFIG_AS_HEADER_ONLY FALSE) + endif() + endif() + if(ADD_CONFIG_AS_HEADER_ONLY) + target_sources(${IMPL_TARGET} PRIVATE "${CONFIG_FILE}") + set_source_files_properties("${CONFIG_FILE}" PROPERTIES + GENERATED TRUE + HEADER_FILE_ONLY ON + ) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${CONFIG_FILE}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + if(CMAKE_CONFIGURATION_TYPES) + foreach(_CFG IN LISTS CMAKE_CONFIGURATION_TYPES) + if(_CFG STREQUAL "") + continue() + endif() + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${HASHED_KEY}") + set(TARGET_OUTPUT_IDE_PREPROCESSED "${TARGET_OUTPUT_IDE}.pre.hlsl") + if(NSC_CACHE_DIR) + file(RELATIVE_PATH TARGET_OUTPUT_IDE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT_IDE}") + set(TARGET_OUTPUT_IDE_CACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache.pre") + else() + set(TARGET_OUTPUT_IDE_CACHE "${TARGET_OUTPUT_IDE}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${TARGET_OUTPUT_IDE}.ppcache.pre") + endif() + set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") + 
if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.dep") + endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_CACHE}") + endif() + set(ADD_PREPROCESSED_IDE TRUE) + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PRECACHE}") + if(ADD_PREPROCESSED_IDE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + endif() + endif() + list(REMOVE_DUPLICATES NBL_NSC_OUT_FILES_IDE) + target_sources(${IMPL_TARGET} PRIVATE ${NBL_NSC_OUT_FILES_IDE}) + set_source_files_properties(${NBL_NSC_OUT_FILES_IDE} PROPERTIES + HEADER_FILE_ONLY ON + GENERATED TRUE + ) + if(NSC_SHADER_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_CACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + endif() + if(NSC_PREPROCESS_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PRECACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + if(ADD_PREPROCESSED_IDE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() + endif() + source_group("${OUT}/${_CFG}" FILES ${NBL_NSC_OUT_FILES_IDE}) + endforeach() + else() + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${HASHED_KEY}") + set(TARGET_OUTPUT_IDE_PREPROCESSED "${TARGET_OUTPUT_IDE}.pre.hlsl") + if(NSC_CACHE_DIR) + file(RELATIVE_PATH TARGET_OUTPUT_IDE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT_IDE}") + set(TARGET_OUTPUT_IDE_CACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache.pre") + else() + set(TARGET_OUTPUT_IDE_CACHE "${TARGET_OUTPUT_IDE}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${TARGET_OUTPUT_IDE}.ppcache.pre") + endif() set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") 
if(NSC_USE_DEPFILE) - list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.d") + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.dep") endif() - source_group("${OUT}/${_CFG}" FILES ${NBL_NSC_OUT_FILES_IDE}) - endforeach() - else() - set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${FINAL_KEY}") - set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") - if(NSC_USE_DEPFILE) - list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.d") + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_CACHE}") + endif() + set(ADD_PREPROCESSED_IDE TRUE) + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PRECACHE}") + if(ADD_PREPROCESSED_IDE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + endif() + endif() + list(REMOVE_DUPLICATES NBL_NSC_OUT_FILES_IDE) + target_sources(${IMPL_TARGET} PRIVATE ${NBL_NSC_OUT_FILES_IDE}) + set_source_files_properties(${NBL_NSC_OUT_FILES_IDE} PROPERTIES + HEADER_FILE_ONLY ON + GENERATED TRUE + ) + if(NSC_SHADER_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_CACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + endif() + if(NSC_PREPROCESS_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PRECACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + if(ADD_PREPROCESSED_IDE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() + endif() + source_group("${OUT}" FILES ${NBL_NSC_OUT_FILES_IDE}) endif() - source_group("${OUT}" FILES ${NBL_NSC_OUT_FILES_IDE}) endif() set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES @@ -1582,15 +2680,349 @@ namespace @IMPL_NAMESPACE@ { set(VAR_NAME "CAP_VALUES_${CAP_INDEX}") set(VALUES "${${VAR_NAME}}") + set(KEY_PREFIX ".") + if(CAP_INDEX EQUAL 0) + set(KEY_PREFIX 
"__${CURRENT_KIND}.") + else() + math(EXPR PREV_INDEX "${CAP_INDEX} - 1") + list(GET CAP_KINDS ${PREV_INDEX} PREV_KIND) + if(NOT CURRENT_KIND STREQUAL PREV_KIND) + set(KEY_PREFIX "__${CURRENT_KIND}.") + endif() + endif() foreach(V IN LISTS VALUES) - set(NEW_PREFIX "${PREFIX}.${CURRENT_CAP}_${V}") - set(NEW_EVAL "${CAPS_EVAL_PART}NBL_CONSTEXPR_STATIC_INLINE ${CURRENT_TYPE} ${CURRENT_CAP} = (${CURRENT_TYPE}) ${V}; // got permuted\n") + set(NEW_PREFIX "${PREFIX}${KEY_PREFIX}${CURRENT_CAP}_${V}") + set(NEW_VALUES "${CAPS_VALUES_PART}") + list(APPEND NEW_VALUES "${V}") math(EXPR NEXT_INDEX "${CAP_INDEX} + 1") - GENERATE_KEYS("${NEW_PREFIX}" "${NEXT_INDEX}" "${NEW_EVAL}") + GENERATE_KEYS("${NEW_PREFIX}" "${NEXT_INDEX}" ${NEW_VALUES}) endforeach() endfunction() - GENERATE_KEYS("" 0 "") + GENERATE_KEYS("" 0) + + if(IMPL_UNITY_BUILD) + set(NBL_NSC_UNITY_CONTENT "${NBL_NSC_UNITY_CONTENT}") + set(_NBL_UNITY_WRITE TRUE) + if(EXISTS "${NBL_NSC_UNITY_INPUT}") + file(READ "${NBL_NSC_UNITY_INPUT}" _NBL_UNITY_OLD) + if(_NBL_UNITY_OLD STREQUAL "${NBL_NSC_UNITY_CONTENT}") + set(_NBL_UNITY_WRITE FALSE) + endif() + endif() + if(_NBL_UNITY_WRITE) + file(WRITE "${NBL_NSC_UNITY_INPUT}" "${NBL_NSC_UNITY_CONTENT}") + endif() + + set(NBL_NSC_REGISTERED_INPUT "${NBL_NSC_UNITY_INPUT_RAW}") + set(NBL_NSC_COMPILE_INPUT "${NBL_NSC_UNITY_INPUT}") + set(NBL_NSC_DEPENDS_ON "${DEPENDS_ON}") + list(APPEND NBL_NSC_DEPENDS_ON "${NBL_NSC_UNITY_INPUT}" "${NBL_NSC_REGISTERED_INPUT}") + + set(FINAL_KEY_REL_PATH "${NBL_NSC_UNITY_OUTPUT_REL_PATH}") + set(TARGET_OUTPUT "${NBL_NSC_UNITY_OUTPUT}") + set(DEPFILE_PATH "${TARGET_OUTPUT}.dep") + set(NBL_NSC_LOG_PATH "${TARGET_OUTPUT}.log") + set(NBL_NSC_PREPROCESSED_PATH "${TARGET_OUTPUT}.pre.hlsl") + if(NSC_CACHE_DIR) + get_filename_component(NBL_NSC_CACHE_ROOT "${NSC_CACHE_DIR}" ABSOLUTE BASE_DIR "${CMAKE_BINARY_DIR}") + file(RELATIVE_PATH NBL_NSC_CACHE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT}") + set(NBL_NSC_CACHE_PATH 
"${NBL_NSC_CACHE_ROOT}/${NBL_NSC_CACHE_REL}.ppcache") + set(NBL_NSC_PREPROCESS_CACHE_PATH "${NBL_NSC_CACHE_ROOT}/${NBL_NSC_CACHE_REL}.ppcache.pre") + else() + set(NBL_NSC_CACHE_PATH "${TARGET_OUTPUT}.ppcache") + set(NBL_NSC_PREPROCESS_CACHE_PATH "${TARGET_OUTPUT}.ppcache.pre") + endif() + + set(NBL_NSC_DEPFILE_ARGS "") + if(NSC_USE_DEPFILE) + set(NBL_NSC_DEPFILE_ARGS -MD -MF "${DEPFILE_PATH}") + endif() + + set(NBL_NSC_CACHE_ARGS "") + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-shader-cache) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-shader-cache-compression "${NSC_SHADER_CACHE_COMPRESSION}") + if(NSC_CACHE_DIR) + list(APPEND NBL_NSC_CACHE_ARGS -shader-cache-file "${NBL_NSC_CACHE_PATH}") + endif() + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-preprocess-cache) + if(NSC_CACHE_DIR) + list(APPEND NBL_NSC_CACHE_ARGS -preprocess-cache-file "${NBL_NSC_PREPROCESS_CACHE_PATH}") + endif() + if(NSC_PREPROCESS_PREAMBLE) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-preprocess-preamble) + endif() + endif() + if(NSC_STDOUT_LOG) + list(APPEND NBL_NSC_CACHE_ARGS -nbl-stdout-log) + endif() + set(NBL_NSC_REPORT_ARGS "") + if(NSC_JSON_REPORT) + set(NBL_NSC_REPORT_PATH "${TARGET_OUTPUT}.report.json") + list(APPEND NBL_NSC_REPORT_ARGS -nbl-report "${NBL_NSC_REPORT_PATH}") + endif() + + set(NBL_NSC_COMPILE_COMMAND + "$" + -Fc "${TARGET_OUTPUT}" + ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} + ${NBL_NSC_DEPFILE_ARGS} + $<$:-verbose> + ${NBL_NSC_CACHE_ARGS} + ${NBL_NSC_REPORT_ARGS} + "${NBL_NSC_COMPILE_INPUT}" + ) + + get_filename_component(NBL_NSC_INPUT_NAME "${NBL_NSC_REGISTERED_INPUT}" NAME) + get_filename_component(NBL_NSC_CONFIG_NAME "${NBL_NSC_UNITY_INPUT}" NAME) + set(NBL_NSC_COMMENT_LEFT "${NBL_NSC_INPUT_NAME}") + set(NBL_NSC_COMMENT_RIGHT "${NBL_NSC_CONFIG_NAME}") + set(NBL_NSC_MAIN_DEPENDENCY "${NBL_NSC_REGISTERED_INPUT}") + if(TARGET nsc) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + list(APPEND NBL_NSC_DEPENDS_ON "$") + 
else() + list(APPEND NBL_NSC_DEPENDS_ON nsc) + endif() + endif() + set(NBL_NSC_BYPRODUCTS "${NBL_NSC_LOG_PATH}") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_BYPRODUCTS "${DEPFILE_PATH}") + endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_CACHE_PATH}") + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_PREPROCESS_CACHE_PATH}") + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_PREPROCESSED_PATH}") + endif() + if(NSC_JSON_REPORT) + list(APPEND NBL_NSC_BYPRODUCTS "${NBL_NSC_REPORT_PATH}") + endif() + + set(NBL_NSC_CUSTOM_COMMAND_ARGS + OUTPUT "${TARGET_OUTPUT}" + BYPRODUCTS ${NBL_NSC_BYPRODUCTS} + COMMAND ${NBL_NSC_COMPILE_COMMAND} + DEPENDS ${NBL_NSC_DEPENDS_ON} + COMMENT "${NBL_NSC_COMMENT_LEFT} (${NBL_NSC_COMMENT_RIGHT})" + VERBATIM + COMMAND_EXPAND_LISTS + ) + if(NBL_NSC_MAIN_DEPENDENCY) + list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS MAIN_DEPENDENCY "${NBL_NSC_MAIN_DEPENDENCY}") + endif() + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS DEPFILE "${DEPFILE_PATH}") + endif() + if(IMPL_EXPORT_RULES) + set(_NBL_EXPORT_INDEX "${_NBL_EXPORT_RULE_INDEX}") + set(${IMPL_EXPORT_RULES}_COMMAND_${_NBL_EXPORT_INDEX} ${NBL_NSC_COMPILE_COMMAND} PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_OUTPUT_${_NBL_EXPORT_INDEX} "${TARGET_OUTPUT}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_LOG_${_NBL_EXPORT_INDEX} "${NBL_NSC_LOG_PATH}" PARENT_SCOPE) + if(NSC_SHADER_CACHE) + set(${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_INDEX} "${NBL_NSC_CACHE_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_PREPROCESS_CACHE) + set(${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_INDEX} "${NBL_NSC_PREPROCESS_CACHE_PATH}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_INDEX} "${NBL_NSC_PREPROCESSED_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + 
set(${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_USE_DEPFILE) + set(${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_INDEX} "${DEPFILE_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + if(NSC_JSON_REPORT) + set(${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_INDEX} "${NBL_NSC_REPORT_PATH}" PARENT_SCOPE) + else() + set(${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_INDEX} "" PARENT_SCOPE) + endif() + math(EXPR _NBL_EXPORT_INDEX_NEXT "${_NBL_EXPORT_INDEX} + 1") + set(_NBL_EXPORT_RULE_INDEX "${_NBL_EXPORT_INDEX_NEXT}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_COUNT "${_NBL_EXPORT_INDEX_NEXT}" PARENT_SCOPE) + endif() + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + add_custom_command(${NBL_NSC_CUSTOM_COMMAND_ARGS}) + endif() + set(NBL_NSC_OUT_FILES "") + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + set(NBL_NSC_OUT_FILES "${TARGET_OUTPUT}" "${NBL_NSC_LOG_PATH}") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES "${DEPFILE_PATH}") + endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_CACHE_PATH}") + endif() + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_PREPROCESS_CACHE_PATH}") + list(APPEND NBL_NSC_OUT_FILES "${NBL_NSC_PREPROCESSED_PATH}") + endif() + set_source_files_properties(${NBL_NSC_OUT_FILES} PROPERTIES GENERATED TRUE) + endif() + + set(HEADER_ONLY_LIKE "") + set(ADD_INPUT_AS_HEADER_ONLY TRUE) + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS AND CMAKE_GENERATOR MATCHES "Visual Studio") + set(ADD_INPUT_AS_HEADER_ONLY FALSE) + endif() + if(ADD_INPUT_AS_HEADER_ONLY) + list(APPEND HEADER_ONLY_LIKE "${NBL_NSC_REGISTERED_INPUT}") + endif() + if(NBL_NSC_OUT_FILES AND NOT CMAKE_CONFIGURATION_TYPES) + list(APPEND HEADER_ONLY_LIKE ${NBL_NSC_OUT_FILES}) + endif() + if(HEADER_ONLY_LIKE AND IMPL_HLSL_GLOB) + foreach(_HLSL_SOURCE IN LISTS IMPL_HLSL_GLOB) + list(REMOVE_ITEM HEADER_ONLY_LIKE "${_HLSL_SOURCE}") + endforeach() + endif() + if(HEADER_ONLY_LIKE) + 
list(REMOVE_DUPLICATES HEADER_ONLY_LIKE) + target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) + set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES + HEADER_FILE_ONLY ON + ) + endif() + set(ADD_CONFIG_AS_HEADER_ONLY TRUE) + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NBL_NSC_MAIN_DEPENDENCY STREQUAL "${NBL_NSC_UNITY_INPUT}") + set(ADD_CONFIG_AS_HEADER_ONLY FALSE) + endif() + endif() + if(ADD_CONFIG_AS_HEADER_ONLY) + target_sources(${IMPL_TARGET} PRIVATE "${NBL_NSC_UNITY_INPUT}") + set_source_files_properties("${NBL_NSC_UNITY_INPUT}" PROPERTIES + GENERATED TRUE + HEADER_FILE_ONLY ON + ) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${NBL_NSC_UNITY_INPUT}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() + if(NOT _NBL_DISABLE_CUSTOM_COMMANDS) + if(CMAKE_CONFIGURATION_TYPES) + foreach(_CFG IN LISTS CMAKE_CONFIGURATION_TYPES) + if(_CFG STREQUAL "") + continue() + endif() + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${NBL_NSC_UNITY_HASHED_KEY}") + set(TARGET_OUTPUT_IDE_PREPROCESSED "${TARGET_OUTPUT_IDE}.pre.hlsl") + if(NSC_CACHE_DIR) + file(RELATIVE_PATH TARGET_OUTPUT_IDE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT_IDE}") + set(TARGET_OUTPUT_IDE_CACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache.pre") + else() + set(TARGET_OUTPUT_IDE_CACHE "${TARGET_OUTPUT_IDE}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${TARGET_OUTPUT_IDE}.ppcache.pre") + endif() + set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.dep") + endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_CACHE}") + endif() + set(ADD_PREPROCESSED_IDE TRUE) + if(NSC_PREPROCESS_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE 
"${TARGET_OUTPUT_IDE_PRECACHE}") + if(ADD_PREPROCESSED_IDE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + endif() + endif() + list(REMOVE_DUPLICATES NBL_NSC_OUT_FILES_IDE) + target_sources(${IMPL_TARGET} PRIVATE ${NBL_NSC_OUT_FILES_IDE}) + set_source_files_properties(${NBL_NSC_OUT_FILES_IDE} PROPERTIES + HEADER_FILE_ONLY ON + GENERATED TRUE + ) + if(NSC_SHADER_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_CACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + endif() + if(NSC_PREPROCESS_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PRECACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + if(ADD_PREPROCESSED_IDE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() + endif() + source_group("${OUT}/${_CFG}" FILES ${NBL_NSC_OUT_FILES_IDE}) + endforeach() + else() + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${NBL_NSC_UNITY_HASHED_KEY}") + set(TARGET_OUTPUT_IDE_PREPROCESSED "${TARGET_OUTPUT_IDE}.pre.hlsl") + if(NSC_CACHE_DIR) + file(RELATIVE_PATH TARGET_OUTPUT_IDE_REL "${IMPL_BINARY_DIR}" "${TARGET_OUTPUT_IDE}") + set(TARGET_OUTPUT_IDE_CACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${NBL_NSC_CACHE_ROOT}/${TARGET_OUTPUT_IDE_REL}.ppcache.pre") + else() + set(TARGET_OUTPUT_IDE_CACHE "${TARGET_OUTPUT_IDE}.ppcache") + set(TARGET_OUTPUT_IDE_PRECACHE "${TARGET_OUTPUT_IDE}.ppcache.pre") + endif() + set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.dep") + endif() + if(NSC_SHADER_CACHE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_CACHE}") + endif() + set(ADD_PREPROCESSED_IDE TRUE) + if(NSC_PREPROCESS_CACHE) + list(APPEND 
NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PRECACHE}") + if(ADD_PREPROCESSED_IDE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE_PREPROCESSED}") + endif() + endif() + list(REMOVE_DUPLICATES NBL_NSC_OUT_FILES_IDE) + target_sources(${IMPL_TARGET} PRIVATE ${NBL_NSC_OUT_FILES_IDE}) + set_source_files_properties(${NBL_NSC_OUT_FILES_IDE} PROPERTIES + HEADER_FILE_ONLY ON + GENERATED TRUE + ) + if(NSC_SHADER_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_CACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + endif() + if(NSC_PREPROCESS_CACHE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PRECACHE}" PROPERTIES HEADER_FILE_ONLY OFF) + if(ADD_PREPROCESSED_IDE) + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES HEADER_FILE_ONLY ON) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties("${TARGET_OUTPUT_IDE_PREPROCESSED}" PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() + endif() + endif() + source_group("${OUT}" FILES ${NBL_NSC_OUT_FILES_IDE}) + endif() + endif() + + set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES + NBL_SPIRV_REGISTERED_INPUT "${NBL_NSC_REGISTERED_INPUT}" + NBL_SPIRV_PERMUTATION_CONFIG "${NBL_NSC_UNITY_INPUT}" + NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" + NBL_SPIRV_ACCESS_KEY "${FINAL_KEY_REL_PATH}" + ) + + set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") + endif() + endforeach() unset(KEYS) @@ -1605,17 +3037,57 @@ namespace @IMPL_NAMESPACE@ { list(APPEND KEYS ${ACCESS_KEY}) endforeach() - source_group("${IN}" FILES ${CONFIGS} ${INPUTS}) + source_group("${IN}/autogen" FILES ${CONFIGS}) + source_group("${IN}" FILES ${INPUTS}) + if(IMPL_HLSL_GLOB AND INPUTS) + set(_NBL_INPUTS_ABS "") + foreach(_IN_FILE IN LISTS INPUTS) + get_filename_component(_IN_ABS "${_IN_FILE}" ABSOLUTE) + string(TOLOWER "${_IN_ABS}" _IN_ABS_LOWER) + list(APPEND _NBL_INPUTS_ABS "${_IN_ABS_LOWER}") + endforeach() + set(_NBL_HLSL_FILTERED "") + 
foreach(_HLSL_FILE IN LISTS IMPL_HLSL_GLOB) + get_filename_component(_HLSL_ABS "${_HLSL_FILE}" ABSOLUTE) + string(TOLOWER "${_HLSL_ABS}" _HLSL_ABS_LOWER) + list(FIND _NBL_INPUTS_ABS "${_HLSL_ABS_LOWER}" _HLSL_INDEX) + if(_HLSL_INDEX EQUAL -1) + list(APPEND _NBL_HLSL_FILTERED "${_HLSL_FILE}") + endif() + endforeach() + set(IMPL_HLSL_GLOB "${_NBL_HLSL_FILTERED}") + endif() if(IMPL_HLSL_GLOB) target_sources(${IMPL_TARGET} PRIVATE ${IMPL_HLSL_GLOB}) set_source_files_properties(${IMPL_HLSL_GLOB} PROPERTIES HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None ) + if(CMAKE_GENERATOR MATCHES "Visual Studio") + set_source_files_properties(${IMPL_HLSL_GLOB} PROPERTIES + VS_EXCLUDED_FROM_BUILD TRUE + VS_TOOL_OVERRIDE "None" + ) + endif() source_group("HLSL Files" FILES ${IMPL_HLSL_GLOB}) endif() set(${IMPL_OUTPUT_VAR} ${KEYS} PARENT_SCOPE) + if(IMPL_EXPORT_RULES) + set(${IMPL_EXPORT_RULES}_COUNT "${_NBL_EXPORT_RULE_INDEX}" PARENT_SCOPE) + if(_NBL_EXPORT_RULE_INDEX GREATER 0) + math(EXPR _NBL_EXPORT_LAST "${_NBL_EXPORT_RULE_INDEX} - 1") + foreach(_NBL_EXPORT_IDX RANGE 0 ${_NBL_EXPORT_LAST}) + set(${IMPL_EXPORT_RULES}_COMMAND_${_NBL_EXPORT_IDX} ${${IMPL_EXPORT_RULES}_COMMAND_${_NBL_EXPORT_IDX}} PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_OUTPUT_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_OUTPUT_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_LOG_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_LOG_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_CACHE_SHADER_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_CACHE_PREPROCESS_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_PREPROCESSED_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + set(${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_DEPFILE_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + 
set(${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_IDX} "${${IMPL_EXPORT_RULES}_REPORT_${_NBL_EXPORT_IDX}}" PARENT_SCOPE) + endforeach() + endif() + endif() endfunction() function(NBL_CREATE_RESOURCE_ARCHIVE) diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md index 400aff5eb7..141f832e59 100644 --- a/docs/nsc-prebuilds.md +++ b/docs/nsc-prebuilds.md @@ -41,16 +41,27 @@ Runtime compilation is still useful for prototyping, but (assuming you don't use For each registered input it generates: - One `.spv` output **per CMake configuration** (`Debug/`, `Release/`, `RelWithDebInfo/`). -- If you use `CAPS`, it generates a **cartesian product** of permutations and emits a `.spv` for each. +- A matching `.spv.hash` sidecar for fast up-to-date checks on cache hits. +- If you use `CAPS`, it generates a **cartesian product** of permutations and emits a `.spv` for each (or a single `.spv` in `UNITY_BUILD` mode; see below). - A generated header (you choose the path via `INCLUDE`) containing: - - a primary template `get_spirv_key(limits, features)` and `get_spirv_key(device)` +- a primary template `get_spirv_key(...args)` and `get_spirv_key(device, ...args)` +- `get_spirv_key` returns a small owning buffer; use `.view()` or implicit `std::string_view` to consume it +- arguments must follow the **kind order** as it appears in `CAPS` (first appearance), validated structurally by required member names/types for each kind (including `limits`/`features`, no strong typing) + - `get_spirv_key(device, ...)` expects only **non-device** kinds in that same order; `limits`/`features` are injected from the device + - note: an order-agnostic API would require enforcing unique member sets across kinds to guarantee unambiguous matching; we keep a conventional order instead to stay flexible without extra constraints - explicit specializations for each registered base `KEY` - the returned key already includes the build config prefix (compiled into the header). 
-Keys are strings that match the output layout: +Keys are hashed to keep filenames short and stable across long permutation strings. The **full key string** is built as: ``` -/(._)(._)....spv +__._._...spv +``` + +Then `FNV-1a 64-bit` is computed from that full key (no `` prefix), and the **final output key** is: + +``` +/.spv ``` ## The JSON "INPUTS" format @@ -96,6 +107,69 @@ By default `NBL_CREATE_NSC_COMPILE_RULES` also collects `*.hlsl` files for IDE v - `GLOB_DIR` (optional): root directory for the default `*.hlsl` scan. - `DISCARD_DEFAULT_GLOB` (flag): disables the default scan and IDE grouping. +## Cache layers (SPIR-V + preprocess) + +There are three independent cache layers: + +- `NSC_SHADER_CACHE` (default `ON`) -> SPIR-V cache (`.spv.ppcache`) for full compilation results. +- `NSC_SHADER_CACHE_COMPRESSION` (default `raw`) -> compression used for shader cache entries (`raw` or `lzma`). +- `NSC_PREPROCESS_CACHE` (default `ON`) -> preprocessor prefix cache (`.spv.ppcache.pre`) to avoid repeating Boost.Wave include work when only the main shader changes. +- `NSC_PREPROCESS_PREAMBLE` (default `ON`) -> preamble mode: reuse cached preprocessed prefix + macro state and run Wave only on the body, then compile without re-lexing the prefix. +- All layers are used only for compilation (not `-P` preprocess-only runs). +- When preprocess cache is enabled and used, NSC also writes a combined preprocessed view (`.spv.pre.hlsl`) next to the outputs. + - This file is the exact input fed to DXC on the preprocess-cache path, so it's ready to paste into Godbolt for repros (use the same flags/includes). + +With `-verbose`, `.log` shows: + +- `Shader Cache: ` and `Cache hit!/miss! ...` for SPIR-V cache. +- `Preprocess cache: ` and `Preprocess cache hit!/miss! ...` for the prefix cache. 
+- Timing lines (performance): + - `Shader cache load took: ...` + - `Shader cache validate took: ...` + - `Shader cache lookup took: ...` + - `Shader cache write took: ...` (only when deps metadata changed on hit) + - `Preprocess cache lookup took: ...` + - `Total cache probe took: ...` + - `Preamble body preprocess took: ...` (only when preamble mode is used) + - `Preprocess took: ...` (only on compile path) + - `Compile took: ...` (only on compile path) + - `Total build time: ...` (preprocess + compile) + - `Write output took: ...` (only when output file is written) + - `Total took: ...` (overall tool runtime) + +When `-nbl-report` is provided, NSC also writes a JSON report containing: + +- `shader_cache` (hit/lookup/load/validate times) +- `preprocess_cache` (hit/lookup status) +- `preamble` (used + body/prefix timings) +- `compile` and `preprocess` timings +- `output` + `depfile` metadata +- `total_ms` and `total_with_output_ms` + +You can also toggle layers directly on the `nsc` CLI: + +- `-nbl-shader-cache` +- `-nbl-shader-cache-compression ` +- `-nbl-preprocess-cache` +- `-nbl-preprocess-preamble` +- `-nbl-stdout-log` (mirror the log file output to stdout) +- `-nbl-report ` (write a JSON report with cache hits, timings, and output metadata) + +Related CMake options: + +- `NSC_PREPROCESS_PREAMBLE` (default `ON`) +- `NSC_STDOUT_LOG` (default `OFF`) +- `NSC_SHADER_CACHE_COMPRESSION` (default `raw`) + +You can redirect the caches into a shared directory with: + +- `NSC_CACHE_DIR` (path). The cache files keep the same relative layout as `BINARY_DIR` (including `/`), but live under the given root. This is handy for CI or persistent cache volumes. + +The preprocess cache key is based on the **prefix** of the input file (leading directives/comments plus forced includes), and cache validity is checked against include dependency hashes. 
That means: + +- edits to the shader body still hit (fast path) +- changes to prefix directives, forced-includes, or included headers cause a cold run + ## Minimal usage (no permutations) Example pattern (as in `examples_tests/27_MPMCScheduler/CMakeLists.txt`): @@ -133,11 +207,12 @@ Then include the generated header and use the key to load the SPIR-V: ```cpp #include "nbl/this_example/builtin/build/spirv/keys.hpp" // ... -auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(device); -auto bundle = assetMgr->getAsset(key.c_str(), loadParams); +auto keyBuf = nbl::this_example::builtin::build::get_spirv_key<"shader">(device); +std::string_view key = keyBuf; +auto bundle = assetMgr->getAsset(key.data(), loadParams); ``` -`OUTPUT_VAR` (here: `KEYS`) is assigned the list of **all** produced access keys (all configurations + all permutations). This list is intended to be fed into `NBL_CREATE_RESOURCE_ARCHIVE(BUILTINS ${KEYS})`. +`OUTPUT_VAR` (here: `KEYS`) is assigned the list of **all** produced access keys (all configurations + all permutations). These are already hashed (e.g. `Debug/123456789.spv`) and are intended to be fed into `NBL_CREATE_RESOURCE_ARCHIVE(BUILTINS ${KEYS})`. In `UNITY_BUILD` mode this list contains one entry per base `KEY` (all permutations share a single `.spv`). ## Permutations via `CAPS` @@ -145,17 +220,134 @@ auto bundle = assetMgr->getAsset(key.c_str(), loadParams); Each `CAPS` entry looks like: -- `kind` (string, optional): `"limits"` or `"features"` (defaults to `"limits"` if omitted/invalid). +- `kind` (string, optional): `"limits"`, `"features"`, or `"custom"` (defaults to `"limits"` if omitted/invalid). +- `struct` (string, required for `kind="custom"`): name of the custom permutation struct (valid C/C++ identifier). If you use `limits` or `features` here, do not also use the built-in `limits`/`features` kinds in the same rule. 
- `name` (string, required): identifier used in both generated HLSL config and C++ key (must be a valid C/C++ identifier). -- `type` (string, required): `bool`, `uint16_t`, `uint32_t`, `uint64_t`. +- `type` (string, required): `bool`, `uint16_t`, `uint32_t`, `uint64_t`, `int16_t`, `int32_t`, `int64_t`, `float`, `double`. - `values` (array of numbers, required): the values you want to prebuild. - for `bool`, values must be `0` or `1`. + - for signed integer types, negative values are allowed. + - for `float`/`double`, you can provide **numbers or numeric strings** (e.g. `-1`, `-1.0`, `1e-3`, or `-1.f` for floats). Values are **normalized** to canonical scientific notation (1 digit before the decimal, 8 digits after for `float` or 16 for `double`, signed exponent with 2 or 3 digits). The normalized text becomes part of the key. + +At build time, NSC compiles each combination of values (cartesian product). At runtime, `get_spirv_key` appends suffixes using the structs you pass in for `limits`/`features` (duck-typed by required members) and any custom kinds. Each group starts with `__limits`, `__features`, or `__`, followed by `.member_` entries. Group order follows the **first appearance of each kind in `CAPS`** (and this same order is the required argument order for `get_spirv_key`); groups with no members are omitted. + +## Unity build for permutations (single `.spv`) + +`UNITY_BUILD` is an optional mode for `NBL_CREATE_NSC_COMPILE_RULES` that emits a **single `.spv`** per input `KEY`, while still supporting all permutation structs. It works by compiling a single HLSL unit that includes your input multiple times, once per permutation, and **renames each entrypoint** to a unique mangled name. 
+ +Usage: + +```cmake +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} -T lib_6_8 + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} + UNITY_BUILD + ENTRYPOINTS entryA entryB +) +``` + +Constraints: + +- `UNITY_BUILD` **requires a `lib_*` profile** (e.g. `-T lib_6_8`). Non-lib profiles require a single `-E` entrypoint and cannot host multiple entrypoints in one `.spv`. +- `ENTRYPOINTS` is **mandatory** in `UNITY_BUILD`. The names must be valid C identifiers. +- `UNITY_BUILD` does not allow `-E` options; entrypoints are taken from `ENTRYPOINTS`. +- Your input file must be safe to include multiple times (no `#pragma once` or include guards on the main input). +- Per-permutation macros must not alter guarded includes. Permutations are intended to be consumed via `DeviceConfigCaps`, not by redefining macros that affect heavy headers. + +How it works: + +- For each permutation, NSC auto-generates a wrapper block: + - creates a unique `DeviceConfigCaps__nbl_p` with that permutation's values + - `#define`s `DeviceConfigCaps` and each entrypoint name to a mangled symbol + - `#include`s the original input file + - `#undef`s the aliases +- Mangled entrypoint name is: + +``` +__nbl_p +``` + +`` is `FNV-1a 64-bit` of the **full permutation key string** (the same string used for hashing outputs in normal mode). + +Runtime usage: + +```cpp +auto keyBuf = nbl::this_example::builtin::build::get_spirv_key<"shader">(device); +auto entry = nbl::this_example::builtin::build::get_spirv_entrypoint<"shader", "entryA">(device); +// load .spv by key, pick entrypoint by name +``` + +Notes: + +- `get_spirv_key` returns the **file key** (same for all permutations in unity mode). 
+- `get_spirv_entrypoint` returns the **mangled entrypoint** for the current permutation. +- `get_spirv_entrypoint` uses the same `KEY` and permutation args as `get_spirv_key`. +- If you need different entrypoint lists per input, use separate `NBL_CREATE_NSC_COMPILE_RULES` calls. + +Each generated `.config` file defines a `DeviceConfigCaps` struct for HLSL. It includes: +- flat members for `limits`/`features` (backwards compatibility with older shaders) +- nested structs for custom kinds only, e.g. `DeviceConfigCaps::userA` + +Example shape: + +```hlsl +struct DeviceConfigCaps +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t maxImageDimension2D = 16384u; + NBL_CONSTEXPR_STATIC_INLINE bool shaderCullDistance = true; + + struct userA + { + NBL_CONSTEXPR_STATIC_INLINE uint32_t mode = 0u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t quality = 1u; + }; +}; +``` + +For more complex usage and regression-style checks (constexpr vs runtime, hashing, mixed payloads), see `examples_tests/73_SpirvKeysTest`. + +### Grouping caps by kind (optional) -At build time, NSC compiles each combination of values (cartesian product). At runtime, `get_spirv_key` appends suffixes using the `limits`/`features` you pass in. +To avoid repeating the same `kind`, you can group caps with `members`: + +```cmake +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "lib_6_8"], + "CAPS": [ + { + "kind": "custom", + "struct": "userA", + "members": [ + { "name": "mode", "type": "uint32_t", "values": [0, 1] }, + { "name": "quality", "type": "uint32_t", "values": [1, 2, 4] } + ] + }, + { + "kind": "features", + "members": [ + { "name": "shaderFloat64", "type": "bool", "values": [0, 1] } + ] + } + ] + } +] +]=]) +``` ### Example: mixing `limits` and `features` -This example permutes over one device limit and one device feature (order matters: the suffix order matches the `CAPS` array order): +This example permutes over one device limit and one device feature. 
Suffix order follows the `CAPS` order (`__limits` then `__features` here), and member order within each group follows the `CAPS` order for that group: ```cmake set(JSON [=[ @@ -190,11 +382,79 @@ NBL_CREATE_NSC_COMPILE_RULES( ) ``` +## Custom permutation structs + +If you need permutations based on data outside of device `limits`/`features`, define a custom struct in C++ and use `kind: "custom"` with `struct` set to the parameter name. At runtime you can pass any struct type that exposes the required members with matching types; **argument order follows the `CAPS` kind order**. Using custom names `limits` or `features` is allowed, but you cannot mix them with the built-in `limits`/`features` kinds in the same rule. + +Example: + +```cmake +set(JSON [=[ +[ + { + "INPUT": "app_resources/fft.hlsl", + "KEY": "fft", + "COMPILE_OPTIONS": ["-T", "cs_6_8"], + "CAPS": [ + { + "kind": "custom", + "struct": "fftConfig", + "name": "passCount", + "type": "uint32_t", + "values": [4, 8] + } + ] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + # ... 
+ OUTPUT_VAR KEYS + INPUTS ${JSON} +) +``` + +Runtime usage: + +```cpp +nbl::this_example::FFTConfig cfg = {}; +cfg.passCount = 4; +auto key = nbl::this_example::builtin::build::get_spirv_key<"fft">(device, cfg); +``` + +Constexpr usage with extra structs (order must match `CAPS` kind order, first appearance): + +```cpp +struct MyLimits { uint32_t maxImageDimension2D; }; +struct MyFeatures { bool shaderCullDistance; }; +struct UserA { uint32_t mode; uint32_t quality; }; +struct UserB { bool useAlternatePath; bool useFastPath; }; + +constexpr UserA userA = { 0u, 1u }; +constexpr UserB userB = { false, true }; +constexpr MyLimits limits = { 16384u }; +constexpr MyFeatures features = { true }; + +static constexpr auto keyBuf = + nbl::this_example::builtin::build::get_spirv_key<"shader_cd">(userA, userB, limits, features); +static constexpr std::string_view keyView = keyBuf; + +``` + +## Common pitfalls + +- Argument order must follow the **first appearance of each kind in `CAPS`**; this is an intentional convention to keep the API flexible. +- `get_spirv_key` returns a buffer; prefer `std::string_view key = buf;` or `buf.view()` to consume it. +- Do not store a `std::string_view` from a temporary buffer; keep the buffer alive. +- `float`/`double` CAP values are normalized to canonical scientific notation (1 digit before the decimal, 8 or 16 digits after, signed exponent); values passed to `get_spirv_key` must match one of the CAP values exactly. +- `constexpr` key generation works with `float`/`double` members when the values match the CAP list. + This produces `3 * 2 = 6` permutations per build configuration, and `KEYS` contains all of them (for example): ``` -Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_0.spv -Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_1.spv +Debug/6014683721143225910.spv +Debug/10493750182651038558.spv ... 
``` diff --git a/examples_tests b/examples_tests index 671d1f16b0..b6ee9c8971 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 671d1f16b0837a70c3016c2472864528f35db0bc +Subproject commit b6ee9c897111dd6dea8479edaed249e5459a9f29 diff --git a/include/nbl/asset/utils/CHLSLCompiler.h b/include/nbl/asset/utils/CHLSLCompiler.h index 92a1dca394..87c496a1b2 100644 --- a/include/nbl/asset/utils/CHLSLCompiler.h +++ b/include/nbl/asset/utils/CHLSLCompiler.h @@ -33,6 +33,8 @@ class NBL_API2 CHLSLCompiler final : public IShaderCompiler struct SOptions : IShaderCompiler::SCompilerOptions { std::span dxcOptions; // TODO: span is a VIEW to memory, so to something which we should treat immutable - why not span of string_view then? Since its span we force users to keep those std::strings alive anyway but now we cannnot even make nice constexpr & pass such expression here directly + std::span dxcCompileFlagsOverride = {}; + bool assumePreprocessed = false; IShader::E_CONTENT_TYPE getCodeContentType() const override { return IShader::E_CONTENT_TYPE::ECT_HLSL; }; }; @@ -51,7 +53,7 @@ class NBL_API2 CHLSLCompiler final : public IShaderCompiler //} std::string preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector* dependencies = nullptr) const override; - std::string preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector& dxc_compile_flags_override, std::vector* dependencies = nullptr) const; + std::string preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector& dxc_compile_flags_override, std::vector* dependencies = nullptr, std::vector* macro_defs = nullptr) const; void insertIntoStart(std::string& code, std::ostringstream&& ins) const override; @@ -118,6 +120,9 @@ class NBL_API2 CHLSLCompiler final : public IShaderCompiler return 
std::span(RequiredArguments); } + protected: + bool preprocessPrefixForCache(std::string_view code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache::SEntry& outEntry) const override; + protected: // This can't be a unique_ptr due to it being an undefined type // when Nabla is used as a lib @@ -155,4 +160,4 @@ class NBL_API2 CHLSLCompiler final : public IShaderCompiler #endif -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index 9fd4eee833..1b30055b30 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -33,9 +33,11 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted { system::path absolutePath = {}; std::string contents = {}; - core::blake3_hash_t hash = {}; // TODO: we're not yet using IFile::getPrecomputedHash(), so for builtins we can maybe use that in the future - // Could be used in the future for early rejection of cache hit - //nbl::system::IFileBase::time_point_t lastWriteTime = {}; + core::blake3_hash_t hash = {}; + uint64_t fileSize = 0; + int64_t lastWriteTime = 0; + bool hasFileInfo = false; + bool hasHash = false; explicit inline operator bool() const {return !absolutePath.empty();} }; @@ -87,6 +89,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted IIncludeLoader::found_t getIncludeRelative(const system::path& requestingSourceDir, const std::string& includeName) const; inline core::smart_refctd_ptr getDefaultFileSystemLoader() const { return m_defaultFileSystemLoader; } + inline system::ISystem* getSystem() const { return m_system.get(); } void addSearchPath(const std::string& searchPath, const core::smart_refctd_ptr& loader); @@ -106,6 +109,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted std::vector m_loaders; std::vector> m_generators; core::smart_refctd_ptr m_defaultFileSystemLoader; + 
core::smart_refctd_ptr m_system; }; // @@ -118,6 +122,60 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted std::string_view definition; }; + // Forward declaration for dependency access. + struct CCache; + + struct SPreprocessingDependency + { + public: + // Perf note: hashing while preprocessor lexing is likely to be slower than just hashing the whole array like this + inline SPreprocessingDependency(const system::path& _requestingSourceDir, const std::string_view& _identifier, bool _standardInclude, core::blake3_hash_t _hash, + const system::path& _absolutePath = {}, uint64_t _fileSize = 0, int64_t _lastWriteTime = 0, bool _hasFileInfo = false) : + requestingSourceDir(_requestingSourceDir), identifier(_identifier), standardInclude(_standardInclude), hash(_hash), + absolutePath(_absolutePath), fileSize(_fileSize), lastWriteTime(_lastWriteTime), hasFileInfo(_hasFileInfo) + {} + + inline SPreprocessingDependency(SPreprocessingDependency&) = default; + inline SPreprocessingDependency& operator=(SPreprocessingDependency&) = delete; + inline SPreprocessingDependency(SPreprocessingDependency&&) = default; + inline SPreprocessingDependency& operator=(SPreprocessingDependency&&) = default; + + // Needed for json vector serialization. 
Making it private and declaring from_json(_, SEntry&) as friend didn't work + inline SPreprocessingDependency() {} + + inline const system::path& getRequestingSourceDir() const { return requestingSourceDir; } + inline std::string_view getIdentifier() const { return identifier; } + inline bool isStandardInclude() const { return standardInclude; } + inline const core::blake3_hash_t& getHash() const { return hash; } + inline const system::path& getAbsolutePath() const { return absolutePath; } + inline uint64_t getFileSize() const { return fileSize; } + inline int64_t getLastWriteTime() const { return lastWriteTime; } + inline bool getHasFileInfo() const { return hasFileInfo; } + inline void setFileInfo(uint64_t size, int64_t timeTicks, bool hasInfo) const + { + fileSize = size; + lastWriteTime = timeTicks; + hasFileInfo = hasInfo; + } + + private: + friend void to_json(nlohmann::json& j, const SPreprocessingDependency& dependency); + friend void from_json(const nlohmann::json& j, SPreprocessingDependency& dependency); + friend class CCache; + + // path or identifier + system::path requestingSourceDir = ""; + std::string identifier = ""; + // hash of the contents - used to check against a found_t + core::blake3_hash_t hash = {}; + // If true, then `getIncludeStandard` was used to find, otherwise `getIncludeRelative` + bool standardInclude = false; + system::path absolutePath = {}; + mutable uint64_t fileSize = 0; + mutable int64_t lastWriteTime = 0; + mutable bool hasFileInfo = false; + }; + // using E_SPIRV_VERSION = nbl::hlsl::SpirvVersion; @@ -136,6 +194,13 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted system::logger_opt_ptr logger = nullptr; const CIncludeFinder* includeFinder = nullptr; std::span extraDefines = {}; + std::span forceIncludes = {}; + std::string_view codeForCache = {}; + bool applyForceIncludes = true; + bool preserveComments = true; + bool emitLineDirectives = true; + bool emitPragmaDirectives = true; + bool fastSafeValidation 
= false; E_SPIRV_VERSION targetSpirvVersion = E_SPIRV_VERSION::ESV_1_6; bool depfile = false; system::path depfilePath = {}; @@ -153,7 +218,6 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted }; // Forward declaration for SCompilerOptions use - struct CCache; /* @stage shaderStage, can be ESS_ALL_OR_LIBRARY to make multi-entrypoint shaders @targetSpirvVersion spirv version @@ -185,56 +249,34 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted SPreprocessorOptions preprocessorOptions = {}; CCache* readCache = nullptr; CCache* writeCache = nullptr; + bool* cacheHit = nullptr; + const std::vector* dependencyOverrides = nullptr; }; + static std::string applyForceIncludes(std::string_view code, std::span forceIncludes); + static bool probeShaderCache(const CCache* cache, std::string_view code, const SCompilerOptions& options, const CIncludeFinder* finder); + class CCache final : public IReferenceCounted { friend class IShaderCompiler; public: // Used to check compatibility of Caches before reading - constexpr static inline std::string_view VERSION = "1.1.0"; + constexpr static inline std::string_view VERSION = "1.2.7"; static auto const SHADER_BUFFER_SIZE_BYTES = sizeof(uint64_t) / sizeof(uint8_t); // It's obviously 8 + enum class ECompression : uint8_t + { + LZMA = 0u, + RAW = 1u + }; + struct SEntry { friend class CCache; - struct SPreprocessingDependency - { - public: - // Perf note: hashing while preprocessor lexing is likely to be slower than just hashing the whole array like this - inline SPreprocessingDependency(const system::path& _requestingSourceDir, const std::string_view& _identifier, bool _standardInclude, core::blake3_hash_t _hash) : - requestingSourceDir(_requestingSourceDir), identifier(_identifier), standardInclude(_standardInclude), hash(_hash) - {} - - inline SPreprocessingDependency(SPreprocessingDependency&) = default; - inline SPreprocessingDependency& operator=(SPreprocessingDependency&) = delete; - inline 
SPreprocessingDependency(SPreprocessingDependency&&) = default; - inline SPreprocessingDependency& operator=(SPreprocessingDependency&&) = default; - - // Needed for json vector serialization. Making it private and declaring from_json(_, SEntry&) as friend didn't work - inline SPreprocessingDependency() {} - - inline const system::path& getRequestingSourceDir() const { return requestingSourceDir; } - inline std::string_view getIdentifier() const { return identifier; } - inline bool isStandardInclude() const { return standardInclude; } - - private: - friend void to_json(nlohmann::json& j, const SEntry::SPreprocessingDependency& dependency); - friend void from_json(const nlohmann::json& j, SEntry::SPreprocessingDependency& dependency); - friend class CCache; - - // path or identifier - system::path requestingSourceDir = ""; - std::string identifier = ""; - // hash of the contents - used to check against a found_t - core::blake3_hash_t hash = {}; - // If true, then `getIncludeStandard` was used to find, otherwise `getIncludeRelative` - bool standardInclude = false; - - }; + using SPreprocessingDependency = IShaderCompiler::SPreprocessingDependency; struct SCompilerArgs; // Forward declaration for SPreprocessorArgs's friend declaration @@ -251,6 +293,11 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted if (definesIt->identifier != otherDefinesIt->identifier || definesIt->definition != otherDefinesIt->definition) return false; + if (forceIncludes != other.forceIncludes) return false; + if (preserveComments != other.preserveComments) return false; + if (emitLineDirectives != other.emitLineDirectives) return false; + if (emitPragmaDirectives != other.emitPragmaDirectives) return false; + return true; } @@ -275,11 +322,22 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted for (auto define : options.extraDefines) extraDefines.emplace_back(std::string(define.identifier), std::string(define.definition)); + for (const auto& inc : 
options.forceIncludes) + forceIncludes.emplace_back(inc); + + preserveComments = options.preserveComments; + emitLineDirectives = options.emitLineDirectives; + emitPragmaDirectives = options.emitPragmaDirectives; + // Sort them so equality and hashing are well defined std::sort(extraDefines.begin(), extraDefines.end(), [](const SMacroDefinition& lhs, const SMacroDefinition& rhs) {return lhs.identifier < rhs.identifier; }); }; std::string sourceIdentifier; std::vector extraDefines; + std::vector forceIncludes; + bool preserveComments = true; + bool emitLineDirectives = true; + bool emitPragmaDirectives = true; }; // TODO: SPreprocessorArgs could just be folded into `SCompilerArgs` to have less classes and decompressShader struct SCompilerArgs final @@ -339,7 +397,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted inline SEntry(const std::string_view _mainFileContents, const SCompilerOptions& compilerOptions) : mainFileContents(std::move(std::string(_mainFileContents))), compilerArgs(compilerOptions) { // Form the hashable for the compiler data - size_t preprocessorArgsHashableSize = compilerArgs.preprocessorArgs.sourceIdentifier.size() + compilerArgs.preprocessorArgs.extraDefines.size() * sizeof(SMacroDefinition); + size_t preprocessorArgsHashableSize = compilerArgs.preprocessorArgs.sourceIdentifier.size() + compilerArgs.preprocessorArgs.extraDefines.size() * sizeof(SMacroDefinition) + 3u; size_t compilerArgsHashableSize = sizeof(compilerArgs.stage) + sizeof(compilerArgs.targetSpirvVersion) + sizeof(compilerArgs.debugInfoFlags.value) + compilerArgs.optimizerPasses.size(); std::vector hashable; hashable.reserve(preprocessorArgsHashableSize + compilerArgsHashableSize + mainFileContents.size()); @@ -351,6 +409,11 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted hashable.insert(hashable.end(), defines.identifier.begin(), defines.identifier.end()); hashable.insert(hashable.end(), defines.definition.begin(), defines.definition.end()); } 
+ for (const auto& inc : compilerArgs.preprocessorArgs.forceIncludes) + hashable.insert(hashable.end(), inc.begin(), inc.end()); + hashable.push_back(static_cast(compilerArgs.preprocessorArgs.preserveComments)); + hashable.push_back(static_cast(compilerArgs.preprocessorArgs.emitLineDirectives)); + hashable.push_back(static_cast(compilerArgs.preprocessorArgs.emitPragmaDirectives)); // Insert rest of stuff from this struct. We're going to treat stage, targetSpirvVersion and debugInfoFlags.value as byte arrays for simplicity hashable.insert(hashable.end(), reinterpret_cast(&compilerArgs.stage), reinterpret_cast(&compilerArgs.stage) + sizeof(compilerArgs.stage)); @@ -376,7 +439,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted inline SEntry(const SEntry& other) : mainFileContents(other.mainFileContents), compilerArgs(other.compilerArgs), hash(other.hash), lookupHash(other.lookupHash), dependencies(other.dependencies), spirv(other.spirv), - uncompressedContentHash(other.uncompressedContentHash), uncompressedSize(other.uncompressedSize) {} + uncompressedContentHash(other.uncompressedContentHash), uncompressedSize(other.uncompressedSize), + compression(other.compression) {} inline SEntry& operator=(SEntry& other) = delete; inline SEntry(SEntry&& other) = default; @@ -396,10 +460,13 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted core::smart_refctd_ptr spirv; core::blake3_hash_t uncompressedContentHash; size_t uncompressedSize; + ECompression compression = ECompression::LZMA; }; inline void insert(SEntry&& entry) { + if (auto found = m_container.find(entry); found != m_container.end()) + m_container.erase(found); m_container.insert(std::move(entry)); } @@ -416,16 +483,30 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted auto retVal = core::make_smart_refctd_ptr(); for (auto& entry : m_container) retVal->m_container.emplace(entry); + retVal->m_defaultCompression = m_defaultCompression; return retVal; } - NBL_API2 
core::smart_refctd_ptr find(const SEntry& mainFile, const CIncludeFinder* finder) const; + inline void setDefaultCompression(ECompression compression) + { + m_defaultCompression = compression; + } + + inline ECompression getDefaultCompression() const + { + return m_defaultCompression; + } + + NBL_API2 core::smart_refctd_ptr find(const SEntry& mainFile, const CIncludeFinder* finder, bool fastSafeValidation = false) const; + NBL_API2 bool contains(const SEntry& mainFile, const CIncludeFinder* finder, bool fastSafeValidation = false) const; + NBL_API2 bool findEntryForCode(std::string_view code, const SCompilerOptions& options, const CIncludeFinder* finder, SEntry& outEntry, bool validateDependencies = true, bool* depsUpdated = nullptr, bool fastSafeValidation = false) const; + NBL_API2 core::smart_refctd_ptr decompressEntry(const SEntry& entry) const; inline CCache() {} // De/serialization methods NBL_API2 core::smart_refctd_ptr serialize() const; - NBL_API2 static core::smart_refctd_ptr deserialize(const std::span serializedCache); + NBL_API2 static core::smart_refctd_ptr deserialize(const std::span serializedCache, bool skipDependencies = false); private: // we only do lookups based on main file contents + compiler options @@ -449,22 +530,102 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted using EntrySet = core::unordered_set; EntrySet m_container; + ECompression m_defaultCompression = ECompression::LZMA; - NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder) const; + NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder, bool validateDependencies, bool* depsUpdated, bool fastSafeValidation) const; }; - struct DepfileWriteParams + class CPreprocessCache final : public IReferenceCounted { - system::ISystem* system = nullptr; - std::string_view depfilePath = {}; - std::string_view outputPath = {}; - std::string_view sourceIdentifier = {}; - system::path 
workingDirectory = {}; + public: + constexpr static inline std::string_view VERSION = "2.3"; + + struct SEntry + { + core::blake3_hash_t prefixHash = {}; + std::string preprocessedPrefix; + std::vector macroDefs; + std::string macroBlock; + std::vector dxcFlags; + uint32_t pragmaStage = static_cast(IShader::E_SHADER_STAGE::ESS_UNKNOWN); + CCache::SEntry::dependency_container_t dependencies; + }; + + enum class ELoadStatus : uint8_t + { + Missing, + Invalid, + Loaded + }; + + enum class EProbeStatus : uint8_t + { + Hit, + NoPrefix, + Missing, + Invalid, + PrefixChanged, + DependenciesChanged, + EntryInvalid + }; + + struct SProbeResult + { + std::string codeStorage; + std::string_view prefix = {}; + std::string_view body = {}; + core::blake3_hash_t prefixHash = {}; + EProbeStatus status = EProbeStatus::EntryInvalid; + bool hasPrefix = false; + bool cacheHit = false; + bool depsUpdated = false; + }; + + inline bool hasEntry() const { return m_hasEntry; } + inline const SEntry& getEntry() const { return m_entry; } + inline void setEntry(SEntry&& entry) + { + m_entry = std::move(entry); + m_hasEntry = true; + m_prefixLoaded = true; + m_backingPath.clear(); + m_prefixOffset = 0; + m_prefixSize = 0; + } + + NBL_API2 core::smart_refctd_ptr serialize() const; + NBL_API2 static core::smart_refctd_ptr deserialize(const std::span serializedCache); + NBL_API2 static core::smart_refctd_ptr loadFromFile(const system::path& path, ELoadStatus& status, bool loadPrefix = true); + NBL_API2 static bool writeToFile(const system::path& path, const CPreprocessCache& cache); + NBL_API2 static SProbeResult probe(std::string_view code, const CPreprocessCache* cache, ELoadStatus loadStatus, const SPreprocessorOptions& preprocessOptions); + NBL_API2 static const char* getProbeReason(EProbeStatus status); + NBL_API2 bool validateDependencies(const CIncludeFinder* finder, bool* depsUpdated = nullptr, bool fastSafeValidation = false) const; + NBL_API2 std::string 
buildCombinedCode(std::string_view body, std::string_view sourceIdentifier) const; + + private: + void ensurePrefixLoaded() const; + + bool m_hasEntry = false; + mutable SEntry m_entry; + mutable system::path m_backingPath; + mutable uint64_t m_prefixOffset = 0; + mutable uint32_t m_prefixSize = 0; + mutable bool m_prefixLoaded = true; }; - static bool writeDepfile(const DepfileWriteParams& params, const CCache::SEntry::dependency_container_t& dependencies, const CIncludeFinder* includeFinder = nullptr, system::logger_opt_ptr logger = nullptr); + struct SPreprocessCacheResult + { + bool ok = true; + bool cacheUsed = false; + bool cacheHit = false; + bool cacheUpdated = false; + CPreprocessCache::EProbeStatus status = CPreprocessCache::EProbeStatus::EntryInvalid; + IShader::E_SHADER_STAGE stage = IShader::E_SHADER_STAGE::ESS_UNKNOWN; + std::string code; + }; core::smart_refctd_ptr compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const; + SPreprocessCacheResult preprocessWithCache(std::string_view code, IShader::E_SHADER_STAGE stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache& cache, CPreprocessCache::ELoadStatus loadStatus, std::string_view sourceIdentifier) const; inline core::smart_refctd_ptr compileToSPIRV(const char* code, const SCompilerOptions& options) const { @@ -593,6 +754,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted virtual void insertIntoStart(std::string& code, std::ostringstream&& ins) const = 0; virtual core::smart_refctd_ptr compileToSPIRV_impl(const std::string_view code, const SCompilerOptions& options, std::vector* dependencies) const = 0; + virtual bool preprocessPrefixForCache(std::string_view code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache::SEntry& outEntry) const; core::smart_refctd_ptr m_system; diff --git a/include/nbl/core/hash/fnv1a64.h b/include/nbl/core/hash/fnv1a64.h new file mode 100644 index 
0000000000..96f5315fbb --- /dev/null +++ b/include/nbl/core/hash/fnv1a64.h @@ -0,0 +1,28 @@ +// Copyright (C) 2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_CORE_FNV1A64_H_INCLUDED_ +#define _NBL_CORE_FNV1A64_H_INCLUDED_ + +#include +#include +#include + +namespace nbl::core +{ + +// FNV-1a 64-bit hash. +constexpr uint64_t FNV1a_64(std::string_view sv) +{ + uint64_t h = 14695981039346656037ull; + for (unsigned char c : sv) + { + h ^= c; + h *= 1099511628211ull; + } + return h; +} + +} + +#endif // _NBL_CORE_FNV1A64_H_INCLUDED_ diff --git a/include/nbl/core/string/SpirvKeyHelpers.h b/include/nbl/core/string/SpirvKeyHelpers.h new file mode 100644 index 0000000000..51939f255d --- /dev/null +++ b/include/nbl/core/string/SpirvKeyHelpers.h @@ -0,0 +1,87 @@ +#ifndef _NBL_CORE_STRING_SPIRV_KEY_HELPERS_H_INCLUDED_ +#define _NBL_CORE_STRING_SPIRV_KEY_HELPERS_H_INCLUDED_ + +#include +#include +#include +#include + +#include "nbl/core/string/StringLiteral.h" + +namespace nbl::core::detail +{ + +template +struct SpirvKeyBuilderMissing : std::false_type {}; + +template +struct SpirvKeyBuilder +{ + template + static constexpr void build(const Args&...) + { + static_assert(SpirvKeyBuilderMissing::value, "Unknown SPIR-V key"); + } +}; + +template +struct SpirvFileKeyBuilder +{ + template + static constexpr auto build(const Args&... args) + { + return SpirvKeyBuilder::build(args...); + } + + template + static constexpr auto build_from_device(const Device* device, const Args&... args) + { + return SpirvKeyBuilder::build_from_device(device, args...); + } +}; + +template +struct SpirvEntrypointBuilder +{ + template + static constexpr void build(const Args&...) + { + static_assert(SpirvKeyBuilderMissing::value, "Unknown SPIR-V key"); + } + + template + static constexpr void build_from_device(const Device*, const Args&...) 
+ { + static_assert(SpirvKeyBuilderMissing::value, "Unknown SPIR-V key"); + } +}; + +template +concept spirv_device_has_limits = requires(const Device* device) +{ + device->getPhysicalDevice()->getLimits(); +}; + +template +concept spirv_device_has_features = requires(const Device* device) +{ + device->getEnabledFeatures(); +}; + +template +constexpr decltype(auto) spirv_device_get_limits(const Device* device) +{ + static_assert(spirv_device_has_limits, "Device does not provide getLimits"); + return device->getPhysicalDevice()->getLimits(); +} + +template +constexpr decltype(auto) spirv_device_get_features(const Device* device) +{ + static_assert(spirv_device_has_features, "Device does not provide getEnabledFeatures"); + return device->getEnabledFeatures(); +} + +} + +#endif diff --git a/include/nbl/core/string/StringLiteral.h b/include/nbl/core/string/StringLiteral.h index ebbed673f6..d48ebce7a5 100644 --- a/include/nbl/core/string/StringLiteral.h +++ b/include/nbl/core/string/StringLiteral.h @@ -5,6 +5,15 @@ #define _NBL_CORE_STRING_LITERAL_H_INCLUDED_ #include +#include +#include +#include +#include +#include +#include +#include + +#include "nbl/core/hash/fnv1a64.h" namespace nbl::core { @@ -25,4 +34,289 @@ struct StringLiteral // for compatibility's sake #define NBL_CORE_UNIQUE_STRING_LITERAL_TYPE(STRING_LITERAL) nbl::core::StringLiteral(STRING_LITERAL) +namespace nbl::core::detail +{ + +template +struct StringLiteralBufferType +{ + using type = void; +}; + +template +class StringLiteralBuffer +{ +public: + constexpr void append(char c) + { + if (!ensure_capacity(1)) + return; + b[n++] = c; + } + constexpr void append(std::string_view sv) + { + if (!ensure_capacity(sv.size())) + return; + for (char c : sv) + b[n++] = c; + } + constexpr void append(const char* s) + { + for (; *s; ++s) + append(*s); + } + + constexpr std::string_view view() const { return { b.data(), n }; } + constexpr operator std::string_view() const { return view(); } + constexpr const char* 
data() const { return b.data(); } + constexpr size_t size() const { return n; } + +private: + constexpr bool ensure_capacity(size_t add) + { + if (n + add <= Cap) + return true; + if (std::is_constant_evaluated()) + throw "overflow"; + assert(false && "StringLiteralBuffer overflow"); + return false; + } + + std::array b{}; + size_t n = 0; +}; + +template +constexpr std::string_view to_string_view(const StringLiteralBuffer& v) +{ + return v.view(); +} + +template +constexpr void append_uint_padded(Out& o, unsigned value, int width) +{ + char buf[16]; + int len = 0; + do + { + buf[len++] = static_cast('0' + (value % 10u)); + value /= 10u; + } while (value); + while (len < width) + buf[len++] = '0'; + for (int i = len - 1; i >= 0; --i) + o.append(buf[i]); +} + +template +constexpr void append_float_scientific(Out& o, T v) +{ + using Limits = std::numeric_limits; + constexpr int precision = Limits::max_digits10 - 1; + if (v != v) + { + assert(false && "StringLiteralBuffer float format failed"); + return; + } + if constexpr (Limits::has_infinity) + { + if (v == Limits::infinity() || v == -Limits::infinity()) + { + assert(false && "StringLiteralBuffer float format failed"); + return; + } + } + if (v < T(0)) + { + o.append('-'); + v = -v; + } + if (v == T(0)) + { + o.append('0'); + o.append('.'); + for (int i = 0; i < precision; ++i) + o.append('0'); + o.append('e'); + o.append('+'); + append_uint_padded(o, 0u, 2); + return; + } + + long double m = static_cast(v); + int exp10 = 0; + while (m >= 10.0L) + { + m /= 10.0L; + ++exp10; + } + while (m < 1.0L) + { + m *= 10.0L; + --exp10; + } + + std::array digits{}; + digits[0] = static_cast(m); + long double frac = m - static_cast(digits[0]); + for (int i = 1; i <= precision; ++i) + { + frac *= 10.0L; + int d = static_cast(frac); + if (d > 9) + d = 9; + digits[i] = d; + frac -= static_cast(d); + } + + frac *= 10.0L; + int round_digit = static_cast(frac); + if (round_digit > 9) + round_digit = 9; + long double remainder = frac - 
static_cast(round_digit); + bool round_up = false; + if (round_digit > 5) + round_up = true; + else if (round_digit == 5) + { + if (remainder > 0.0L) + round_up = true; + else + round_up = (digits[precision] % 2) != 0; + } + + if (round_up) + { + int i = precision; + for (; i >= 0; --i) + { + if (digits[i] < 9) + { + digits[i]++; + break; + } + digits[i] = 0; + } + if (i < 0) + { + digits[0] = 1; + for (int j = 1; j <= precision; ++j) + digits[j] = 0; + ++exp10; + } + } + + o.append(static_cast('0' + digits[0])); + o.append('.'); + for (int i = 1; i <= precision; ++i) + o.append(static_cast('0' + digits[i])); + o.append('e'); + if (exp10 < 0) + { + o.append('-'); + exp10 = -exp10; + } + else + { + o.append('+'); + } + const int exp_width = (exp10 >= 100) ? 3 : 2; + append_uint_padded(o, static_cast(exp10), exp_width); +} + +template +constexpr void put(Out& o, const T& v) +{ + using U = std::remove_cvref_t; + + if constexpr (std::is_same_v) + { + o.append(v ? '1' : '0'); + } + else if constexpr (std::is_integral_v) + { + using UU = std::make_unsigned_t; + UU x{}; + + if constexpr (std::is_signed_v) + { + if (v < 0) + { + o.append('-'); + x = UU(-(v + 1)) + 1; + } + else + { + x = UU(v); + } + } + else + { + x = UU(v); + } + + char tmp[3 + sizeof(U) * 8]; + size_t k = 0; + do { + tmp[k++] = char('0' + (x % 10)); + x /= 10; + } while (x); + while (k) + o.append(tmp[--k]); + } + else if constexpr (std::is_same_v || std::is_same_v) + { + append_float_scientific(o, v); + } + else if constexpr (std::is_floating_point_v) + { + static_assert(!sizeof(U), "Unsupported %s argument type"); + } + else if constexpr (std::is_convertible_v) + { + o.append(std::string_view(v)); + } + else if constexpr (std::is_same_v || std::is_same_v) + { + o.append((const char*)v); + } + else + { + static_assert(!sizeof(U), "Unsupported %s argument type"); + } +} + +template +constexpr void append_printf_s(Out& out, const Args&... 
args) +{ + auto tup = std::forward_as_tuple(args...); + size_t ai = 0; + + for (size_t i = 0; Fmt.value[i]; ++i) + { + if (Fmt.value[i] != '%') + { + out.append(Fmt.value[i]); + continue; + } + + char c = Fmt.value[++i]; + if (c == '%') + { + out.append('%'); + continue; + } + if (c == 's') + { + std::apply([&](auto const&... xs) { + size_t k = 0; + (((k++ == ai) ? (put(out, xs), 0) : 0), ...); + }, tup); + ++ai; + } + } +} + +} + #endif // _NBL_CORE_STRING_LITERAL_H_INCLUDED_ diff --git a/include/nbl/system/CFileArchive.h b/include/nbl/system/CFileArchive.h index 818bd8f6ba..35cb2e9413 100644 --- a/include/nbl/system/CFileArchive.h +++ b/include/nbl/system/CFileArchive.h @@ -10,6 +10,8 @@ #include "nbl/system/CFileView.h" #include "nbl/system/IFileViewAllocator.h" +#include + #ifdef _NBL_PLATFORM_ANDROID_ #include "nbl/system/CFileViewAPKAllocator.h" #endif @@ -22,13 +24,21 @@ template class CInnerArchiveFile : public CFileView { std::atomic_flag* alive; + std::optional m_precomputedHash; public: template - CInnerArchiveFile(std::atomic_flag* _flag, Args&&... args) : CFileView(std::forward(args)...), alive(_flag) + CInnerArchiveFile(std::atomic_flag* _flag, std::optional precomputedHash, Args&&... args) + : CFileView(std::forward(args)...), alive(_flag), m_precomputedHash(std::move(precomputedHash)) { } ~CInnerArchiveFile() = default; + // Non-empty return means the file came from an archive that embeds a precomputed hash. 
+ std::optional getPrecomputedHash() const override + { + return m_precomputedHash; + } + static void* operator new(size_t size) noexcept { assert(false); @@ -144,6 +154,7 @@ class CFileArchive : public IFileArchive // coast is clear, do placement new new (file, &m_fileFlags[found->ID]) CInnerArchiveFile( m_fileFlags+found->ID, + std::move(fileBuffer.precomputedHash), getDefaultAbsolutePath()/found->pathRelativeToArchive, flags, fileBuffer.initialModified, @@ -162,6 +173,7 @@ class CFileArchive : public IFileArchive void* buffer; size_t size; void* allocatorState; + std::optional precomputedHash = {}; // TODO: Implement this !!! IFileBase::time_point_t initialModified = std::chrono::utc_clock::now(); }; diff --git a/include/nbl/system/IFileBase.h b/include/nbl/system/IFileBase.h index c9ceb13a04..ae336e24cd 100644 --- a/include/nbl/system/IFileBase.h +++ b/include/nbl/system/IFileBase.h @@ -41,6 +41,7 @@ class IFileBase : public core::IReferenceCounted //! Optional, if not present this means that the hash was not already precomputed for you. // Equivalent to calling `xxHash256(getMappedPointer(),getSize(),&retval.x)` // Only really available for built-in resources or some other files that had to be read in their entirety at some point. + // Non-empty return means the file comes from an archive that embeds a precomputed hash. virtual inline std::optional getPrecomputedHash() const {return {};} //! 
diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 512633536f..18561015c2 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -395,12 +395,21 @@ nbl_adjust_definitions() option(NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION "Enable to optimise CWaveStringResolver.cpp in Debug configuration, uses RWDI compile options for the TU" ON) if(NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION) - set_source_files_properties(asset/utils/CWaveStringResolver.cpp PROPERTIES - # just enabling inlining and optimisations will help a lot - COMPILE_OPTIONS "$<$:${NBL_CXX_RELWITHDEBINFO_COMPILE_OPTIONS}>" - # trade is you cannot mix with PCH + set this property per config (it seems), different compile options would lead to corrruptions and undefined behaviours - SKIP_PRECOMPILE_HEADERS ON - ) + if(MSVC) + set_source_files_properties(asset/utils/CWaveStringResolver.cpp PROPERTIES + # just enabling inlining and optimisations will help a lot + COMPILE_OPTIONS "$<$:${NBL_CXX_RELWITHDEBINFO_COMPILE_OPTIONS};/Zi>" + # trade is you cannot mix with PCH + set this property per config (it seems), different compile options would lead to corrruptions and undefined behaviours + SKIP_PRECOMPILE_HEADERS ON + ) + else() + set_source_files_properties(asset/utils/CWaveStringResolver.cpp PROPERTIES + # just enabling inlining and optimisations will help a lot + COMPILE_OPTIONS "$<$:${NBL_CXX_RELWITHDEBINFO_COMPILE_OPTIONS}>" + # trade is you cannot mix with PCH + set this property per config (it seems), different compile options would lead to corrruptions and undefined behaviours + SKIP_PRECOMPILE_HEADERS ON + ) + endif() endif() if(NBL_EXPLICIT_MODULE_LOAD_LOG) @@ -868,4 +877,4 @@ source_group(TREE "${NBL_ROOT_PATH}" source_group(TREE "${NBL_ROOT_PATH}" PREFIX "Source Files" FILES ${NABLA_SOURCE_FILES} -) \ No newline at end of file +) diff --git a/src/nbl/asset/utils/CGLSLCompiler.cpp b/src/nbl/asset/utils/CGLSLCompiler.cpp index a593a11597..7f9763f5c4 100644 --- 
a/src/nbl/asset/utils/CGLSLCompiler.cpp +++ b/src/nbl/asset/utils/CGLSLCompiler.cpp @@ -136,6 +136,9 @@ CGLSLCompiler::CGLSLCompiler(core::smart_refctd_ptr&& system) std::string CGLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector* dependencies) const { + if (preprocessOptions.applyForceIncludes && !preprocessOptions.forceIncludes.empty()) + code = IShaderCompiler::applyForceIncludes(code, preprocessOptions.forceIncludes); + if (!preprocessOptions.extraDefines.empty()) { std::ostringstream insertion; diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 1020fa9446..1243abff53 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -16,17 +16,43 @@ #include #include #include +#include +#include #include #include #include #include -#include -#include using namespace nbl; using namespace nbl::asset; using Microsoft::WRL::ComPtr; +static std::string buildMacroBlock(const std::vector& macros) +{ + if (macros.empty()) + return {}; + size_t reserve = 0; + for (const auto& macro : macros) + reserve += macro.size() + 12; + std::string out; + out.reserve(reserve); + for (const auto& macro : macros) + { + const size_t eq = macro.find('='); + const std::string_view name = eq == std::string::npos ? std::string_view(macro) : std::string_view(macro).substr(0, eq); + const std::string_view def = eq == std::string::npos ? 
std::string_view() : std::string_view(macro).substr(eq + 1); + out.append("#define "); + out.append(name); + if (!def.empty()) + { + out.push_back(' '); + out.append(def); + } + out.push_back('\n'); + } + return out; +} + static constexpr const wchar_t* SHADER_MODEL_PROFILE = L"XX_6_8"; static const wchar_t* ShaderStageToString(asset::IShader::E_SHADER_STAGE stage) { switch (stage) @@ -363,39 +389,74 @@ namespace nbl::wave extern nbl::core::string preprocess(std::string& code, const IShaderCompiler::SPreprocessorOptions& preprocessOptions, bool withCaching, std::function post); } -std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector& dxc_compile_flags_override, std::vector* dependencies) const +std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector& dxc_compile_flags_override, std::vector* dependencies, std::vector* macro_defs) const { - const bool depfileEnabled = preprocessOptions.depfile; - if (depfileEnabled) + using clock_t = std::chrono::high_resolution_clock; + const auto preprocessStart = clock_t::now(); + auto forceIncludesStart = preprocessStart; + auto forceIncludesEnd = preprocessStart; + auto pragmaStart = preprocessStart; + auto pragmaEnd = preprocessStart; + auto waveStart = preprocessStart; + auto waveEnd = preprocessStart; + + if (preprocessOptions.applyForceIncludes && !preprocessOptions.forceIncludes.empty()) { - if (preprocessOptions.depfilePath.empty()) - { - preprocessOptions.logger.log("Depfile path is empty.", system::ILogger::ELL_ERROR); - return {}; - } + forceIncludesStart = clock_t::now(); + code = IShaderCompiler::applyForceIncludes(code, preprocessOptions.forceIncludes); + forceIncludesEnd = clock_t::now(); } std::vector localDependencies; auto* dependenciesOut = dependencies; - if (depfileEnabled && !dependenciesOut) + if 
(!dependenciesOut) dependenciesOut = &localDependencies; - // HACK: we do a pre-pre-process here to add \n after every #pragma to neutralize boost::wave's actions - // See https://github.com/Devsh-Graphics-Programming/Nabla/issues/746 - size_t line_index = 0; - for (size_t i = 0; i < code.size(); i++) { - if (code[i] == '\n') { - auto line = code.substr(line_index, i - line_index); - boost::trim(line); - if (boost::starts_with(line, "#pragma")) - code.insert(i++, 1, '\n'); - line_index = i; + if (code.find("#pragma") != std::string::npos) + { + pragmaStart = clock_t::now(); + size_t extra_newlines = 0; + size_t line_start = 0; + for (size_t i = 0; i < code.size(); ++i) + { + if (code[i] != '\n') + continue; + size_t j = line_start; + while (j < i && (code[j] == ' ' || code[j] == '\t' || code[j] == '\r')) + ++j; + if (j + 7 <= i && code.compare(j, 7, "#pragma") == 0) + ++extra_newlines; + line_start = i + 1; + } + if (extra_newlines) + { + std::string patched; + patched.reserve(code.size() + extra_newlines); + line_start = 0; + for (size_t i = 0; i < code.size(); ++i) + { + if (code[i] != '\n') + continue; + size_t j = line_start; + while (j < i && (code[j] == ' ' || code[j] == '\t' || code[j] == '\r')) + ++j; + const bool is_pragma = (j + 7 <= i) && (code.compare(j, 7, "#pragma") == 0); + patched.append(code, line_start, i - line_start + 1); + if (is_pragma) + patched.push_back('\n'); + line_start = i + 1; + } + if (line_start < code.size()) + patched.append(code, line_start, code.size() - line_start); + code = std::move(patched); } + pragmaEnd = clock_t::now(); } // preprocess + waveStart = clock_t::now(); core::string resolvedString = nbl::wave::preprocess(code, preprocessOptions, bool(dependenciesOut), - [&dxc_compile_flags_override, &stage, &dependenciesOut](nbl::wave::context& context) -> void + [&dxc_compile_flags_override, &stage, &dependenciesOut, macro_defs](nbl::wave::context& context) -> void { if (context.get_hooks().m_dxc_compile_flags_override.size() 
!= 0) dxc_compile_flags_override = context.get_hooks().m_dxc_compile_flags_override; @@ -405,9 +466,41 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE stage = context.get_hooks().m_pragmaStage; if (dependenciesOut) + { *dependenciesOut = std::move(context.get_dependencies()); + if (!dependenciesOut->empty()) + { + std::unordered_set seen; + seen.reserve(dependenciesOut->size()); + std::vector unique; + unique.reserve(dependenciesOut->size()); + for (auto& dep : *dependenciesOut) + { + std::string key; + if (!dep.getAbsolutePath().empty()) + { + key = dep.getAbsolutePath().string(); + } + else + { + key.reserve(dep.getRequestingSourceDir().string().size() + dep.getIdentifier().size() + 4); + key.append(dep.getRequestingSourceDir().string()); + key.push_back('|'); + key.append(dep.getIdentifier()); + key.push_back('|'); + key.push_back(dep.isStandardInclude() ? '1' : '0'); + } + if (seen.insert(key).second) + unique.emplace_back(std::move(dep)); + } + *dependenciesOut = std::move(unique); + } + } + if (macro_defs) + context.dump_macro_definitions(*macro_defs); } ); + waveEnd = clock_t::now(); // for debugging cause MSVC doesn't like to show more than 21k LoC in TextVisualizer if constexpr (false) @@ -425,18 +518,11 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE if (resolvedString.empty()) return resolvedString; - if (depfileEnabled) - { - IShaderCompiler::DepfileWriteParams params = {}; - const std::string depfilePathString = preprocessOptions.depfilePath.generic_string(); - params.depfilePath = depfilePathString; - params.sourceIdentifier = preprocessOptions.sourceIdentifier; - if (!params.sourceIdentifier.empty()) - params.workingDirectory = std::filesystem::path(std::string(params.sourceIdentifier)).parent_path(); - params.system = m_system.get(); - if (!IShaderCompiler::writeDepfile(params, *dependenciesOut, preprocessOptions.includeFinder, preprocessOptions.logger)) - return {}; - } + 
preprocessOptions.logger.log("Preprocess breakdown: force_includes=%lld ms, pragma_pass=%lld ms, wave_total=%lld ms, total=%lld ms.", system::ILogger::ELL_PERFORMANCE, + static_cast(std::chrono::duration_cast(forceIncludesEnd - forceIncludesStart).count()), + static_cast(std::chrono::duration_cast(pragmaEnd - pragmaStart).count()), + static_cast(std::chrono::duration_cast(waveEnd - waveStart).count()), + static_cast(std::chrono::duration_cast(waveEnd - preprocessStart).count())); return resolvedString; } @@ -444,7 +530,25 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector* dependencies) const { std::vector extra_dxc_compile_flags = {}; - return preprocessShader(std::move(code), stage, preprocessOptions, extra_dxc_compile_flags, dependencies); + return preprocessShader(std::move(code), stage, preprocessOptions, extra_dxc_compile_flags, dependencies, nullptr); +} + +bool CHLSLCompiler::preprocessPrefixForCache(std::string_view code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache::SEntry& outEntry) const +{ + outEntry = {}; + std::vector deps; + std::vector dxcFlags; + std::vector macroDefs; + auto text = preprocessShader(std::string(code), stage, preprocessOptions, dxcFlags, &deps, ¯oDefs); + if (text.empty()) + return false; + outEntry.preprocessedPrefix = std::move(text); + outEntry.dependencies = std::move(deps); + outEntry.dxcFlags = std::move(dxcFlags); + outEntry.macroDefs = std::move(macroDefs); + outEntry.macroBlock = buildMacroBlock(outEntry.macroDefs); + outEntry.pragmaStage = static_cast(stage); + return true; } core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::string_view code, const IShaderCompiler::SCompilerOptions& options, std::vector* dependencies) const @@ -459,7 +563,21 @@ core::smart_refctd_ptr 
CHLSLCompiler::compileToSPIRV_impl(const std::st std::vector dxc_compile_flags = {}; IShader::E_SHADER_STAGE stage = options.stage; - auto newCode = preprocessShader(std::string(code), stage, hlslOptions.preprocessorOptions, dxc_compile_flags, dependencies); + using clock_t = std::chrono::high_resolution_clock; + const auto preprocessStart = clock_t::now(); + std::string newCode; + if (hlslOptions.assumePreprocessed) + { + newCode = std::string(code); + if (!hlslOptions.dxcCompileFlagsOverride.empty()) + dxc_compile_flags.assign(hlslOptions.dxcCompileFlagsOverride.begin(), hlslOptions.dxcCompileFlagsOverride.end()); + } + else + { + newCode = preprocessShader(std::string(code), stage, hlslOptions.preprocessorOptions, dxc_compile_flags, dependencies); + } + const auto preprocessEnd = clock_t::now(); + logger.log("Preprocess took: %lld ms.", system::ILogger::ELL_PERFORMANCE, static_cast(std::chrono::duration_cast(preprocessEnd - preprocessStart).count())); if (newCode.empty()) return nullptr; // Suffix is the shader model version @@ -543,6 +661,7 @@ core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::st for (size_t i = 0; i < argc; i++) argsArray[i] = arguments[i].c_str(); + const auto compileStart = clock_t::now(); auto compileResult = dxcCompile( this, m_dxcCompilerTypes, @@ -551,6 +670,9 @@ core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::st argc, hlslOptions ); + const auto compileEnd = clock_t::now(); + logger.log("Compile took: %lld ms.", system::ILogger::ELL_PERFORMANCE, static_cast(std::chrono::duration_cast(compileEnd - compileStart).count())); + logger.log("Total build time: %lld ms.", system::ILogger::ELL_PERFORMANCE, static_cast(std::chrono::duration_cast(compileEnd - preprocessStart).count())); if (argsArray) delete[] argsArray; diff --git a/src/nbl/asset/utils/CWaveStringResolver.cpp b/src/nbl/asset/utils/CWaveStringResolver.cpp index a2165972e5..95a7e5bbd7 100644 --- a/src/nbl/asset/utils/CWaveStringResolver.cpp +++ 
b/src/nbl/asset/utils/CWaveStringResolver.cpp @@ -43,46 +43,86 @@ using namespace nbl; using namespace nbl::asset; #include "nbl/asset/utils/waveContext.h" +#include namespace nbl::wave { std::string preprocess(std::string& code, const nbl::asset::IShaderCompiler::SPreprocessorOptions& preprocessOptions, bool withCaching, std::function post) { + using clock_t = std::chrono::high_resolution_clock; + const auto setupStart = clock_t::now(); nbl::wave::context context(code.begin(), code.end(), preprocessOptions.sourceIdentifier.data(), { preprocessOptions }); + const auto contextEnd = clock_t::now(); + context.set_caching(withCaching); context.add_macro_definition("__HLSL_VERSION"); context.add_macro_definition("__SPIRV_MAJOR_VERSION__=" + std::to_string(IShaderCompiler::getSpirvMajor(preprocessOptions.targetSpirvVersion))); context.add_macro_definition("__SPIRV_MINOR_VERSION__=" + std::to_string(IShaderCompiler::getSpirvMinor(preprocessOptions.targetSpirvVersion))); + const auto builtinsEnd = clock_t::now(); - // instead of defining extraDefines as "NBL_GLSL_LIMIT_MAX_IMAGE_DIMENSION_1D 32768", - // now define them as "NBL_GLSL_LIMIT_MAX_IMAGE_DIMENSION_1D=32768" - // to match boost wave syntax - // https://www.boost.org/doc/libs/1_82_0/libs/wave/doc/class_reference_context.html#:~:text=Maintain%20defined%20macros-,add_macro_definition,-bool%20add_macro_definition + const auto extraStart = builtinsEnd; for (const auto& define : preprocessOptions.extraDefines) - context.add_macro_definition(define.identifier.data() + core::string("=") + define.definition.data()); + { + core::string macro; + macro.reserve(define.identifier.size() + define.definition.size() + 1); + macro.append(define.identifier.data(), define.identifier.size()); + macro.push_back('='); + macro.append(define.definition.data(), define.definition.size()); + context.add_macro_definition(macro); + } + const auto extraEnd = clock_t::now(); // preprocess core::string resolvedString; + const auto setupEnd = 
extraEnd; + auto lexStart = setupEnd; + auto lexEnd = setupEnd; try { - auto stream = std::stringstream(); - for (auto i= context.begin(); i!= context.end(); i++) - stream << i->get_value(); - resolvedString = stream.str(); + const size_t reserve = code.size() + (code.size() / 2); + resolvedString.reserve(reserve); + lexStart = clock_t::now(); + for (auto i = context.begin(); i != context.end(); ++i) + { + const auto& value = i->get_value(); + resolvedString.append(value.c_str(), value.size()); + } + lexEnd = clock_t::now(); + } + catch (const boost::wave::cpp_exception& e) + { + preprocessOptions.logger.log("%s exception caught. %s [%s:%d:%d]", system::ILogger::ELL_ERROR, e.what(), e.description(), e.file_name(), e.line_no(), e.column_no()); + return {}; } - catch (boost::wave::preprocess_exception& e) + catch (const boost::wave::cpplexer::lexing_exception& e) { - preprocessOptions.logger.log("%s exception caught. %s [%s:%d:%d]",system::ILogger::ELL_ERROR,e.what(),e.description(),e.file_name(),e.line_no(),e.column_no()); + preprocessOptions.logger.log("%s exception caught. %s [%s:%d:%d]", system::ILogger::ELL_ERROR, e.what(), e.description(), e.file_name(), e.line_no(), e.column_no()); + return {}; + } + catch (const std::exception& e) + { + preprocessOptions.logger.log("Exception caught. %s", system::ILogger::ELL_ERROR, e.what()); return {}; } catch (...) 
{ - preprocessOptions.logger.log("Unknown exception caught!",system::ILogger::ELL_ERROR); + preprocessOptions.logger.log("Unknown exception caught!", system::ILogger::ELL_ERROR); return {}; } + const auto postStart = clock_t::now(); post(context); + const auto postEnd = clock_t::now(); + + preprocessOptions.logger.log("Wave setup breakdown: context=%lld ms, builtins=%lld ms, extra_defines=%lld ms.", system::ILogger::ELL_PERFORMANCE, + static_cast(std::chrono::duration_cast(contextEnd - setupStart).count()), + static_cast(std::chrono::duration_cast(builtinsEnd - contextEnd).count()), + static_cast(std::chrono::duration_cast(extraEnd - builtinsEnd).count())); + preprocessOptions.logger.log("Wave timings: setup=%lld ms, lex=%lld ms, post=%lld ms.", system::ILogger::ELL_PERFORMANCE, + static_cast(std::chrono::duration_cast(setupEnd - setupStart).count()), + static_cast(std::chrono::duration_cast(lexEnd - lexStart).count()), + static_cast(std::chrono::duration_cast(postEnd - postStart).count())); return resolvedString; } -} \ No newline at end of file +} diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index a6cd95b441..c6fc9d7946 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -4,284 +4,508 @@ #include "nbl/asset/utils/IShaderCompiler.h" #include "nbl/asset/utils/shadercUtils.h" #include "nbl/asset/utils/shaderCompiler_serialization.h" - +#include "nbl/core/hash/blake.h" +#include "nbl/core/hash/xxHash256.h" #include #include #include #include +#include #include +#include +#include +#include +#include +#include +#include +#include + +#ifdef NBL_EMBED_BUILTIN_RESOURCES +#include "nbl/builtin/CArchive.h" +#include "spirv/builtin/CArchive.h" +#include "boost/builtin/CArchive.h" +#include "nbl/devicegen/builtin/CArchive.h" +#endif + +#ifdef _WIN32 +#include +#endif #include #include -using namespace nbl; -using namespace nbl::asset; +namespace +{ +struct FileInfoCacheEntry 
+{ + uint64_t size = 0; + int64_t ticks = 0; + bool ok = false; +}; -IShaderCompiler::IShaderCompiler(core::smart_refctd_ptr&& system) - : m_system(std::move(system)) +std::unordered_map g_fileInfoCache; +std::mutex g_fileInfoCacheMutex; + +struct IncludeCacheEntry { - m_defaultIncludeFinder = core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system)); + uint64_t size = 0; + int64_t ticks = 0; + nbl::core::blake3_hash_t hash = {}; + std::string contents; +}; + +std::unordered_map g_includeCache; +std::mutex g_includeCacheMutex; +std::unordered_map g_canonicalCache; +std::mutex g_canonicalCacheMutex; + +#ifdef NBL_EMBED_BUILTIN_RESOURCES +struct BuiltinIncludeCacheEntry +{ + nbl::asset::IShaderCompiler::IIncludeLoader::found_t value; +}; + +std::unordered_map g_builtinIncludeCache; +std::mutex g_builtinIncludeCacheMutex; + +inline bool tryGetBuiltinResource(const std::string& normalized, const nbl::system::SBuiltinFile*& outFile, std::string& outRel, std::string_view& outPrefix) +{ + auto tryNamespace = [&](std::string_view prefix, const nbl::system::SBuiltinFile& (*getResource)(const std::string&)) -> bool + { + if (normalized.rfind(prefix, 0) != 0) + return false; + std::string rel = normalized.substr(prefix.size()); + if (!rel.empty() && (rel.front() == '/' || rel.front() == '\\')) + rel.erase(rel.begin()); + const auto& resource = getResource(rel); + if (!resource.contents || resource.size == 0) + return false; + outFile = &resource; + outRel = std::move(rel); + outPrefix = prefix; + return true; + }; + + if (tryNamespace(nbl::builtin::pathPrefix, nbl::builtin::get_resource_runtime)) + return true; + if (tryNamespace(spirv::builtin::pathPrefix, spirv::builtin::get_resource_runtime)) + return true; + if (tryNamespace(boost::builtin::pathPrefix, boost::builtin::get_resource_runtime)) + return true; + if (tryNamespace(nbl::devicegen::builtin::pathPrefix, nbl::devicegen::builtin::get_resource_runtime)) + return true; + + return false; } -bool 
IShaderCompiler::writeDepfile( - const DepfileWriteParams& params, - const CCache::SEntry::dependency_container_t& dependencies, - const CIncludeFinder* includeFinder, - system::logger_opt_ptr logger) +inline bool tryGetBuiltinResourceHash(const nbl::system::path& path, nbl::core::blake3_hash_t& outHash) { - std::string depfilePathString; - if (!params.depfilePath.empty()) - depfilePathString = std::string(params.depfilePath); - else - depfilePathString = std::string(params.outputPath) + ".d"; + if (path.empty()) + return false; + const std::string normalized = path.generic_string(); + const nbl::system::SBuiltinFile* resource = nullptr; + std::string rel; + std::string_view prefix; + if (!tryGetBuiltinResource(normalized, resource, rel, prefix)) + return false; + std::memcpy(outHash.data, resource->xx256Hash.data(), sizeof(outHash.data)); + return true; +} - if (depfilePathString.empty()) - { - logger.log("Depfile path is empty.", system::ILogger::ELL_ERROR); - return false; - } +inline bool matchBuiltinResourceHash(const nbl::system::path& path, const nbl::core::blake3_hash_t& expected) +{ + nbl::core::blake3_hash_t hash = {}; + if (!tryGetBuiltinResourceHash(path, hash)) + return false; + return hash == expected; +} - const auto parentDirectory = std::filesystem::path(depfilePathString).parent_path(); - if (!parentDirectory.empty() && !std::filesystem::exists(parentDirectory)) - { - if (!std::filesystem::create_directories(parentDirectory)) - { - logger.log("Failed to create parent directory for depfile.", system::ILogger::ELL_ERROR); - return false; - } - } +class CBuiltinArchiveIncludeLoader final : public nbl::asset::IShaderCompiler::IIncludeLoader +{ + public: + using IIncludeLoader = nbl::asset::IShaderCompiler::IIncludeLoader; + + IIncludeLoader::found_t getInclude(const nbl::system::path& searchPath, const std::string& includeName) const override + { + std::string normalized = nbl::system::path(includeName).generic_string(); + if (!searchPath.empty()) + { 
+ const std::string search = nbl::system::path(searchPath).generic_string(); + if (normalized.rfind(search, 0) != 0) + normalized = (nbl::system::path(search) / includeName).generic_string(); + } - std::vector depPaths; - depPaths.reserve(dependencies.size() + 1); + { + std::lock_guard lock(g_builtinIncludeCacheMutex); + const auto it = g_builtinIncludeCache.find(normalized); + if (it != g_builtinIncludeCache.end()) + return it->second.value; + } - auto addDepPath = [&depPaths, ¶ms](std::filesystem::path path) - { - if (path.empty()) - return; - if (path.is_relative()) - { - if (params.workingDirectory.empty()) - return; - path = std::filesystem::path(params.workingDirectory) / path; - } - std::error_code ec; - std::filesystem::path normalized = std::filesystem::weakly_canonical(path, ec); - if (ec) - { - normalized = std::filesystem::absolute(path, ec); - if (ec) - return; - } - if (normalized.empty() || !std::filesystem::exists(normalized)) - return; - auto normalizedString = normalized.generic_string(); - if (normalizedString.find_first_of("\r\n") != std::string::npos) - return; - depPaths.emplace_back(std::move(normalizedString)); - }; - - if (!params.sourceIdentifier.empty()) - { - std::filesystem::path rootPath{std::string(params.sourceIdentifier)}; - if (rootPath.is_relative()) - { - if (!params.workingDirectory.empty()) - rootPath = std::filesystem::absolute(std::filesystem::path(params.workingDirectory) / rootPath); - else - rootPath = std::filesystem::absolute(rootPath); - } - addDepPath(rootPath); - } + const nbl::system::SBuiltinFile* resource = nullptr; + std::string rel; + std::string_view prefix; + if (!tryGetBuiltinResource(normalized, resource, rel, prefix)) + return {}; + + IIncludeLoader::found_t ret = {}; + ret.absolutePath = nbl::system::path(std::string(prefix)) / rel; + ret.contents.assign(reinterpret_cast(resource->contents), resource->size); + if (!ret.contents.empty() && ret.contents.back() != '\n' && ret.contents.back() != '\r') + 
ret.contents.push_back('\n'); + std::memcpy(ret.hash.data, resource->xx256Hash.data(), sizeof(ret.hash.data)); + ret.hasHash = true; + ret.fileSize = resource->size; + ret.hasFileInfo = false; + { + std::lock_guard lock(g_builtinIncludeCacheMutex); + g_builtinIncludeCache.emplace(normalized, BuiltinIncludeCacheEntry{ ret }); + } + return ret; + } +}; +#endif - for (const auto& dep : dependencies) - { - if (includeFinder) - { - IShaderCompiler::IIncludeLoader::found_t header = dep.isStandardInclude() ? - includeFinder->getIncludeStandard(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())) : - includeFinder->getIncludeRelative(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())); +inline bool getFileInfoFast(const nbl::system::path& path, uint64_t& sizeOut, int64_t& timeOut) +{ +#ifdef _WIN32 + WIN32_FILE_ATTRIBUTE_DATA data = {}; + if (!GetFileAttributesExW(path.c_str(), GetFileExInfoStandard, &data)) + return false; + ULARGE_INTEGER size = {}; + size.HighPart = data.nFileSizeHigh; + size.LowPart = data.nFileSizeLow; + ULARGE_INTEGER time = {}; + time.HighPart = data.ftLastWriteTime.dwHighDateTime; + time.LowPart = data.ftLastWriteTime.dwLowDateTime; + sizeOut = size.QuadPart; + using file_clock = std::chrono::file_clock; + const auto duration = file_clock::duration{ static_cast(time.QuadPart) }; + const auto fileTp = std::chrono::time_point{ duration }; + const auto utcTp = std::chrono::clock_cast(fileTp); + timeOut = utcTp.time_since_epoch().count(); + return true; +#else + std::error_code ec; + std::filesystem::directory_entry entry(path, ec); + if (ec) + return false; + const auto time = entry.last_write_time(ec); + if (ec) + return false; + const auto size = entry.file_size(ec); + if (ec) + return false; + sizeOut = size; + const auto utcTp = std::chrono::clock_cast(time); + timeOut = utcTp.time_since_epoch().count(); + return true; +#endif +} - if (!header) - continue; - addDepPath(header.absolutePath); - } - else - { - 
std::filesystem::path candidate = dep.isStandardInclude() ? std::filesystem::path(std::string(dep.getIdentifier())) : (dep.getRequestingSourceDir() / std::string(dep.getIdentifier())); - if (candidate.is_relative()) - { - if (!params.workingDirectory.empty()) - candidate = std::filesystem::absolute(std::filesystem::path(params.workingDirectory) / candidate); - else - candidate = std::filesystem::absolute(candidate); - } - addDepPath(candidate); - } - } +inline bool getFileInfoFast(const nbl::system::path& path, uint64_t& sizeOut, int64_t& timeOut, nbl::system::ISystem* system) +{ + if (getFileInfoFast(path, sizeOut, timeOut)) + return true; + if (!system || path.empty()) + return false; + + nbl::system::ISystem::future_t> future; + system->createFile(future, path, nbl::system::IFile::ECF_READ); + if (!future.wait()) + return false; + nbl::core::smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file) + return false; + sizeOut = file->getSize(); + timeOut = file->getLastWriteTime().time_since_epoch().count(); + return true; +} + +inline bool getFileInfoCached(const nbl::system::path& path, uint64_t& sizeOut, int64_t& timeOut, nbl::system::ISystem* system) +{ + if (path.empty()) + return false; + + { + std::lock_guard lock(g_fileInfoCacheMutex); + const auto it = g_fileInfoCache.find(path); + if (it != g_fileInfoCache.end()) + { + if (!it->second.ok) + return false; + sizeOut = it->second.size; + timeOut = it->second.ticks; + return true; + } + } + + uint64_t size = 0; + int64_t ticks = 0; + const bool ok = getFileInfoFast(path, size, ticks, system); + { + std::lock_guard lock(g_fileInfoCacheMutex); + g_fileInfoCache.emplace(path, FileInfoCacheEntry{ size, ticks, ok }); + } + if (!ok) + return false; + sizeOut = size; + timeOut = ticks; + return true; +} + +template +inline void collectFileInfoMismatchesParallel(const DepContainer& deps, std::vector& out, nbl::system::ISystem* system) +{ + const size_t count = deps.size(); 
+ if (!count) + return; + + std::vector fileInfoIndices; + fileInfoIndices.reserve(count); + std::unordered_map seenPaths; + seenPaths.reserve(count); + + for (size_t i = 0; i < count; ++i) + { + const auto& dep = deps[i]; + const auto& path = dep.getAbsolutePath(); + const bool hasAbsolutePath = !path.empty() && path.is_absolute(); + const bool hasFileInfo = dep.getHasFileInfo() && hasAbsolutePath; + if (!hasFileInfo) + { +#ifdef NBL_EMBED_BUILTIN_RESOURCES + if (!path.empty()) + { + if (matchBuiltinResourceHash(path, dep.getHash())) + continue; + } + else + { + const nbl::system::path logicalPath(dep.getIdentifier()); + if (matchBuiltinResourceHash(logicalPath, dep.getHash())) + continue; + } +#endif + out.push_back(i); + continue; + } + if (seenPaths.emplace(path, true).second) + fileInfoIndices.push_back(i); + } + + const size_t fileCount = fileInfoIndices.size(); + if (!fileCount) + return; + + unsigned threads = std::thread::hardware_concurrency(); + if (!threads) + threads = 1u; + if (threads > 32u) + threads = 32u; + if (threads > fileCount) + threads = static_cast(fileCount); + + if (threads <= 1u || fileCount < 64u) + { + for (size_t k = 0; k < fileCount; ++k) + { + const size_t i = fileInfoIndices[k]; + const auto& dep = deps[i]; + const auto& path = dep.getAbsolutePath(); + uint64_t size = 0; + int64_t ticks = 0; + if (path.empty() || !getFileInfoFast(path, size, ticks) || dep.getLastWriteTime() != ticks || dep.getFileSize() != size) + out.push_back(i); + } + return; + } + + std::vector> perThread(threads); + const size_t chunk = (fileCount + threads - 1u) / threads; + std::vector workers; + workers.reserve(threads); + for (unsigned t = 0; t < threads; ++t) + { + const size_t start = t * chunk; + if (start >= fileCount) + break; + const size_t end = std::min(start + chunk, fileCount); + workers.emplace_back([&deps, &perThread, &fileInfoIndices, t, start, end, system]() + { + auto& local = perThread[t]; + for (size_t k = start; k < end; ++k) + { + const 
size_t i = fileInfoIndices[k]; + const auto& dep = deps[i]; + const auto& path = dep.getAbsolutePath(); + uint64_t size = 0; + int64_t ticks = 0; + if (path.empty() || !getFileInfoFast(path, size, ticks) || dep.getLastWriteTime() != ticks || dep.getFileSize() != size) + local.push_back(i); + } + }); + } + for (auto& worker : workers) + worker.join(); + for (auto& local : perThread) + out.insert(out.end(), local.begin(), local.end()); +} +} - std::sort(depPaths.begin(), depPaths.end()); - depPaths.erase(std::unique(depPaths.begin(), depPaths.end()), depPaths.end()); +using namespace nbl; +using namespace nbl::asset; - auto escapeDepPath = [](const std::string& path) -> std::string +namespace +{ + std::string buildMacroBlock(const std::vector& macros) { - std::string normalized = path; - std::replace(normalized.begin(), normalized.end(), '\\', '/'); + if (macros.empty()) + return {}; + size_t reserve = 0; + for (const auto& macro : macros) + reserve += macro.size() + 12; std::string out; - out.reserve(normalized.size()); - for (const char c : normalized) + out.reserve(reserve); + for (const auto& macro : macros) { - if (c == ' ' || c == '#') - out.push_back('\\'); - if (c == '$') + const size_t eq = macro.find('='); + const std::string_view name = eq == std::string::npos ? std::string_view(macro) : std::string_view(macro).substr(0, eq); + const std::string_view def = eq == std::string::npos ? 
std::string_view() : std::string_view(macro).substr(eq + 1); + out.append("#define "); + out.append(name); + if (!def.empty()) { - out.push_back('$'); - out.push_back('$'); - continue; + out.push_back(' '); + out.append(def); } - out.push_back(c); + out.push_back('\n'); } return out; - }; - - if (!params.system) - { - logger.log("Depfile system is null.", system::ILogger::ELL_ERROR); - return false; } - const auto depfilePath = std::filesystem::path(depfilePathString); - auto tempPath = depfilePath; - tempPath += ".tmp"; - params.system->deleteFile(tempPath); - - core::smart_refctd_ptr depfile; - { - system::ISystem::future_t> future; - params.system->createFile(future, tempPath, system::IFileBase::ECF_WRITE); - if (!future.wait()) - { - logger.log("Failed to open depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); - return false; - } - future.acquire().move_into(depfile); - } - if (!depfile) + void splitPrefix(std::string_view code, std::string_view& prefix, std::string_view& body) { - logger.log("Failed to open depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); - return false; - } + size_t pos = 0; + size_t prefixEnd = 0; + bool inContinuation = false; + bool inBlockComment = false; - std::string targetPathString; - if (params.outputPath.empty()) - { - std::filesystem::path targetPath = depfilePathString; - if (targetPath.extension() == ".d") - targetPath.replace_extension(); - targetPathString = targetPath.generic_string(); - } - else - { - targetPathString = std::string(params.outputPath); - } - if (targetPathString.empty()) - { - logger.log("Depfile target path is empty.", system::ILogger::ELL_ERROR); - return false; - } - const std::string target = escapeDepPath(std::filesystem::path(targetPathString).generic_string()); - std::vector escapedDeps; - escapedDeps.reserve(depPaths.size()); - for (const auto& depPath : depPaths) - escapedDeps.emplace_back(escapeDepPath(depPath)); - - std::string depfileContents; - 
depfileContents.append(target); - depfileContents.append(":"); - if (!escapedDeps.empty()) - { - depfileContents.append(" \\\n"); - for (size_t index = 0; index < escapedDeps.size(); ++index) + while (pos < code.size()) { - depfileContents.append(" "); - depfileContents.append(escapedDeps[index]); - if (index + 1 < escapedDeps.size()) - depfileContents.append(" \\\n"); + const size_t lineStart = pos; + size_t lineEnd = code.find('\n', pos); + if (lineEnd == std::string_view::npos) + lineEnd = code.size(); + + std::string_view line = code.substr(lineStart, lineEnd - lineStart); + if (!line.empty() && line.back() == '\r') + line.remove_suffix(1); + + bool directiveLine = false; + if (inContinuation || inBlockComment) + { + directiveLine = true; + } + else + { + size_t i = 0; + if (line.size() >= 3 && static_cast(line[0]) == 0xEF && + static_cast(line[1]) == 0xBB && static_cast(line[2]) == 0xBF) + i = 3; + while (i < line.size() && (line[i] == ' ' || line[i] == '\t')) + ++i; + if (i == line.size()) + { + directiveLine = true; + } + else if (line[i] == '#') + { + directiveLine = true; + } + else if (line[i] == '/' && i + 1 < line.size() && line[i + 1] == '/') + { + directiveLine = true; + } + else if (line[i] == '/' && i + 1 < line.size() && line[i + 1] == '*') + { + directiveLine = true; + if (line.find("*/", i + 2) == std::string_view::npos) + inBlockComment = true; + } + } + + if (!directiveLine) + break; + + prefixEnd = lineEnd < code.size() ? 
lineEnd + 1 : lineEnd; + + if (inBlockComment && line.find("*/") != std::string_view::npos) + inBlockComment = false; + + bool continuation = false; + if (!line.empty()) + { + size_t j = line.size(); + while (j > 0 && (line[j - 1] == ' ' || line[j - 1] == '\t')) + --j; + if (j > 0 && line[j - 1] == '\\') + continuation = true; + } + inContinuation = continuation; + if (lineEnd == code.size()) + break; + pos = lineEnd + 1; } - } - depfileContents.append("\n"); - system::IFile::success_t success; - depfile->write(success, depfileContents.data(), 0, depfileContents.size()); - if (!success) - { - logger.log("Failed to write depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); - return false; + prefix = code.substr(0, prefixEnd); + body = code.substr(prefixEnd); } - depfile = nullptr; +} - params.system->deleteFile(depfilePath); - const std::error_code moveError = params.system->moveFileOrDirectory(tempPath, depfilePath); - if (moveError) - { - logger.log("Failed to replace depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); - return false; - } - return true; +IShaderCompiler::IShaderCompiler(core::smart_refctd_ptr&& system) + : m_system(std::move(system)) +{ + m_defaultIncludeFinder = core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system)); } core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const { - const bool depfileEnabled = options.preprocessorOptions.depfile; - const bool supportsDependencies = options.getCodeContentType() == IShader::E_CONTENT_TYPE::ECT_HLSL; - - auto writeDepfileFromDependencies = [&](const CCache::SEntry::dependency_container_t& dependencies) -> bool - { - if (!depfileEnabled) - return true; - - if (options.preprocessorOptions.depfilePath.empty()) - { - options.preprocessorOptions.logger.log("Depfile path is empty.", system::ILogger::ELL_ERROR); - return false; - } - - IShaderCompiler::DepfileWriteParams params = {}; - const 
std::string depfilePathString = options.preprocessorOptions.depfilePath.generic_string(); - params.depfilePath = depfilePathString; - params.sourceIdentifier = options.preprocessorOptions.sourceIdentifier; - if (!params.sourceIdentifier.empty()) - params.workingDirectory = std::filesystem::path(std::string(params.sourceIdentifier)).parent_path(); - params.system = m_system.get(); - return IShaderCompiler::writeDepfile(params, dependencies, options.preprocessorOptions.includeFinder, options.preprocessorOptions.logger); - }; + const auto* dependencyOverrides = options.dependencyOverrides; + const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? code : options.preprocessorOptions.codeForCache; CCache::SEntry entry; if (options.readCache || options.writeCache) - entry = CCache::SEntry(code, options); + entry = CCache::SEntry(cacheCode, options); + + if (options.cacheHit) + *options.cacheHit = false; if (options.readCache) { - auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder); + auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder, true, nullptr, options.preprocessorOptions.fastSafeValidation); if (found != options.readCache->m_container.end()) { + if (options.cacheHit) + *options.cacheHit = true; if (options.writeCache) { CCache::SEntry writeEntry = *found; options.writeCache->insert(std::move(writeEntry)); } auto shader = found->decompressShader(); - if (depfileEnabled && !writeDepfileFromDependencies(found->dependencies)) - return nullptr; return shader; } } - CCache::SEntry::dependency_container_t depfileDependencies; CCache::SEntry::dependency_container_t* dependenciesPtr = nullptr; - if (options.writeCache) - dependenciesPtr = &entry.dependencies; - else if (depfileEnabled && supportsDependencies) - dependenciesPtr = &depfileDependencies; + if (!dependencyOverrides) + { + if (options.writeCache) + dependenciesPtr = &entry.dependencies; + } auto retVal = 
compileToSPIRV_impl(code, options, dependenciesPtr); if (retVal) @@ -290,15 +514,17 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons const_cast(backingBuffer)->setContentHash(backingBuffer->computeContentHash()); } - if (retVal && depfileEnabled && supportsDependencies) + if (retVal && options.writeCache && dependencyOverrides) { - const auto* deps = options.writeCache ? &entry.dependencies : &depfileDependencies; - if (!writeDepfileFromDependencies(*deps)) - return nullptr; + entry.dependencies.clear(); + entry.dependencies.reserve(dependencyOverrides->size()); + for (const auto& dep : *dependencyOverrides) + entry.dependencies.emplace_back(dep.getRequestingSourceDir(), dep.getIdentifier(), dep.isStandardInclude(), dep.getHash(), dep.getAbsolutePath(), dep.getFileSize(), dep.getLastWriteTime(), dep.getHasFileInfo()); } if (options.writeCache) { + entry.compression = options.writeCache->getDefaultCompression(); if (entry.setContent(retVal->getContent())) options.writeCache->insert(std::move(entry)); } @@ -356,8 +582,46 @@ IShaderCompiler::CFileSystemIncludeLoader::CFileSystemIncludeLoader(core::smart_ auto IShaderCompiler::CFileSystemIncludeLoader::getInclude(const system::path& searchPath, const std::string& includeName) const -> found_t { system::path path = searchPath / includeName; - if (std::filesystem::exists(path)) - path = std::filesystem::canonical(path); + if (!path.empty()) + { + const auto rawPath = path; + { + std::lock_guard lock(g_canonicalCacheMutex); + const auto it = g_canonicalCache.find(rawPath); + if (it != g_canonicalCache.end()) + path = it->second; + } + if (path == rawPath && std::filesystem::exists(path)) + { + auto canonicalPath = std::filesystem::canonical(path); + { + std::lock_guard lock(g_canonicalCacheMutex); + g_canonicalCache.emplace(rawPath, canonicalPath); + } + path = std::move(canonicalPath); + } + } + + uint64_t fileSize = 0; + int64_t lastWriteTime = 0; + const bool infoOk = getFileInfoFast(path, 
fileSize, lastWriteTime, m_system.get()); + if (infoOk) + { + std::lock_guard lock(g_includeCacheMutex); + auto it = g_includeCache.find(path); + if (it != g_includeCache.end() && it->second.size == fileSize && it->second.ticks == lastWriteTime) + { + found_t ret = {}; + ret.absolutePath = path; + ret.contents = it->second.contents; + ret.hash = it->second.hash; + ret.hasHash = true; + ret.fileSize = fileSize; + ret.lastWriteTime = lastWriteTime; + ret.hasFileInfo = true; + return ret; + } + } core::smart_refctd_ptr f; { @@ -376,13 +640,56 @@ auto IShaderCompiler::CFileSystemIncludeLoader::getInclude(const system::path& s f->read(succ, contents.data(), 0, size); const bool success = bool(succ); assert(success); + if (!contents.empty() && contents.back() != '\n' && contents.back() != '\r') + contents.push_back('\n'); - return { f->getFileName(),std::move(contents) }; + found_t ret = {}; + ret.absolutePath = path; + ret.contents = std::move(contents); + if (auto precomputed = f->getPrecomputedHash()) + { + static_assert(sizeof(ret.hash.data) == sizeof(*precomputed)); + std::memcpy(ret.hash.data, &(*precomputed), sizeof(ret.hash.data)); + ret.hasHash = true; + ret.hasFileInfo = false; + } + else + { + ret.fileSize = infoOk ? fileSize : size; + ret.lastWriteTime = infoOk ? 
lastWriteTime : f->getLastWriteTime().time_since_epoch().count(); + ret.hasFileInfo = true; + } + if (!ret.hasHash) + { + std::array hash = {}; + core::XXHash_256(ret.contents.data(), ret.contents.size(), hash.data()); + std::memcpy(ret.hash.data, hash.data(), sizeof(ret.hash.data)); + ret.hasHash = true; + } + if (infoOk) + { + IncludeCacheEntry entry = {}; + entry.size = fileSize; + entry.ticks = lastWriteTime; + entry.hash = ret.hash; + entry.contents = ret.contents; + std::lock_guard lock(g_includeCacheMutex); + g_includeCache[path] = std::move(entry); + } + return ret; } IShaderCompiler::CIncludeFinder::CIncludeFinder(core::smart_refctd_ptr&& system) - : m_defaultFileSystemLoader(core::make_smart_refctd_ptr(std::move(system))) + : m_defaultFileSystemLoader(core::make_smart_refctd_ptr(core::smart_refctd_ptr(system))) + , m_system(std::move(system)) { +#ifdef NBL_EMBED_BUILTIN_RESOURCES + auto builtinLoader = core::make_smart_refctd_ptr(); + addSearchPath(std::string(nbl::builtin::pathPrefix), builtinLoader); + addSearchPath(std::string(spirv::builtin::pathPrefix), builtinLoader); + addSearchPath(std::string(boost::builtin::pathPrefix), builtinLoader); + addSearchPath(std::string(nbl::devicegen::builtin::pathPrefix), builtinLoader); +#endif addSearchPath("", m_defaultFileSystemLoader); } @@ -399,10 +706,26 @@ auto IShaderCompiler::CIncludeFinder::getIncludeStandard(const system::path& req retVal = std::move(contents); else retVal = m_defaultFileSystemLoader->getInclude(requestingSourceDir.string(), includeName); + if (retVal.fileSize == 0 && !retVal.contents.empty()) + retVal.fileSize = retVal.contents.size(); + if (!retVal.hasFileInfo && !retVal.absolutePath.empty() && !retVal.hasHash) + { + std::error_code ec; + const auto fileTime = std::filesystem::last_write_time(retVal.absolutePath, ec); + if (!ec) + { + retVal.lastWriteTime = fileTime.time_since_epoch().count(); + retVal.hasFileInfo = true; + } + } - core::blake3_hasher hasher; - 
hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); - retVal.hash = static_cast(hasher); + if (!retVal.hasHash) + { + std::array hash = {}; + core::XXHash_256(retVal.contents.data(), retVal.contents.size(), hash.data()); + std::memcpy(retVal.hash.data, hash.data(), sizeof(retVal.hash.data)); + retVal.hasHash = true; + } return retVal; } @@ -416,9 +739,26 @@ auto IShaderCompiler::CIncludeFinder::getIncludeRelative(const system::path& req retVal = std::move(contents); else retVal = std::move(trySearchPaths(includeName)); - core::blake3_hasher hasher; - hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); - retVal.hash = static_cast(hasher); + if (retVal.fileSize == 0 && !retVal.contents.empty()) + retVal.fileSize = retVal.contents.size(); + if (!retVal.hasFileInfo && !retVal.absolutePath.empty() && !retVal.hasHash) + { + std::error_code ec; + const auto fileTime = std::filesystem::last_write_time(retVal.absolutePath, ec); + if (!ec) + { + retVal.lastWriteTime = fileTime.time_since_epoch().count(); + retVal.hasFileInfo = true; + } + } + + if (!retVal.hasHash) + { + std::array hash = {}; + core::XXHash_256(retVal.contents.data(), retVal.contents.size(), hash.data()); + std::memcpy(retVal.hash.data, hash.data(), sizeof(retVal.hash.data)); + retVal.hasHash = true; + } return retVal; } @@ -426,7 +766,16 @@ void IShaderCompiler::CIncludeFinder::addSearchPath(const std::string& searchPat { if (!loader) return; - m_loaders.emplace_back(LoaderSearchPath{ loader, searchPath }); + if (searchPath.empty()) + { + m_loaders.emplace_back(LoaderSearchPath{ loader, searchPath }); + return; + } + const auto insertPos = std::find_if(m_loaders.begin(), m_loaders.end(), [](const LoaderSearchPath& entry) + { + return entry.searchPath.empty(); + }); + m_loaders.insert(insertPos, LoaderSearchPath{ loader, searchPath }); } void 
IShaderCompiler::CIncludeFinder::addGenerator(const core::smart_refctd_ptr& generatorToAdd) @@ -505,35 +854,178 @@ auto IShaderCompiler::CIncludeFinder::tryIncludeGenerators(const std::string& in return {}; } -core::smart_refctd_ptr IShaderCompiler::CCache::find(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const +core::smart_refctd_ptr IShaderCompiler::CCache::find(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder, bool fastSafeValidation) const { - const auto found = find_impl(mainFile, finder); + const auto found = find_impl(mainFile, finder, true, nullptr, fastSafeValidation); if (found==m_container.end()) return nullptr; return found->decompressShader(); } -IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_impl(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const +bool IShaderCompiler::CCache::contains(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder, bool fastSafeValidation) const +{ + return find_impl(mainFile, finder, true, nullptr, fastSafeValidation) != m_container.end(); +} + +bool IShaderCompiler::CCache::findEntryForCode(std::string_view code, const SCompilerOptions& options, const IShaderCompiler::CIncludeFinder* finder, SEntry& outEntry, bool validateDependencies, bool* depsUpdated, bool fastSafeValidation) const +{ + const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? 
code : options.preprocessorOptions.codeForCache; + const CCache::SEntry entry(cacheCode, options); + const auto found = find_impl(entry, finder, validateDependencies, depsUpdated, fastSafeValidation); + if (found == m_container.end()) + return false; + outEntry = SEntry(*found); + return true; +} + +core::smart_refctd_ptr IShaderCompiler::CCache::decompressEntry(const SEntry& entry) const +{ + return entry.decompressShader(); +} + +IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_impl(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder, bool validateDependencies, bool* depsUpdated, bool fastSafeValidation) const { auto found = m_container.find(mainFile); + if (found == m_container.end() || !validateDependencies) + return found; + if (depsUpdated) + *depsUpdated = false; + bool updated = false; + auto* system = finder ? finder->getSystem() : nullptr; // go through all dependencies if (found!=m_container.end()) { - for (const auto& dependency : found->dependencies) + std::vector mismatches; + mismatches.reserve(found->dependencies.size()); + collectFileInfoMismatchesParallel(found->dependencies, mismatches, system); + if (mismatches.empty()) + return found; + if (fastSafeValidation) + return m_container.end(); + if (!finder) + return m_container.end(); + + std::unordered_map fileStatus; + std::unordered_map logicalStatus; + fileStatus.reserve(mismatches.size()); + logicalStatus.reserve(mismatches.size()); + for (size_t idx : mismatches) { - IIncludeLoader::found_t header; - if (dependency.standardInclude) - header = finder->getIncludeStandard(dependency.requestingSourceDir, dependency.identifier); + const auto& dependency = found->dependencies[idx]; + auto makeLogicalKey = [&dependency]() + { + std::string key; + key.reserve(dependency.getIdentifier().size() + dependency.getRequestingSourceDir().string().size() + 4); + key.append(dependency.getRequestingSourceDir().string()); + key.push_back('|'); + 
key.append(dependency.getIdentifier()); + key.push_back('|'); + key.push_back(dependency.isStandardInclude() ? '1' : '0'); + return key; + }; + + if (dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) + { + if (auto it = fileStatus.find(dependency.getAbsolutePath()); it != fileStatus.end()) + { + if (!it->second) + return m_container.end(); + continue; + } + } else - header = finder->getIncludeRelative(dependency.requestingSourceDir, dependency.identifier); + { + auto key = makeLogicalKey(); + if (auto it = logicalStatus.find(key); it != logicalStatus.end()) + { + if (!it->second) + return m_container.end(); + continue; + } + } - if (header.hash != dependency.hash) + bool valid = false; + bool precomputedChecked = false; + if (!dependency.getAbsolutePath().empty()) { - return m_container.end(); + if (auto* system = finder->getSystem()) + { + system::ISystem::future_t> future; + system->createFile(future, dependency.getAbsolutePath(), system::IFile::ECF_READ); + if (future.wait()) + { + core::smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (file) + { + if (auto precomputed = file->getPrecomputedHash()) + { + precomputedChecked = true; + core::blake3_hash_t hash = {}; + std::memcpy(hash.data, &(*precomputed), sizeof(hash.data)); + if (hash == dependency.getHash()) + valid = true; + else + { + if (dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) + fileStatus.emplace(dependency.getAbsolutePath(), false); + else + logicalStatus.emplace(makeLogicalKey(), false); + return m_container.end(); + } + } + } + } + } } + + if (!valid && !precomputedChecked) + { + IIncludeLoader::found_t header; + if (dependency.standardInclude) + header = finder->getIncludeStandard(dependency.requestingSourceDir, dependency.identifier); + else + header = finder->getIncludeRelative(dependency.requestingSourceDir, dependency.identifier); + + if (header.hash != dependency.hash) + { + if 
(dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) + fileStatus.emplace(dependency.getAbsolutePath(), false); + else + logicalStatus.emplace(makeLogicalKey(), false); + return m_container.end(); + } + + valid = true; + if (header.hasFileInfo && dependency.getAbsolutePath().is_absolute()) + { + dependency.setFileInfo(header.fileSize, header.lastWriteTime, true); + updated = true; + } + } + + if (valid && dependency.getHasFileInfo() && dependency.getAbsolutePath().is_absolute()) + { + uint64_t size = 0; + int64_t ticks = 0; + if (getFileInfoCached(dependency.getAbsolutePath(), size, ticks, system) && + (dependency.getFileSize() != size || dependency.getLastWriteTime() != ticks)) + { + dependency.setFileInfo(size, ticks, true); + updated = true; + } + } + + if (dependency.getHasFileInfo() && !dependency.getAbsolutePath().empty()) + fileStatus.emplace(dependency.getAbsolutePath(), true); + else + logicalStatus.emplace(makeLogicalKey(), true); } } + if (depsUpdated) + *depsUpdated = updated; return found; } @@ -542,28 +1034,67 @@ core::smart_refctd_ptr IShaderCompiler::CCache::serialize() const size_t shaderBufferSize = 0; core::vector offsets(m_container.size()); core::vector sizes(m_container.size()); - json entries; + json entries = json::array(); core::vector shaderCreationParams; + std::vector depsBuffer; + depsBuffer.reserve(m_container.size() * 64u); - // In a first loop over entries we add all entries and their shader creation parameters to a json, and get the size of the shaders buffer + auto write_bytes = [](std::vector& out, const void* data, size_t size) + { + const auto* ptr = reinterpret_cast(data); + out.insert(out.end(), ptr, ptr + size); + }; + auto write_u32 = [&write_bytes](std::vector& out, uint32_t value) + { + write_bytes(out, &value, sizeof(value)); + }; + auto write_string = [&write_u32, &write_bytes](std::vector& out, std::string_view value) + { + write_u32(out, static_cast(value.size())); + if (!value.empty()) + 
write_bytes(out, value.data(), value.size()); + }; + + write_u32(depsBuffer, static_cast(m_container.size())); size_t i = 0u; for (auto& entry : m_container) { - // Add the entry as a json array - entries.emplace_back(entry); + json entryJson{ + { "mainFileContents", entry.mainFileContents }, + { "compilerArgs", entry.compilerArgs }, + { "hash", entry.hash.data }, + { "lookupHash", entry.lookupHash }, + { "uncompressedContentHash", entry.uncompressedContentHash.data }, + { "uncompressedSize", entry.uncompressedSize }, + { "compression", static_cast(entry.compression) }, + }; + entries.emplace_back(std::move(entryJson)); - // We keep a copy of the offsets and the sizes of each shader. This is so that later on, when we add the shaders to the buffer after json creation - // (where the params array has been moved) we don't have to read the json to get the offsets again offsets[i] = shaderBufferSize; sizes[i] = entry.spirv->getSize(); - - // And add the params to the shader creation parameters array shaderCreationParams.emplace_back(entry.compilerArgs.stage, entry.compilerArgs.preprocessorArgs.sourceIdentifier.data(), sizes[i], shaderBufferSize); - // Enlarge the shader buffer by the size of the current shader shaderBufferSize += sizes[i]; + + write_u32(depsBuffer, static_cast(entry.dependencies.size())); + for (const auto& dep : entry.dependencies) + { + const auto dir = dep.getRequestingSourceDir().generic_string(); + write_string(depsBuffer, dir); + write_string(depsBuffer, dep.getIdentifier()); + const auto abs = dep.getAbsolutePath().generic_string(); + write_string(depsBuffer, abs); + const uint8_t standardInclude = dep.isStandardInclude() ? 
1u : 0u; + write_bytes(depsBuffer, &standardInclude, sizeof(standardInclude)); + write_bytes(depsBuffer, dep.getHash().data, sizeof(dep.getHash().data)); + const uint64_t fileSize = dep.getFileSize(); + write_bytes(depsBuffer, &fileSize, sizeof(fileSize)); + const int64_t lastWriteTime = dep.getLastWriteTime(); + write_bytes(depsBuffer, &lastWriteTime, sizeof(lastWriteTime)); + const uint8_t hasFileInfo = dep.getHasFileInfo() ? 1u : 0u; + write_bytes(depsBuffer, &hasFileInfo, sizeof(hasFileInfo)); + } i++; } - // Create the containerJson json containerJson{ { "version", VERSION }, { "entries", std::move(entries) }, @@ -572,68 +1103,1047 @@ core::smart_refctd_ptr IShaderCompiler::CCache::serialize() const std::string dumpedContainerJson = std::move(containerJson.dump()); uint64_t dumpedContainerJsonLength = dumpedContainerJson.size(); - // Create a buffer able to hold all shaders + the containerJson - size_t retValSize = shaderBufferSize + SHADER_BUFFER_SIZE_BYTES + dumpedContainerJsonLength; + size_t retValSize = shaderBufferSize + SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t) + dumpedContainerJsonLength + depsBuffer.size(); core::vector retVal(retValSize); - // first SHADER_BUFFER_SIZE_BYTES (8) in the buffer are the size of the shader buffer memcpy(retVal.data(), &shaderBufferSize, SHADER_BUFFER_SIZE_BYTES); + memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES, &dumpedContainerJsonLength, sizeof(uint64_t)); - // Loop over entries again, adding each one's shader to the buffer. 
i = 0u; + const size_t shaderOffset = SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t); for (auto& entry : m_container) { - memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES + offsets[i], entry.spirv->getPointer(), sizes[i]); + memcpy(retVal.data() + shaderOffset + offsets[i], entry.spirv->getPointer(), sizes[i]); i++; } - // Might as well memcpy everything - memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES + shaderBufferSize, dumpedContainerJson.data(), dumpedContainerJsonLength); + const size_t jsonOffset = shaderOffset + shaderBufferSize; + memcpy(retVal.data() + jsonOffset, dumpedContainerJson.data(), dumpedContainerJsonLength); + if (!depsBuffer.empty()) + memcpy(retVal.data() + jsonOffset + dumpedContainerJsonLength, depsBuffer.data(), depsBuffer.size()); auto memoryResource = core::make_smart_refctd_ptr>(std::move(retVal)); return ICPUBuffer::create({ { retValSize }, memoryResource->getBacker().data(),std::move(memoryResource)}, core::adopt_memory); } -core::smart_refctd_ptr IShaderCompiler::CCache::deserialize(const std::span serializedCache) +core::smart_refctd_ptr IShaderCompiler::CCache::deserialize(const std::span serializedCache, bool skipDependencies) { auto retVal = core::make_smart_refctd_ptr(); - // First get the size of the shader buffer, stored in the first 8 bytes - const uint64_t* cacheStart = reinterpret_cast(serializedCache.data()); - uint64_t shaderBufferSize = cacheStart[0]; - // Next up get the json that stores the container data - std::span cacheAsChar = { reinterpret_cast(serializedCache.data()), serializedCache.size() }; - std::string_view containerJsonString(cacheAsChar.begin() + SHADER_BUFFER_SIZE_BYTES + shaderBufferSize, cacheAsChar.end()); - json containerJson = json::parse(containerJsonString); + if (serializedCache.size() < SHADER_BUFFER_SIZE_BYTES) + return nullptr; - // Check that this cache is from the currently supported version - { - std::string version; - containerJson.at("version").get_to(version); - if (version != VERSION) { - 
return nullptr; - } - } + uint64_t shaderBufferSize = 0; + std::memcpy(&shaderBufferSize, serializedCache.data(), SHADER_BUFFER_SIZE_BYTES); - // Now retrieve two vectors, one with the entries and one with the extra data to recreate the CPUShaders - std::vector entries; - std::vector shaderCreationParams; - containerJson.at("entries").get_to(entries); - containerJson.at("shaderCreationParams").get_to(shaderCreationParams); + const size_t minOldHeader = SHADER_BUFFER_SIZE_BYTES + shaderBufferSize; + if (serializedCache.size() < minOldHeader) + return nullptr; - // We must now recreate the shaders, add them to each entry, then move the entry into the multiset - for (auto i = 0u; i < entries.size(); i++) { - // Create buffer to hold the code - auto code = ICPUBuffer::create({ shaderCreationParams[i].codeByteSize }); - // Copy the shader bytecode into the buffer + bool hasBinaryDeps = false; + uint64_t jsonSize = 0; + size_t jsonOffset = 0; + size_t depsOffset = 0; + size_t shaderOffset = SHADER_BUFFER_SIZE_BYTES; - memcpy(code->getPointer(), serializedCache.data() + SHADER_BUFFER_SIZE_BYTES + shaderCreationParams[i].offset, shaderCreationParams[i].codeByteSize); - code->setContentHash(code->computeContentHash()); - entries[i].spirv = std::move(code); + const size_t minNewHeader = SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t) + shaderBufferSize; + if (serializedCache.size() >= minNewHeader) + { + std::memcpy(&jsonSize, serializedCache.data() + SHADER_BUFFER_SIZE_BYTES, sizeof(jsonSize)); + const size_t candidateJsonOffset = SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t) + shaderBufferSize; + if (candidateJsonOffset + jsonSize <= serializedCache.size()) + { + hasBinaryDeps = true; + jsonOffset = candidateJsonOffset; + depsOffset = candidateJsonOffset + jsonSize; + shaderOffset = SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t); + } + } - retVal->insert(std::move(entries[i])); + if (!hasBinaryDeps) + { + jsonOffset = SHADER_BUFFER_SIZE_BYTES + shaderBufferSize; + jsonSize = 
serializedCache.size() - jsonOffset; + shaderOffset = SHADER_BUFFER_SIZE_BYTES; } - return retVal; + std::string_view containerJsonString(reinterpret_cast(serializedCache.data() + jsonOffset), jsonSize); + json containerJson; + if (skipDependencies) + { + bool skipNext = false; + auto cb = [&skipNext](int, json::parse_event_t event, json& parsed) + { + if (event == json::parse_event_t::key && parsed.is_string() && parsed.get_ref() == "dependencies") + { + skipNext = true; + return true; + } + if (skipNext) + { + skipNext = false; + return false; + } + return true; + }; + containerJson = json::parse(containerJsonString, cb, true, true); + } + else + { + containerJson = json::parse(containerJsonString); + } + + std::string version; + containerJson.at("version").get_to(version); + if (version != VERSION) + return nullptr; + + std::vector entries; + std::vector shaderCreationParams; + containerJson.at("entries").get_to(entries); + containerJson.at("shaderCreationParams").get_to(shaderCreationParams); + + for (auto i = 0u; i < entries.size(); i++) { + auto code = ICPUBuffer::create({ shaderCreationParams[i].codeByteSize }); + memcpy(code->getPointer(), serializedCache.data() + shaderOffset + shaderCreationParams[i].offset, shaderCreationParams[i].codeByteSize); + code->setContentHash(code->computeContentHash()); + entries[i].spirv = std::move(code); + } + + if (hasBinaryDeps && !skipDependencies) + { + auto read_bytes = [](const std::span data, size_t& offset, void* dst, size_t size) -> bool + { + if (offset + size > data.size()) + return false; + std::memcpy(dst, data.data() + offset, size); + offset += size; + return true; + }; + auto read_u32 = [&read_bytes](const std::span data, size_t& offset, uint32_t& out) -> bool + { + return read_bytes(data, offset, &out, sizeof(out)); + }; + auto read_string = [&read_u32, &read_bytes](const std::span data, size_t& offset, std::string& out) -> bool + { + uint32_t size = 0; + if (!read_u32(data, offset, size)) + return false; + 
if (offset + size > data.size()) + return false; + out.assign(reinterpret_cast(data.data() + offset), size); + offset += size; + return true; + }; + + size_t offset = depsOffset; + uint32_t entryCount = 0; + if (!read_u32(serializedCache, offset, entryCount)) + return nullptr; + if (entryCount != entries.size()) + return nullptr; + + for (uint32_t i = 0; i < entryCount; ++i) + { + uint32_t depCount = 0; + if (!read_u32(serializedCache, offset, depCount)) + return nullptr; + entries[i].dependencies.clear(); + entries[i].dependencies.reserve(depCount); + for (uint32_t d = 0; d < depCount; ++d) + { + std::string dir; + std::string identifier; + std::string absolutePath; + if (!read_string(serializedCache, offset, dir)) + return nullptr; + if (!read_string(serializedCache, offset, identifier)) + return nullptr; + if (!read_string(serializedCache, offset, absolutePath)) + return nullptr; + uint8_t standardInclude = 0; + if (!read_bytes(serializedCache, offset, &standardInclude, sizeof(standardInclude))) + return nullptr; + core::blake3_hash_t hash = {}; + if (!read_bytes(serializedCache, offset, hash.data, sizeof(hash.data))) + return nullptr; + uint64_t fileSize = 0; + if (!read_bytes(serializedCache, offset, &fileSize, sizeof(fileSize))) + return nullptr; + int64_t lastWriteTime = 0; + if (!read_bytes(serializedCache, offset, &lastWriteTime, sizeof(lastWriteTime))) + return nullptr; + uint8_t hasFileInfo = 0; + if (!read_bytes(serializedCache, offset, &hasFileInfo, sizeof(hasFileInfo))) + return nullptr; + entries[i].dependencies.emplace_back(system::path(dir), identifier, standardInclude != 0, hash, system::path(absolutePath), fileSize, lastWriteTime, hasFileInfo != 0); + } + } + } + + for (auto& entry : entries) + retVal->insert(std::move(entry)); + + return retVal; +} + +static std::string normalizeLinePath(std::string_view path) +{ + std::string out(path); + std::replace(out.begin(), out.end(), '\\', '/'); + return out; +} + +std::string 
IShaderCompiler::applyForceIncludes(std::string_view code, std::span forceIncludes) +{ + if (forceIncludes.empty()) + return std::string(code); + + size_t reserveSize = code.size(); + for (const auto& inc : forceIncludes) + reserveSize += inc.size() + 16; + + std::string out; + out.reserve(reserveSize); + for (const auto& inc : forceIncludes) + { + const auto incPath = std::filesystem::path(inc).generic_string(); + out.append("#include \""); + out.append(incPath); + out.append("\"\n"); + } + out.append(code); + return out; +} + +bool IShaderCompiler::probeShaderCache(const CCache* cache, std::string_view code, const SCompilerOptions& options, const CIncludeFinder* finder) +{ + if (!cache) + return false; + const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? code : options.preprocessorOptions.codeForCache; + const CCache::SEntry entry(cacheCode, options); + return cache->contains(entry, finder, options.preprocessorOptions.fastSafeValidation); +} + +bool IShaderCompiler::preprocessPrefixForCache(std::string_view code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache::SEntry& outEntry) const +{ + outEntry = {}; + std::vector deps; + auto text = preprocessShader(std::string(code), stage, preprocessOptions, &deps); + if (text.empty()) + return false; + outEntry.preprocessedPrefix = std::move(text); + outEntry.dependencies = std::move(deps); + outEntry.pragmaStage = static_cast(stage); + return true; +} + +IShaderCompiler::CPreprocessCache::SProbeResult IShaderCompiler::CPreprocessCache::probe(std::string_view code, const CPreprocessCache* cache, ELoadStatus loadStatus, const SPreprocessorOptions& preprocessOptions) +{ + SProbeResult result = {}; + const CIncludeFinder* finder = preprocessOptions.includeFinder; + std::string_view codeToSplit = code; + if (preprocessOptions.applyForceIncludes && !preprocessOptions.forceIncludes.empty()) + { + result.codeStorage = applyForceIncludes(code, 
preprocessOptions.forceIncludes); + codeToSplit = result.codeStorage; + } + splitPrefix(codeToSplit, result.prefix, result.body); + result.hasPrefix = !result.prefix.empty(); + if (!result.hasPrefix) + { + result.status = EProbeStatus::NoPrefix; + result.cacheHit = false; + return result; + } + + { + core::blake3_hasher hasher; + hasher.update(result.prefix.data(), result.prefix.size()); + const uint8_t waveFlags = + (static_cast(preprocessOptions.preserveComments) << 0u) | + (static_cast(preprocessOptions.emitLineDirectives) << 1u) | + (static_cast(preprocessOptions.emitPragmaDirectives) << 2u); + hasher.update(&waveFlags, sizeof(waveFlags)); + result.prefixHash = static_cast(hasher); + } + const bool hasEntry = cache && cache->hasEntry(); + if (!hasEntry) + { + result.cacheHit = false; + if (loadStatus == ELoadStatus::Missing) + result.status = EProbeStatus::Missing; + else if (loadStatus == ELoadStatus::Invalid) + result.status = EProbeStatus::Invalid; + else + result.status = EProbeStatus::EntryInvalid; + return result; + } + + const bool prefixMatch = cache->getEntry().prefixHash == result.prefixHash; + if (!prefixMatch) + { + result.cacheHit = false; + result.status = EProbeStatus::PrefixChanged; + return result; + } + bool depsUpdated = false; + const bool depsValid = cache->validateDependencies(finder, &depsUpdated, preprocessOptions.fastSafeValidation); + result.depsUpdated = depsUpdated; + if (prefixMatch && depsValid) + { + result.cacheHit = true; + result.status = EProbeStatus::Hit; + return result; + } + + result.cacheHit = false; + if (!prefixMatch) + result.status = EProbeStatus::PrefixChanged; + else if (!depsValid) + result.status = EProbeStatus::DependenciesChanged; + else + result.status = EProbeStatus::EntryInvalid; + + return result; +} + +const char* IShaderCompiler::CPreprocessCache::getProbeReason(EProbeStatus status) +{ + switch (status) + { + case EProbeStatus::Missing: + return "cache file missing; first build, cleaned, output moved, or 
out of date"; + case EProbeStatus::Invalid: + return "cache file invalid or version mismatch"; + case EProbeStatus::PrefixChanged: + return "prefix changed; cache invalidated"; + case EProbeStatus::DependenciesChanged: + return "dependencies changed; cache invalidated"; + case EProbeStatus::EntryInvalid: + return "cache entry invalid"; + case EProbeStatus::NoPrefix: + return "no prefix"; + case EProbeStatus::Hit: + return "hit"; + default: + return "unknown"; + } +} + +IShaderCompiler::SPreprocessCacheResult IShaderCompiler::preprocessWithCache(std::string_view code, IShader::E_SHADER_STAGE stage, const SPreprocessorOptions& preprocessOptions, CPreprocessCache& cache, CPreprocessCache::ELoadStatus loadStatus, std::string_view sourceIdentifier) const +{ + SPreprocessCacheResult result = {}; + result.stage = stage; + + const auto probe = CPreprocessCache::probe(code, &cache, loadStatus, preprocessOptions); + result.status = probe.status; + if (!probe.hasPrefix) + return result; + + if (probe.cacheHit) + { + result.cacheHit = true; + result.cacheUsed = true; + if (probe.depsUpdated) + result.cacheUpdated = true; + } + else + { + CPreprocessCache::SEntry entry; + IShader::E_SHADER_STAGE prefixStage = stage; + SPreprocessorOptions preCacheOpt = preprocessOptions; + preCacheOpt.depfile = false; + preCacheOpt.applyForceIncludes = false; + if (!preprocessPrefixForCache(probe.prefix, prefixStage, preCacheOpt, entry)) + { + result.ok = false; + return result; + } + entry.prefixHash = probe.prefixHash; + entry.pragmaStage = static_cast(prefixStage); + cache.setEntry(std::move(entry)); + result.cacheUsed = true; + result.cacheUpdated = true; + } + + if (!cache.hasEntry()) + { + result.ok = false; + return result; + } + + result.code = cache.buildCombinedCode(probe.body, sourceIdentifier); + if (result.code.empty()) + { + result.ok = false; + return result; + } + + const auto& entry = cache.getEntry(); + if (entry.pragmaStage != 
static_cast(IShader::E_SHADER_STAGE::ESS_UNKNOWN)) + result.stage = static_cast(entry.pragmaStage); + + return result; +} + +core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::serialize() const +{ + if (!m_hasEntry) + return nullptr; + ensurePrefixLoaded(); + + auto write_bytes = [](std::vector& out, const void* data, size_t size) + { + const auto* ptr = reinterpret_cast(data); + out.insert(out.end(), ptr, ptr + size); + }; + auto write_u32 = [&write_bytes](std::vector& out, uint32_t value) + { + write_bytes(out, &value, sizeof(value)); + }; + auto write_string = [&write_u32, &write_bytes](std::vector& out, std::string_view value) + { + write_u32(out, static_cast(value.size())); + if (!value.empty()) + write_bytes(out, value.data(), value.size()); + }; + + std::vector out; + out.reserve(m_entry.preprocessedPrefix.size() + 256); + const uint32_t magic = 0x50435250u; + write_u32(out, magic); + write_string(out, VERSION); + write_bytes(out, &m_entry.prefixHash, sizeof(m_entry.prefixHash)); + write_u32(out, m_entry.pragmaStage); + const uint32_t prefixSize = static_cast(m_entry.preprocessedPrefix.size()); + write_u32(out, prefixSize); + + write_u32(out, static_cast(m_entry.macroDefs.size())); + for (const auto& macro : m_entry.macroDefs) + write_string(out, macro); + + write_u32(out, static_cast(m_entry.dxcFlags.size())); + for (const auto& flag : m_entry.dxcFlags) + write_string(out, flag); + + write_u32(out, static_cast(m_entry.dependencies.size())); + for (const auto& dep : m_entry.dependencies) + { + const auto dir = dep.getRequestingSourceDir().generic_string(); + write_string(out, dir); + write_string(out, dep.getIdentifier()); + const auto abs = dep.getAbsolutePath().generic_string(); + write_string(out, abs); + const uint8_t standardInclude = dep.isStandardInclude() ? 
1u : 0u; + write_bytes(out, &standardInclude, sizeof(standardInclude)); + write_bytes(out, dep.getHash().data, sizeof(dep.getHash().data)); + const uint64_t fileSize = dep.getFileSize(); + write_bytes(out, &fileSize, sizeof(fileSize)); + const int64_t lastWriteTime = dep.getLastWriteTime(); + write_bytes(out, &lastWriteTime, sizeof(lastWriteTime)); + const uint8_t hasFileInfo = dep.getHasFileInfo() ? 1u : 0u; + write_bytes(out, &hasFileInfo, sizeof(hasFileInfo)); + } + if (prefixSize) + write_bytes(out, m_entry.preprocessedPrefix.data(), m_entry.preprocessedPrefix.size()); + + auto buffer = ICPUBuffer::create({ out.size() }); + if (!buffer) + return nullptr; + std::memcpy(buffer->getPointer(), out.data(), out.size()); + return buffer; +} + +core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::deserialize(const std::span serializedCache) +{ + if (serializedCache.empty()) + return nullptr; + + auto read_bytes = [](const std::span data, size_t& offset, void* dst, size_t size) -> bool + { + if (offset + size > data.size()) + return false; + std::memcpy(dst, data.data() + offset, size); + offset += size; + return true; + }; + auto read_u32 = [&read_bytes](const std::span data, size_t& offset, uint32_t& out) -> bool + { + return read_bytes(data, offset, &out, sizeof(out)); + }; + auto read_string = [&read_u32, &read_bytes](const std::span data, size_t& offset, std::string& out) -> bool + { + uint32_t size = 0; + if (!read_u32(data, offset, size)) + return false; + if (offset + size > data.size()) + return false; + out.assign(reinterpret_cast(data.data() + offset), size); + offset += size; + return true; + }; + + size_t offset = 0; + uint32_t magic = 0; + if (!read_u32(serializedCache, offset, magic)) + return nullptr; + if (magic != 0x50435250u) + return nullptr; + + std::string version; + if (!read_string(serializedCache, offset, version)) + return nullptr; + if (version != VERSION) + return nullptr; + + auto retVal = core::make_smart_refctd_ptr(); + auto& entry = 
retVal->m_entry; + if (!read_bytes(serializedCache, offset, &entry.prefixHash, sizeof(entry.prefixHash))) + return nullptr; + if (!read_u32(serializedCache, offset, entry.pragmaStage)) + return nullptr; + uint32_t prefixSize = 0; + if (!read_u32(serializedCache, offset, prefixSize)) + return nullptr; + + uint32_t macroCount = 0; + if (!read_u32(serializedCache, offset, macroCount)) + return nullptr; + entry.macroDefs.clear(); + entry.macroDefs.reserve(macroCount); + for (uint32_t i = 0; i < macroCount; ++i) + { + std::string macro; + if (!read_string(serializedCache, offset, macro)) + return nullptr; + entry.macroDefs.emplace_back(std::move(macro)); + } + entry.macroBlock = buildMacroBlock(entry.macroDefs); + + uint32_t flagCount = 0; + if (!read_u32(serializedCache, offset, flagCount)) + return nullptr; + entry.dxcFlags.clear(); + entry.dxcFlags.reserve(flagCount); + for (uint32_t i = 0; i < flagCount; ++i) + { + std::string flag; + if (!read_string(serializedCache, offset, flag)) + return nullptr; + entry.dxcFlags.emplace_back(std::move(flag)); + } + + uint32_t depCount = 0; + if (!read_u32(serializedCache, offset, depCount)) + return nullptr; + entry.dependencies.clear(); + entry.dependencies.reserve(depCount); + for (uint32_t i = 0; i < depCount; ++i) + { + std::string dir; + std::string identifier; + if (!read_string(serializedCache, offset, dir)) + return nullptr; + if (!read_string(serializedCache, offset, identifier)) + return nullptr; + std::string absolutePath; + if (!read_string(serializedCache, offset, absolutePath)) + return nullptr; + uint8_t standardInclude = 0; + if (!read_bytes(serializedCache, offset, &standardInclude, sizeof(standardInclude))) + return nullptr; + core::blake3_hash_t hash = {}; + if (!read_bytes(serializedCache, offset, hash.data, sizeof(hash.data))) + return nullptr; + uint64_t fileSize = 0; + if (!read_bytes(serializedCache, offset, &fileSize, sizeof(fileSize))) + return nullptr; + int64_t lastWriteTime = 0; + if 
(!read_bytes(serializedCache, offset, &lastWriteTime, sizeof(lastWriteTime))) + return nullptr; + uint8_t hasFileInfo = 0; + if (!read_bytes(serializedCache, offset, &hasFileInfo, sizeof(hasFileInfo))) + return nullptr; + entry.dependencies.emplace_back(system::path(dir), identifier, standardInclude != 0, hash, system::path(absolutePath), fileSize, lastWriteTime, hasFileInfo != 0); + } + + if (offset + prefixSize > serializedCache.size()) + return nullptr; + if (prefixSize) + { + entry.preprocessedPrefix.assign(reinterpret_cast(serializedCache.data() + offset), prefixSize); + offset += prefixSize; + } + + retVal->m_prefixLoaded = true; + retVal->m_backingPath.clear(); + retVal->m_prefixOffset = 0; + retVal->m_prefixSize = 0; + retVal->m_hasEntry = true; + return retVal; +} + +core::smart_refctd_ptr IShaderCompiler::CPreprocessCache::loadFromFile(const system::path& path, ELoadStatus& status, bool loadPrefix) +{ + status = ELoadStatus::Missing; + if (!std::filesystem::exists(path)) + return nullptr; + + std::ifstream in(path, std::ios::binary); + if (!in) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + in.seekg(0, std::ios::end); + const auto size = static_cast(in.tellg()); + in.seekg(0, std::ios::beg); + if (!size) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + auto read_bytes = [&in](void* dst, size_t count) -> bool + { + return bool(in.read(reinterpret_cast(dst), count)); + }; + auto read_u32 = [&read_bytes](uint32_t& out) -> bool + { + return read_bytes(&out, sizeof(out)); + }; + auto read_string = [&read_u32, &read_bytes](std::string& out) -> bool + { + uint32_t len = 0; + if (!read_u32(len)) + return false; + if (!len) + { + out.clear(); + return true; + } + out.resize(len); + return read_bytes(out.data(), len); + }; + + uint32_t magic = 0; + if (!read_u32(magic) || magic != 0x50435250u) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + std::string version; + if (!read_string(version) || version != VERSION) + { + 
status = ELoadStatus::Invalid; + return nullptr; + } + + auto retVal = core::make_smart_refctd_ptr(); + auto& entry = retVal->m_entry; + if (!read_bytes(&entry.prefixHash, sizeof(entry.prefixHash))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + if (!read_u32(entry.pragmaStage)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + uint32_t prefixSize = 0; + if (!read_u32(prefixSize)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + uint32_t macroCount = 0; + if (!read_u32(macroCount)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + entry.macroDefs.clear(); + entry.macroDefs.reserve(macroCount); + for (uint32_t i = 0; i < macroCount; ++i) + { + std::string macro; + if (!read_string(macro)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + entry.macroDefs.emplace_back(std::move(macro)); + } + entry.macroBlock = buildMacroBlock(entry.macroDefs); + + uint32_t flagCount = 0; + if (!read_u32(flagCount)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + entry.dxcFlags.clear(); + entry.dxcFlags.reserve(flagCount); + for (uint32_t i = 0; i < flagCount; ++i) + { + std::string flag; + if (!read_string(flag)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + entry.dxcFlags.emplace_back(std::move(flag)); + } + + uint32_t depCount = 0; + if (!read_u32(depCount)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + entry.dependencies.clear(); + entry.dependencies.reserve(depCount); + for (uint32_t i = 0; i < depCount; ++i) + { + std::string dir; + std::string identifier; + if (!read_string(dir) || !read_string(identifier)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + std::string absolutePath; + if (!read_string(absolutePath)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + uint8_t standardInclude = 0; + if (!read_bytes(&standardInclude, sizeof(standardInclude))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + core::blake3_hash_t hash = {}; + if 
(!read_bytes(hash.data, sizeof(hash.data))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + uint64_t fileSize = 0; + if (!read_bytes(&fileSize, sizeof(fileSize))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + int64_t lastWriteTime = 0; + if (!read_bytes(&lastWriteTime, sizeof(lastWriteTime))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + uint8_t hasFileInfo = 0; + if (!read_bytes(&hasFileInfo, sizeof(hasFileInfo))) + { + status = ELoadStatus::Invalid; + return nullptr; + } + entry.dependencies.emplace_back(system::path(dir), identifier, standardInclude != 0, hash, system::path(absolutePath), fileSize, lastWriteTime, hasFileInfo != 0); + } + + const auto prefixOffset = static_cast(in.tellg()); + if (prefixOffset + prefixSize > size) + { + status = ELoadStatus::Invalid; + return nullptr; + } + + if (loadPrefix) + { + entry.preprocessedPrefix.clear(); + if (prefixSize) + { + entry.preprocessedPrefix.resize(prefixSize); + if (!read_bytes(entry.preprocessedPrefix.data(), prefixSize)) + { + status = ELoadStatus::Invalid; + return nullptr; + } + } + retVal->m_prefixLoaded = true; + retVal->m_backingPath.clear(); + retVal->m_prefixOffset = 0; + retVal->m_prefixSize = 0; + } + else + { + if (prefixSize) + in.seekg(static_cast(prefixSize), std::ios::cur); + retVal->m_prefixLoaded = false; + retVal->m_backingPath = path; + retVal->m_prefixOffset = prefixOffset; + retVal->m_prefixSize = prefixSize; + } + + retVal->m_hasEntry = true; + status = ELoadStatus::Loaded; + return retVal; +} + +bool IShaderCompiler::CPreprocessCache::writeToFile(const system::path& path, const CPreprocessCache& cache) +{ + auto buffer = cache.serialize(); + if (!buffer) + return false; + + const auto parent = path.parent_path(); + if (!parent.empty() && !std::filesystem::exists(parent)) + std::filesystem::create_directories(parent); + + std::ofstream out(path, std::ios::binary | std::ios::trunc); + if (!out) + return false; + + 
out.write(reinterpret_cast(buffer->getPointer()), buffer->getSize()); + return bool(out); +} + +bool IShaderCompiler::CPreprocessCache::validateDependencies(const CIncludeFinder* finder, bool* depsUpdated, bool fastSafeValidation) const +{ + if (!m_hasEntry || !finder) + return false; + if (depsUpdated) + *depsUpdated = false; + bool updated = false; + auto* system = finder->getSystem(); + + std::vector mismatches; + mismatches.reserve(m_entry.dependencies.size()); + collectFileInfoMismatchesParallel(m_entry.dependencies, mismatches, system); + if (mismatches.empty()) + return true; + if (fastSafeValidation) + return false; + + std::unordered_map fileStatus; + std::unordered_map logicalStatus; + fileStatus.reserve(mismatches.size()); + logicalStatus.reserve(mismatches.size()); + for (size_t idx : mismatches) + { + const auto& dep = m_entry.dependencies[idx]; + auto makeLogicalKey = [&dep]() + { + std::string key; + key.reserve(dep.getIdentifier().size() + dep.getRequestingSourceDir().string().size() + 4); + key.append(dep.getRequestingSourceDir().string()); + key.push_back('|'); + key.append(dep.getIdentifier()); + key.push_back('|'); + key.push_back(dep.isStandardInclude() ? 
'1' : '0'); + return key; + }; + + if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) + { + if (auto it = fileStatus.find(dep.getAbsolutePath()); it != fileStatus.end()) + { + if (!it->second) + return false; + continue; + } + } + else + { + auto key = makeLogicalKey(); + if (auto it = logicalStatus.find(key); it != logicalStatus.end()) + { + if (!it->second) + return false; + continue; + } + } + + bool valid = false; + bool precomputedChecked = false; + if (system && !dep.getAbsolutePath().empty()) + { + system::ISystem::future_t> future; + system->createFile(future, dep.getAbsolutePath(), system::IFile::ECF_READ); + if (future.wait()) + { + core::smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (file) + { + if (auto precomputed = file->getPrecomputedHash()) + { + precomputedChecked = true; + core::blake3_hash_t hash = {}; + std::memcpy(hash.data, &(*precomputed), sizeof(hash.data)); + if (hash == dep.getHash()) + { + valid = true; + if (!dep.getHasFileInfo() && dep.getAbsolutePath().is_absolute()) + { + dep.setFileInfo(file->getSize(), file->getLastWriteTime().time_since_epoch().count(), true); + updated = true; + } + } + else + { + if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) + fileStatus.emplace(dep.getAbsolutePath(), false); + else + logicalStatus.emplace(makeLogicalKey(), false); + return false; + } + } + } + } + } + + if (!valid && !precomputedChecked) + { + const std::string identifier(dep.getIdentifier()); + IIncludeLoader::found_t header; + if (dep.isStandardInclude()) + header = finder->getIncludeStandard(dep.getRequestingSourceDir(), identifier); + else + header = finder->getIncludeRelative(dep.getRequestingSourceDir(), identifier); + if (header.hash != dep.getHash()) + { + if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) + fileStatus.emplace(dep.getAbsolutePath(), false); + else + logicalStatus.emplace(makeLogicalKey(), false); + return false; + } + + valid = true; + if 
(header.hasFileInfo && dep.getAbsolutePath().is_absolute()) + { + dep.setFileInfo(header.fileSize, header.lastWriteTime, true); + updated = true; + } + } + + if (valid && dep.getHasFileInfo() && dep.getAbsolutePath().is_absolute()) + { + uint64_t size = 0; + int64_t ticks = 0; + if (getFileInfoCached(dep.getAbsolutePath(), size, ticks, system) && + (dep.getFileSize() != size || dep.getLastWriteTime() != ticks)) + { + dep.setFileInfo(size, ticks, true); + updated = true; + } + } + + if (dep.getHasFileInfo() && !dep.getAbsolutePath().empty()) + fileStatus.emplace(dep.getAbsolutePath(), true); + else + logicalStatus.emplace(makeLogicalKey(), true); + } + + if (depsUpdated) + *depsUpdated = updated; + return true; +} + +void IShaderCompiler::CPreprocessCache::ensurePrefixLoaded() const +{ + if (m_prefixLoaded) + return; + if (m_prefixSize == 0) + { + m_prefixLoaded = true; + return; + } + if (m_backingPath.empty()) + return; + + std::ifstream in(m_backingPath, std::ios::binary); + if (!in) + return; + in.seekg(static_cast(m_prefixOffset), std::ios::beg); + if (!in) + return; + + std::string prefix; + prefix.resize(m_prefixSize); + if (!in.read(prefix.data(), prefix.size())) + return; + + m_entry.preprocessedPrefix = std::move(prefix); + m_prefixLoaded = true; +} + + +std::string IShaderCompiler::CPreprocessCache::buildCombinedCode(std::string_view body, std::string_view sourceIdentifier) const +{ + if (!m_hasEntry) + return std::string(body); + + ensurePrefixLoaded(); + std::string out; + size_t reserve = m_entry.preprocessedPrefix.size() + body.size(); + for (const auto& m : m_entry.macroDefs) + reserve += m.size() + 16; + for (const auto& f : m_entry.dxcFlags) + reserve += f.size() + 1; + reserve += 64; + out.reserve(reserve); + + if (!m_entry.dxcFlags.empty()) + { + out.append("#pragma dxc_compile_flags "); + for (size_t i = 0; i < m_entry.dxcFlags.size(); ++i) + { + if (i) + out.push_back(' '); + out.append(m_entry.dxcFlags[i]); + } + out.push_back('\n'); + } + + 
if (!m_entry.preprocessedPrefix.empty()) + { + out.append(m_entry.preprocessedPrefix); + if (out.back() != '\n') + out.push_back('\n'); + } + + for (const auto& macro : m_entry.macroDefs) + { + const auto eq = macro.find('='); + std::string_view name = eq == std::string::npos ? std::string_view(macro) : std::string_view(macro).substr(0, eq); + std::string_view def = eq == std::string::npos ? std::string_view() : std::string_view(macro).substr(eq + 1); + out.append("#define "); + out.append(name); + if (!def.empty()) + { + out.push_back(' '); + out.append(def); + } + out.push_back('\n'); + } + + if (!sourceIdentifier.empty()) + { + out.append("#line 1 \""); + out.append(normalizeLinePath(sourceIdentifier)); + out.append("\"\n"); + } + + out.append(body); + return out; } static void* SzAlloc(ISzAllocPtr p, size_t size) { p = p; return _NBL_ALIGNED_MALLOC(size, _NBL_SIMD_ALIGNMENT); } @@ -644,6 +2154,12 @@ bool nbl::asset::IShaderCompiler::CCache::SEntry::setContent(const asset::ICPUBu uncompressedContentHash = uncompressedSpirvBuffer->getContentHash(); uncompressedSize = uncompressedSpirvBuffer->getSize(); + if (compression == ECompression::RAW) + { + spirv = core::smart_refctd_ptr(const_cast(uncompressedSpirvBuffer)); + return static_cast(spirv); + } + size_t propsSize = LZMA_PROPS_SIZE; size_t destLen = uncompressedSpirvBuffer->getSize() + uncompressedSpirvBuffer->getSize() / 3 + 128; core::vector compressedSpirv(propsSize + destLen); @@ -671,6 +2187,14 @@ bool nbl::asset::IShaderCompiler::CCache::SEntry::setContent(const asset::ICPUBu core::smart_refctd_ptr nbl::asset::IShaderCompiler::CCache::SEntry::decompressShader() const { + if (compression == ECompression::RAW) + { + if (!spirv) + return nullptr; + auto buffer = spirv; + return core::make_smart_refctd_ptr(std::move(buffer), IShader::E_CONTENT_TYPE::ECT_SPIRV, compilerArgs.preprocessorArgs.sourceIdentifier.data()); + } + auto uncompressedBuf = ICPUBuffer::create({ uncompressedSize }); 
uncompressedBuf->setContentHash(uncompressedContentHash); diff --git a/src/nbl/asset/utils/shaderCompiler_serialization.h b/src/nbl/asset/utils/shaderCompiler_serialization.h index 6ad33a2ff5..b32a975554 100644 --- a/src/nbl/asset/utils/shaderCompiler_serialization.h +++ b/src/nbl/asset/utils/shaderCompiler_serialization.h @@ -6,6 +6,7 @@ using json = nlohmann::json; using SEntry = nbl::asset::IShaderCompiler::CCache::SEntry; +using CacheCompression = nbl::asset::IShaderCompiler::CCache::ECompression; namespace nbl::asset @@ -35,6 +36,10 @@ inline void to_json(json& j, const SEntry::SPreprocessorArgs& preprocArgs) j = json{ { "sourceIdentifier", preprocArgs.sourceIdentifier }, { "extraDefines", preprocArgs.extraDefines}, + { "forceIncludes", preprocArgs.forceIncludes}, + { "preserveComments", preprocArgs.preserveComments}, + { "emitLineDirectives", preprocArgs.emitLineDirectives}, + { "emitPragmaDirectives", preprocArgs.emitPragmaDirectives}, }; } @@ -42,6 +47,13 @@ inline void from_json(const json& j, SEntry::SPreprocessorArgs& preprocArgs) { j.at("sourceIdentifier").get_to(preprocArgs.sourceIdentifier); j.at("extraDefines").get_to(preprocArgs.extraDefines); + j.at("forceIncludes").get_to(preprocArgs.forceIncludes); + if (j.contains("preserveComments")) + j.at("preserveComments").get_to(preprocArgs.preserveComments); + if (j.contains("emitLineDirectives")) + j.at("emitLineDirectives").get_to(preprocArgs.emitLineDirectives); + if (j.contains("emitPragmaDirectives")) + j.at("emitPragmaDirectives").get_to(preprocArgs.emitPragmaDirectives); } // Optimizer pass has its own method for easier vector serialization @@ -118,6 +130,10 @@ inline void to_json(json& j, const SEntry::SPreprocessingDependency& dependency) { "identifier", dependency.identifier }, { "hash", dependency.hash.data }, { "standardInclude", dependency.standardInclude }, + { "absolutePath", dependency.absolutePath }, + { "fileSize", dependency.fileSize }, + { "lastWriteTime", dependency.lastWriteTime }, + 
{ "hasFileInfo", dependency.hasFileInfo }, }; } @@ -127,6 +143,14 @@ inline void from_json(const json& j, SEntry::SPreprocessingDependency& dependenc j.at("identifier").get_to(dependency.identifier); j.at("hash").get_to(dependency.hash.data); j.at("standardInclude").get_to(dependency.standardInclude); + if (j.contains("absolutePath")) + j.at("absolutePath").get_to(dependency.absolutePath); + if (j.contains("fileSize")) + j.at("fileSize").get_to(dependency.fileSize); + if (j.contains("lastWriteTime")) + j.at("lastWriteTime").get_to(dependency.lastWriteTime); + if (j.contains("hasFileInfo")) + j.at("hasFileInfo").get_to(dependency.hasFileInfo); } // We serialize shader creation parameters into a json, along with indexing info into the .bin buffer where the cache is serialized @@ -169,6 +193,7 @@ inline void from_json(const json& j, CPUShaderCreationParams& creationParams) inline void to_json(json& j, const SEntry& entry) { + uint32_t compression = static_cast(entry.compression); j = json{ { "mainFileContents", entry.mainFileContents }, { "compilerArgs", entry.compilerArgs }, @@ -177,6 +202,7 @@ inline void to_json(json& j, const SEntry& entry) { "dependencies", entry.dependencies }, { "uncompressedContentHash", entry.uncompressedContentHash.data }, { "uncompressedSize", entry.uncompressedSize }, + { "compression", compression }, }; } @@ -186,11 +212,22 @@ inline void from_json(const json& j, SEntry& entry) j.at("compilerArgs").get_to(entry.compilerArgs); j.at("hash").get_to(entry.hash.data); j.at("lookupHash").get_to(entry.lookupHash); - j.at("dependencies").get_to(entry.dependencies); + if (j.contains("dependencies")) + j.at("dependencies").get_to(entry.dependencies); j.at("uncompressedContentHash").get_to(entry.uncompressedContentHash.data); j.at("uncompressedSize").get_to(entry.uncompressedSize); + if (j.contains("compression")) + { + uint32_t compression = 0; + j.at("compression").get_to(compression); + entry.compression = static_cast(compression); + } + else + { 
+ entry.compression = CacheCompression::LZMA; + } entry.spirv = nullptr; } } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/asset/utils/waveContext.h b/src/nbl/asset/utils/waveContext.h index f6c0014e39..4fff0b556b 100644 --- a/src/nbl/asset/utils/waveContext.h +++ b/src/nbl/asset/utils/waveContext.h @@ -7,8 +7,12 @@ #include #include +#include #include +#include +#include + #include "nbl/asset/utils/IShaderCompiler.h" namespace nbl::wave @@ -44,6 +48,7 @@ struct preprocessing_hooks final : public boost::wave::context_policies::default { preprocessing_hooks(const nbl::asset::IShaderCompiler::SPreprocessorOptions& _preprocessOptions) : m_includeFinder(_preprocessOptions.includeFinder), m_logger(_preprocessOptions.logger), m_pragmaStage(nbl::asset::IShader::E_SHADER_STAGE::ESS_UNKNOWN), m_dxc_compile_flags_override() + , m_preserveComments(_preprocessOptions.preserveComments), m_emitLineDirectives(_preprocessOptions.emitLineDirectives), m_emitPragmaDirectives(_preprocessOptions.emitPragmaDirectives) { hash_token_occurences = 0; } @@ -155,6 +160,9 @@ struct preprocessing_hooks final : public boost::wave::context_policies::default asset::IShader::E_SHADER_STAGE m_pragmaStage; int hash_token_occurences; std::vector m_dxc_compile_flags_override; + const bool m_preserveComments; + const bool m_emitLineDirectives; + const bool m_emitPragmaDirectives; }; @@ -186,21 +194,29 @@ class context : private boost::noncopyable typedef typename iteration_context_stack_type::size_type iter_size_type; context* this_() { return this; } // avoid warning in constructor + static boost::wave::language_support make_language(const preprocessing_hooks& hooks) + { + boost::wave::language_support lang = support_cpp20; + if (hooks.m_preserveComments) + lang = boost::wave::language_support(lang | support_option_preserve_comments); + if (hooks.m_emitLineDirectives) + lang = boost::wave::language_support(lang | support_option_emit_line_directives); + if 
(hooks.m_emitPragmaDirectives) + lang = boost::wave::language_support(lang | support_option_emit_pragma_directives); + lang = boost::wave::language_support(lang | support_option_include_guard_detection); + return lang; + } public: context(target_iterator_type const& first_, target_iterator_type const& last_, char const* fname, preprocessing_hooks const& hooks_) : first(first_), last(last_), filename(fname) , has_been_initialized(false) , current_relative_filename(fname) +#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 + , current_filename(fname ? fname : "") +#endif , macros(*this_()) - , language(language_support( - support_cpp20 - | support_option_preserve_comments - | support_option_emit_line_directives - | support_option_emit_pragma_directives -// | support_option_emit_contnewlines -// | support_option_insert_whitespace - )) + , language(make_language(hooks_)) , hooks(hooks_) { macros.init_predefined_macros(fname); @@ -280,6 +296,70 @@ class context : private boost::noncopyable { macros.reset_macromap(); macros.init_predefined_macros(); } + void dump_macro_definitions(std::vector& out) const + { + out.clear(); + std::vector names; + names.reserve(std::distance(macro_names_begin(), macro_names_end())); + for (auto it = macro_names_begin(); it != macro_names_end(); ++it) + names.emplace_back(util::to_string(*it)); + std::sort(names.begin(), names.end()); + for (const auto& name : names) + { + bool has_params = false; + bool is_predefined = false; + position_type pos; + std::vector parameters; + token_sequence_type definition; + if (!get_macro_definition(name, has_params, is_predefined, pos, parameters, definition)) + continue; + if (is_predefined) + continue; + if (name.size() >= 2 && name[0] == '_' && name[1] == '_') + continue; + + std::string params_str; + if (has_params) + { + bool first_param = true; + for (const auto& tok : parameters) + { + auto tok_str = util::to_string(tok.get_value()); + if (tok_str == ",") + continue; + if (!first_param) + params_str.append(", 
"); + params_str.append(tok_str); + first_param = false; + } + } + + std::string def_str; + std::string prev_tok; + for (const auto& tok : definition) + { + auto tok_str = util::to_string(tok.get_value()); + if (!def_str.empty()) + { + if (!(prev_tok == "__VA_OPT__" && tok_str == "(")) + def_str.push_back(' '); + } + def_str.append(tok_str); + prev_tok = std::move(tok_str); + } + + std::string full = name; + if (has_params) + { + full.push_back('('); + full.append(params_str); + full.push_back(')'); + } + full.push_back('='); + full.append(def_str); + out.push_back(std::move(full)); + } + } // Iterate over names of defined macros typedef boost::wave::util::macromap macromap_type; @@ -440,9 +520,34 @@ class context : private boost::noncopyable } public: +#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 + void set_current_filename(char const* real_name) + { + current_filename = real_name ? real_name : ""; + } + std::string const& get_current_filename() const + { + return current_filename; + } + bool has_pragma_once(std::string const& filename) + { + return pragma_once_headers.find(filename) != pragma_once_headers.end(); + } + bool add_pragma_once_header(std::string const& filename, std::string const& guard_name) + { + get_hooks().detected_include_guard(derived(), filename, guard_name); + return pragma_once_headers.insert(filename).second; + } + bool add_pragma_once_header(token_type const& pragma_, std::string const& filename) + { + get_hooks().detected_pragma_once(derived(), pragma_, filename); + return pragma_once_headers.insert(filename).second; + } +#endif + void set_current_relative_filename(char const* real_name) { - current_relative_filename = real_name; + current_relative_filename = real_name ? 
real_name : ""; } std::string const& get_current_relative_filename() const { @@ -465,6 +570,10 @@ class context : private boost::noncopyable bool has_been_initialized; // set cwd once std::string current_relative_filename; // real relative name of current preprocessed file +#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 + std::string current_filename; + std::unordered_set pragma_once_headers; +#endif // Nabla Additions Start // these are temporaries! @@ -524,15 +633,28 @@ template<> inline bool boost::wave::impl::pp_iterator_functor inline bool boost::wave::impl::pp_iterator_functor @@ -129,7 +130,7 @@ class {NBL_BR_API} CArchive final : public nbl::system::CFileArchive file_buffer_t getFileBuffer(const nbl::system::IFileArchive::SFileList::found_t& found) override {{ auto resource = get_resource_runtime(found->pathRelativeToArchive.string()); - return {{const_cast(resource.contents),resource.size,nullptr}}; + return {{const_cast(resource.contents),resource.size,nullptr,nbl::hlsl::uint64_t4{{resource.xx256Hash[0],resource.xx256Hash[1],resource.xx256Hash[2],resource.xx256Hash[3]}}}}; }} }}; }} @@ -143,4 +144,4 @@ class {NBL_BR_API} CArchive final : public nbl::system::CFileArchive if __name__ == "__main__": args: argparse.Namespace = parser.parse_args() - execute(args) \ No newline at end of file + execute(args) diff --git a/src/nbl/system/ISystem.cpp b/src/nbl/system/ISystem.cpp index 6b25471f8d..ca092efd9a 100644 --- a/src/nbl/system/ISystem.cpp +++ b/src/nbl/system/ISystem.cpp @@ -337,6 +337,8 @@ void ISystem::unmountBuiltins() { }; removeByKey("nbl"); + removeByKey("nbl/builtin"); + removeByKey("nbl/video"); removeByKey("spirv"); removeByKey("boost"); } @@ -394,4 +396,4 @@ bool ISystem::isDebuggerAttached() return false; } -#endif \ No newline at end of file +#endif diff --git a/tools/nsc/CMakeLists.txt b/tools/nsc/CMakeLists.txt index 2765f02fa5..25444050d1 100644 --- a/tools/nsc/CMakeLists.txt +++ b/tools/nsc/CMakeLists.txt @@ -60,6 +60,8 @@ add_test(NAME 
NBL_NSC_DUMP_BUILD_INFO_TEST COMMAND_EXPAND_LISTS ) +add_subdirectory(test/cache_layers) + if(NBL_ENABLE_DOCKER_INTEGRATION) find_program(DOCKER_EXE NAMES docker REQUIRED) diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 5ab01d72e5..e2d457ad02 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -9,12 +9,22 @@ #include #include #include -#include +#include +#include #include #include +#include +#include +#include #include #include "nbl/asset/metadata/CHLSLMetadata.h" +#include "nbl/asset/utils/shaderCompiler_serialization.h" +#include "nbl/core/hash/blake.h" +#include "nbl/core/hash/fnv1a64.h" #include "nlohmann/json.hpp" +#ifdef _WIN32 +#include +#endif using json = nlohmann::json; using namespace nbl; @@ -82,8 +92,8 @@ class ShaderLogger final : public IThreadsafeLogger return; const auto parent = std::filesystem::path(m_logPath).parent_path(); - if (!parent.empty() && !std::filesystem::exists(parent)) - std::filesystem::create_directories(parent); + if (!parent.empty() && m_system && !m_system->exists(parent, IFileBase::ECF_READ)) + m_system->createDirectory(parent); for (auto attempt = 0u; attempt < kDeleteRetries; ++attempt) { @@ -105,6 +115,9 @@ class ShaderLogger final : public IThreadsafeLogger if (!m_file) return; + std::error_code ec; + std::filesystem::resize_file(m_logPath, 0, ec); + m_fileLogger = make_smart_refctd_ptr(smart_refctd_ptr(m_file), true, m_fileMask); } @@ -163,6 +176,14 @@ class ShaderCompiler final : public IApplicationFramework { const auto rawArgs = std::vector(argv.begin(), argv.end()); const auto expandedArgs = expandJoinedArgs(rawArgs); + m_logger = make_smart_refctd_ptr(bitflag(ILogger::ELL_ALL)); + if (!rawArgs.empty()) + { + std::error_code ec; + m_executablePath = std::filesystem::absolute(std::filesystem::path(rawArgs.front()), ec); + if (ec) + m_executablePath = std::filesystem::path(rawArgs.front()); + } argparse::ArgumentParser program("nsc"); 
program.add_argument("--dump-build-info").default_value(false).implicit_value(true); @@ -178,6 +199,17 @@ class ShaderCompiler final : public IApplicationFramework program.add_argument("-nolog").default_value(false).implicit_value(true); program.add_argument("-quiet").default_value(false).implicit_value(true); program.add_argument("-verbose").default_value(false).implicit_value(true); + program.add_argument("-shader-cache").default_value(false).implicit_value(true); + program.add_argument("-shader-cache-file").default_value(std::string{}); + program.add_argument("-shader-cache-compression").default_value(std::string{}); + program.add_argument("-preprocess-cache").default_value(false).implicit_value(true); + program.add_argument("-preprocess-cache-file").default_value(std::string{}); + program.add_argument("-nbl-shader-cache").default_value(false).implicit_value(true); + program.add_argument("-nbl-shader-cache-compression").default_value(std::string{}); + program.add_argument("-nbl-preprocess-cache").default_value(false).implicit_value(true); + program.add_argument("-nbl-preprocess-preamble").default_value(false).implicit_value(true); + program.add_argument("-nbl-stdout-log").default_value(false).implicit_value(true); + program.add_argument("-nbl-report").default_value(std::string{}); std::vector unknownArgs; try @@ -186,36 +218,46 @@ class ShaderCompiler final : public IApplicationFramework } catch (const std::runtime_error& err) { - std::cerr << err.what() << std::endl << program; + std::ostringstream usage; + usage << program; + if (m_logger) + m_logger->log("%s\n%s", ILogger::ELL_ERROR, err.what(), usage.str().c_str()); return false; } - if (program.get("--dump-build-info")) - { - dumpBuildInfo(program); - std::exit(0); - } - if (!isAPILoaded()) { - std::cerr << "Could not load Nabla API, terminating!"; + if (m_logger) + m_logger->log("Could not load Nabla API, terminating!", ILogger::ELL_ERROR); return false; } m_system = system ? 
std::move(system) : IApplicationFramework::createSystem(); if (!m_system) + { + if (m_logger) + m_logger->log("Failed to create system.", ILogger::ELL_ERROR); return false; + } + + if (program.get("--dump-build-info")) + { + dumpBuildInfo(program); + std::exit(0); + } if (rawArgs.size() < 2) { - std::cerr << "Insufficient arguments.\n"; + if (m_logger) + m_logger->log("Insufficient arguments.", ILogger::ELL_ERROR); return false; } const std::string fileToCompile = rawArgs.back(); if (!m_system->exists(fileToCompile, IFileBase::ECF_READ)) { - std::cerr << "Input shader file does not exist: " << fileToCompile << "\n"; + if (m_logger) + m_logger->log("Input shader file does not exist: %s", ILogger::ELL_ERROR, fileToCompile.c_str()); return false; } @@ -226,24 +268,61 @@ class ShaderCompiler final : public IApplicationFramework if (hasFc == hasFo) { if (hasFc) - std::cerr << "Invalid arguments. Passed both -Fo and -Fc.\n"; + { + if (m_logger) + m_logger->log("Invalid arguments. Passed both -Fo and -Fc.", ILogger::ELL_ERROR); + } else - std::cerr << "Missing arguments. Expecting `-Fc {filename}` or `-Fo {filename}`.\n"; + { + if (m_logger) + m_logger->log("Missing arguments. Expecting `-Fc {filename}` or `-Fo {filename}`.", ILogger::ELL_ERROR); + } return false; } const std::string outputFilepath = hasFc ? program.get("-Fc") : program.get("-Fo"); if (outputFilepath.empty()) { - std::cerr << "Invalid output file path.\n"; + if (m_logger) + m_logger->log("Invalid output file path.", ILogger::ELL_ERROR); return false; } const bool quiet = program.get("-quiet"); const bool verbose = program.get("-verbose"); + const bool stdoutLog = program.get("-nbl-stdout-log"); + const std::string reportPath = program.get("-nbl-report"); + bool shaderCacheEnabled = program.get("-shader-cache") || program.get("-nbl-shader-cache"); + const std::string shaderCachePathOverride = program.is_used("-shader-cache-file") ? 
program.get("-shader-cache-file") : std::string{}; + if (!shaderCachePathOverride.empty()) + shaderCacheEnabled = true; + bool preprocessCacheEnabled = program.get("-preprocess-cache") || program.get("-nbl-preprocess-cache"); + const std::string preprocessCachePathOverride = program.is_used("-preprocess-cache-file") ? program.get("-preprocess-cache-file") : std::string{}; + if (!preprocessCachePathOverride.empty()) + preprocessCacheEnabled = true; + bool preambleEnabled = program.get("-nbl-preprocess-preamble"); + const std::string compressionArgPrimary = program.get("-nbl-shader-cache-compression"); + std::string compressionArg = !compressionArgPrimary.empty() ? compressionArgPrimary : program.get("-shader-cache-compression"); + IShaderCompiler::CCache::ECompression shaderCacheCompression = IShaderCompiler::CCache::ECompression::LZMA; + if (!compressionArg.empty()) + { + std::transform(compressionArg.begin(), compressionArg.end(), compressionArg.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (compressionArg == "raw") + shaderCacheCompression = IShaderCompiler::CCache::ECompression::RAW; + else if (compressionArg == "lzma") + shaderCacheCompression = IShaderCompiler::CCache::ECompression::LZMA; + else + { + if (m_logger) + m_logger->log("Invalid shader cache compression: %s (expected raw or lzma).", ILogger::ELL_ERROR, compressionArg.c_str()); + return false; + } + } if (quiet && verbose) { - std::cerr << "Invalid arguments. Passed both -quiet and -verbose.\n"; + if (m_logger) + m_logger->log("Invalid arguments. Passed both -quiet and -verbose.", ILogger::ELL_ERROR); return false; } @@ -251,19 +330,56 @@ class ShaderCompiler final : public IApplicationFramework const std::string logPathOverride = program.is_used("-log") ? program.get("-log") : std::string{}; if (noLog && !logPathOverride.empty()) { - std::cerr << "Invalid arguments. Passed both -nolog and -log.\n"; + if (m_logger) + m_logger->log("Invalid arguments. 
Passed both -nolog and -log.", ILogger::ELL_ERROR); return false; } const auto logPath = logPathOverride.empty() ? std::filesystem::path(outputFilepath).concat(".log") : std::filesystem::path(logPathOverride); const auto fileMask = bitflag(ILogger::ELL_ALL); - const auto consoleMask = bitflag(ILogger::ELL_WARNING) | ILogger::ELL_ERROR; + auto consoleMask = bitflag(ILogger::ELL_WARNING) | ILogger::ELL_ERROR; + if (stdoutLog) + consoleMask = fileMask; m_logger = make_smart_refctd_ptr(m_system, logPath, fileMask, consoleMask, noLog); + const auto configName = std::filesystem::path(outputFilepath).parent_path().filename().string(); + const auto configLabel = configName.empty() ? "Unknown" : configName; m_arguments = std::move(unknownArgs); if (!m_arguments.empty() && m_arguments.back() == fileToCompile) m_arguments.pop_back(); + if (!m_arguments.empty()) + { + std::vector filteredArgs; + for (size_t i = 0; i < m_arguments.size(); ++i) + { + const auto& arg = m_arguments[i]; + if (arg == "-FI" || arg == "-include" || arg == "/FI") + { + if (i + 1 >= m_arguments.size()) + { + if (m_logger) + m_logger->log("Missing argument for %s.", ILogger::ELL_ERROR, arg.c_str()); + return false; + } + m_force_includes.push_back(m_arguments[i + 1]); + ++i; + continue; + } + if ((arg.rfind("-FI", 0) == 0 || arg.rfind("/FI", 0) == 0) && arg.size() > 3) + { + m_force_includes.push_back(arg.substr(3)); + continue; + } + if (arg.rfind("-include", 0) == 0 && arg.size() > 8) + { + m_force_includes.push_back(arg.substr(8)); + continue; + } + filteredArgs.push_back(arg); + } + m_arguments = std::move(filteredArgs); + } bool noNblBuiltins = program.get("-no-nbl-builtins"); if (noNblBuiltins) @@ -278,9 +394,21 @@ class ShaderCompiler final : public IApplicationFramework if (program.is_used("-MF")) dep.path = program.get("-MF"); if (dep.enabled && dep.path.empty()) - dep.path = outputFilepath + ".d"; - if (dep.enabled) - m_logger->log("Dependency file will be saved to %s", ILogger::ELL_INFO, 
dep.path.c_str()); + dep.path = outputFilepath + ".dep"; + + ShaderCacheConfig shaderCache; + shaderCache.enabled = shaderCacheEnabled && !preprocessOnly; + shaderCache.verbose = verbose; + shaderCache.compression = shaderCacheCompression; + if (shaderCache.enabled) + shaderCache.path = shaderCachePathOverride.empty() ? makeCachePath(outputFilepath) : std::filesystem::path(shaderCachePathOverride); + + PreprocessCacheConfig preCache; + preCache.enabled = preprocessCacheEnabled && !preprocessOnly; + preCache.verbose = verbose; + preCache.preamble = preCache.enabled && preambleEnabled; + if (preCache.enabled) + preCache.path = preprocessCachePathOverride.empty() ? makePreprocessCachePath(outputFilepath) : std::filesystem::path(preprocessCachePathOverride); #ifndef NBL_EMBED_BUILTIN_RESOURCES if (!noNblBuiltins) @@ -303,10 +431,64 @@ class ShaderCompiler final : public IApplicationFramework m_include_search_paths.emplace_back(m_arguments[i + 1]); } + auto addIncludePath = [&](const std::filesystem::path& path) + { + if (path.empty()) + return; + std::error_code ec; + const auto normalized = std::filesystem::weakly_canonical(path, ec).generic_string(); + if (normalized.empty()) + return; + if (std::find(m_include_search_paths.begin(), m_include_search_paths.end(), normalized) == m_include_search_paths.end()) + m_include_search_paths.emplace_back(normalized); + }; + + if (!rawArgs.empty()) + { + std::error_code ec; + std::filesystem::path exePath = rawArgs.front(); + if (std::filesystem::exists(exePath, ec)) + { + exePath = std::filesystem::weakly_canonical(exePath, ec); + if (!ec) + { + const auto root = exePath.parent_path().parent_path().parent_path(); + addIncludePath(root / "include"); + } + } + } + + if (verbose) + { + auto join = [](const std::vector& items) + { + std::string out; + for (const auto& item : items) + { + if (!out.empty()) + out.push_back(' '); + out.append(item); + } + return out; + }; + m_logger->log("Verbose logging enabled.", 
ILogger::ELL_DEBUG); + m_logger->log("Variant: %s", ILogger::ELL_DEBUG, configLabel.c_str()); + if (!rawArgs.empty()) + m_logger->log("Compiler: %s", ILogger::ELL_DEBUG, rawArgs.front().c_str()); + m_logger->log("Command line: %s", ILogger::ELL_DEBUG, join(rawArgs).c_str()); + m_logger->log("Input: %s", ILogger::ELL_DEBUG, fileToCompile.c_str()); + m_logger->log("Output: %s", ILogger::ELL_DEBUG, outputFilepath.c_str()); + if (dep.enabled) + m_logger->log("Depfile: %s", ILogger::ELL_DEBUG, dep.path.c_str()); + if (shaderCache.enabled) + m_logger->log("Shader Cache: %s", ILogger::ELL_DEBUG, shaderCache.path.string().c_str()); + if (preCache.enabled) + m_logger->log("Preprocess cache: %s", ILogger::ELL_DEBUG, preCache.path.string().c_str()); + } + const char* const action = preprocessOnly ? "Preprocessing" : "Compiling"; const char* const outType = preprocessOnly ? "Preprocessed" : "Compiled"; - m_logger->log("%s %s", ILogger::ELL_INFO, action, fileToCompile.c_str()); - m_logger->log("%s shader code will be saved to %s", ILogger::ELL_INFO, outType, outputFilepath.c_str()); + m_logger->log("%s the input file.", ILogger::ELL_INFO, action); auto [shader, shaderStage] = open_shader_file(fileToCompile); if (!shader || shader->getContentType() != IShader::E_CONTENT_TYPE::ECT_HLSL) @@ -316,7 +498,8 @@ class ShaderCompiler final : public IApplicationFramework } const auto start = std::chrono::high_resolution_clock::now(); - const auto job = runShaderJob(shader.get(), shaderStage, fileToCompile, dep, preprocessOnly); + const std::string preprocessedOutputPath = outputFilepath + ".pre.hlsl"; + auto job = runShaderJob(shader.get(), shaderStage, fileToCompile, dep, shaderCache, preCache, preprocessOnly, outputFilepath, preprocessedOutputPath, verbose, !reportPath.empty()); const auto end = std::chrono::high_resolution_clock::now(); const char* const op = preprocessOnly ? 
"preprocessing" : "compilation"; @@ -326,44 +509,87 @@ class ShaderCompiler final : public IApplicationFramework return false; } - const auto took = std::to_string(std::chrono::duration_cast(end - start).count()); m_logger->log("Shader %s successful.", ILogger::ELL_INFO, op); - m_logger->log("Took %s ms.", ILogger::ELL_PERFORMANCE, took.c_str()); + if (dep.enabled) + { + const bool depWritten = m_system->exists(dep.path, IFileBase::ECF_READ); + if (!depWritten) + m_logger->log("Dependency file missing at %s", ILogger::ELL_WARNING, dep.path.c_str()); + m_logger->log(depWritten ? "Depfile written successfully." : "Depfile write failed.", depWritten ? ILogger::ELL_INFO : ILogger::ELL_WARNING); + } const auto outParent = std::filesystem::path(outputFilepath).parent_path(); - if (!outParent.empty() && !std::filesystem::exists(outParent)) + if (!outParent.empty() && m_system && !m_system->exists(outParent, IFileBase::ECF_READ)) { - if (!std::filesystem::create_directories(outParent)) + if (!m_system->createDirectory(outParent)) { m_logger->log("Failed to create parent directory for output %s.", ILogger::ELL_ERROR, outputFilepath.c_str()); return false; } } - std::fstream out(outputFilepath, std::ios::out | std::ios::binary); - if (!out.is_open()) + bool outputWritten = false; + long long outputWriteMs = 0; + uint64_t outputSize = 0; + if (!job.view.empty()) { - m_logger->log("Failed to open output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); - return false; + const auto writeStart = std::chrono::high_resolution_clock::now(); + if (!writeBinaryFile(m_system.get(), std::filesystem::path(outputFilepath), job.view.data(), job.view.size())) + { + m_logger->log("Failed to write output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); + return false; + } + outputWritten = true; + outputSize = static_cast(job.view.size()); + OutputHashRecord record = {}; + record.size = job.view.size(); + { + core::blake3_hasher hasher; + hasher.update(job.view.data(), 
job.view.size()); + record.hash = static_cast(hasher); + } + const auto hashPath = makeOutputHashPath(std::filesystem::path(outputFilepath)); + if (!writeBinaryFile(m_system.get(), hashPath, &record, sizeof(record))) + m_logger->log("Failed to write output hash file: %s", ILogger::ELL_WARNING, hashPath.string().c_str()); + const auto writeEnd = std::chrono::high_resolution_clock::now(); + outputWriteMs = std::chrono::duration_cast(writeEnd - writeStart).count(); + if (verbose) + { + m_logger->log("Write output took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(outputWriteMs)); + } } - - out.write(job.view.data(), job.view.size()); - if (out.fail()) + else if (verbose) { - m_logger->log("Failed to write to output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); - out.close(); - return false; + m_logger->log("Output up to date. Skipping write.", ILogger::ELL_DEBUG); } - out.close(); - if (out.fail()) + const auto totalMs = std::chrono::duration_cast(end - start).count(); + const auto overallEnd = std::chrono::high_resolution_clock::now(); + const auto totalWithOutputMs = std::chrono::duration_cast(overallEnd - start).count(); + m_logger->log("Total took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(totalMs)); + + if (!reportPath.empty() && !job.report.is_null()) { - m_logger->log("Failed to close output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); - return false; + job.report["version"] = 1; + job.report["input"] = fileToCompile; + job.report["config"] = configLabel; + job.report["builtins"] = !noNblBuiltins; + job.report["preprocess_only"] = preprocessOnly; + job.report["output"]["path"] = outputFilepath; + job.report["output"]["written"] = outputWritten; + job.report["output"]["ms"] = outputWriteMs; + job.report["output"]["size"] = outputSize; + job.report["depfile"]["enabled"] = dep.enabled; + job.report["depfile"]["path"] = dep.path; + job.report["total_ms"] = totalMs; + job.report["total_with_output_ms"] = totalWithOutputMs; + + const 
auto reportDump = job.report.dump(2); + if (!writeBinaryFile(m_system.get(), std::filesystem::path(reportPath), reportDump.data(), reportDump.size())) + m_logger->log("Failed to write report: %s", ILogger::ELL_WARNING, reportPath.c_str()); } - if (dep.enabled) - m_logger->log("Dependency file written to %s", ILogger::ELL_INFO, dep.path.c_str()); + flushSystemQueue(m_system.get(), std::filesystem::path(outputFilepath)); return true; } @@ -378,132 +604,2493 @@ class ShaderCompiler final : public IApplicationFramework std::string path; }; + struct ShaderCacheConfig + { + bool enabled = false; + bool verbose = false; + std::filesystem::path path; + IShaderCompiler::CCache::ECompression compression = IShaderCompiler::CCache::ECompression::LZMA; + }; + + struct PreprocessCacheConfig + { + bool enabled = false; + bool verbose = false; + bool preamble = false; + std::filesystem::path path; + }; + + enum class CacheLoadStatus : uint8_t + { + Missing, + Invalid, + Loaded + }; + struct RunResult { bool ok = false; std::string text; smart_refctd_ptr compiled; std::string_view view; + json report; }; - static std::vector expandJoinedArgs(const std::vector& args) + struct OutputHashRecord { - std::vector out; - out.reserve(args.size()); + core::blake3_hash_t hash = {}; + uint64_t size = 0; + }; - auto split = [&](const std::string& a, const char* p) + struct ShaderCacheIndexLocation + { + uint64_t spirvOffset = 0; + uint64_t spirvSize = 0; + }; + + static std::filesystem::path makeCachePath(std::filesystem::path outputPath) + { + outputPath += ".ppcache"; + return outputPath; + } + + static std::filesystem::path makeOutputHashPath(std::filesystem::path outputPath) + { + outputPath += ".hash"; + return outputPath; + } + + static std::filesystem::path makePreprocessCachePath(std::filesystem::path outputPath) + { + outputPath += ".ppcache.pre"; + return outputPath; + } + + static std::filesystem::path makeShaderCacheIndexPath(std::filesystem::path cachePath) + { + cachePath += 
".idx"; + return cachePath; + } + + static std::filesystem::path makePreprocessCacheIndexPath(std::filesystem::path cachePath) + { + cachePath += ".idx"; + return cachePath; + } + + static void writeShaderCacheIndex(system::ISystem* system, const std::filesystem::path& path, const uint8_t* data, size_t size) + { + if (!system) + return; + if (size < IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES) + return; + + uint64_t cacheSize = 0; + int64_t cacheTime = 0; + if (!getFileInfo(system, path, cacheSize, cacheTime)) + return; + + uint64_t shaderBufferSize = 0; + std::memcpy(&shaderBufferSize, data, IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES); + if (size < IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES + shaderBufferSize) + return; + + bool hasBinaryDeps = false; + uint64_t jsonSize = 0; + size_t jsonOffset = 0; + size_t shaderOffset = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES; + + const size_t minNewHeader = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t) + shaderBufferSize; + if (size >= minNewHeader) { - const size_t n = std::strlen(p); - if (a.rfind(p, 0) == 0 && a.size() > n) + std::memcpy(&jsonSize, data + IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES, sizeof(jsonSize)); + const size_t candidateJsonOffset = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t) + shaderBufferSize; + if (candidateJsonOffset + jsonSize <= size) { - out.emplace_back(p); - out.emplace_back(a.substr(n)); + hasBinaryDeps = true; + jsonOffset = candidateJsonOffset; + shaderOffset = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES + sizeof(uint64_t); + } + } + + if (!hasBinaryDeps) + { + jsonOffset = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES + shaderBufferSize; + jsonSize = size - jsonOffset; + shaderOffset = IShaderCompiler::CCache::SHADER_BUFFER_SIZE_BYTES; + } + + std::string_view containerJsonString(reinterpret_cast(data + jsonOffset), jsonSize); + json containerJson = json::parse(containerJsonString); + std::vector 
entries; + std::vector shaderCreationParams; + containerJson.at("entries").get_to(entries); + containerJson.at("shaderCreationParams").get_to(shaderCreationParams); + if (entries.size() != shaderCreationParams.size()) + return; + + if (hasBinaryDeps) + { + const size_t depsOffset = jsonOffset + jsonSize; + auto read_bytes = [data, size](size_t& offset, void* dst, size_t count) -> bool + { + if (offset + count > size) + return false; + std::memcpy(dst, data + offset, count); + offset += count; + return true; + }; + auto read_u32 = [&read_bytes](size_t& offset, uint32_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_u64 = [&read_bytes](size_t& offset, uint64_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_i64 = [&read_bytes](size_t& offset, int64_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_hash = [&read_bytes](size_t& offset, core::blake3_hash_t& out) -> bool { return read_bytes(offset, out.data, sizeof(out.data)); }; + auto read_string = [&read_u32, &read_bytes, data, size](size_t& offset, std::string& out) -> bool + { + uint32_t count = 0; + if (!read_u32(offset, count)) + return false; + if (offset + count > size) + return false; + out.assign(reinterpret_cast(data + offset), count); + offset += count; return true; + }; + + size_t offset = depsOffset; + uint32_t entryCount = 0; + if (!read_u32(offset, entryCount)) + return; + if (entryCount != entries.size()) + return; + + for (uint32_t i = 0; i < entryCount; ++i) + { + uint32_t depCount = 0; + if (!read_u32(offset, depCount)) + return; + auto& deps = entries[i].dependencies; + deps.clear(); + deps.reserve(depCount); + for (uint32_t d = 0; d < depCount; ++d) + { + std::string dir; + std::string identifier; + std::string absolutePath; + uint8_t standardInclude = 0; + core::blake3_hash_t hash = {}; + uint64_t fileSize = 0; + int64_t lastWriteTime = 0; + uint8_t hasFileInfo = 0; + if (!read_string(offset, dir) || 
!read_string(offset, identifier) || !read_string(offset, absolutePath) || + !read_bytes(offset, &standardInclude, sizeof(standardInclude)) || !read_hash(offset, hash) || + !read_u64(offset, fileSize) || !read_i64(offset, lastWriteTime) || !read_bytes(offset, &hasFileInfo, sizeof(hasFileInfo))) + { + return; + } + + deps.emplace_back(system::path(dir), identifier, standardInclude != 0, hash, system::path(absolutePath), fileSize, lastWriteTime, hasFileInfo != 0); + } } - return false; + } + + std::vector out; + auto write_bytes = [&out](const void* data, size_t size) + { + const auto* ptr = reinterpret_cast(data); + out.insert(out.end(), ptr, ptr + size); + }; + auto write_u32 = [&write_bytes](uint32_t value) { write_bytes(&value, sizeof(value)); }; + auto write_u64 = [&write_bytes](uint64_t value) { write_bytes(&value, sizeof(value)); }; + auto write_i64 = [&write_bytes](int64_t value) { write_bytes(&value, sizeof(value)); }; + auto write_hash = [&write_bytes](const core::blake3_hash_t& hash) { write_bytes(hash.data, sizeof(hash.data)); }; + auto write_string = [&write_u32, &write_bytes](std::string_view value) + { + write_u32(static_cast(value.size())); + if (!value.empty()) + write_bytes(value.data(), value.size()); }; - for (const auto& a : args) + const uint32_t magic = 0x4E534349u; + const uint32_t version = 2u; + write_u32(magic); + write_u32(version); + write_string(std::string_view(IShaderCompiler::CCache::VERSION)); + write_u64(cacheSize); + write_i64(cacheTime); + write_u32(static_cast(entries.size())); + + for (size_t i = 0; i < entries.size(); ++i) { - if (split(a, "-MF")) continue; - if (split(a, "-Fo")) continue; - if (split(a, "-Fc")) continue; - out.push_back(a); + const auto& entry = entries[i]; + const auto& params = shaderCreationParams[i]; + const uint64_t spirvOffset = shaderOffset + params.offset; + const uint64_t spirvSize = params.codeByteSize; + + write_hash(entry.hash); + write_u64(spirvOffset); + write_u64(spirvSize); + 
write_u64(entry.uncompressedSize); + write_hash(entry.uncompressedContentHash); + const uint8_t compression = static_cast(entry.compression); + write_bytes(&compression, sizeof(compression)); + write_u32(static_cast(entry.dependencies.size())); + for (const auto& dep : entry.dependencies) + { + write_string(dep.getRequestingSourceDir().generic_string()); + write_string(dep.getIdentifier()); + write_string(dep.getAbsolutePath().generic_string()); + const uint8_t standardInclude = dep.isStandardInclude() ? 1u : 0u; + write_bytes(&standardInclude, sizeof(standardInclude)); + write_hash(dep.getHash()); + const uint64_t fileSize = dep.getFileSize(); + write_u64(fileSize); + const int64_t lastWriteTime = dep.getLastWriteTime(); + write_i64(lastWriteTime); + const uint8_t hasFileInfo = dep.getHasFileInfo() ? 1u : 0u; + write_bytes(&hasFileInfo, sizeof(hasFileInfo)); + } } - return out; + const auto indexPath = makeShaderCacheIndexPath(path); + writeBinaryFile(system, indexPath, out.data(), out.size()); } - static void dumpBuildInfo(const argparse::ArgumentParser& program) + struct PreprocessCacheIndexEntry { - json j; - auto& modules = j["modules"]; + core::blake3_hash_t prefixHash = {}; + uint32_t pragmaStage = 0; + uint32_t prefixSize = 0; + uint64_t prefixOffset = 0; + std::vector macroDefs; + std::vector dxcFlags; + IShaderCompiler::CCache::SEntry::dependency_container_t dependencies; + }; - auto serialize = [&](const gtml::GitInfo& info, std::string_view target) + static uint64_t computePreprocessPrefixOffset(const IShaderCompiler::CPreprocessCache::SEntry& entry) + { + uint64_t offset = 0; + auto add_u32 = [&offset]() { offset += sizeof(uint32_t); }; + auto add_string = [&offset](std::string_view value) { - auto& s = modules[target.data()]; - s["isPopulated"] = info.isPopulated; - s["hasUncommittedChanges"] = info.hasUncommittedChanges.has_value() ? 
json(info.hasUncommittedChanges.value()) : json("UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"); - s["commitAuthorName"] = info.commitAuthorName; - s["commitAuthorEmail"] = info.commitAuthorEmail; - s["commitHash"] = info.commitHash; - s["commitShortHash"] = info.commitShortHash; - s["commitDate"] = info.commitDate; - s["commitSubject"] = info.commitSubject; - s["commitBody"] = info.commitBody; - s["describe"] = info.describe; - s["branchName"] = info.branchName; - s["latestTag"] = info.latestTag; - s["latestTagName"] = info.latestTagName; + offset += sizeof(uint32_t); + offset += static_cast(value.size()); }; - serialize(gtml::nabla_git_info, "nabla"); - serialize(gtml::dxc_git_info, "dxc"); + add_u32(); + add_string(IShaderCompiler::CPreprocessCache::VERSION); + offset += sizeof(entry.prefixHash); + add_u32(); + add_u32(); - const auto pretty = j.dump(4); - std::cout << pretty << std::endl; + add_u32(); + for (const auto& macro : entry.macroDefs) + add_string(macro); - std::filesystem::path oPath = "build-info.json"; - if (program.is_used("--file")) + add_u32(); + for (const auto& flag : entry.dxcFlags) + add_string(flag); + + add_u32(); + for (const auto& dep : entry.dependencies) { - const auto filePath = program.get("--file"); - if (!filePath.empty()) - oPath = filePath; + const auto dir = dep.getRequestingSourceDir().generic_string(); + const auto abs = dep.getAbsolutePath().generic_string(); + add_string(dir); + add_string(dep.getIdentifier()); + add_string(abs); + offset += sizeof(uint8_t); + offset += sizeof(core::blake3_hash_t); + offset += sizeof(uint64_t); + offset += sizeof(int64_t); + offset += sizeof(uint8_t); } + return offset; + } - std::ofstream outFile(oPath); - if (!outFile.is_open()) + static void writePreprocessCacheIndex(system::ISystem* system, const std::filesystem::path& path, const IShaderCompiler::CPreprocessCache& cache) + { + if (!system || !cache.hasEntry()) + return; + + uint64_t cacheSize = 0; + int64_t cacheTime = 0; + if 
(!getFileInfo(system, path, cacheSize, cacheTime)) + return; + + const auto& entry = cache.getEntry(); + const uint32_t prefixSize = static_cast(entry.preprocessedPrefix.size()); + const uint64_t prefixOffset = computePreprocessPrefixOffset(entry); + if (prefixOffset + prefixSize > cacheSize) + return; + + std::vector out; + auto write_bytes = [&out](const void* data, size_t size) { - std::printf("Failed to open \"%s\" for writing\n", oPath.string().c_str()); - std::exit(-1); + const auto* ptr = reinterpret_cast(data); + out.insert(out.end(), ptr, ptr + size); + }; + auto write_u32 = [&write_bytes](uint32_t value) { write_bytes(&value, sizeof(value)); }; + auto write_u64 = [&write_bytes](uint64_t value) { write_bytes(&value, sizeof(value)); }; + auto write_i64 = [&write_bytes](int64_t value) { write_bytes(&value, sizeof(value)); }; + auto write_hash = [&write_bytes](const core::blake3_hash_t& hash) { write_bytes(hash.data, sizeof(hash.data)); }; + auto write_string = [&write_u32, &write_bytes](std::string_view value) + { + write_u32(static_cast(value.size())); + if (!value.empty()) + write_bytes(value.data(), value.size()); + }; + + const uint32_t magic = 0x4E504349u; + const uint32_t version = 1u; + write_u32(magic); + write_u32(version); + write_string(IShaderCompiler::CPreprocessCache::VERSION); + write_u64(cacheSize); + write_i64(cacheTime); + write_hash(entry.prefixHash); + write_u32(entry.pragmaStage); + write_u32(prefixSize); + write_u64(prefixOffset); + + write_u32(static_cast(entry.macroDefs.size())); + for (const auto& macro : entry.macroDefs) + write_string(macro); + write_u32(static_cast(entry.dxcFlags.size())); + for (const auto& flag : entry.dxcFlags) + write_string(flag); + + write_u32(static_cast(entry.dependencies.size())); + for (const auto& dep : entry.dependencies) + { + const auto dir = dep.getRequestingSourceDir().generic_string(); + const auto abs = dep.getAbsolutePath().generic_string(); + write_string(dir); + 
write_string(dep.getIdentifier()); + write_string(abs); + const uint8_t standardInclude = dep.isStandardInclude() ? 1u : 0u; + write_bytes(&standardInclude, sizeof(standardInclude)); + write_hash(dep.getHash()); + const uint64_t fileSize = dep.getFileSize(); + write_u64(fileSize); + const int64_t lastWriteTime = dep.getLastWriteTime(); + write_i64(lastWriteTime); + const uint8_t hasFileInfo = dep.getHasFileInfo() ? 1u : 0u; + write_bytes(&hasFileInfo, sizeof(hasFileInfo)); } - outFile << pretty; - std::printf("Saved \"%s\"\n", oPath.string().c_str()); + writeBinaryFile(system, makePreprocessCacheIndexPath(path), out.data(), out.size()); } - RunResult runShaderJob(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& dep, const bool preprocessOnly) + static bool tryLoadPreprocessCacheIndex(system::ISystem* system, const std::filesystem::path& cachePath, const core::blake3_hash_t& prefixHash, PreprocessCacheIndexEntry& outEntry, std::string* reason) { - RunResult r; - auto hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + if (!system) + { + if (reason) + *reason = "no system"; + return false; + } - auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - auto includeLoader = includeFinder->getDefaultFileSystemLoader(); - for (const auto& p : m_include_search_paths) - includeFinder->addSearchPath(p, includeLoader); + const auto indexPath = makePreprocessCacheIndexPath(cachePath); + if (!system->exists(indexPath, IFileBase::ECF_READ)) + { + if (reason) + *reason = "index missing"; + return false; + } - if (preprocessOnly) + uint64_t cacheSize = 0; + int64_t cacheTime = 0; + if (!getFileInfo(system, cachePath, cacheSize, cacheTime)) + { + if (reason) + *reason = "cache info"; + return false; + } + + ISystem::future_t> future; + system->createFile(future, indexPath, IFileBase::ECF_READ); + if (!future.wait()) { - CHLSLCompiler::SPreprocessorOptions opt = {}; - opt.sourceIdentifier 
= sourceIdentifier; - opt.logger = m_logger.get(); - opt.includeFinder = includeFinder.get(); - opt.depfile = dep.enabled; - opt.depfilePath = dep.path; + if (reason) + *reason = "index open"; + return false; + } - const char* codePtr = (const char*)shader->getContent()->getPointer(); - std::string_view code(codePtr, std::strlen(codePtr)); + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file || file->getSize() == 0) + { + if (reason) + *reason = "index empty"; + return false; + } - r.text = hlslcompiler->preprocessShader(std::string(code), shaderStage, opt, nullptr); - r.ok = !r.text.empty(); - r.view = r.text; - return r; + std::vector data(file->getSize()); + IFile::success_t succ; + file->read(succ, data.data(), 0, data.size()); + if (!succ || succ.getBytesProcessed(true) != data.size()) + { + if (reason) + *reason = "index read"; + return false; } - CHLSLCompiler::SOptions opt = {}; - opt.stage = shaderStage; - opt.preprocessorOptions.sourceIdentifier = sourceIdentifier; - opt.preprocessorOptions.logger = m_logger.get(); - opt.preprocessorOptions.includeFinder = includeFinder.get(); - opt.preprocessorOptions.depfile = dep.enabled; - opt.preprocessorOptions.depfilePath = dep.path; - opt.debugInfoFlags = bitflag(IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT); - opt.dxcOptions = std::span(m_arguments); + auto read_bytes = [&data](size_t& offset, void* dst, size_t size) -> bool + { + if (offset + size > data.size()) + return false; + std::memcpy(dst, data.data() + offset, size); + offset += size; + return true; + }; + auto read_u32 = [&read_bytes](size_t& offset, uint32_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_u64 = [&read_bytes](size_t& offset, uint64_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_i64 = [&read_bytes](size_t& offset, int64_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_hash = 
[&read_bytes](size_t& offset, core::blake3_hash_t& out) -> bool { return read_bytes(offset, out.data, sizeof(out.data)); }; + auto read_string = [&read_u32, &read_bytes, &data](size_t& offset, std::string& out) -> bool + { + uint32_t size = 0; + if (!read_u32(offset, size)) + return false; + if (offset + size > data.size()) + return false; + out.assign(reinterpret_cast(data.data() + offset), size); + offset += size; + return true; + }; - r.compiled = hlslcompiler->compileToSPIRV((const char*)shader->getContent()->getPointer(), opt); - r.ok = bool(r.compiled); - if (r.ok) - r.view = { (const char*)r.compiled->getContent()->getPointer(), r.compiled->getContent()->getSize() }; + size_t offset = 0; + uint32_t magic = 0; + uint32_t version = 0; + if (!read_u32(offset, magic) || !read_u32(offset, version)) + return false; + if (magic != 0x4E504349u || version != 1u) + { + if (reason) + *reason = "index header"; + return false; + } + + std::string cacheVersion; + if (!read_string(offset, cacheVersion)) + return false; + if (cacheVersion != IShaderCompiler::CPreprocessCache::VERSION) + { + if (reason) + *reason = "cache version"; + return false; + } + + uint64_t indexCacheSize = 0; + int64_t indexCacheTime = 0; + if (!read_u64(offset, indexCacheSize) || !read_i64(offset, indexCacheTime)) + return false; + if (indexCacheSize != cacheSize || indexCacheTime != cacheTime) + { + if (reason) + *reason = "cache mismatch size=" + std::to_string(indexCacheSize) + "/" + std::to_string(cacheSize) + + " time=" + std::to_string(indexCacheTime) + "/" + std::to_string(cacheTime); + return false; + } + + core::blake3_hash_t storedPrefixHash = {}; + if (!read_hash(offset, storedPrefixHash)) + return false; + if (storedPrefixHash != prefixHash) + { + if (reason) + *reason = "prefix mismatch"; + return false; + } + + uint32_t pragmaStage = 0; + uint32_t prefixSize = 0; + uint64_t prefixOffset = 0; + if (!read_u32(offset, pragmaStage) || !read_u32(offset, prefixSize) || !read_u64(offset, 
prefixOffset)) + return false; + if (prefixOffset + prefixSize > cacheSize) + { + if (reason) + *reason = "prefix range"; + return false; + } + + uint32_t macroCount = 0; + if (!read_u32(offset, macroCount)) + return false; + std::vector macroDefs; + macroDefs.reserve(macroCount); + for (uint32_t i = 0; i < macroCount; ++i) + { + std::string macro; + if (!read_string(offset, macro)) + return false; + macroDefs.emplace_back(std::move(macro)); + } + + uint32_t flagCount = 0; + if (!read_u32(offset, flagCount)) + return false; + std::vector dxcFlags; + dxcFlags.reserve(flagCount); + for (uint32_t i = 0; i < flagCount; ++i) + { + std::string flag; + if (!read_string(offset, flag)) + return false; + dxcFlags.emplace_back(std::move(flag)); + } + + uint32_t depCount = 0; + if (!read_u32(offset, depCount)) + return false; + IShaderCompiler::CCache::SEntry::dependency_container_t deps; + deps.reserve(depCount); + for (uint32_t i = 0; i < depCount; ++i) + { + std::string dir; + std::string identifier; + std::string abs; + uint8_t standardInclude = 0; + core::blake3_hash_t depHash = {}; + uint64_t fileSize = 0; + int64_t lastWriteTime = 0; + uint8_t hasFileInfo = 0; + if (!read_string(offset, dir) || !read_string(offset, identifier) || !read_string(offset, abs) || + !read_bytes(offset, &standardInclude, sizeof(standardInclude)) || !read_hash(offset, depHash) || + !read_u64(offset, fileSize) || !read_i64(offset, lastWriteTime) || !read_bytes(offset, &hasFileInfo, sizeof(hasFileInfo))) + { + return false; + } + + deps.emplace_back(system::path(dir), identifier, standardInclude != 0, depHash, system::path(abs), fileSize, lastWriteTime, hasFileInfo != 0); + } + + outEntry.prefixHash = storedPrefixHash; + outEntry.pragmaStage = pragmaStage; + outEntry.prefixSize = prefixSize; + outEntry.prefixOffset = prefixOffset; + outEntry.macroDefs = std::move(macroDefs); + outEntry.dxcFlags = std::move(dxcFlags); + outEntry.dependencies = std::move(deps); + return true; + } + + static 
smart_refctd_ptr loadShaderCache(system::ISystem* system, const std::filesystem::path& path, CacheLoadStatus& status, bool skipDependencies, bool refreshIndex) + { + status = CacheLoadStatus::Missing; + if (!system) + { + status = CacheLoadStatus::Invalid; + return nullptr; + } + + if (!system->exists(path, IFileBase::ECF_READ)) + return nullptr; + + auto openFile = [&](const core::bitflag flags) -> smart_refctd_ptr + { + ISystem::future_t> future; + system->createFile(future, path, flags); + if (!future.wait()) + return nullptr; + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + return file; + }; + + smart_refctd_ptr file = openFile(bitflag(IFileBase::ECF_READ) | IFileBase::ECF_MAPPABLE); + if (!file) + file = openFile(bitflag(IFileBase::ECF_READ)); + if (!file) + { + status = CacheLoadStatus::Invalid; + return nullptr; + } + + const size_t size = file->getSize(); + if (!size) + { + status = CacheLoadStatus::Invalid; + return nullptr; + } + + const auto* mapped = static_cast(file->getMappedPointer()); + std::vector data; + std::span serialized; + if (mapped) + { + serialized = std::span(mapped, size); + } + else + { + data.resize(size); + IFile::success_t succ; + file->read(succ, data.data(), 0, size); + if (!succ || succ.getBytesProcessed(true) != size) + { + status = CacheLoadStatus::Invalid; + return nullptr; + } + serialized = std::span(data.data(), data.size()); + } + + const auto indexPath = makeShaderCacheIndexPath(path); + if (refreshIndex || !system->exists(indexPath, IFileBase::ECF_READ)) + writeShaderCacheIndex(system, path, serialized.data(), serialized.size()); + + auto cache = IShaderCompiler::CCache::deserialize(serialized, skipDependencies); + if (!cache) + { + status = CacheLoadStatus::Invalid; + return nullptr; + } + + status = CacheLoadStatus::Loaded; + return cache; + } + + static bool getFileInfo(system::ISystem* system, const std::filesystem::path& path, uint64_t& sizeOut, int64_t& timeOut) + { + if 
(path.empty()) + return false; + (void)system; +#ifdef _WIN32 + WIN32_FILE_ATTRIBUTE_DATA data = {}; + if (GetFileAttributesExW(path.c_str(), GetFileExInfoStandard, &data)) + { + ULARGE_INTEGER size = {}; + size.HighPart = data.nFileSizeHigh; + size.LowPart = data.nFileSizeLow; + ULARGE_INTEGER time = {}; + time.HighPart = data.ftLastWriteTime.dwHighDateTime; + time.LowPart = data.ftLastWriteTime.dwLowDateTime; + sizeOut = size.QuadPart; + timeOut = static_cast(time.QuadPart); + return sizeOut != 0; + } +#endif + std::error_code ec; + std::filesystem::directory_entry entry(path, ec); + if (ec) + return false; + const auto size = entry.file_size(ec); + if (ec) + return false; + const auto time = entry.last_write_time(ec); + if (ec) + return false; + sizeOut = size; + timeOut = time.time_since_epoch().count(); + return sizeOut != 0; + } + + static bool readBinaryFile(system::ISystem* system, const std::filesystem::path& path, void* data, size_t size) + { + if (!system) + return false; + if (!system->exists(path, IFileBase::ECF_READ)) + return false; + + ISystem::future_t> future; + system->createFile(future, path, IFileBase::ECF_READ); + if (!future.wait()) + return false; + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file || file->getSize() != size) + return false; + + IFile::success_t succ; + file->read(succ, data, 0, size); + return succ.getBytesProcessed(true) == size; + } + + static bool readBinaryFileRange(system::ISystem* system, const std::filesystem::path& path, size_t offset, size_t size, std::vector& out) + { + if (!system) + return false; + if (!system->exists(path, IFileBase::ECF_READ)) + return false; + + ISystem::future_t> future; + system->createFile(future, path, IFileBase::ECF_READ); + if (!future.wait()) + return false; + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file || file->getSize() < offset + size) + return false; + + out.resize(size); 
+ IFile::success_t succ; + file->read(succ, out.data(), offset, size); + return succ.getBytesProcessed(true) == size; + } + + static bool writeBinaryFile(system::ISystem* system, const std::filesystem::path& path, const void* data, size_t size) + { + if (!system) + return false; + + const auto parent = path.parent_path(); + if (!parent.empty() && !system->exists(parent, IFileBase::ECF_READ)) + system->createDirectory(parent); + + if (!system->exists(path, IFileBase::ECF_READ)) + { + ISystem::future_t> future; + system->createFile(future, path, bitflag(IFileBase::ECF_WRITE) | IFileBase::ECF_SHARE_READ_WRITE | IFileBase::ECF_SHARE_DELETE); + if (!future.wait()) + return false; + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file) + return false; + + IFile::success_t succ; + file->write(succ, data, 0, size); + return succ.getBytesProcessed(true) == size; + } + + std::filesystem::path tempPath = path; + tempPath += ".tmp"; + tempPath += std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count()); + system->deleteFile(tempPath); + + ISystem::future_t> future; + system->createFile(future, tempPath, bitflag(IFileBase::ECF_WRITE) | IFileBase::ECF_SHARE_READ_WRITE | IFileBase::ECF_SHARE_DELETE); + if (!future.wait()) + return false; + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file) + return false; + + IFile::success_t succ; + file->write(succ, data, 0, size); + if (succ.getBytesProcessed(true) != size) + { + system->deleteFile(tempPath); + return false; + } + + file = nullptr; + const std::error_code moveError = system->moveFileOrDirectory(tempPath, path); + if (!moveError) + return true; + + if (!system->exists(path, IFileBase::ECF_READ)) + { + system->deleteFile(tempPath); + return false; + } + + std::filesystem::path backupPath = path; + backupPath += ".bak"; + backupPath += 
std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count()); + system->deleteFile(backupPath); + if (system->moveFileOrDirectory(path, backupPath)) + { + system->deleteFile(tempPath); + return false; + } + if (system->moveFileOrDirectory(tempPath, path)) + { + system->moveFileOrDirectory(backupPath, path); + system->deleteFile(tempPath); + system->deleteFile(backupPath); + return false; + } + system->deleteFile(backupPath); + return true; + } + + static void flushSystemQueue(system::ISystem* system, const std::filesystem::path& path) + { + if (!system) + return; + + ISystem::future_t> future; + system->createFile(future, path, IFileBase::ECF_READ); + if (!future.wait()) + return; + if (auto lock = future.acquire(); lock) + lock.discard(); + } + + static bool writeShaderCache(system::ISystem* system, const std::filesystem::path& path, const IShaderCompiler::CCache& cache) + { + auto buffer = cache.serialize(); + if (!buffer) + return false; + if (!writeBinaryFile(system, path, buffer->getPointer(), buffer->getSize())) + return false; + + const auto* data = static_cast(buffer->getPointer()); + writeShaderCacheIndex(system, path, data, buffer->getSize()); + return true; + } + + static bool computeShaderCacheHash(std::string_view code, const IShaderCompiler::SCompilerOptions& options, core::blake3_hash_t& out) + { + if (options.spirvOptimizer) + return false; + + const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? 
code : options.preprocessorOptions.codeForCache; + size_t reserveSize = 0; + reserveSize += options.preprocessorOptions.sourceIdentifier.size(); + for (const auto& def : options.preprocessorOptions.extraDefines) + reserveSize += def.identifier.size() + def.definition.size(); + for (const auto& inc : options.preprocessorOptions.forceIncludes) + reserveSize += inc.size(); + reserveSize += sizeof(options.stage) + sizeof(options.preprocessorOptions.targetSpirvVersion) + sizeof(options.debugInfoFlags.value) + 3u; + reserveSize += cacheCode.size(); + + std::vector defines; + defines.reserve(options.preprocessorOptions.extraDefines.size()); + for (const auto& def : options.preprocessorOptions.extraDefines) + defines.emplace_back(def); + std::sort(defines.begin(), defines.end(), [](const IShaderCompiler::SMacroDefinition& lhs, const IShaderCompiler::SMacroDefinition& rhs) + { + return lhs.identifier < rhs.identifier; + }); + + std::vector hashable; + hashable.reserve(reserveSize); + + hashable.insert(hashable.end(), options.preprocessorOptions.sourceIdentifier.begin(), options.preprocessorOptions.sourceIdentifier.end()); + for (const auto& def : defines) + { + hashable.insert(hashable.end(), def.identifier.begin(), def.identifier.end()); + hashable.insert(hashable.end(), def.definition.begin(), def.definition.end()); + } + for (const auto& inc : options.preprocessorOptions.forceIncludes) + hashable.insert(hashable.end(), inc.begin(), inc.end()); + hashable.push_back(static_cast(options.preprocessorOptions.preserveComments)); + hashable.push_back(static_cast(options.preprocessorOptions.emitLineDirectives)); + hashable.push_back(static_cast(options.preprocessorOptions.emitPragmaDirectives)); + + const auto stage = options.stage; + const auto spirvVersion = options.preprocessorOptions.targetSpirvVersion; + const auto debugFlags = options.debugInfoFlags.value; + hashable.insert(hashable.end(), reinterpret_cast(&stage), reinterpret_cast(&stage) + sizeof(stage)); + 
hashable.insert(hashable.end(), reinterpret_cast(&spirvVersion), reinterpret_cast(&spirvVersion) + sizeof(spirvVersion)); + hashable.insert(hashable.end(), reinterpret_cast(&debugFlags), reinterpret_cast(&debugFlags) + sizeof(debugFlags)); + + hashable.insert(hashable.end(), cacheCode.begin(), cacheCode.end()); + + core::blake3_hasher hasher; + hasher.update(hashable.data(), hashable.size()); + out = static_cast(hasher); + return true; + } + + static bool fillCompilerArgsFromOptions(const IShaderCompiler::SCompilerOptions& options, IShaderCompiler::CCache::SEntry::SCompilerArgs& out) + { + if (options.spirvOptimizer) + return false; + + json pre; + pre["sourceIdentifier"] = std::string(options.preprocessorOptions.sourceIdentifier); + json extraDefines = json::array(); + std::vector defines; + defines.reserve(options.preprocessorOptions.extraDefines.size()); + for (const auto& def : options.preprocessorOptions.extraDefines) + defines.emplace_back(def); + std::sort(defines.begin(), defines.end(), [](const IShaderCompiler::SMacroDefinition& lhs, const IShaderCompiler::SMacroDefinition& rhs) + { + return lhs.identifier < rhs.identifier; + }); + for (const auto& def : defines) + { + extraDefines.push_back({ + { "identifier", std::string(def.identifier) }, + { "definition", std::string(def.definition) } + }); + } + pre["extraDefines"] = std::move(extraDefines); + json forceIncludes = json::array(); + for (const auto& inc : options.preprocessorOptions.forceIncludes) + forceIncludes.push_back(inc); + pre["forceIncludes"] = std::move(forceIncludes); + pre["preserveComments"] = options.preprocessorOptions.preserveComments; + pre["emitLineDirectives"] = options.preprocessorOptions.emitLineDirectives; + pre["emitPragmaDirectives"] = options.preprocessorOptions.emitPragmaDirectives; + + json j; + j["shaderStage"] = static_cast(options.stage); + j["spirvVersion"] = static_cast(options.preprocessorOptions.targetSpirvVersion); + j["optimizerPasses"] = json::array(); + 
j["debugFlags"] = static_cast(options.debugInfoFlags.value); + j["preprocessorArgs"] = std::move(pre); + from_json(j, out); + return true; + } + + static bool tryLoadShaderCacheIndex(system::ISystem* system, const std::filesystem::path& cachePath, std::string_view code, const IShaderCompiler::SCompilerOptions& options, IShaderCompiler::CCache::SEntry& outEntry, ShaderCacheIndexLocation* outLocation, std::string* reason) + { + if (!system) + { + if (reason) + *reason = "no system"; + return false; + } + + const auto indexPath = makeShaderCacheIndexPath(cachePath); + if (!system->exists(indexPath, IFileBase::ECF_READ)) + { + if (reason) + *reason = "index missing"; + return false; + } + + uint64_t cacheSize = 0; + int64_t cacheTime = 0; + if (!getFileInfo(system, cachePath, cacheSize, cacheTime)) + { + if (reason) + *reason = "cache info"; + return false; + } + + ISystem::future_t> future; + system->createFile(future, indexPath, IFileBase::ECF_READ); + if (!future.wait()) + { + if (reason) + *reason = "index open"; + return false; + } + + smart_refctd_ptr file; + if (auto lock = future.acquire(); lock) + lock.move_into(file); + if (!file || file->getSize() == 0) + { + if (reason) + *reason = "index empty"; + return false; + } + + std::vector data(file->getSize()); + IFile::success_t succ; + file->read(succ, data.data(), 0, data.size()); + if (!succ || succ.getBytesProcessed(true) != data.size()) + { + if (reason) + *reason = "index read"; + return false; + } + + auto read_bytes = [&data](size_t& offset, void* dst, size_t size) -> bool + { + if (offset + size > data.size()) + return false; + std::memcpy(dst, data.data() + offset, size); + offset += size; + return true; + }; + auto read_u32 = [&read_bytes](size_t& offset, uint32_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_u64 = [&read_bytes](size_t& offset, uint64_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_i64 = [&read_bytes](size_t& offset, 
int64_t& out) -> bool { return read_bytes(offset, &out, sizeof(out)); }; + auto read_hash = [&read_bytes](size_t& offset, core::blake3_hash_t& out) -> bool { return read_bytes(offset, out.data, sizeof(out.data)); }; + auto read_string = [&read_u32, &read_bytes, &data](size_t& offset, std::string& out) -> bool + { + uint32_t size = 0; + if (!read_u32(offset, size)) + return false; + if (offset + size > data.size()) + return false; + out.assign(reinterpret_cast(data.data() + offset), size); + offset += size; + return true; + }; + + auto skip_string = [&read_u32, &data](size_t& offset) -> bool + { + uint32_t size = 0; + if (!read_u32(offset, size)) + return false; + if (offset + size > data.size()) + return false; + offset += size; + return true; + }; + auto skip_bytes = [&data](size_t& offset, size_t size) -> bool + { + if (offset + size > data.size()) + return false; + offset += size; + return true; + }; + + size_t offset = 0; + uint32_t magic = 0; + uint32_t version = 0; + if (!read_u32(offset, magic) || !read_u32(offset, version)) + return false; + if (magic != 0x4E534349u || version != 2u) + { + if (reason) + *reason = "index header"; + return false; + } + + std::string cacheVersion; + if (!read_string(offset, cacheVersion)) + return false; + if (cacheVersion != IShaderCompiler::CCache::VERSION) + { + if (reason) + *reason = "cache version"; + return false; + } + + uint64_t indexCacheSize = 0; + int64_t indexCacheTime = 0; + if (!read_u64(offset, indexCacheSize) || !read_i64(offset, indexCacheTime)) + return false; + if (indexCacheSize != cacheSize || indexCacheTime != cacheTime) + { + if (reason) + *reason = "cache mismatch size=" + std::to_string(indexCacheSize) + "/" + std::to_string(cacheSize) + + " time=" + std::to_string(indexCacheTime) + "/" + std::to_string(cacheTime); + return false; + } + + uint32_t entryCount = 0; + if (!read_u32(offset, entryCount)) + return false; + + core::blake3_hash_t targetHash = {}; + if (!computeShaderCacheHash(code, options, 
targetHash)) + { + if (reason) + *reason = "hash compute"; + return false; + } + + for (uint32_t i = 0; i < entryCount; ++i) + { + core::blake3_hash_t hash = {}; + if (!read_hash(offset, hash)) + return false; + + uint64_t spirvOffset = 0; + uint64_t spirvSize = 0; + uint64_t uncompressedSize = 0; + core::blake3_hash_t uncompressedHash = {}; + uint8_t compression = 0; + uint32_t depCount = 0; + if (!read_u64(offset, spirvOffset) || !read_u64(offset, spirvSize) || !read_u64(offset, uncompressedSize) || !read_hash(offset, uncompressedHash) || + !read_bytes(offset, &compression, sizeof(compression)) || !read_u32(offset, depCount)) + return false; + + const bool match = (hash == targetHash); + if (match && (spirvSize == 0 || spirvOffset + spirvSize > cacheSize)) + { + if (reason) + *reason = "cache range"; + return false; + } + std::vector deps; + if (match) + deps.reserve(depCount); + + for (uint32_t d = 0; d < depCount; ++d) + { + if (!match) + { + if (!skip_string(offset) || !skip_string(offset) || !skip_string(offset)) + return false; + if (!skip_bytes(offset, sizeof(uint8_t) + sizeof(core::blake3_hash_t) + sizeof(uint64_t) + sizeof(int64_t) + sizeof(uint8_t))) + return false; + continue; + } + + std::string dir; + std::string identifier; + std::string abs; + uint8_t standardInclude = 0; + core::blake3_hash_t depHash = {}; + uint64_t fileSize = 0; + int64_t lastWriteTime = 0; + uint8_t hasFileInfo = 0; + if (!read_string(offset, dir) || !read_string(offset, identifier) || !read_string(offset, abs) || + !read_bytes(offset, &standardInclude, sizeof(standardInclude)) || !read_hash(offset, depHash) || + !read_u64(offset, fileSize) || !read_i64(offset, lastWriteTime) || !read_bytes(offset, &hasFileInfo, sizeof(hasFileInfo))) + { + return false; + } + + deps.emplace_back(system::path(dir), identifier, standardInclude != 0, depHash, system::path(abs), fileSize, lastWriteTime, hasFileInfo != 0); + } + + if (!match) + continue; + + if (outLocation) + { + 
outLocation->spirvOffset = spirvOffset; + outLocation->spirvSize = spirvSize; + } + + const std::string_view cacheCode = options.preprocessorOptions.codeForCache.empty() ? code : options.preprocessorOptions.codeForCache; + outEntry = {}; + outEntry.mainFileContents.assign(cacheCode.begin(), cacheCode.end()); + if (!fillCompilerArgsFromOptions(options, outEntry.compilerArgs)) + return false; + outEntry.hash = targetHash; + outEntry.lookupHash = std::hash{}(outEntry.hash); + outEntry.dependencies = std::move(deps); + outEntry.uncompressedSize = uncompressedSize; + outEntry.uncompressedContentHash = uncompressedHash; + outEntry.compression = static_cast(compression); + return true; + } + + if (reason) + *reason = "entry not found"; + return false; + } + + + static std::vector expandJoinedArgs(const std::vector& args) + { + std::vector out; + out.reserve(args.size()); + + auto split = [&](const std::string& a, const char* p) + { + const size_t n = std::strlen(p); + if (a.rfind(p, 0) == 0 && a.size() > n) + { + out.emplace_back(p); + out.emplace_back(a.substr(n)); + return true; + } + return false; + }; + + for (const auto& a : args) + { + if (split(a, "-I")) continue; + if (split(a, "-MF")) continue; + if (split(a, "-Fo")) continue; + if (split(a, "-Fc")) continue; + out.push_back(a); + } + + return out; + } + + void dumpBuildInfo(const argparse::ArgumentParser& program) + { + json j; + auto& modules = j["modules"]; + + auto serialize = [&](const gtml::GitInfo& info, std::string_view target) + { + auto& s = modules[target.data()]; + s["isPopulated"] = info.isPopulated; + s["hasUncommittedChanges"] = info.hasUncommittedChanges.has_value() ? 
json(info.hasUncommittedChanges.value()) : json("UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"); + s["commitAuthorName"] = info.commitAuthorName; + s["commitAuthorEmail"] = info.commitAuthorEmail; + s["commitHash"] = info.commitHash; + s["commitShortHash"] = info.commitShortHash; + s["commitDate"] = info.commitDate; + s["commitSubject"] = info.commitSubject; + s["commitBody"] = info.commitBody; + s["describe"] = info.describe; + s["branchName"] = info.branchName; + s["latestTag"] = info.latestTag; + s["latestTagName"] = info.latestTagName; + }; + + serialize(gtml::nabla_git_info, "nabla"); + serialize(gtml::dxc_git_info, "dxc"); + + const auto pretty = j.dump(4); + std::cout << pretty << std::endl; + + std::filesystem::path oPath = "build-info.json"; + if (program.is_used("--file")) + { + const auto filePath = program.get("--file"); + if (!filePath.empty()) + oPath = filePath; + } + + if (!m_system) + { + if (m_logger) + m_logger->log("Failed to create system for writing \"%s\"", ILogger::ELL_ERROR, oPath.string().c_str()); + std::exit(-1); + } + + if (!writeBinaryFile(m_system.get(), oPath, pretty.data(), pretty.size())) + { + if (m_logger) + m_logger->log("Failed to write \"%s\"", ILogger::ELL_ERROR, oPath.string().c_str()); + std::exit(-1); + } + + if (m_logger) + m_logger->log("Saved \"%s\"", ILogger::ELL_INFO, oPath.string().c_str()); + } + + RunResult runShaderJob(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& dep, const ShaderCacheConfig& shaderCache, const PreprocessCacheConfig& preCache, const bool preprocessOnly, std::string_view outputFilepath, std::string_view preprocessedOutputPath, const bool verbose, const bool reportEnabled) + { + RunResult r; + if (reportEnabled) + { + r.report = json::object(); + r.report["shader_cache"] = json::object(); + r.report["preprocess_cache"] = json::object(); + r.report["preamble"] = json::object(); + r.report["compile"] = json::object(); + 
r.report["preprocess"] = json::object(); + r.report["output"] = json::object(); + r.report["depfile"] = json::object(); + } + auto makeIncludeFinder = [&]() + { + auto finder = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + auto loader = finder->getDefaultFileSystemLoader(); + for (const auto& p : m_include_search_paths) + finder->addSearchPath(p, loader); + return finder; + }; + + const char* codePtr = (const char*)shader->getContent()->getPointer(); + const size_t codeSize = shader->getContent()->getSize(); + std::string_view code(codePtr, codeSize); + if (!code.empty() && code.back() == '\0') + code.remove_suffix(1); + const bool useShaderCache = shaderCache.enabled && !preprocessOnly; + const bool usePreCache = preCache.enabled && !preprocessOnly; + const bool needCacheKey = useShaderCache || usePreCache; + CHLSLCompiler::SPreprocessorOptions preOpt = {}; + preOpt.sourceIdentifier = sourceIdentifier; + preOpt.logger = m_logger.get(); + preOpt.forceIncludes = std::span(m_force_includes); + preOpt.depfile = false; + preOpt.depfilePath = dep.path; + preOpt.preserveComments = preprocessOnly; + preOpt.emitLineDirectives = preprocessOnly; + preOpt.fastSafeValidation = useShaderCache || usePreCache; + std::string codeForCacheStorage; + if (needCacheKey && !sourceIdentifier.empty()) + { + uint64_t srcSize = 0; + int64_t srcTime = 0; + const std::filesystem::path srcPath{std::string(sourceIdentifier)}; + if (getFileInfo(m_system.get(), srcPath, srcSize, srcTime)) + { + codeForCacheStorage.reserve(code.size() + 64); + codeForCacheStorage.append(code.data(), code.size()); + codeForCacheStorage.append("\n// nsc-file-info "); + codeForCacheStorage.append(std::to_string(srcSize)); + codeForCacheStorage.push_back(':'); + codeForCacheStorage.append(std::to_string(srcTime)); + preOpt.codeForCache = codeForCacheStorage; + if (verbose) + { + m_logger->log("Cache key file info: size=%llu, mtime=%lld.", ILogger::ELL_PERFORMANCE, + static_cast(srcSize), + 
static_cast(srcTime)); + } + } + } + if (needCacheKey && preOpt.codeForCache.empty()) + preOpt.codeForCache = code; + + CHLSLCompiler::SOptions opt = {}; + opt.stage = static_cast(shaderStage); + opt.preprocessorOptions = preOpt; + opt.debugInfoFlags = bitflag(IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT); + opt.dxcOptions = std::span(m_arguments); + + auto writeTextFile = [&](std::string_view path, std::string_view contents) -> bool + { + if (path.empty()) + return false; + return writeBinaryFile(m_system.get(), std::filesystem::path(std::string(path)), contents.data(), contents.size()); + }; + + const bool validateCacheDeps = true; + if (reportEnabled) + { + r.report["shader_cache"]["enabled"] = useShaderCache; + r.report["preprocess_cache"]["enabled"] = usePreCache; + r.report["preamble"]["enabled"] = usePreCache && preCache.preamble; + r.report["compile"]["called"] = false; + r.report["compile"]["ms"] = 0; + r.report["preprocess"]["called"] = false; + r.report["preprocess"]["ms"] = 0; + r.report["depfile"]["enabled"] = dep.enabled; + r.report["depfile"]["written"] = false; + r.report["depfile"]["ms"] = 0; + } + + struct ShaderCacheProbeResult + { + CacheLoadStatus status = CacheLoadStatus::Missing; + bool hit = false; + bool entryReady = false; + bool depsUpdated = false; + bool usedIndex = false; + bool cachePartial = false; + bool hasIndexLocation = false; + smart_refctd_ptr cacheObj; + IShaderCompiler::CCache::SEntry entry; + ShaderCacheIndexLocation indexLocation = {}; + std::chrono::nanoseconds duration = {}; + std::chrono::nanoseconds loadDuration = {}; + std::chrono::nanoseconds validateDuration = {}; + }; + + struct PreprocessCacheProbeResult + { + bool skipped = false; + bool updateSkipped = false; + bool ok = false; + IShaderCompiler::SPreprocessCacheResult result = {}; + IShaderCompiler::CPreprocessCache::ELoadStatus loadStatus = IShaderCompiler::CPreprocessCache::ELoadStatus::Missing; + smart_refctd_ptr cacheObj; + std::string body; + bool 
hasPrefix = false; + std::chrono::nanoseconds duration = {}; + }; + + ShaderCacheProbeResult shaderProbe; + PreprocessCacheProbeResult preProbe; + using clock_t = std::chrono::high_resolution_clock; + auto toMs = [](const std::chrono::nanoseconds duration) -> long long + { + return std::chrono::duration_cast(duration).count(); + }; + auto cacheMissReason = [](CacheLoadStatus status) -> const char* + { + if (status == CacheLoadStatus::Missing) + return "cache file missing; first build, cleaned, output moved, or out of date"; + if (status == CacheLoadStatus::Invalid) + return "cache file invalid or version mismatch"; + return "input/deps/options changed; cache invalidated"; + }; + const auto probeStart = clock_t::now(); + + core::smart_refctd_ptr sharedFinder; + auto getFinder = [&]() -> IShaderCompiler::CIncludeFinder* + { + if (!sharedFinder) + sharedFinder = makeIncludeFinder(); + return sharedFinder.get(); + }; + core::smart_refctd_ptr sharedCompiler; + auto getCompiler = [&]() -> CHLSLCompiler* + { + if (!sharedCompiler) + sharedCompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + return sharedCompiler.get(); + }; + + if (useShaderCache) + { + const auto start = clock_t::now(); + const auto loadStart = clock_t::now(); + bool indexExists = false; + if (m_system) + { + const auto indexPath = makeShaderCacheIndexPath(shaderCache.path); + indexExists = m_system->exists(indexPath, IFileBase::ECF_READ); + } + IShaderCompiler::CCache::SEntry indexedEntry; + ShaderCacheIndexLocation indexLocation = {}; + std::string indexReason; + if (tryLoadShaderCacheIndex(m_system.get(), shaderCache.path, code, opt, indexedEntry, &indexLocation, &indexReason)) + { + shaderProbe.cacheObj = make_smart_refctd_ptr(); + shaderProbe.cacheObj->insert(std::move(indexedEntry)); + shaderProbe.status = CacheLoadStatus::Loaded; + shaderProbe.usedIndex = true; + shaderProbe.cachePartial = true; + shaderProbe.indexLocation = indexLocation; + shaderProbe.hasIndexLocation = 
indexLocation.spirvSize != 0; + if (verbose && m_logger) + m_logger->log("Shader cache index hit.", ILogger::ELL_DEBUG); + } + else + { + if (indexExists && verbose && m_logger) + m_logger->log("Shader cache index miss (%s).", ILogger::ELL_DEBUG, indexReason.empty() ? "unknown" : indexReason.c_str()); + const bool refreshIndex = indexExists && indexReason.rfind("cache mismatch", 0) == 0; + shaderProbe.cacheObj = loadShaderCache(m_system.get(), shaderCache.path, shaderProbe.status, false, refreshIndex); + shaderProbe.cachePartial = false; + } + const auto loadEnd = clock_t::now(); + shaderProbe.loadDuration = loadEnd - loadStart; + if (!shaderProbe.cacheObj) + shaderProbe.cacheObj = make_smart_refctd_ptr(); + shaderProbe.cacheObj->setDefaultCompression(shaderCache.compression); + if (shaderProbe.status == CacheLoadStatus::Loaded) + { + auto* finder = getFinder(); + const auto validateStart = clock_t::now(); + shaderProbe.hit = shaderProbe.cacheObj->findEntryForCode(code, opt, finder, shaderProbe.entry, validateCacheDeps, &shaderProbe.depsUpdated, opt.preprocessorOptions.fastSafeValidation); + const auto validateEnd = clock_t::now(); + shaderProbe.entryReady = shaderProbe.hit; + shaderProbe.validateDuration = validateEnd - validateStart; + if (!shaderProbe.hit && shaderProbe.usedIndex && verbose && m_logger) + m_logger->log("Shader cache index entry rejected, treating as miss.", ILogger::ELL_DEBUG); + } + shaderProbe.duration = clock_t::now() - start; + } + + if (usePreCache) + { + if (useShaderCache && shaderProbe.hit) + { + preProbe.skipped = true; + preProbe.ok = true; + preProbe.duration = {}; + } + else + { + const auto start = clock_t::now(); + auto* finder = getFinder(); + bool preIndexExists = false; + if (m_system) + preIndexExists = m_system->exists(makePreprocessCacheIndexPath(preCache.path), IFileBase::ECF_READ); + + const auto codeProbe = IShaderCompiler::CPreprocessCache::probe(code, nullptr, IShaderCompiler::CPreprocessCache::ELoadStatus::Loaded, 
preOpt); + if (preCache.preamble) + { + preProbe.hasPrefix = codeProbe.hasPrefix; + preProbe.body.assign(codeProbe.body.data(), codeProbe.body.size()); + } + PreprocessCacheIndexEntry preIndexEntry; + std::string preIndexReason; + bool preIndexHit = false; + bool preIndexNeedsRefresh = !preIndexExists; + bool preIndexSkipLoad = !codeProbe.hasPrefix; + + if (codeProbe.hasPrefix && tryLoadPreprocessCacheIndex(m_system.get(), preCache.path, codeProbe.prefixHash, preIndexEntry, &preIndexReason)) + { + preIndexHit = true; + if (verbose && m_logger) + m_logger->log("Preprocess cache index hit.", ILogger::ELL_DEBUG); + } + else + { + if (preIndexExists && verbose && m_logger) + m_logger->log("Preprocess cache index miss (%s).", ILogger::ELL_DEBUG, preIndexReason.empty() ? "unknown" : preIndexReason.c_str()); + if (preIndexReason.rfind("cache mismatch", 0) == 0) + preIndexNeedsRefresh = true; + if (preIndexReason == "prefix mismatch" || preIndexReason == "cache version" || preIndexReason == "index header" || preIndexReason == "index empty") + preIndexSkipLoad = true; + } + + if (preIndexHit) + { + std::vector prefixBytes; + if (preIndexEntry.prefixSize) + { + if (!readBinaryFileRange(m_system.get(), preCache.path, static_cast(preIndexEntry.prefixOffset), static_cast(preIndexEntry.prefixSize), prefixBytes)) + preIndexHit = false; + } + if (preIndexHit) + { + auto cacheObj = make_smart_refctd_ptr(); + IShaderCompiler::CPreprocessCache::SEntry entry; + entry.prefixHash = preIndexEntry.prefixHash; + entry.pragmaStage = preIndexEntry.pragmaStage; + entry.macroDefs = std::move(preIndexEntry.macroDefs); + entry.dxcFlags = std::move(preIndexEntry.dxcFlags); + entry.dependencies = std::move(preIndexEntry.dependencies); + if (!prefixBytes.empty()) + entry.preprocessedPrefix.assign(reinterpret_cast(prefixBytes.data()), prefixBytes.size()); + cacheObj->setEntry(std::move(entry)); + + bool depsUpdated = false; + const bool depsValid = cacheObj->validateDependencies(finder, 
&depsUpdated, preOpt.fastSafeValidation); + if (depsValid) + { + IShader::E_SHADER_STAGE stageOverrideThread = static_cast(shaderStage); + if (preIndexEntry.pragmaStage != static_cast(IShader::E_SHADER_STAGE::ESS_UNKNOWN)) + stageOverrideThread = static_cast(preIndexEntry.pragmaStage); + + preProbe.cacheObj = cacheObj; + preProbe.loadStatus = IShaderCompiler::CPreprocessCache::ELoadStatus::Loaded; + preProbe.result.cacheUsed = true; + preProbe.result.cacheHit = true; + preProbe.result.cacheUpdated = depsUpdated; + preProbe.result.status = IShaderCompiler::CPreprocessCache::EProbeStatus::Hit; + preProbe.result.stage = stageOverrideThread; + if (preCache.preamble) + { + preProbe.ok = true; + preProbe.duration = clock_t::now() - start; + } + else + { + preProbe.result.code = cacheObj->buildCombinedCode(codeProbe.body, sourceIdentifier); + preProbe.ok = !preProbe.result.code.empty(); + if (preProbe.ok) + preProbe.duration = clock_t::now() - start; + else + preIndexHit = false; + } + } + else + { + preIndexHit = false; + } + } + } + + if (!preIndexHit) + { + if (preIndexSkipLoad) + { + preProbe.cacheObj = make_smart_refctd_ptr(); + preProbe.loadStatus = IShaderCompiler::CPreprocessCache::ELoadStatus::Missing; + } + else + { + const bool loadPrefix = preCache.preamble; + preProbe.cacheObj = IShaderCompiler::CPreprocessCache::loadFromFile(preCache.path, preProbe.loadStatus, loadPrefix); + if (!preProbe.cacheObj) + preProbe.cacheObj = make_smart_refctd_ptr(); + } + + auto* localCompiler = getCompiler(); + CHLSLCompiler::SPreprocessorOptions preOptThread = preOpt; + preOptThread.includeFinder = finder; + IShader::E_SHADER_STAGE stageOverrideThread = static_cast(shaderStage); + preProbe.result = localCompiler->preprocessWithCache(code, stageOverrideThread, preOptThread, *preProbe.cacheObj, preProbe.loadStatus, sourceIdentifier); + preProbe.ok = preProbe.result.ok; + preProbe.duration = clock_t::now() - start; + + if (preIndexNeedsRefresh && preProbe.loadStatus == 
IShaderCompiler::CPreprocessCache::ELoadStatus::Loaded && preProbe.cacheObj && preProbe.cacheObj->hasEntry()) + { + preProbe.cacheObj->buildCombinedCode(std::string_view{}, std::string_view{}); + writePreprocessCacheIndex(m_system.get(), preCache.path, *preProbe.cacheObj); + } + } + } + } + + const auto probeEnd = clock_t::now(); + if (reportEnabled) + { + r.report["cache_probe_ms"] = toMs(probeEnd - probeStart); + auto& sc = r.report["shader_cache"]; + sc["hit"] = shaderProbe.hit; + sc["used_index"] = shaderProbe.usedIndex; + sc["probe_ms"] = toMs(shaderProbe.duration); + sc["load_ms"] = toMs(shaderProbe.loadDuration); + sc["validate_ms"] = toMs(shaderProbe.validateDuration); + sc["status"] = useShaderCache ? (shaderProbe.hit ? "hit" : "miss") : "disabled"; + if (useShaderCache && !shaderProbe.hit) + sc["miss_reason"] = cacheMissReason(shaderProbe.status); + + auto& pc = r.report["preprocess_cache"]; + pc["hit"] = preProbe.result.cacheHit; + pc["used"] = preProbe.result.cacheUsed; + pc["skipped"] = preProbe.skipped; + pc["updated"] = preProbe.result.cacheUpdated; + pc["probe_ms"] = toMs(preProbe.duration); + if (!usePreCache) + pc["status"] = "disabled"; + else if (preProbe.skipped) + pc["status"] = "skipped"; + else if (preProbe.result.cacheHit) + pc["status"] = "hit"; + else + pc["status"] = "miss"; + if (usePreCache && !preProbe.skipped && !preProbe.result.cacheHit) + pc["miss_reason"] = IShaderCompiler::CPreprocessCache::getProbeReason(preProbe.result.status); + } + + std::string preprocessedCode; + bool preprocessedReady = false; + bool preprocessedFromFullPreprocess = false; + bool preprocessedNeedsWrite = false; + bool preambleUsed = false; + std::vector preambleDependencies; + std::vector preambleDxcFlags; + std::chrono::nanoseconds preambleDuration = {}; + std::span preambleDxcFlagsView = {}; + std::string_view codeToCompile = code; + smart_refctd_ptr preCacheObj; + IShader::E_SHADER_STAGE stageOverride = static_cast(shaderStage); + auto 
ensureIndexSpirvLoaded = [&](IShaderCompiler::CCache::SEntry& entry) -> bool + { + if (entry.spirv) + return true; + if (!shaderProbe.hasIndexLocation || shaderProbe.indexLocation.spirvSize == 0) + return false; + if (!m_system) + return false; + std::vector compressed; + if (!readBinaryFileRange(m_system.get(), shaderCache.path, static_cast(shaderProbe.indexLocation.spirvOffset), + static_cast(shaderProbe.indexLocation.spirvSize), compressed)) + return false; + auto memoryResource = core::make_smart_refctd_ptr>(std::move(compressed)); + entry.spirv = ICPUBuffer::create({ { static_cast(shaderProbe.indexLocation.spirvSize) }, + memoryResource->getBacker().data(), std::move(memoryResource) }, core::adopt_memory); + return static_cast(entry.spirv); + }; + + auto ensureFullCacheForWrite = [&](smart_refctd_ptr& cacheObj) -> bool + { + if (!shaderProbe.cachePartial) + return true; + CacheLoadStatus fullStatus = CacheLoadStatus::Missing; + auto fullCache = loadShaderCache(m_system.get(), shaderCache.path, fullStatus, false, false); + if (!fullCache) + return false; + fullCache->setDefaultCompression(shaderCache.compression); + cacheObj = std::move(fullCache); + shaderProbe.cachePartial = false; + return true; + }; + + auto writeDepfileFromDependencies = [&](const IShaderCompiler::CCache::SEntry::dependency_container_t& dependencies, bool allowSkipIfExists) -> bool + { + if (!dep.enabled) + return true; + if (preOpt.depfilePath.empty()) + { + m_logger->log("Depfile path is empty.", ILogger::ELL_ERROR); + return false; + } + if (allowSkipIfExists && m_system && m_system->exists(preOpt.depfilePath, IFileBase::ECF_READ)) + return true; + + auto escapeDepPath = [](const std::string& path) -> std::string + { + std::string normalized = path; + std::replace(normalized.begin(), normalized.end(), '\\', '/'); + std::string out; + out.reserve(normalized.size()); + for (const char c : normalized) + { + if (c == ' ' || c == '#') + out.push_back('\\'); + if (c == '$') + { + 
out.push_back('$'); + out.push_back('$'); + continue; + } + out.push_back(c); + } + return out; + }; + + std::vector depPaths; + depPaths.reserve(dependencies.size() + 1); + + auto addDepPath = [&](std::filesystem::path path) + { + if (path.empty()) + return; + if (path.is_relative()) + return; + auto normalized = path.generic_string(); + if (normalized.empty() || normalized.find_first_of("\r\n") != std::string::npos) + return; + depPaths.emplace_back(std::move(normalized)); + }; + + if (!preOpt.sourceIdentifier.empty()) + addDepPath(std::filesystem::path(std::string(preOpt.sourceIdentifier))); + if (!m_executablePath.empty()) + addDepPath(m_executablePath); + + for (const auto& depEntry : dependencies) + { + if (!depEntry.getHasFileInfo()) + continue; + const auto& absPath = depEntry.getAbsolutePath(); + if (absPath.empty()) + continue; + addDepPath(absPath); + } + + std::sort(depPaths.begin(), depPaths.end()); + depPaths.erase(std::unique(depPaths.begin(), depPaths.end()), depPaths.end()); + + std::filesystem::path targetPath = preOpt.depfilePath; + if (targetPath.extension() == ".d") + targetPath.replace_extension(); + const std::string target = escapeDepPath(targetPath.generic_string()); + + std::string depfileContents; + depfileContents.append(target); + depfileContents.append(":"); + if (!depPaths.empty()) + { + depfileContents.append(" \\\n"); + for (size_t index = 0; index < depPaths.size(); ++index) + { + depfileContents.append(" "); + depfileContents.append(escapeDepPath(depPaths[index])); + if (index + 1 < depPaths.size()) + depfileContents.append(" \\\n"); + } + } + depfileContents.append("\n"); + + return writeBinaryFile(m_system.get(), std::filesystem::path(preOpt.depfilePath), depfileContents.data(), depfileContents.size()); + }; + + auto isOutputUpToDate = [&](const IShaderCompiler::CCache::SEntry& entry) -> bool + { + if (outputFilepath.empty()) + return false; + uint64_t outSize = 0; + int64_t outTime = 0; + if (!getFileInfo(m_system.get(), 
std::filesystem::path(outputFilepath), outSize, outTime)) + return false; + if (entry.uncompressedSize == 0 || outSize != entry.uncompressedSize) + return false; + const auto hashPath = makeOutputHashPath(std::filesystem::path(outputFilepath)); + OutputHashRecord record = {}; + const bool hashOk = readBinaryFile(m_system.get(), hashPath, &record, sizeof(record)); + if (!hashOk || record.size != entry.uncompressedSize || record.hash != entry.uncompressedContentHash) + return false; + uint64_t hashSize = 0; + int64_t hashTime = 0; + if (!getFileInfo(m_system.get(), hashPath, hashSize, hashTime)) + return false; + return outTime <= hashTime; + }; + + if (verbose && (useShaderCache || usePreCache)) + { + if (useShaderCache) + { + if (shaderProbe.loadDuration.count()) + m_logger->log("Shader cache load took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(shaderProbe.loadDuration))); + if (shaderProbe.validateDuration.count()) + m_logger->log("Shader cache validate took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(shaderProbe.validateDuration))); + m_logger->log("Shader cache lookup took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(shaderProbe.duration))); + } + if (usePreCache) + m_logger->log("Preprocess cache lookup took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(preProbe.duration))); + m_logger->log("Total cache probe took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(toMs(std::chrono::duration_cast(probeEnd - probeStart)))); + } + + smart_refctd_ptr cacheObj = shaderProbe.cacheObj; + if (!cacheObj && dep.enabled && !preprocessOnly) + cacheObj = make_smart_refctd_ptr(); + if (cacheObj) + cacheObj->setDefaultCompression(shaderCache.compression); + CacheLoadStatus cacheStatus = shaderProbe.status; + const bool shaderCacheHitExpected = shaderProbe.hit; + + if (usePreCache && preCache.verbose && useShaderCache) + { + if (shaderCacheHitExpected) + m_logger->log("Cache hit! 
Preprocess cache skipped.", ILogger::ELL_DEBUG); + else + m_logger->log("Cache miss! Cold run (%s). Checking preprocess cache.", ILogger::ELL_DEBUG, cacheMissReason(cacheStatus)); + } + + if (usePreCache && !shaderCacheHitExpected) + { + if (!preProbe.ok) + return r; + if (preCache.verbose) + { + if (preProbe.result.cacheHit) + m_logger->log("Preprocess cache hit!", ILogger::ELL_DEBUG); + else + m_logger->log("Preprocess cache miss! Cold run (%s).", ILogger::ELL_DEBUG, IShaderCompiler::CPreprocessCache::getProbeReason(preProbe.result.status)); + } + if (preProbe.result.cacheUsed) + { + stageOverride = preProbe.result.stage; + preCacheObj = preProbe.cacheObj; + if (!preCache.preamble) + { + preprocessedCode = std::move(preProbe.result.code); + preprocessedReady = true; + preprocessedNeedsWrite = !preprocessedOutputPath.empty(); + preprocessedFromFullPreprocess = true; + } + } + } + else if (usePreCache && preCache.verbose) + { + if (preProbe.skipped) + { + m_logger->log("Preprocess cache lookup skipped (shader cache hit).", ILogger::ELL_DEBUG); + } + else if (preProbe.ok) + { + if (preProbe.result.cacheHit) + m_logger->log("Preprocess cache hit (ignored, shader cache hit).", ILogger::ELL_DEBUG); + else + m_logger->log("Preprocess cache miss! Cold run (%s). 
(ignored, shader cache hit).", ILogger::ELL_DEBUG, IShaderCompiler::CPreprocessCache::getProbeReason(preProbe.result.status)); + } + else + { + m_logger->log("Preprocess cache failed (ignored, shader cache hit).", ILogger::ELL_DEBUG); + } + } + if (usePreCache && preProbe.result.cacheUpdated && preProbe.cacheObj) + { + const auto preCacheWriteStart = clock_t::now(); + const bool preCacheWritten = IShaderCompiler::CPreprocessCache::writeToFile(preCache.path, *preProbe.cacheObj); + const auto preCacheWriteEnd = clock_t::now(); + if (verbose) + { + m_logger->log("Preprocess cache write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preCacheWriteEnd - preCacheWriteStart))); + } + if (preCacheWritten) + { + const auto preCacheIndexStart = clock_t::now(); + writePreprocessCacheIndex(m_system.get(), preCache.path, *preProbe.cacheObj); + const auto preCacheIndexEnd = clock_t::now(); + if (verbose) + { + m_logger->log("Preprocess cache index write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preCacheIndexEnd - preCacheIndexStart))); + } + } + } + + if (useShaderCache && shaderProbe.hit && shaderProbe.entryReady) + { + if (verbose) + m_logger->log("Shader cache hit: using cached SPIR-V.", ILogger::ELL_DEBUG); + if (shaderProbe.depsUpdated) + { + bool canWrite = true; + if (shaderProbe.cachePartial) + { + if (ensureFullCacheForWrite(cacheObj)) + { + IShaderCompiler::CCache::SEntry fullEntry; + if (cacheObj->findEntryForCode(code, opt, nullptr, fullEntry, false, nullptr, false)) + { + fullEntry.dependencies.clear(); + fullEntry.dependencies.reserve(shaderProbe.entry.dependencies.size()); + for (auto& dep : shaderProbe.entry.dependencies) + fullEntry.dependencies.emplace_back(dep); + cacheObj->insert(std::move(fullEntry)); + } + else + { + canWrite = false; + if (verbose && m_logger) + m_logger->log("Shader cache write skipped (entry missing after reload).", ILogger::ELL_DEBUG); + } + } + else + { + canWrite = false; + if (verbose && m_logger) + 
m_logger->log("Shader cache write skipped (failed to load full cache).", ILogger::ELL_DEBUG); + } + } + if (canWrite) + { + const auto cacheWriteStart = clock_t::now(); + if (!writeShaderCache(m_system.get(), shaderCache.path, *cacheObj)) + m_logger->log("Failed to write shader cache: %s", ILogger::ELL_WARNING, shaderCache.path.string().c_str()); + if (verbose) + { + const auto cacheWriteEnd = clock_t::now(); + m_logger->log("Shader cache write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(cacheWriteEnd - cacheWriteStart))); + } + } + } + if (isOutputUpToDate(shaderProbe.entry)) + { + const auto hitDepfileStart = clock_t::now(); + if (!writeDepfileFromDependencies(shaderProbe.entry.dependencies, true)) + return r; + const auto hitDepfileEnd = clock_t::now(); + if (reportEnabled) + { + r.report["depfile"]["written"] = m_system && m_system->exists(preOpt.depfilePath, IFileBase::ECF_READ); + r.report["depfile"]["ms"] = toMs(hitDepfileEnd - hitDepfileStart); + } + r.ok = true; + if (verbose) + { + m_logger->log("HIT timings: decompress=0 ms, depfile=%lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(hitDepfileEnd - hitDepfileStart))); + } + return r; + } + const auto hitDecompressStart = clock_t::now(); + if (!ensureIndexSpirvLoaded(shaderProbe.entry)) + return r; + r.compiled = cacheObj->decompressEntry(shaderProbe.entry); + const auto hitDecompressEnd = clock_t::now(); + r.ok = bool(r.compiled); + if (!r.ok) + return r; + r.view = { (const char*)r.compiled->getContent()->getPointer(), r.compiled->getContent()->getSize() }; + + const auto hitDepfileStart = clock_t::now(); + if (!writeDepfileFromDependencies(shaderProbe.entry.dependencies, true)) + return r; + const auto hitDepfileEnd = clock_t::now(); + if (reportEnabled) + { + r.report["depfile"]["written"] = m_system && m_system->exists(preOpt.depfilePath, IFileBase::ECF_READ); + r.report["depfile"]["ms"] = toMs(hitDepfileEnd - hitDepfileStart); + } + if (verbose) + { + m_logger->log("HIT 
timings: decompress=%lld ms, depfile=%lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(hitDecompressEnd - hitDecompressStart)), + static_cast(toMs(hitDepfileEnd - hitDepfileStart))); + } + + return r; + } + + auto* hlslcompiler = getCompiler(); + clock_t::duration preambleBodyDuration = {}; + clock_t::duration preambleAssembleDuration = {}; + clock_t::duration preambleProbeDuration = {}; + clock_t::duration preambleFinderDuration = {}; + const bool usePreamble = preCache.preamble && preCacheObj && preCacheObj->hasEntry() && !preprocessedFromFullPreprocess; + if (usePreamble) + { + const auto preambleStart = clock_t::now(); + std::string body = std::move(preProbe.body); + if (!preProbe.hasPrefix) + { + const auto preambleProbeStart = clock_t::now(); + const auto bodyProbe = IShaderCompiler::CPreprocessCache::probe(code, preCacheObj.get(), preProbe.loadStatus, preOpt); + preambleProbeDuration = clock_t::now() - preambleProbeStart; + if (!bodyProbe.hasPrefix) + return r; + body.assign(bodyProbe.body.data(), bodyProbe.body.size()); + } + + const auto& entry = preCacheObj->getEntry(); + + auto bodyStage = stageOverride; + CHLSLCompiler::SPreprocessorOptions bodyOpt = preOpt; + bodyOpt.applyForceIncludes = false; + + std::vector bodyDeps; + std::vector bodyDxcFlags; + std::string bodyPreprocessed; + if (!body.empty()) + { + const auto macroName = [](const std::string& macro) -> std::string_view + { + std::string_view name(macro); + const auto eq = name.find('='); + if (eq != std::string_view::npos) + name = name.substr(0, eq); + const auto paren = name.find('('); + if (paren != std::string_view::npos) + name = name.substr(0, paren); + while (!name.empty() && (name.back() == ' ' || name.back() == '\t')) + name.remove_suffix(1); + return name; + }; + struct StringViewHash + { + size_t operator()(std::string_view value) const noexcept + { + return std::hash{}(value); + } + }; + struct BodyScanResult + { + bool hasDirective = false; + bool usesMacro = false; + bool 
hasInclude = false; + }; + const auto buildMacroNameSet = [&](const std::vector& macros) + { + std::unordered_set names; + names.reserve(macros.size() + 8); + for (const auto& macro : macros) + { + const std::string_view name = macroName(macro); + if (!name.empty()) + names.emplace(name); + } + static constexpr std::string_view kBuiltinMacros[] = + { + "__LINE__", + "__FILE__", + "__COUNTER__", + "__DATE__", + "__TIME__", + "__TIMESTAMP__" + }; + for (const auto builtin : kBuiltinMacros) + names.emplace(builtin); + return names; + }; + const auto scanBody = [&](std::string_view text, const std::unordered_set& macroNames) -> BodyScanResult + { + BodyScanResult result = {}; + bool atLineStart = true; + bool inLineComment = false; + bool inBlockComment = false; + bool inString = false; + char stringDelim = 0; + + auto isIdentStart = [](char c) { return (c == '_') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); }; + auto isIdentChar = [](char c) { return (c == '_') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); }; + + size_t i = 0; + while (i < text.size()) + { + const char c = text[i]; + const char next = (i + 1 < text.size()) ? 
text[i + 1] : '\0'; + + if (inLineComment) + { + if (c == '\n') + { + inLineComment = false; + atLineStart = true; + } + ++i; + continue; + } + if (inBlockComment) + { + if (c == '*' && next == '/') + { + inBlockComment = false; + i += 2; + continue; + } + if (c == '\n') + atLineStart = true; + else + atLineStart = false; + ++i; + continue; + } + if (inString) + { + if (c == '\\' && i + 1 < text.size()) + { + i += 2; + continue; + } + if (c == stringDelim) + inString = false; + if (c == '\n') + atLineStart = true; + else + atLineStart = false; + ++i; + continue; + } + + if (c == '/' && next == '/') + { + inLineComment = true; + i += 2; + continue; + } + if (c == '/' && next == '*') + { + inBlockComment = true; + i += 2; + continue; + } + if (c == '"' || c == '\'') + { + inString = true; + stringDelim = c; + atLineStart = false; + ++i; + continue; + } + + if (c == '\n') + { + atLineStart = true; + ++i; + continue; + } + + if (atLineStart) + { + if (c == ' ' || c == '\t' || c == '\r') + { + ++i; + continue; + } + if (c == '#') + { + result.hasDirective = true; + size_t j = i + 1; + while (j < text.size() && (text[j] == ' ' || text[j] == '\t')) + ++j; + if (j + 7 <= text.size() && text.compare(j, 7, "include") == 0) + result.hasInclude = true; + } + atLineStart = false; + } + + if (isIdentStart(c)) + { + size_t j = i + 1; + while (j < text.size() && isIdentChar(text[j])) + ++j; + const std::string_view ident(text.data() + i, j - i); + if (!macroNames.empty() && macroNames.find(ident) != macroNames.end()) + result.usesMacro = true; + i = j; + continue; + } + + atLineStart = false; + ++i; + } + return result; + }; + const auto macroNames = buildMacroNameSet(entry.macroDefs); + const auto scan = scanBody(body, macroNames); + const bool needsPreprocess = scan.hasDirective || scan.usesMacro; + if (needsPreprocess) + { + if (!entry.macroBlock.empty()) + { + std::string withDefines; + withDefines.reserve(body.size() + entry.macroBlock.size()); + 
withDefines.append(entry.macroBlock); + withDefines.append(body); + body = std::move(withDefines); + bodyOpt.extraDefines = {}; + } + else if (!entry.macroDefs.empty()) + { + size_t reserve = body.size(); + for (const auto& macro : entry.macroDefs) + reserve += macro.size() + 12; + std::string withDefines; + withDefines.reserve(reserve); + for (const auto& macro : entry.macroDefs) + { + const auto eq = macro.find('='); + const std::string_view name = eq == std::string::npos ? std::string_view(macro) : std::string_view(macro).substr(0, eq); + const std::string_view def = eq == std::string::npos ? std::string_view() : std::string_view(macro).substr(eq + 1); + withDefines.append("#define "); + withDefines.append(name); + if (!def.empty()) + { + withDefines.push_back(' '); + withDefines.append(def); + } + withDefines.push_back('\n'); + } + withDefines.append(body); + body = std::move(withDefines); + bodyOpt.extraDefines = {}; + } + + auto* bodyDepsOut = scan.hasInclude ? &bodyDeps : nullptr; + const auto preambleFinderStart = clock_t::now(); + auto* finder = getFinder(); + preambleFinderDuration = clock_t::now() - preambleFinderStart; + bodyOpt.includeFinder = finder; + const auto bodyPreprocessStart = clock_t::now(); + bodyPreprocessed = hlslcompiler->preprocessShader(std::move(body), bodyStage, bodyOpt, bodyDxcFlags, bodyDepsOut, nullptr); + preambleBodyDuration = clock_t::now() - bodyPreprocessStart; + if (bodyPreprocessed.empty()) + return r; + } + else + { + bodyPreprocessed = body; + } + } + + stageOverride = bodyStage; + if (!bodyDxcFlags.empty()) + { + preambleDxcFlags = std::move(bodyDxcFlags); + preambleDxcFlagsView = std::span(preambleDxcFlags.data(), preambleDxcFlags.size()); + } + else + { + preambleDxcFlagsView = std::span(entry.dxcFlags.data(), entry.dxcFlags.size()); + } + + const auto preambleAssembleStart = clock_t::now(); + preprocessedCode.clear(); + size_t reserve = entry.preprocessedPrefix.size() + bodyPreprocessed.size() + 64; + for (const auto& 
flag : preambleDxcFlagsView) + reserve += flag.size() + 1; + preprocessedCode.reserve(reserve); + if (!preambleDxcFlagsView.empty()) + { + preprocessedCode.append("#pragma dxc_compile_flags "); + for (size_t i = 0; i < preambleDxcFlagsView.size(); ++i) + { + if (i) + preprocessedCode.push_back(' '); + preprocessedCode.append(preambleDxcFlagsView[i]); + } + preprocessedCode.push_back('\n'); + } + if (!entry.preprocessedPrefix.empty()) + { + preprocessedCode.append(entry.preprocessedPrefix); + if (preprocessedCode.back() != '\n') + preprocessedCode.push_back('\n'); + } + preprocessedCode.append(bodyPreprocessed); + + preambleDependencies.clear(); + preambleDependencies.reserve(entry.dependencies.size() + bodyDeps.size()); + for (const auto& dep : entry.dependencies) + preambleDependencies.emplace_back(dep.getRequestingSourceDir(), dep.getIdentifier(), dep.isStandardInclude(), dep.getHash(), dep.getAbsolutePath(), dep.getFileSize(), dep.getLastWriteTime(), dep.getHasFileInfo()); + for (const auto& dep : bodyDeps) + preambleDependencies.emplace_back(dep.getRequestingSourceDir(), dep.getIdentifier(), dep.isStandardInclude(), dep.getHash(), dep.getAbsolutePath(), dep.getFileSize(), dep.getLastWriteTime(), dep.getHasFileInfo()); + + preprocessedReady = true; + preprocessedNeedsWrite = !preprocessedOutputPath.empty(); + preambleUsed = true; + preambleAssembleDuration = clock_t::now() - preambleAssembleStart; + preambleDuration = clock_t::now() - preambleStart; + } + + if (preprocessedNeedsWrite) + { + const auto preprocessedWriteStart = clock_t::now(); + if (!writeTextFile(preprocessedOutputPath, preprocessedCode)) + return r; + const auto preprocessedWriteEnd = clock_t::now(); + if (verbose) + { + m_logger->log("Preprocessed output write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preprocessedWriteEnd - preprocessedWriteStart))); + } + } + + if (verbose && preambleUsed) + { + if (preambleProbeDuration.count()) + { + m_logger->log("Preamble body probe 
took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preambleProbeDuration))); + } + if (preambleFinderDuration.count()) + { + m_logger->log("Preamble finder setup took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preambleFinderDuration))); + } + if (preambleBodyDuration.count()) + { + m_logger->log("Preamble body lex took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preambleBodyDuration))); + } + if (preambleAssembleDuration.count()) + { + m_logger->log("Preamble assemble took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preambleAssembleDuration))); + } + m_logger->log("Preamble body preprocess took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(preambleDuration))); + } + if (reportEnabled) + { + auto& p = r.report["preamble"]; + p["used"] = preambleUsed; + p["probe_ms"] = toMs(preambleProbeDuration); + p["finder_ms"] = toMs(preambleFinderDuration); + p["body_ms"] = toMs(preambleBodyDuration); + p["assemble_ms"] = toMs(preambleAssembleDuration); + p["total_ms"] = toMs(preambleDuration); + } + + if (preprocessOnly) + { + const auto preprocessStart = std::chrono::high_resolution_clock::now(); + auto finder = makeIncludeFinder(); + preOpt.includeFinder = finder.get(); + r.text = hlslcompiler->preprocessShader(std::string(code), shaderStage, preOpt, nullptr); + r.ok = !r.text.empty(); + r.view = r.text; + const auto preprocessEnd = std::chrono::high_resolution_clock::now(); + if (verbose) + { + const auto duration = std::chrono::duration_cast(preprocessEnd - preprocessStart).count(); + m_logger->log("Preprocess took: %lld ms.", ILogger::ELL_PERFORMANCE, static_cast(duration)); + } + if (reportEnabled) + { + r.report["preprocess"]["called"] = true; + r.report["preprocess"]["ms"] = std::chrono::duration_cast(preprocessEnd - preprocessStart).count(); + } + return r; + } + + opt.stage = stageOverride; + + bool cacheHit = false; + bool canWriteCache = shaderCache.enabled && cacheObj; + if (canWriteCache && 
shaderProbe.cachePartial) + { + if (!ensureFullCacheForWrite(cacheObj)) + { + canWriteCache = false; + if (verbose && m_logger) + m_logger->log("Shader cache write disabled (failed to load full cache).", ILogger::ELL_DEBUG); + } + } + if (shaderCache.enabled && cacheObj) + { + opt.readCache = cacheObj.get(); + opt.writeCache = canWriteCache ? cacheObj.get() : nullptr; + opt.cacheHit = &cacheHit; + } + else if (dep.enabled && cacheObj) + { + opt.writeCache = cacheObj.get(); + } + + if (preprocessedReady) + { + opt.preprocessorOptions.applyForceIncludes = false; + if (preambleUsed) + { + opt.assumePreprocessed = true; + opt.dxcCompileFlagsOverride = preambleDxcFlagsView; + opt.dependencyOverrides = &preambleDependencies; + } + else if (preCacheObj && preCacheObj->hasEntry()) + { + opt.dependencyOverrides = &preCacheObj->getEntry().dependencies; + } + codeToCompile = preprocessedCode; + } + + auto* compileFinder = getFinder(); + opt.preprocessorOptions.includeFinder = compileFinder; + const auto compileStart = clock_t::now(); + r.compiled = hlslcompiler->compileToSPIRV(codeToCompile, opt); + const auto compileEnd = clock_t::now(); + r.ok = bool(r.compiled); + if (r.ok) + r.view = { (const char*)r.compiled->getContent()->getPointer(), r.compiled->getContent()->getSize() }; + if (reportEnabled) + { + r.report["compile"]["called"] = true; + r.report["compile"]["ms"] = toMs(compileEnd - compileStart); + } + if (verbose) + { + m_logger->log("Compile call took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(compileEnd - compileStart))); + } + + if (shaderCache.enabled && cacheObj) + { + const bool logShaderCache = verbose && !usePreCache; + if (logShaderCache) + { + if (cacheHit) + { + m_logger->log("Cache hit!", ILogger::ELL_DEBUG); + } + else + { + m_logger->log("Cache miss! 
Cold run (%s).", ILogger::ELL_DEBUG, cacheMissReason(cacheStatus)); + } + } + if (canWriteCache) + { + const auto cacheWriteStart = clock_t::now(); + if (!writeShaderCache(m_system.get(), shaderCache.path, *cacheObj)) + m_logger->log("Failed to write shader cache: %s", ILogger::ELL_WARNING, shaderCache.path.string().c_str()); + const auto cacheWriteEnd = clock_t::now(); + if (verbose) + { + m_logger->log("Shader cache write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(cacheWriteEnd - cacheWriteStart))); + } + } + } + + if (dep.enabled && r.ok) + { + const IShaderCompiler::CCache::SEntry::dependency_container_t* deps = nullptr; + IShaderCompiler::CCache::SEntry depEntry; + if (preambleUsed) + { + deps = &preambleDependencies; + } + else if (preCacheObj && preCacheObj->hasEntry()) + { + deps = &preCacheObj->getEntry().dependencies; + } + else if (cacheObj) + { + const auto depLookupStart = clock_t::now(); + const bool depFound = cacheObj->findEntryForCode(code, opt, compileFinder, depEntry, validateCacheDeps, nullptr, opt.preprocessorOptions.fastSafeValidation); + const auto depLookupEnd = clock_t::now(); + if (verbose) + { + m_logger->log("Depfile dependency lookup took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(depLookupEnd - depLookupStart))); + } + if (depFound) + deps = &depEntry.dependencies; + } + + if (!deps) + { + m_logger->log("Depfile requested but dependencies unavailable.", ILogger::ELL_ERROR); + r.ok = false; + return r; + } + + const auto depfileStart = clock_t::now(); + if (!writeDepfileFromDependencies(*deps, false)) + { + r.ok = false; + return r; + } + const auto depfileEnd = clock_t::now(); + if (reportEnabled) + { + r.report["depfile"]["written"] = m_system && m_system->exists(preOpt.depfilePath, IFileBase::ECF_READ); + r.report["depfile"]["ms"] = toMs(depfileEnd - depfileStart); + } + if (verbose) + { + m_logger->log("Depfile write took: %lld ms.", ILogger::ELL_PERFORMANCE, + static_cast(toMs(depfileEnd - 
depfileStart))); + } + } return r; } @@ -547,8 +3134,9 @@ class ShaderCompiler final : public IApplicationFramework smart_refctd_ptr m_system; smart_refctd_ptr m_logger; - std::vector m_arguments, m_include_search_paths; + std::vector m_arguments, m_include_search_paths, m_force_includes; smart_refctd_ptr m_assetMgr; + std::filesystem::path m_executablePath; }; NBL_MAIN_FUNC(ShaderCompiler) diff --git a/tools/nsc/test/cache_layers/CMakeLists.txt b/tools/nsc/test/cache_layers/CMakeLists.txt new file mode 100644 index 0000000000..580a427241 --- /dev/null +++ b/tools/nsc/test/cache_layers/CMakeLists.txt @@ -0,0 +1,256 @@ +include(common) + +set(NSC_JSON_REPORT ON) +set(NSC_SHADER_CACHE ON) +set(NSC_PREPROCESS_CACHE ON) + +if(NOT Python3_EXECUTABLE) + find_package(Python3 COMPONENTS Interpreter REQUIRED) +endif() + +set(NBL_NSC_CACHE_TEST_SEED "0" CACHE STRING "Seed for NSC cache layer tests (0 = deterministic)") +set(NBL_NSC_CACHE_TEST_ITERATIONS "5" CACHE STRING "Iterations for NSC cache layer stress test") +set(NBL_NSC_CACHE_TEST_PARALLEL_JOBS "3" CACHE STRING "Parallel jobs for NSC cache layer test") +set(NBL_NSC_CACHE_PREAMBLE_BUDGET_MS "0" CACHE STRING "Optional max total_with_output_ms for preamble hit time test (0 disables check)") + +set(NBL_NSC_CACHE_LAYER_ROOT "${CMAKE_CURRENT_BINARY_DIR}/cache_layers") +set(NBL_NSC_CACHE_LAYER_SRC "${NBL_NSC_CACHE_LAYER_ROOT}/src") +file(MAKE_DIRECTORY "${NBL_NSC_CACHE_LAYER_SRC}") + +set(NBL_NSC_CACHE_PROXY "${NBL_NSC_CACHE_LAYER_SRC}/proxy.hlsl") +file(WRITE "${NBL_NSC_CACHE_PROXY}" [=[ +#ifndef NBL_NSC_CACHE_TEST_PROXY_HLSL +#define NBL_NSC_CACHE_TEST_PROXY_HLSL +// NBL_NSC_CACHE_TEST_DEFINES_BEGIN +// NBL_NSC_CACHE_TEST_DEFINES_END +// NBL_NSC_CACHE_TEST_INCLUDES_BEGIN +#include +#include +#include +// NBL_NSC_CACHE_TEST_INCLUDES_END +#endif +]=]) + +function(nbl_nsc_write_input _path) + file(WRITE "${_path}" [=[ +#include "proxy.hlsl" + +[numthreads(1,1,1)] +[shader("compute")] +void main(uint3 tid : 
SV_DispatchThreadID) +{ + uint sink = 1u; + if (tid.x == 0u && sink == 0u) + return; +} +]=]) +endfunction() + +set(NBL_NSC_CACHE_INPUT "${NBL_NSC_CACHE_LAYER_SRC}/cache_layers_input.hlsl") +nbl_nsc_write_input("${NBL_NSC_CACHE_INPUT}") +set(NBL_NSC_SHADER_INPUT "${NBL_NSC_CACHE_INPUT}") +set(NBL_NSC_PREPROCESS_INPUT "${NBL_NSC_CACHE_INPUT}") +set(NBL_NSC_PREAMBLE_INPUT "${NBL_NSC_CACHE_INPUT}") + +function(nbl_nsc_add_cache_target _name _input _binary_dir _output_var _export_prefix) + set(JSON_TEMPLATE [=[[ + { + "INPUT": "@INPUT@", + "KEY": "@KEY@", + "COMPILE_OPTIONS": ["-T", "cs_6_7"], + "CAPS": [] + } +]]]=]) + set(INPUT "${_input}") + set(KEY "${_name}") + string(CONFIGURE "${JSON_TEMPLATE}" JSON @ONLY) + + NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${_name}_spirv + BINARY_DIR "${_binary_dir}" + MOUNT_POINT_DEFINE NBL_NSC_CACHE_LAYERS_MOUNT + COMMON_OPTIONS -I "${NBL_NSC_CACHE_LAYER_SRC}" + OUTPUT_VAR ${_output_var} + EXPORT_RULES ${_export_prefix} + DISABLE_CUSTOM_COMMANDS + INCLUDE nbl/nsc/tests/cache_layers/keys.hpp + NAMESPACE nbl::nsc::tests::cache_layers + INPUTS ${JSON} + DISCARD_DEFAULT_GLOB + ) + + if(DEFINED ${_output_var}) + set(${_output_var} ${${_output_var}} PARENT_SCOPE) + endif() + if(_export_prefix AND DEFINED ${_export_prefix}_COUNT) + set(${_export_prefix}_COUNT "${${_export_prefix}_COUNT}" PARENT_SCOPE) + if(${_export_prefix}_COUNT GREATER 0) + math(EXPR _export_last "${${_export_prefix}_COUNT} - 1") + foreach(_export_idx RANGE 0 ${_export_last}) + set(${_export_prefix}_COMMAND_${_export_idx} ${${_export_prefix}_COMMAND_${_export_idx}} PARENT_SCOPE) + set(${_export_prefix}_OUTPUT_${_export_idx} "${${_export_prefix}_OUTPUT_${_export_idx}}" PARENT_SCOPE) + set(${_export_prefix}_LOG_${_export_idx} "${${_export_prefix}_LOG_${_export_idx}}" PARENT_SCOPE) + set(${_export_prefix}_DEPFILE_${_export_idx} "${${_export_prefix}_DEPFILE_${_export_idx}}" PARENT_SCOPE) + set(${_export_prefix}_REPORT_${_export_idx} "${${_export_prefix}_REPORT_${_export_idx}}" 
PARENT_SCOPE)
    endforeach()
  endif()
  endif()
endfunction()

# Per-layer binary directories handed to nbl_nsc_add_cache_target.
set(_BIN_SHADER "${NBL_NSC_CACHE_LAYER_ROOT}/shader_cache")
set(_BIN_PREPROCESS "${NBL_NSC_CACHE_LAYER_ROOT}/preprocess_cache")
set(_BIN_PREAMBLE "${NBL_NSC_CACHE_LAYER_ROOT}/preamble_cache")

# NOTE(review): NSC_PREPROCESS_PREAMBLE is toggled as a plain variable before
# each target; presumably nbl_nsc_add_cache_target (defined above this view)
# reads it - confirm. The last value (ON) stays set for the rest of this scope.
set(NSC_PREPROCESS_PREAMBLE ON)
set(_EXPORT_SHADER NSC_CACHE_SHADER)
nbl_nsc_add_cache_target(nsc_cache_shader_hit "${NBL_NSC_SHADER_INPUT}" "${_BIN_SHADER}" KEYS_SHADER_CACHE ${_EXPORT_SHADER})

set(NSC_PREPROCESS_PREAMBLE OFF)
set(_EXPORT_PREPROCESS NSC_CACHE_PREPROCESS)
nbl_nsc_add_cache_target(nsc_cache_preprocess_hit "${NBL_NSC_PREPROCESS_INPUT}" "${_BIN_PREPROCESS}" KEYS_PREPROCESS_CACHE ${_EXPORT_PREPROCESS})

set(NSC_PREPROCESS_PREAMBLE ON)
set(_EXPORT_PREAMBLE NSC_CACHE_PREAMBLE)
nbl_nsc_add_cache_target(nsc_cache_preamble_hit "${NBL_NSC_PREAMBLE_INPUT}" "${_BIN_PREAMBLE}" KEYS_PREAMBLE_CACHE ${_EXPORT_PREAMBLE})

# Interpreter + script prefix shared by every test command line.
set(NBL_NSC_CACHE_TEST_BASE
  "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/cache_layers_test.py"
)

# Builds the full argument list for one cache_layers_test.py invocation.
#   _out              name of the caller variable that receives the list
#   _mode             value passed to --mode
#   _input .. _preprocessed
#                     paths forwarded to the matching --<name> options
#   _command          nsc command line (a list), appended after --command
#   ARGN              extra runner options, inserted before --command
# --command must stay last: the runner consumes everything after it.
function(nbl_nsc_make_test_args _out _mode _input _output _report _log _depfile _shader_cache _preprocess_cache _preprocessed _command)
  set(_args
    ${NBL_NSC_CACHE_TEST_BASE}
    --mode ${_mode}
    # Forward the path parameters themselves; each flag previously received
    # a literal "$" and the parameters were never used.
    --input "${_input}"
    --output "${_output}"
    --report "${_report}"
    --log "${_log}"
    --depfile "${_depfile}"
    --shader-cache "${_shader_cache}"
    --preprocess-cache "${_preprocess_cache}"
    --preprocessed "${_preprocessed}"
    --seed "${NBL_NSC_CACHE_TEST_SEED}"
  )
  # Appending an empty ARGN is a no-op, so no guard is needed. An if(ARGN)
  # guard would also skip a lone false-constant argument such as "0".
  list(APPEND _args ${ARGN})
  list(APPEND _args --command ${_command})
  set(${_out} ${_args} PARENT_SCOPE)
endfunction()

# Registers the <prefix>_COLD_RUN_TEST / <prefix>_HIT_TEST pair.
# The cold run sets up fixture _fixture and the hit test requires it, so CTest
# runs the cold test first. Both take the shared nbl_nsc_cache_layers lock.
function(nbl_nsc_add_cache_tests _prefix _fixture _cold_args _hit_args)
  add_test(NAME ${_prefix}_COLD_RUN_TEST
    COMMAND ${_cold_args}
    COMMAND_EXPAND_LISTS
  )
  add_test(NAME ${_prefix}_HIT_TEST
    COMMAND ${_hit_args}
    COMMAND_EXPAND_LISTS
  )
  set_tests_properties(${_prefix}_COLD_RUN_TEST PROPERTIES FIXTURES_SETUP ${_fixture} RESOURCE_LOCK nbl_nsc_cache_layers)
  set_tests_properties(${_prefix}_HIT_TEST PROPERTIES FIXTURES_REQUIRED ${_fixture} RESOURCE_LOCK nbl_nsc_cache_layers)
endfunction()

# Registers one stand-alone test named _name that runs the command list _args
# under the shared nbl_nsc_cache_layers lock.
function(nbl_nsc_add_single_test _name _args)
  add_test(NAME ${_name}
    COMMAND ${_args}
    COMMAND_EXPAND_LISTS
  )
  set_tests_properties(${_name} PROPERTIES RESOURCE_LOCK nbl_nsc_cache_layers)
endfunction()

# Builds runner arguments for mode _mode from the "<export prefix>_*_0"
# variables (command, output, report, log, depfile, caches, preprocessed).
# Extra arguments (ARGN) are forwarded to nbl_nsc_make_test_args.
function(nbl_nsc_make_args_from_export _out _mode _input _export_prefix)
  set(_command ${${_export_prefix}_COMMAND_0})
  set(_out_path "${${_export_prefix}_OUTPUT_0}")
  set(_report_path "${${_export_prefix}_REPORT_0}")
  set(_log_path "${${_export_prefix}_LOG_0}")
  set(_depfile_path "${${_export_prefix}_DEPFILE_0}")
  set(_cache_shader "${${_export_prefix}_CACHE_SHADER_0}")
  set(_cache_preprocess "${${_export_prefix}_CACHE_PREPROCESS_0}")
  set(_preprocessed "${${_export_prefix}_PREPROCESSED_0}")

  nbl_nsc_make_test_args(${_out} ${_mode} "${_input}" "${_out_path}" "${_report_path}" "${_log_path}" "${_depfile_path}" "${_cache_shader}" "${_cache_preprocess}" "${_preprocessed}" "${_command}" ${ARGN})
  # The helper wrote into this function's scope; hand the list on to the caller.
  set(${_out} ${${_out}} PARENT_SCOPE)
endfunction()

# Stores "<parent dir>/<stem><suffix><extension>" of _path in _out, i.e. the
# suffix is inserted between the stem and the extension reported by cmake_path.
function(nbl_nsc_path_with_suffix _out _path _suffix)
  cmake_path(GET _path PARENT_PATH _dir)
  cmake_path(GET _path STEM _stem)
  cmake_path(GET _path EXTENSION _ext)
  set(${_out} "${_dir}/${_stem}${_suffix}${_ext}" PARENT_SCOPE)
endfunction()

# Builds runner arguments for the no_cache_cold mode. Output, report, log,
# depfile and preprocessed paths get a ".no_cache" suffix; the two cache paths
# are passed through unchanged.
function(nbl_nsc_make_no_cache_args _out _input _export_prefix)
  set(_command ${${_export_prefix}_COMMAND_0})
  set(_out_path "${${_export_prefix}_OUTPUT_0}")
  set(_report_path "${${_export_prefix}_REPORT_0}")
  set(_log_path "${${_export_prefix}_LOG_0}")
  set(_depfile_path "${${_export_prefix}_DEPFILE_0}")
  set(_cache_shader "${${_export_prefix}_CACHE_SHADER_0}")
  set(_cache_preprocess "${${_export_prefix}_CACHE_PREPROCESS_0}")
  set(_preprocessed "${${_export_prefix}_PREPROCESSED_0}")

  nbl_nsc_path_with_suffix(_out_no_cache "${_out_path}" ".no_cache")
  nbl_nsc_path_with_suffix(_report_no_cache "${_report_path}" ".no_cache")
  nbl_nsc_path_with_suffix(_log_no_cache "${_log_path}" ".no_cache")
  nbl_nsc_path_with_suffix(_depfile_no_cache "${_depfile_path}" ".no_cache")
  nbl_nsc_path_with_suffix(_preprocessed_no_cache "${_preprocessed}" ".no_cache")

  nbl_nsc_make_test_args(${_out} no_cache_cold "${_input}" "${_out_no_cache}" "${_report_no_cache}" "${_log_no_cache}" "${_depfile_no_cache}" "${_cache_shader}" "${_cache_preprocess}" "${_preprocessed_no_cache}" "${_command}")
  set(${_out} ${${_out}} PARENT_SCOPE)
endfunction()

# Registers the cold/hit test pair of one cache layer. Both tests share the
# exported paths and command; only the mode ("<_mode_base>_cold" versus
# "<_mode_base>_hit") differs.
function(nbl_nsc_register_cache_layer _prefix _fixture _mode_base _input _export_prefix)
  set(_command ${${_export_prefix}_COMMAND_0})
  set(_out "${${_export_prefix}_OUTPUT_0}")
  set(_report "${${_export_prefix}_REPORT_0}")
  set(_log "${${_export_prefix}_LOG_0}")
  set(_depfile "${${_export_prefix}_DEPFILE_0}")
  set(_cache_shader "${${_export_prefix}_CACHE_SHADER_0}")
  set(_cache_preprocess "${${_export_prefix}_CACHE_PREPROCESS_0}")
  set(_preprocessed "${${_export_prefix}_PREPROCESSED_0}")

  nbl_nsc_make_test_args(_cold_args "${_mode_base}_cold" "${_input}" "${_out}" "${_report}" "${_log}" "${_depfile}" "${_cache_shader}" "${_cache_preprocess}" "${_preprocessed}" "${_command}")
  nbl_nsc_make_test_args(_hit_args "${_mode_base}_hit" "${_input}" "${_out}" "${_report}" "${_log}" "${_depfile}" "${_cache_shader}" "${_cache_preprocess}" "${_preprocessed}" "${_command}")

  nbl_nsc_add_cache_tests(${_prefix} ${_fixture} "${_cold_args}" "${_hit_args}")
endfunction()

# Cold/hit pairs, one per cache layer.
nbl_nsc_register_cache_layer(NBL_NSC_CACHE_SHADER nbl_nsc_shader_cache shader_cache "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER})
nbl_nsc_register_cache_layer(NBL_NSC_CACHE_PREPROCESS nbl_nsc_preprocess_cache preprocess_cache "${NBL_NSC_PREPROCESS_INPUT}" ${_EXPORT_PREPROCESS})
nbl_nsc_register_cache_layer(NBL_NSC_CACHE_PREAMBLE nbl_nsc_preamble_cache preamble_cache "${NBL_NSC_PREAMBLE_INPUT}" ${_EXPORT_PREAMBLE})

nbl_nsc_make_args_from_export(_NBL_NSC_SHADER_DISABLED_ARGS shader_cache_disabled "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER})
# Argument lists for the stand-alone tests, grouped by the exported compile
# rule they reuse. Each call only fills its own output variable, so the order
# of the calls is irrelevant.

# --- preprocess-cache export ---
nbl_nsc_make_args_from_export(
  _NBL_NSC_PREPROCESS_DISABLED_ARGS preprocess_cache_disabled
  "${NBL_NSC_PREPROCESS_INPUT}" ${_EXPORT_PREPROCESS}
)

# --- preamble-cache export ---
nbl_nsc_make_args_from_export(
  _NBL_NSC_PREAMBLE_DISABLED_ARGS preamble_cache_disabled
  "${NBL_NSC_PREAMBLE_INPUT}" ${_EXPORT_PREAMBLE}
)
nbl_nsc_make_args_from_export(
  _NBL_NSC_DEPS_ARGS deps_invalidation
  "${NBL_NSC_PREAMBLE_INPUT}" ${_EXPORT_PREAMBLE}
)
nbl_nsc_make_args_from_export(
  _NBL_NSC_PREAMBLE_TIME_ARGS preamble_cache_hit_time
  "${NBL_NSC_PREAMBLE_INPUT}" ${_EXPORT_PREAMBLE}
  --budget-ms ${NBL_NSC_CACHE_PREAMBLE_BUDGET_MS}
)
nbl_nsc_make_no_cache_args(
  _NBL_NSC_NO_CACHE_ARGS
  "${NBL_NSC_PREAMBLE_INPUT}" ${_EXPORT_PREAMBLE}
)

# --- shader-cache export ---
nbl_nsc_make_args_from_export(
  _NBL_NSC_ISOLATION_ARGS shader_cache_isolation
  "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}
)
nbl_nsc_make_args_from_export(
  _NBL_NSC_PATH_ARGS path_normalization
  "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}
)
nbl_nsc_make_args_from_export(
  _NBL_NSC_RANDOM_DEFINES_ARGS random_defines
  "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}
)
nbl_nsc_make_args_from_export(
  _NBL_NSC_PARALLEL_ARGS parallel_smoke
  "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}
  --parallel-jobs ${NBL_NSC_CACHE_TEST_PARALLEL_JOBS}
)
nbl_nsc_make_args_from_export(
  _NBL_NSC_STRESS_ARGS stress
  "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}
  --iterations ${NBL_NSC_CACHE_TEST_ITERATIONS}
)
nbl_nsc_make_args_from_export(
  _NBL_NSC_REPORT_SCHEMA_ARGS report_schema
  "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}
)
nbl_nsc_make_args_from_export(
  _NBL_NSC_DEPFILE_ARGS depfile_contents
  "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}
)
nbl_nsc_make_args_from_export(
  _NBL_NSC_CACHE_OVERRIDE_ARGS cache_path_override
  "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}
)
nbl_nsc_make_args_from_export(
  _NBL_NSC_LARGE_GRAPH_ARGS large_include_graph
  "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}
)
nbl_nsc_make_args_from_export(
  _NBL_NSC_UNUSED_INCLUDE_ARGS unused_include
  "${NBL_NSC_SHADER_INPUT}" ${_EXPORT_SHADER}
)

# Stand-alone CTest entries; each one runs the argument list prepared for it.
# Registration order is kept as-is.

# Cache layer disable checks.
nbl_nsc_add_single_test(NBL_NSC_CACHE_SHADER_DISABLED_TEST
  "${_NBL_NSC_SHADER_DISABLED_ARGS}")
nbl_nsc_add_single_test(NBL_NSC_CACHE_PREPROCESS_DISABLED_TEST
  "${_NBL_NSC_PREPROCESS_DISABLED_ARGS}")
nbl_nsc_add_single_test(NBL_NSC_CACHE_PREAMBLE_DISABLED_TEST
  "${_NBL_NSC_PREAMBLE_DISABLED_ARGS}")

# Isolation / invalidation.
nbl_nsc_add_single_test(NBL_NSC_CACHE_SHADER_ISOLATION_TEST
  "${_NBL_NSC_ISOLATION_ARGS}")
nbl_nsc_add_single_test(NBL_NSC_CACHE_DEPS_INVALIDATION_TEST
  "${_NBL_NSC_DEPS_ARGS}")

# Key stability.
nbl_nsc_add_single_test(NBL_NSC_CACHE_PATH_NORMALIZATION_TEST
  "${_NBL_NSC_PATH_ARGS}")
nbl_nsc_add_single_test(NBL_NSC_CACHE_RANDOM_DEFINES_TEST
  "${_NBL_NSC_RANDOM_DEFINES_ARGS}")

# Load.
nbl_nsc_add_single_test(NBL_NSC_CACHE_PARALLEL_SMOKE_TEST
  "${_NBL_NSC_PARALLEL_ARGS}")
nbl_nsc_add_single_test(NBL_NSC_CACHE_STRESS_TEST
  "${_NBL_NSC_STRESS_ARGS}")

# Report, depfile and cache-file checks.
nbl_nsc_add_single_test(NBL_NSC_CACHE_REPORT_SCHEMA_TEST
  "${_NBL_NSC_REPORT_SCHEMA_ARGS}")
nbl_nsc_add_single_test(NBL_NSC_CACHE_DEPFILE_CONTENTS_TEST
  "${_NBL_NSC_DEPFILE_ARGS}")
nbl_nsc_add_single_test(NBL_NSC_CACHE_PATH_OVERRIDE_TEST
  "${_NBL_NSC_CACHE_OVERRIDE_ARGS}")

# Include-graph variants.
nbl_nsc_add_single_test(NBL_NSC_CACHE_LARGE_GRAPH_TEST
  "${_NBL_NSC_LARGE_GRAPH_ARGS}")
nbl_nsc_add_single_test(NBL_NSC_CACHE_UNUSED_INCLUDE_TEST
  "${_NBL_NSC_UNUSED_INCLUDE_ARGS}")

# Timing and no-cache baseline.
nbl_nsc_add_single_test(NBL_NSC_CACHE_PREAMBLE_HIT_TIME_TEST
  "${_NBL_NSC_PREAMBLE_TIME_ARGS}")
nbl_nsc_add_single_test(NBL_NSC_CACHE_NO_CACHE_COLD_TEST
  "${_NBL_NSC_NO_CACHE_ARGS}")

# The timing test additionally requires the preamble fixture, so CTest runs the
# preamble cold-run test before it.
set_tests_properties(NBL_NSC_CACHE_PREAMBLE_HIT_TIME_TEST
  PROPERTIES FIXTURES_REQUIRED nbl_nsc_preamble_cache)
diff --git a/tools/nsc/test/cache_layers/README.md b/tools/nsc/test/cache_layers/README.md new file mode 100644 index 0000000000..d5e82d48e4 --- /dev/null +++ b/tools/nsc/test/cache_layers/README.md @@ -0,0 +1,194 @@ +# NSC Cache Layer Tests + +This directory defines a cache-layer test suite for `nsc`. 
+The tests are driven by `tools/nsc/test/cache_layers/CMakeLists.txt` and the +Python runner `tools/nsc/test/cache_layers/cache_layers_test.py`. + +## What is being tested + +NSC has three cache layers that must stay correct and fast: + +1) Shader cache +- Stores final SPIR-V. +- Hit path skips preprocess + compile and returns cached SPIR-V. + +2) Preprocess cache +- Stores preprocessed source + dependency graph. +- Hit path skips full include processing, but still compiles. + +3) Preamble cache (preamble/prefix cache) +- Stores preprocessed prefix for heavy include graphs. +- Hit path only preprocesses the body and reuses prefix. +- If the body has no preprocessor directives and no macro usage, it is passed through without running Wave. + +## How the layers interact + +The compile flow is: +- Shader cache probe (key + deps). On hit, return cached SPIR-V and skip all other work. +- If shader cache misses, probe preprocess cache. + - If preprocess cache hits, compile using preprocessed code. + - If preamble is enabled and available, only preprocess the body and reuse the cached prefix. +- On preprocess miss, do full preprocess + compile, then update caches. + +All three layers together give: +- cold run: full preprocess + compile (safe baseline) +- body-only change: shader cache miss, preprocess cache hit, compile runs +- deps change: cold run (all caches miss) +- preamble hit: avoids re-lexing heavy includes on body edits + +The build system enables all three by default, but each layer can be toggled +with CLI flags to verify behavior. + +## Why use all three layers + +- Shader cache gives the fastest hit path for unchanged inputs. +- Preprocess cache avoids re-walking includes when only the body changes. +- Preamble cache cuts Wave time for large include graphs even when the body changes. + +Dropping any layer regresses a specific edit pattern. 
Using all three maximizes +iteration speed while keeping correctness, because every layer is validated by +its dependency tracking. + +## Correctness and safety + +Each cache entry is validated against its dependency graph and compilation +inputs. Any change in inputs, options, or includes invalidates the cache and +forces a cold run. When caches are enabled, `nsc` uses fast-safe validation: +mtime/size mismatches force a miss without hashing, so hits are never stale. +The tests do not enable "fast unsafe" paths. + +## Test overview (CMake/CTest) + +The suite defines a set of CTest entries with explicit cold/hit tests plus +additional integrity checks. All tests are executed in the same build +configuration you configured (`builtins` ON/OFF are respected automatically). + +Core cache tests: +- `NBL_NSC_CACHE_SHADER_COLD_RUN_TEST` +- `NBL_NSC_CACHE_SHADER_HIT_TEST` +- `NBL_NSC_CACHE_PREPROCESS_COLD_RUN_TEST` +- `NBL_NSC_CACHE_PREPROCESS_HIT_TEST` +- `NBL_NSC_CACHE_PREAMBLE_COLD_RUN_TEST` +- `NBL_NSC_CACHE_PREAMBLE_HIT_TEST` + +Extra correctness tests (no cross-config builds): +- Cache layer disable checks: + - `NBL_NSC_CACHE_SHADER_DISABLED_TEST` + - `NBL_NSC_CACHE_PREPROCESS_DISABLED_TEST` + - `NBL_NSC_CACHE_PREAMBLE_DISABLED_TEST` +- Isolation / invalidation: + - `NBL_NSC_CACHE_SHADER_ISOLATION_TEST` + - `NBL_NSC_CACHE_DEPS_INVALIDATION_TEST` +- Path normalization: + - `NBL_NSC_CACHE_PATH_NORMALIZATION_TEST` +- Randomized defines: + - `NBL_NSC_CACHE_RANDOM_DEFINES_TEST` +- Parallel smoke (multi-process nsc calls with unique outputs): + - `NBL_NSC_CACHE_PARALLEL_SMOKE_TEST` +- Stress (multiple repeated runs, timing stats only): + - `NBL_NSC_CACHE_STRESS_TEST` +- Report schema sanity: + - `NBL_NSC_CACHE_REPORT_SCHEMA_TEST` +- Depfile content check: + - `NBL_NSC_CACHE_DEPFILE_CONTENTS_TEST` +- Cache override paths: + - `NBL_NSC_CACHE_PATH_OVERRIDE_TEST` +- Large include graph: + - `NBL_NSC_CACHE_LARGE_GRAPH_TEST` +- Unused include is excluded from depfile: + - 
`NBL_NSC_CACHE_UNUSED_INCLUDE_TEST` +- Preamble hit timing (logs total time, optional budget): + - `NBL_NSC_CACHE_PREAMBLE_HIT_TIME_TEST` +- No-cache cold baseline: + - `NBL_NSC_CACHE_NO_CACHE_COLD_TEST` + +## How it works + +The tests compile a small HLSL shader that includes a local `proxy.hlsl`. +The proxy includes the same heavy builtins used in the cache test targets and +has injected markers for adding/removing `#define`s and include variants. + +The Python runner: +- edits the body or proxy as required by a test mode +- runs `nsc` directly using the exact command line exported by + `NBL_CREATE_NSC_COMPILE_RULES` +- reads the JSON report (`.spv.report.json`) to assert hit/miss and behavior + +The JSON report fields used by the tests include: +- `shader_cache.hit`, `shader_cache.status` +- `preprocess_cache.status`, `preprocess_cache.hit` +- `preamble.enabled`, `preamble.used` +- `compile.called` + +## Example timings (Release) + +Measured from JSON reports (total_with_output_ms). +Cold-run and hit numbers are medians of 5 runs. Each hit sample is preceded by its cold-run seed. +Baseline is "No cache cold" per builtins mode. Relative vs no-cache is baseline / row. +Values below 1.0x mean slower than baseline. +Machine: AMD Ryzen 5 5600G with Radeon Graphics. +Config: Release, builtins OFF/ON (two baselines). +Includes stress: the proxy pulls three heavy builtins (intrinsics/matrix/vector). The full preprocessed output is ~11.3k lines (11274, measured from the Release preprocess-cache `.spv.pre.hlsl`). 
+ +Cold runs (no cache hits; preamble split can still be used): + +| Scenario | Caches enabled | Preprocess path | total_with_output_ms (builtins OFF) | Relative vs no-cache (OFF) | total_with_output_ms (builtins ON) | Relative vs no-cache (ON) | +| --- | --- | --- | --- | --- | --- | --- | +| Baseline no-cache cold | none | full preprocess | 1233 | 1.00x | 693 | 1.00x | +| Cold run (preprocess cache enabled) | shader + preprocess | full preprocess | 1449 | 0.85x | 912 | 0.76x | +| Cold run (all caches enabled) | shader + preprocess + preamble | full preprocess | 1276 | 0.97x | 748 | 0.93x | + +Note: "Cold run (all caches enabled)" is still a cache miss and uses full preprocess; `preamble.used` stays false on miss. Small deltas between the cold rows (including an occasional slight speedup vs baseline) are measurement noise and OS caching effects. + +Hit paths (caches enabled as configured by the test target): + +| Scenario | Caches enabled | Hit path | total_with_output_ms (builtins OFF) | Speedup vs no-cache (OFF) | total_with_output_ms (builtins ON) | Speedup vs no-cache (ON) | +| --- | --- | --- | --- | --- | --- | --- | +| Shader cache hit | shader + preprocess + preamble | cached SPIR-V | 17 | 72.5x | 19 | 36.5x | +| Preprocess cache hit | shader + preprocess | preprocessed code + compile | 404 | 3.05x | 412 | 1.68x | +| Preamble cache hit | shader + preprocess + preamble | prefix reuse + body preprocess + compile | 219 | 5.63x | 251 | 2.76x | + +These numbers are expected to vary across machines and drivers. +Builtins ON uses embedded archives, which reduces filesystem IO and typically improves cold-run times. 
+ +## Running the suite + +From the build directory: + +``` +ctest -C Release -R NBL_NSC_CACHE_ --output-on-failure +``` + +Repeat runs for sampling (each test is run up to 5 times, stopping at its first failure): + +``` +ctest -C Release -R NBL_NSC_CACHE_ --repeat until-fail:5 --output-on-failure +``` + +## Tuning knobs + +These are regular CMake cache variables: + +- `NBL_NSC_CACHE_TEST_SEED` + - Seed used for randomized define/body changes (runs are reproducible for a given seed; change it to vary the picks). +- `NBL_NSC_CACHE_TEST_ITERATIONS` + - Number of iterations used by the stress test. +- `NBL_NSC_CACHE_TEST_PARALLEL_JOBS` + - Number of parallel jobs used in the parallel smoke test. +- `NBL_NSC_CACHE_PREAMBLE_BUDGET_MS` + - Optional budget for the preamble hit timing test (0 disables check). + +## Build system defaults + +The build system enables all three layers by default. CLI toggles exist for +validation: +- `-nbl-shader-cache` +- `-nbl-preprocess-cache` +- `-nbl-preprocess-preamble` + +## Notes + +- Tests are protected by a CTest `RESOURCE_LOCK` so they do not fight over the + same inputs. The parallel smoke test uses unique outputs internally. +- The suite uses the current build configuration only; no extra Release/Debug + builds are required. 
diff --git a/tools/nsc/test/cache_layers/cache_layers_test.py b/tools/nsc/test/cache_layers/cache_layers_test.py new file mode 100644 index 0000000000..019b8640d6 --- /dev/null +++ b/tools/nsc/test/cache_layers/cache_layers_test.py @@ -0,0 +1,675 @@ +import argparse +import concurrent.futures +import json +import random +import re +import statistics +import subprocess +from pathlib import Path + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--cmake") + parser.add_argument("--build-dir") + parser.add_argument("--target") + parser.add_argument("--config", default="") + parser.add_argument("--mode", required=True) + parser.add_argument("--input", required=True) + parser.add_argument("--output", required=True) + parser.add_argument("--report", required=True) + parser.add_argument("--log", default="") + parser.add_argument("--depfile", default="") + parser.add_argument("--shader-cache", default="") + parser.add_argument("--preprocess-cache", default="") + parser.add_argument("--preprocessed", default="") + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--iterations", type=int, default=5) + parser.add_argument("--parallel-jobs", type=int, default=3) + parser.add_argument("--budget-ms", type=int, default=0) + parser.add_argument("--command", nargs=argparse.REMAINDER) + return parser.parse_args() + + +def normalize_command(cmd): + if not cmd: + return [] + return [arg for arg in cmd if arg] + + +def strip_option(cmd, flag, takes_value): + out = [] + skip = False + for arg in cmd: + if skip: + skip = False + continue + if arg == flag: + if takes_value: + skip = True + continue + out.append(arg) + return out + + +def strip_options(cmd, options_with_values, options_flags): + result = cmd + for flag in options_with_values: + result = strip_option(result, flag, True) + for flag in options_flags: + result = strip_option(result, flag, False) + return result + + +def replace_option_value(cmd, flag, value): + result = 
list(cmd) + for idx in range(len(result) - 1): + if result[idx] == flag: + result[idx + 1] = value + return result + + +def apply_output_overrides(cmd, args): + result = list(cmd) + if args.output: + result = replace_option_value(result, "-Fc", args.output) + if args.depfile: + result = replace_option_value(result, "-MF", args.depfile) + if args.report: + result = replace_option_value(result, "-nbl-report", args.report) + if args.log: + result = replace_option_value(result, "-log", args.log) + return result + + +def command_without_shader_cache(cmd): + return strip_options( + cmd, + options_with_values=["-shader-cache-file", "-nbl-shader-cache-compression", "-shader-cache-compression"], + options_flags=["-nbl-shader-cache", "-shader-cache"], + ) + + +def command_without_preprocess_cache(cmd): + return strip_options( + cmd, + options_with_values=["-preprocess-cache-file"], + options_flags=["-nbl-preprocess-cache", "-preprocess-cache"], + ) + + +def command_without_preamble(cmd): + return strip_options(cmd, options_with_values=[], options_flags=["-nbl-preprocess-preamble"]) + +def command_without_all_caches(cmd): + cmd = command_without_shader_cache(cmd) + cmd = command_without_preprocess_cache(cmd) + cmd = command_without_preamble(cmd) + return cmd + + +def run_cmd(args): + cmd = normalize_command(args) + res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) + if res.returncode != 0: + print(res.stdout) + raise RuntimeError("command failed") + + +def run_build(args, command_override=None): + if command_override is not None: + run_cmd(command_override) + return + if args.command: + run_cmd(args.command) + return + if not (args.cmake and args.build_dir and args.target): + raise RuntimeError("missing --command or --cmake/--build-dir/--target") + cmd = [args.cmake, "--build", args.build_dir, "--target", args.target] + if args.config: + cmd.extend(["--config", args.config]) + run_cmd(cmd) + + +def load_report(path): + with path.open("r", 
encoding="utf-8") as f: + return json.load(f) + + +def set_body_value(path, value): + text = path.read_text(encoding="utf-8") + match = re.search(r"uint\s+sink\s*=\s*(\d+)u;", text) + if not match: + raise RuntimeError("missing body marker: uint sink = u;") + current = int(match.group(1)) + if current == value: + return + replacement = f"uint sink = {value}u;" + new_text = re.sub(r"uint\s+sink\s*=\s*\d+u;", replacement, text, count=1) + path.write_text(new_text, encoding="utf-8") + + +def pick_body_value(rng, exclude): + choices = [2, 3, 5, 7] + value = rng.choice(choices) + if value == exclude: + value = choices[(choices.index(value) + 1) % len(choices)] + return value + + +def pick_defines(rng): + pool = [ + "#define NBL_NSC_TEST_DEF_A 1", + "#define NBL_NSC_TEST_DEF_B 2", + "#define NBL_NSC_TEST_DEF_C 3", + "#define NBL_NSC_TEST_DEF_D 4", + ] + rng.shuffle(pool) + count = rng.randint(1, 3) + return pool[:count] + + +def normalized_includes(): + return [ + "#include ", + "#include ", + "#include ", + ] + + +def default_builtin_includes(): + return [ + "#include ", + "#include ", + "#include ", + ] + + +def replace_section(text, begin, end, lines): + begin_idx = text.find(begin) + end_idx = text.find(end, begin_idx) + if begin_idx == -1 or end_idx == -1: + raise RuntimeError(f"missing proxy markers: {begin} / {end}") + end_idx += len(end) + content = "\n".join(lines) + if content: + content = f"\n{content}\n" + else: + content = "\n" + return text[:begin_idx] + begin + content + end + text[end_idx:] + + +def set_proxy_defines(proxy_path, defines): + text = proxy_path.read_text(encoding="utf-8") + updated = replace_section( + text, + "// NBL_NSC_CACHE_TEST_DEFINES_BEGIN", + "// NBL_NSC_CACHE_TEST_DEFINES_END", + defines, + ) + proxy_path.write_text(updated, encoding="utf-8") + + +def set_proxy_includes(proxy_path, includes): + text = proxy_path.read_text(encoding="utf-8") + updated = replace_section( + text, + "// NBL_NSC_CACHE_TEST_INCLUDES_BEGIN", + "// 
NBL_NSC_CACHE_TEST_INCLUDES_END", + includes, + ) + proxy_path.write_text(updated, encoding="utf-8") + + +def delete_path(path): + if path.exists(): + path.unlink() + + +def assert_true(expr, message): + if not expr: + raise RuntimeError(message) + + +def assert_not_exists(path, message): + if path.exists(): + raise RuntimeError(message) + + +def assert_eq(actual, expected, message): + if actual != expected: + raise RuntimeError(f"{message}: expected {expected}, got {actual}") + +def cleanup_outputs(output_path, report_path, args): + delete_path(output_path) + delete_path(report_path) + + log_path = Path(args.log) if args.log else Path(str(output_path) + ".log") + depfile_path = Path(args.depfile) if args.depfile else Path(str(output_path) + ".dep") + shader_cache_path = Path(args.shader_cache) if args.shader_cache else Path(str(output_path) + ".ppcache") + preprocess_cache_path = Path(args.preprocess_cache) if args.preprocess_cache else Path(str(output_path) + ".ppcache.pre") + preprocessed_path = Path(args.preprocessed) if args.preprocessed else Path(str(output_path) + ".pre.hlsl") + + delete_path(log_path) + delete_path(depfile_path) + delete_path(shader_cache_path) + delete_path(preprocess_cache_path) + delete_path(preprocessed_path) + + +def assert_exists(path, message): + if not path.exists(): + raise RuntimeError(message) + + +def check_artifacts(output_path, report_path, args, expect_shader_cache=True, expect_preprocess_cache=True, expect_preprocessed=True): + log_path = Path(args.log) if args.log else Path(str(output_path) + ".log") + depfile_path = Path(args.depfile) if args.depfile else Path(str(output_path) + ".dep") + shader_cache_path = Path(args.shader_cache) if args.shader_cache else Path(str(output_path) + ".ppcache") + preprocess_cache_path = Path(args.preprocess_cache) if args.preprocess_cache else Path(str(output_path) + ".ppcache.pre") + preprocessed_path = Path(args.preprocessed) if args.preprocessed else Path(str(output_path) + ".pre.hlsl") + 
+ assert_exists(output_path, "output .spv not found after cold run") + assert_exists(report_path, "report not found after cold run") + assert_exists(log_path, "log not found after cold run") + assert_exists(depfile_path, "depfile not found after cold run") + if expect_shader_cache: + assert_exists(shader_cache_path, "shader cache not found after cold run") + else: + assert_not_exists(shader_cache_path, "shader cache should not be created") + if expect_preprocess_cache: + assert_exists(preprocess_cache_path, "preprocess cache not found after cold run") + else: + assert_not_exists(preprocess_cache_path, "preprocess cache should not be created") + if expect_preprocessed: + assert_exists(preprocessed_path, "preprocessed output not found after cold run") + else: + assert_not_exists(preprocessed_path, "preprocessed output should not be created") + + +def normalize_dep_path(text): + return text.replace("\\", "/") + + +def assert_report_schema(report): + assert_true(isinstance(report, dict), "report should be an object") + required_sections = ["shader_cache", "preprocess_cache", "compile", "output", "input", "total_ms"] + for key in required_sections: + assert_true(key in report, f"report missing key: {key}") + assert_true(isinstance(report["shader_cache"], dict), "shader_cache should be object") + assert_true(isinstance(report["preprocess_cache"], dict), "preprocess_cache should be object") + assert_true(isinstance(report["compile"], dict), "compile should be object") + assert_true(isinstance(report["output"], dict), "output should be object") + assert_true(isinstance(report["input"], str), "input should be string") + assert_true(isinstance(report["total_ms"], int), "total_ms should be int") + if "preamble" in report: + assert_true(isinstance(report["preamble"], dict), "preamble should be object") + + +def percentile(values, pct): + if not values: + return 0 + ordered = sorted(values) + idx = int(round((pct / 100.0) * (len(ordered) - 1))) + return ordered[idx] + + +def 
report_time_ms(report): + return int(report.get("total_with_output_ms", report.get("total_ms", 0))) + + +def main(): + args = parse_args() + rng = random.Random(args.seed) + input_path = Path(args.input) + output_path = Path(args.output) + report_path = Path(args.report) + proxy_path = input_path.parent / "proxy.hlsl" + + if args.mode == "shader_cache_cold": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + check_artifacts(output_path, report_path, args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected on cold run") + assert_eq(report["preprocess_cache"]["status"], "miss", "preprocess cache miss expected on cold run") + assert_eq(report["compile"]["called"], True, "compile should run on cold run") + return + + if args.mode == "shader_cache_hit": + set_body_value(input_path, 1) + delete_path(output_path) + delete_path(report_path) + run_build(args) + assert_true(report_path.exists(), "report not found") + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], True, "shader cache hit expected") + assert_eq(report["preprocess_cache"]["status"], "skipped", "preprocess cache should be skipped on shader hit") + assert_eq(report["compile"]["called"], False, "compile should be skipped on shader cache hit") + return + + if args.mode == "preprocess_cache_cold": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + check_artifacts(output_path, report_path, args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected on cold run") + assert_eq(report["preprocess_cache"]["status"], "miss", "preprocess cache miss expected on cold run") + assert_eq(report["compile"]["called"], True, "compile should run on cold run") + return + + if args.mode == "preprocess_cache_hit": + set_body_value(input_path, 1) + set_body_value(input_path, 
pick_body_value(rng, 1)) + delete_path(report_path) + run_build(args) + assert_true(report_path.exists(), "report not found") + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache should miss on body change") + assert_eq(report["preprocess_cache"]["status"], "hit", "preprocess cache hit expected") + assert_eq(report["compile"]["called"], True, "compile should run on preprocess cache hit") + assert_true(report["preamble"]["used"] is False, "preamble should be unused") + set_body_value(input_path, 1) + return + + if args.mode == "preamble_cache_cold": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + check_artifacts(output_path, report_path, args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected on cold run") + assert_eq(report["preprocess_cache"]["status"], "miss", "preprocess cache miss expected on cold run") + assert_eq(report["compile"]["called"], True, "compile should run on cold run") + return + + if args.mode == "preamble_cache_hit": + set_body_value(input_path, 1) + set_body_value(input_path, pick_body_value(rng, 1)) + delete_path(report_path) + run_build(args) + assert_true(report_path.exists(), "report not found") + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache should miss on body change") + assert_eq(report["preprocess_cache"]["status"], "hit", "preprocess cache hit expected") + assert_eq(report["compile"]["called"], True, "compile should run on preamble hit") + assert_true(report["preamble"]["used"] is True, "preamble should be used") + set_body_value(input_path, 1) + return + + if args.mode == "preamble_cache_hit_time": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + set_body_value(input_path, pick_body_value(rng, 1)) + delete_path(report_path) + run_build(args) + 
assert_true(report_path.exists(), "report not found") + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache should miss on body change") + assert_eq(report["preprocess_cache"]["status"], "hit", "preprocess cache hit expected") + assert_eq(report["compile"]["called"], True, "compile should run on preamble hit") + assert_true(report["preamble"]["used"] is True, "preamble should be used") + total_ms = report_time_ms(report) + print(f"preamble_hit_total_with_output_ms={total_ms}") + if args.budget_ms > 0: + assert_true(total_ms <= args.budget_ms, "preamble hit time budget exceeded") + set_body_value(input_path, 1) + return + + if args.mode == "no_cache_cold": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + cmd = apply_output_overrides(command_without_all_caches(normalize_command(args.command)), args) + run_build(args, cmd) + check_artifacts(output_path, report_path, args, expect_shader_cache=False, expect_preprocess_cache=False, expect_preprocessed=False) + report = load_report(report_path) + assert_true(report.get("shader_cache", {}).get("enabled") is False, "shader cache should be disabled") + assert_true(report.get("preprocess_cache", {}).get("enabled") is False, "preprocess cache should be disabled") + preamble = report.get("preamble", {}) + assert_true(preamble.get("enabled") is False, "preamble should be disabled") + assert_eq(report["compile"]["called"], True, "compile should run with caches disabled") + return + + if args.mode == "shader_cache_disabled": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + cmd = command_without_shader_cache(normalize_command(args.command)) + run_build(args, cmd) + check_artifacts(output_path, report_path, args, expect_shader_cache=False, expect_preprocess_cache=True, expect_preprocessed=True) + report = load_report(report_path) + assert_true(report.get("shader_cache", {}).get("enabled") is False, "shader cache should 
be disabled") + assert_eq(report["compile"]["called"], True, "compile should run when shader cache is disabled") + return + + if args.mode == "preprocess_cache_disabled": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + cmd = command_without_preprocess_cache(normalize_command(args.command)) + run_build(args, cmd) + check_artifacts(output_path, report_path, args, expect_shader_cache=True, expect_preprocess_cache=False, expect_preprocessed=False) + report = load_report(report_path) + assert_true(report.get("preprocess_cache", {}).get("enabled") is False, "preprocess cache should be disabled") + assert_eq(report["compile"]["called"], True, "compile should run when preprocess cache is disabled") + return + + if args.mode == "preamble_cache_disabled": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + cmd = command_without_preamble(normalize_command(args.command)) + run_build(args, cmd) + set_body_value(input_path, pick_body_value(rng, 1)) + delete_path(report_path) + run_build(args, cmd) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache should miss on body change") + assert_eq(report["preprocess_cache"]["status"], "hit", "preprocess cache hit expected") + preamble = report.get("preamble", {}) + assert_true(preamble.get("enabled") is False, "preamble should be disabled") + assert_true(preamble.get("used", False) is False, "preamble should not be used when disabled") + set_body_value(input_path, 1) + return + + if args.mode == "shader_cache_isolation": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + cmd = command_without_preprocess_cache(command_without_preamble(normalize_command(args.command))) + run_build(args, cmd) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected on first run") + set_body_value(input_path, pick_body_value(rng, 1)) + 
delete_path(report_path) + run_build(args, cmd) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache should not hit on changed body") + assert_eq(report["compile"]["called"], True, "compile should run on shader cache miss") + set_body_value(input_path, 1) + return + + if args.mode == "deps_invalidation": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + original_proxy = proxy_path.read_text(encoding="utf-8") + try: + run_build(args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected on cold run") + assert_eq(report["preprocess_cache"]["status"], "miss", "preprocess cache miss expected on cold run") + set_proxy_defines(proxy_path, pick_defines(rng)) + delete_path(report_path) + run_build(args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], False, "shader cache miss expected after dep change") + assert_eq(report["preprocess_cache"]["status"], "miss", "preprocess cache miss expected after dep change") + finally: + proxy_path.write_text(original_proxy, encoding="utf-8") + return + + if args.mode == "path_normalization": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + original_proxy = proxy_path.read_text(encoding="utf-8") + try: + set_proxy_includes(proxy_path, normalized_includes()) + run_build(args) + delete_path(report_path) + run_build(args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], True, "shader cache hit expected with normalized includes") + assert_eq(report["preprocess_cache"]["status"], "skipped", "preprocess cache should be skipped on shader hit") + finally: + proxy_path.write_text(original_proxy, encoding="utf-8") + return + + if args.mode == "random_defines": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + original_proxy = proxy_path.read_text(encoding="utf-8") + try: + 
set_proxy_defines(proxy_path, pick_defines(rng)) + run_build(args) + delete_path(report_path) + run_build(args) + report = load_report(report_path) + assert_eq(report["shader_cache"]["hit"], True, "shader cache hit expected after randomized defines") + finally: + proxy_path.write_text(original_proxy, encoding="utf-8") + return + + if args.mode == "parallel_smoke": + base_cmd = normalize_command(args.command) + base_output = output_path + + def make_paths(idx): + new_output = base_output.with_name(f"{base_output.stem}.p{idx}{base_output.suffix}") + new_report = Path(str(new_output) + ".report.json") + new_log = Path(str(new_output) + ".log") + new_dep = Path(str(new_output) + ".dep") + return new_output, new_report, new_log, new_dep + + def worker(idx): + new_output, new_report, new_log, new_dep = make_paths(idx) + for p in [new_output, new_report, new_log, new_dep]: + delete_path(p) + cmd = replace_option_value(base_cmd, "-Fc", str(new_output)) + cmd = replace_option_value(cmd, "-MF", str(new_dep)) + cmd = replace_option_value(cmd, "-nbl-report", str(new_report)) + run_build(args, cmd) + assert_true(new_report.exists(), "parallel report not found") + load_report(new_report) + + with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, args.parallel_jobs)) as pool: + futures = [pool.submit(worker, idx) for idx in range(args.parallel_jobs)] + for fut in futures: + fut.result() + return + + if args.mode == "stress": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + totals = [] + totals_with_output = [] + for _ in range(args.iterations): + delete_path(report_path) + run_build(args) + report = load_report(report_path) + totals.append(report_time_ms(report)) + totals_with_output.append(int(report.get("total_with_output_ms", report_time_ms(report)))) + if totals: + print(f"stress total_ms median={statistics.median(totals)} p95={percentile(totals, 95)} samples={len(totals)}") + print(f"stress total_with_output_ms 
median={statistics.median(totals_with_output)} p95={percentile(totals_with_output, 95)} samples={len(totals_with_output)}") + return + + if args.mode == "report_schema": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + report = load_report(report_path) + assert_report_schema(report) + return + + if args.mode == "depfile_contents": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + run_build(args) + depfile_path = Path(args.depfile) if args.depfile else Path(str(output_path) + ".dep") + dep_text = normalize_dep_path(depfile_path.read_text(encoding="utf-8")) + input_text = normalize_dep_path(str(input_path)) + proxy_text = normalize_dep_path(str(proxy_path)) + assert_true(input_text in dep_text, "depfile missing input path") + assert_true(proxy_text in dep_text, "depfile missing proxy path") + return + + if args.mode == "cache_path_override": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + override_shader = Path(str(output_path) + ".override.ppcache") + override_preprocess = Path(str(output_path) + ".override.ppcache.pre") + delete_path(override_shader) + delete_path(override_preprocess) + cmd = normalize_command(args.command) + if not cmd: + raise RuntimeError("missing command for cache override test") + insert_at = max(len(cmd) - 1, 0) + cmd = ( + cmd[:insert_at] + + ["-shader-cache-file", str(override_shader), "-preprocess-cache-file", str(override_preprocess)] + + cmd[insert_at:] + ) + run_build(args, cmd) + assert_exists(override_shader, "shader cache override file not created") + assert_exists(override_preprocess, "preprocess cache override file not created") + return + + if args.mode == "large_include_graph": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + original_proxy = proxy_path.read_text(encoding="utf-8") + created = [] + try: + for idx in range(25): + inc_path = proxy_path.parent / 
f"dummy_inc_{idx}.hlsl" + inc_path.write_text(f"// dummy {idx}\n", encoding="utf-8") + created.append(inc_path) + includes = default_builtin_includes() + includes.extend([f"#include \"{p.name}\"" for p in created]) + set_proxy_includes(proxy_path, includes) + run_build(args) + depfile_path = Path(args.depfile) if args.depfile else Path(str(output_path) + ".dep") + dep_text = normalize_dep_path(depfile_path.read_text(encoding="utf-8")) + for idx in [0, len(created) // 2, len(created) - 1]: + check_path = normalize_dep_path(str(created[idx])) + assert_true(check_path in dep_text, "depfile missing dummy include") + finally: + proxy_path.write_text(original_proxy, encoding="utf-8") + for p in created: + delete_path(p) + return + + if args.mode == "unused_include": + set_body_value(input_path, 1) + cleanup_outputs(output_path, report_path, args) + original_proxy = proxy_path.read_text(encoding="utf-8") + unused_path = proxy_path.parent / "unused_inc.hlsl" + try: + unused_path.write_text("// unused\n", encoding="utf-8") + includes = default_builtin_includes() + includes.extend(["#if 0", f"#include \"{unused_path.name}\"", "#endif"]) + set_proxy_includes(proxy_path, includes) + run_build(args) + depfile_path = Path(args.depfile) if args.depfile else Path(str(output_path) + ".dep") + dep_text = normalize_dep_path(depfile_path.read_text(encoding="utf-8")) + assert_true(normalize_dep_path(str(unused_path)) not in dep_text, "depfile should not include unused include") + finally: + proxy_path.write_text(original_proxy, encoding="utf-8") + delete_path(unused_path) + return + + raise RuntimeError(f"unknown mode: {args.mode}") + + +if __name__ == "__main__": + main()