@@ -610,25 +610,25 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies()
610610 GPUChkErr(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0));
611611 GPUChkErr(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i]));
612612 GPUChkErr(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i]));
613- GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", mInternals->kernelNames[i] .c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem);
613+ GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", GetKernelName(i) .c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem);
614614 }
615615}
616616
617617void GPUReconstructionCUDA::loadKernelModules(bool perKernel)
618618{
619619 uint32_t j = 0;
620620#define GPUCA_KRNL(x_class, ...) \
621- getRTCkernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>(mInternals->kernelFunctions.size()); \
621+ if (GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>() != j) { \
622+ GPUFatal("kernel numbers out of sync"); \
623+ } \
622624 mInternals->kernelFunctions.emplace_back(new CUfunction); \
623- mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \
624625 if (mProcessingSettings.debugLevel >= 3) { \
625626 GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \
626627 } \
627628 GPUChkErr(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \
628629 j++;
629630#include "GPUReconstructionKernelList.h"
630631#undef GPUCA_KRNL
631-
632632 if (j != mInternals->kernelModules.size()) {
633633 GPUFatal("Did not load all kernels (%u < %u)", j, (uint32_t)mInternals->kernelModules.size());
634634 }
0 commit comments