1313
1414#define GPUCA_GPUTYPE_VEGA
1515#include < hip/hip_runtime.h>
16+ #include " hip/hip_ext.h"
1617
1718#ifdef __CUDACC__
1819#define __HIPCC_CUDA__
@@ -73,13 +74,22 @@ GPUg() void runKernelHIP(GPUCA_CONSMEM_PTR int iSlice, Args... args)
7374
7475#undef GPUCA_KRNL_REG
7576#define GPUCA_KRNL_REG (args ) __launch_bounds__(GPUCA_M_STRIP(args))
77+ #undef GPUCA_KRNL_BACKEND_XARGS
78+ #define GPUCA_KRNL_BACKEND_XARGS hipEvent_t *start, hipEvent_t *stop,
7679#define GPUCA_KRNL (x_class, x_attributes, x_arguments, x_forward ) GPUCA_KRNL_WRAP(GPUCA_KRNL_, x_class, x_attributes, x_arguments, x_forward)
7780#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionHIPBackend
78- #define GPUCA_KRNL_CALL_single (x_class, x_attributes, x_arguments, x_forward ) \
79- hipLaunchKernelGGL (HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->HIPStreams[x.stream], GPUCA_CONSMEM_CALL y.start, args...);
80- #define GPUCA_KRNL_CALL_multi (x_class, x_attributes, x_arguments, x_forward ) \
81- hipLaunchKernelGGL (HIP_KERNEL_NAME(GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->HIPStreams[x.stream], GPUCA_CONSMEM_CALL y.start, y.num, args...);
82-
81+ #define GPUCA_KRNL_CALL_single (x_class, x_attributes, x_arguments, x_forward ) \
82+ if (start == nullptr ) { \
83+ hipLaunchKernelGGL (HIP_KERNEL_NAME (GPUCA_M_CAT (krnl_, GPUCA_M_KRNL_NAME (x_class))), dim3 (x.nBlocks ), dim3 (x.nThreads ), 0 , me->mInternals ->HIPStreams [x.stream ], GPUCA_CONSMEM_CALL y.start , args...); \
84+ } else { \
85+ hipExtLaunchKernelGGL (HIP_KERNEL_NAME (GPUCA_M_CAT (krnl_, GPUCA_M_KRNL_NAME (x_class))), dim3 (x.nBlocks ), dim3 (x.nThreads ), 0 , me->mInternals ->HIPStreams [x.stream ], *start, *stop, 0 , GPUCA_CONSMEM_CALL y.start , args...); \
86+ }
87+ #define GPUCA_KRNL_CALL_multi (x_class, x_attributes, x_arguments, x_forward ) \
88+ if (start == nullptr ) { \
89+ hipLaunchKernelGGL (HIP_KERNEL_NAME (GPUCA_M_CAT3 (krnl_, GPUCA_M_KRNL_NAME (x_class), _multi)), dim3 (x.nBlocks ), dim3 (x.nThreads ), 0 , me->mInternals ->HIPStreams [x.stream ], GPUCA_CONSMEM_CALL y.start , y.num , args...); \
90+ } else { \
91+ hipExtLaunchKernelGGL (HIP_KERNEL_NAME (GPUCA_M_CAT3 (krnl_, GPUCA_M_KRNL_NAME (x_class), _multi)), dim3 (x.nBlocks ), dim3 (x.nThreads ), 0 , me->mInternals ->HIPStreams [x.stream ], *start, *stop, 0 , GPUCA_CONSMEM_CALL y.start , y.num , args...); \
92+ }
8393#include " GPUReconstructionKernels.h"
8494#undef GPUCA_KRNL
8595
@@ -97,17 +107,15 @@ int GPUReconstructionHIPBackend::runKernelBackend(krnlSetup& _xyz, Args... args)
97107 if (mDeviceProcessingSettings .deviceTimers ) {
98108 GPUFailedMsg (hipEventCreate (&start));
99109 GPUFailedMsg (hipEventCreate (&stop));
100- GPUFailedMsg (hipEventRecord (start, mInternals ->HIPStreams [x.stream ]));
101- }
102- backendInternal<T, I>::runKernelBackendInternal (_xyz, this , args...);
103- if (mDeviceProcessingSettings .deviceTimers ) {
104- GPUFailedMsg (hipEventRecord (stop, mInternals ->HIPStreams [x.stream ]));
110+ backendInternal<T, I>::runKernelBackendInternal (_xyz, this , &start, &stop, args...);
105111 GPUFailedMsg (hipEventSynchronize (stop));
106112 float v;
107113 GPUFailedMsg (hipEventElapsedTime (&v, start, stop));
108114 _xyz.t = v * 1 .e -3 ;
109115 GPUFailedMsg (hipEventDestroy (start));
110116 GPUFailedMsg (hipEventDestroy (stop));
117+ } else {
118+ backendInternal<T, I>::runKernelBackendInternal (_xyz, this , nullptr , nullptr , args...);
111119 }
112120 GPUFailedMsg (hipGetLastError ());
113121 if (z.ev ) {
0 commit comments