Skip to content

Commit 31d5268

Browse files
davidrohr authored and ktf committed
GPU: Set launch_bounds for all kernels depending on the number of threads used
1 parent 40a995c commit 31d5268

File tree

4 files changed

+33
-35
lines changed

4 files changed

+33
-35
lines changed

GPU/Common/GPUDefGPUParameters.h

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,17 +27,15 @@
2727
#define GPUCA_THREAD_COUNT_CONSTRUCTOR 128
2828
#define GPUCA_THREAD_COUNT_SELECTOR 128
2929
#define GPUCA_THREAD_COUNT_FINDER 128
30-
#define GPUCA_NEIGHBORSFINDER_REGS REG, (GPUCA_THREAD_COUNT_FINDER, 1)
3130
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0
3231
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12
3332
#elif defined(GPUCA_GPUTYPE_TURING)
34-
#define GPUCA_BLOCK_COUNT_CONSTRUCTOR_MULTIPLIER 2
35-
#define GPUCA_BLOCK_COUNT_SELECTOR_MULTIPLIER 4
33+
#define GPUCA_BLOCK_COUNT_CONSTRUCTOR_MULTIPLIER 1
34+
#define GPUCA_BLOCK_COUNT_SELECTOR_MULTIPLIER 2
3635
#define GPUCA_THREAD_COUNT 256
3736
#define GPUCA_THREAD_COUNT_CONSTRUCTOR 1024
3837
#define GPUCA_THREAD_COUNT_SELECTOR 512
3938
#define GPUCA_THREAD_COUNT_FINDER 512
40-
#define GPUCA_NEIGHBORSFINDER_REGS REG, (GPUCA_THREAD_COUNT_FINDER, 1)
4139
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6
4240
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12
4341
// #define GPUCA_USE_TEXTURES
@@ -48,10 +46,10 @@
4846

4947
// Default settings, if not already set for selected GPU type
5048
#ifndef GPUCA_BLOCK_COUNT_CONSTRUCTOR_MULTIPLIER
51-
#define GPUCA_BLOCK_COUNT_CONSTRUCTOR_MULTIPLIER 2
49+
#define GPUCA_BLOCK_COUNT_CONSTRUCTOR_MULTIPLIER 1
5250
#endif
5351
#ifndef GPUCA_BLOCK_COUNT_SELECTOR_MULTIPLIER
54-
#define GPUCA_BLOCK_COUNT_SELECTOR_MULTIPLIER 3
52+
#define GPUCA_BLOCK_COUNT_SELECTOR_MULTIPLIER 1
5553
#endif
5654
#ifndef GPUCA_THREAD_COUNT
5755
#define GPUCA_THREAD_COUNT 256

GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,7 @@ GPUg() void GPUCA_ATTRRES(GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CA
9595
GPUCA_KRNL_POST()
9696

9797
#define GPUCA_KRNL_(x_class, x_attributes, x_arguments, x_forward) GPUCA_KRNL_single(x_class, x_attributes, x_arguments, x_forward)
98+
#define GPUCA_KRNL_simple(x_class, x_attributes, x_arguments, x_forward) GPUCA_KRNL_single(x_class, x_attributes, x_arguments, x_forward)
9899
#define GPUCA_KRNL_both(x_class, x_attributes, x_arguments, x_forward) \
99100
GPUCA_KRNLGPU_SINGLE(x_class, x_attributes, x_arguments, x_forward) \
100101
GPUCA_KRNLGPU_MULTI(x_class, x_attributes, x_arguments, x_forward) \
@@ -107,6 +108,7 @@ GPUg() void GPUCA_ATTRRES(GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CA
107108
GPUCA_KRNL_POST()
108109

109110
#define GPUCA_KRNL_LOAD_(x_class, x_attributes, x_arguments, x_forward) GPUCA_KRNL_LOAD_single(x_class, x_attributes, x_arguments, x_forward)
111+
#define GPUCA_KRNL_LOAD_simple(x_class, x_attributes, x_arguments, x_forward) GPUCA_KRNL_LOAD_single(x_class, x_attributes, x_arguments, x_forward)
110112
#define GPUCA_KRNL_LOAD_both(x_class, x_attributes, x_arguments, x_forward) \
111113
GPUCA_KRNL_LOAD_single(x_class, x_attributes, x_arguments, x_forward) \
112114
GPUCA_KRNL_LOAD_multi(x_class, x_attributes, x_arguments, x_forward)

GPU/GPUTracking/Base/GPUReconstructionKernels.h

Lines changed: 27 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -15,35 +15,35 @@
1515
#include "GPUReconstructionKernelMacros.h"
1616

1717
// clang-format off
18-
GPUCA_KRNL((GPUTPCNeighboursFinder ), (single, GPUCA_NEIGHBORSFINDER_REGS), (), ())
19-
GPUCA_KRNL((GPUTPCNeighboursCleaner ), (single), (), ())
20-
GPUCA_KRNL((GPUTPCStartHitsFinder ), (single), (), ())
21-
GPUCA_KRNL((GPUTPCStartHitsSorter ), (single), (), ())
22-
GPUCA_KRNL((GPUTPCTrackletConstructor, singleSlice ), (single), (), ())
23-
GPUCA_KRNL((GPUTPCTrackletConstructor, allSlices ), (single), (), ())
24-
GPUCA_KRNL((GPUTPCTrackletSelector ), (both), (), ())
25-
GPUCA_KRNL((GPUMemClean16 ), (), (, GPUPtr1(void*, ptr), unsigned long size), (, GPUPtr2(void*, ptr), size))
18+
GPUCA_KRNL((GPUTPCNeighboursFinder ), (single, REG, (GPUCA_THREAD_COUNT_FINDER, 1)), (), ())
19+
GPUCA_KRNL((GPUTPCNeighboursCleaner ), (single, REG, (GPUCA_THREAD_COUNT, 1)), (), ())
20+
GPUCA_KRNL((GPUTPCStartHitsFinder ), (single, REG, (GPUCA_THREAD_COUNT, 1)), (), ())
21+
GPUCA_KRNL((GPUTPCStartHitsSorter ), (single, REG, (GPUCA_THREAD_COUNT, 1)), (), ())
22+
GPUCA_KRNL((GPUTPCTrackletConstructor, singleSlice ), (single, REG, (GPUCA_THREAD_COUNT_CONSTRUCTOR, GPUCA_BLOCK_COUNT_CONSTRUCTOR_MULTIPLIER)), (), ())
23+
GPUCA_KRNL((GPUTPCTrackletConstructor, allSlices ), (single, REG, (GPUCA_THREAD_COUNT_CONSTRUCTOR, GPUCA_BLOCK_COUNT_CONSTRUCTOR_MULTIPLIER)), (), ())
24+
GPUCA_KRNL((GPUTPCTrackletSelector ), (both, REG, (GPUCA_THREAD_COUNT_SELECTOR, GPUCA_BLOCK_COUNT_SELECTOR_MULTIPLIER)), (), ())
25+
GPUCA_KRNL((GPUMemClean16 ), (simple, REG, (GPUCA_THREAD_COUNT, 1)), (, GPUPtr1(void*, ptr), unsigned long size), (, GPUPtr2(void*, ptr), size))
2626
#ifndef GPUCA_OPENCL1
27-
GPUCA_KRNL((GPUTPCGMMergerTrackFit ), (), (), ())
27+
GPUCA_KRNL((GPUTPCGMMergerTrackFit ), (simple, REG, (GPUCA_THREAD_COUNT_FIT, 1)), (), ())
2828
#ifdef HAVE_O2HEADERS
29-
GPUCA_KRNL((GPUTRDTrackerGPU ), (), (), ())
30-
GPUCA_KRNL((GPUITSFitterKernel ), (), (), ())
31-
GPUCA_KRNL((GPUTPCConvertKernel ), (), (), ())
32-
GPUCA_KRNL((GPUTPCCompressionKernels, step0attached ), (), (), ())
33-
GPUCA_KRNL((GPUTPCCompressionKernels, step1unattached ), (), (), ())
34-
GPUCA_KRNL((GPUTPCCFChargeMapFiller, fillChargeMap ), (single), (), ())
35-
GPUCA_KRNL((GPUTPCCFChargeMapFiller, resetMaps ), (single), (), ())
36-
GPUCA_KRNL((GPUTPCCFPeakFinder ), (single), (), ())
37-
GPUCA_KRNL((GPUTPCCFNoiseSuppression, noiseSuppression ), (single), (), ())
38-
GPUCA_KRNL((GPUTPCCFNoiseSuppression, updatePeaks ), (single), (), ())
39-
GPUCA_KRNL((GPUTPCCFDeconvolution ), (single), (), ())
40-
GPUCA_KRNL((GPUTPCCFClusterizer ), (single), (), ())
41-
GPUCA_KRNL((GPUTPCCFStreamCompaction, nativeScanUpStart), (single), (, int iBuf, int stage), (, iBuf, stage))
42-
GPUCA_KRNL((GPUTPCCFStreamCompaction, nativeScanUp ), (single), (, int iBuf, int nElems), (, iBuf, nElems))
43-
GPUCA_KRNL((GPUTPCCFStreamCompaction, nativeScanTop ), (single), (, int iBuf, int nElems), (, iBuf, nElems))
44-
GPUCA_KRNL((GPUTPCCFStreamCompaction, nativeScanDown ), (single), (, int iBuf, unsigned int offset, int nElems), (, iBuf, offset, nElems))
45-
GPUCA_KRNL((GPUTPCCFStreamCompaction, compactDigit ), (single), (, int iBuf, int stage, GPUPtr1(deprecated::PackedDigit*, in), GPUPtr1(deprecated::PackedDigit*, out)), (, iBuf, stage, GPUPtr2(deprecated::PackedDigit*, in), GPUPtr2(deprecated::PackedDigit*, out)))
46-
GPUCA_KRNL((GPUTPCCFDecodeZS ), (single), (), ())
29+
GPUCA_KRNL((GPUTRDTrackerGPU ), (simple, REG, (GPUCA_THREAD_COUNT_TRD, 1)), (), ())
30+
GPUCA_KRNL((GPUITSFitterKernel ), (simple, REG, (GPUCA_THREAD_COUNT_ITS, 1)), (), ())
31+
GPUCA_KRNL((GPUTPCConvertKernel ), (simple, REG, (GPUCA_THREAD_COUNT_CONVERTER, 1)), (), ())
32+
GPUCA_KRNL((GPUTPCCompressionKernels, step0attached ), (simple, REG, (GPUCA_THREAD_COUNT_COMPRESSION1, 1)), (), ())
33+
GPUCA_KRNL((GPUTPCCompressionKernels, step1unattached ), (simple, REG, (GPUCA_THREAD_COUNT_COMPRESSION2, 1)), (), ())
34+
GPUCA_KRNL((GPUTPCCFChargeMapFiller, fillChargeMap ), (single, REG, (GPUCA_THREAD_COUNT_CLUSTERER, 1)), (), ())
35+
GPUCA_KRNL((GPUTPCCFChargeMapFiller, resetMaps ), (single, REG, (GPUCA_THREAD_COUNT_CLUSTERER, 1)), (), ())
36+
GPUCA_KRNL((GPUTPCCFPeakFinder ), (single, REG, (GPUCA_THREAD_COUNT_CLUSTERER, 1)), (), ())
37+
GPUCA_KRNL((GPUTPCCFNoiseSuppression, noiseSuppression ), (single, REG, (GPUCA_THREAD_COUNT_CLUSTERER, 1)), (), ())
38+
GPUCA_KRNL((GPUTPCCFNoiseSuppression, updatePeaks ), (single, REG, (GPUCA_THREAD_COUNT_CLUSTERER, 1)), (), ())
39+
GPUCA_KRNL((GPUTPCCFDeconvolution ), (single, REG, (GPUCA_THREAD_COUNT_CLUSTERER, 1)), (), ())
40+
GPUCA_KRNL((GPUTPCCFClusterizer ), (single, REG, (GPUCA_THREAD_COUNT_CLUSTERER, 1)), (), ())
41+
GPUCA_KRNL((GPUTPCCFStreamCompaction, nativeScanUpStart), (single, REG, (GPUCA_THREAD_COUNT_SCAN, 1)), (, int iBuf, int stage), (, iBuf, stage))
42+
GPUCA_KRNL((GPUTPCCFStreamCompaction, nativeScanUp ), (single, REG, (GPUCA_THREAD_COUNT_SCAN, 1)), (, int iBuf, int nElems), (, iBuf, nElems))
43+
GPUCA_KRNL((GPUTPCCFStreamCompaction, nativeScanTop ), (single, REG, (GPUCA_THREAD_COUNT_SCAN, 1)), (, int iBuf, int nElems), (, iBuf, nElems))
44+
GPUCA_KRNL((GPUTPCCFStreamCompaction, nativeScanDown ), (single, REG, (GPUCA_THREAD_COUNT_SCAN, 1)), (, int iBuf, unsigned int offset, int nElems), (, iBuf, offset, nElems))
45+
GPUCA_KRNL((GPUTPCCFStreamCompaction, compactDigit ), (single, REG, (GPUCA_THREAD_COUNT_SCAN, 1)), (, int iBuf, int stage, GPUPtr1(deprecated::PackedDigit*, in), GPUPtr1(deprecated::PackedDigit*, out)), (, iBuf, stage, GPUPtr2(deprecated::PackedDigit*, in), GPUPtr2(deprecated::PackedDigit*, out)))
46+
GPUCA_KRNL((GPUTPCCFDecodeZS ), (single, REG, (GPUCA_THREAD_COUNT_CFDECODE, 1)), (), ())
4747
#endif
4848
#endif
4949
// clang-format on

GPU/GPUTracking/Base/cuda/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,6 @@ message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}")
1818
set(SRCS GPUReconstructionCUDA.cu)
1919
set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h)
2020

21-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --maxrregcount 64")
22-
2321
if(ALIGPU_BUILD_TYPE STREQUAL "O2")
2422
o2_add_library(
2523
${MODULE}

0 commit comments

Comments
 (0)