Skip to content

Commit 40a995c

Browse files
davidrohr authored and ktf committed
GPU: More fine-grained GPU Thread Count constants
1 parent 4f2ef79 commit 40a995c

File tree

9 files changed

+59
-9
lines changed

9 files changed

+59
-9
lines changed

GPU/Common/GPUDefGPUParameters.h

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,12 +68,31 @@
6868
#ifndef GPUCA_THREAD_COUNT_TRD
6969
#define GPUCA_THREAD_COUNT_TRD 512
7070
#endif
71+
//#ifndef GPUCA_THREAD_COUNT_SCAN
72+
#define GPUCA_THREAD_COUNT_SCAN 512 // WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix!
73+
//#endif
74+
#ifndef GPUCA_THREAD_COUNT_CONVERTER
75+
#define GPUCA_THREAD_COUNT_CONVERTER 256
76+
#endif
77+
#ifndef GPUCA_THREAD_COUNT_COMPRESSION1
78+
#define GPUCA_THREAD_COUNT_COMPRESSION1 256
79+
#endif
80+
#ifndef GPUCA_THREAD_COUNT_COMPRESSION2
81+
#define GPUCA_THREAD_COUNT_COMPRESSION2 256
82+
#endif
83+
#ifndef GPUCA_THREAD_COUNT_CFDECODE
84+
#define GPUCA_THREAD_COUNT_CFDECODE 256
85+
#endif
7186
#ifndef GPUCA_THREAD_COUNT_CLUSTERER
7287
#define GPUCA_THREAD_COUNT_CLUSTERER 128
7388
#endif
74-
#ifndef GPUCA_THREAD_COUNT_SCAN
75-
#define GPUCA_THREAD_COUNT_SCAN 512
89+
#ifndef GPUCA_THREAD_COUNT_FIT
90+
#define GPUCA_THREAD_COUNT_FIT 256
91+
#endif
92+
#ifndef GPUCA_THREAD_COUNT_ITS
93+
#define GPUCA_THREAD_COUNT_ITS 256
7694
#endif
95+
7796
#ifndef GPUCA_NEIGHBORSFINDER_REGS
7897
#define GPUCA_NEIGHBORSFINDER_REGS NONE, 0
7998
#endif

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,8 @@ int GPUReconstructionCPU::ExitDevice()
153153
return 0;
154154
}
155155

156-
void GPUReconstructionCPU::SetThreadCounts() { mThreadCount = mBlockCount = mConstructorBlockCount = mSelectorBlockCount = mConstructorThreadCount = mSelectorThreadCount = mFinderThreadCount = mTRDThreadCount = mClustererThreadCount = mScanThreadCount = 1; }
156+
void GPUReconstructionCPU::SetThreadCounts() { mThreadCount = mBlockCount = mConstructorBlockCount = mSelectorBlockCount = mConstructorThreadCount = mSelectorThreadCount = mFinderThreadCount = mTRDThreadCount = mClustererThreadCount = mScanThreadCount = mConverterThreadCount =
157+
mCompression1ThreadCount = mCompression2ThreadCount = mCFDecodeThreadCount = mFitThreadCount = mITSThreadCount = 1; }
157158

158159
void GPUReconstructionCPU::SetThreadCounts(RecoStep step)
159160
{

GPU/GPUTracking/Base/GPUReconstructionCPU.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -186,6 +186,12 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
186186
unsigned int mTRDThreadCount = 0;
187187
unsigned int mClustererThreadCount = 0;
188188
unsigned int mScanThreadCount = 0;
189+
unsigned int mConverterThreadCount = 0;
190+
unsigned int mCompression1ThreadCount = 0;
191+
unsigned int mCompression2ThreadCount = 0;
192+
unsigned int mCFDecodeThreadCount = 0;
193+
unsigned int mFitThreadCount = 0;
194+
unsigned int mITSThreadCount = 0;
189195

190196
int mThreadId = -1; // Thread ID that is valid for the local CUDA context
191197
int mGPUStuck = 0; // Marks that the GPU is stuck, skip future events

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -552,6 +552,12 @@ void GPUReconstructionCUDABackend::SetThreadCounts()
552552
mTRDThreadCount = GPUCA_THREAD_COUNT_TRD;
553553
mClustererThreadCount = GPUCA_THREAD_COUNT_CLUSTERER;
554554
mScanThreadCount = GPUCA_THREAD_COUNT_SCAN;
555+
mConverterThreadCount = GPUCA_THREAD_COUNT_CONVERTER;
556+
mCompression1ThreadCount = GPUCA_THREAD_COUNT_COMPRESSION1;
557+
mCompression2ThreadCount = GPUCA_THREAD_COUNT_COMPRESSION2;
558+
mCFDecodeThreadCount = GPUCA_THREAD_COUNT_CFDECODE;
559+
mFitThreadCount = GPUCA_THREAD_COUNT_FIT;
560+
mITSThreadCount = GPUCA_THREAD_COUNT_ITS;
555561
}
556562

557563
int GPUReconstructionCUDABackend::registerMemoryForGPU(void* ptr, size_t size)

GPU/GPUTracking/Base/hip/GPUReconstructionHIP.hip.cxx

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -492,6 +492,12 @@ void GPUReconstructionHIPBackend::SetThreadCounts()
492492
mTRDThreadCount = GPUCA_THREAD_COUNT_TRD;
493493
mClustererThreadCount = GPUCA_THREAD_COUNT_CLUSTERER;
494494
mScanThreadCount = GPUCA_THREAD_COUNT_SCAN;
495+
mConverterThreadCount = GPUCA_THREAD_COUNT_CONVERTER;
496+
mCompression1ThreadCount = GPUCA_THREAD_COUNT_COMPRESSION1;
497+
mCompression2ThreadCount = GPUCA_THREAD_COUNT_COMPRESSION2;
498+
mCFDecodeThreadCount = GPUCA_THREAD_COUNT_CFDECODE;
499+
mFitThreadCount = GPUCA_THREAD_COUNT_FIT;
500+
mITSThreadCount = GPUCA_THREAD_COUNT_ITS;
495501
}
496502

497503
int GPUReconstructionHIPBackend::registerMemoryForGPU(void* ptr, size_t size)

GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cxx

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -471,4 +471,10 @@ void GPUReconstructionOCL::SetThreadCounts()
471471
mTRDThreadCount = GPUCA_THREAD_COUNT_TRD;
472472
mClustererThreadCount = GPUCA_THREAD_COUNT_CLUSTERER;
473473
mScanThreadCount = GPUCA_THREAD_COUNT_SCAN;
474+
mConverterThreadCount = GPUCA_THREAD_COUNT_CONVERTER;
475+
mCompression1ThreadCount = GPUCA_THREAD_COUNT_COMPRESSION1;
476+
mCompression2ThreadCount = GPUCA_THREAD_COUNT_COMPRESSION2;
477+
mCFDecodeThreadCount = GPUCA_THREAD_COUNT_CFDECODE;
478+
mFitThreadCount = GPUCA_THREAD_COUNT_FIT;
479+
mITSThreadCount = GPUCA_THREAD_COUNT_ITS;
474480
}

GPU/GPUTracking/Global/GPUChain.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -176,6 +176,12 @@ class GPUChain
176176
inline unsigned int ClustererThreadCount() const { return mRec->mClustererThreadCount; }
177177
inline unsigned int ScanThreadCount() const { return mRec->mScanThreadCount; }
178178
inline unsigned int TRDThreadCount() const { return mRec->mTRDThreadCount; }
179+
inline unsigned int ConverterThreadCount() const { return mRec->mConverterThreadCount; }
180+
inline unsigned int Compression1ThreadCount() const { return mRec->mCompression1ThreadCount; }
181+
inline unsigned int Compression2ThreadCount() const { return mRec->mCompression2ThreadCount; }
182+
inline unsigned int CFDecodeThreadCount() const { return mRec->mCFDecodeThreadCount; }
183+
inline unsigned int FitThreadCount() const { return mRec->mFitThreadCount; }
184+
inline unsigned int ITSThreadCount() const { return mRec->mITSThreadCount; }
179185
inline size_t AllocateRegisteredMemory(GPUProcessor* proc) { return mRec->AllocateRegisteredMemory(proc); }
180186
inline size_t AllocateRegisteredMemory(short res, GPUOutputControl* control = nullptr) { return mRec->AllocateRegisteredMemory(res, control); }
181187
template <class T>

GPU/GPUTracking/Global/GPUChainITS.cxx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ int GPUChainITS::RunITSTrackFit(std::vector<Road>& roads, std::array<const Clust
100100

101101
WriteToConstantMemory(RecoStep::ITSTracking, (char*)&processors()->itsFitter - (char*)processors(), &FitterShadow, sizeof(FitterShadow), 0);
102102
TransferMemoryResourcesToGPU(RecoStep::ITSTracking, &Fitter, 0);
103-
runKernel<GPUITSFitterKernel>({BlockCount(), ThreadCount(), 0}, krnlRunRangeNone, krnlEventNone);
103+
runKernel<GPUITSFitterKernel>({BlockCount(), ITSThreadCount(), 0}, krnlRunRangeNone, krnlEventNone);
104104
TransferMemoryResourcesToHost(RecoStep::ITSTracking, &Fitter, 0);
105105

106106
SynchronizeGPU();

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -669,7 +669,7 @@ int GPUChainTracking::ConvertNativeToClusterData()
669669

670670
WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->tpcConverter - (char*)processors(), &convertShadow, sizeof(convertShadow), 0);
671671
TransferMemoryResourcesToGPU(RecoStep::TPCConversion, &convert, 0);
672-
runKernel<GPUTPCConvertKernel>({NSLICES * GPUCA_ROW_COUNT, ThreadCount(), 0}, krnlRunRangeNone, krnlEventNone);
672+
runKernel<GPUTPCConvertKernel>({NSLICES * GPUCA_ROW_COUNT, ConverterThreadCount(), 0}, krnlRunRangeNone, krnlEventNone);
673673
TransferMemoryResourcesToHost(RecoStep::TPCConversion, &convert, 0);
674674
SynchronizeGPU();
675675

@@ -982,7 +982,7 @@ int GPUChainTracking::RunTPCClusterizer()
982982
continue;
983983
}
984984
if (mIOPtrs.tpcZS) {
985-
runKernel<GPUTPCCFDecodeZS, GPUTPCCFDecodeZS::decodeZS>({GPUTrackingInOutZS::NENDPOINTS, ClustererThreadCount(), lane}, {iSlice}, {});
985+
runKernel<GPUTPCCFDecodeZS, GPUTPCCFDecodeZS::decodeZS>({GPUTrackingInOutZS::NENDPOINTS, CFDecodeThreadCount(), lane}, {iSlice}, {});
986986
TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
987987
SynchronizeStream(lane);
988988
}
@@ -1577,7 +1577,7 @@ int GPUChainTracking::RunTPCTrackingMerger()
15771577
TransferMemoryResourceLinkToGPU(RecoStep::TPCMerging, Merger.MemoryResRefit());
15781578
timerCopyToGPU.Stop();
15791579

1580-
runKernel<GPUTPCGMMergerTrackFit>({BlockCount(), ThreadCount(), 0}, krnlRunRangeNone);
1580+
runKernel<GPUTPCGMMergerTrackFit>({BlockCount(), FitThreadCount(), 0}, krnlRunRangeNone);
15811581
SynchronizeGPU();
15821582

15831583
timerCopyToHost.Start();
@@ -1631,8 +1631,8 @@ int GPUChainTracking::RunTPCCompression()
16311631
WriteToConstantMemory(myStep, (char*)&processors()->tpcCompressor - (char*)processors(), &CompressorShadow, sizeof(CompressorShadow), 0);
16321632
TransferMemoryResourcesToGPU(myStep, &Compressor, 0);
16331633
runKernel<GPUMemClean16>({BlockCount(), ThreadCount(), 0, RecoStep::TPCCompression}, krnlRunRangeNone, krnlEventNone, CompressorShadow.mClusterStatus, Compressor.mMaxClusters * sizeof(CompressorShadow.mClusterStatus[0]));
1634-
runKernel<GPUTPCCompressionKernels, GPUTPCCompressionKernels::step0attached>({BlockCount(), ThreadCount(), 0}, krnlRunRangeNone, krnlEventNone);
1635-
runKernel<GPUTPCCompressionKernels, GPUTPCCompressionKernels::step1unattached>({BlockCount(), ThreadCount(), 0}, krnlRunRangeNone, krnlEventNone);
1634+
runKernel<GPUTPCCompressionKernels, GPUTPCCompressionKernels::step0attached>({BlockCount(), Compression1ThreadCount(), 0}, krnlRunRangeNone, krnlEventNone);
1635+
runKernel<GPUTPCCompressionKernels, GPUTPCCompressionKernels::step1unattached>({BlockCount(), Compression2ThreadCount(), 0}, krnlRunRangeNone, krnlEventNone);
16361636
TransferMemoryResourcesToHost(myStep, &Compressor, 0);
16371637
SynchronizeGPU();
16381638
memset((void*)&Compressor.mOutput, 0, sizeof(Compressor.mOutput));

0 commit comments

Comments
 (0)