Skip to content

Commit 4ff9d29

Browse files
Jorge Paradaktf
authored andcommitted
GPU: Add GPUdii() macro for force-inline on GPU, and apply to all ::Thread functions
1 parent 31d5268 commit 4ff9d29

20 files changed

+33
-29
lines changed

GPU/Common/GPUCommonDefAPI.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#define GPUd()
2727
#define GPUdDefault()
2828
#define GPUdi() inline
29+
#define GPUdii()
2930
#define GPUh()
3031
#define GPUhi() inline
3132
#define GPUhd()
@@ -59,6 +60,7 @@
5960
#define GPUd()
6061
#define GPUdDefault()
6162
#define GPUdi() inline
63+
#define GPUdii() inline
6264
#define GPUh() INVALID_TRIGGER_ERROR_NO_HOST_CODE
6365
#define GPUhi() INVALID_TRIGGER_ERROR_NO_HOST_CODE
6466
#define GPUhd() inline
@@ -100,6 +102,7 @@
100102
#define GPUd() __device__
101103
#define GPUdDefault()
102104
#define GPUdi() __device__ inline
105+
#define GPUdii() __device__ inline
103106
#define GPUh() __host__ inline
104107
#define GPUhi() __host__ inline
105108
#define GPUhd() __host__ __device__ inline
@@ -118,6 +121,7 @@
118121
#define GPUd() __device__
119122
#define GPUdDefault() __device__
120123
#define GPUdi() __device__ inline
124+
#define GPUdii() __device__ __forceinline__
121125
#define GPUh() __host__ inline
122126
#define GPUhi() __host__ inline
123127
#define GPUhd() __host__ __device__ inline

GPU/GPUTracking/Base/GPUGeneralKernels.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
using namespace GPUCA_NAMESPACE::gpu;
1616

1717
template <>
18-
GPUd() void GPUMemClean16::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& processors, GPUglobalref() void* ptr, unsigned long size)
18+
GPUdii() void GPUMemClean16::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& processors, GPUglobalref() void* ptr, unsigned long size)
1919
{
2020
const unsigned long stride = get_global_size(0);
2121
int4 i0;

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ using namespace GPUCA_NAMESPACE::gpu;
2424
using namespace o2::tpc;
2525

2626
template <>
27-
GPUd() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step0attached>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
27+
GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step0attached>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
2828
{
2929
GPUTPCGMMerger& merger = processors.tpcMerger;
3030
const o2::tpc::ClusterNativeAccess* clusters = processors.tpcConverter.getClustersNative();
@@ -168,7 +168,7 @@ GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<3>::opera
168168
}
169169

170170
template <>
171-
GPUd() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1unattached>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
171+
GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1unattached>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
172172
{
173173
GPUTPCGMMerger& merger = processors.tpcMerger;
174174
const o2::tpc::ClusterNativeAccess* clusters = processors.tpcConverter.getClustersNative();

GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ using namespace GPUCA_NAMESPACE::gpu;
2929
using namespace o2;
3030
using namespace o2::its;
3131

32-
GPUd() bool GPUITSFitterKernel::fitTrack(GPUITSFitter& Fitter, GPUTPCGMPropagator& prop, GPUITSTrack& track, int start, int end, int step)
32+
GPUdii() bool GPUITSFitterKernel::fitTrack(GPUITSFitter& Fitter, GPUTPCGMPropagator& prop, GPUITSTrack& track, int start, int end, int step)
3333
{
3434
for (int iLayer{start}; iLayer != end; iLayer += step) {
3535
if (track.mClusters[iLayer] == o2::its::constants::its::UnusedIndex) {
@@ -55,7 +55,7 @@ GPUd() bool GPUITSFitterKernel::fitTrack(GPUITSFitter& Fitter, GPUTPCGMPropagato
5555
}
5656

5757
template <>
58-
GPUd() void GPUITSFitterKernel::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
58+
GPUdii() void GPUITSFitterKernel::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
5959
{
6060
GPUITSFitter& Fitter = processors.itsFitter;
6161

GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
using namespace GPUCA_NAMESPACE::gpu;
2020

2121
template <>
22-
GPUd() void GPUTPCGMMergerTrackFit::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger)
22+
GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger)
2323
{
2424
#if defined(WITH_OPENMP) && !defined(GPUCA_GPUCODE)
2525
#pragma omp parallel for num_threads(merger.GetRec().GetDeviceProcessingSettings().nThreads)

GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
using namespace GPUCA_NAMESPACE::gpu;
1818

1919
template <>
20-
GPUd() void GPUTPCNeighboursCleaner::Thread<0>(int /*nBlocks*/, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& tracker)
20+
GPUdii() void GPUTPCNeighboursCleaner::Thread<0>(int /*nBlocks*/, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& tracker)
2121
{
2222
// *
2323
// * kill link to the neighbour if the neighbour is not pointed to the cluster

GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
using namespace GPUCA_NAMESPACE::gpu;
2020

2121
template <>
22-
GPUd() void GPUTPCNeighboursFinder::Thread<0>(int /*nBlocks*/, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& tracker)
22+
GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int /*nBlocks*/, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& tracker)
2323
{
2424
//* find neighbours
2525

GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
using namespace GPUCA_NAMESPACE::gpu;
1919

2020
template <>
21-
GPUd() void GPUTPCStartHitsFinder::Thread<0>(int /*nBlocks*/, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& tracker)
21+
GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int /*nBlocks*/, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& tracker)
2222
{
2323
// find start hits for tracklets
2424

GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
using namespace GPUCA_NAMESPACE::gpu;
1818

1919
template <>
20-
GPUd() void GPUTPCStartHitsSorter::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& tracker)
20+
GPUdii() void GPUTPCStartHitsSorter::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& tracker)
2121
{
2222
// Sorts the Start Hits by Row Index
2323
if (iThread == 0) {

GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
using namespace GPUCA_NAMESPACE::gpu;
2727

2828
MEM_CLASS_PRE2()
29-
GPUd() void GPUTPCTrackletConstructor::InitTracklet(MEM_LG2(GPUTPCTrackParam) & tParam)
29+
GPUdii() void GPUTPCTrackletConstructor::InitTracklet(MEM_LG2(GPUTPCTrackParam) & tParam)
3030
{
3131
// Initialize Tracklet Parameters using default values
3232
tParam.InitParam();
@@ -387,7 +387,7 @@ GPUd() void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() MEM_GLOBAL(GP
387387
}
388388

389389
template <>
390-
GPUd() void GPUTPCTrackletConstructor::Thread<GPUTPCTrackletConstructor::singleSlice>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem, processorType& tracker)
390+
GPUdii() void GPUTPCTrackletConstructor::Thread<GPUTPCTrackletConstructor::singleSlice>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem, processorType& tracker)
391391
{
392392
if (get_local_id(0) == 0) {
393393
sMem.mNTracklets = *tracker.NTracklets();
@@ -403,7 +403,7 @@ GPUd() void GPUTPCTrackletConstructor::Thread<GPUTPCTrackletConstructor::singleS
403403
}
404404

405405
template <>
406-
GPUd() void GPUTPCTrackletConstructor::Thread<GPUTPCTrackletConstructor::allSlices>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem, processorType& tracker0)
406+
GPUdii() void GPUTPCTrackletConstructor::Thread<GPUTPCTrackletConstructor::allSlices>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem, processorType& tracker0)
407407
{
408408
#ifdef GPUCA_GPUCODE
409409
GPUconstantref() MEM_GLOBAL(GPUTPCTracker)* pTracker = &tracker0;

0 commit comments

Comments
 (0)