Skip to content

Commit 1baee2a

Browse files
committed
GPU: Initial work to support attributed pointers in HIP, for now disabled due to compilation problems
1 parent cd5ddd9 commit 1baee2a

File tree

7 files changed

+71
-67
lines changed

7 files changed

+71
-67
lines changed

GPU/Common/GPUCommonDefAPI.h

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
#define GPUconstexpr() __constant
7474
#define GPUprivate() __private
7575
#define GPUgeneric() __generic
76+
#define GPUconstexprref() GPUconstexpr()
7677
#if defined(__OPENCLCPP__) && !defined(__clang__)
7778
#define GPUbarrier() work_group_barrier(mem_fence::global | mem_fence::local);
7879
#define GPUAtomic(type) atomic<type>
@@ -98,6 +99,15 @@
9899
#define CONSTEXPR const
99100
#endif
100101
#endif
102+
#if !defined(__OPENCLCPP__) // Other special defines for OpenCL 1
103+
#define GPUCA_USE_TEMPLATE_ADDRESS_SPACES // TODO: check if we can make this (partially, where it is already implemented) compatible with OpenCL CPP
104+
#define GPUsharedref() GPUshared()
105+
#define GPUglobalref() GPUglobal()
106+
#endif
107+
#if (!defined(__OPENCLCPP__) || !defined(GPUCA_OPENCLCPP_NO_CONSTANT_MEMORY))
108+
#define GPUconstantref() GPUconstant()
109+
#endif
110+
101111
#elif defined(__CUDACC__) //Defines for CUDA
102112
#define GPUd() __device__
103113
#define GPUdDefault()
@@ -129,7 +139,15 @@
129139
#define GPUhdni() __host__ __device__
130140
#define GPUg() __global__
131141
#define GPUshared() __shared__
132-
#define GPUglobal()
142+
#if defined(GPUCA_GPUCODE_DEVICE) && 0 // TODO: Fix for HIP
143+
#define GPUCA_USE_TEMPLATE_ADDRESS_SPACES
144+
#define GPUglobal() __attribute__((address_space(1)))
145+
#define GPUglobalref() GPUglobal()
146+
#define GPUconstantref() __attribute__((address_space(4)))
147+
#define GPUsharedref() __attribute__((address_space(3)))
148+
#else
149+
#define GPUglobal()
150+
#endif
133151
#define GPUconstant()
134152
#define GPUconstexpr() __constant__
135153
#define GPUprivate()
@@ -143,22 +161,17 @@
143161
#define GPUconstant() GPUglobal()
144162
#endif
145163

146-
#if defined(__OPENCL__) && !defined(__OPENCLCPP__) // Other special defines for OpenCL
147-
#define GPUsharedref() GPUshared()
148-
#define GPUglobalref() GPUglobal()
149-
#else //Other defines for the rest
150-
#define GPUsharedref()
151-
#define GPUglobalref()
164+
#ifndef GPUsharedref
165+
#define GPUsharedref()
152166
#endif
153-
#if defined(__OPENCL__) // OpenCL 2 cannot cast __constant to __generic
154-
#define GPUconstexprref() GPUconstexpr()
155-
#else
156-
#define GPUconstexprref()
167+
#ifndef GPUglobalref
168+
#define GPUglobalref()
157169
#endif
158-
#if defined(__OPENCL__) && (!defined(__OPENCLCPP__) || !defined(GPUCA_OPENCLCPP_NO_CONSTANT_MEMORY))
159-
#define GPUconstantref() GPUconstant()
160-
#else
161-
#define GPUconstantref()
170+
#ifndef GPUconstantref
171+
#define GPUconstantref()
172+
#endif
173+
#ifndef GPUconstexprref
174+
#define GPUconstexprref()
162175
#endif
163176

164177
// Macros for GRID dimension

GPU/Common/GPUCommonMath.h

Lines changed: 26 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,26 @@ class GPUCommonMath
6363
GPUhdni() static unsigned int Popcount(unsigned int val);
6464

6565
GPUhdni() static float Log(float x);
66-
GPUd() static unsigned int AtomicExch(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val);
67-
GPUd() static unsigned int AtomicAdd(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val);
68-
GPUd() static void AtomicMax(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val);
69-
GPUd() static void AtomicMin(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val);
70-
GPUd() static unsigned int AtomicExchShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val);
71-
GPUd() static unsigned int AtomicAddShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val);
72-
GPUd() static void AtomicMaxShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val);
73-
GPUd() static void AtomicMinShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val);
66+
GPUdi() static unsigned int AtomicExch(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val) { return GPUCommonMath::AtomicExchInt(addr, val); }
67+
GPUdi() static unsigned int AtomicAdd(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val) { return GPUCommonMath::AtomicAddInt(addr, val); }
68+
GPUdi() static void AtomicMax(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val) { GPUCommonMath::AtomicMaxInt(addr, val); }
69+
GPUdi() static void AtomicMin(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val) { GPUCommonMath::AtomicMinInt(addr, val); }
70+
GPUdi() static unsigned int AtomicExchShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val) { return GPUCommonMath::AtomicExchInt(addr, val); }
71+
GPUdi() static unsigned int AtomicAddShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val) { return GPUCommonMath::AtomicAddInt(addr, val); }
72+
GPUdi() static void AtomicMaxShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val) { GPUCommonMath::AtomicMaxInt(addr, val); }
73+
GPUdi() static void AtomicMinShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val) { GPUCommonMath::AtomicMinInt(addr, val); }
7474
GPUd() static int Mul24(int a, int b);
7575
GPUd() static float FMulRZ(float a, float b);
76+
77+
private:
78+
template <class S, class T>
79+
GPUd() static unsigned int AtomicExchInt(S* addr, T val);
80+
template <class S, class T>
81+
GPUd() static unsigned int AtomicAddInt(S* addr, T val);
82+
template <class S, class T>
83+
GPUd() static void AtomicMaxInt(S* addr, T val);
84+
template <class S, class T>
85+
GPUd() static void AtomicMinInt(S* addr, T val);
7686
};
7787

7888
typedef GPUCommonMath CAMath;
@@ -225,30 +235,13 @@ GPUhdi() float GPUCommonMath::Copysign(float x, float y)
225235
#endif // GPUCA_GPUCODE
226236
}
227237

228-
#if defined(__OPENCL__) && (!defined(__OPENCLCPP__) || (defined(__clang__) && !defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS)))
229-
GPUdi() unsigned int GPUCommonMath::AtomicExchShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val)
230-
{
231-
return ::atomic_xchg(addr, val);
232-
}
233-
GPUdi() unsigned int GPUCommonMath::AtomicAddShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val) { return ::atomic_add(addr, val); }
234-
GPUdi() void GPUCommonMath::AtomicMaxShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val) { ::atomic_max(addr, val); }
235-
GPUdi() void GPUCommonMath::AtomicMinShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val) { ::atomic_min(addr, val); }
236-
#else
237-
GPUdi() unsigned int GPUCommonMath::AtomicExchShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val)
238-
{
239-
return GPUCommonMath::AtomicExch(addr, val);
240-
}
241-
GPUdi() unsigned int GPUCommonMath::AtomicAddShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val) { return GPUCommonMath::AtomicAdd(addr, val); }
242-
GPUdi() void GPUCommonMath::AtomicMaxShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val) { GPUCommonMath::AtomicMax(addr, val); }
243-
GPUdi() void GPUCommonMath::AtomicMinShared(GPUsharedref() GPUAtomic(unsigned int) * addr, unsigned int val) { GPUCommonMath::AtomicMin(addr, val); }
244-
#endif
245-
246238
#ifndef GPUCA_GPUCODE
247239
#pragma GCC diagnostic push
248240
#pragma GCC diagnostic ignored "-Wunused-value" // GCC BUG in omp atomic capture gives false warning
249241
#endif
250242

251-
GPUdi() unsigned int GPUCommonMath::AtomicExch(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val)
243+
template <class S, class T>
244+
GPUdi() unsigned int GPUCommonMath::AtomicExchInt(S* addr, T val)
252245
{
253246
#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
254247
return ::atomic_exchange(addr, val);
@@ -269,7 +262,8 @@ GPUdi() unsigned int GPUCommonMath::AtomicExch(GPUglobalref() GPUAtomic(unsigned
269262
#endif // GPUCA_GPUCODE
270263
}
271264

272-
GPUdi() unsigned int GPUCommonMath::AtomicAdd(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val)
265+
template <class S, class T>
266+
GPUdi() unsigned int GPUCommonMath::AtomicAddInt(S* addr, T val)
273267
{
274268
#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
275269
return ::atomic_fetch_add(addr, val);
@@ -290,7 +284,8 @@ GPUdi() unsigned int GPUCommonMath::AtomicAdd(GPUglobalref() GPUAtomic(unsigned
290284
#endif // GPUCA_GPUCODE
291285
}
292286

293-
GPUdi() void GPUCommonMath::AtomicMax(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val)
287+
template <class S, class T>
288+
GPUdi() void GPUCommonMath::AtomicMaxInt(S* addr, T val)
294289
{
295290
#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
296291
::atomic_fetch_max(addr, val);
@@ -309,7 +304,8 @@ GPUdi() void GPUCommonMath::AtomicMax(GPUglobalref() GPUAtomic(unsigned int) * a
309304
#endif // GPUCA_GPUCODE
310305
}
311306

312-
GPUdi() void GPUCommonMath::AtomicMin(GPUglobalref() GPUAtomic(unsigned int) * addr, unsigned int val)
307+
template <class S, class T>
308+
GPUdi() void GPUCommonMath::AtomicMinInt(S* addr, T val)
313309
{
314310
#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
315311
::atomic_fetch_min(addr, val);

GPU/Common/GPUDefOpenCL12Templates.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,17 @@
2323

2424
//Special macros for OpenCL rev. 1.2 (encode address space in template parameter)
2525
enum LocalOrGlobal { Mem_Local, Mem_Global, Mem_Constant, Mem_Plain };
26-
#if defined(__OPENCL__) && !defined(__OPENCLCPP__)
26+
#if defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_USE_TEMPLATE_ADDRESS_SPACES)
2727
template<LocalOrGlobal, typename L, typename G, typename C, typename P> struct MakeTypeHelper;
2828
template<typename L, typename G, typename C, typename P> struct MakeTypeHelper<Mem_Local, L, G, C, P> { typedef L type; };
2929
template<typename L, typename G, typename C, typename P> struct MakeTypeHelper<Mem_Global, L, G, C, P> { typedef G type; };
3030
template<typename L, typename G, typename C, typename P> struct MakeTypeHelper<Mem_Constant, L, G, C, P> { typedef C type; };
3131
template<typename L, typename G, typename C, typename P> struct MakeTypeHelper<Mem_Plain, L, G, C, P> { typedef P type; };
32-
#define MakeType(base_type) typename MakeTypeHelper<LG, GPUshared() base_type, GPUglobalref() base_type, GPUconstant() base_type, base_type>::type
32+
#ifdef __HIPCC__
33+
#define MakeType(base_type) typename MakeTypeHelper<LG, GPUsharedref() base_type, GPUglobalref() base_type, GPUconstantref() base_type, base_type>::type
34+
#else
35+
#define MakeType(base_type) typename MakeTypeHelper<LG, GPUshared() base_type, GPUglobalref() base_type, GPUconstant() base_type, base_type>::type
36+
#endif
3337
#define MEM_CLASS_PRE() template<LocalOrGlobal LG>
3438
#define MEM_CLASS_PRE_TEMPLATE(t) template<LocalOrGlobal LG, t>
3539
#define MEM_LG(type) type<LG>

GPU/GPUTracking/Base/GPUProcessor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class GPUProcessor
5555
{
5656
return mConstantMem;
5757
}
58-
GPUd() GPUconstantref() const MEM_CONSTANT(GPUParam) & Param() const;
58+
GPUd() GPUconstantref() const MEM_CONSTANT(GPUParam) & Param() const; // Body in GPUConstantMem.h to avoid circular headers
5959
const GPUReconstruction& GetRec() const { return *mRec; }
6060

6161
#ifndef __OPENCL__

GPU/GPUTracking/SliceTracker/GPUTPCDef.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ namespace gpu
2525
#if defined(GPUCA_O2_LIB) || defined(GPUCA_O2_INTERFACE)
2626
typedef unsigned int calink;
2727
typedef unsigned int cahit;
28+
typedef uint2 cahit2;
2829
#else
2930
typedef unsigned int calink;
3031
typedef unsigned int cahit;
32+
typedef uint2 cahit2;
3133
#endif
32-
33-
struct cahit2{cahit x, y;};
3434
}
3535
} // GPUCA_NAMESPACE::GPU
3636

GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -133,21 +133,12 @@ class GPUTPCSliceData
133133

134134
GPUhdi() GPUglobalref() GPUAtomic(unsigned int) * HitWeights() const { return (mHitWeights); }
135135

136-
GPUhdi() void SetGPUTextureBase(const void* val) { mGPUTextureBase = val; }
136+
GPUhdi() void SetGPUTextureBase(GPUglobalref() const void* val) { mGPUTextureBase = val; }
137137
GPUhdi() char* GPUTextureBase() const { return ((char*)mGPUTextureBase); }
138138
GPUhdi() char* GPUTextureBaseConst() const { return ((char*)mGPUTextureBase); }
139139

140-
#if !defined(__OPENCL__)
141-
GPUhdi() const GPUTPCClusterData* ClusterData() const
142-
{
143-
return mClusterData;
144-
}
145-
#endif
146-
147-
float MaxZ() const
148-
{
149-
return mMaxZ;
150-
}
140+
GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const { return mClusterData; }
141+
float MaxZ() const { return mMaxZ; }
151142

152143
private:
153144
#ifndef GPUCA_GPUCODE

GPU/GPUTracking/SliceTracker/GPUTPCTracker.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,11 @@ class GPUTPCTracker : public GPUProcessor
9696

9797
int ReadEvent();
9898

99-
GPUh() const GPUTPCClusterData* ClusterData() const { return mData.ClusterData(); }
99+
GPUh() GPUglobalref() const GPUTPCClusterData* ClusterData() const { return mData.ClusterData(); }
100100

101101
GPUh() MakeType(const MEM_LG(GPUTPCRow) &) Row(const GPUTPCHitId& HitId) const { return mData.Row(HitId.RowIndex()); }
102102

103-
GPUhd() GPUTPCSliceOutput* Output() const { return mOutput; }
103+
GPUhd() GPUglobalref() GPUTPCSliceOutput* Output() const { return mOutput; }
104104
#endif
105105
GPUhdni() GPUglobalref() commonMemoryStruct* CommonMemory() const
106106
{
@@ -214,11 +214,11 @@ class GPUTPCTracker : public GPUProcessor
214214

215215
GPUhd() GPUglobalref() GPUAtomic(unsigned int) * NTracklets() const { return &mCommonMem->nTracklets; }
216216

217-
GPUhd() const GPUTPCHitId& TrackletStartHit(int i) const { return mTrackletStartHits[i]; }
217+
GPUhd() GPUglobalref() const GPUTPCHitId& TrackletStartHit(int i) const { return mTrackletStartHits[i]; }
218218
GPUhd() GPUglobalref() GPUTPCHitId* TrackletStartHits() const { return mTrackletStartHits; }
219219
GPUhd() GPUglobalref() GPUTPCHitId* TrackletTmpStartHits() const { return mTrackletTmpStartHits; }
220220
MEM_CLASS_PRE2()
221-
GPUhd() const MEM_LG2(GPUTPCTracklet) & Tracklet(int i) const { return mTracklets[i]; }
221+
GPUhd() GPUglobalref() const MEM_LG2(GPUTPCTracklet) & Tracklet(int i) const { return mTracklets[i]; }
222222
GPUhd() GPUglobalref() MEM_GLOBAL(GPUTPCTracklet) * Tracklets() const { return mTracklets; }
223223
GPUhd() GPUglobalref() calink* TrackletRowHits() const { return mTrackletRowHits; }
224224

@@ -236,7 +236,7 @@ class GPUTPCTracker : public GPUProcessor
236236
return (&mGPUParametersConst);
237237
}
238238
GPUhd() MakeType(MEM_LG(const StructGPUParametersConst) *) GetGPUParametersConst() const { return (&mGPUParametersConst); }
239-
GPUhd() void SetGPUTextureBase(const void* val) { mData.SetGPUTextureBase(val); }
239+
GPUhd() void SetGPUTextureBase(GPUglobalref() const void* val) { mData.SetGPUTextureBase(val); }
240240

241241
struct trackSortData {
242242
int fTtrack; // Track ID

0 commit comments

Comments
 (0)