@@ -63,16 +63,26 @@ class GPUCommonMath
6363 GPUhdni () static unsigned int Popcount (unsigned int val);
6464
6565 GPUhdni () static float Log (float x);
66- GPUd () static unsigned int AtomicExch (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val);
67- GPUd () static unsigned int AtomicAdd (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val);
68- GPUd () static void AtomicMax (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val);
69- GPUd () static void AtomicMin (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val);
70- GPUd () static unsigned int AtomicExchShared (GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val);
71- GPUd () static unsigned int AtomicAddShared (GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val);
72- GPUd () static void AtomicMaxShared (GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val);
73- GPUd () static void AtomicMinShared (GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val);
// Atomic exchange on an unsigned int whose pointer is declared with GPUglobalref().
// Thin inline wrapper: forwards addr and val unchanged to the private template
// AtomicExchInt and returns its result.
// NOTE(review): presumably the returned value is the previous content of *addr - confirm
// against the AtomicExchInt definition, whose full body is not visible here.
66+ GPUdi () static unsigned int AtomicExch (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val) { return GPUCommonMath::AtomicExchInt (addr, val); }
// Atomic add on an unsigned int whose pointer is declared with GPUglobalref().
// Thin inline wrapper: forwards addr and val unchanged to the private template
// AtomicAddInt and returns its result.
// NOTE(review): presumably the returned value is the content of *addr before the add - confirm
// against the AtomicAddInt definition, whose full body is not visible here.
67+ GPUdi () static unsigned int AtomicAdd (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val) { return GPUCommonMath::AtomicAddInt (addr, val); }
// Atomic maximum on an unsigned int whose pointer is declared with GPUglobalref().
// Thin inline wrapper: forwards addr and val unchanged to the private template
// AtomicMaxInt; nothing is returned to the caller.
68+ GPUdi () static void AtomicMax (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val) { GPUCommonMath::AtomicMaxInt (addr, val); }
// Atomic minimum on an unsigned int whose pointer is declared with GPUglobalref().
// Thin inline wrapper: forwards addr and val unchanged to the private template
// AtomicMinInt; nothing is returned to the caller.
69+ GPUdi () static void AtomicMin (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val) { GPUCommonMath::AtomicMinInt (addr, val); }
// Same operation as AtomicExch, but for a pointer declared with GPUsharedref().
// Forwards to the same private template, AtomicExchInt, which takes the pointee type
// as a template parameter, and returns its result.
// NOTE(review): GPUsharedref() is defined outside this view; presumably it selects the
// shared/local address space - confirm.
70+ GPUdi () static unsigned int AtomicExchShared (GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val) { return GPUCommonMath::AtomicExchInt (addr, val); }
// Same operation as AtomicAdd, but for a pointer declared with GPUsharedref().
// Forwards to the same private template, AtomicAddInt, which takes the pointee type
// as a template parameter, and returns its result.
// NOTE(review): GPUsharedref() is defined outside this view; presumably it selects the
// shared/local address space - confirm.
71+ GPUdi () static unsigned int AtomicAddShared (GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val) { return GPUCommonMath::AtomicAddInt (addr, val); }
// Same operation as AtomicMax, but for a pointer declared with GPUsharedref().
// Forwards to the same private template, AtomicMaxInt; nothing is returned to the caller.
72+ GPUdi () static void AtomicMaxShared (GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val) { GPUCommonMath::AtomicMaxInt (addr, val); }
// Same operation as AtomicMin, but for a pointer declared with GPUsharedref().
// Forwards to the same private template, AtomicMinInt; nothing is returned to the caller.
73+ GPUdi () static void AtomicMinShared (GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val) { GPUCommonMath::AtomicMinInt (addr, val); }
7474 GPUd () static int Mul24 (int a, int b);
7575 GPUd () static float FMulRZ (float a, float b);
76+
77+ private:
// Private implementation behind AtomicExch and AtomicExchShared.
// S is the pointee type of addr and T the type of val; both are deduced at the call site,
// so the GPUglobalref() and GPUsharedref() wrappers can share this single declaration.
// Declared here only; the definition follows out of class later in this file.
78+ template <class S , class T >
79+ GPUd () static unsigned int AtomicExchInt (S* addr, T val);
// Private implementation behind AtomicAdd and AtomicAddShared.
// S is the pointee type of addr and T the type of val; both are deduced at the call site,
// so the GPUglobalref() and GPUsharedref() wrappers can share this single declaration.
// Declared here only; the definition follows out of class later in this file.
80+ template <class S , class T >
81+ GPUd () static unsigned int AtomicAddInt (S* addr, T val);
// Private implementation behind AtomicMax and AtomicMaxShared.
// S is the pointee type of addr and T the type of val; both are deduced at the call site.
// Returns nothing. Declared here only; the definition follows out of class later in this file.
82+ template <class S , class T >
83+ GPUd () static void AtomicMaxInt (S* addr, T val);
// Private implementation behind AtomicMin and AtomicMinShared.
// S is the pointee type of addr and T the type of val; both are deduced at the call site.
// Returns nothing. Declared here only; the definition follows out of class later in this file.
84+ template <class S , class T >
85+ GPUd () static void AtomicMinInt (S* addr, T val);
7686};
7787
7888typedef GPUCommonMath CAMath;
@@ -225,30 +235,13 @@ GPUhdi() float GPUCommonMath::Copysign(float x, float y)
225235#endif // GPUCA_GPUCODE
226236}
227237
228- #if defined(__OPENCL__) && (!defined(__OPENCLCPP__) || (defined(__clang__) && !defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS)))
229- GPUdi () unsigned int GPUCommonMath::AtomicExchShared(GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val)
230- {
231- return ::atomic_xchg (addr, val);
232- }
233- GPUdi () unsigned int GPUCommonMath::AtomicAddShared(GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val) { return ::atomic_add (addr, val); }
234- GPUdi () void GPUCommonMath::AtomicMaxShared(GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val) { ::atomic_max (addr, val); }
235- GPUdi () void GPUCommonMath::AtomicMinShared(GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val) { ::atomic_min (addr, val); }
236- #else
237- GPUdi () unsigned int GPUCommonMath::AtomicExchShared(GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val)
238- {
239- return GPUCommonMath::AtomicExch (addr, val);
240- }
241- GPUdi () unsigned int GPUCommonMath::AtomicAddShared(GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val) { return GPUCommonMath::AtomicAdd (addr, val); }
242- GPUdi () void GPUCommonMath::AtomicMaxShared(GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val) { GPUCommonMath::AtomicMax (addr, val); }
243- GPUdi () void GPUCommonMath::AtomicMinShared(GPUsharedref() GPUAtomic(unsigned int ) * addr, unsigned int val) { GPUCommonMath::AtomicMin (addr, val); }
244- #endif
245-
246238#ifndef GPUCA_GPUCODE
247239#pragma GCC diagnostic push
248240#pragma GCC diagnostic ignored "-Wunused-value" // GCC BUG in omp atomic capture gives false warning
249241#endif
250242
251- GPUdi () unsigned int GPUCommonMath::AtomicExch (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val)
243+ template <class S , class T >
244+ GPUdi () unsigned int GPUCommonMath::AtomicExchInt (S* addr, T val)
252245{
253246#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
254247 return ::atomic_exchange (addr, val);
@@ -269,7 +262,8 @@ GPUdi() unsigned int GPUCommonMath::AtomicExch(GPUglobalref() GPUAtomic(unsigned
269262#endif // GPUCA_GPUCODE
270263}
271264
272- GPUdi () unsigned int GPUCommonMath::AtomicAdd (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val)
265+ template <class S , class T >
266+ GPUdi () unsigned int GPUCommonMath::AtomicAddInt (S* addr, T val)
273267{
274268#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
275269 return ::atomic_fetch_add (addr, val);
@@ -290,7 +284,8 @@ GPUdi() unsigned int GPUCommonMath::AtomicAdd(GPUglobalref() GPUAtomic(unsigned
290284#endif // GPUCA_GPUCODE
291285}
292286
293- GPUdi () void GPUCommonMath::AtomicMax (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val)
287+ template <class S , class T >
288+ GPUdi () void GPUCommonMath::AtomicMaxInt (S* addr, T val)
294289{
295290#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
296291 ::atomic_fetch_max (addr, val);
@@ -309,7 +304,8 @@ GPUdi() void GPUCommonMath::AtomicMax(GPUglobalref() GPUAtomic(unsigned int) * a
309304#endif // GPUCA_GPUCODE
310305}
311306
312- GPUdi () void GPUCommonMath::AtomicMin (GPUglobalref() GPUAtomic(unsigned int ) * addr, unsigned int val)
307+ template <class S , class T >
308+ GPUdi () void GPUCommonMath::AtomicMinInt (S* addr, T val)
313309{
314310#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
315311 ::atomic_fetch_min (addr, val);
0 commit comments