@@ -97,21 +97,22 @@ void trackleterKernelHost(
9797 }
9898}
9999
100- void trackletSelectionKernelHost (
100+ static void trackletSelectionKernelHost (
101101 const gsl::span<const Cluster> clusters0, // 0
102102 const gsl::span<const Cluster> clusters1, // 1
103103 gsl::span<unsigned char > usedClusters0, // Layer 0
104104 gsl::span<unsigned char > usedClusters2, // Layer 2
105105 const gsl::span<const Tracklet>& tracklets01,
106106 const gsl::span<const Tracklet>& tracklets12,
107- bounded_vector<uint8_t >& usedTracklets,
107+ bounded_vector<bool >& usedTracklets,
108108 const gsl::span<int > foundTracklets01,
109109 const gsl::span<int > foundTracklets12,
110110 bounded_vector<Line>& lines,
111111 const gsl::span<const o2::MCCompLabel>& trackletLabels,
112112 bounded_vector<o2::MCCompLabel>& linesLabels,
113- const short pivotRofId,
114- const short targetRofId,
113+ const short targetRofId0,
114+ const short targetRofId2,
115+ bool safeWrites = false ,
115116 const float tanLambdaCut = 0 .025f ,
116117 const float phiCut = 0 .005f ,
117118 const int maxTracklets = static_cast <int >(1e2 ))
@@ -121,16 +122,27 @@ void trackletSelectionKernelHost(
121122 int validTracklets{0 };
122123 for (int iTracklet12{offset12}; iTracklet12 < offset12 + foundTracklets12[iCurrentLayerClusterIndex]; ++iTracklet12) {
123124 for (int iTracklet01{offset01}; iTracklet01 < offset01 + foundTracklets01[iCurrentLayerClusterIndex]; ++iTracklet01) {
125+ if (usedTracklets[iTracklet01]) {
126+ continue ;
127+ }
128+
124129 const auto & tracklet01{tracklets01[iTracklet01]};
125130 const auto & tracklet12{tracklets12[iTracklet12]};
126- if (tracklet01.rof [0 ] != targetRofId || tracklet12.rof [1 ] != targetRofId) {
131+
132+ if (tracklet01.rof [0 ] != targetRofId0 || tracklet12.rof [1 ] != targetRofId2) {
127133 continue ;
128134 }
135+
129136 const float deltaTanLambda{o2::gpu::GPUCommonMath::Abs (tracklet01.tanLambda - tracklet12.tanLambda )};
130137 const float deltaPhi{o2::gpu::GPUCommonMath::Abs (math_utils::smallestAngleDifference (tracklet01.phi , tracklet12.phi ))};
131- if (!usedTracklets[iTracklet01] && deltaTanLambda < tanLambdaCut && deltaPhi < phiCut && validTracklets != maxTracklets) {
132- usedClusters0[tracklet01.firstClusterIndex ] = true ;
133- usedClusters2[tracklet12.secondClusterIndex ] = true ;
138+ if (deltaTanLambda < tanLambdaCut && deltaPhi < phiCut && validTracklets != maxTracklets) {
139+ if (safeWrites) {
140+ __atomic_store_n (&usedClusters0[tracklet01.firstClusterIndex ], 1 , __ATOMIC_RELAXED);
141+ __atomic_store_n (&usedClusters2[tracklet12.secondClusterIndex ], 1 , __ATOMIC_RELAXED);
142+ } else {
143+ usedClusters0[tracklet01.firstClusterIndex ] = 1 ;
144+ usedClusters2[tracklet12.secondClusterIndex ] = 1 ;
145+ }
134146 usedTracklets[iTracklet01] = true ;
135147 lines.emplace_back (tracklet01, clusters0.data (), clusters1.data ());
136148 if (!trackletLabels.empty ()) {
@@ -330,27 +342,37 @@ void VertexerTraits::computeTrackletMatching(const int iteration)
330342 continue ;
331343 }
332344 mTimeFrame ->getLines (pivotRofId).reserve (mTimeFrame ->getNTrackletsCluster (pivotRofId, 0 ).size ());
333- bounded_vector<uint8_t > usedTracklets (mTimeFrame ->getFoundTracklets (pivotRofId, 0 ).size (), false , mMemoryPool .get ());
345+ bounded_vector<bool > usedTracklets (mTimeFrame ->getFoundTracklets (pivotRofId, 0 ).size (), false , mMemoryPool .get ());
334346 short startROF{std::max ((short )0 , static_cast <short >(pivotRofId - mVrtParams [iteration].deltaRof ))};
335347 short endROF{std::min (static_cast <short >(mTimeFrame ->getNrof ()), static_cast <short >(pivotRofId + mVrtParams [iteration].deltaRof + 1 ))};
336- for (short targetRofId = startROF; targetRofId < endROF; ++targetRofId) {
337- trackletSelectionKernelHost (
338- mTimeFrame ->getClustersOnLayer (targetRofId, 0 ),
339- mTimeFrame ->getClustersOnLayer (pivotRofId, 1 ),
340- mTimeFrame ->getUsedClustersROF (targetRofId, 0 ),
341- mTimeFrame ->getUsedClustersROF (targetRofId, 2 ),
342- mTimeFrame ->getFoundTracklets (pivotRofId, 0 ),
343- mTimeFrame ->getFoundTracklets (pivotRofId, 1 ),
344- usedTracklets,
345- mTimeFrame ->getNTrackletsCluster (pivotRofId, 0 ),
346- mTimeFrame ->getNTrackletsCluster (pivotRofId, 1 ),
347- mTimeFrame ->getLines (pivotRofId),
348- mTimeFrame ->getLabelsFoundTracklets (pivotRofId, 0 ),
349- mTimeFrame ->getLinesLabel (pivotRofId),
350- pivotRofId,
351- targetRofId,
352- mVrtParams [iteration].tanLambdaCut ,
353- mVrtParams [iteration].phiCut );
348+
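349+ // atomic writes are needed only when running multi-threaded with deltaRof != 0 and this pivot's [startROF, endROF) window crosses an edge of the Rofs range (NOTE(review): confirm the sign of the lower-edge test)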
350+ bool safeWrite = mTaskArena ->max_concurrency () > 1 && mVrtParams [iteration].deltaRof != 0 && ((Rofs.begin () - startROF < 0 ) || (endROF - Rofs.end () > 0 ));
351+
352+ for (short targetRofId0 = startROF; targetRofId0 < endROF; ++targetRofId0) {
353+ for (short targetRofId2 = startROF; targetRofId2 < endROF; ++targetRofId2) {
354+ if (std::abs (targetRofId0 - targetRofId2) > mVrtParams [iteration].deltaRof ) { // do not allow over 3 ROFs
355+ continue ;
356+ }
357+ trackletSelectionKernelHost (
358+ mTimeFrame ->getClustersOnLayer (targetRofId0, 0 ),
359+ mTimeFrame ->getClustersOnLayer (pivotRofId, 1 ),
360+ mTimeFrame ->getUsedClustersROF (targetRofId0, 0 ),
361+ mTimeFrame ->getUsedClustersROF (targetRofId2, 2 ),
362+ mTimeFrame ->getFoundTracklets (pivotRofId, 0 ),
363+ mTimeFrame ->getFoundTracklets (pivotRofId, 1 ),
364+ usedTracklets,
365+ mTimeFrame ->getNTrackletsCluster (pivotRofId, 0 ),
366+ mTimeFrame ->getNTrackletsCluster (pivotRofId, 1 ),
367+ mTimeFrame ->getLines (pivotRofId),
368+ mTimeFrame ->getLabelsFoundTracklets (pivotRofId, 0 ),
369+ mTimeFrame ->getLinesLabel (pivotRofId),
370+ targetRofId0,
371+ targetRofId2,
372+ safeWrite,
373+ mVrtParams [iteration].tanLambdaCut ,
374+ mVrtParams [iteration].phiCut );
375+ }
354376 }
355377 }
356378 });
0 commit comments