@@ -201,7 +201,13 @@ GPUd() o2::track::TrackParCov buildTrackSeed(const Cluster& cluster1,
201201}
202202
203203struct sort_tracklets {
204- GPUhd() bool operator()(const Tracklet& a, const Tracklet& b) { return a.firstClusterIndex < b.firstClusterIndex || (a.firstClusterIndex == b.firstClusterIndex && a.secondClusterIndex < b.secondClusterIndex); }
204+ GPUhd() bool operator()(const Tracklet& a, const Tracklet& b)
205+ {
206+ if (a.firstClusterIndex != b.firstClusterIndex) {
207+ return a.firstClusterIndex < b.firstClusterIndex;
208+ }
209+ return a.secondClusterIndex < b.secondClusterIndex;
210+ }
205211};
206212
207213struct equal_tracklets {
@@ -263,23 +269,34 @@ struct compare_track_chi2 {
263269 }
264270};
265271
266- GPUd () gpuSpan<const Vertex> getPrimaryVertices(const int rof,
267- const int* roframesPV,
268- const int nROF,
269- const uint8_t* mask,
270- const Vertex* vertices)
272+ GPUdii () gpuSpan<const Vertex> getPrimaryVertices(const int rof,
273+ const int* roframesPV,
274+ const int nROF,
275+ const uint8_t* mask,
276+ const Vertex* vertices)
271277{
272278 const int start_pv_id = roframesPV[rof];
273279 const int stop_rof = rof >= nROF - 1 ? nROF : rof + 1;
274- size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0; // return empty span if ROF is excluded
280+ const size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0; // return empty span if ROF is excluded
275281 return gpuSpan<const Vertex>(&vertices[start_pv_id], delta);
276282};
277283
278- GPUd() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
279- const int totROFs,
280- const int layer,
281- const int** roframesClus,
282- const Cluster** clusters)
284+ GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int romin,
285+ const int romax,
286+ const int* roframesPV,
287+ const int nROF,
288+ const Vertex* vertices)
289+ {
290+ const int start_pv_id = roframesPV[romin];
291+ const int stop_rof = romax >= nROF - 1 ? nROF : romax + 1;
292+ return gpuSpan<const Vertex>(&vertices[start_pv_id], roframesPV[stop_rof] - roframesPV[romin]);
293+ };
294+
295+ GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
296+ const int totROFs,
297+ const int layer,
298+ const int** roframesClus,
299+ const Cluster** clusters)
283300{
284301 if (rof < 0 || rof >= totROFs) {
285302 return gpuSpan<const Cluster>();
@@ -360,6 +377,8 @@ GPUg() void computeLayerCellNeighboursKernel(
360377 int* neighboursIndexTable,
361378 int** cellsLUTs,
362379 gpuPair<int, int>* cellNeighbours,
380+ const Tracklet** tracklets,
381+ const int deltaROF,
363382 const float maxChi2ClusterAttachment,
364383 const float bz,
365384 const int layerIndex,
@@ -377,15 +396,29 @@ GPUg() void computeLayerCellNeighboursKernel(
377396 if (nextCellSeed.getFirstTrackletIndex() != nextLayerTrackletIndex) { // Check if cells share the same tracklet
378397 break;
379398 }
399+
400+ if (deltaROF) {
401+ const auto& trkl00 = tracklets[layerIndex][currentCellSeed.getFirstTrackletIndex()];
402+ const auto& trkl01 = tracklets[layerIndex + 1][currentCellSeed.getSecondTrackletIndex()];
403+ const auto& trkl10 = tracklets[layerIndex + 1][nextCellSeed.getFirstTrackletIndex()];
404+ const auto& trkl11 = tracklets[layerIndex + 2][nextCellSeed.getSecondTrackletIndex()];
405+ if ((o2::gpu::CAMath::Max(trkl00.getMaxRof(), o2::gpu::CAMath::Max(trkl01.getMaxRof(), o2::gpu::CAMath::Max(trkl10.getMaxRof(), trkl11.getMaxRof()))) -
406+ o2::gpu::CAMath::Min(trkl00.getMinRof(), o2::gpu::CAMath::Min(trkl01.getMinRof(), o2::gpu::CAMath::Min(trkl10.getMinRof(), trkl11.getMinRof())))) > deltaROF) {
407+ continue;
408+ }
409+ }
410+
380411 if (!nextCellSeed.rotate(currentCellSeed.getAlpha()) ||
381412 !nextCellSeed.propagateTo(currentCellSeed.getX(), bz)) {
382413 continue;
383414 }
415+
384416 float chi2 = currentCellSeed.getPredictedChi2(nextCellSeed);
385417 if (chi2 > maxChi2ClusterAttachment) /// TODO: switch to the chi2 wrt cluster to avoid correlation
386418 {
387419 continue;
388420 }
421+
389422 if constexpr (initRun) {
390423 atomicAdd(neighboursLUT + iNextCell, 1);
391424 neighboursIndexTable[iCurrentCellIndex]++;
@@ -412,6 +445,7 @@ GPUg() void computeLayerCellsKernel(
412445 const int layer,
413446 CellSeed* cells,
414447 int** cellsLUTs,
448+ const int deltaROF,
415449 const float bz,
416450 const float maxChi2ClusterAttachment,
417451 const float cellDeltaTanLambdaSigma,
@@ -432,6 +466,9 @@ GPUg() void computeLayerCellsKernel(
432466 break;
433467 }
434468 const Tracklet& nextTracklet = tracklets[layer + 1][iNextTrackletIndex];
469+ if (deltaROF && currentTracklet.getSpanRof(nextTracklet) > deltaROF) {
470+ continue;
471+ }
435472 const float deltaTanLambda{o2::gpu::CAMath::Abs(currentTracklet.tanLambda - nextTracklet.tanLambda)};
436473
437474 if (deltaTanLambda / cellDeltaTanLambdaSigma < nSigmaCut) {
@@ -515,9 +552,12 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
515552{
516553 const int phiBins{utils->getNphiBins()};
517554 const int zBins{utils->getNzBins()};
555+ const int tableSize{phiBins * zBins + 1};
518556 for (unsigned int iROF{blockIdx.x}; iROF < endROF - startROF; iROF += gridDim.x) {
519- const short rof0 = iROF + startROF;
520- auto primaryVertices = getPrimaryVertices(rof0, rofPV, totalROFs, multMask, vertices);
557+ const short pivotROF = iROF + startROF;
558+ const short minROF = o2::gpu::CAMath::Max(startROF, static_cast<int>(pivotROF - deltaROF));
559+ const short maxROF = o2::gpu::CAMath::Min(endROF - 1, static_cast<int>(pivotROF + deltaROF));
560+ auto primaryVertices = getPrimaryVertices(minROF, maxROF, rofPV, totalROFs, vertices);
521561 if (primaryVertices.empty()) {
522562 continue;
523563 }
@@ -526,17 +566,17 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
526566 if ((endVtx - startVtx) <= 0) {
527567 continue;
528568 }
529- const short minROF = o2::gpu::CAMath::Max(startROF, static_cast<int>(rof0 - deltaROF));
530- const short maxROF = o2::gpu::CAMath::Min(endROF - 1, static_cast<int>(rof0 + deltaROF));
531- auto clustersCurrentLayer = getClustersOnLayer(rof0, totalROFs, layerIndex, ROFClusters, clusters);
569+
570+ auto clustersCurrentLayer = getClustersOnLayer(pivotROF, totalROFs, layerIndex, ROFClusters, clusters);
532571 if (clustersCurrentLayer.empty()) {
533572 continue;
534573 }
535574
536575 for (int currentClusterIndex = threadIdx.x; currentClusterIndex < clustersCurrentLayer.size(); currentClusterIndex += blockDim.x) {
576+
537577 unsigned int storedTracklets{0};
538578 const auto& currentCluster{clustersCurrentLayer[currentClusterIndex]};
539- const int currentSortedIndex{ROFClusters[layerIndex][rof0 ] + currentClusterIndex};
579+ const int currentSortedIndex{ROFClusters[layerIndex][pivotROF ] + currentClusterIndex};
540580 if (usedClusters[layerIndex][currentCluster.clusterId]) {
541581 continue;
542582 }
@@ -564,18 +604,17 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
564604 phiBinsNum += phiBins;
565605 }
566606
567- const int tableSize{phiBins * zBins + 1};
568- for (short rof1{minROF}; rof1 <= maxROF; ++rof1) {
569- auto clustersNextLayer = getClustersOnLayer(rof1, totalROFs, layerIndex + 1, ROFClusters, clusters);
607+ for (short targetROF{minROF}; targetROF <= maxROF; ++targetROF) {
608+ auto clustersNextLayer = getClustersOnLayer(targetROF, totalROFs, layerIndex + 1, ROFClusters, clusters);
570609 if (clustersNextLayer.empty()) {
571610 continue;
572611 }
573612 for (int iPhiCount{0}; iPhiCount < phiBinsNum; iPhiCount++) {
574613 int iPhiBin = (selectedBinsRect.y + iPhiCount) % phiBins;
575614 const int firstBinIndex{utils->getBinIndex(selectedBinsRect.x, iPhiBin)};
576615 const int maxBinIndex{firstBinIndex + selectedBinsRect.z - selectedBinsRect.x + 1};
577- const int firstRowClusterIndex = indexTables[layerIndex + 1][(rof1 )*tableSize + firstBinIndex];
578- const int maxRowClusterIndex = indexTables[layerIndex + 1][(rof1 )*tableSize + maxBinIndex];
616+ const int firstRowClusterIndex = indexTables[layerIndex + 1][(targetROF )*tableSize + firstBinIndex];
617+ const int maxRowClusterIndex = indexTables[layerIndex + 1][(targetROF )*tableSize + maxBinIndex];
579618 for (int nextClusterIndex{firstRowClusterIndex}; nextClusterIndex < maxRowClusterIndex; ++nextClusterIndex) {
580619 if (nextClusterIndex >= clustersNextLayer.size()) {
581620 break;
@@ -592,8 +631,8 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
592631 } else {
593632 const float phi{o2::gpu::CAMath::ATan2(currentCluster.yCoordinate - nextCluster.yCoordinate, currentCluster.xCoordinate - nextCluster.xCoordinate)};
594633 const float tanL{(currentCluster.zCoordinate - nextCluster.zCoordinate) / (currentCluster.radius - nextCluster.radius)};
595- const int nextSortedIndex{ROFClusters[layerIndex + 1][rof1 ] + nextClusterIndex};
596- new (tracklets[layerIndex] + trackletsLUT[layerIndex][currentSortedIndex] + storedTracklets) Tracklet{currentSortedIndex, nextSortedIndex, tanL, phi, rof0, rof1 };
634+ const int nextSortedIndex{ROFClusters[layerIndex + 1][targetROF ] + nextClusterIndex};
635+ new (tracklets[layerIndex] + trackletsLUT[layerIndex][currentSortedIndex] + storedTracklets) Tracklet{currentSortedIndex, nextSortedIndex, tanL, phi, pivotROF, targetROF };
597636 }
598637 ++storedTracklets;
599638 }
@@ -1018,6 +1057,7 @@ void countCellsHandler(
10181057 CellSeed* cells,
10191058 int** cellsLUTsArrayDevice,
10201059 int* cellsLUTsHost,
1060+ const int deltaROF,
10211061 const float bz,
10221062 const float maxChi2ClusterAttachment,
10231063 const float cellDeltaTanLambdaSigma,
@@ -1035,6 +1075,7 @@ void countCellsHandler(
10351075 layer, // const int
10361076 cells, // CellSeed*
10371077 cellsLUTsArrayDevice, // int**
1078+ deltaROF, // const int
10381079 bz, // const float
10391080 maxChi2ClusterAttachment, // const float
10401081 cellDeltaTanLambdaSigma, // const float
@@ -1053,6 +1094,7 @@ void computeCellsHandler(
10531094 CellSeed* cells,
10541095 int** cellsLUTsArrayDevice,
10551096 int* cellsLUTsHost,
1097+ const int deltaROF,
10561098 const float bz,
10571099 const float maxChi2ClusterAttachment,
10581100 const float cellDeltaTanLambdaSigma,
@@ -1070,6 +1112,7 @@ void computeCellsHandler(
10701112 layer, // const int
10711113 cells, // CellSeed*
10721114 cellsLUTsArrayDevice, // int**
1115+ deltaROF, // const int
10731116 bz, // const float
10741117 maxChi2ClusterAttachment, // const float
10751118 cellDeltaTanLambdaSigma, // const float
@@ -1081,6 +1124,8 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
10811124 int** cellsLUTs,
10821125 gpuPair<int, int>* cellNeighbours,
10831126 int* neighboursIndexTable,
1127+ const Tracklet** tracklets,
1128+ const int deltaROF,
10841129 const float maxChi2ClusterAttachment,
10851130 const float bz,
10861131 const int layerIndex,
@@ -1096,12 +1141,13 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
10961141 neighboursIndexTable,
10971142 cellsLUTs,
10981143 cellNeighbours,
1144+ tracklets,
1145+ deltaROF,
10991146 maxChi2ClusterAttachment,
11001147 bz,
11011148 layerIndex,
11021149 nCells,
11031150 maxCellNeighbours);
1104-
11051151 gpu::cubInclusiveScanInPlace(neighboursLUT, nCellsNext);
11061152 gpu::cubExclusiveScanInPlace(neighboursIndexTable, nCells + 1);
11071153 unsigned int nNeighbours;
@@ -1114,6 +1160,8 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11141160 int** cellsLUTs,
11151161 gpuPair<int, int>* cellNeighbours,
11161162 int* neighboursIndexTable,
1163+ const Tracklet** tracklets,
1164+ const int deltaROF,
11171165 const float maxChi2ClusterAttachment,
11181166 const float bz,
11191167 const int layerIndex,
@@ -1130,6 +1178,8 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11301178 neighboursIndexTable,
11311179 cellsLUTs,
11321180 cellNeighbours,
1181+ tracklets,
1182+ deltaROF,
11331183 maxChi2ClusterAttachment,
11341184 bz,
11351185 layerIndex,
0 commit comments