Skip to content

Commit 63bfc6d

Browse files
committed
GPU: Add option to register all temporary input memory used by the standalone benchmark
1 parent e814b0a commit 63bfc6d

File tree

8 files changed: 27 additions and 3 deletions.

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,9 @@ inline void GPUReconstruction::AllocateIOMemoryHelper(unsigned int n, const T*&
342342
}
343343
u.reset(new T[n]);
344344
ptr = u.get();
345+
if (mDeviceProcessingSettings.registerStandaloneInputMemory) {
346+
registerMemoryForGPU(u.get(), n * sizeof(T));
347+
}
345348
}
346349

347350
template <class T, typename... Args>

GPU/GPUTracking/Base/GPUReconstructionConvert.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ void GPUReconstructionConvert::ZSstreamOut(unsigned short* bufIn, unsigned int&
180180
lenIn = 0;
181181
}
182182

183-
void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, const GPUTrackingInOutZS*& out, const GPUParam& param, bool zs12bit)
183+
void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, GPUTrackingInOutZS*& out, const GPUParam& param, bool zs12bit)
184184
{
185185
#ifdef GPUCA_TPC_GEOMETRY_O2
186186
static std::vector<std::array<long long int, TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(long long int)>> buffer[NSLICES][GPUTrackingInOutZS::NENDPOINTS];

GPU/GPUTracking/Base/GPUReconstructionConvert.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class GPUReconstructionConvert
4848
constexpr static unsigned int NSLICES = GPUCA_NSLICES;
4949
static void ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess* native, std::unique_ptr<GPUTPCClusterData[]>* clusters, unsigned int* nClusters, const TPCFastTransform* transform, int continuousMaxTimeBin = 0);
5050
static void ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess& native, std::unique_ptr<o2::tpc::ClusterNative[]>& nativeBuffer, const AliHLTTPCRawCluster** rawClusters, unsigned int* nRawClusters);
51-
static void RunZSEncoder(const GPUTrackingInOutDigits* in, const GPUTrackingInOutZS*& out, const GPUParam& param, bool zs12bit);
51+
static void RunZSEncoder(const GPUTrackingInOutDigits* in, GPUTrackingInOutZS*& out, const GPUParam& param, bool zs12bit);
5252
static void RunZSFilter(std::unique_ptr<deprecated::PackedDigit[]>* buffers, const deprecated::PackedDigit* const* ptrs, size_t* nsb, const size_t* ns, const GPUParam& param, bool zs12bit);
5353
static int GetMaxTimeBin(const o2::tpc::ClusterNativeAccess& native);
5454
static int GetMaxTimeBin(const GPUTrackingInOutDigits& digits);

GPU/GPUTracking/Base/GPUSettings.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,5 @@ void GPUSettingsDeviceProcessing::SetDefaults()
8787
trackletSelectorInPipeline = false;
8888
forceMemoryPoolSize = 0;
8989
nTPCClustererLanes = 3;
90+
registerStandaloneInputMemory = false;
9091
}

GPU/GPUTracking/Base/GPUSettings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ struct GPUSettingsDeviceProcessing {
143143
size_t forceMemoryPoolSize; // Override size of memory pool to be allocated on GPU / Host (set =1 to force allocating all device memory, if supported)
144144
int nTPCClustererLanes; // Number of TPC clusterers that can run in parallel
145145
bool deviceTimers; // Use device timers instead of host-based timers
146+
bool registerStandaloneInputMemory; // Automatically register memory for the GPU which is used as input for the standalone benchmark
146147
};
147148
} // namespace gpu
148149
} // namespace GPUCA_NAMESPACE

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,9 @@ void GPUChainTracking::ConvertNativeToClusterDataLegacy()
691691
GPUReconstructionConvert::ConvertNativeToClusterData(mClusterNativeAccess.get(), mIOMem.clusterData, mIOPtrs.nClusterData, processors()->calibObjects.fastTransform, param().continuousMaxTimeBin);
692692
for (unsigned int i = 0; i < NSLICES; i++) {
693693
mIOPtrs.clusterData[i] = mIOMem.clusterData[i].get();
694+
if (GetDeviceProcessingSettings().registerStandaloneInputMemory) {
695+
mRec->registerMemoryForGPU(mIOMem.clusterData[i].get(), mIOPtrs.nClusterData[i] * sizeof(*mIOPtrs.clusterData[i]));
696+
}
694697
}
695698
mIOPtrs.clustersNative = nullptr;
696699
mIOMem.clustersNative.reset(nullptr);
@@ -709,11 +712,25 @@ void GPUChainTracking::ConvertRun2RawToNative()
709712
mIOMem.clusterData[i].reset(nullptr);
710713
}
711714
mIOPtrs.clustersNative = mClusterNativeAccess.get();
715+
if (GetDeviceProcessingSettings().registerStandaloneInputMemory) {
716+
mRec->registerMemoryForGPU(mIOMem.clustersNative.get(), mClusterNativeAccess->nClustersTotal * sizeof(*mClusterNativeAccess->clustersLinear));
717+
}
712718
}
713719

714720
void GPUChainTracking::ConvertZSEncoder(bool zs12bit)
715721
{
716-
GPUReconstructionConvert::RunZSEncoder(mIOPtrs.tpcPackedDigits, mIOPtrs.tpcZS, param(), zs12bit);
722+
GPUTrackingInOutZS* tmp;
723+
GPUReconstructionConvert::RunZSEncoder(mIOPtrs.tpcPackedDigits, tmp, param(), zs12bit);
724+
mIOPtrs.tpcZS = tmp;
725+
if (GetDeviceProcessingSettings().registerStandaloneInputMemory) {
726+
for (unsigned int i = 0; i < NSLICES; i++) {
727+
for (unsigned int j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
728+
for (unsigned int k = 0; k < tmp->slice[i].count[j]; k++) {
729+
mRec->registerMemoryForGPU(tmp->slice[i].zsPtr[j][k], tmp->slice[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE);
730+
}
731+
}
732+
}
733+
}
717734
}
718735

719736
void GPUChainTracking::ConvertZSFilter(bool zs12bit)

GPU/GPUTracking/Standalone/qconfigoptions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ AddOption(dzdr, float, 2.5f, "DzDr", 0, "Use dZ/dR search window instead of vert
138138
AddOption(cont, bool, false, "continuous", 0, "Process continuous timeframe data")
139139
AddOption(forceMemorySize, unsigned long long int, 1, "memSize", 0, "Force size of allocated GPU / page locked host memory", min(0ull))
140140
AddOption(outputcontrolmem, unsigned long long int, 0, "outputMemory", 0, "Use predefined output buffer of this size", min(0ull), message("Using %lld bytes as output memory"))
141+
AddOption(registerInputMemory, bool, false, "registerInputMemory", 0, "Automatically register input memory buffers for the GPU")
141142
AddOption(affinity, int, -1, "cpuAffinity", 0, "Pin CPU affinity to this CPU core", min(-1), message("Setting affinity to restrict on CPU %d"))
142143
AddOption(fifo, bool, false, "fifoScheduler", 0, "Use FIFO realtime scheduler", message("Setting FIFO scheduler: %s"))
143144
AddOption(fpe, bool, true, "fpe", 0, "Trap on floating point exceptions")

GPU/GPUTracking/Standalone/standalone.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ int SetupReconstruction()
329329
devProc.globalInitMutex = configStandalone.gpuInitMutex;
330330
devProc.gpuDeviceOnly = configStandalone.oclGPUonly;
331331
devProc.memoryAllocationStrategy = configStandalone.allocationStrategy;
332+
devProc.registerStandaloneInputMemory = configStandalone.registerInputMemory;
332333
recSet.tpcRejectionMode = configStandalone.configRec.tpcReject;
333334
if (configStandalone.configRec.tpcRejectThreshold != 0.f) {
334335
recSet.tpcRejectQPt = 1.f / configStandalone.configRec.tpcRejectThreshold;

0 commit comments

Comments
 (0)