From 5f55a2de1bf96c799e9047b836bc065ba91863bc Mon Sep 17 00:00:00 2001 From: Christophe Haen Date: Fri, 12 Dec 2025 10:52:41 +0100 Subject: [PATCH 1/3] feat (DFC Client): speedup getReplicas by sorting LFNs --- src/DIRAC/Resources/Catalog/FileCatalogClient.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/DIRAC/Resources/Catalog/FileCatalogClient.py b/src/DIRAC/Resources/Catalog/FileCatalogClient.py index 3db729e0dc1..a35df4f3b8d 100644 --- a/src/DIRAC/Resources/Catalog/FileCatalogClient.py +++ b/src/DIRAC/Resources/Catalog/FileCatalogClient.py @@ -141,7 +141,9 @@ def getReplicas(self, lfns, allStatus=False, timeout=120): successful = {} failed = {} - for chunk in breakListIntoChunks(lfns, GET_REPLICAS_CHUNK_SIZE): + # We want to sort the lfns because of the way the server groups the db queries by + # directory. So if we sort them, the grouping is more efficient. + for chunk in breakListIntoChunks(sorted(lfns), GET_REPLICAS_CHUNK_SIZE): rpcClient = self._getRPC(timeout=timeout) result = rpcClient.getReplicas(chunk, allStatus) From 75395a22ae516bb15c08bd136f6aee9a26816f0d Mon Sep 17 00:00:00 2001 From: Christophe Haen Date: Fri, 12 Dec 2025 14:31:00 +0100 Subject: [PATCH 2/3] feat (TS): TransformationAgent does not bulk calls to dm.getReplicas --- .../Agent/TransformationAgent.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/DIRAC/TransformationSystem/Agent/TransformationAgent.py b/src/DIRAC/TransformationSystem/Agent/TransformationAgent.py index 049d4a4a279..1339452b09a 100644 --- a/src/DIRAC/TransformationSystem/Agent/TransformationAgent.py +++ b/src/DIRAC/TransformationSystem/Agent/TransformationAgent.py @@ -500,19 +500,18 @@ def __getDataReplicas(self, transDict, lfns, clients, forJobs=True): startTime = time.time() self._logInfo(f"Getting replicas for {len(newLFNs)} files from catalog", method=method, transID=transID) newReplicas = {} - for chunk in breakListIntoChunks(newLFNs, 10000): - res = self._getDataReplicasDM(transID, chunk, clients, forJobs=forJobs) - if res["OK"]: - reps = {lfn: ses for lfn, ses in res["Value"].items() if ses} - newReplicas.update(reps) - self.__updateCache(transID, reps) - else: - self._logWarn( - f"Failed to get replicas for {len(chunk)} files", - res["Message"], - method=method, - transID=transID, - ) + res = self._getDataReplicasDM(transID, newLFNs, clients, forJobs=forJobs) + if res["OK"]: + newReplicas = {lfn: ses for lfn, ses in res["Value"].items() if ses} + + self.__updateCache(transID, newReplicas) + else: + self._logWarn( + f"Failed to get replicas for {len(newLFNs)} files", + res["Message"], + method=method, + transID=transID, + ) self._logInfo( f"Obtained {len(newReplicas)} replicas from catalog in {time.time() - startTime:.1f} seconds", From 26abc4349cc7c6e8a10ca747a281bb38254d40cb Mon Sep 17 00:00:00 2001 From: Christophe Haen Date: Fri, 12 Dec 2025 15:01:29 +0100 Subject: [PATCH 3/3] feat (FC): support passing sets --- src/DIRAC/Resources/Catalog/Utilities.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/DIRAC/Resources/Catalog/Utilities.py b/src/DIRAC/Resources/Catalog/Utilities.py index 7207dcb8773..c422d215f44 100644 --- a/src/DIRAC/Resources/Catalog/Utilities.py +++ b/src/DIRAC/Resources/Catalog/Utilities.py @@ -1,5 +1,5 @@ -""" DIRAC FileCatalog client utilities -""" +"""DIRAC FileCatalog client utilities""" + import os import errno import functools @@ -16,7 +16,7 @@ def checkArgumentDict(path): """Check and process format of the arguments to FileCatalog methods""" if isinstance(path, str): urls = {path: True} - elif isinstance(path, list): + elif isinstance(path, (list, set)): urls = {} for url in path: urls[url] = True