diff --git a/src/DIRAC/Resources/Catalog/FileCatalogClient.py b/src/DIRAC/Resources/Catalog/FileCatalogClient.py index 3db729e0dc1..a35df4f3b8d 100644 --- a/src/DIRAC/Resources/Catalog/FileCatalogClient.py +++ b/src/DIRAC/Resources/Catalog/FileCatalogClient.py @@ -141,7 +141,9 @@ def getReplicas(self, lfns, allStatus=False, timeout=120): successful = {} failed = {} - for chunk in breakListIntoChunks(lfns, GET_REPLICAS_CHUNK_SIZE): + # We want to sort the lfns because of the way the server groups the db queries by + # directory. So if we sort them, the grouping is more efficient. + for chunk in breakListIntoChunks(sorted(lfns), GET_REPLICAS_CHUNK_SIZE): rpcClient = self._getRPC(timeout=timeout) result = rpcClient.getReplicas(chunk, allStatus) diff --git a/src/DIRAC/Resources/Catalog/Utilities.py b/src/DIRAC/Resources/Catalog/Utilities.py index 7207dcb8773..c422d215f44 100644 --- a/src/DIRAC/Resources/Catalog/Utilities.py +++ b/src/DIRAC/Resources/Catalog/Utilities.py @@ -1,5 +1,5 @@ -""" DIRAC FileCatalog client utilities -""" +"""DIRAC FileCatalog client utilities""" + import os import errno import functools @@ -16,7 +16,7 @@ def checkArgumentDict(path): """Check and process format of the arguments to FileCatalog methods""" if isinstance(path, str): urls = {path: True} - elif isinstance(path, list): + elif isinstance(path, (list, set)): urls = {} for url in path: urls[url] = True diff --git a/src/DIRAC/TransformationSystem/Agent/TransformationAgent.py b/src/DIRAC/TransformationSystem/Agent/TransformationAgent.py index 049d4a4a279..1339452b09a 100644 --- a/src/DIRAC/TransformationSystem/Agent/TransformationAgent.py +++ b/src/DIRAC/TransformationSystem/Agent/TransformationAgent.py @@ -500,19 +500,18 @@ def __getDataReplicas(self, transDict, lfns, clients, forJobs=True): startTime = time.time() self._logInfo(f"Getting replicas for {len(newLFNs)} files from catalog", method=method, transID=transID) newReplicas = {} - for chunk in breakListIntoChunks(newLFNs, 10000): - res = self._getDataReplicasDM(transID, chunk, clients, forJobs=forJobs) - if res["OK"]: - reps = {lfn: ses for lfn, ses in res["Value"].items() if ses} - newReplicas.update(reps) - self.__updateCache(transID, reps) - else: - self._logWarn( - f"Failed to get replicas for {len(chunk)} files", - res["Message"], - method=method, - transID=transID, - ) + res = self._getDataReplicasDM(transID, newLFNs, clients, forJobs=forJobs) + if res["OK"]: + newReplicas = {lfn: ses for lfn, ses in res["Value"].items() if ses} + + self.__updateCache(transID, newReplicas) + else: + self._logWarn( + f"Failed to get replicas for {len(newLFNs)} files", + res["Message"], + method=method, + transID=transID, + ) self._logInfo( f"Obtained {len(newReplicas)} replicas from catalog in {time.time() - startTime:.1f} seconds",