From fc6e63d4d4e3ae0426cb7282c7fcfee0015be15d Mon Sep 17 00:00:00 2001 From: Andrei Tsaregorodtsev Date: Mon, 3 Nov 2025 14:29:04 +0100 Subject: [PATCH 1/2] fix: avoid endless loop in MonitoringReporter --- src/DIRAC/MonitoringSystem/Client/MonitoringReporter.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/DIRAC/MonitoringSystem/Client/MonitoringReporter.py b/src/DIRAC/MonitoringSystem/Client/MonitoringReporter.py index 590e74b4275..b1970e0e398 100644 --- a/src/DIRAC/MonitoringSystem/Client/MonitoringReporter.py +++ b/src/DIRAC/MonitoringSystem/Client/MonitoringReporter.py @@ -134,6 +134,7 @@ def commit(self): self.__documents = [] self.__documentLock.release() recordSent = 0 + try_count = 0 try: while documents: recordsToSend = documents[: self.__maxRecordsInABundle] @@ -152,6 +153,10 @@ def commit(self): del documents[: self.__maxRecordsInABundle] else: gLogger.warn("Failed to insert the records:", retVal["Message"]) + try_count += 1 + if try_count == 10: + gLogger.error("Failed to insert Monitoring records after 10 attempts") + break except Exception as e: # pylint: disable=broad-except gLogger.exception("Error committing", lException=e) return S_ERROR(f"Error committing {repr(e).replace(',)',')')}") From 4883dba906aaed9922201cf690487f803359f06f Mon Sep 17 00:00:00 2001 From: Andrei Tsaregorodtsev Date: Fri, 14 Nov 2025 14:16:33 +0100 Subject: [PATCH 2/2] fix: reset try_count upon a successful document upload --- src/DIRAC/MonitoringSystem/Client/MonitoringReporter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DIRAC/MonitoringSystem/Client/MonitoringReporter.py b/src/DIRAC/MonitoringSystem/Client/MonitoringReporter.py index b1970e0e398..bdc4b0c5d97 100644 --- a/src/DIRAC/MonitoringSystem/Client/MonitoringReporter.py +++ b/src/DIRAC/MonitoringSystem/Client/MonitoringReporter.py @@ -151,6 +151,7 @@ def commit(self): # if we managed to publish the records we can delete from the list recordSent += len(recordsToSend) del documents[: self.__maxRecordsInABundle] + try_count = 0 else: gLogger.warn("Failed to insert the records:", retVal["Message"]) try_count += 1