From 2bdec94860e969caa3ce1c5a5e50ff762c32b23d Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Thu, 11 Dec 2025 17:02:13 +0000 Subject: [PATCH 01/17] add prometheus run.sh, new env and label --- .gitignore | 1 + docker-compose.yml | 1 + prometheus/prometheus.yml | 3 +++ prometheus/prometheus.yml.example | 29 +++++++++++++++++++++++++++++ prometheus/run.sh | 21 +++++++++++++++++++++ 5 files changed, 55 insertions(+) create mode 100644 prometheus/prometheus.yml.example create mode 100644 prometheus/run.sh diff --git a/.gitignore b/.gitignore index b9c601f4..d76459fa 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ teku/validator/ # Teku directory for storing logs teku/logs/ commit-boost/config.toml +prometheus/prometheus.yml diff --git a/docker-compose.yml b/docker-compose.yml index bab6221a..dd397336 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -198,6 +198,7 @@ services: - ./grafana/grafana.ini:/etc/grafana/grafana.ini:ro - ./grafana/dashboards:/etc/dashboards - ./data/grafana:/var/lib/grafana + entrypoint: /etc/prometheus/run.sh restart: unless-stopped loki: diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index a5866c81..cceb57ad 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -21,6 +21,9 @@ scrape_configs: - job_name: "charon" static_configs: - targets: ["charon:3620"] + relabel_configs: + - target_label: alert_discord_ids + replacement: "$ALERT_DISCORD_IDS" - job_name: "lodestar" static_configs: - targets: [ "lodestar:5064" ] diff --git a/prometheus/prometheus.yml.example b/prometheus/prometheus.yml.example new file mode 100644 index 00000000..87ff64de --- /dev/null +++ b/prometheus/prometheus.yml.example @@ -0,0 +1,29 @@ +global: + scrape_interval: 30s # Set the scrape interval to every 30 seconds. + evaluation_interval: 30s # Evaluate rules every 30 seconds. 
+ +remote_write: + - url: https://vm.monitoring.gcp.obol.tech/write + authorization: + credentials: $PROM_REMOTE_WRITE_TOKEN + write_relabel_configs: + - source_labels: [job] + regex: "charon|nethermind|lighthouse|lodestar" + action: keep # Keeps charon metrics and drop metrics from other containers. + +scrape_configs: + - job_name: "nethermind" + static_configs: + - targets: ["nethermind:8008"] + - job_name: "lighthouse" + static_configs: + - targets: ["lighthouse:5054"] + - job_name: "charon" + static_configs: + - targets: ["charon:3620"] + - job_name: "lodestar" + static_configs: + - targets: [ "lodestar:5064" ] + - job_name: "validator-ejector" + static_configs: + - targets: [ "validator-ejector:8989" ] \ No newline at end of file diff --git a/prometheus/run.sh b/prometheus/run.sh new file mode 100644 index 00000000..000941ba --- /dev/null +++ b/prometheus/run.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +if [ -z "$SERVICE_OWNER" ] +then + echo "\$SERVICE_OWNER variable is empty" >&2 + exit 1 +fi + +if [ -z "$PROM_REMOTE_WRITE_TOKEN" ] +then + echo "\$PROM_REMOTE_WRITE_TOKEN variable is empty" >&2 + exit 1 +fi + +sed -e "s|\$PROM_REMOTE_WRITE_TOKEN|${PROM_REMOTE_WRITE_TOKEN}|g" \ + -e "s|\$SERVICE_OWNER|${SERVICE_OWNER}|g" \ + -e "s|\$ALERT_DISCORD_IDS|${ALERT_DISCORD_IDS}|g" \ + /etc/prometheus/prometheus.yml.example > /etc/prometheus/prometheus.yml + +/bin/prometheus \ + --config.file=/etc/prometheus/prometheus.yml From 32392dc403eba861bb56d1cd54f5e50b0c7eef24 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Fri, 12 Dec 2025 09:20:40 +0000 Subject: [PATCH 02/17] update envs --- .env.sample.holesky | 6 ++++++ .env.sample.hoodi | 6 ++++++ .env.sample.mainnet | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/.env.sample.holesky b/.env.sample.holesky index 84887bf5..c3019e7b 100644 --- a/.env.sample.holesky +++ b/.env.sample.holesky @@ -259,6 +259,12 @@ LIDODVEXIT_EXIT_EPOCH=256 # See available tags https://github.com/prometheus/prometheus/releases. 
#PROMETHEUS_VERSION= +# To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. +# Enable developer mode on discord with User Settings > Advanced. +# Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. +# Specify multiple discord IDs using comma separation. (e.g. `ALERT_DISCORD_IDS=123456789098765432,098765432123456789`) +#ALERT_DISCORD_IDS="" + # Uncomment these if you have log exporting with Promtail # and want to disable log export on a particular container. #EL_NETHERMIND_PROMTAIL_MONITORED=false diff --git a/.env.sample.hoodi b/.env.sample.hoodi index 3329a1f7..78b800a8 100644 --- a/.env.sample.hoodi +++ b/.env.sample.hoodi @@ -266,6 +266,12 @@ LIDODVEXIT_EXIT_EPOCH=256 # See available tags https://github.com/prometheus/prometheus/releases. #PROMETHEUS_VERSION= +# To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. +# Enable developer mode on discord with User Settings > Advanced. +# Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. +# Specify multiple discord IDs using comma separation. (e.g. `ALERT_DISCORD_IDS=123456789098765432,098765432123456789`) +#ALERT_DISCORD_IDS="" + # Uncomment these if you have log exporting with Promtail # and want to disable log export on a particular container. #EL_NETHERMIND_PROMTAIL_MONITORED=false diff --git a/.env.sample.mainnet b/.env.sample.mainnet index 709ce0da..b3eebe94 100644 --- a/.env.sample.mainnet +++ b/.env.sample.mainnet @@ -267,6 +267,12 @@ LIDODVEXIT_EXIT_EPOCH=194048 # See available tags https://github.com/prometheus/prometheus/releases. #PROMETHEUS_VERSION= +# To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. +# Enable developer mode on discord with User Settings > Advanced. 
+# Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. +# Specify multiple discord IDs using comma separation. (e.g. `ALERT_DISCORD_IDS=123456789098765432,098765432123456789`) +#ALERT_DISCORD_IDS="" + # Uncomment these if you have log exporting with Promtail # and want to disable log export on a particular container. #EL_NETHERMIND_PROMTAIL_MONITORED=false From 3acb8be037aaf14e9f55dbcf0100faaced9f7c63 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Tue, 6 Jan 2026 17:56:34 +0000 Subject: [PATCH 03/17] add mev boost metric scraping --- compose-mev.yml | 2 ++ prometheus/prometheus.yml | 3 +++ prometheus/prometheus.yml.example | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/compose-mev.yml b/compose-mev.yml index 5e1f1b2c..944fa3aa 100644 --- a/compose-mev.yml +++ b/compose-mev.yml @@ -39,6 +39,8 @@ services: -request-timeout-getheader=${MEV_TIMEOUT_GETHEADER:-950} -request-timeout-getpayload=${MEV_TIMEOUT_GETPAYLOAD:-4000} -request-timeout-regval=${MEV_TIMEOUT_REGVAL:-3000} + -metrics + -metrics-addr=0.0.0.0:18551 labels: - "promtail-monitored=${MEV_MEV_BOOST_PROMTAIL_MONITORED:-true}" networks: [dvnode] diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index cceb57ad..ca33fb2b 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -24,6 +24,9 @@ scrape_configs: relabel_configs: - target_label: alert_discord_ids replacement: "$ALERT_DISCORD_IDS" + - job_name: "mev-boost" + static_configs: + - targets: ["mev-mevboost:18551"] - job_name: "lodestar" static_configs: - targets: [ "lodestar:5064" ] diff --git a/prometheus/prometheus.yml.example b/prometheus/prometheus.yml.example index 87ff64de..64031d5d 100644 --- a/prometheus/prometheus.yml.example +++ b/prometheus/prometheus.yml.example @@ -21,6 +21,12 @@ scrape_configs: - job_name: "charon" static_configs: - targets: ["charon:3620"] + relabel_configs: + - target_label: alert_discord_ids + 
replacement: "$ALERT_DISCORD_IDS" + - job_name: "mev-boost" + static_configs: + - targets: ["mev-mevboost:18551"] - job_name: "lodestar" static_configs: - targets: [ "lodestar:5064" ] From a6ab60f75fab4ad969a9bf2f3810d03775cd2706 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Tue, 6 Jan 2026 21:00:54 +0000 Subject: [PATCH 04/17] add operator label --- .env.sample.holesky | 3 +++ .env.sample.hoodi | 3 +++ .env.sample.mainnet | 3 +++ prometheus/prometheus.yml.example | 3 ++- prometheus/run.sh | 1 + 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.env.sample.holesky b/.env.sample.holesky index c3019e7b..5a0630c1 100644 --- a/.env.sample.holesky +++ b/.env.sample.holesky @@ -259,6 +259,9 @@ LIDODVEXIT_EXIT_EPOCH=256 # See available tags https://github.com/prometheus/prometheus/releases. #PROMETHEUS_VERSION= +# Unique label to identify this operator in Prometheus metrics. +#OPERATOR_PROMETHEUS_LABEL= + # To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. # Enable developer mode on discord with User Settings > Advanced. # Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. diff --git a/.env.sample.hoodi b/.env.sample.hoodi index 78b800a8..0e7d9009 100644 --- a/.env.sample.hoodi +++ b/.env.sample.hoodi @@ -266,6 +266,9 @@ LIDODVEXIT_EXIT_EPOCH=256 # See available tags https://github.com/prometheus/prometheus/releases. #PROMETHEUS_VERSION= +# Unique label to identify this operator in Prometheus metrics. +#OPERATOR_PROMETHEUS_LABEL= + # To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. # Enable developer mode on discord with User Settings > Advanced. # Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. 
diff --git a/.env.sample.mainnet b/.env.sample.mainnet index b3eebe94..2994b73b 100644 --- a/.env.sample.mainnet +++ b/.env.sample.mainnet @@ -267,6 +267,9 @@ LIDODVEXIT_EXIT_EPOCH=194048 # See available tags https://github.com/prometheus/prometheus/releases. #PROMETHEUS_VERSION= +# Unique label to identify this operator in Prometheus metrics. +#OPERATOR_PROMETHEUS_LABEL= + # To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. # Enable developer mode on discord with User Settings > Advanced. # Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. diff --git a/prometheus/prometheus.yml.example b/prometheus/prometheus.yml.example index 64031d5d..7da2f579 100644 --- a/prometheus/prometheus.yml.example +++ b/prometheus/prometheus.yml.example @@ -1,7 +1,8 @@ global: scrape_interval: 30s # Set the scrape interval to every 30 seconds. evaluation_interval: 30s # Evaluate rules every 30 seconds. 
- + external_labels: + operator_id: $OPERATOR_PROMETHEUS_LABEL remote_write: - url: https://vm.monitoring.gcp.obol.tech/write authorization: diff --git a/prometheus/run.sh b/prometheus/run.sh index 000941ba..2d5af027 100644 --- a/prometheus/run.sh +++ b/prometheus/run.sh @@ -15,6 +15,7 @@ fi sed -e "s|\$PROM_REMOTE_WRITE_TOKEN|${PROM_REMOTE_WRITE_TOKEN}|g" \ -e "s|\$SERVICE_OWNER|${SERVICE_OWNER}|g" \ -e "s|\$ALERT_DISCORD_IDS|${ALERT_DISCORD_IDS}|g" \ + -e "s|\$OPERATOR_PROMETHEUS_LABEL|${OPERATOR_PROMETHEUS_LABEL}|g" \ /etc/prometheus/prometheus.yml.example > /etc/prometheus/prometheus.yml /bin/prometheus \ From 2bee8378979f050db269f5934bcf08abc335d07c Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Tue, 6 Jan 2026 21:57:03 +0000 Subject: [PATCH 05/17] add operator label only to mev metrics --- prometheus/prometheus.yml.example | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/prometheus/prometheus.yml.example b/prometheus/prometheus.yml.example index 7da2f579..b5349169 100644 --- a/prometheus/prometheus.yml.example +++ b/prometheus/prometheus.yml.example @@ -1,8 +1,6 @@ global: scrape_interval: 30s # Set the scrape interval to every 30 seconds. evaluation_interval: 30s # Evaluate rules every 30 seconds. 
- external_labels: - operator_id: $OPERATOR_PROMETHEUS_LABEL remote_write: - url: https://vm.monitoring.gcp.obol.tech/write authorization: @@ -28,6 +26,9 @@ scrape_configs: - job_name: "mev-boost" static_configs: - targets: ["mev-mevboost:18551"] + relabel_configs: + - target_label: operator_id + replacement: "$OPERATOR_PROMETHEUS_LABEL" - job_name: "lodestar" static_configs: - targets: [ "lodestar:5064" ] From 783d87ce4d93b1ec88f69ed2512e806e80ac6115 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Tue, 6 Jan 2026 22:40:41 +0000 Subject: [PATCH 06/17] add mev boost to remote_write regex --- prometheus/prometheus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index ca33fb2b..dd313997 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -8,7 +8,7 @@ remote_write: credentials: $PROM_REMOTE_WRITE_TOKEN write_relabel_configs: - source_labels: [job] - regex: "charon|nethermind|lighthouse|lodestar" + regex: "charon|nethermind|lighthouse|lodestar|mev-boost" action: keep # Keeps charon metrics and drop metrics from other containers. 
scrape_configs: From fcf10f0791be06466966f239ed56e4c269b0470a Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Wed, 7 Jan 2026 09:42:41 +0000 Subject: [PATCH 07/17] update prometheus to use run.sh --- compose-monitoring.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/compose-monitoring.yml b/compose-monitoring.yml index 7623b121..6bef811a 100644 --- a/compose-monitoring.yml +++ b/compose-monitoring.yml @@ -8,9 +8,13 @@ services: image: prom/prometheus:${PROMETHEUS_VERSION:-v2.53.5} user: ":" networks: [dvnode] + environment: + PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN} + SERVICE_OWNER: ${SERVICE_OWNER:-"obol-lcdvn"} volumes: - - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + - ./prometheus:/etc/prometheus - ./data/prometheus:/prometheus + entrypoint: /etc/prometheus/run.sh restart: unless-stopped grafana: From eb055a844ac5ee9012a30675fd19b4a9185fed60 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Wed, 7 Jan 2026 09:43:22 +0000 Subject: [PATCH 08/17] update docker compose to use run.sh --- docker-compose.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index dd397336..3ae5502f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -180,9 +180,13 @@ services: profiles: [""] user: ":" networks: [dvnode] + environment: + PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN} + SERVICE_OWNER: ${SERVICE_OWNER:-"obol-cdvn"} volumes: - - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + - ./prometheus:/etc/prometheus - ./data/prometheus:/prometheus + entrypoint: /etc/prometheus/run.sh restart: unless-stopped grafana: From 1ec6d0cbb424e6eb0585339f6a72a8a5a418f80b Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Wed, 7 Jan 2026 09:46:16 +0000 Subject: [PATCH 09/17] add prom remote token and service owner env variables --- .env.sample.holesky | 6 ++++++ .env.sample.hoodi | 6 ++++++ .env.sample.mainnet | 6 ++++++ 3 files changed, 18 
insertions(+) diff --git a/.env.sample.holesky b/.env.sample.holesky index 5a0630c1..1d36fe6e 100644 --- a/.env.sample.holesky +++ b/.env.sample.holesky @@ -259,6 +259,12 @@ LIDODVEXIT_EXIT_EPOCH=256 # See available tags https://github.com/prometheus/prometheus/releases. #PROMETHEUS_VERSION= +# Prometheus remote write token used for accessing external prometheus. +#PROM_REMOTE_WRITE_TOKEN= + +# Prometheus service owner used to uniquely identify user from which metrics are pushed. +#SERVICE_OWNER=charon_user + # Unique label to identify this operator in Prometheus metrics. #OPERATOR_PROMETHEUS_LABEL= diff --git a/.env.sample.hoodi b/.env.sample.hoodi index 0e7d9009..5fa30c03 100644 --- a/.env.sample.hoodi +++ b/.env.sample.hoodi @@ -266,6 +266,12 @@ LIDODVEXIT_EXIT_EPOCH=256 # See available tags https://github.com/prometheus/prometheus/releases. #PROMETHEUS_VERSION= +# Prometheus remote write token used for accessing external prometheus. +#PROM_REMOTE_WRITE_TOKEN= + +# Prometheus service owner used to uniquely identify user from which metrics are pushed. +#SERVICE_OWNER=charon_user + # Unique label to identify this operator in Prometheus metrics. #OPERATOR_PROMETHEUS_LABEL= diff --git a/.env.sample.mainnet b/.env.sample.mainnet index 2994b73b..2ab72115 100644 --- a/.env.sample.mainnet +++ b/.env.sample.mainnet @@ -267,6 +267,12 @@ LIDODVEXIT_EXIT_EPOCH=194048 # See available tags https://github.com/prometheus/prometheus/releases. #PROMETHEUS_VERSION= +# Prometheus remote write token used for accessing external prometheus. +#PROM_REMOTE_WRITE_TOKEN= + +# Prometheus service owner used to uniquely identify user from which metrics are pushed. +#SERVICE_OWNER=charon_user + # Unique label to identify this operator in Prometheus metrics. 
#OPERATOR_PROMETHEUS_LABEL= From 4f17686f9569de1b3d6f1255b91716a1583c20f8 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Wed, 7 Jan 2026 12:53:40 +0000 Subject: [PATCH 10/17] add discord env variable to compose --- compose-monitoring.yml | 1 + docker-compose.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/compose-monitoring.yml b/compose-monitoring.yml index 6bef811a..e84df4d1 100644 --- a/compose-monitoring.yml +++ b/compose-monitoring.yml @@ -11,6 +11,7 @@ services: environment: PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN} SERVICE_OWNER: ${SERVICE_OWNER:-"obol-lcdvn"} + ALERT_DISCORD_IDS: "${ALERT_DISCORD_IDS}" volumes: - ./prometheus:/etc/prometheus - ./data/prometheus:/prometheus diff --git a/docker-compose.yml b/docker-compose.yml index 3ae5502f..d0188db0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -183,6 +183,7 @@ services: environment: PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN} SERVICE_OWNER: ${SERVICE_OWNER:-"obol-cdvn"} + ALERT_DISCORD_IDS: "${ALERT_DISCORD_IDS}" volumes: - ./prometheus:/etc/prometheus - ./data/prometheus:/prometheus From b6f9d0ac2aaaafb0fd391426f6030028163b0106 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Wed, 7 Jan 2026 12:54:36 +0000 Subject: [PATCH 11/17] add metrics flag to compose --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index d0188db0..eb922245 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -163,6 +163,8 @@ services: -request-timeout-getheader=${MEVBOOST_TIMEOUT_GETHEADER:-900} -request-timeout-getpayload=${MEVBOOST_TIMEOUT_GETPAYLOAD:-4000} -request-timeout-regval=${MEVBOOST_TIMEOUT_REGVAL:-3000} + -metrics + -metrics-addr=0.0.0.0:18551 labels: - "promtail-monitored=${MEV_BOOST_PROMTAIL_MONITORED:-true}" networks: [dvnode] From ef39cee67266f9998542ecefb12d8d81d39cf586 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Wed, 7 Jan 2026 13:38:16 +0000 Subject: [PATCH 12/17] bump charon to v1.8.1 --- 
docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index eb922245..ce65ae0a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -87,7 +87,7 @@ services: # \___|_| |_|\__,_|_| \___/|_| |_| charon: - image: obolnetwork/charon:${CHARON_VERSION:-v1.7.2} + image: obolnetwork/charon:${CHARON_VERSION:-v1.8.1} environment: - CHARON_BEACON_NODE_ENDPOINTS=${CHARON_BEACON_NODE_ENDPOINTS:-http://lighthouse:5052} - CHARON_BEACON_NODE_HEADERS=${CHARON_BEACON_NODE_HEADERS:-} From 4d2ee03685a9e5b572fcd9d99a2a1c725947a4f9 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Wed, 7 Jan 2026 14:19:54 +0000 Subject: [PATCH 13/17] scrape both mev container --- prometheus/prometheus.yml | 5 ++++- prometheus/prometheus.yml.example | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index dd313997..19191260 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -26,7 +26,10 @@ scrape_configs: replacement: "$ALERT_DISCORD_IDS" - job_name: "mev-boost" static_configs: - - targets: ["mev-mevboost:18551"] + - targets: ["mev-mevboost:18551","mev-boost:18551"] + relabel_configs: + - target_label: operator_id + replacement: "$OPERATOR_PROMETHEUS_LABEL" - job_name: "lodestar" static_configs: - targets: [ "lodestar:5064" ] diff --git a/prometheus/prometheus.yml.example b/prometheus/prometheus.yml.example index b5349169..706abe6d 100644 --- a/prometheus/prometheus.yml.example +++ b/prometheus/prometheus.yml.example @@ -1,13 +1,14 @@ global: scrape_interval: 30s # Set the scrape interval to every 30 seconds. evaluation_interval: 30s # Evaluate rules every 30 seconds. 
+ remote_write: - url: https://vm.monitoring.gcp.obol.tech/write authorization: credentials: $PROM_REMOTE_WRITE_TOKEN write_relabel_configs: - source_labels: [job] - regex: "charon|nethermind|lighthouse|lodestar" + regex: "charon|nethermind|lighthouse|lodestar|mev-boost" action: keep # Keeps charon metrics and drop metrics from other containers. scrape_configs: @@ -25,7 +26,7 @@ scrape_configs: replacement: "$ALERT_DISCORD_IDS" - job_name: "mev-boost" static_configs: - - targets: ["mev-mevboost:18551"] + - targets: ["mev-mevboost:18551","mev-boost:18551"] relabel_configs: - target_label: operator_id replacement: "$OPERATOR_PROMETHEUS_LABEL" From df80ed2cdce269da92b65d338085125893cc4091 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Wed, 7 Jan 2026 14:20:29 +0000 Subject: [PATCH 14/17] delete prometheus file --- prometheus/prometheus.yml | 38 -------------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 prometheus/prometheus.yml diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml deleted file mode 100644 index 19191260..00000000 --- a/prometheus/prometheus.yml +++ /dev/null @@ -1,38 +0,0 @@ -global: - scrape_interval: 30s # Set the scrape interval to every 30 seconds. - evaluation_interval: 30s # Evaluate rules every 30 seconds. - -remote_write: - - url: https://vm.monitoring.gcp.obol.tech/write - authorization: - credentials: $PROM_REMOTE_WRITE_TOKEN - write_relabel_configs: - - source_labels: [job] - regex: "charon|nethermind|lighthouse|lodestar|mev-boost" - action: keep # Keeps charon metrics and drop metrics from other containers. 
- -scrape_configs: - - job_name: "nethermind" - static_configs: - - targets: ["nethermind:8008"] - - job_name: "lighthouse" - static_configs: - - targets: ["lighthouse:5054"] - - job_name: "charon" - static_configs: - - targets: ["charon:3620"] - relabel_configs: - - target_label: alert_discord_ids - replacement: "$ALERT_DISCORD_IDS" - - job_name: "mev-boost" - static_configs: - - targets: ["mev-mevboost:18551","mev-boost:18551"] - relabel_configs: - - target_label: operator_id - replacement: "$OPERATOR_PROMETHEUS_LABEL" - - job_name: "lodestar" - static_configs: - - targets: [ "lodestar:5064" ] - - job_name: "validator-ejector" - static_configs: - - targets: [ "validator-ejector:8989" ] From 8c12cbfe8c00bbbf1e5efeb33d2ee12ec96ad57b Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Wed, 7 Jan 2026 15:05:15 +0000 Subject: [PATCH 15/17] add service owner as cluster hash --- compose-monitoring.yml | 3 ++- docker-compose.yml | 3 ++- prometheus/prometheus.yml.example | 2 ++ prometheus/run.sh | 7 +++++-- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/compose-monitoring.yml b/compose-monitoring.yml index e84df4d1..8a336cd9 100644 --- a/compose-monitoring.yml +++ b/compose-monitoring.yml @@ -10,11 +10,12 @@ services: networks: [dvnode] environment: PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN} - SERVICE_OWNER: ${SERVICE_OWNER:-"obol-lcdvn"} + SERVICE_OWNER: ${SERVICE_OWNER} ALERT_DISCORD_IDS: "${ALERT_DISCORD_IDS}" volumes: - ./prometheus:/etc/prometheus - ./data/prometheus:/prometheus + - ./.charon/cluster-lock.json:/opt/charon/.charon/cluster-lock.json:ro entrypoint: /etc/prometheus/run.sh restart: unless-stopped diff --git a/docker-compose.yml b/docker-compose.yml index ce65ae0a..426bd4f8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -184,11 +184,12 @@ services: networks: [dvnode] environment: PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN} - SERVICE_OWNER: ${SERVICE_OWNER:-"obol-cdvn"} + SERVICE_OWNER: ${SERVICE_OWNER} 
ALERT_DISCORD_IDS: "${ALERT_DISCORD_IDS}" volumes: - ./prometheus:/etc/prometheus - ./data/prometheus:/prometheus + - ./.charon/cluster-lock.json:/opt/charon/.charon/cluster-lock.json:ro entrypoint: /etc/prometheus/run.sh restart: unless-stopped diff --git a/prometheus/prometheus.yml.example b/prometheus/prometheus.yml.example index 706abe6d..8fc83a5f 100644 --- a/prometheus/prometheus.yml.example +++ b/prometheus/prometheus.yml.example @@ -1,6 +1,8 @@ global: scrape_interval: 30s # Set the scrape interval to every 30 seconds. evaluation_interval: 30s # Evaluate rules every 30 seconds. + external_labels: + service_owner: $SERVICE_OWNER # replace this with your Operator name you want to be identified by, it helps us route alerts and metrics to your notification channels easily remote_write: - url: https://vm.monitoring.gcp.obol.tech/write diff --git a/prometheus/run.sh b/prometheus/run.sh index 2d5af027..5de4e467 100644 --- a/prometheus/run.sh +++ b/prometheus/run.sh @@ -2,8 +2,11 @@ if [ -z "$SERVICE_OWNER" ] then - echo "\$SERVICE_OWNER variable is empty" >&2 - exit 1 + if [ -f /opt/charon/.charon/cluster-lock.json ]; then + export SERVICE_OWNER=$(cat /opt/charon/.charon/cluster-lock.json | jq -r '.lock_hash[2:9]') + else + export SERVICE_OWNER="unknown" + fi fi if [ -z "$PROM_REMOTE_WRITE_TOKEN" ] From 9334d08989119cb904cca80fcc190b9b64d27c87 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Wed, 7 Jan 2026 16:34:08 +0000 Subject: [PATCH 16/17] remove operator id and use cluster name/peer --- .env.sample.holesky | 7 ++----- .env.sample.hoodi | 7 ++----- .env.sample.mainnet | 7 ++----- compose-monitoring.yml | 3 ++- docker-compose.yml | 3 ++- prometheus/prometheus.yml.example | 6 ++++-- prometheus/run.sh | 5 ++--- 7 files changed, 16 insertions(+), 22 deletions(-) diff --git a/.env.sample.holesky b/.env.sample.holesky index 1d36fe6e..2c823f68 100644 --- a/.env.sample.holesky +++ b/.env.sample.holesky @@ -185,10 +185,10 @@ 
CHARON_EXECUTION_CLIENT_RPC_ENDPOINT=http://${EL}:8545 # Loki log aggregation server addresses. Disable loki log aggregation by setting an empty address. #CHARON_LOKI_ADDRESSES= -# Charon Cluster Name. Mandatory to send logs with Promtail. +# Charon Cluster Name. Mandatory to send logs with Promtail and metrics with Prometheus. #CLUSTER_NAME= -# Charon Cluster Peer. Mandatory to send logs with Promtail. +# Charon Cluster Peer. Mandatory to send logs with Promtail and metrics with Prometheus. #CLUSTER_PEER= # Nickname to identify this charon node on monitoring (max 32 characters). @@ -265,9 +265,6 @@ LIDODVEXIT_EXIT_EPOCH=256 # Prometheus service owner used to uniquely identify user from which metrics are pushed. #SERVICE_OWNER=charon_user -# Unique label to identify this operator in Prometheus metrics. -#OPERATOR_PROMETHEUS_LABEL= - # To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. # Enable developer mode on discord with User Settings > Advanced. # Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. diff --git a/.env.sample.hoodi b/.env.sample.hoodi index 5fa30c03..f30e0244 100644 --- a/.env.sample.hoodi +++ b/.env.sample.hoodi @@ -186,10 +186,10 @@ CHARON_EXECUTION_CLIENT_RPC_ENDPOINT=http://${EL}:8545 # Loki log aggregation server addresses. Disable loki log aggregation by setting an empty address. #CHARON_LOKI_ADDRESSES= -# Charon Cluster Name. Mandatory to send logs with Promtail. +# Charon Cluster Name. Mandatory to send logs with Promtail and metrics with Prometheus. #CLUSTER_NAME= -# Charon Cluster Peer. Mandatory to send logs with Promtail. +# Charon Cluster Peer. Mandatory to send logs with Promtail and metrics with Prometheus. #CLUSTER_PEER= # Nickname to identify this charon node on monitoring (max 32 characters). 
@@ -272,9 +272,6 @@ LIDODVEXIT_EXIT_EPOCH=256 # Prometheus service owner used to uniquely identify user from which metrics are pushed. #SERVICE_OWNER=charon_user -# Unique label to identify this operator in Prometheus metrics. -#OPERATOR_PROMETHEUS_LABEL= - # To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. # Enable developer mode on discord with User Settings > Advanced. # Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. diff --git a/.env.sample.mainnet b/.env.sample.mainnet index 2ab72115..0a05604c 100644 --- a/.env.sample.mainnet +++ b/.env.sample.mainnet @@ -187,10 +187,10 @@ CHARON_EXECUTION_CLIENT_RPC_ENDPOINT=http://${EL}:8545 # Loki log aggregation server addresses. Disable loki log aggregation by setting an empty address. #CHARON_LOKI_ADDRESSES= -# Charon Cluster Name. Mandatory to send logs with Promtail. +# Charon Cluster Name. Mandatory to send logs with Promtail and metrics with Prometheus. #CLUSTER_NAME= -# Charon Cluster Peer. Mandatory to send logs with Promtail. +# Charon Cluster Peer. Mandatory to send logs with Promtail and metrics with Prometheus. #CLUSTER_PEER= # Nickname to identify this charon node on monitoring (max 32 characters). @@ -273,9 +273,6 @@ LIDODVEXIT_EXIT_EPOCH=194048 # Prometheus service owner used to uniquely identify user from which metrics are pushed. #SERVICE_OWNER=charon_user -# Unique label to identify this operator in Prometheus metrics. -#OPERATOR_PROMETHEUS_LABEL= - # To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. # Enable developer mode on discord with User Settings > Advanced. # Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. 
diff --git a/compose-monitoring.yml b/compose-monitoring.yml index 8a336cd9..5bb6b65b 100644 --- a/compose-monitoring.yml +++ b/compose-monitoring.yml @@ -12,10 +12,11 @@ services: PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN} SERVICE_OWNER: ${SERVICE_OWNER} ALERT_DISCORD_IDS: "${ALERT_DISCORD_IDS}" + CLUSTER_NAME: ${CLUSTER_NAME} + CLUSTER_PEER: ${CLUSTER_PEER} volumes: - ./prometheus:/etc/prometheus - ./data/prometheus:/prometheus - - ./.charon/cluster-lock.json:/opt/charon/.charon/cluster-lock.json:ro entrypoint: /etc/prometheus/run.sh restart: unless-stopped diff --git a/docker-compose.yml b/docker-compose.yml index 426bd4f8..4cbba440 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -186,10 +186,11 @@ services: PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN} SERVICE_OWNER: ${SERVICE_OWNER} ALERT_DISCORD_IDS: "${ALERT_DISCORD_IDS}" + CLUSTER_NAME: ${CLUSTER_NAME} + CLUSTER_PEER: ${CLUSTER_PEER} volumes: - ./prometheus:/etc/prometheus - ./data/prometheus:/prometheus - - ./.charon/cluster-lock.json:/opt/charon/.charon/cluster-lock.json:ro entrypoint: /etc/prometheus/run.sh restart: unless-stopped diff --git a/prometheus/prometheus.yml.example b/prometheus/prometheus.yml.example index 8fc83a5f..c36ed579 100644 --- a/prometheus/prometheus.yml.example +++ b/prometheus/prometheus.yml.example @@ -30,8 +30,10 @@ scrape_configs: static_configs: - targets: ["mev-mevboost:18551","mev-boost:18551"] relabel_configs: - - target_label: operator_id - replacement: "$OPERATOR_PROMETHEUS_LABEL" + - target_label: cluster_name + replacement: "$CLUSTER_NAME" + - target_label: cluster_peer + replacement: "$CLUSTER_PEER" - job_name: "lodestar" static_configs: - targets: [ "lodestar:5064" ] diff --git a/prometheus/run.sh b/prometheus/run.sh index 5de4e467..2aa077b5 100644 --- a/prometheus/run.sh +++ b/prometheus/run.sh @@ -2,8 +2,8 @@ if [ -z "$SERVICE_OWNER" ] then - if [ -f /opt/charon/.charon/cluster-lock.json ]; then - export SERVICE_OWNER=$(cat 
/opt/charon/.charon/cluster-lock.json | jq -r '.lock_hash[2:9]') + if [ -n "$CLUSTER_NAME" ] && [ -n "$CLUSTER_PEER" ]; then + export SERVICE_OWNER="${CLUSTER_NAME}-${CLUSTER_PEER}" else export SERVICE_OWNER="unknown" fi @@ -18,7 +18,8 @@ fi sed -e "s|\$PROM_REMOTE_WRITE_TOKEN|${PROM_REMOTE_WRITE_TOKEN}|g" \ -e "s|\$SERVICE_OWNER|${SERVICE_OWNER}|g" \ -e "s|\$ALERT_DISCORD_IDS|${ALERT_DISCORD_IDS}|g" \ - -e "s|\$OPERATOR_PROMETHEUS_LABEL|${OPERATOR_PROMETHEUS_LABEL}|g" \ + -e "s|\$CLUSTER_NAME|${CLUSTER_NAME}|g" \ + -e "s|\$CLUSTER_PEER|${CLUSTER_PEER}|g" \ /etc/prometheus/prometheus.yml.example > /etc/prometheus/prometheus.yml /bin/prometheus \ From 21b6216278a5d1a2e37bc6093d8ea43a69dd71f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= <4981644+OisinKyne@users.noreply.github.com> Date: Wed, 7 Jan 2026 19:20:29 +0000 Subject: [PATCH 17/17] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hope this isn't too forward. Keen to get everything to rc while i'm on a roll. Signed-off-by: Oisín Kyne <4981644+OisinKyne@users.noreply.github.com> --- .env.sample.holesky | 2 +- .env.sample.hoodi | 2 +- .env.sample.mainnet | 2 +- compose-monitoring.yml | 10 +++++----- docker-compose.yml | 10 +++++----- prometheus/prometheus.yml.example | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.env.sample.holesky b/.env.sample.holesky index 2c823f68..5ee77f7d 100644 --- a/.env.sample.holesky +++ b/.env.sample.holesky @@ -268,7 +268,7 @@ LIDODVEXIT_EXIT_EPOCH=256 # To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. # Enable developer mode on discord with User Settings > Advanced. # Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. -# Specify multiple discord IDs using comma separation. (e.g. `DISCORD_IDS=123456789098765432,098765432123456789`) +# Specify multiple discord IDs using comma separation. (e.g.
`ALERT_DISCORD_IDS=123456789098765432,098765432123456789`) #ALERT_DISCORD_IDS="" # Uncomment these if you have log exporting with Promtail diff --git a/.env.sample.hoodi b/.env.sample.hoodi index f30e0244..85d40dd1 100644 --- a/.env.sample.hoodi +++ b/.env.sample.hoodi @@ -275,7 +275,7 @@ LIDODVEXIT_EXIT_EPOCH=256 # To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. # Enable developer mode on discord with User Settings > Advanced. # Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. -# Specify multiple discord IDs using comma separation. (e.g. `DISCORD_IDS=123456789098765432,098765432123456789`) +# Specify multiple discord IDs using comma separation. (e.g. `ALERT_DISCORD_IDS=123456789098765432,098765432123456789`) #ALERT_DISCORD_IDS="" # Uncomment these if you have log exporting with Promtail diff --git a/.env.sample.mainnet b/.env.sample.mainnet index 0a05604c..bba01964 100644 --- a/.env.sample.mainnet +++ b/.env.sample.mainnet @@ -276,7 +276,7 @@ LIDODVEXIT_EXIT_EPOCH=194048 # To get Alerted with Obol Agent monitoring on Discord, specify your Discord ID(s) below. # Enable developer mode on discord with User Settings > Advanced. # Then right click on a user's profile picture or name and select Copy ID to get a unique 18-digit number that represents their account. -# Specify multiple discord IDs using comma separation. (e.g. `DISCORD_IDS=123456789098765432,098765432123456789`) +# Specify multiple discord IDs using comma separation. (e.g. 
`ALERT_DISCORD_IDS=123456789098765432,098765432123456789`) #ALERT_DISCORD_IDS="" # Uncomment these if you have log exporting with Promtail diff --git a/compose-monitoring.yml b/compose-monitoring.yml index 5bb6b65b..ddf02012 100644 --- a/compose-monitoring.yml +++ b/compose-monitoring.yml @@ -9,11 +9,11 @@ services: user: ":" networks: [dvnode] environment: - PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN} - SERVICE_OWNER: ${SERVICE_OWNER} - ALERT_DISCORD_IDS: "${ALERT_DISCORD_IDS}" - CLUSTER_NAME: ${CLUSTER_NAME} - CLUSTER_PEER: ${CLUSTER_PEER} + PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN:-} + SERVICE_OWNER: ${SERVICE_OWNER:-} + ALERT_DISCORD_IDS: "${ALERT_DISCORD_IDS:-}" + CLUSTER_NAME: ${CLUSTER_NAME:-} + CLUSTER_PEER: ${CLUSTER_PEER:-} volumes: - ./prometheus:/etc/prometheus - ./data/prometheus:/prometheus diff --git a/docker-compose.yml b/docker-compose.yml index 4cbba440..12206d5b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -183,11 +183,11 @@ services: user: ":" networks: [dvnode] environment: - PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN} - SERVICE_OWNER: ${SERVICE_OWNER} - ALERT_DISCORD_IDS: "${ALERT_DISCORD_IDS}" - CLUSTER_NAME: ${CLUSTER_NAME} - CLUSTER_PEER: ${CLUSTER_PEER} + PROM_REMOTE_WRITE_TOKEN: ${PROM_REMOTE_WRITE_TOKEN:-} + SERVICE_OWNER: ${SERVICE_OWNER:-} + ALERT_DISCORD_IDS: "${ALERT_DISCORD_IDS:-}" + CLUSTER_NAME: ${CLUSTER_NAME:-} + CLUSTER_PEER: ${CLUSTER_PEER:-} volumes: - ./prometheus:/etc/prometheus - ./data/prometheus:/prometheus diff --git a/prometheus/prometheus.yml.example b/prometheus/prometheus.yml.example index c36ed579..375aa9d1 100644 --- a/prometheus/prometheus.yml.example +++ b/prometheus/prometheus.yml.example @@ -10,7 +10,7 @@ remote_write: credentials: $PROM_REMOTE_WRITE_TOKEN write_relabel_configs: - source_labels: [job] - regex: "charon|nethermind|lighthouse|lodestar|mev-boost" + regex: "charon|mev-boost" action: keep # Keeps charon metrics and drop metrics from other containers. 
scrape_configs: