From f30c1286e255d43eff866845297d9a4d577287c6 Mon Sep 17 00:00:00 2001 From: khatrivarun Date: Fri, 27 Feb 2026 09:09:39 +0530 Subject: [PATCH 1/4] feat(garage): setting up garage configuration for prometheus scrapes --- modules/garage/configmap.tf | 1 + modules/garage/statefulset.tf | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/modules/garage/configmap.tf b/modules/garage/configmap.tf index 6db83f4..d1f231d 100644 --- a/modules/garage/configmap.tf +++ b/modules/garage/configmap.tf @@ -37,6 +37,7 @@ resource "kubernetes_config_map" "garage_config" { [admin] api_bind_addr = "[::]:3903" + metrics_require_token = false EOF } } diff --git a/modules/garage/statefulset.tf b/modules/garage/statefulset.tf index 55f1a97..f39a6d4 100644 --- a/modules/garage/statefulset.tf +++ b/modules/garage/statefulset.tf @@ -29,6 +29,13 @@ resource "kubernetes_stateful_set" "statefulset" { component = "pod" "part-of" = "garage" } + + // Scrape for metrics + annotations = { + "prometheus.io/scrape" = "true" + "prometheus.io/port" = "3903" + "prometheus.io/path" = "/metrics" + } } spec { From 363c2ac3b31aaf3b281eb9143b62c56aec69e842 Mon Sep 17 00:00:00 2001 From: khatrivarun Date: Fri, 27 Feb 2026 09:09:59 +0530 Subject: [PATCH 2/4] feat(garage): netpol update to allow for scrapes --- modules/garage/networkpolicy.tf | 24 +++++++++++++++++++++++- modules/garage/variables.tf | 6 ++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/modules/garage/networkpolicy.tf b/modules/garage/networkpolicy.tf index 7a6dcbd..833d017 100644 --- a/modules/garage/networkpolicy.tf +++ b/modules/garage/networkpolicy.tf @@ -99,8 +99,30 @@ resource "kubernetes_network_policy" "garage_network_access_policy" { port = 3943 } } + + # Rule 5: Allow OpenTelemetry Collector to scrape Garage metrics + ingress { + from { + namespace_selector { + match_labels = { + "kubernetes.io/metadata.name" = var.observability_namespace + } + } - # -------------- INGRESS RULES -------------- # + pod_selector { + match_labels = { + "app.kubernetes.io/instance" = "otel-collector" + } + } + } + + ports { + protocol = "TCP" + port = 3903 + } + } + + # -------------- EGRESS RULES -------------- # # Rule 1: Allow egress to other Garage pods egress { to { diff --git a/modules/garage/variables.tf b/modules/garage/variables.tf index 6be7dab..1f6f8ed 100644 --- a/modules/garage/variables.tf +++ b/modules/garage/variables.tf @@ -24,6 +24,12 @@ variable "namespace" { default = "garage" } +variable "observability_namespace" { + description = "Namespace where all components for observability are deployed" + type = string + nullable = false +} + # --------------- GARAGE CERTIFICATE VARIABLES --------------- # variable "cluster_issuer_name" { description = "Name for the Cluster Issuer to be used to generate internal self signed certificates" From 3eabbd96db7e083ea1c0a0bef6566683f8819e03 Mon Sep 17 00:00:00 2001 From: khatrivarun Date: Fri, 27 Feb 2026 09:10:12 +0530 Subject: [PATCH 3/4] feat(garage): garage configured for scrapes --- infrastructure/main.tf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/infrastructure/main.tf b/infrastructure/main.tf index e23fb19..efac43a 100644 --- a/infrastructure/main.tf +++ b/infrastructure/main.tf @@ -22,7 +22,7 @@ module "cluster-issuer" { # Complete Observability Stack Deployment module "observability" { - source = "git::https://github.com/necro-cloud/modules//modules/observability?ref=main" + source = "git::https://github.com/necro-cloud/modules//modules/observability?ref=task/93/garage-dashboards" // Certificates Details cluster_issuer_name = module.cluster-issuer.cluster-issuer-name @@ -35,7 +35,7 @@ module "observability" { # Garage Deployment for an S3 compatible object storage solution module "garage" { - source = "git::https://github.com/necro-cloud/modules//modules/garage?ref=main" + source = "git::https://github.com/necro-cloud/modules//modules/garage?ref=task/93/garage-dashboards" // Certificates Details cluster_issuer_name = module.cluster-issuer.cluster-issuer-name @@ -46,6 +46,9 @@ module "garage" { // Granting required namespaces access to the Garage cluster access_namespaces = "postgres,ferret" + // Observability details + observability_namespace = module.observability.observability_namespace + // Configuring required configurations on the Garage Cluster required_buckets = var.garage_required_buckets required_access_keys = var.garage_required_access_keys From 795e05e0df4df6ccee6e622fc94769b5d85e38b1 Mon Sep 17 00:00:00 2001 From: khatrivarun Date: Fri, 27 Feb 2026 10:58:07 +0530 Subject: [PATCH 4/4] feat(observability): garage dashboard deployment --- modules/observability/dashboards/garage.json | 1431 ++++++++++++++++++ modules/observability/grafana.tf | 22 +- 2 files changed, 1450 insertions(+), 3 deletions(-) create mode 100644 modules/observability/dashboards/garage.json diff --git a/modules/observability/dashboards/garage.json b/modules/observability/dashboards/garage.json new file mode 100644 index 0000000..4a9cfa5 --- /dev/null +++ b/modules/observability/dashboards/garage.json @@ -0,0 +1,1431 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 0, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [], + "title": "Garage Health", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Healthy" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "displayMode": "lcd", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "builder", + "expr": "min(cluster_healthy{namespace=\"$namespace\", pod=~\"$pod\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Cluster Health", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "max(cluster_connected_nodes{namespace=~\"$namespace\", pod=~\"$pod\"})", + "hide": false, + "legendFormat": "Number of Online Nodes", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "max(cluster_storage_nodes{namespace=~\"$namespace\", pod=~\"$pod\"})", + "hide": false, + "instant": false, + "legendFormat": "Number of Storage Nodes", + "range": true, + "refId": "B" + } + ], + "title": "Active vs Expected Nodes", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "% Nodes Online", + "binary": { + "left": { + "matcher": { + "id": "byName", + "options": "Number of Online Nodes" + } + }, + "operator": "/", + "right": { + "matcher": { + "id": "byName", + "options": "Number of Storage Nodes" + } + } + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 4, + "options": { + "displayMode": "lcd", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "max(block_resync_errored_blocks{namespace=~\"$namespace\", pod=~\"$pod\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Errored Blocks", + "type": "bargauge" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 5, + "panels": [], + "title": "Capacity & Hardware", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 75 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 6, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "100 - (sum by(pod) (garage_local_disk_avail{volume=\"data\", namespace=~\"$namespace\", pod=~\"$pod\"}) / sum by(pod) (garage_local_disk_total{volume=\"data\", namespace=~\"$namespace\", pod=~\"$pod\"}) * 100)", + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Data Volume Disk Usage (%)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "sum by(pod) (rate(block_bytes_read_total{namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", + "legendFormat": "Reads - {{pod}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "-sum by(pod) (rate(block_bytes_written_total{namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", + "hide": false, + "instant": false, + "legendFormat": "Writes - {{pod}}", + "range": true, + "refId": "B" + } + ], + "title": "Disk I/O Throughput", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 8, + "panels": [], + "title": "S3 API Performance", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (api_endpoint) (rate(api_s3_request_counter_total{namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", + "legendFormat": "{{api_endpoint}}", + "range": true, + "refId": "A" + } + ], + "title": "S3 Requests Per Second (RPS)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le, api_endpoint) (rate(api_s3_request_duration_bucket{namespace=~\"$namespace\", pod=~\"$pod\"}[5m])))", + "legendFormat": "{{api_endpoint}}", + "range": true, + "refId": "A" + } + ], + "title": "S3 p99 Latency (True Speed)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 11, + "panels": [], + "title": "Garage Internals & Backpressure", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 0 + } + ] + }, + "unit": "kbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "min by(pod) (block_ram_buffer_free_kb{namespace=~\"$namespace\", pod=~\"$pod\"})", + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "RAM Buffer Free (Backpressure Warning)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "sum by(table_name) (table_gc_todo_queue_length{namespace=~\"$namespace\", pod=~\"$pod\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "sum by(table_name) (table_merkle_updater_todo_queue_length{namespace=~\"$namespace\", pod=~\"$pod\"})", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Metadata Queues", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 14, + "panels": [], + "title": "Node-to-Node RPC (Network Health)", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 37 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (rpc_endpoint) (rate(rpc_request_counter_total{namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", + "legendFormat": "{{rpc_endpoint}}", + "range": true, + "refId": "A" + } + ], + "title": "RPC Requests Per Second (RPS)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 37 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (rpc_endpoint) (rate(rpc_duration_sum{namespace=~\"$namespace\", pod=~\"$pod\"}[5m])) \n/ \nsum by (rpc_endpoint) (rate(rpc_duration_count{namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", + "legendFormat": "Avg Latency: {{rpc_endpoint}}", + "range": true, + "refId": "A" + } + ], + "title": "Average RPC Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 45 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le, rpc_endpoint) (rate(rpc_duration_bucket{namespace=~\"$namespace\", pod=~\"$pod\"}[5m])))", + "legendFormat": "p99: {{rpc_endpoint}}", + "range": true, + "refId": "A" + } + ], + "title": "p99 RPC Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "footer": { + "reducers": [] + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 45 + }, + "id": 15, + "options": { + "cellHeight": "sm", + "showHeader": true + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "editorMode": "code", + "expr": "max by(id) (cluster_layout_node_disconnected_time{namespace=~\"$namespace\", pod=~\"$pod\"}) > 0", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Disconnected Node Timer", + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 53 + }, + "id": 19, + "panels": [], + "title": "Logging", + "type": "row" + }, + { + "datasource": { + "type": "victoriametrics-logs-datasource", + "uid": "PD775F2863313E6C7" + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 20, + "options": { + "dedupStrategy": "none", + "enableInfiniteScrolling": false, + "enableLogDetails": true, + "showControls": true, + "showTime": false, + "sortOrder": "Descending", + "syntaxHighlighting": true, + "wrapLogMessage": true + }, + "pluginVersion": "12.3.3", + "targets": [ + { + "datasource": { + "type": "victoriametrics-logs-datasource", + "uid": "PD775F2863313E6C7" + }, + "editorMode": "code", + "expr": "part-of: \"garage\" AND k8s.pod.name: $pod", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Logging", + "type": "logs" + } + ], + "preload": false, + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "garage", + "value": "garage" + }, + "definition": "label_values(garage_build_info,namespace)", + "description": "Namespace where the Garage Cluster lives", + "label": "Namespace", + "name": "namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(garage_build_info,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "garage", + "value": "garage" + }, + "definition": "label_values(garage_build_info,pod)", + "description": "Name of the Garage Cluster", + "label": "Cluster", + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(garage_build_info,pod)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "/^(.*?)(?:-[0-9]+)$/", + "type": "query" + }, + { + "current": { + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "definition": "label_values(garage_build_info{namespace=\"$namespace\", pod=~\"^$cluster-.*\"},pod)", + "description": "Name of the Pod in the cluster", + "includeAll": true, + "label": "Pod Name", + "multi": true, + "name": "pod", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(garage_build_info{namespace=\"$namespace\", pod=~\"^$cluster-.*\"},pod)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Garage S3 Dashboard", + "uid": "obf7jq7", + "version": 2 +} \ No newline at end of file diff --git a/modules/observability/grafana.tf b/modules/observability/grafana.tf index 11a1643..63a6fae 100644 --- a/modules/observability/grafana.tf +++ b/modules/observability/grafana.tf @@ -113,8 +113,19 @@ resource "helm_release" "grafana" { disableDeletion = false editable = true options = { - path = "/var/lib/grafana/dashboards/default" - } + path = "/var/lib/grafana/dashboards/psql" + }, + }, + { + name = "Garage S3 Object Storage Dashboard" + orgId = 1 + folder = "Object Storage Dashboards" + type = "file" + disableDeletion = false + editable = true + options = { + path = "/var/lib/grafana/dashboards/garage" + }, } ] } @@ -122,11 +133,16 @@ resource "helm_release" "grafana" { // Injecting the Dashboard JSON file into the Grafana container dashboards = { - default = { + psql = { postgres-dashboard = { json = file("${path.module}/dashboards/postgresql.json") } } + garage = { + garage-dashboard = { + json = file("${path.module}/dashboards/garage.json") + } + } } affinity = {