From 1f53cf11903134263790cb537fe2b2340b51dc98 Mon Sep 17 00:00:00 2001 From: Jon Cope Date: Fri, 5 Jun 2026 13:31:58 -0500 Subject: [PATCH 1/3] USHIFT-6951: add node-exporter Kubernetes manifests Co-Authored-By: Claude Opus 4.6 --- .../optional/node-exporter/00-namespace.yaml | 9 + .../01-cluster-role-binding.yaml | 18 ++ .../node-exporter/01-cluster-role.yaml | 31 +++ .../01-security-context-constraints.yaml | 22 ++ .../node-exporter/01-service-account.yaml | 12 ++ .../02-accelerators-collector-configmap.yaml | 139 +++++++++++++ .../02-kube-rbac-proxy-secret.yaml | 17 ++ .../optional/node-exporter/03-daemonset.yaml | 191 ++++++++++++++++++ assets/optional/node-exporter/04-service.yaml | 24 +++ .../node-exporter/kustomization.aarch64.yaml | 7 + .../node-exporter/kustomization.x86_64.yaml | 7 + .../optional/node-exporter/kustomization.yaml | 12 ++ .../release-node-exporter-aarch64.json | 8 + .../release-node-exporter-x86_64.json | 8 + 14 files changed, 505 insertions(+) create mode 100644 assets/optional/node-exporter/00-namespace.yaml create mode 100644 assets/optional/node-exporter/01-cluster-role-binding.yaml create mode 100644 assets/optional/node-exporter/01-cluster-role.yaml create mode 100644 assets/optional/node-exporter/01-security-context-constraints.yaml create mode 100644 assets/optional/node-exporter/01-service-account.yaml create mode 100644 assets/optional/node-exporter/02-accelerators-collector-configmap.yaml create mode 100644 assets/optional/node-exporter/02-kube-rbac-proxy-secret.yaml create mode 100644 assets/optional/node-exporter/03-daemonset.yaml create mode 100644 assets/optional/node-exporter/04-service.yaml create mode 100644 assets/optional/node-exporter/kustomization.aarch64.yaml create mode 100644 assets/optional/node-exporter/kustomization.x86_64.yaml create mode 100644 assets/optional/node-exporter/kustomization.yaml create mode 100644 assets/optional/node-exporter/release-node-exporter-aarch64.json create mode 100644 
assets/optional/node-exporter/release-node-exporter-x86_64.json diff --git a/assets/optional/node-exporter/00-namespace.yaml b/assets/optional/node-exporter/00-namespace.yaml new file mode 100644 index 0000000000..17f727565a --- /dev/null +++ b/assets/optional/node-exporter/00-namespace.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: openshift-monitoring + labels: + name: openshift-monitoring + pod-security.kubernetes.io/enforce: privileged + pod-security.kubernetes.io/audit: privileged + pod-security.kubernetes.io/warn: privileged diff --git a/assets/optional/node-exporter/01-cluster-role-binding.yaml b/assets/optional/node-exporter/01-cluster-role-binding.yaml new file mode 100644 index 0000000000..b6790fa9b4 --- /dev/null +++ b/assets/optional/node-exporter/01-cluster-role-binding.yaml @@ -0,0 +1,18 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + name: node-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: node-exporter +subjects: +- kind: ServiceAccount + name: node-exporter + namespace: openshift-monitoring diff --git a/assets/optional/node-exporter/01-cluster-role.yaml b/assets/optional/node-exporter/01-cluster-role.yaml new file mode 100644 index 0000000000..50d7a5e755 --- /dev/null +++ b/assets/optional/node-exporter/01-cluster-role.yaml @@ -0,0 +1,31 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + name: node-exporter +rules: +- apiGroups: + - authentication.k8s.io + 
resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- apiGroups: + - security.openshift.io + resourceNames: + - node-exporter + resources: + - securitycontextconstraints + verbs: + - use diff --git a/assets/optional/node-exporter/01-security-context-constraints.yaml b/assets/optional/node-exporter/01-security-context-constraints.yaml new file mode 100644 index 0000000000..1caaf72fcd --- /dev/null +++ b/assets/optional/node-exporter/01-security-context-constraints.yaml @@ -0,0 +1,22 @@ +allowHostDirVolumePlugin: true +allowHostNetwork: true +allowHostPID: true +allowHostPorts: true +allowPrivilegedContainer: true +apiVersion: security.openshift.io/v1 +kind: SecurityContextConstraints +metadata: + annotations: + kubernetes.io/description: node-exporter scc is used for the Prometheus node exporter + labels: + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/part-of: openshift-monitoring + name: node-exporter +readOnlyRootFilesystem: false +runAsUser: + type: RunAsAny +seLinuxContext: + type: RunAsAny +seccompProfiles: +- runtime/default +users: [] diff --git a/assets/optional/node-exporter/01-service-account.yaml b/assets/optional/node-exporter/01-service-account.yaml new file mode 100644 index 0000000000..c3d1dc95c9 --- /dev/null +++ b/assets/optional/node-exporter/01-service-account.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + name: node-exporter + namespace: openshift-monitoring diff --git a/assets/optional/node-exporter/02-accelerators-collector-configmap.yaml b/assets/optional/node-exporter/02-accelerators-collector-configmap.yaml new file mode 
100644 index 0000000000..05984b7fb9 --- /dev/null +++ b/assets/optional/node-exporter/02-accelerators-collector-configmap.yaml @@ -0,0 +1,139 @@ +apiVersion: v1 +data: + config.yaml: |- + - "models": + - "modelName": "NVIDIA A800 PCIe 80GB" + "pciID": "0x20f5" + - "modelName": "NVIDIA A800 40GB PCIe active cooled" + "pciID": "0x20f6" + - "modelName": "NVIDIA AX800" + "pciID": "0x20fd" + - "modelName": "NVIDIA A100 PCIe 40GB" + "pciID": "0x20f1" + - "modelName": "NVIDIA A100 PCIe 80GB" + "pciID": "0x20b5" + - "modelName": "NVIDIA A40" + "pciID": "0x2235" + - "modelName": "NVIDIA A30" + "pciID": "0x20b7" + - "modelName": "NVIDIA A10" + "pciID": "0x2236" + - "modelName": "NVIDIA A16" + "pciID": "0x25b6" + - "modelName": "H800 NVL" + "pciID": "0x2322" + - "modelName": "NVIDIA H100 NVL" + "pciID": "0x2321" + - "modelName": "NVIDIA H100 PCIe 80GB" + "pciID": "0x2331" + - "modelName": "NVIDIA L40" + "pciID": "0x26b5" + - "modelName": "NVIDIA L40S" + "pciID": "0x26b9" + - "modelName": "NVIDIA L20 liquid cooled" + "pciID": "0x26bA" + - "modelName": "NVIDIA L4" + "pciID": "0x27b8" + - "modelName": "NVIDIA L2" + "pciID": "0x27b6" + - "modelName": "NVIDIA RTX 6000 Ada" + "pciID": "0x26b1" + - "modelName": "NVIDIA RTX 5880 Ada" + "pciID": "0x26b3" + - "modelName": "NVIDIA RTX 5000 Ada" + "pciID": "0x2231" + - "modelName": "NVIDIA RTX A6000" + "pciID": "0x2230" + - "modelName": "NVIDIA RTX A5500" + "pciID": "0x2233" + - "modelName": "NVIDIA RTX 8000 passive" + "pciID": "0x1e30" + - "modelName": "NVIDIA RTX A2000" + "pciID": "0x2531" + - "modelName": "NVIDIA A100 SXM4 40GB" + "pciID": "0x20b0" + - "modelName": "NVIDIA H800 NVL" + "pciID": "0x233a" + - "modelName": "NVIDIA H200 NVL" + "pciID": "0x233b" + - "modelName": "NVIDIA A100 SXM4 80GB" + "pciID": "0x20b2" + - "modelName": "NVIDIA A100 SXM 64GB" + "pciID": "0x20b3" + - "modelName": "NVIDIA A800 SXM4 40GB" + "pciID": "0x20bd" + - "modelName": "NVIDIA A800 SXM4 80GB" + "pciID": "0x20f3" + - "modelName": "NVIDIA RTX A1000" + 
"pciID": "0x25b0" + - "modelName": "Blackwell RTX PRO 6000" + "pciID": "0x2bb5" + - "modelName": "Blackwell GB100" + "pciID": "0x2941" + "vendorID": "0x10de" + "vendorName": "NVIDIA" + - "models": + - "modelName": "AMD MI210" + "pciID": "0x740f" + - "modelName": "AMD MI250" + "pciID": "0x740c" + - "modelName": "AMD MI250X" + "pciID": "0x7408" + - "modelName": "AMD MI300" + "pciID": "0x74a0" + - "modelName": "AMD MI300X" + "pciID": "0x74a1" + - "modelName": "AMD MI325X" + "pciID": "0x74a5" + - "modelName": "AMD MI308X" + "pciID": "0x7aa2" + - "modelName": "AMD MI300X VF" + "pciID": "0x74b5" + - "modelName": "AMD MI210 VF" + "pciID": "0x7410" + "vendorID": "0x1002" + "vendorName": "AMD" + - "models": + - "modelName": "Gaudi 1" + "pciID": "0x1000" + - "modelName": "Gaudi 2" + "pciID": "0x1020" + "vendorID": "0x1da3" + "vendorName": "GAUDI" + - "models": + - "modelName": "Intel Data Center GPU Max 1550" + "pciID": "0x0bd5" + - "modelName": "Intel Data Center GPU Max 1100" + "pciID": "0x0bda" + - "modelName": "Intel Data Center GPU Flex 170" + "pciID": "0x56c0" + - "modelName": "Intel Data Center GPU Flex 140" + "pciID": "0x56c1" + - "modelName": "Intel IPU Data Path" + "pciID": "0x1452" + "vendorID": "0x8086" + "vendorName": "Intel" + - "models": + - "modelName": "Qualcomm AI 100" + "pciID": "0xa100" + - "modelName": "Qualcomm AI 80" + "pciID": "0xa080" + "vendorID": "0x17cb" + "vendorName": "Qualcomm" + - "models": + - "modelName": "Marvell OCTEON 10 CN10XXX" + "pciID": "0xb900" + "vendorID": "0x177d" + "vendorName": "Marvell" + - "models": + - "modelName": "BlueField-3 integrated ConnectX-7" + "pciID": "0xa2dc" + "vendorID": "0x15b3" + "vendorName": "Mellanox" +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/part-of: openshift-monitoring + name: node-exporter-accelerators-collector-config + namespace: openshift-monitoring diff --git a/assets/optional/node-exporter/02-kube-rbac-proxy-secret.yaml 
b/assets/optional/node-exporter/02-kube-rbac-proxy-secret.yaml new file mode 100644 index 0000000000..e02d0bb40b --- /dev/null +++ b/assets/optional/node-exporter/02-kube-rbac-proxy-secret.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +data: {} +kind: Secret +metadata: + labels: + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/part-of: openshift-monitoring + name: node-exporter-kube-rbac-proxy-config + namespace: openshift-monitoring +stringData: + config.yaml: |- + "authorization": + "static": + - "path": "/metrics" + "resourceRequest": false + "verb": "get" +type: Opaque diff --git a/assets/optional/node-exporter/03-daemonset.yaml b/assets/optional/node-exporter/03-daemonset.yaml new file mode 100644 index 0000000000..25fdbb546a --- /dev/null +++ b/assets/optional/node-exporter/03-daemonset.yaml @@ -0,0 +1,191 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + name: node-exporter + namespace: openshift-monitoring +spec: + selector: + matchLabels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + template: + metadata: + annotations: + cluster-autoscaler.kubernetes.io/enable-ds-eviction: "false" + kubectl.kubernetes.io/default-container: node-exporter + openshift.io/required-scc: node-exporter + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + spec: + automountServiceAccountToken: true + containers: + - args: + - --web.listen-address=127.0.0.1:9101 + - 
--path.sysfs=/host/sys + - --path.rootfs=/host/root + - --path.procfs=/host/root/proc + - --path.udev.data=/host/root/run/udev/data + - --no-collector.wifi + - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.netclass.ignored-devices=^.*$ + - --collector.netdev.device-exclude=^.*$ + - --collector.cpu.info + - --collector.textfile.directory=/var/node_exporter/textfile + - --no-collector.btrfs + command: + - /bin/sh + - -c + - | + export GOMAXPROCS=4 + # We don't take CPU affinity into account as the container doesn't have integer CPU requests. + # In case of error, fallback to the default value. + NUM_CPUS=$(grep -c '^processor' "/proc/cpuinfo" 2>/dev/null || echo "0") + if [ "$NUM_CPUS" -lt "$GOMAXPROCS" ]; then + export GOMAXPROCS="$NUM_CPUS" + fi + echo "ts=$(date --iso-8601=seconds) num_cpus=$NUM_CPUS gomaxprocs=$GOMAXPROCS" + exec /bin/node_exporter "$0" "$@" + env: + - name: DBUS_SYSTEM_BUS_ADDRESS + value: unix:path=/host/root/var/run/dbus/system_bus_socket + image: quay.io/openshift/node-exporter + name: node-exporter + resources: + requests: + cpu: 8m + memory: 32Mi + securityContext: {} + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /host/sys + mountPropagation: HostToContainer + name: sys + readOnly: true + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + - mountPath: /var/node_exporter/textfile + name: node-exporter-textfile + readOnly: true + - mountPath: /var/node_exporter/accelerators_collector_config + name: node-exporter-accelerators-collector-config + readOnly: true + workingDir: /var/node_exporter/textfile + - args: + - --secure-listen-address=0.0.0.0:9100 + - 
--tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 + - --upstream=http://127.0.0.1:9101/ + - --tls-cert-file=/etc/tls/private/tls.crt + - --tls-private-key-file=/etc/tls/private/tls.key + - --config-file=/etc/kube-rbac-policy/config.yaml + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: quay.io/openshift/kube-rbac-proxy + name: kube-rbac-proxy + ports: + - containerPort: 9100 + hostPort: 9100 + name: https + resources: + requests: + cpu: 1m + memory: 15Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 + seccompProfile: + type: RuntimeDefault + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: node-exporter-tls + readOnly: true + - mountPath: /etc/kube-rbac-policy + name: node-exporter-kube-rbac-proxy-config + readOnly: true + hostNetwork: true + hostPID: true + initContainers: + - command: + - /bin/sh + - -c + - '[[ ! 
-d /node_exporter/collectors/init ]] || find /node_exporter/collectors/init -perm /111 -type f -exec {} \;' + env: + - name: TMPDIR + value: /tmp + image: quay.io/openshift/node-exporter + name: init-textfile + resources: + requests: + cpu: 1m + memory: 1Mi + securityContext: + privileged: true + runAsUser: 0 + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/node_exporter/textfile + name: node-exporter-textfile + readOnly: false + - mountPath: /var/log/wtmp + name: node-exporter-wtmp + readOnly: true + workingDir: /var/node_exporter/textfile + nodeSelector: + kubernetes.io/os: linux + priorityClassName: system-cluster-critical + securityContext: {} + serviceAccountName: node-exporter + tolerations: + - operator: Exists + volumes: + - hostPath: + path: /sys + name: sys + - hostPath: + path: / + name: root + - emptyDir: {} + name: node-exporter-textfile + - name: node-exporter-tls + secret: + secretName: node-exporter-tls + - hostPath: + path: /var/log/wtmp + type: File + name: node-exporter-wtmp + - name: node-exporter-kube-rbac-proxy-config + secret: + secretName: node-exporter-kube-rbac-proxy-config + - configMap: + items: + - key: config.yaml + path: config.yaml + name: node-exporter-accelerators-collector-config + name: node-exporter-accelerators-collector-config + updateStrategy: + rollingUpdate: + maxUnavailable: 10% + type: RollingUpdate diff --git a/assets/optional/node-exporter/04-service.yaml b/assets/optional/node-exporter/04-service.yaml new file mode 100644 index 0000000000..37b420ccdb --- /dev/null +++ b/assets/optional/node-exporter/04-service.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + openshift.io/description: Expose the `/metrics` endpoint on port 9100. This port is for internal use, and no other usage is guaranteed. 
+ service.beta.openshift.io/serving-cert-secret-name: node-exporter-tls + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + name: node-exporter + namespace: openshift-monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9100 + targetPort: https + selector: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring diff --git a/assets/optional/node-exporter/kustomization.aarch64.yaml b/assets/optional/node-exporter/kustomization.aarch64.yaml new file mode 100644 index 0000000000..ebef723822 --- /dev/null +++ b/assets/optional/node-exporter/kustomization.aarch64.yaml @@ -0,0 +1,7 @@ +images: + - name: quay.io/openshift/node-exporter + newName: quay.io/openshift-release-dev/ocp-v5.0-art-dev + digest: sha256:acec7a35ba736689009c0e1cca62af79f798881559863d18ce4852dca3fb346f + - name: quay.io/openshift/kube-rbac-proxy + newName: quay.io/openshift-release-dev/ocp-v5.0-art-dev + digest: sha256:0d6a1c6ebba722e09ff2850010cb8114a8d097ccee1198c1f59680c8c7581d48 diff --git a/assets/optional/node-exporter/kustomization.x86_64.yaml b/assets/optional/node-exporter/kustomization.x86_64.yaml new file mode 100644 index 0000000000..5657782d55 --- /dev/null +++ b/assets/optional/node-exporter/kustomization.x86_64.yaml @@ -0,0 +1,7 @@ +images: + - name: quay.io/openshift/node-exporter + newName: quay.io/openshift-release-dev/ocp-v5.0-art-dev + digest: sha256:af33972b8dddfcbefffacb5b24b5c65f27eacc5a74a5a647d0099373f4b2f02d + - name: quay.io/openshift/kube-rbac-proxy + newName: quay.io/openshift-release-dev/ocp-v5.0-art-dev + digest: sha256:3b2676dd92a952c620e067cc158c2a0942c602471645d8e367104293cb964147 diff --git a/assets/optional/node-exporter/kustomization.yaml b/assets/optional/node-exporter/kustomization.yaml new 
file mode 100644 index 0000000000..20b41b0f31 --- /dev/null +++ b/assets/optional/node-exporter/kustomization.yaml @@ -0,0 +1,12 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - 00-namespace.yaml + - 01-service-account.yaml + - 01-cluster-role.yaml + - 01-cluster-role-binding.yaml + - 01-security-context-constraints.yaml + - 02-kube-rbac-proxy-secret.yaml + - 02-accelerators-collector-configmap.yaml + - 03-daemonset.yaml + - 04-service.yaml diff --git a/assets/optional/node-exporter/release-node-exporter-aarch64.json b/assets/optional/node-exporter/release-node-exporter-aarch64.json new file mode 100644 index 0000000000..c1ca6adbe1 --- /dev/null +++ b/assets/optional/node-exporter/release-node-exporter-aarch64.json @@ -0,0 +1,8 @@ +{ + "release": { + "base": "placeholder" + }, + "images": { + "node_exporter": "quay.io/openshift-release-dev/ocp-v5.0-art-dev@sha256:acec7a35ba736689009c0e1cca62af79f798881559863d18ce4852dca3fb346f" + } +} diff --git a/assets/optional/node-exporter/release-node-exporter-x86_64.json b/assets/optional/node-exporter/release-node-exporter-x86_64.json new file mode 100644 index 0000000000..1efe8ad5e3 --- /dev/null +++ b/assets/optional/node-exporter/release-node-exporter-x86_64.json @@ -0,0 +1,8 @@ +{ + "release": { + "base": "placeholder" + }, + "images": { + "node_exporter": "quay.io/openshift-release-dev/ocp-v5.0-art-dev@sha256:af33972b8dddfcbefffacb5b24b5c65f27eacc5a74a5a647d0099373f4b2f02d" + } +} From af9a16f689db0ae273dc2b655e5146eb22fdaa33 Mon Sep 17 00:00:00 2001 From: Jon Cope Date: Fri, 5 Jun 2026 13:32:04 -0500 Subject: [PATCH 2/3] USHIFT-6951: register node-exporter healthcheck Co-Authored-By: Claude Opus 4.6 --- .../microshift_optional_workloads.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pkg/healthcheck/microshift_optional_workloads.go b/pkg/healthcheck/microshift_optional_workloads.go index 80e2d9a3b0..7d21061934 100644 --- 
a/pkg/healthcheck/microshift_optional_workloads.go +++ b/pkg/healthcheck/microshift_optional_workloads.go @@ -38,6 +38,21 @@ var optionalWorkloadPaths = map[string]optionalWorkloads{ Namespace: "sriov-network-operator", Workloads: NamespaceWorkloads{Deployments: []string{"sriov-network-operator"}}, }, + + "/usr/lib/microshift/manifests.d/082-microshift-node-exporter": { + Namespace: "openshift-monitoring", + Workloads: NamespaceWorkloads{DaemonSets: []string{"node-exporter"}}, + }, +} + +// mergeWorkloads returns a new NamespaceWorkloads listing the workloads of existing followed by those of incoming, for components that share a namespace. +// Each list is copied into a fresh slice first, so the result never shares (or overwrites) a backing array owned by either argument. +func mergeWorkloads(existing, incoming NamespaceWorkloads) NamespaceWorkloads { + return NamespaceWorkloads{ + Deployments: append(append([]string(nil), existing.Deployments...), incoming.Deployments...), + DaemonSets: append(append([]string(nil), existing.DaemonSets...), incoming.DaemonSets...), + StatefulSets: append(append([]string(nil), existing.StatefulSets...), incoming.StatefulSets...), + } } // fillOptionalMicroShiftWorkloads assembles list of optional MicroShift workloads @@ -73,7 +88,7 @@ func fillOptionalMicroShiftWorkloads(workloadsToCheck map[string]NamespaceWorklo } klog.Infof("Optional component path exists and is configured: %s - expecting %v in namespace %q", path, ow.Workloads.String(), ow.Namespace) - workloadsToCheck[ow.Namespace] = ow.Workloads + workloadsToCheck[ow.Namespace] = mergeWorkloads(workloadsToCheck[ow.Namespace], ow.Workloads) } return nil } From 7b65aac5c10289b6ce0149ba8a7cbb8ff0b4550e Mon Sep 17 00:00:00 2001 From: Jon Cope Date: Fri, 5 Jun 2026 13:32:19 -0500 Subject: [PATCH 3/3] USHIFT-6951: package node-exporter RPM and observability integration Co-Authored-By: Claude Opus 4.6 --- .../microshift-metrics-node-exporter.yaml | 19 +++++++ packaging/rpm/microshift.spec | 54 +++++++++++++++++++ scripts/auto-rebase/assets.yaml | 22 ++++++++ test/bin/common.sh | 1 + 4 files changed, 96 insertions(+) create mode 100644 
packaging/observability/otelcol.d/microshift-metrics-node-exporter.yaml diff --git a/packaging/observability/otelcol.d/microshift-metrics-node-exporter.yaml b/packaging/observability/otelcol.d/microshift-metrics-node-exporter.yaml new file mode 100644 index 0000000000..41534198b2 --- /dev/null +++ b/packaging/observability/otelcol.d/microshift-metrics-node-exporter.yaml @@ -0,0 +1,19 @@ +receivers: + prometheus/node_exporter: + config: + scrape_configs: + - job_name: node-exporter + scrape_interval: 30s + scheme: https + tls_config: + ca_file: /var/lib/microshift/certs/service-ca/ca.crt + server_name: node-exporter.openshift-monitoring.svc + static_configs: + - targets: ["127.0.0.1:9100"] + +service: + pipelines: + metrics/node_exporter: + receivers: [prometheus/node_exporter] + processors: [batch] + exporters: [otlp] diff --git a/packaging/rpm/microshift.spec b/packaging/rpm/microshift.spec index 6362e4f552..9a21a82ecf 100644 --- a/packaging/rpm/microshift.spec +++ b/packaging/rpm/microshift.spec @@ -236,6 +236,7 @@ and can be used to embed those images into osbuilder blueprints or bootc contain Summary: OpenTelemetry-Collector configured for MicroShift BuildArch: noarch Requires: microshift = %{version} +Requires: microshift-metrics-node-exporter = %{version} Requires: opentelemetry-collector %description observability @@ -261,6 +262,25 @@ The microshift-cert-manager-release-info package provides release information fi release. These files contain the list of container image references used by Cert Manager and can be used to embed those images into osbuilder blueprints or bootc containerfiles. +%package metrics-node-exporter +Summary: Prometheus node-exporter for MicroShift +ExclusiveArch: x86_64 aarch64 +Requires: microshift = %{version} + +%description metrics-node-exporter +The microshift-metrics-node-exporter package provides the Prometheus node-exporter for MicroShift. +Install this package to expose host-level hardware and OS metrics. 
+ +%package metrics-node-exporter-release-info +Summary: Release information for node-exporter for MicroShift +BuildArch: noarch +Requires: microshift-release-info = %{version} + +%description metrics-node-exporter-release-info +The microshift-metrics-node-exporter-release-info package provides release information files for this +release. These files contain the list of container image references used by node-exporter +and can be used to embed those images into osbuilder blueprints or bootc containerfiles. + %package sriov Summary: SR-IOV Network Operator for MicroShift ExclusiveArch: x86_64 aarch64 @@ -562,7 +582,9 @@ install -p -m644 assets/optional/ai-model-serving/release-ai-model-serving-x86_6 # observability install -d -m755 %{buildroot}/%{_sysconfdir}/microshift/observability +install -d -m755 %{buildroot}/%{_sysconfdir}/microshift/observability/otelcol.d install -p -m644 packaging/observability/*.yaml -D %{buildroot}%{_sysconfdir}/microshift/observability/ +install -p -m644 packaging/observability/otelcol.d/microshift-metrics-node-exporter.yaml %{buildroot}%{_sysconfdir}/microshift/observability/otelcol.d/ # Explicit copy of large config as default. Not using symlink to avoid accidental package upgrade overwriting user config if the user edits the config without copying (i.e. edits the target of symlink). 
install -p -m644 packaging/observability/opentelemetry-collector-large.yaml -D %{buildroot}%{_sysconfdir}/microshift/observability/opentelemetry-collector.yaml install -p -m644 packaging/observability/microshift-observability.service %{buildroot}%{_unitdir}/ @@ -599,6 +621,29 @@ cat assets/optional/cert-manager/manager/images-x86_64.yaml >> %{buildroot}/%{_p mkdir -p -m755 %{buildroot}%{_datadir}/microshift/release install -p -m644 assets/optional/cert-manager/release-cert-manager-{x86_64,aarch64}.json %{buildroot}%{_datadir}/microshift/release/ +# node-exporter +install -d -m755 %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/00-namespace.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/01-service-account.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/01-cluster-role.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/01-cluster-role-binding.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/01-security-context-constraints.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/02-kube-rbac-proxy-secret.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/02-accelerators-collector-configmap.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/03-daemonset.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/04-service.yaml 
%{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/kustomization.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter + +%ifarch %{arm} aarch64 +cat assets/optional/node-exporter/kustomization.aarch64.yaml >> %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter/kustomization.yaml +%endif +%ifarch x86_64 +cat assets/optional/node-exporter/kustomization.x86_64.yaml >> %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter/kustomization.yaml +%endif + +# node-exporter-release-info +install -p -m644 assets/optional/node-exporter/release-node-exporter-{x86_64,aarch64}.json %{buildroot}%{_datadir}/microshift/release/ + # sriov install -d -m755 %{buildroot}/%{_prefix}/lib/microshift/manifests.d/070-microshift-sriov install -d -m755 %{buildroot}/%{_prefix}/lib/microshift/manifests.d/070-microshift-sriov/crd @@ -790,10 +835,12 @@ fi %files observability %dir %{_prefix}/lib/microshift/manifests.d/003-microshift-observability %dir %{_sysconfdir}/microshift/observability/ +%dir %{_sysconfdir}/microshift/observability/otelcol.d %{_unitdir}/microshift-observability.service %config(noreplace) %{_sysconfdir}/microshift/observability/opentelemetry-collector.yaml %{_sysconfdir}/microshift/observability/opentelemetry-collector-*.yaml %{_prefix}/lib/microshift/manifests.d/003-microshift-observability/* +%config(noreplace) %{_sysconfdir}/microshift/observability/otelcol.d/microshift-metrics-node-exporter.yaml %files cert-manager %dir %{_prefix}/lib/microshift/manifests.d/060-microshift-cert-manager @@ -802,6 +849,13 @@ fi %files cert-manager-release-info %{_datadir}/microshift/release/release-cert-manager-{x86_64,aarch64}.json +%files metrics-node-exporter +%dir %{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter/* + +%files 
metrics-node-exporter-release-info +%{_datadir}/microshift/release/release-node-exporter-{x86_64,aarch64}.json + %files sriov %dir %{_prefix}/lib/microshift/manifests.d/070-microshift-sriov %dir %{_prefix}/lib/microshift/manifests.d/070-microshift-sriov/crd diff --git a/scripts/auto-rebase/assets.yaml b/scripts/auto-rebase/assets.yaml index b4f34d3f6c..b7dd816111 100644 --- a/scripts/auto-rebase/assets.yaml +++ b/scripts/auto-rebase/assets.yaml @@ -301,6 +301,28 @@ assets: - file: service.yaml - file: serviceaccount.yaml + - dir: optional/node-exporter/ + ignore: "MicroShift-specific node-exporter manifests sourced from CMO" + files: + - file: 00-namespace.yaml + - file: 01-cluster-role-binding.yaml + - file: 01-cluster-role.yaml + - file: 01-security-context-constraints.yaml + - file: 01-service-account.yaml + - file: 02-accelerators-collector-configmap.yaml + - file: 02-kube-rbac-proxy-secret.yaml + - file: 03-daemonset.yaml + - file: 04-service.yaml + - file: kustomization.yaml + - file: kustomization.x86_64.yaml + ignore: "gets generated during image rebase" + - file: kustomization.aarch64.yaml + ignore: "gets generated during image rebase" + - file: release-node-exporter-x86_64.json + ignore: "gets generated during image rebase" + - file: release-node-exporter-aarch64.json + ignore: "gets generated during image rebase" + - dir: optional/observability/ ignore: "they don't exist in upstream repository - only in microshift" files: diff --git a/test/bin/common.sh b/test/bin/common.sh index ef682a676f..cd9d5d6fa1 100644 --- a/test/bin/common.sh +++ b/test/bin/common.sh @@ -388,6 +388,7 @@ MICROSHIFT_Y2_OPTIONAL_RPMS_LIST=( microshift-cert-manager-release-info microshift-sriov microshift-sriov-release-info + microshift-metrics-node-exporter ) MICROSHIFT_Y1_OPTIONAL_RPMS_LIST=( "${MICROSHIFT_Y2_OPTIONAL_RPMS_LIST[@]}"