From dec18890e323d1b261d91ed249e4aec72dfa86de Mon Sep 17 00:00:00 2001 From: Daniel Pawlik Date: Wed, 18 Feb 2026 14:18:50 +0100 Subject: [PATCH] [os_must_gather] Continue getting pod resources when metrics available On some deployments especially on CRC, where we don't enable openshift-monitoring, the log files are empty. In that case, those tasks can be just skipped. Signed-off-by: Daniel Pawlik --- roles/os_must_gather/tasks/get_top.yml | 73 ++++++++++++++++++++++++++ roles/os_must_gather/tasks/main.yml | 38 +------------- 2 files changed, 75 insertions(+), 36 deletions(-) create mode 100644 roles/os_must_gather/tasks/get_top.yml diff --git a/roles/os_must_gather/tasks/get_top.yml b/roles/os_must_gather/tasks/get_top.yml new file mode 100644 index 0000000000..1609392bbf --- /dev/null +++ b/roles/os_must_gather/tasks/get_top.yml @@ -0,0 +1,73 @@ +--- +# Copyright Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+
+# Probe the metrics API once; a failure here must not abort the log gathering.
+- name: Get all containers usage - sort by cpu
+  ansible.builtin.shell: >
+    oc adm top pods
+    --all-namespaces
+    --sort-by=cpu
+    --containers >
+    {{ cifmw_os_must_gather_output_log_dir }}/latest/all-containers-cpu-top.log
+  environment:
+    KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
+  register: _top_pods_cpu
+  ignore_errors: true  # noqa: ignore-errors
+
+# Report every probe failure, including errors that contain "not available".
+- name: Print message when metrics are not available
+  when: _top_pods_cpu.rc != 0
+  ansible.builtin.debug:
+    msg: |
+      Probably metrics are not available. Can not collect logs
+      Exact error is {{ _top_pods_cpu.stderr }}
+
+# The remaining "oc adm top" calls only run when the probe above succeeded.
+- name: Continue collecting resource usage when metrics available
+  when: _top_pods_cpu.rc == 0
+  environment:
+    KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
+  block:
+    - name: Get all containers usage - sort by memory
+      ansible.builtin.shell: >
+        oc adm top pods
+        --all-namespaces
+        --sort-by=memory
+        --containers >
+        {{ cifmw_os_must_gather_output_log_dir }}/latest/all-containers-memory-top.log
+
+    - name: Find all namespaces directories
+      ansible.builtin.find:
+        paths: "{{ cifmw_os_must_gather_output_log_dir }}/latest/namespaces"
+        file_type: directory
+        depth: 1
+      register: _os_gather_namespaces
+
+    # NOTE(review): "> 1" skips a lone namespace directory - confirm intended.
+    - name: Get resource usage by pods per namespace
+      when: _os_gather_namespaces.files | length > 1
+      vars:
+        namespace_dir: "{{ cifmw_os_must_gather_output_log_dir }}/latest/namespaces/{{ _namespace_path.path | basename }}"
+      ansible.builtin.shell: >
+        oc adm top pods
+        -n {{ _namespace_path.path | basename }} > {{ namespace_dir }}/pods-top.log
+      loop: "{{ _os_gather_namespaces.files }}"
+      loop_control:
+        loop_var: _namespace_path
+
+    - name: Get node resource usage
+      ansible.builtin.shell: |
+        oc adm top nodes > {{ cifmw_os_must_gather_output_log_dir }}/latest/openstack-nodes-top.log
diff --git 
a/roles/os_must_gather/tasks/main.yml b/roles/os_must_gather/tasks/main.yml index 9d5622ab5a..d6b7f1a95c 100644 --- a/roles/os_must_gather/tasks/main.yml +++ b/roles/os_must_gather/tasks/main.yml @@ -102,42 +102,8 @@ state: link # Collect pod usage - - name: Find all namespaces directories - ansible.builtin.find: - paths: "{{ cifmw_os_must_gather_output_log_dir }}/latest/namespaces" - file_type: directory - depth: 1 - register: _os_gather_namespaces - - - name: Get resource usage by pods per namespace - when: _os_gather_namespaces.files | length > 1 - vars: - namespace_dir: "{{ cifmw_os_must_gather_output_log_dir }}/latest/namespaces/{{ _namespace_path.path | basename }}" - ansible.builtin.shell: | - oc adm top pods -n {{ _namespace_path.path | basename }} > {{ namespace_dir }}/pods-top.log - loop: "{{ _os_gather_namespaces.files }}" - loop_control: - loop_var: _namespace_path - environment: - KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}" - - - name: Get node resource usage - ansible.builtin.shell: | - oc adm top nodes > {{ cifmw_os_must_gather_output_log_dir }}/latest/openstack-nodes-top.log - environment: - KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}" - - - name: Get all containers usage - sort by cpu - ansible.builtin.shell: | - oc adm top pods --all-namespaces --sort-by=cpu --containers > {{ cifmw_os_must_gather_output_log_dir }}/latest/all-containers-cpu-top.log - environment: - KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}" - - - name: Get all containers usage - sort by memory - ansible.builtin.shell: | - oc adm top pods --all-namespaces --sort-by=memory --containers > {{ cifmw_os_must_gather_output_log_dir }}/latest/all-containers-memory-top.log - environment: - KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}" + - name: Collect pod usage + ansible.builtin.include_tasks: get_top.yml 
rescue: - name: Openstack-must-gather failure