From f26a0e1d6ab142a69fe5bb38cd54eea909839cbb Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Thu, 4 Dec 2025 13:13:05 +0000 Subject: [PATCH 1/7] Added new script validation for CSCwn37676 --- aci-preupgrade-validation-script.py | 106 ++++++ docs/docs/validations.md | 18 ++ .../bootx_firmware_tmp_check/infraWiNode.json | 62 ++++ .../test_bootx_firmware_tmp_check.py | 301 ++++++++++++++++++ 4 files changed, 487 insertions(+) create mode 100644 tests/checks/bootx_firmware_tmp_check/infraWiNode.json create mode 100644 tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index bfca5bb6..1dc5d9af 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5962,6 +5962,111 @@ def configpush_shard_check(tversion, **kwargs): return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) +@check_wrapper(check_title = 'Bootx Service failure log & firmware/tmp directory checks') +def bootx_firmware_tmp_check(cversion, username, password, **kwargs): + result = PASS + headers = ["Pod", "Node", "File Count", "Fatal Errors Found", "Status"] + data = [] + recommended_action = 'Contact Cisco TAC to investigate all flagged high file and log counts' + doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#bootx_service_failure_log_and_firmware_tmp_directory_checks' + + if not cversion: + return Result(result=MANUAL, msg="Current version not provided") + + + affected = False + if (not cversion.older_than("6.0(2f)") and not cversion.newer_than("6.0(8f)")) or \ + (not cversion.older_than("6.1(1f)") and not cversion.newer_than("6.1(2f)")): + affected = True + + if not affected: + return Result(result=PASS, msg=VER_NOT_AFFECTED) + + + controller = icurl('class', 'infraWiNode.json?query-target-filter=and(wcard(infraWiNode.dn,"topology/pod-1/node-1"))') + if not 
controller: + return Result(result=ERROR, msg="infraWiNode response empty. Is the cluster healthy?", doc_url=doc_url) + + print('') + checked_apics = {} + has_error = False + nodes_file_count_result = [] + nodes_fatal_errors_result = [] + + for apic in controller: + attr = apic['infraWiNode']['attributes'] + if attr['addr'] in checked_apics: + continue + checked_apics[attr['addr']] = 1 + pod_id = attr['podId'] + node_id = attr['id'] + node_name = attr['name'] + node_title = 'Checking %s...' % node_name + + try: + c = Connection(attr['addr']) + c.username = username + c.password = password + c.log = LOG_FILE + c.connect() + except Exception as e: + data.append([pod_id, node_id, '-', '-', 'ERROR: %s' % str(e)]) + has_error = True + continue + + try: + # Check if /firmware/tmp directory exists and count files + c.cmd('[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0') + file_count = 0 + for line in c.output.strip().split('\n'): + line = line.strip() + if line.isdigit(): + file_count = int(line) + break + + # Check for fatal errors in bootx logs + c.cmd('[ -d /var/log/bootx/logs ] && grep -Ri "fatal" /var/log/bootx/logs/* 2>/dev/null | wc -l || echo 0') + fatal_count = 0 + for line in c.output.strip().split('\n'): + line = line.strip() + if line.isdigit(): + fatal_count = int(line) + break + + # Determine status + if file_count >= 1000: + status = 'FAIL - High file count' + data.append([pod_id, node_id, str(file_count), str(fatal_count), status]) + result = FAIL_UF + nodes_file_count_result.append(result) + elif fatal_count > 0: + status = 'WARNING - Fatal errors found' + data.append([pod_id, node_id, str(file_count), str(fatal_count), status]) + if result == PASS: + result = MANUAL + nodes_fatal_errors_result.append(result) + + except Exception as e: + data.append([pod_id, node_id, '-', '-', 'ERROR: %s' % str(e)]) + has_error = True + continue + + if FAIL_UF in nodes_file_count_result: + result = FAIL_UF + if MANUAL in 
nodes_fatal_errors_result: + result = MANUAL + if has_error and result == PASS: + result = ERROR + + return Result( + result=result, + headers=headers, + data=data, + recommended_action=recommended_action, + doc_url=doc_url, + ) + + # ---- Script Execution ---- @@ -6049,6 +6154,7 @@ class CheckManager: post_upgrade_cb_check, validate_32_64_bit_image_check, fabric_link_redundancy_check, + bootx_firmware_tmp_check, # Faults apic_disk_space_faults_check, diff --git a/docs/docs/validations.md b/docs/docs/validations.md index e395564f..c8ca2fab 100644 --- a/docs/docs/validations.md +++ b/docs/docs/validations.md @@ -191,6 +191,7 @@ Items | Defect | This Script [Stale pconsRA Object][d26] | CSCwp22212 | :warning:{title="Deprecated"} | :no_entry_sign: [ISIS DTEPs Byte Size][d27] | CSCwp15375 | :white_check_mark: | :no_entry_sign: [Policydist configpushShardCont Crash][d28] | CSCwp95515 | :white_check_mark: | +[Bootx Service failure log & firmware/tmp directory checks][d29] | CSCwn37676 | :white_check_mark: | :no_entry_sign: [d1]: #ep-announce-compatibility [d2]: #eventmgr-db-size-defect-susceptibility @@ -220,6 +221,7 @@ Items | Defect | This Script [d26]: #stale-pconsra-object [d27]: #isis-dteps-byte-size [d28]: #policydist-configpushshardcont-crash +[d29]: #bootx_service_failure_log_and_firmware_tmp_directory_checks ## General Check Details @@ -2604,6 +2606,21 @@ Due to [CSCwp95515][59], upgrading to an affected version while having any `conf If any instances of `configpushShardCont` are flagged by this script, Cisco TAC must be contacted to identify and resolve the underlying issue before performing the upgrade. 
+### Bootx Service failure log & firmware/tmp directory checks + +Due to [CSCwn37676][62], ACI runs on releases 6.0(2) through 6.0(8) or 6.1(1) through 6.1(2) , upgrading to any target version with a high number of files in the `/firmware/tmp/` directory (1000 or more) or the presence of fatal errors in `/var/log/bootx/logs/` can cause the bootx service to fail, resulting in upgrade failures. + +The script performs two validations on each APIC: + +1. Checks if `/firmware/tmp/` directory contains 1000 or more files +2. Searches for "fatal" errors in `/var/log/bootx/logs/` + +!!! warning + If this check fails, verify the bootx service status on the affected APIC(s) by running `systemctl status bootx`. If the service is not running, the APIC is already experiencing the issue and must be resolved before proceeding with the upgrade. + +!!! tip + Certain high churn logging configurations have been found to cause excessive files in `/firmware/tmp/while on non-fixed versions. If this check identifies issues, work with Cisco TAC to clean up excess files and resolve any bootx service failures before attempting the upgrade. 
+ [0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script [1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html [2]: https://www.cisco.com/c/en/us/support/switches/nexus-9000-series-switches/products-release-notes-list.html @@ -2666,3 +2683,4 @@ If any instances of `configpushShardCont` are flagged by this script, Cisco TAC [59]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwp95515 [60]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#Inter [61]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#EnablePolicyCompression +[62]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwn37676 \ No newline at end of file diff --git a/tests/checks/bootx_firmware_tmp_check/infraWiNode.json b/tests/checks/bootx_firmware_tmp_check/infraWiNode.json new file mode 100644 index 00000000..b6626d02 --- /dev/null +++ b/tests/checks/bootx_firmware_tmp_check/infraWiNode.json @@ -0,0 +1,62 @@ +[ + { + "infraWiNode": { + "attributes": { + "addr": "10.0.0.1", + "adminSt": "in-service", + "apicMode": "active", + "cntrlSbstState": "approved", + "dn": "topology/pod-1/node-1/av/node-1", + "failoverStatus": "idle", + "health": "fully-fit", + "id": "1", + "mbSn": "FCH1234ABCD", + "name": "", + "nodeName": "apic1", + "operSt": "available", + "podId": "0", + "targetMbSn": "" + } + } + }, + { + "infraWiNode": { + "attributes": { + "addr": "10.0.0.2", + "adminSt": "in-service", + "apicMode": "active", + "cntrlSbstState": "approved", + "dn": "topology/pod-1/node-1/av/node-2", + "failoverStatus": "idle", + "health": "fully-fit", + "id": "2", + "mbSn": "FCH1235ABCD", + "name": "", + "nodeName": "apic2", + "operSt": "available", + "podId": "0", + "targetMbSn": "" + } + } + }, + { + "infraWiNode": { + "attributes": { + "addr": "10.0.0.3", + "adminSt": "in-service", + 
"apicMode": "active", + "cntrlSbstState": "approved", + "dn": "topology/pod-1/node-1/av/node-3", + "failoverStatus": "idle", + "health": "fully-fit", + "id": "3", + "mbSn": "FCH1236ABCD", + "name": "", + "nodeName": "apic3", + "operSt": "available", + "podId": "1", + "targetMbSn": "" + } + } + } +] diff --git a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py new file mode 100644 index 00000000..f4a9c3e0 --- /dev/null +++ b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py @@ -0,0 +1,301 @@ +import os +import pytest +import logging +import importlib +from helpers.utils import read_data + +script = importlib.import_module("aci-preupgrade-validation-script") + +log = logging.getLogger(__name__) +dir = os.path.dirname(os.path.abspath(__file__)) + +# API query for controllers +infraWiNode_api = 'infraWiNode.json?query-target-filter=and(wcard(infraWiNode.dn,"topology/pod-1/node-1"))' + +# Commands that will be executed via SSH +ls_firmware_tmp_cmd = '[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0' +grep_fatal_bootx_cmd = '[ -d /var/log/bootx/logs ] && grep -Ri "fatal" /var/log/bootx/logs/* 2>/dev/null | wc -l || echo 0' + +test_function = "bootx_firmware_tmp_check" + +@pytest.mark.parametrize( + "icurl_outputs, conn_cmds, cversion, expected_result", + [ + # Test 1: Version not provided (cversion is None) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + None, + script.MANUAL, + ), + # Test 2: Version not affected (below 6.0(2f)) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + "6.0(1a)", + script.PASS, + ), + # Test 3: Version not affected (above 6.0(8f)) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + "6.0(9a)", + script.PASS, + ), + # Test 4: Version not affected (between 6.0(8f) and 6.1(1f)) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + 
"6.0(9h)", + script.PASS, + ), + # Test 5: Version not affected (above 6.1(2f)) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + "6.1(3a)", + script.PASS, + ), + # Test 6: Affected version 6.0(2f), no issues found + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "0\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "0\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "0\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.0(2f)", + script.PASS, + ), + # Test 7: Affected version 6.0(5a), file count >= 1000 on one APIC + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "1500\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "100\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.0(5a)", + script.FAIL_UF, + ), + # Test 8: Affected version 6.0(8f), fatal errors found on one APIC + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "5\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "30\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", 
"exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "20\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.0(8f)", + script.MANUAL, + ), + # Test 9: Affected version 6.1(1f), both high file count and fatal errors + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "2000\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "10\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "500\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "100\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.1(1f)", + script.FAIL_UF, + ), + # Test 10: Affected version 6.1(2f), multiple APICs with issues + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "1200\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "1500\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "2\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "100\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.1(2f)", + script.FAIL_UF, + ), + # Test 11: Affected version, file count exactly 1000 (boundary test) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "1000\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": 
"100\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.0(3a)", + script.FAIL_UF, + ), + # Test 12: Affected version, file count just below 1000 (boundary test) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "999\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "100\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.0(4a)", + script.PASS, + ), + # Test 13: Affected version, only fatal errors (no high file count) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "10\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "20\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "3\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "15\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "7\napic3#", "exception": None}, + ], + }, + "6.1(2a)", + script.MANUAL, + ), + ], +) +def test_logic(run_check, mock_icurl, mock_conn, cversion, expected_result): + cver = script.AciVersion(cversion) if cversion else None + result = run_check(cversion=cver, username="admin", password="password") + assert result.result == expected_result + + +@pytest.mark.parametrize( 
+ "icurl_outputs, conn_cmds, conn_failure, cversion, expected_result", + [ + # Test 14: SSH connection failure on one APIC + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + True, + "6.0(5a)", + script.ERROR, + ), + # Test 15: SSH command execution error on one APIC + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "", "exception": Exception("Command failed")}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "100\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + False, + "6.0(7a)", + script.ERROR, + ), + ], +) +def test_connection_errors(run_check, mock_icurl, mock_conn, cversion, expected_result): + cver = script.AciVersion(cversion) if cversion else None + result = run_check(cversion=cver, username="admin", password="password") + assert result.result == expected_result + + +@pytest.mark.parametrize( + "icurl_outputs, conn_cmds, cversion, expected_result", + [ + # Test 16: Empty topSystem response (unhealthy cluster) + ( + {infraWiNode_api: []}, + {}, + "6.0(5a)", + script.ERROR, + ), + # Test 17: Non-numeric output from commands (edge case) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "error\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "0\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "invalid\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + 
"6.0(6a)", + script.PASS, + ), + ], +) +def test_edge_cases(run_check, mock_icurl, mock_conn, cversion, expected_result): + cver = script.AciVersion(cversion) if cversion else None + result = run_check(cversion=cver, username="admin", password="password") + assert result.result == expected_result \ No newline at end of file From 0e94543fd5e86ffc5d872f1284f2de1449bfa0a0 Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Tue, 9 Dec 2025 10:24:04 +0000 Subject: [PATCH 2/7] Modified the mo class, Updated the release version alphabets, Modified the test case --- aci-preupgrade-validation-script.py | 28 ++++----- .../bootx_firmware_tmp_check/fabricNode.json | 50 +++++++++++++++ .../bootx_firmware_tmp_check/infraWiNode.json | 62 ------------------- .../test_bootx_firmware_tmp_check.py | 36 +++++------ 4 files changed, 81 insertions(+), 95 deletions(-) create mode 100644 tests/checks/bootx_firmware_tmp_check/fabricNode.json delete mode 100644 tests/checks/bootx_firmware_tmp_check/infraWiNode.json diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index 1dc5d9af..5418a92a 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5965,7 +5965,7 @@ def configpush_shard_check(tversion, **kwargs): @check_wrapper(check_title = 'Bootx Service failure log & firmware/tmp directory checks') def bootx_firmware_tmp_check(cversion, username, password, **kwargs): result = PASS - headers = ["Pod", "Node", "File Count", "Fatal Errors Found", "Status"] + headers = ["Node", "File Count", "Fatal Errors Found", "Status"] data = [] recommended_action = 'Contact Cisco TAC to investigate all flagged high file and log counts' doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#bootx_service_failure_log_and_firmware_tmp_directory_checks' @@ -5975,17 +5975,16 @@ def bootx_firmware_tmp_check(cversion, username, password, **kwargs): affected = False - if (not 
cversion.older_than("6.0(2f)") and not cversion.newer_than("6.0(8f)")) or \ - (not cversion.older_than("6.1(1f)") and not cversion.newer_than("6.1(2f)")): + if (not cversion.older_than("6.0(2h)") and not cversion.newer_than("6.0(8h)")) or \ + (not cversion.older_than("6.1(1f)") and not cversion.newer_than("6.1(2g)")): affected = True if not affected: return Result(result=PASS, msg=VER_NOT_AFFECTED) - - controller = icurl('class', 'infraWiNode.json?query-target-filter=and(wcard(infraWiNode.dn,"topology/pod-1/node-1"))') + controller = icurl('class', 'fabricNode.json?query-target-filter=and(eq(fabricNode.role,"controller"))') if not controller: - return Result(result=ERROR, msg="infraWiNode response empty. Is the cluster healthy?", doc_url=doc_url) + return Result(result=ERROR, msg="Fabric node response empty. Is the cluster healthy?", doc_url=doc_url) print('') checked_apics = {} @@ -5994,23 +5993,22 @@ def bootx_firmware_tmp_check(cversion, username, password, **kwargs): nodes_fatal_errors_result = [] for apic in controller: - attr = apic['infraWiNode']['attributes'] - if attr['addr'] in checked_apics: + attr = apic['fabricNode']['attributes'] + if attr['address'] in checked_apics: continue - checked_apics[attr['addr']] = 1 - pod_id = attr['podId'] + checked_apics[attr['address']] = 1 node_id = attr['id'] node_name = attr['name'] node_title = 'Checking %s...' 
% node_name try: - c = Connection(attr['addr']) + c = Connection(attr['address']) c.username = username c.password = password c.log = LOG_FILE c.connect() except Exception as e: - data.append([pod_id, node_id, '-', '-', 'ERROR: %s' % str(e)]) + data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) has_error = True continue @@ -6036,18 +6034,18 @@ def bootx_firmware_tmp_check(cversion, username, password, **kwargs): # Determine status if file_count >= 1000: status = 'FAIL - High file count' - data.append([pod_id, node_id, str(file_count), str(fatal_count), status]) + data.append([node_id, str(file_count), str(fatal_count), status]) result = FAIL_UF nodes_file_count_result.append(result) elif fatal_count > 0: status = 'WARNING - Fatal errors found' - data.append([pod_id, node_id, str(file_count), str(fatal_count), status]) + data.append([node_id, str(file_count), str(fatal_count), status]) if result == PASS: result = MANUAL nodes_fatal_errors_result.append(result) except Exception as e: - data.append([pod_id, node_id, '-', '-', 'ERROR: %s' % str(e)]) + data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) has_error = True continue diff --git a/tests/checks/bootx_firmware_tmp_check/fabricNode.json b/tests/checks/bootx_firmware_tmp_check/fabricNode.json new file mode 100644 index 00000000..d102b18b --- /dev/null +++ b/tests/checks/bootx_firmware_tmp_check/fabricNode.json @@ -0,0 +1,50 @@ +[ + { + "fabricNode": { + "attributes": { + "address": "10.0.0.1", + "dn": "topology/pod-1/node-1", + "fabricSt": "commissioned", + "id": "1", + "model": "APIC-SERVER-L2", + "monPolDn": "uni/fabric/monfab-default", + "name": "apic1", + "nodeType": "unspecified", + "podId": "1", + "role": "controller" + } + } + }, + { + "fabricNode": { + "attributes": { + "address": "10.0.0.2", + "dn": "topology/pod-1/node-2", + "fabricSt": "commissioned", + "id": "2", + "model": "APIC-SERVER-L2", + "monPolDn": "uni/fabric/monfab-default", + "name": "apic2", + "nodeType": "unspecified", + "podId": 
"1", + "role": "controller" + } + } + }, + { + "fabricNode": { + "attributes": { + "address": "10.0.0.3", + "dn": "topology/pod-1/node-3", + "fabricSt": "commissioned", + "id": "3", + "model": "APIC-SERVER-L2", + "monPolDn": "uni/fabric/monfab-default", + "name": "apic3", + "nodeType": "unspecified", + "podId": "1", + "role": "controller" + } + } + } +] diff --git a/tests/checks/bootx_firmware_tmp_check/infraWiNode.json b/tests/checks/bootx_firmware_tmp_check/infraWiNode.json deleted file mode 100644 index b6626d02..00000000 --- a/tests/checks/bootx_firmware_tmp_check/infraWiNode.json +++ /dev/null @@ -1,62 +0,0 @@ -[ - { - "infraWiNode": { - "attributes": { - "addr": "10.0.0.1", - "adminSt": "in-service", - "apicMode": "active", - "cntrlSbstState": "approved", - "dn": "topology/pod-1/node-1/av/node-1", - "failoverStatus": "idle", - "health": "fully-fit", - "id": "1", - "mbSn": "FCH1234ABCD", - "name": "", - "nodeName": "apic1", - "operSt": "available", - "podId": "0", - "targetMbSn": "" - } - } - }, - { - "infraWiNode": { - "attributes": { - "addr": "10.0.0.2", - "adminSt": "in-service", - "apicMode": "active", - "cntrlSbstState": "approved", - "dn": "topology/pod-1/node-1/av/node-2", - "failoverStatus": "idle", - "health": "fully-fit", - "id": "2", - "mbSn": "FCH1235ABCD", - "name": "", - "nodeName": "apic2", - "operSt": "available", - "podId": "0", - "targetMbSn": "" - } - } - }, - { - "infraWiNode": { - "attributes": { - "addr": "10.0.0.3", - "adminSt": "in-service", - "apicMode": "active", - "cntrlSbstState": "approved", - "dn": "topology/pod-1/node-1/av/node-3", - "failoverStatus": "idle", - "health": "fully-fit", - "id": "3", - "mbSn": "FCH1236ABCD", - "name": "", - "nodeName": "apic3", - "operSt": "available", - "podId": "1", - "targetMbSn": "" - } - } - } -] diff --git a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py index f4a9c3e0..c94ac4a1 100644 --- 
a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py +++ b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py @@ -10,7 +10,7 @@ dir = os.path.dirname(os.path.abspath(__file__)) # API query for controllers -infraWiNode_api = 'infraWiNode.json?query-target-filter=and(wcard(infraWiNode.dn,"topology/pod-1/node-1"))' +fabricNode_api = 'fabricNode.json?query-target-filter=and(eq(fabricNode.role,"controller"))' # Commands that will be executed via SSH ls_firmware_tmp_cmd = '[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0' @@ -23,42 +23,42 @@ [ # Test 1: Version not provided (cversion is None) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, None, script.MANUAL, ), # Test 2: Version not affected (below 6.0(2f)) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.0(1a)", script.PASS, ), # Test 3: Version not affected (above 6.0(8f)) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.0(9a)", script.PASS, ), # Test 4: Version not affected (between 6.0(8f) and 6.1(1f)) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.0(9h)", script.PASS, ), # Test 5: Version not affected (above 6.1(2f)) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.1(3a)", script.PASS, ), # Test 6: Affected version 6.0(2f), no issues found ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "0\napic1#", "exception": None}, @@ -78,7 +78,7 @@ ), # Test 7: Affected version 6.0(5a), file count >= 1000 on one APIC ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + 
{fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "1500\napic1#", "exception": None}, @@ -98,7 +98,7 @@ ), # Test 8: Affected version 6.0(8f), fatal errors found on one APIC ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "50\napic1#", "exception": None}, @@ -118,7 +118,7 @@ ), # Test 9: Affected version 6.1(1f), both high file count and fatal errors ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "2000\napic1#", "exception": None}, @@ -138,7 +138,7 @@ ), # Test 10: Affected version 6.1(2f), multiple APICs with issues ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "1200\napic1#", "exception": None}, @@ -158,7 +158,7 @@ ), # Test 11: Affected version, file count exactly 1000 (boundary test) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "1000\napic1#", "exception": None}, @@ -178,7 +178,7 @@ ), # Test 12: Affected version, file count just below 1000 (boundary test) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "999\napic1#", "exception": None}, @@ -198,7 +198,7 @@ ), # Test 13: Affected version, only fatal errors (no high file count) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "10\napic1#", "exception": None}, @@ -229,7 +229,7 @@ def test_logic(run_check, mock_icurl, mock_conn, cversion, 
expected_result): [ # Test 14: SSH connection failure on one APIC ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, True, "6.0(5a)", @@ -237,7 +237,7 @@ def test_logic(run_check, mock_icurl, mock_conn, cversion, expected_result): ), # Test 15: SSH command execution error on one APIC ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "", "exception": Exception("Command failed")}, @@ -268,14 +268,14 @@ def test_connection_errors(run_check, mock_icurl, mock_conn, cversion, expected_ [ # Test 16: Empty topSystem response (unhealthy cluster) ( - {infraWiNode_api: []}, + {fabricNode_api: []}, {}, "6.0(5a)", script.ERROR, ), # Test 17: Non-numeric output from commands (edge case) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "error\napic1#", "exception": None}, From eb6157ad41e312917cffd42d52927e7f5a3be5de Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Thu, 11 Dec 2025 06:17:01 +0000 Subject: [PATCH 3/7] Removed then fabricNode api query. Added the filtered logic in code. 
Modified the test cases --- aci-preupgrade-validation-script.py | 18 ++++++++---------- .../test_bootx_firmware_tmp_check.py | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index 5418a92a..6ae0e6e0 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5963,7 +5963,7 @@ def configpush_shard_check(tversion, **kwargs): return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) @check_wrapper(check_title = 'Bootx Service failure log & firmware/tmp directory checks') -def bootx_firmware_tmp_check(cversion, username, password, **kwargs): +def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwargs): result = PASS headers = ["Node", "File Count", "Fatal Errors Found", "Status"] data = [] @@ -5982,10 +5982,14 @@ def bootx_firmware_tmp_check(cversion, username, password, **kwargs): if not affected: return Result(result=PASS, msg=VER_NOT_AFFECTED) - controller = icurl('class', 'fabricNode.json?query-target-filter=and(eq(fabricNode.role,"controller"))') - if not controller: + if not fabric_nodes: return Result(result=ERROR, msg="Fabric node response empty. Is the cluster healthy?", doc_url=doc_url) + # Filter for controller nodes only + controller = [node for node in fabric_nodes if node['fabricNode']['attributes']['role'] == 'controller'] + if not controller: + return Result(result=ERROR, msg="No controller nodes found. 
Is the cluster healthy?", doc_url=doc_url) + print('') checked_apics = {} has_error = False @@ -6056,13 +6060,7 @@ def bootx_firmware_tmp_check(cversion, username, password, **kwargs): if has_error and result == PASS: result = ERROR - return Result( - result=result, - headers=headers, - data=data, - recommended_action=recommended_action, - doc_url=doc_url, - ) + return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) # ---- Script Execution ---- diff --git a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py index c94ac4a1..8f8b289d 100644 --- a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py +++ b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py @@ -9,8 +9,8 @@ log = logging.getLogger(__name__) dir = os.path.dirname(os.path.abspath(__file__)) -# API query for controllers -fabricNode_api = 'fabricNode.json?query-target-filter=and(eq(fabricNode.role,"controller"))' +# API query for fabricNode (get_fabric_nodes() uses 'fabricNode.json' without filter) +fabricNode_api = 'fabricNode.json' # Commands that will be executed via SSH ls_firmware_tmp_cmd = '[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0' @@ -218,9 +218,10 @@ ), ], ) -def test_logic(run_check, mock_icurl, mock_conn, cversion, expected_result): +def test_logic(run_check, mock_icurl, mock_conn, icurl_outputs, cversion, expected_result): cver = script.AciVersion(cversion) if cversion else None - result = run_check(cversion=cver, username="admin", password="password") + fabric_nodes = icurl_outputs.get(fabricNode_api, []) + result = run_check(fabric_nodes=fabric_nodes, cversion=cver, username="admin", password="password") assert result.result == expected_result @@ -257,9 +258,10 @@ def test_logic(run_check, mock_icurl, mock_conn, cversion, expected_result): ), ], ) -def 
test_connection_errors(run_check, mock_icurl, mock_conn, cversion, expected_result): +def test_connection_errors(run_check, mock_icurl, mock_conn, icurl_outputs, cversion, expected_result): cver = script.AciVersion(cversion) if cversion else None - result = run_check(cversion=cver, username="admin", password="password") + fabric_nodes = icurl_outputs.get(fabricNode_api, []) + result = run_check(fabric_nodes=fabric_nodes, cversion=cver, username="admin", password="password") assert result.result == expected_result @@ -295,7 +297,8 @@ def test_connection_errors(run_check, mock_icurl, mock_conn, cversion, expected_ ), ], ) -def test_edge_cases(run_check, mock_icurl, mock_conn, cversion, expected_result): +def test_edge_cases(run_check, mock_icurl, mock_conn, icurl_outputs, cversion, expected_result): cver = script.AciVersion(cversion) if cversion else None - result = run_check(cversion=cver, username="admin", password="password") + fabric_nodes = icurl_outputs.get(fabricNode_api, []) + result = run_check(fabric_nodes=fabric_nodes, cversion=cver, username="admin", password="password") assert result.result == expected_result \ No newline at end of file From 3de425564d4750582da9a6957429bf1868caa7f1 Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Fri, 12 Dec 2025 05:32:47 +0000 Subject: [PATCH 4/7] Incorporated the review comments --- aci-preupgrade-validation-script.py | 4 +--- docs/docs/validations.md | 8 ++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index 6ae0e6e0..e689db3b 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5962,7 +5962,7 @@ def configpush_shard_check(tversion, **kwargs): return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) -@check_wrapper(check_title = 'Bootx Service failure log & firmware/tmp directory checks') +@check_wrapper(check_title = 
'Bootx Service failure checks') def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwargs): result = PASS headers = ["Node", "File Count", "Fatal Errors Found", "Status"] @@ -5990,7 +5990,6 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg if not controller: return Result(result=ERROR, msg="No controller nodes found. Is the cluster healthy?", doc_url=doc_url) - print('') checked_apics = {} has_error = False nodes_file_count_result = [] @@ -6003,7 +6002,6 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg checked_apics[attr['address']] = 1 node_id = attr['id'] node_name = attr['name'] - node_title = 'Checking %s...' % node_name try: c = Connection(attr['address']) diff --git a/docs/docs/validations.md b/docs/docs/validations.md index c8ca2fab..d5d9a622 100644 --- a/docs/docs/validations.md +++ b/docs/docs/validations.md @@ -191,7 +191,7 @@ Items | Defect | This Script [Stale pconsRA Object][d26] | CSCwp22212 | :warning:{title="Deprecated"} | :no_entry_sign: [ISIS DTEPs Byte Size][d27] | CSCwp15375 | :white_check_mark: | :no_entry_sign: [Policydist configpushShardCont Crash][d28] | CSCwp95515 | :white_check_mark: | -[Bootx Service failure log & firmware/tmp directory checks][d29] | CSCwn37676 | :white_check_mark: | :no_entry_sign: +[Bootx Service failure checks][d29] | CSCwn37676 | :white_check_mark: | :no_entry_sign: [d1]: #ep-announce-compatibility [d2]: #eventmgr-db-size-defect-susceptibility @@ -221,7 +221,7 @@ Items | Defect | This Script [d26]: #stale-pconsra-object [d27]: #isis-dteps-byte-size [d28]: #policydist-configpushshardcont-crash -[d29]: #bootx_service_failure_log_and_firmware_tmp_directory_checks +[d29]: #bootx-service-failure-checks ## General Check Details @@ -2606,9 +2606,9 @@ Due to [CSCwp95515][59], upgrading to an affected version while having any `conf If any instances of `configpushShardCont` are flagged by this script, Cisco TAC must be contacted to 
identify and resolve the underlying issue before performing the upgrade. -### Bootx Service failure log & firmware/tmp directory checks +### Bootx Service failure checks -Due to [CSCwn37676][62], ACI runs on releases 6.0(2) through 6.0(8) or 6.1(1) through 6.1(2) , upgrading to any target version with a high number of files in the `/firmware/tmp/` directory (1000 or more) or the presence of fatal errors in `/var/log/bootx/logs/` can cause the bootx service to fail, resulting in upgrade failures. +Due to [CSCwn37676][62], when ACI runs on releases 6.0(2h) through 6.0(8h) or 6.1(1f) through 6.1(2g), upgrading to any target version with a high number of files in the `/firmware/tmp/` directory (1000 or more) or the presence of fatal errors in `/var/log/bootx/logs/` can cause the bootx service to fail, resulting in upgrade failures. The script performs two validations on each APIC: From 7f700b675d8d44cde4caa80e57f93d4cbde14170 Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Fri, 12 Dec 2025 13:24:23 +0000 Subject: [PATCH 5/7] Modified the script logic based on the updated preupgrade flag --- aci-preupgrade-validation-script.py | 23 ++++-------- .../test_bootx_firmware_tmp_check.py | 36 +++++++++---------- 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index e689db3b..a7ac27dd 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5991,9 +5991,7 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg return Result(result=ERROR, msg="No controller nodes found. 
Is the cluster healthy?", doc_url=doc_url) checked_apics = {} - has_error = False - nodes_file_count_result = [] - nodes_fatal_errors_result = [] + has_error = False for apic in controller: attr = apic['fabricNode']['attributes'] @@ -6001,7 +5999,6 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg continue checked_apics[attr['address']] = 1 node_id = attr['id'] - node_name = attr['name'] try: c = Connection(attr['address']) @@ -6036,25 +6033,19 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg # Determine status if file_count >= 1000: status = 'FAIL - High file count' - data.append([node_id, str(file_count), str(fatal_count), status]) + data.append([node_id, str(file_count),"-", status]) + result = FAIL_UF + + if fatal_count > 0: + status = 'FAIL - Fatal errors found' + data.append([node_id, "-", str(fatal_count), status]) result = FAIL_UF - nodes_file_count_result.append(result) - elif fatal_count > 0: - status = 'WARNING - Fatal errors found' - data.append([node_id, str(file_count), str(fatal_count), status]) - if result == PASS: - result = MANUAL - nodes_fatal_errors_result.append(result) except Exception as e: data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) has_error = True continue - if FAIL_UF in nodes_file_count_result: - result = FAIL_UF - if MANUAL in nodes_fatal_errors_result: - result = MANUAL if has_error and result == PASS: result = ERROR diff --git a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py index 8f8b289d..7366a6f4 100644 --- a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py +++ b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py @@ -28,35 +28,35 @@ None, script.MANUAL, ), - # Test 2: Version not affected (below 6.0(2f)) + # Test 2: Version not affected (below 6.0(2h)) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, 
"6.0(1a)", script.PASS, ), - # Test 3: Version not affected (above 6.0(8f)) + # Test 3: Version not affected (above 6.0(8h)) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.0(9a)", script.PASS, ), - # Test 4: Version not affected (between 6.0(8f) and 6.1(1f)) + # Test 4: Version not affected (between 6.0(8h) and 6.1(1f)) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.0(9h)", script.PASS, ), - # Test 5: Version not affected (above 6.1(2f)) + # Test 5: Version not affected (above 6.1(2g)) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.1(3a)", script.PASS, ), - # Test 6: Affected version 6.0(2f), no issues found + # Test 6: Version not affected 6.0(2f) (below 6.0(2h)), no issues found ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -76,7 +76,7 @@ "6.0(2f)", script.PASS, ), - # Test 7: Affected version 6.0(5a), file count >= 1000 on one APIC + # Test 7: Affected version 6.0(5a) (within 6.0(2h) to 6.0(8h)), file count >= 1000 on one APIC ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -96,7 +96,7 @@ "6.0(5a)", script.FAIL_UF, ), - # Test 8: Affected version 6.0(8f), fatal errors found on one APIC + # Test 8: Affected version 6.0(8f) (within 6.0(2h) to 6.0(8h)), fatal errors found on one APIC ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -114,9 +114,9 @@ ], }, "6.0(8f)", - script.MANUAL, + script.FAIL_UF, ), - # Test 9: Affected version 6.1(1f), both high file count and fatal errors + # Test 9: Affected version 6.1(1f) (within 6.1(1f) to 6.1(2g)), both high file count and fatal errors ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -136,7 +136,7 @@ "6.1(1f)", script.FAIL_UF, ), - # Test 10: Affected version 6.1(2f), multiple APICs with issues + # Test 10: Affected version 6.1(2f) (within 6.1(1f) to 6.1(2g)), multiple APICs with issues ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -156,7 +156,7 @@ "6.1(2f)", script.FAIL_UF, ), - # Test 11: Affected version, file 
count exactly 1000 (boundary test) + # Test 11: Affected version 6.0(3a) (within 6.0(2h) to 6.0(8h)), file count exactly 1000 (boundary test) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -176,7 +176,7 @@ "6.0(3a)", script.FAIL_UF, ), - # Test 12: Affected version, file count just below 1000 (boundary test) + # Test 12: Affected version 6.0(4a) (within 6.0(2h) to 6.0(8h)), file count just below 1000 (boundary test) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -196,7 +196,7 @@ "6.0(4a)", script.PASS, ), - # Test 13: Affected version, only fatal errors (no high file count) + # Test 13: Affected version 6.1(2a) (within 6.1(1f) to 6.1(2g)), only fatal errors (no high file count) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -214,7 +214,7 @@ ], }, "6.1(2a)", - script.MANUAL, + script.FAIL_UF, ), ], ) @@ -228,7 +228,7 @@ def test_logic(run_check, mock_icurl, mock_conn, icurl_outputs, cversion, expect @pytest.mark.parametrize( "icurl_outputs, conn_cmds, conn_failure, cversion, expected_result", [ - # Test 14: SSH connection failure on one APIC + # Test 14: Affected version 6.0(5a) (within 6.0(2h) to 6.0(8h)), SSH connection failure on one APIC ( {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, @@ -236,7 +236,7 @@ def test_logic(run_check, mock_icurl, mock_conn, icurl_outputs, cversion, expect "6.0(5a)", script.ERROR, ), - # Test 15: SSH command execution error on one APIC + # Test 15: Affected version 6.0(7a) (within 6.0(2h) to 6.0(8h)), SSH command execution error on one APIC ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -268,14 +268,14 @@ def test_connection_errors(run_check, mock_icurl, mock_conn, icurl_outputs, cver @pytest.mark.parametrize( "icurl_outputs, conn_cmds, cversion, expected_result", [ - # Test 16: Empty topSystem response (unhealthy cluster) + # Test 16: Affected version 6.0(5a) (within 6.0(2h) to 6.0(8h)), Empty fabricNode response (unhealthy cluster) ( {fabricNode_api: []}, {}, "6.0(5a)", 
script.ERROR, ), - # Test 17: Non-numeric output from commands (edge case) + # Test 17: Affected version 6.0(6a) (within 6.0(2h) to 6.0(8h)), Non-numeric output from commands (edge case) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { From d8c19d24a977996dabacd9773f7dbb1d94bf647c Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Tue, 16 Dec 2025 11:38:05 +0000 Subject: [PATCH 6/7] Addressed the review comments --- aci-preupgrade-validation-script.py | 6 +++--- .../fabricNode.json | 0 .../test_bootx_service_failure_checks.py} | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) rename tests/checks/{bootx_firmware_tmp_check => bootx_service_failure_checks}/fabricNode.json (100%) rename tests/checks/{bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py => bootx_service_failure_checks/test_bootx_service_failure_checks.py} (99%) diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index a7ac27dd..b1544f51 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5963,7 +5963,7 @@ def configpush_shard_check(tversion, **kwargs): return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) @check_wrapper(check_title = 'Bootx Service failure checks') -def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwargs): +def bootx_service_failure_checks(fabric_nodes, cversion, username, password, **kwargs): result = PASS headers = ["Node", "File Count", "Fatal Errors Found", "Status"] data = [] @@ -6045,7 +6045,7 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) has_error = True continue - + c.close() if has_error and result == PASS: result = ERROR @@ -6139,7 +6139,7 @@ class CheckManager: post_upgrade_cb_check, validate_32_64_bit_image_check, fabric_link_redundancy_check, - bootx_firmware_tmp_check, + 
bootx_service_failure_checks, # Faults apic_disk_space_faults_check, diff --git a/tests/checks/bootx_firmware_tmp_check/fabricNode.json b/tests/checks/bootx_service_failure_checks/fabricNode.json similarity index 100% rename from tests/checks/bootx_firmware_tmp_check/fabricNode.json rename to tests/checks/bootx_service_failure_checks/fabricNode.json diff --git a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py b/tests/checks/bootx_service_failure_checks/test_bootx_service_failure_checks.py similarity index 99% rename from tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py rename to tests/checks/bootx_service_failure_checks/test_bootx_service_failure_checks.py index 7366a6f4..a8e7f4cb 100644 --- a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py +++ b/tests/checks/bootx_service_failure_checks/test_bootx_service_failure_checks.py @@ -16,7 +16,7 @@ ls_firmware_tmp_cmd = '[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0' grep_fatal_bootx_cmd = '[ -d /var/log/bootx/logs ] && grep -Ri "fatal" /var/log/bootx/logs/* 2>/dev/null | wc -l || echo 0' -test_function = "bootx_firmware_tmp_check" +test_function = "bootx_service_failure_checks" @pytest.mark.parametrize( "icurl_outputs, conn_cmds, cversion, expected_result", From 2e7dce9d5b4ee30262da8aa7e75413dddc1f91e7 Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Mon, 22 Dec 2025 10:33:53 +0000 Subject: [PATCH 7/7] updatedthe impacted version check --- aci-preupgrade-validation-script.py | 130 ++++++++++++++-------------- 1 file changed, 63 insertions(+), 67 deletions(-) diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index fd253cf1..a1b38a2d 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -6017,82 +6017,78 @@ def bootx_service_failure_checks(fabric_nodes, cversion, username, password, **k if not cversion: return Result(result=MANUAL, msg="Current 
version not provided") - - affected = False - if (not cversion.older_than("6.0(2h)") and not cversion.newer_than("6.0(8h)")) or \ - (not cversion.older_than("6.1(1f)") and not cversion.newer_than("6.1(2g)")): - affected = True - - if not affected: - return Result(result=PASS, msg=VER_NOT_AFFECTED) - if not fabric_nodes: return Result(result=ERROR, msg="Fabric node response empty. Is the cluster healthy?", doc_url=doc_url) - # Filter for controller nodes only - controller = [node for node in fabric_nodes if node['fabricNode']['attributes']['role'] == 'controller'] - if not controller: - return Result(result=ERROR, msg="No controller nodes found. Is the cluster healthy?", doc_url=doc_url) + if (not cversion.older_than("6.0(2h)") and not cversion.newer_than("6.0(8h)")) or \ + (not cversion.older_than("6.1(1f)") and not cversion.newer_than("6.1(2g)")): - checked_apics = {} - has_error = False + # Filter for controller nodes only + controller = [node for node in fabric_nodes if node['fabricNode']['attributes']['role'] == 'controller'] + if not controller: + return Result(result=ERROR, msg="No controller nodes found. 
Is the cluster healthy?", doc_url=doc_url) - for apic in controller: - attr = apic['fabricNode']['attributes'] - if attr['address'] in checked_apics: - continue - checked_apics[attr['address']] = 1 - node_id = attr['id'] - - try: - c = Connection(attr['address']) - c.username = username - c.password = password - c.log = LOG_FILE - c.connect() - except Exception as e: - data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) - has_error = True - continue - - try: - # Check if /firmware/tmp directory exists and count files - c.cmd('[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0') - file_count = 0 - for line in c.output.strip().split('\n'): - line = line.strip() - if line.isdigit(): - file_count = int(line) - break + checked_apics = {} + has_error = False + + for apic in controller: + attr = apic['fabricNode']['attributes'] + if attr['address'] in checked_apics: + continue + checked_apics[attr['address']] = 1 + node_id = attr['id'] - # Check for fatal errors in bootx logs - c.cmd('[ -d /var/log/bootx/logs ] && grep -Ri "fatal" /var/log/bootx/logs/* 2>/dev/null | wc -l || echo 0') - fatal_count = 0 - for line in c.output.strip().split('\n'): - line = line.strip() - if line.isdigit(): - fatal_count = int(line) - break + try: + c = Connection(attr['address']) + c.username = username + c.password = password + c.log = LOG_FILE + c.connect() + except Exception as e: + data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) + has_error = True + continue - # Determine status - if file_count >= 1000: - status = 'FAIL - High file count' - data.append([node_id, str(file_count),"-", status]) - result = FAIL_UF - - if fatal_count > 0: - status = 'FAIL - Fatal errors found' - data.append([node_id, "-", str(fatal_count), status]) - result = FAIL_UF + try: + # Check if /firmware/tmp directory exists and count files + c.cmd('[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0') + file_count = 0 + for line in c.output.strip().split('\n'): 
+ line = line.strip() + if line.isdigit(): + file_count = int(line) + break - except Exception as e: - data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) - has_error = True - continue - c.close() - if has_error and result == PASS: - result = ERROR + # Check for fatal errors in bootx logs + c.cmd('[ -d /var/log/bootx/logs ] && grep -Ri "fatal" /var/log/bootx/logs/* 2>/dev/null | wc -l || echo 0') + fatal_count = 0 + for line in c.output.strip().split('\n'): + line = line.strip() + if line.isdigit(): + fatal_count = int(line) + break + + # Determine status + if file_count >= 1000: + status = 'FAIL - High file count' + data.append([node_id, str(file_count),"-", status]) + result = FAIL_UF + + if fatal_count > 0: + status = 'FAIL - Fatal errors found' + data.append([node_id, "-", str(fatal_count), status]) + result = FAIL_UF + + except Exception as e: + data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) + has_error = True + continue + c.close() + if has_error and result == PASS: + result = ERROR + else: + return Result(result=PASS, msg=VER_NOT_AFFECTED) return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)