From f26a0e1d6ab142a69fe5bb38cd54eea909839cbb Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Thu, 4 Dec 2025 13:13:05 +0000 Subject: [PATCH 1/7] Added new script validation for CSCwn37676 --- aci-preupgrade-validation-script.py | 106 ++++++ docs/docs/validations.md | 18 ++ .../bootx_firmware_tmp_check/infraWiNode.json | 62 ++++ .../test_bootx_firmware_tmp_check.py | 301 ++++++++++++++++++ 4 files changed, 487 insertions(+) create mode 100644 tests/checks/bootx_firmware_tmp_check/infraWiNode.json create mode 100644 tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index bfca5bb6..1dc5d9af 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5962,6 +5962,111 @@ def configpush_shard_check(tversion, **kwargs): return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) +@check_wrapper(check_title = 'Bootx Service failure log & firmware/tmp directory checks') +def bootx_firmware_tmp_check(cversion, username, password, **kwargs): + result = PASS + headers = ["Pod", "Node", "File Count", "Fatal Errors Found", "Status"] + data = [] + recommended_action = 'Contact Cisco TAC to investigate all flagged high file and log counts' + doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#bootx_service_failure_log_and_firmware_tmp_directory_checks' + + if not cversion: + return Result(result=MANUAL, msg="Current version not provided") + + + affected = False + if (not cversion.older_than("6.0(2f)") and not cversion.newer_than("6.0(8f)")) or \ + (not cversion.older_than("6.1(1f)") and not cversion.newer_than("6.1(2f)")): + affected = True + + if not affected: + return Result(result=PASS, msg=VER_NOT_AFFECTED) + + + controller = icurl('class', 'infraWiNode.json?query-target-filter=and(wcard(infraWiNode.dn,"topology/pod-1/node-1"))') + if not 
controller: + return Result(result=ERROR, msg="infraWiNode response empty. Is the cluster healthy?", doc_url=doc_url) + + print('') + checked_apics = {} + has_error = False + nodes_file_count_result = [] + nodes_fatal_errors_result = [] + + for apic in controller: + attr = apic['infraWiNode']['attributes'] + if attr['addr'] in checked_apics: + continue + checked_apics[attr['addr']] = 1 + pod_id = attr['podId'] + node_id = attr['id'] + node_name = attr['name'] + node_title = 'Checking %s...' % node_name + + try: + c = Connection(attr['addr']) + c.username = username + c.password = password + c.log = LOG_FILE + c.connect() + except Exception as e: + data.append([pod_id, node_id, '-', '-', 'ERROR: %s' % str(e)]) + has_error = True + continue + + try: + # Check if /firmware/tmp directory exists and count files + c.cmd('[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0') + file_count = 0 + for line in c.output.strip().split('\n'): + line = line.strip() + if line.isdigit(): + file_count = int(line) + break + + # Check for fatal errors in bootx logs + c.cmd('[ -d /var/log/bootx/logs ] && grep -Ri "fatal" /var/log/bootx/logs/* 2>/dev/null | wc -l || echo 0') + fatal_count = 0 + for line in c.output.strip().split('\n'): + line = line.strip() + if line.isdigit(): + fatal_count = int(line) + break + + # Determine status + if file_count >= 1000: + status = 'FAIL - High file count' + data.append([pod_id, node_id, str(file_count), str(fatal_count), status]) + result = FAIL_UF + nodes_file_count_result.append(result) + elif fatal_count > 0: + status = 'WARNING - Fatal errors found' + data.append([pod_id, node_id, str(file_count), str(fatal_count), status]) + if result == PASS: + result = MANUAL + nodes_fatal_errors_result.append(result) + + except Exception as e: + data.append([pod_id, node_id, '-', '-', 'ERROR: %s' % str(e)]) + has_error = True + continue + + if FAIL_UF in nodes_file_count_result: + result = FAIL_UF + if MANUAL in 
nodes_fatal_errors_result: + result = MANUAL + if has_error and result == PASS: + result = ERROR + + return Result( + result=result, + headers=headers, + data=data, + recommended_action=recommended_action, + doc_url=doc_url, + ) + + # ---- Script Execution ---- @@ -6049,6 +6154,7 @@ class CheckManager: post_upgrade_cb_check, validate_32_64_bit_image_check, fabric_link_redundancy_check, + bootx_firmware_tmp_check, # Faults apic_disk_space_faults_check, diff --git a/docs/docs/validations.md b/docs/docs/validations.md index e395564f..c8ca2fab 100644 --- a/docs/docs/validations.md +++ b/docs/docs/validations.md @@ -191,6 +191,7 @@ Items | Defect | This Script [Stale pconsRA Object][d26] | CSCwp22212 | :warning:{title="Deprecated"} | :no_entry_sign: [ISIS DTEPs Byte Size][d27] | CSCwp15375 | :white_check_mark: | :no_entry_sign: [Policydist configpushShardCont Crash][d28] | CSCwp95515 | :white_check_mark: | +[Bootx Service failure log & firmware/tmp directory checks][d29] | CSCwn37676 | :white_check_mark: | :no_entry_sign: [d1]: #ep-announce-compatibility [d2]: #eventmgr-db-size-defect-susceptibility @@ -220,6 +221,7 @@ Items | Defect | This Script [d26]: #stale-pconsra-object [d27]: #isis-dteps-byte-size [d28]: #policydist-configpushshardcont-crash +[d29]: #bootx_service_failure_log_and_firmware_tmp_directory_checks ## General Check Details @@ -2604,6 +2606,21 @@ Due to [CSCwp95515][59], upgrading to an affected version while having any `conf If any instances of `configpushShardCont` are flagged by this script, Cisco TAC must be contacted to identify and resolve the underlying issue before performing the upgrade. 
+### Bootx Service failure log & firmware/tmp directory checks + +Due to [CSCwn37676][62], ACI runs on releases 6.0(2) through 6.0(8) or 6.1(1) through 6.1(2) , upgrading to any target version with a high number of files in the `/firmware/tmp/` directory (1000 or more) or the presence of fatal errors in `/var/log/bootx/logs/` can cause the bootx service to fail, resulting in upgrade failures. + +The script performs two validations on each APIC: + +1. Checks if `/firmware/tmp/` directory contains 1000 or more files +2. Searches for "fatal" errors in `/var/log/bootx/logs/` + +!!! warning + If this check fails, verify the bootx service status on the affected APIC(s) by running `systemctl status bootx`. If the service is not running, the APIC is already experiencing the issue and must be resolved before proceeding with the upgrade. + +!!! tip + Certain high churn logging configurations have been found to cause excessive files in `/firmware/tmp/while on non-fixed versions. If this check identifies issues, work with Cisco TAC to clean up excess files and resolve any bootx service failures before attempting the upgrade. 
+ [0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script [1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html [2]: https://www.cisco.com/c/en/us/support/switches/nexus-9000-series-switches/products-release-notes-list.html @@ -2666,3 +2683,4 @@ If any instances of `configpushShardCont` are flagged by this script, Cisco TAC [59]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwp95515 [60]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#Inter [61]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#EnablePolicyCompression +[62]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwn37676 \ No newline at end of file diff --git a/tests/checks/bootx_firmware_tmp_check/infraWiNode.json b/tests/checks/bootx_firmware_tmp_check/infraWiNode.json new file mode 100644 index 00000000..b6626d02 --- /dev/null +++ b/tests/checks/bootx_firmware_tmp_check/infraWiNode.json @@ -0,0 +1,62 @@ +[ + { + "infraWiNode": { + "attributes": { + "addr": "10.0.0.1", + "adminSt": "in-service", + "apicMode": "active", + "cntrlSbstState": "approved", + "dn": "topology/pod-1/node-1/av/node-1", + "failoverStatus": "idle", + "health": "fully-fit", + "id": "1", + "mbSn": "FCH1234ABCD", + "name": "", + "nodeName": "apic1", + "operSt": "available", + "podId": "0", + "targetMbSn": "" + } + } + }, + { + "infraWiNode": { + "attributes": { + "addr": "10.0.0.2", + "adminSt": "in-service", + "apicMode": "active", + "cntrlSbstState": "approved", + "dn": "topology/pod-1/node-1/av/node-2", + "failoverStatus": "idle", + "health": "fully-fit", + "id": "2", + "mbSn": "FCH1235ABCD", + "name": "", + "nodeName": "apic2", + "operSt": "available", + "podId": "0", + "targetMbSn": "" + } + } + }, + { + "infraWiNode": { + "attributes": { + "addr": "10.0.0.3", + "adminSt": "in-service", + 
"apicMode": "active", + "cntrlSbstState": "approved", + "dn": "topology/pod-1/node-1/av/node-3", + "failoverStatus": "idle", + "health": "fully-fit", + "id": "3", + "mbSn": "FCH1236ABCD", + "name": "", + "nodeName": "apic3", + "operSt": "available", + "podId": "1", + "targetMbSn": "" + } + } + } +] diff --git a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py new file mode 100644 index 00000000..f4a9c3e0 --- /dev/null +++ b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py @@ -0,0 +1,301 @@ +import os +import pytest +import logging +import importlib +from helpers.utils import read_data + +script = importlib.import_module("aci-preupgrade-validation-script") + +log = logging.getLogger(__name__) +dir = os.path.dirname(os.path.abspath(__file__)) + +# API query for controllers +infraWiNode_api = 'infraWiNode.json?query-target-filter=and(wcard(infraWiNode.dn,"topology/pod-1/node-1"))' + +# Commands that will be executed via SSH +ls_firmware_tmp_cmd = '[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0' +grep_fatal_bootx_cmd = '[ -d /var/log/bootx/logs ] && grep -Ri "fatal" /var/log/bootx/logs/* 2>/dev/null | wc -l || echo 0' + +test_function = "bootx_firmware_tmp_check" + +@pytest.mark.parametrize( + "icurl_outputs, conn_cmds, cversion, expected_result", + [ + # Test 1: Version not provided (cversion is None) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + None, + script.MANUAL, + ), + # Test 2: Version not affected (below 6.0(2f)) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + "6.0(1a)", + script.PASS, + ), + # Test 3: Version not affected (above 6.0(8f)) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + "6.0(9a)", + script.PASS, + ), + # Test 4: Version not affected (between 6.0(8f) and 6.1(1f)) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + 
"6.0(9h)", + script.PASS, + ), + # Test 5: Version not affected (above 6.1(2f)) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + "6.1(3a)", + script.PASS, + ), + # Test 6: Affected version 6.0(2f), no issues found + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "0\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "0\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "0\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.0(2f)", + script.PASS, + ), + # Test 7: Affected version 6.0(5a), file count >= 1000 on one APIC + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "1500\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "100\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.0(5a)", + script.FAIL_UF, + ), + # Test 8: Affected version 6.0(8f), fatal errors found on one APIC + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "5\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "30\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", 
"exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "20\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.0(8f)", + script.MANUAL, + ), + # Test 9: Affected version 6.1(1f), both high file count and fatal errors + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "2000\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "10\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "500\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "100\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.1(1f)", + script.FAIL_UF, + ), + # Test 10: Affected version 6.1(2f), multiple APICs with issues + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "1200\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "1500\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "2\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "100\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.1(2f)", + script.FAIL_UF, + ), + # Test 11: Affected version, file count exactly 1000 (boundary test) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "1000\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": 
"100\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.0(3a)", + script.FAIL_UF, + ), + # Test 12: Affected version, file count just below 1000 (boundary test) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "999\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "100\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + "6.0(4a)", + script.PASS, + ), + # Test 13: Affected version, only fatal errors (no high file count) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "10\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "20\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "3\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "15\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "7\napic3#", "exception": None}, + ], + }, + "6.1(2a)", + script.MANUAL, + ), + ], +) +def test_logic(run_check, mock_icurl, mock_conn, cversion, expected_result): + cver = script.AciVersion(cversion) if cversion else None + result = run_check(cversion=cver, username="admin", password="password") + assert result.result == expected_result + + +@pytest.mark.parametrize( 
+ "icurl_outputs, conn_cmds, conn_failure, cversion, expected_result", + [ + # Test 14: SSH connection failure on one APIC + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {}, + True, + "6.0(5a)", + script.ERROR, + ), + # Test 15: SSH command execution error on one APIC + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "", "exception": Exception("Command failed")}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "100\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + False, + "6.0(7a)", + script.ERROR, + ), + ], +) +def test_connection_errors(run_check, mock_icurl, mock_conn, cversion, expected_result): + cver = script.AciVersion(cversion) if cversion else None + result = run_check(cversion=cver, username="admin", password="password") + assert result.result == expected_result + + +@pytest.mark.parametrize( + "icurl_outputs, conn_cmds, cversion, expected_result", + [ + # Test 16: Empty topSystem response (unhealthy cluster) + ( + {infraWiNode_api: []}, + {}, + "6.0(5a)", + script.ERROR, + ), + # Test 17: Non-numeric output from commands (edge case) + ( + {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + { + "10.0.0.1": [ + {"cmd": ls_firmware_tmp_cmd, "output": "error\napic1#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic1#", "exception": None}, + ], + "10.0.0.2": [ + {"cmd": ls_firmware_tmp_cmd, "output": "0\napic2#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "invalid\napic2#", "exception": None}, + ], + "10.0.0.3": [ + {"cmd": ls_firmware_tmp_cmd, "output": "50\napic3#", "exception": None}, + {"cmd": grep_fatal_bootx_cmd, "output": "0\napic3#", "exception": None}, + ], + }, + 
"6.0(6a)", + script.PASS, + ), + ], +) +def test_edge_cases(run_check, mock_icurl, mock_conn, cversion, expected_result): + cver = script.AciVersion(cversion) if cversion else None + result = run_check(cversion=cver, username="admin", password="password") + assert result.result == expected_result \ No newline at end of file From 0e94543fd5e86ffc5d872f1284f2de1449bfa0a0 Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Tue, 9 Dec 2025 10:24:04 +0000 Subject: [PATCH 2/7] Modified the mo class, Updated the release version alphabets, Modified the test case --- aci-preupgrade-validation-script.py | 28 ++++----- .../bootx_firmware_tmp_check/fabricNode.json | 50 +++++++++++++++ .../bootx_firmware_tmp_check/infraWiNode.json | 62 ------------------- .../test_bootx_firmware_tmp_check.py | 36 +++++------ 4 files changed, 81 insertions(+), 95 deletions(-) create mode 100644 tests/checks/bootx_firmware_tmp_check/fabricNode.json delete mode 100644 tests/checks/bootx_firmware_tmp_check/infraWiNode.json diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index 1dc5d9af..5418a92a 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5965,7 +5965,7 @@ def configpush_shard_check(tversion, **kwargs): @check_wrapper(check_title = 'Bootx Service failure log & firmware/tmp directory checks') def bootx_firmware_tmp_check(cversion, username, password, **kwargs): result = PASS - headers = ["Pod", "Node", "File Count", "Fatal Errors Found", "Status"] + headers = ["Node", "File Count", "Fatal Errors Found", "Status"] data = [] recommended_action = 'Contact Cisco TAC to investigate all flagged high file and log counts' doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#bootx_service_failure_log_and_firmware_tmp_directory_checks' @@ -5975,17 +5975,16 @@ def bootx_firmware_tmp_check(cversion, username, password, **kwargs): affected = False - if (not 
cversion.older_than("6.0(2f)") and not cversion.newer_than("6.0(8f)")) or \ - (not cversion.older_than("6.1(1f)") and not cversion.newer_than("6.1(2f)")): + if (not cversion.older_than("6.0(2h)") and not cversion.newer_than("6.0(8h)")) or \ + (not cversion.older_than("6.1(1f)") and not cversion.newer_than("6.1(2g)")): affected = True if not affected: return Result(result=PASS, msg=VER_NOT_AFFECTED) - - controller = icurl('class', 'infraWiNode.json?query-target-filter=and(wcard(infraWiNode.dn,"topology/pod-1/node-1"))') + controller = icurl('class', 'fabricNode.json?query-target-filter=and(eq(fabricNode.role,"controller"))') if not controller: - return Result(result=ERROR, msg="infraWiNode response empty. Is the cluster healthy?", doc_url=doc_url) + return Result(result=ERROR, msg="Fabric node response empty. Is the cluster healthy?", doc_url=doc_url) print('') checked_apics = {} @@ -5994,23 +5993,22 @@ def bootx_firmware_tmp_check(cversion, username, password, **kwargs): nodes_fatal_errors_result = [] for apic in controller: - attr = apic['infraWiNode']['attributes'] - if attr['addr'] in checked_apics: + attr = apic['fabricNode']['attributes'] + if attr['address'] in checked_apics: continue - checked_apics[attr['addr']] = 1 - pod_id = attr['podId'] + checked_apics[attr['address']] = 1 node_id = attr['id'] node_name = attr['name'] node_title = 'Checking %s...' 
% node_name try: - c = Connection(attr['addr']) + c = Connection(attr['address']) c.username = username c.password = password c.log = LOG_FILE c.connect() except Exception as e: - data.append([pod_id, node_id, '-', '-', 'ERROR: %s' % str(e)]) + data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) has_error = True continue @@ -6036,18 +6034,18 @@ def bootx_firmware_tmp_check(cversion, username, password, **kwargs): # Determine status if file_count >= 1000: status = 'FAIL - High file count' - data.append([pod_id, node_id, str(file_count), str(fatal_count), status]) + data.append([node_id, str(file_count), str(fatal_count), status]) result = FAIL_UF nodes_file_count_result.append(result) elif fatal_count > 0: status = 'WARNING - Fatal errors found' - data.append([pod_id, node_id, str(file_count), str(fatal_count), status]) + data.append([node_id, str(file_count), str(fatal_count), status]) if result == PASS: result = MANUAL nodes_fatal_errors_result.append(result) except Exception as e: - data.append([pod_id, node_id, '-', '-', 'ERROR: %s' % str(e)]) + data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) has_error = True continue diff --git a/tests/checks/bootx_firmware_tmp_check/fabricNode.json b/tests/checks/bootx_firmware_tmp_check/fabricNode.json new file mode 100644 index 00000000..d102b18b --- /dev/null +++ b/tests/checks/bootx_firmware_tmp_check/fabricNode.json @@ -0,0 +1,50 @@ +[ + { + "fabricNode": { + "attributes": { + "address": "10.0.0.1", + "dn": "topology/pod-1/node-1", + "fabricSt": "commissioned", + "id": "1", + "model": "APIC-SERVER-L2", + "monPolDn": "uni/fabric/monfab-default", + "name": "apic1", + "nodeType": "unspecified", + "podId": "1", + "role": "controller" + } + } + }, + { + "fabricNode": { + "attributes": { + "address": "10.0.0.2", + "dn": "topology/pod-1/node-2", + "fabricSt": "commissioned", + "id": "2", + "model": "APIC-SERVER-L2", + "monPolDn": "uni/fabric/monfab-default", + "name": "apic2", + "nodeType": "unspecified", + "podId": 
"1", + "role": "controller" + } + } + }, + { + "fabricNode": { + "attributes": { + "address": "10.0.0.3", + "dn": "topology/pod-1/node-3", + "fabricSt": "commissioned", + "id": "3", + "model": "APIC-SERVER-L2", + "monPolDn": "uni/fabric/monfab-default", + "name": "apic3", + "nodeType": "unspecified", + "podId": "1", + "role": "controller" + } + } + } +] diff --git a/tests/checks/bootx_firmware_tmp_check/infraWiNode.json b/tests/checks/bootx_firmware_tmp_check/infraWiNode.json deleted file mode 100644 index b6626d02..00000000 --- a/tests/checks/bootx_firmware_tmp_check/infraWiNode.json +++ /dev/null @@ -1,62 +0,0 @@ -[ - { - "infraWiNode": { - "attributes": { - "addr": "10.0.0.1", - "adminSt": "in-service", - "apicMode": "active", - "cntrlSbstState": "approved", - "dn": "topology/pod-1/node-1/av/node-1", - "failoverStatus": "idle", - "health": "fully-fit", - "id": "1", - "mbSn": "FCH1234ABCD", - "name": "", - "nodeName": "apic1", - "operSt": "available", - "podId": "0", - "targetMbSn": "" - } - } - }, - { - "infraWiNode": { - "attributes": { - "addr": "10.0.0.2", - "adminSt": "in-service", - "apicMode": "active", - "cntrlSbstState": "approved", - "dn": "topology/pod-1/node-1/av/node-2", - "failoverStatus": "idle", - "health": "fully-fit", - "id": "2", - "mbSn": "FCH1235ABCD", - "name": "", - "nodeName": "apic2", - "operSt": "available", - "podId": "0", - "targetMbSn": "" - } - } - }, - { - "infraWiNode": { - "attributes": { - "addr": "10.0.0.3", - "adminSt": "in-service", - "apicMode": "active", - "cntrlSbstState": "approved", - "dn": "topology/pod-1/node-1/av/node-3", - "failoverStatus": "idle", - "health": "fully-fit", - "id": "3", - "mbSn": "FCH1236ABCD", - "name": "", - "nodeName": "apic3", - "operSt": "available", - "podId": "1", - "targetMbSn": "" - } - } - } -] diff --git a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py index f4a9c3e0..c94ac4a1 100644 --- 
a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py +++ b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py @@ -10,7 +10,7 @@ dir = os.path.dirname(os.path.abspath(__file__)) # API query for controllers -infraWiNode_api = 'infraWiNode.json?query-target-filter=and(wcard(infraWiNode.dn,"topology/pod-1/node-1"))' +fabricNode_api = 'fabricNode.json?query-target-filter=and(eq(fabricNode.role,"controller"))' # Commands that will be executed via SSH ls_firmware_tmp_cmd = '[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0' @@ -23,42 +23,42 @@ [ # Test 1: Version not provided (cversion is None) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, None, script.MANUAL, ), # Test 2: Version not affected (below 6.0(2f)) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.0(1a)", script.PASS, ), # Test 3: Version not affected (above 6.0(8f)) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.0(9a)", script.PASS, ), # Test 4: Version not affected (between 6.0(8f) and 6.1(1f)) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.0(9h)", script.PASS, ), # Test 5: Version not affected (above 6.1(2f)) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.1(3a)", script.PASS, ), # Test 6: Affected version 6.0(2f), no issues found ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "0\napic1#", "exception": None}, @@ -78,7 +78,7 @@ ), # Test 7: Affected version 6.0(5a), file count >= 1000 on one APIC ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + 
{fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "1500\napic1#", "exception": None}, @@ -98,7 +98,7 @@ ), # Test 8: Affected version 6.0(8f), fatal errors found on one APIC ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "50\napic1#", "exception": None}, @@ -118,7 +118,7 @@ ), # Test 9: Affected version 6.1(1f), both high file count and fatal errors ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "2000\napic1#", "exception": None}, @@ -138,7 +138,7 @@ ), # Test 10: Affected version 6.1(2f), multiple APICs with issues ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "1200\napic1#", "exception": None}, @@ -158,7 +158,7 @@ ), # Test 11: Affected version, file count exactly 1000 (boundary test) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "1000\napic1#", "exception": None}, @@ -178,7 +178,7 @@ ), # Test 12: Affected version, file count just below 1000 (boundary test) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "999\napic1#", "exception": None}, @@ -198,7 +198,7 @@ ), # Test 13: Affected version, only fatal errors (no high file count) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "10\napic1#", "exception": None}, @@ -229,7 +229,7 @@ def test_logic(run_check, mock_icurl, mock_conn, cversion, 
expected_result): [ # Test 14: SSH connection failure on one APIC ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, True, "6.0(5a)", @@ -237,7 +237,7 @@ def test_logic(run_check, mock_icurl, mock_conn, cversion, expected_result): ), # Test 15: SSH command execution error on one APIC ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "", "exception": Exception("Command failed")}, @@ -268,14 +268,14 @@ def test_connection_errors(run_check, mock_icurl, mock_conn, cversion, expected_ [ # Test 16: Empty topSystem response (unhealthy cluster) ( - {infraWiNode_api: []}, + {fabricNode_api: []}, {}, "6.0(5a)", script.ERROR, ), # Test 17: Non-numeric output from commands (edge case) ( - {infraWiNode_api: read_data(dir, "infraWiNode.json")}, + {fabricNode_api: read_data(dir, "fabricNode.json")}, { "10.0.0.1": [ {"cmd": ls_firmware_tmp_cmd, "output": "error\napic1#", "exception": None}, From eb6157ad41e312917cffd42d52927e7f5a3be5de Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Thu, 11 Dec 2025 06:17:01 +0000 Subject: [PATCH 3/7] Removed then fabricNode api query. Added the filtered logic in code. 
Modified the test cases --- aci-preupgrade-validation-script.py | 18 ++++++++---------- .../test_bootx_firmware_tmp_check.py | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index 5418a92a..6ae0e6e0 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5963,7 +5963,7 @@ def configpush_shard_check(tversion, **kwargs): return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) @check_wrapper(check_title = 'Bootx Service failure log & firmware/tmp directory checks') -def bootx_firmware_tmp_check(cversion, username, password, **kwargs): +def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwargs): result = PASS headers = ["Node", "File Count", "Fatal Errors Found", "Status"] data = [] @@ -5982,10 +5982,14 @@ def bootx_firmware_tmp_check(cversion, username, password, **kwargs): if not affected: return Result(result=PASS, msg=VER_NOT_AFFECTED) - controller = icurl('class', 'fabricNode.json?query-target-filter=and(eq(fabricNode.role,"controller"))') - if not controller: + if not fabric_nodes: return Result(result=ERROR, msg="Fabric node response empty. Is the cluster healthy?", doc_url=doc_url) + # Filter for controller nodes only + controller = [node for node in fabric_nodes if node['fabricNode']['attributes']['role'] == 'controller'] + if not controller: + return Result(result=ERROR, msg="No controller nodes found. 
Is the cluster healthy?", doc_url=doc_url) + print('') checked_apics = {} has_error = False @@ -6056,13 +6060,7 @@ def bootx_firmware_tmp_check(cversion, username, password, **kwargs): if has_error and result == PASS: result = ERROR - return Result( - result=result, - headers=headers, - data=data, - recommended_action=recommended_action, - doc_url=doc_url, - ) + return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) # ---- Script Execution ---- diff --git a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py index c94ac4a1..8f8b289d 100644 --- a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py +++ b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py @@ -9,8 +9,8 @@ log = logging.getLogger(__name__) dir = os.path.dirname(os.path.abspath(__file__)) -# API query for controllers -fabricNode_api = 'fabricNode.json?query-target-filter=and(eq(fabricNode.role,"controller"))' +# API query for fabricNode (get_fabric_nodes() uses 'fabricNode.json' without filter) +fabricNode_api = 'fabricNode.json' # Commands that will be executed via SSH ls_firmware_tmp_cmd = '[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0' @@ -218,9 +218,10 @@ ), ], ) -def test_logic(run_check, mock_icurl, mock_conn, cversion, expected_result): +def test_logic(run_check, mock_icurl, mock_conn, icurl_outputs, cversion, expected_result): cver = script.AciVersion(cversion) if cversion else None - result = run_check(cversion=cver, username="admin", password="password") + fabric_nodes = icurl_outputs.get(fabricNode_api, []) + result = run_check(fabric_nodes=fabric_nodes, cversion=cver, username="admin", password="password") assert result.result == expected_result @@ -257,9 +258,10 @@ def test_logic(run_check, mock_icurl, mock_conn, cversion, expected_result): ), ], ) -def 
test_connection_errors(run_check, mock_icurl, mock_conn, cversion, expected_result): +def test_connection_errors(run_check, mock_icurl, mock_conn, icurl_outputs, cversion, expected_result): cver = script.AciVersion(cversion) if cversion else None - result = run_check(cversion=cver, username="admin", password="password") + fabric_nodes = icurl_outputs.get(fabricNode_api, []) + result = run_check(fabric_nodes=fabric_nodes, cversion=cver, username="admin", password="password") assert result.result == expected_result @@ -295,7 +297,8 @@ def test_connection_errors(run_check, mock_icurl, mock_conn, cversion, expected_ ), ], ) -def test_edge_cases(run_check, mock_icurl, mock_conn, cversion, expected_result): +def test_edge_cases(run_check, mock_icurl, mock_conn, icurl_outputs, cversion, expected_result): cver = script.AciVersion(cversion) if cversion else None - result = run_check(cversion=cver, username="admin", password="password") + fabric_nodes = icurl_outputs.get(fabricNode_api, []) + result = run_check(fabric_nodes=fabric_nodes, cversion=cver, username="admin", password="password") assert result.result == expected_result \ No newline at end of file From 3de425564d4750582da9a6957429bf1868caa7f1 Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Fri, 12 Dec 2025 05:32:47 +0000 Subject: [PATCH 4/7] Incorporated the review comments --- aci-preupgrade-validation-script.py | 4 +--- docs/docs/validations.md | 8 ++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index 6ae0e6e0..e689db3b 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5962,7 +5962,7 @@ def configpush_shard_check(tversion, **kwargs): return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) -@check_wrapper(check_title = 'Bootx Service failure log & firmware/tmp directory checks') +@check_wrapper(check_title = 
'Bootx Service failure checks') def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwargs): result = PASS headers = ["Node", "File Count", "Fatal Errors Found", "Status"] @@ -5990,7 +5990,6 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg if not controller: return Result(result=ERROR, msg="No controller nodes found. Is the cluster healthy?", doc_url=doc_url) - print('') checked_apics = {} has_error = False nodes_file_count_result = [] @@ -6003,7 +6002,6 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg checked_apics[attr['address']] = 1 node_id = attr['id'] node_name = attr['name'] - node_title = 'Checking %s...' % node_name try: c = Connection(attr['address']) diff --git a/docs/docs/validations.md b/docs/docs/validations.md index c8ca2fab..d5d9a622 100644 --- a/docs/docs/validations.md +++ b/docs/docs/validations.md @@ -191,7 +191,7 @@ Items | Defect | This Script [Stale pconsRA Object][d26] | CSCwp22212 | :warning:{title="Deprecated"} | :no_entry_sign: [ISIS DTEPs Byte Size][d27] | CSCwp15375 | :white_check_mark: | :no_entry_sign: [Policydist configpushShardCont Crash][d28] | CSCwp95515 | :white_check_mark: | -[Bootx Service failure log & firmware/tmp directory checks][d29] | CSCwn37676 | :white_check_mark: | :no_entry_sign: +[Bootx Service failure checks][d29] | CSCwn37676 | :white_check_mark: | :no_entry_sign: [d1]: #ep-announce-compatibility [d2]: #eventmgr-db-size-defect-susceptibility @@ -221,7 +221,7 @@ Items | Defect | This Script [d26]: #stale-pconsra-object [d27]: #isis-dteps-byte-size [d28]: #policydist-configpushshardcont-crash -[d29]: #bootx_service_failure_log_and_firmware_tmp_directory_checks +[d29]: #bootx-service-failure-checks ## General Check Details @@ -2606,9 +2606,9 @@ Due to [CSCwp95515][59], upgrading to an affected version while having any `conf If any instances of `configpushShardCont` are flagged by this script, Cisco TAC must be contacted to 
identify and resolve the underlying issue before performing the upgrade. -### Bootx Service failure log & firmware/tmp directory checks +### Bootx Service failure checks -Due to [CSCwn37676][62], ACI runs on releases 6.0(2) through 6.0(8) or 6.1(1) through 6.1(2) , upgrading to any target version with a high number of files in the `/firmware/tmp/` directory (1000 or more) or the presence of fatal errors in `/var/log/bootx/logs/` can cause the bootx service to fail, resulting in upgrade failures. +Due to [CSCwn37676][62], when ACI runs on releases 6.0(2h) through 6.0(8h) or 6.1(1f) through 6.1(2g), upgrading to any target version with a high number of files in the `/firmware/tmp/` directory (1000 or more) or the presence of fatal errors in `/var/log/bootx/logs/` can cause the bootx service to fail, resulting in upgrade failures. The script performs two validations on each APIC: From 7f700b675d8d44cde4caa80e57f93d4cbde14170 Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Fri, 12 Dec 2025 13:24:23 +0000 Subject: [PATCH 5/7] Modified the script logic based on the updated preupgrade flag --- aci-preupgrade-validation-script.py | 23 ++++-------- .../test_bootx_firmware_tmp_check.py | 36 +++++++++---------- 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index e689db3b..a7ac27dd 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5991,9 +5991,7 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg return Result(result=ERROR, msg="No controller nodes found. 
Is the cluster healthy?", doc_url=doc_url) checked_apics = {} - has_error = False - nodes_file_count_result = [] - nodes_fatal_errors_result = [] + has_error = False for apic in controller: attr = apic['fabricNode']['attributes'] @@ -6001,7 +5999,6 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg continue checked_apics[attr['address']] = 1 node_id = attr['id'] - node_name = attr['name'] try: c = Connection(attr['address']) @@ -6036,25 +6033,19 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg # Determine status if file_count >= 1000: status = 'FAIL - High file count' - data.append([node_id, str(file_count), str(fatal_count), status]) + data.append([node_id, str(file_count),"-", status]) + result = FAIL_UF + + if fatal_count > 0: + status = 'FAIL - Fatal errors found' + data.append([node_id, "-", str(fatal_count), status]) result = FAIL_UF - nodes_file_count_result.append(result) - elif fatal_count > 0: - status = 'WARNING - Fatal errors found' - data.append([node_id, str(file_count), str(fatal_count), status]) - if result == PASS: - result = MANUAL - nodes_fatal_errors_result.append(result) except Exception as e: data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) has_error = True continue - if FAIL_UF in nodes_file_count_result: - result = FAIL_UF - if MANUAL in nodes_fatal_errors_result: - result = MANUAL if has_error and result == PASS: result = ERROR diff --git a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py index 8f8b289d..7366a6f4 100644 --- a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py +++ b/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py @@ -28,35 +28,35 @@ None, script.MANUAL, ), - # Test 2: Version not affected (below 6.0(2f)) + # Test 2: Version not affected (below 6.0(2h)) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, 
"6.0(1a)", script.PASS, ), - # Test 3: Version not affected (above 6.0(8f)) + # Test 3: Version not affected (above 6.0(8h)) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.0(9a)", script.PASS, ), - # Test 4: Version not affected (between 6.0(8f) and 6.1(1f)) + # Test 4: Version not affected (between 6.0(8h) and 6.1(1f)) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.0(9h)", script.PASS, ), - # Test 5: Version not affected (above 6.1(2f)) + # Test 5: Version not affected (above 6.1(2g)) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, "6.1(3a)", script.PASS, ), - # Test 6: Affected version 6.0(2f), no issues found + # Test 6: Version not affected 6.0(2f) (below 6.0(2h)), no issues found ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -76,7 +76,7 @@ "6.0(2f)", script.PASS, ), - # Test 7: Affected version 6.0(5a), file count >= 1000 on one APIC + # Test 7: Affected version 6.0(5a) (within 6.0(2h) to 6.0(8h)), file count >= 1000 on one APIC ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -96,7 +96,7 @@ "6.0(5a)", script.FAIL_UF, ), - # Test 8: Affected version 6.0(8f), fatal errors found on one APIC + # Test 8: Affected version 6.0(8f) (within 6.0(2h) to 6.0(8h)), fatal errors found on one APIC ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -114,9 +114,9 @@ ], }, "6.0(8f)", - script.MANUAL, + script.FAIL_UF, ), - # Test 9: Affected version 6.1(1f), both high file count and fatal errors + # Test 9: Affected version 6.1(1f) (within 6.1(1f) to 6.1(2g)), both high file count and fatal errors ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -136,7 +136,7 @@ "6.1(1f)", script.FAIL_UF, ), - # Test 10: Affected version 6.1(2f), multiple APICs with issues + # Test 10: Affected version 6.1(2f) (within 6.1(1f) to 6.1(2g)), multiple APICs with issues ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -156,7 +156,7 @@ "6.1(2f)", script.FAIL_UF, ), - # Test 11: Affected version, file 
count exactly 1000 (boundary test) + # Test 11: Affected version 6.0(3a) (within 6.0(2h) to 6.0(8h)), file count exactly 1000 (boundary test) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -176,7 +176,7 @@ "6.0(3a)", script.FAIL_UF, ), - # Test 12: Affected version, file count just below 1000 (boundary test) + # Test 12: Affected version 6.0(4a) (within 6.0(2h) to 6.0(8h)), file count just below 1000 (boundary test) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -196,7 +196,7 @@ "6.0(4a)", script.PASS, ), - # Test 13: Affected version, only fatal errors (no high file count) + # Test 13: Affected version 6.1(2a) (within 6.1(1f) to 6.1(2g)), only fatal errors (no high file count) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -214,7 +214,7 @@ ], }, "6.1(2a)", - script.MANUAL, + script.FAIL_UF, ), ], ) @@ -228,7 +228,7 @@ def test_logic(run_check, mock_icurl, mock_conn, icurl_outputs, cversion, expect @pytest.mark.parametrize( "icurl_outputs, conn_cmds, conn_failure, cversion, expected_result", [ - # Test 14: SSH connection failure on one APIC + # Test 14: Affected version 6.0(5a) (within 6.0(2h) to 6.0(8h)), SSH connection failure on one APIC ( {fabricNode_api: read_data(dir, "fabricNode.json")}, {}, @@ -236,7 +236,7 @@ def test_logic(run_check, mock_icurl, mock_conn, icurl_outputs, cversion, expect "6.0(5a)", script.ERROR, ), - # Test 15: SSH command execution error on one APIC + # Test 15: Affected version 6.0(7a) (within 6.0(2h) to 6.0(8h)), SSH command execution error on one APIC ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { @@ -268,14 +268,14 @@ def test_connection_errors(run_check, mock_icurl, mock_conn, icurl_outputs, cver @pytest.mark.parametrize( "icurl_outputs, conn_cmds, cversion, expected_result", [ - # Test 16: Empty topSystem response (unhealthy cluster) + # Test 16: Affected version 6.0(5a) (within 6.0(2h) to 6.0(8h)), Empty fabricNode response (unhealthy cluster) ( {fabricNode_api: []}, {}, "6.0(5a)", 
script.ERROR, ), - # Test 17: Non-numeric output from commands (edge case) + # Test 17: Affected version 6.0(6a) (within 6.0(2h) to 6.0(8h)), Non-numeric output from commands (edge case) ( {fabricNode_api: read_data(dir, "fabricNode.json")}, { From d8c19d24a977996dabacd9773f7dbb1d94bf647c Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Tue, 16 Dec 2025 11:38:05 +0000 Subject: [PATCH 6/7] Addressed the review comments --- aci-preupgrade-validation-script.py | 6 +++--- .../fabricNode.json | 0 .../test_bootx_service_failure_checks.py} | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) rename tests/checks/{bootx_firmware_tmp_check => bootx_service_failure_checks}/fabricNode.json (100%) rename tests/checks/{bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py => bootx_service_failure_checks/test_bootx_service_failure_checks.py} (99%) diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index a7ac27dd..b1544f51 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5963,7 +5963,7 @@ def configpush_shard_check(tversion, **kwargs): return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) @check_wrapper(check_title = 'Bootx Service failure checks') -def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwargs): +def bootx_service_failure_checks(fabric_nodes, cversion, username, password, **kwargs): result = PASS headers = ["Node", "File Count", "Fatal Errors Found", "Status"] data = [] @@ -6045,7 +6045,7 @@ def bootx_firmware_tmp_check(fabric_nodes, cversion, username, password, **kwarg data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) has_error = True continue - + c.close() if has_error and result == PASS: result = ERROR @@ -6139,7 +6139,7 @@ class CheckManager: post_upgrade_cb_check, validate_32_64_bit_image_check, fabric_link_redundancy_check, - bootx_firmware_tmp_check, + 
bootx_service_failure_checks, # Faults apic_disk_space_faults_check, diff --git a/tests/checks/bootx_firmware_tmp_check/fabricNode.json b/tests/checks/bootx_service_failure_checks/fabricNode.json similarity index 100% rename from tests/checks/bootx_firmware_tmp_check/fabricNode.json rename to tests/checks/bootx_service_failure_checks/fabricNode.json diff --git a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py b/tests/checks/bootx_service_failure_checks/test_bootx_service_failure_checks.py similarity index 99% rename from tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py rename to tests/checks/bootx_service_failure_checks/test_bootx_service_failure_checks.py index 7366a6f4..a8e7f4cb 100644 --- a/tests/checks/bootx_firmware_tmp_check/test_bootx_firmware_tmp_check.py +++ b/tests/checks/bootx_service_failure_checks/test_bootx_service_failure_checks.py @@ -16,7 +16,7 @@ ls_firmware_tmp_cmd = '[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0' grep_fatal_bootx_cmd = '[ -d /var/log/bootx/logs ] && grep -Ri "fatal" /var/log/bootx/logs/* 2>/dev/null | wc -l || echo 0' -test_function = "bootx_firmware_tmp_check" +test_function = "bootx_service_failure_checks" @pytest.mark.parametrize( "icurl_outputs, conn_cmds, cversion, expected_result", From 2e7dce9d5b4ee30262da8aa7e75413dddc1f91e7 Mon Sep 17 00:00:00 2001 From: DHANABALAN SELVARAJ Date: Mon, 22 Dec 2025 10:33:53 +0000 Subject: [PATCH 7/7] updatedthe impacted version check --- aci-preupgrade-validation-script.py | 130 ++++++++++++++-------------- 1 file changed, 63 insertions(+), 67 deletions(-) diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index fd253cf1..a1b38a2d 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -6017,82 +6017,78 @@ def bootx_service_failure_checks(fabric_nodes, cversion, username, password, **k if not cversion: return Result(result=MANUAL, msg="Current 
version not provided") - - affected = False - if (not cversion.older_than("6.0(2h)") and not cversion.newer_than("6.0(8h)")) or \ - (not cversion.older_than("6.1(1f)") and not cversion.newer_than("6.1(2g)")): - affected = True - - if not affected: - return Result(result=PASS, msg=VER_NOT_AFFECTED) - if not fabric_nodes: return Result(result=ERROR, msg="Fabric node response empty. Is the cluster healthy?", doc_url=doc_url) - # Filter for controller nodes only - controller = [node for node in fabric_nodes if node['fabricNode']['attributes']['role'] == 'controller'] - if not controller: - return Result(result=ERROR, msg="No controller nodes found. Is the cluster healthy?", doc_url=doc_url) + if (not cversion.older_than("6.0(2h)") and not cversion.newer_than("6.0(8h)")) or \ + (not cversion.older_than("6.1(1f)") and not cversion.newer_than("6.1(2g)")): - checked_apics = {} - has_error = False + # Filter for controller nodes only + controller = [node for node in fabric_nodes if node['fabricNode']['attributes']['role'] == 'controller'] + if not controller: + return Result(result=ERROR, msg="No controller nodes found. 
Is the cluster healthy?", doc_url=doc_url) - for apic in controller: - attr = apic['fabricNode']['attributes'] - if attr['address'] in checked_apics: - continue - checked_apics[attr['address']] = 1 - node_id = attr['id'] - - try: - c = Connection(attr['address']) - c.username = username - c.password = password - c.log = LOG_FILE - c.connect() - except Exception as e: - data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) - has_error = True - continue - - try: - # Check if /firmware/tmp directory exists and count files - c.cmd('[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0') - file_count = 0 - for line in c.output.strip().split('\n'): - line = line.strip() - if line.isdigit(): - file_count = int(line) - break + checked_apics = {} + has_error = False + + for apic in controller: + attr = apic['fabricNode']['attributes'] + if attr['address'] in checked_apics: + continue + checked_apics[attr['address']] = 1 + node_id = attr['id'] - # Check for fatal errors in bootx logs - c.cmd('[ -d /var/log/bootx/logs ] && grep -Ri "fatal" /var/log/bootx/logs/* 2>/dev/null | wc -l || echo 0') - fatal_count = 0 - for line in c.output.strip().split('\n'): - line = line.strip() - if line.isdigit(): - fatal_count = int(line) - break + try: + c = Connection(attr['address']) + c.username = username + c.password = password + c.log = LOG_FILE + c.connect() + except Exception as e: + data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) + has_error = True + continue - # Determine status - if file_count >= 1000: - status = 'FAIL - High file count' - data.append([node_id, str(file_count),"-", status]) - result = FAIL_UF - - if fatal_count > 0: - status = 'FAIL - Fatal errors found' - data.append([node_id, "-", str(fatal_count), status]) - result = FAIL_UF + try: + # Check if /firmware/tmp directory exists and count files + c.cmd('[ -d /firmware/tmp ] && ls -1 /firmware/tmp 2>/dev/null | wc -l || echo 0') + file_count = 0 + for line in c.output.strip().split('\n'): 
+ line = line.strip() + if line.isdigit(): + file_count = int(line) + break - except Exception as e: - data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) - has_error = True - continue - c.close() - if has_error and result == PASS: - result = ERROR + # Check for fatal errors in bootx logs + c.cmd('[ -d /var/log/bootx/logs ] && grep -Ri "fatal" /var/log/bootx/logs/* 2>/dev/null | wc -l || echo 0') + fatal_count = 0 + for line in c.output.strip().split('\n'): + line = line.strip() + if line.isdigit(): + fatal_count = int(line) + break + + # Determine status + if file_count >= 1000: + status = 'FAIL - High file count' + data.append([node_id, str(file_count),"-", status]) + result = FAIL_UF + + if fatal_count > 0: + status = 'FAIL - Fatal errors found' + data.append([node_id, "-", str(fatal_count), status]) + result = FAIL_UF + + except Exception as e: + data.append([node_id, '-', '-', 'ERROR: %s' % str(e)]) + has_error = True + continue + c.close() + if has_error and result == PASS: + result = ERROR + else: + return Result(result=PASS, msg=VER_NOT_AFFECTED) return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)