Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion aci-preupgrade-validation-script.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from textwrap import TextWrapper
from getpass import getpass
from collections import defaultdict, OrderedDict
from datetime import datetime
from datetime import datetime, timedelta
from argparse import ArgumentParser
from itertools import chain
import threading
Expand Down Expand Up @@ -6410,6 +6410,40 @@ def svccore_excessive_data_check(**kwargs):
return Result(result=ERROR, msg="Error occurred while fetching svccore object counts: {}".format(str(e)), doc_url=doc_url)


def _epg_summary_task_start_utc(start_ts):
    """Convert a dbgacEpgSummaryTask ``startTs`` string to a naive UTC datetime.

    The expected shape is ``YYYY-MM-DDTHH:MM:SS[.fff][+HH:MM|-HH:MM]``. The
    first 19 characters are parsed as wall-clock time; when the string ends in
    a ``+HH:MM`` / ``-HH:MM`` offset, that offset is removed so the result can
    be compared against ``datetime.utcnow()``. A string without a recognizable
    offset is returned as parsed (i.e. treated as already being UTC).

    Raises:
        ValueError: if the leading date/time portion cannot be parsed.
    """
    wall_clock = datetime.strptime(start_ts[:19], "%Y-%m-%dT%H:%M:%S")

    # An offset needs 6 characters after the 19-character date/time portion.
    if len(start_ts) < 25:
        return wall_clock
    suffix = start_ts[-6:]
    sign, hours, sep, minutes = suffix[0], suffix[1:3], suffix[3], suffix[4:6]
    if sign not in "+-" or sep != ":" or not (hours.isdigit() and minutes.isdigit()):
        return wall_clock

    offset = timedelta(hours=int(hours), minutes=int(minutes))
    # Local time = UTC + offset, so UTC = local time - offset.
    return wall_clock - offset if sign == "+" else wall_clock + offset


@check_wrapper(check_title="Stale dbgacEpgSummaryTask Objects")
def stale_epg_summary_task_check(tversion, **kwargs):
    """Flag dbgacEpgSummaryTask objects stuck in ``processing`` for over 24 hours.

    Applies only when the target version is 6.1 up to and including 6.1(5e),
    or 6.2 up to and including 6.2(1g). Returns MANUAL when ``tversion`` is
    missing, NA when the target version is outside those ranges, FAIL_O with
    one ``[DN, Start Time]`` row per stale task, and PASS otherwise.
    """
    result = PASS
    # NOTE(review): no node_id column - per the review discussion it is not
    # available in the object's attributes or DN, and the DN alone identifies
    # the object to delete. Not verified here.
    headers = ["DN", "Start Time"]
    data = []
    recommended_action = "Delete the listed stale dbgacEpgSummaryTask objects to prevent policymgr crash."
    doc_url = "https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#stale-dbgacepgsummarytask-objects"

    if not tversion:
        return Result(result=MANUAL, msg=TVER_MISSING)

    version_affected = (
        (tversion.major1 == "6" and tversion.major2 == "1" and (tversion.older_than("6.1(5e)") or tversion.same_as("6.1(5e)")))
        or (tversion.major1 == "6" and tversion.major2 == "2" and (tversion.older_than("6.2(1g)") or tversion.same_as("6.2(1g)")))
    )
    if not version_affected:
        return Result(result=NA, msg=VER_NOT_AFFECTED)

    # Naive UTC on both sides of the comparison: startTs is normalized to UTC
    # instead of dropping its offset, so a non-UTC APIC timezone does not skew
    # the 24-hour window.
    threshold = datetime.utcnow() - timedelta(hours=24)
    for obj in icurl("class", 'dbgacEpgSummaryTask.json?query-target-filter=eq(dbgacEpgSummaryTask.operSt,"processing")'):
        attr = obj["dbgacEpgSummaryTask"]["attributes"]
        dn = attr.get("dn", "")
        start_ts = attr.get("startTs", "")
        try:
            task_dt = _epg_summary_task_start_utc(start_ts)
        except ValueError:
            # Missing or unparsable start time: age is unknown, so skip it.
            continue
        # Strictly older than 24 hours; a task exactly at the threshold passes.
        if task_dt < threshold:
            data.append([dn, start_ts])

    if data:
        result = FAIL_O
    return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)

# ---- Script Execution ----


Expand Down Expand Up @@ -6581,6 +6615,7 @@ class CheckManager:
rogue_ep_coop_exception_mac_check,
n9k_c9408_model_lem_count_check,
inband_management_policy_misconfig_check,
stale_epg_summary_task_check,
]
ssh_checks = [
# General
Expand Down
11 changes: 10 additions & 1 deletion docs/docs/validations.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ Items | Defect | This Script
[N9K-C9408 with more than 5 N9K-X9400-16W LEMs][d31] | CSCws82819 | :white_check_mark: | :no_entry_sign:
[Multi-Pod Modular Spine Bootscript File][d32] | CSCwr66848 | :white_check_mark: | :no_entry_sign:
[Inband Management Policy Misconfiguration][d33]| CSCwd40071 | :white_check_mark: | :no_entry_sign:
[Stale dbgacEpgSummaryTask Objects][d34] | CSCwt69100 | :white_check_mark: | :no_entry_sign:

[d1]: #ep-announce-compatibility
[d2]: #eventmgr-db-size-defect-susceptibility
Expand Down Expand Up @@ -237,6 +238,7 @@ Items | Defect | This Script
[d31]: #n9k-c9408-with-more-than-5-n9k-x9400-16w-lems
[d32]: #multi-pod-modular-spine-bootscript-file
[d33]: #inband-management-policy-misconfiguration
[d34]: #stale-dbgacepgsummarytask-objects

## General Check Details

Expand Down Expand Up @@ -2797,6 +2799,12 @@ Administrators may be unable to access or operate the APIC GUI, potentially impa

This check will verify the count of the `svccoreCtrlr` Managed Object and raise an alarm with the bug if the object count is found to be more than 240. Remove the content or objects of `svccoreCtrlr` or `svccoreNode`. Contact Cisco TAC or upgrade to a release containing the fix for CSCws84232 before proceeding with an upgrade.

### Stale dbgacEpgSummaryTask Objects

Due to [CSCwt69100][70], a stale `dbgacEpgSummaryTask` object stuck in `processing` state with empty content can cause the policymgr process to crash on all APICs during an upgrade or process restart.

Delete any stale `dbgacEpgSummaryTask` objects before proceeding to prevent policymgr from crashing on restart.


[0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script
[1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html
Expand Down Expand Up @@ -2867,4 +2875,5 @@ This check will verify the count of the `svccoreCtrlr` Managed Object and raise
[66]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwr66848
[67]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwh80837
[68]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwd40071
[69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232
[69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232
[70]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt69100
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[
{
"dbgacEpgSummaryTask": {
"attributes": {
"dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
"operSt": "processing",
"startTs": "2026-01-14T11:00:00.000+00:00"
}
}
},
{
"dbgacEpgSummaryTask": {
"attributes": {
"dn": "action/policymgrsubj-[uni/tn-TN_TEST/epgToEpg-EPG_TEST_A_TO_EPG_TEST_B/dstepg-[uni/tn-TN_TEST/ap-AP_TEST/epg-EPG_TEST_B]]/dbgacEpgSummaryTask-ReportODACDef",
"operSt": "processing",
"startTs": "2026-01-14T12:01:00.000+00:00"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[
{
"dbgacEpgSummaryTask": {
"attributes": {
"dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
"operSt": "processing",
"startTs": "2026-01-14T12:00:00.000+00:00"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[
{
"dbgacEpgSummaryTask": {
"attributes": {
"dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
"operSt": "processing",
"startTs": "2024-01-01T00:00:00.000+00:00"
}
}
},
{
"dbgacEpgSummaryTask": {
"attributes": {
"dn": "action/policymgrsubj-[uni/tn-TN_TEST/epgToEpg-EPG_TEST_A_TO_EPG_TEST_B/dstepg-[uni/tn-TN_TEST/ap-AP_TEST/epg-EPG_TEST_B]]/dbgacEpgSummaryTask-ReportODACDef",
"operSt": "processing",
"startTs": "2026-01-15T11:30:00.000+00:00"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[
{
"dbgacEpgSummaryTask": {
"attributes": {
"dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
"operSt": "processing",
"startTs": "2026-01-15T11:30:00.000+00:00"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[
{
"dbgacEpgSummaryTask": {
"attributes": {
"dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
"operSt": "processing",
"startTs": "2024-01-01T00:00:00.000+00:00"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import os
import pytest
import importlib
from datetime import datetime
from helpers.utils import read_data

# The script's file name contains hyphens, so it cannot be loaded with a plain
# `import` statement; importlib takes the module name as a string instead.
script = importlib.import_module("aci-preupgrade-validation-script")

# Directory of this test file, passed to read_data() to locate the JSON fixtures.
# NOTE: this name shadows the builtin dir(); it is kept because the
# parametrized cases below reference it.
dir = os.path.dirname(os.path.abspath(__file__))

# Name of the check under test.
test_function = "stale_epg_summary_task_check"

# icurl query key
task_api = 'dbgacEpgSummaryTask.json?query-target-filter=eq(dbgacEpgSummaryTask.operSt,"processing")'

# Fixed "now" used by mock_datetime fixture: 2026-01-15 12:00:00 UTC
# Stale threshold = 2026-01-14 12:00:00 UTC (24h before fixed now)
# dbgacEpgSummaryTask_stale.json -> startTs 2024-01-01 (way before threshold) -> FAIL_O
# dbgacEpgSummaryTask_recent.json -> startTs 2026-01-15 11:30 UTC (30 min before fixed now) -> PASS
# dbgacEpgSummaryTask_exactly_24h.json -> startTs equal to the threshold -> PASS (comparison is strict)
# dbgacEpgSummaryTask_boundary_combo.json -> one task 25h old, one 23h59m old -> FAIL_O with one row
FIXED_NOW = datetime(2026, 1, 15, 12, 0, 0)


class MockDatetime:
    """Stand-in for the script's ``datetime`` name with a frozen clock.

    ``utcnow`` always reports ``FIXED_NOW``; construction and ``strptime`` are
    forwarded to the real ``datetime`` class so parsed and constructed values
    are genuine ``datetime`` instances.
    """

    def __new__(cls, *args, **kwargs):
        # Calling the mock like a class yields a real datetime object.
        return datetime(*args, **kwargs)

    @staticmethod
    def strptime(date_string, format):
        # Same parameters as datetime.strptime; simply forwarded.
        parsed = datetime.strptime(date_string, format)
        return parsed

    @staticmethod
    def utcnow():
        # The frozen "current time" shared by every test in this module.
        return FIXED_NOW


@pytest.fixture
def mock_datetime(monkeypatch):
    """Swap the script module's ``datetime`` for ``MockDatetime`` during a test.

    The replacement goes through ``monkeypatch``, so the original attribute is
    restored automatically when the test finishes.
    """
    patched_attribute = "datetime"
    monkeypatch.setattr(script, patched_attribute, MockDatetime)


@pytest.mark.parametrize(
    "tversion, icurl_outputs, expected_result, expected_data",
    [
        # Case 1: tversion is missing. Expected: MANUAL.
        (
            None,
            {},
            script.MANUAL,
            [],
        ),
        # Case 2: Target version 6.2(1h) is beyond both affected ranges (6.1(5e) and 6.2(1g)).
        # The target binary has the fix so version gate fails. Expected: NA without any API calls.
        (
            "6.2(1h)",
            {},
            script.NA,
            [],
        ),
        # Case 3: Target version 6.1(5e) is affected, no dbgacEpgSummaryTask objects found.
        # No stale tasks present — system is safe. Expected: PASS.
        (
            "6.1(5e)",
            {
                task_api: read_data(dir, "dbgacEpgSummaryTask_empty.json"),
            },
            script.PASS,
            [],
        ),
        # Case 4: Target version 6.1(5e) is affected, one task in processing state but startTs is
        # only 30 minutes old (within 24-hour threshold). Not considered stale.
        # Expected: PASS.
        (
            "6.1(5e)",
            {
                task_api: read_data(dir, "dbgacEpgSummaryTask_recent.json"),
            },
            script.PASS,
            [],
        ),
        # Case 5: Target version 6.1(5e) is affected, one task stuck in processing with startTs
        # from 2024 (way older than 24 hours). Stale task detected.
        # Expected: FAIL_O with the offending DN and startTs reported.
        (
            "6.1(5e)",
            {
                task_api: read_data(dir, "dbgacEpgSummaryTask_stale.json"),
            },
            script.FAIL_O,
            [
                [
                    "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
                    "2024-01-01T00:00:00.000+00:00",
                ]
            ],
        ),
        # Case 6: Target version 6.2(1g) is affected, two tasks — one stale (2024), one recent.
        # Only the stale task should be reported. Expected: FAIL_O with one row.
        (
            "6.2(1g)",
            {
                task_api: read_data(dir, "dbgacEpgSummaryTask_mixed.json"),
            },
            script.FAIL_O,
            [
                [
                    "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
                    "2024-01-01T00:00:00.000+00:00",
                ]
            ],
        ),
        # Case 7: Task started exactly 24 hours ago (startTs == threshold).
        # Boundary condition: task_dt < threshold is False when equal. Expected: PASS.
        (
            "6.1(5e)",
            {
                task_api: read_data(dir, "dbgacEpgSummaryTask_exactly_24h.json"),
            },
            script.PASS,
            [],
        ),
        # Case 8: Two tasks — one at 25 hours (stale) and one at 23h59m (not stale).
        # Only the 25h task crosses the threshold. Expected: FAIL_O with one row.
        (
            "6.1(5e)",
            {
                task_api: read_data(dir, "dbgacEpgSummaryTask_boundary_combo.json"),
            },
            script.FAIL_O,
            [
                [
                    "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
                    "2026-01-14T11:00:00.000+00:00",
                ]
            ],
        ),
    ],
)
def test_logic(run_check, mock_icurl, mock_datetime, tversion, icurl_outputs, expected_result, expected_data):
    """Run the check for one parametrized case and compare result and data rows.

    ``mock_datetime`` freezes the script's clock at FIXED_NOW so the 24-hour
    staleness threshold is deterministic. ``tversion`` is wrapped in
    ``script.AciVersion`` unless it is falsy, in which case ``None`` is passed
    to exercise the missing-target-version path.
    """
    result = run_check(
        tversion=script.AciVersion(tversion) if tversion else None,
    )
    assert result.result == expected_result
    assert result.data == expected_data