Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions aci-preupgrade-validation-script.py
Original file line number Diff line number Diff line change
Expand Up @@ -6007,6 +6007,91 @@ def apic_vmm_inventory_sync_faults_check(**kwargs):
recommended_action=recommended_action,
doc_url=doc_url)


# Patterns used to pull the timestamp and the requested snapshot file name
# out of an access.log line. Compiled once instead of on every log line.
_SNAPSHOT_TS_RE = re.compile(r'\[(\d{1,2}/\w{3}/\d{4}):(\d{2}:\d{2}:\d{2})')
_SNAPSHOT_FILE_RE = re.compile(r'GET /snapshots/([^\s]+)')


def _find_snapshot_request_burst(access_logs, burst_size=10, window_seconds=60):
    """Return the file names of the first burst of snapshot requests, if any.

    A "burst" is `burst_size` consecutive (time-ordered) `GET /snapshots/...`
    requests whose first and last timestamps are at most `window_seconds`
    apart.

    Args:
        access_logs: iterable of access.log lines (already filtered by the
            caller; lines that do not carry both a timestamp and a
            `/snapshots/<file>` request are ignored).
        burst_size: number of consecutive requests that make up a burst.
        window_seconds: maximum time span, in seconds, of one burst.

    Returns:
        List of `burst_size` file names from the earliest matching window,
        or an empty list when no such window exists.
    """
    snapshot_requests = []
    for line in access_logs:
        ts_match = _SNAPSHOT_TS_RE.search(line)
        file_match = _SNAPSHOT_FILE_RE.search(line)
        if not (ts_match and file_match):
            continue
        timestamp_str = "{}:{}".format(ts_match.group(1), ts_match.group(2))
        try:
            timestamp = datetime.strptime(timestamp_str, "%d/%b/%Y:%H:%M:%S")
        except ValueError:
            # The regex matched something that is not a real date; skip it.
            continue
        snapshot_requests.append((timestamp, file_match.group(1)))

    snapshot_requests.sort()

    # Slide a window of `burst_size` requests over the sorted list. The range
    # is empty when there are fewer than `burst_size` requests.
    for start in range(len(snapshot_requests) - burst_size + 1):
        window = snapshot_requests[start:start + burst_size]
        if (window[-1][0] - window[0][0]).total_seconds() <= window_seconds:
            return [filename for _, filename in window]
    return []


@check_wrapper(check_title='Snapshot files check')
def snapshot_files_check(fabric_nodes, cversion, username, password, **kwargs):
    """Detect APICs that keep requesting snapshot files that do not exist.

    For current versions older than 6.0(3d), the last 1000 lines of
    `/var/log/dme/log/access.log` on every APIC are searched for
    `GET /snapshots` requests that contain `404`. An APIC is flagged when 10
    consecutive such requests fall within 60 seconds; the 10 requested file
    names are reported.

    Args:
        fabric_nodes: list of `fabricNode` objects (dicts) of the fabric.
        cversion: current version object; only `older_than()` is used here.
        username: login name assigned to each APIC `Connection`.
        password: password assigned to each APIC `Connection`.
        **kwargs: unused; accepted so all checks share one call signature.

    Returns:
        Result with:
          * ERROR   - no APIC found, or at least one APIC could not be checked
                      (connection failure, missing log file, command error).
          * FAIL_UF - at least one APIC shows a burst of failed requests.
          * PASS    - otherwise, including versions 6.0(3d) and newer.
    """
    result = PASS
    headers = ['apic_id', 'apic_name', 'snapshot_files']
    data = []
    recommended_action = 'Contact Cisco TAC for Support before upgrade'
    # The fragment is lowercase to match the other section anchors in the docs.
    doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#snapshot-files-check'

    if cversion.older_than('6.0(3d)'):
        apics = [node for node in fabric_nodes if node["fabricNode"]["attributes"]["role"] == "controller"]
        if not apics:
            return Result(result=ERROR, msg="No fabricNode of APIC. Is the cluster healthy?", doc_url=doc_url)

        # `fabricNode` in pre-4.0 does not have `address`, so fall back to the
        # `infraWiNode` objects under APIC 1 to learn each APIC's address.
        if not apics[0]["fabricNode"]["attributes"].get("address"):
            apic1 = [apic for apic in apics if apic["fabricNode"]["attributes"]["id"] == "1"][0]
            apic1_dn = apic1["fabricNode"]["attributes"]["dn"]
            apics = icurl("class", "{}/infraWiNode.json".format(apic1_dn))

        has_error = False
        for apic in apics:
            # The list holds either `fabricNode` or `infraWiNode` objects,
            # which use different attribute names for name and address.
            if apic.get("fabricNode"):
                attrs = apic["fabricNode"]["attributes"]
                apic_id = attrs["id"]
                apic_name = attrs["name"]
                apic_addr = attrs["address"]
            else:
                attrs = apic["infraWiNode"]["attributes"]
                apic_id = attrs["id"]
                apic_name = attrs["nodeName"]
                apic_addr = attrs["addr"]

            # NOTE(review): the connection is never explicitly closed here;
            # confirm whether `Connection` needs a close call.
            try:
                c = Connection(apic_addr)
                c.username = username
                c.password = password
                c.log = LOG_FILE
                c.connect()
            except Exception as e:
                data.append([apic_id, apic_name, str(e)])
                has_error = True
                continue

            try:
                c.cmd('tail -n 1000 /var/log/dme/log/access.log | grep "GET /snapshots" | grep 404')
                access_logs = c.output.splitlines()
                # A short output containing "No such file or directory" is
                # treated as the log file itself being missing.
                # NOTE(review): the threshold of 15 lines is a heuristic whose
                # origin is not visible here - confirm.
                if len(access_logs) < 15 and any("No such file or directory" in line for line in access_logs):
                    data.append([apic_id, apic_name, '/var/log/dme/log/access.log not found'])
                    has_error = True
                    continue

                # Report the files of the first window of 10 consecutive
                # requests that fall within 1 minute.
                for filename in _find_snapshot_request_burst(access_logs, burst_size=10, window_seconds=60):
                    data.append([apic_id, apic_name, filename])
            except Exception as e:
                data.append([apic_id, apic_name, str(e)])
                has_error = True
                continue

        if has_error:
            result = ERROR
        elif data:
            result = FAIL_UF

    return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)


# ---- Script Execution ----


Expand Down Expand Up @@ -6168,6 +6253,7 @@ class CheckManager:
standby_sup_sync_check,
isis_database_byte_check,
configpush_shard_check,
snapshot_files_check,

]
ssh_checks = [
Expand Down
16 changes: 15 additions & 1 deletion docs/docs/validations.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ Items | Defect | This Script
[Stale pconsRA Object][d26] | CSCwp22212 | :warning:{title="Deprecated"} | :no_entry_sign:
[ISIS DTEPs Byte Size][d27] | CSCwp15375 | :white_check_mark: | :no_entry_sign:
[Policydist configpushShardCont Crash][d28] | CSCwp95515 | :white_check_mark: |
[Snapshot files check][d29] | CSCwe07002 | :white_check_mark: | :no_entry_sign:

[d1]: #ep-announce-compatibility
[d2]: #eventmgr-db-size-defect-susceptibility
Expand Down Expand Up @@ -220,7 +221,7 @@ Items | Defect | This Script
[d26]: #stale-pconsra-object
[d27]: #isis-dteps-byte-size
[d28]: #policydist-configpushshardcont-crash

[d29]: #snapshot-files-check

## General Check Details

Expand Down Expand Up @@ -2614,6 +2615,18 @@ Due to [CSCwp95515][59], upgrading to an affected version while having any `conf
If any instances of `configpushShardCont` are flagged by this script, Cisco TAC must be contacted to identify and resolve the underlying issue before performing the upgrade.


### Snapshot files check

RCA:
The issue occurred in a 3-node APIC cluster where the AE process on one APIC was busy. The reason is that it keeps trying to fetch snapshot files that were taken earlier but are now missing on all APICs.

IMPACT:
The access logs will be flooded with GET calls. APIC upgrade/downgrade will fail with the message `Installer Exited - Pre-upgrade callbacks were not completed`.

Suggestion:
Restart AE on each APIC, one at a time. For reference, see [CSCwe07002][62].


[0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script
[1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html
[2]: https://www.cisco.com/c/en/us/support/switches/nexus-9000-series-switches/products-release-notes-list.html
Expand Down Expand Up @@ -2676,3 +2689,4 @@ If any instances of `configpushShardCont` are flagged by this script, Cisco TAC
[59]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwp95515
[60]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#Inter
[61]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#EnablePolicyCompression
[62]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwe07002
93 changes: 93 additions & 0 deletions tests/checks/snapshot_files_check/fabricNode.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
[
{
"fabricNode": {
"attributes": {
"address": "10.0.0.1",
"dn": "topology/pod-1/node-1",
"fabricSt": "commissioned",
"id": "1",
"model": "APIC-SERVER-L2",
"monPolDn": "uni/fabric/monfab-default",
"name": "apic1",
"nodeType": "unspecified",
"role": "controller"
}
}
},
{
"fabricNode": {
"attributes": {
"address": "10.0.0.2",
"dn": "topology/pod-1/node-2",
"fabricSt": "commissioned",
"id": "2",
"model": "APIC-SERVER-L2",
"monPolDn": "uni/fabric/monfab-default",
"name": "apic2",
"nodeType": "unspecified",
"role": "controller"
}
}
},
{
"fabricNode": {
"attributes": {
"address": "10.0.0.3",
"dn": "topology/pod-2/node-3",
"fabricSt": "commissioned",
"id": "3",
"model": "APIC-SERVER-L2",
"monPolDn": "uni/fabric/monfab-default",
"name": "apic3",
"nodeType": "unspecified",
"role": "controller"
}
}
},
{
"fabricNode": {
"attributes": {
"address": "10.0.0.101",
"dn": "topology/pod-1/node-101",
"fabricSt": "active",
"id": "101",
"model": "N9K-C93180YC-FX",
"monPolDn": "uni/fabric/monfab-default",
"name": "leaf101",
"nodeType": "unspecified",
"role": "leaf"
}
}
},
{
"fabricNode": {
"attributes": {
"address": "10.0.0.102",
"dn": "topology/pod-1/node-102",
"fabricSt": "active",
"id": "102",
"model": "N9K-C93180YC-FX",
"monPolDn": "uni/fabric/monfab-default",
"name": "leaf102",
"nodeType": "unspecified",
"role": "leaf"
}
}
},
{
"fabricNode": {
"attributes": {
"address": "10.0.0.201",
"dn": "topology/pod-1/node-201",
"fabricSt": "active",
"id": "201",
"model": "N9K-C9504",
"monPolDn": "uni/fabric/monfab-default",
"name": "spine201",
"nodeType": "unspecified",
"role": "spine"
}
}
}
]

48 changes: 48 additions & 0 deletions tests/checks/snapshot_files_check/fabricNode_no_apic.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
[
{
"fabricNode": {
"attributes": {
"address": "10.0.0.101",
"dn": "topology/pod-1/node-101",
"fabricSt": "active",
"id": "101",
"model": "N9K-C93180YC-FX",
"monPolDn": "uni/fabric/monfab-default",
"name": "leaf101",
"nodeType": "unspecified",
"role": "leaf"
}
}
},
{
"fabricNode": {
"attributes": {
"address": "10.0.0.102",
"dn": "topology/pod-1/node-102",
"fabricSt": "active",
"id": "102",
"model": "N9K-C93180YC-FX",
"monPolDn": "uni/fabric/monfab-default",
"name": "leaf102",
"nodeType": "unspecified",
"role": "leaf"
}
}
},
{
"fabricNode": {
"attributes": {
"address": "10.0.0.201",
"dn": "topology/pod-1/node-201",
"fabricSt": "active",
"id": "201",
"model": "N9K-C9504",
"monPolDn": "uni/fabric/monfab-default",
"name": "spine201",
"nodeType": "unspecified",
"role": "spine"
}
}
}
]

62 changes: 62 additions & 0 deletions tests/checks/snapshot_files_check/fabricNode_old.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
[
{
"fabricNode": {
"attributes": {
"dn": "topology/pod-1/node-1",
"fabricSt": "unknown",
"nodeType": "unspecified",
"id": "1",
"version": "A",
"role": "controller",
"adSt": "on",
"name": "apic1",
"model": "APIC-SERVER-M1"
}
}
},
{
"fabricNode": {
"attributes": {
"dn": "topology/pod-1/node-2",
"fabricSt": "unknown",
"nodeType": "unspecified",
"id": "2",
"version": "A",
"role": "controller",
"adSt": "on",
"name": "apic2",
"model": "APIC-SERVER-M1"
}
}
},
{
"fabricNode": {
"attributes": {
"dn": "topology/pod-2/node-3",
"fabricSt": "unknown",
"nodeType": "unspecified",
"id": "3",
"version": "A",
"role": "controller",
"adSt": "on",
"name": "apic3",
"model": "APIC-SERVER-M1"
}
}
},
{
"fabricNode": {
"attributes": {
"dn": "topology/pod-1/node-101",
"fabricSt": "active",
"nodeType": "unspecified",
"id": "101",
"version": "",
"role": "leaf",
"adSt": "on",
"name": "leaf1",
"model": "N9K-C9396PX"
}
}
}
]
32 changes: 32 additions & 0 deletions tests/checks/snapshot_files_check/fabricNode_old_single_apic.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[
{
"fabricNode": {
"attributes": {
"dn": "topology/pod-1/node-1",
"fabricSt": "unknown",
"nodeType": "unspecified",
"id": "1",
"version": "A",
"role": "controller",
"adSt": "on",
"name": "apic1",
"model": "APIC-SERVER-M1"
}
}
},
{
"fabricNode": {
"attributes": {
"dn": "topology/pod-1/node-101",
"fabricSt": "active",
"nodeType": "unspecified",
"id": "101",
"version": "A",
"role": "leaf",
"adSt": "on",
"name": "leaf101",
"model": "N9K-C93180YC-EX"
}
}
}
]
Loading