From 6489f1ca11a6750a4147d96197ff3b9e02909c83 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Wed, 13 May 2026 13:04:23 -0500 Subject: [PATCH 1/3] network enhancements --- nodescraper/interfaces/task.py | 15 + nodescraper/models/event.py | 21 +- .../plugins/inband/network/ethtool_vendor.py | 660 ++++++++++++++++++ .../inband/network/network_analyzer.py | 64 +- .../inband/network/network_collector.py | 162 +++++ .../plugins/inband/network/networkdata.py | 5 + test/unit/plugin/test_network_analyzer.py | 48 +- test/unit/plugin/test_network_collector.py | 41 ++ 8 files changed, 994 insertions(+), 22 deletions(-) create mode 100644 nodescraper/plugins/inband/network/ethtool_vendor.py diff --git a/nodescraper/interfaces/task.py b/nodescraper/interfaces/task.py index 16d1a70b..8855a48a 100644 --- a/nodescraper/interfaces/task.py +++ b/nodescraper/interfaces/task.py @@ -27,6 +27,7 @@ import copy import datetime import logging +import uuid from typing import Any, Optional, Union from nodescraper.constants import DEFAULT_LOGGER @@ -54,6 +55,7 @@ def __init__( max_event_priority_level: Union[EventPriority, str] = EventPriority.CRITICAL, parent: Optional[str] = None, task_result_hooks: Optional[list[TaskResultHook]] = None, + session_id: Optional[str] = None, **kwargs: dict[str, Any], ): if logger is None: @@ -65,6 +67,16 @@ def __init__( if not task_result_hooks: task_result_hooks = [] self.task_result_hooks = task_result_hooks + + if session_id is None and "session_id" in kwargs: + session_id = kwargs.pop("session_id") # type: ignore[assignment] + if session_id is not None: + try: + uuid.UUID(str(session_id)) + except (ValueError, TypeError, AttributeError): + raise ValueError("session_id must be a valid UUID") from None + self.session_id: Optional[str] = str(session_id) if session_id is not None else None + self.result: TaskResult = self._init_result() @property @@ -115,6 +127,9 @@ def _build_event( if self.parent: data["parent"] = self.parent + if self.session_id is not None: + data["session_id"] = self.session_id + if self.system_info.metadata: data["system_metadata"] = copy.copy(self.system_info.metadata) diff --git a/nodescraper/models/event.py b/nodescraper/models/event.py index 33cf2801..25315ef2 100644 --- a/nodescraper/models/event.py +++ b/nodescraper/models/event.py @@ -28,7 +28,7 @@ import re import uuid from enum import Enum -from typing import Any, Optional, Union +from typing import Any, Optional, Union, cast from pydantic import BaseModel, Field, field_serializer, field_validator @@ -113,15 +113,22 @@ def validate_category(cls, category: Optional[Union[str, Enum]]) -> str: @field_validator("priority", mode="before") @classmethod - def validate_priority(cls, priority: Union[str, EventPriority]) -> EventPriority: - """Allow priority to be set via string priority name + def validate_priority(cls, priority: Union[str, int, EventPriority]) -> EventPriority: + """Allow priority as EventPriority, enum name string, or IntEnum value (unknown int -> ERROR). + Args: - priority (Union[str, EventPriority]): event priority string or enum + priority: EventPriority, name string, integer matching a level, or unknown int (maps to ERROR). + Raises: - ValueError: if priority string is an invalid value - Returns: - EventPriority: priority enum + ValueError: if priority string is invalid, or if a boolean is passed. """ + if type(priority) is bool: + raise ValueError("priority must not be a boolean") + if isinstance(priority, int): + try: + return cast(EventPriority, EventPriority(priority)) + except ValueError: + return EventPriority.ERROR if isinstance(priority, str): try: return getattr(EventPriority, priority.upper()) diff --git a/nodescraper/plugins/inband/network/ethtool_vendor.py b/nodescraper/plugins/inband/network/ethtool_vendor.py new file mode 100644 index 00000000..04dbf4a4 --- /dev/null +++ b/nodescraper/plugins/inband/network/ethtool_vendor.py @@ -0,0 +1,660 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +"""Vendor-specific ethtool -S statistics models (Pollara / Thor2 / ConnectX-7).""" + +from typing import ClassVar, Optional, Union + +from pydantic import BaseModel, Field, model_validator +from typing_extensions import Self + + +class PollaraEthtoolStatistics(BaseModel): + """ifname ionic. Keeping only fields of interest. Skip queue-specific stats for now""" + + rx_csum_error: Optional[int] = None + hw_tx_dropped: Optional[int] = None + hw_rx_dropped: Optional[int] = None + hw_rx_over_errors: Optional[int] = None + hw_rx_missed_errors: Optional[int] = None + hw_tx_aborted_errors: Optional[int] = None + frames_rx_bad_fcs: Optional[int] = None + frames_rx_bad_all: Optional[int] = None + frames_rx_pause: Optional[int] = None + frames_rx_bad_length: Optional[int] = None + frames_rx_undersized: Optional[int] = None + frames_rx_oversized: Optional[int] = None + frames_rx_fragments: Optional[int] = None + frames_rx_jabber: Optional[int] = None + frames_rx_pripause: Optional[int] = None + frames_rx_stomped_crc: Optional[int] = None + frames_rx_too_long: Optional[int] = None + frames_rx_dropped: Optional[int] = None + frames_rx_less_than_64b: Optional[int] = None + frames_tx_bad: Optional[int] = None + frames_tx_pause: Optional[int] = None + frames_tx_pripause: Optional[int] = None + frames_tx_less_than_64b: Optional[int] = None + frames_tx_pri_0: Optional[int] = None + frames_tx_pri_1: Optional[int] = None + frames_tx_pri_2: Optional[int] = None + frames_tx_pri_3: Optional[int] = None + frames_tx_pri_4: Optional[int] = None + frames_tx_pri_5: Optional[int] = None + frames_tx_pri_6: Optional[int] = None + frames_tx_pri_7: Optional[int] = None + frames_rx_pri_0: Optional[int] = None + frames_rx_pri_1: Optional[int] = None + frames_rx_pri_2: Optional[int] = None + frames_rx_pri_3: Optional[int] = None + frames_rx_pri_4: Optional[int] = None + frames_rx_pri_5: Optional[int] = None + frames_rx_pri_6: Optional[int] = None + frames_rx_pri_7: Optional[int] = None + tx_pripause_0_1us_count: Optional[int] = None + tx_pripause_1_1us_count: Optional[int] = None + tx_pripause_2_1us_count: Optional[int] = None + tx_pripause_3_1us_count: Optional[int] = None + tx_pripause_4_1us_count: Optional[int] = None + tx_pripause_5_1us_count: Optional[int] = None + tx_pripause_6_1us_count: Optional[int] = None + tx_pripause_7_1us_count: Optional[int] = None + rx_pripause_0_1us_count: Optional[int] = None + rx_pripause_1_1us_count: Optional[int] = None + rx_pripause_2_1us_count: Optional[int] = None + rx_pripause_3_1us_count: Optional[int] = None + rx_pripause_4_1us_count: Optional[int] = None + rx_pripause_5_1us_count: Optional[int] = None + rx_pripause_6_1us_count: Optional[int] = None + rx_pripause_7_1us_count: Optional[int] = None + rx_pause_1us_count: Optional[int] = None + frames_tx_truncated: Optional[int] = None + + error_fields: ClassVar[list[str]] = [ + "rx_csum_error", + "hw_tx_dropped", + "hw_rx_dropped", + "hw_rx_over_errors", + "hw_rx_missed_errors", + "hw_tx_aborted_errors", + "frames_rx_bad_fcs", + "frames_rx_bad_all", + "frames_rx_bad_length", + "frames_rx_undersized", + "frames_rx_oversized", + "frames_rx_fragments", + "frames_rx_jabber", + "frames_rx_stomped_crc", + "frames_rx_too_long", + "frames_rx_dropped", + "frames_rx_less_than_64b", + "frames_tx_bad", + "frames_tx_pause", + "frames_tx_pripause", + "frames_tx_less_than_64b", + "frames_tx_pri_0", + "frames_tx_pri_1", + "frames_tx_pri_2", + "frames_tx_pri_3", + "frames_tx_pri_4", + "frames_tx_pri_5", + "frames_tx_pri_6", + "frames_tx_pri_7", + "tx_pripause_0_1us_count", + "tx_pripause_1_1us_count", + "tx_pripause_2_1us_count", + "tx_pripause_3_1us_count", + "tx_pripause_4_1us_count", + "tx_pripause_5_1us_count", + "tx_pripause_6_1us_count", + "tx_pripause_7_1us_count", + "frames_tx_truncated", + ] + + warning_fields: ClassVar[list[str]] = [ + "frames_rx_pause", + "frames_rx_pripause", + "frames_rx_pri_0", + "frames_rx_pri_1", + "frames_rx_pri_2", + "frames_rx_pri_3", + "frames_rx_pri_4", + "frames_rx_pri_5", + "frames_rx_pri_6", + "frames_rx_pri_7", + "rx_pripause_0_1us_count", + "rx_pripause_1_1us_count", + "rx_pripause_2_1us_count", + "rx_pripause_3_1us_count", + "rx_pripause_4_1us_count", + "rx_pripause_5_1us_count", + "rx_pripause_6_1us_count", + "rx_pripause_7_1us_count", + "rx_pause_1us_count", + ] + + +class Thor2EthtoolStatistics(BaseModel): + """ifname bnxt. Keeping only fields of interest. Skip queue-specific stats for now""" + + rx_total_l4_csum_errors: Optional[int] = None + rx_total_resets: Optional[int] = None + rx_total_buf_errors: Optional[int] = None + rx_total_oom_discards: Optional[int] = None + rx_total_netpoll_discards: Optional[int] = None + rx_total_ring_discards: Optional[int] = None + tx_total_resets: Optional[int] = None + tx_total_ring_discards: Optional[int] = None + total_missed_irqs: Optional[int] = None + ktls_tx_rec_err: Optional[int] = None + ktls_rx_resync_discard: Optional[int] = None + rx_fcs_err_frames: Optional[int] = None + rx_pause_frames: Optional[int] = None + rx_pfc_frames: Optional[int] = None + rx_align_err_frames: Optional[int] = None + rx_ovrsz_frames: Optional[int] = None + rx_jbr_frames: Optional[int] = None + rx_mtu_err_frames: Optional[int] = None + rx_pfc_ena_frames_pri0: Optional[int] = None + rx_pfc_ena_frames_pri1: Optional[int] = None + rx_pfc_ena_frames_pri2: Optional[int] = None + rx_pfc_ena_frames_pri3: Optional[int] = None + rx_pfc_ena_frames_pri4: Optional[int] = None + rx_pfc_ena_frames_pri5: Optional[int] = None + rx_pfc_ena_frames_pri6: Optional[int] = None + rx_pfc_ena_frames_pri7: Optional[int] = None + rx_undrsz_frames: Optional[int] = None + rx_runt_bytes: Optional[int] = None + rx_runt_frames: Optional[int] = None + rx_stat_discard: Optional[int] = None + rx_stat_err: Optional[int] = None + tx_pause_frames: Optional[int] = None + tx_pfc_frames: Optional[int] = None + tx_jabber_frames: Optional[int] = None + tx_fcs_err_frames: Optional[int] = None + tx_err: Optional[int] = None + tx_fifo_underruns: Optional[int] = None + tx_pfc_ena_frames_pri0: Optional[int] = None + tx_pfc_ena_frames_pri1: Optional[int] = None + tx_pfc_ena_frames_pri2: Optional[int] = None + tx_pfc_ena_frames_pri3: Optional[int] = None + tx_pfc_ena_frames_pri4: Optional[int] = None + tx_pfc_ena_frames_pri5: Optional[int] = None + tx_pfc_ena_frames_pri6: Optional[int] = None + tx_pfc_ena_frames_pri7: Optional[int] = None + tx_total_collisions: Optional[int] = None + tx_stat_discard: Optional[int] = None + tx_stat_error: Optional[int] = None + link_down_events: Optional[int] = None + continuous_pause_events: Optional[int] = None + resume_pause_events: Optional[int] = None + continuous_roce_pause_events: Optional[int] = None + resume_roce_pause_events: Optional[int] = None + pfc_pri0_rx_transitions: Optional[int] = None + pfc_pri1_rx_transitions: Optional[int] = None + pfc_pri2_rx_transitions: Optional[int] = None + pfc_pri3_rx_transitions: Optional[int] = None + pfc_pri4_rx_transitions: Optional[int] = None + pfc_pri5_rx_transitions: Optional[int] = None + pfc_pri6_rx_transitions: Optional[int] = None + pfc_pri7_rx_transitions: Optional[int] = None + rx_pcs_symbol_err: Optional[int] = None + rx_discard_bytes_cos0: Optional[int] = None + rx_discard_packets_cos0: Optional[int] = None + rx_discard_bytes_cos1: Optional[int] = None + rx_discard_packets_cos1: Optional[int] = None + rx_discard_bytes_cos2: Optional[int] = None + rx_discard_packets_cos2: Optional[int] = None + rx_discard_bytes_cos3: Optional[int] = None + rx_discard_packets_cos3: Optional[int] = None + rx_discard_bytes_cos4: Optional[int] = None + rx_discard_packets_cos4: Optional[int] = None + rx_discard_bytes_cos5: Optional[int] = None + rx_discard_packets_cos5: Optional[int] = None + rx_discard_bytes_cos6: Optional[int] = None + rx_discard_packets_cos6: Optional[int] = None + rx_discard_bytes_cos7: Optional[int] = None + rx_discard_packets_cos7: Optional[int] = None + rx_fec_uncorrectable_blocks: Optional[int] = None + rx_filter_miss: Optional[int] = None + pfc_pri0_tx_transitions: Optional[int] = None + pfc_pri1_tx_transitions: Optional[int] = None + pfc_pri2_tx_transitions: Optional[int] = None + pfc_pri3_tx_transitions: Optional[int] = None + pfc_pri4_tx_transitions: Optional[int] = None + pfc_pri5_tx_transitions: Optional[int] = None + pfc_pri6_tx_transitions: Optional[int] = None + pfc_pri7_tx_transitions: Optional[int] = None + hw_db_recov_dbs_dropped: Optional[int] = None + hw_db_recov_oo_drop_count: Optional[int] = None + lpbk_tx_discards: Optional[int] = None + lpbk_tx_errors: Optional[int] = None + lpbk_rx_discards: Optional[int] = None + lpbk_rx_errors: Optional[int] = None + + error_fields: ClassVar[list[str]] = [ + "rx_total_l4_csum_errors", + "rx_total_buf_errors", + "rx_total_oom_discards", + "rx_total_netpoll_discards", + "rx_total_ring_discards", + "tx_total_ring_discards", + "total_missed_irqs", + "ktls_tx_rec_err", + "ktls_rx_resync_discard", + "rx_fcs_err_frames", + "rx_align_err_frames", + "rx_ovrsz_frames", + "rx_jbr_frames", + "rx_mtu_err_frames", + "rx_undrsz_frames", + "rx_runt_bytes", + "rx_runt_frames", + "rx_stat_discard", + "rx_stat_err", + "tx_pause_frames", + "tx_pfc_frames", + "tx_jabber_frames", + "tx_fcs_err_frames", + "tx_err", + "tx_fifo_underruns", + "tx_pfc_ena_frames_pri0", + "tx_pfc_ena_frames_pri1", + "tx_pfc_ena_frames_pri2", + "tx_pfc_ena_frames_pri3", + "tx_pfc_ena_frames_pri4", + "tx_pfc_ena_frames_pri5", + "tx_pfc_ena_frames_pri6", + "tx_pfc_ena_frames_pri7", + "tx_total_collisions", + "tx_stat_discard", + "tx_stat_error", + "link_down_events", + "continuous_pause_events", + "resume_pause_events", + "continuous_roce_pause_events", + "resume_roce_pause_events", + "rx_pcs_symbol_err", + "rx_discard_bytes_cos0", + "rx_discard_packets_cos0", + "rx_discard_bytes_cos1", + "rx_discard_packets_cos1", + "rx_discard_bytes_cos2", + "rx_discard_packets_cos2", + "rx_discard_bytes_cos3", + "rx_discard_packets_cos3", + "rx_discard_bytes_cos4", + "rx_discard_packets_cos4", + "rx_discard_bytes_cos5", + "rx_discard_packets_cos5", + "rx_discard_bytes_cos6", + "rx_discard_packets_cos6", + "rx_discard_bytes_cos7", + "rx_discard_packets_cos7", + "rx_fec_uncorrectable_blocks", + "rx_filter_miss", + "pfc_pri0_tx_transitions", + "pfc_pri1_tx_transitions", + "pfc_pri2_tx_transitions", + "pfc_pri3_tx_transitions", + "pfc_pri4_tx_transitions", + "pfc_pri5_tx_transitions", + "pfc_pri6_tx_transitions", + "pfc_pri7_tx_transitions", + "hw_db_recov_dbs_dropped", + "hw_db_recov_oo_drop_count", + "lpbk_tx_discards", + "lpbk_tx_errors", + "lpbk_rx_discards", + "lpbk_rx_errors", + ] + + warning_fields: ClassVar[list[str]] = [ + "rx_total_resets", + "tx_total_resets", + "rx_pause_frames", + "rx_pfc_frames", + "rx_pfc_ena_frames_pri0", + "rx_pfc_ena_frames_pri1", + "rx_pfc_ena_frames_pri2", + "rx_pfc_ena_frames_pri3", + "rx_pfc_ena_frames_pri4", + "rx_pfc_ena_frames_pri5", + "rx_pfc_ena_frames_pri6", + "rx_pfc_ena_frames_pri7", + "pfc_pri0_rx_transitions", + "pfc_pri1_rx_transitions", + "pfc_pri2_rx_transitions", + "pfc_pri3_rx_transitions", + "pfc_pri4_rx_transitions", + "pfc_pri5_rx_transitions", + "pfc_pri6_rx_transitions", + "pfc_pri7_rx_transitions", + ] + + +class Cx7EthtoolStatistics(BaseModel): + """ifname mlx. Keeping only fields of interest. Skip queue-specific stats for now""" + + rx_xdp_drop: Optional[int] = None + rx_xdp_tx_err: Optional[int] = None + tx_queue_dropped: Optional[int] = None + tx_cqe_err: Optional[int] = None + tx_xdp_err: Optional[int] = None + rx_wqe_err: Optional[int] = None + rx_oversize_pkts_sw_drop: Optional[int] = None + rx_buff_alloc_err: Optional[int] = None + rx_arfs_err: Optional[int] = None + rx_tls_err: Optional[int] = None + rx_xsk_xdp_drop: Optional[int] = None + rx_xsk_wqe_err: Optional[int] = None + rx_xsk_oversize_pkts_sw_drop: Optional[int] = None + rx_xsk_buff_alloc_err: Optional[int] = None + tx_xsk_err: Optional[int] = None + rx_out_of_buffer: Optional[int] = None + rx_if_down_packets: Optional[int] = None + rx_steer_missed_packets: Optional[int] = None + rx_oversize_pkts_buffer: Optional[int] = None + rx_crc_errors_phy: Optional[int] = None + rx_in_range_len_errors_phy: Optional[int] = None + rx_out_of_range_len_phy: Optional[int] = None + rx_oversize_pkts_phy: Optional[int] = None + rx_symbol_err_phy: Optional[int] = None + rx_unsupported_op_phy: Optional[int] = None + rx_pause_ctrl_phy: Optional[int] = None + tx_pause_ctrl_phy: Optional[int] = None + rx_discards_phy: Optional[int] = None + tx_discards_phy: Optional[int] = None + tx_errors_phy: Optional[int] = None + rx_undersize_pkts_phy: Optional[int] = None + rx_fragments_phy: Optional[int] = None + rx_jabbers_phy: Optional[int] = None + link_down_events_phy: Optional[int] = None + rx_pcs_symbol_err_phy: Optional[int] = None + rx_pci_signal_integrity: Optional[int] = None + tx_pci_signal_integrity: Optional[int] = None + outbound_pci_stalled_rd: Optional[int] = None + outbound_pci_stalled_wr: Optional[int] = None + outbound_pci_stalled_rd_events: Optional[int] = None + outbound_pci_stalled_wr_events: Optional[int] = None + rx_prio0_discards: Optional[int] = None + rx_prio1_discards: Optional[int] = None + rx_prio2_discards: Optional[int] = None + rx_prio3_discards: Optional[int] = None + rx_prio4_discards: Optional[int] = None + rx_prio5_discards: Optional[int] = None + rx_prio6_discards: Optional[int] = None + rx_prio7_discards: Optional[int] = None + rx_global_pause: Optional[int] = None + rx_prio0_pause: Optional[int] = None + rx_prio1_pause: Optional[int] = None + rx_prio2_pause: Optional[int] = None + rx_prio3_pause: Optional[int] = None + rx_prio4_pause: Optional[int] = None + rx_prio5_pause: Optional[int] = None + rx_prio6_pause: Optional[int] = None + rx_prio7_pause: Optional[int] = None + rx_global_pause_duration: Optional[int] = None + rx_prio0_pause_duration: Optional[int] = None + rx_prio1_pause_duration: Optional[int] = None + rx_prio2_pause_duration: Optional[int] = None + rx_prio3_pause_duration: Optional[int] = None + rx_prio4_pause_duration: Optional[int] = None + rx_prio5_pause_duration: Optional[int] = None + rx_prio6_pause_duration: Optional[int] = None + rx_prio7_pause_duration: Optional[int] = None + tx_global_pause: Optional[int] = None + tx_prio0_pause: Optional[int] = None + tx_prio1_pause: Optional[int] = None + tx_prio2_pause: Optional[int] = None + tx_prio3_pause: Optional[int] = None + tx_prio4_pause: Optional[int] = None + tx_prio5_pause: Optional[int] = None + tx_prio6_pause: Optional[int] = None + tx_prio7_pause: Optional[int] = None + tx_global_pause_duration: Optional[int] = None + tx_prio0_pause_duration: Optional[int] = None + tx_prio1_pause_duration: Optional[int] = None + tx_prio2_pause_duration: Optional[int] = None + tx_prio3_pause_duration: Optional[int] = None + tx_prio4_pause_duration: Optional[int] = None + tx_prio5_pause_duration: Optional[int] = None + tx_prio6_pause_duration: Optional[int] = None + tx_prio7_pause_duration: Optional[int] = None + rx_global_pause_transition: Optional[int] = None + rx_prio0_pause_transition: Optional[int] = None + rx_prio1_pause_transition: Optional[int] = None + rx_prio2_pause_transition: Optional[int] = None + rx_prio3_pause_transition: Optional[int] = None + rx_prio4_pause_transition: Optional[int] = None + rx_prio5_pause_transition: Optional[int] = None + rx_prio6_pause_transition: Optional[int] = None + rx_prio7_pause_transition: Optional[int] = None + tx_pause_storm_warning_events: Optional[int] = None + tx_pause_storm_error_events: Optional[int] = None + module_unplug: Optional[int] = None + module_bus_stuck: Optional[int] = None + module_high_temp: Optional[int] = None + module_bad_shorted: Optional[int] = None + ipsec_rx_drop_pkts: Optional[int] = None + ipsec_rx_drop_bytes: Optional[int] = None + ipsec_rx_drop_mismatch_sa_sel: Optional[int] = None + ipsec_tx_drop_pkts: Optional[int] = None + ipsec_tx_drop_bytes: Optional[int] = None + ipsec_rx_drop_sp_alloc: Optional[int] = None + ipsec_rx_drop_sadb_miss: Optional[int] = None + ipsec_rx_drop_syndrome: Optional[int] = None + ipsec_tx_drop_bundle: Optional[int] = None + ipsec_tx_drop_no_state: Optional[int] = None + ipsec_tx_drop_not_ip: Optional[int] = None + ipsec_tx_drop_trailer: Optional[int] = None + rx_prio0_buf_discard: Optional[int] = None + rx_prio0_cong_discard: Optional[int] = None + rx_prio1_buf_discard: Optional[int] = None + rx_prio1_cong_discard: Optional[int] = None + rx_prio2_buf_discard: Optional[int] = None + rx_prio2_cong_discard: Optional[int] = None + rx_prio3_buf_discard: Optional[int] = None + rx_prio3_cong_discard: Optional[int] = None + rx_prio4_buf_discard: Optional[int] = None + rx_prio4_cong_discard: Optional[int] = None + rx_prio5_buf_discard: Optional[int] = None + rx_prio5_cong_discard: Optional[int] = None + rx_prio6_buf_discard: Optional[int] = None + rx_prio6_cong_discard: Optional[int] = None + rx_prio7_buf_discard: Optional[int] = None + rx_prio7_cong_discard: Optional[int] = None + + error_fields: ClassVar[list[str]] = [ + "rx_xdp_drop", + "rx_xdp_tx_err", + "tx_queue_dropped", + "tx_cqe_err", + "tx_xdp_err", + "rx_wqe_err", + "rx_oversize_pkts_sw_drop", + "rx_buff_alloc_err", + "rx_arfs_err", + "rx_tls_err", + "rx_xsk_xdp_drop", + "rx_xsk_wqe_err", + "rx_xsk_oversize_pkts_sw_drop", + "rx_xsk_buff_alloc_err", + "tx_xsk_err", + "rx_out_of_buffer", + "rx_if_down_packets", + "rx_steer_missed_packets", + "rx_oversize_pkts_buffer", + "rx_crc_errors_phy", + "rx_in_range_len_errors_phy", + "rx_out_of_range_len_phy", + "rx_oversize_pkts_phy", + "rx_symbol_err_phy", + "rx_unsupported_op_phy", + "tx_pause_ctrl_phy", + "rx_discards_phy", + "tx_discards_phy", + "tx_errors_phy", + "rx_undersize_pkts_phy", + "rx_fragments_phy", + "rx_jabbers_phy", + "link_down_events_phy", + "rx_pcs_symbol_err_phy", + "rx_pci_signal_integrity", + "tx_pci_signal_integrity", + "outbound_pci_stalled_rd", + "outbound_pci_stalled_wr", + "outbound_pci_stalled_rd_events", + "outbound_pci_stalled_wr_events", + "rx_prio0_discards", + "rx_prio1_discards", + "rx_prio2_discards", + "rx_prio3_discards", + "rx_prio4_discards", + "rx_prio5_discards", + "rx_prio6_discards", + "rx_prio7_discards", + "tx_global_pause", + "tx_prio0_pause", + "tx_prio1_pause", + "tx_prio2_pause", + "tx_prio3_pause", + "tx_prio4_pause", + "tx_prio5_pause", + "tx_prio6_pause", + "tx_prio7_pause", + "tx_global_pause_duration", + "tx_prio0_pause_duration", + "tx_prio1_pause_duration", + "tx_prio2_pause_duration", + "tx_prio3_pause_duration", + "tx_prio4_pause_duration", + "tx_prio5_pause_duration", + "tx_prio6_pause_duration", + "tx_prio7_pause_duration", + "tx_pause_storm_warning_events", + "tx_pause_storm_error_events", + "module_unplug", + "module_bus_stuck", + "module_high_temp", + "module_bad_shorted", + "ipsec_rx_drop_pkts", + "ipsec_rx_drop_bytes", + "ipsec_rx_drop_mismatch_sa_sel", + "ipsec_tx_drop_pkts", + "ipsec_tx_drop_bytes", + "ipsec_rx_drop_sp_alloc", + "ipsec_rx_drop_sadb_miss", + "ipsec_rx_drop_syndrome", + "ipsec_tx_drop_bundle", + "ipsec_tx_drop_no_state", + "ipsec_tx_drop_not_ip", + "ipsec_tx_drop_trailer", + "rx_prio0_buf_discard", + "rx_prio0_cong_discard", + "rx_prio1_buf_discard", + "rx_prio1_cong_discard", + "rx_prio2_buf_discard", + "rx_prio2_cong_discard", + "rx_prio3_buf_discard", + "rx_prio3_cong_discard", + "rx_prio4_buf_discard", + "rx_prio4_cong_discard", + "rx_prio5_buf_discard", + "rx_prio5_cong_discard", + "rx_prio6_buf_discard", + "rx_prio6_cong_discard", + "rx_prio7_buf_discard", + "rx_prio7_cong_discard", + ] + + warning_fields: ClassVar[list[str]] = [ + "rx_pause_ctrl_phy", + "rx_global_pause", + "rx_prio0_pause", + "rx_prio1_pause", + "rx_prio2_pause", + "rx_prio3_pause", + "rx_prio4_pause", + "rx_prio5_pause", + "rx_prio6_pause", + "rx_prio7_pause", + "rx_global_pause_transition", + "rx_prio0_pause_transition", + "rx_prio1_pause_transition", + "rx_prio2_pause_transition", + "rx_prio3_pause_transition", + "rx_prio4_pause_transition", + "rx_prio5_pause_transition", + "rx_prio6_pause_transition", + "rx_prio7_pause_transition", + "rx_global_pause_duration", + "rx_prio0_pause_duration", + "rx_prio1_pause_duration", + "rx_prio2_pause_duration", + "rx_prio3_pause_duration", + "rx_prio4_pause_duration", + "rx_prio5_pause_duration", + "rx_prio6_pause_duration", + "rx_prio7_pause_duration", + ] + + +VendorEthtoolStatisticsModel = ( + PollaraEthtoolStatistics | Thor2EthtoolStatistics | Cx7EthtoolStatistics +) + +VendorEthtoolStatisticsCls = Union[ + type[PollaraEthtoolStatistics], + type[Thor2EthtoolStatistics], + type[Cx7EthtoolStatistics], +] + + +# Map ifname prefixes to vendor-specific statistic models +# If netdev is ens, use Cx7 +# If netdev is benic, check if it starts with ionic or bnxt to determine if it's Pollara or Thor2 +VENDOR_PREFIX_MAP: dict[str, VendorEthtoolStatisticsCls] = { + "ionic": PollaraEthtoolStatistics, + "bnxt": Thor2EthtoolStatistics, + "mlx": Cx7EthtoolStatistics, +} + + +class EthtoolStatistics(BaseModel): + """Per-netdev ethtool -S row with optional vendor-parsed counters.""" + + netdev: Optional[str] = None + rdma_ifname: Optional[str] = Field( + default=None, + description="RDMA interface name from 'rdma link -j' used for vendor prefix selection", + ) + vendor_statistics: Optional[VendorEthtoolStatisticsModel] = None + + @model_validator(mode="after") + def validate_atleast_one_field(self) -> Self: + if not self.model_fields_set: + raise ValueError("At least one field must be set in EthtoolStatistics") + return self diff --git a/nodescraper/plugins/inband/network/network_analyzer.py b/nodescraper/plugins/inband/network/network_analyzer.py index dbd39fc8..27280c37 100644 --- a/nodescraper/plugins/inband/network/network_analyzer.py +++ b/nodescraper/plugins/inband/network/network_analyzer.py @@ -61,17 +61,16 @@ class NetworkAnalyzer(RegexAnalyzer[NetworkDataModel, NetworkAnalyzerArgs]): def analyze_data( self, data: NetworkDataModel, args: Optional[NetworkAnalyzerArgs] = None ) -> TaskResult: - """Analyze network statistics for non-zero error counters. - Currently only checks ethtool -S statistics. + """Analyze ethtool -S statistics: regex-based (per interface) and vendor-based (RDMA-scoped). Args: - data: Network data model with ethtool_info containing interface statistics. + data: Network data model with ethtool_info and/or rdma_ethtool_statistics. args: Optional analyzer arguments with custom error regex support. Returns: - TaskResult with status OK if no errors, ERROR if any error counter > 0. + TaskResult with OK, WARNING (no data or vendor warning counters only), or ERROR. """ - if not data.ethtool_info: + if not data.ethtool_info and not data.rdma_ethtool_statistics: self.result.message = "No network devices found" self.result.status = ExecutionStatus.WARNING return self.result @@ -81,26 +80,23 @@ def analyze_data( final_error_regex = self._convert_and_extend_error_regex(args.error_regex, self.ERROR_REGEX) - error_state = False + regex_error = False for interface_name, ethtool_info in data.ethtool_info.items(): - errors_on_interface = [] # (error_field, value) - # Loop through all statistics in the ethtool statistics dict + errors_on_interface: list[tuple[str, int]] = [] for stat_name, stat_value in ethtool_info.statistics.items(): - # Check if this statistic matches any error field pattern for error_regex_obj in final_error_regex: if error_regex_obj.regex.match(stat_name): - # Try to convert string value to int try: value = int(stat_value) except (ValueError, TypeError): - break # Skip non-numeric values + break if value > 0: errors_on_interface.append((stat_name, value)) - break # Stop checking patterns once we find a match + break if errors_on_interface: - error_state = True + regex_error = True error_names = [e[0] for e in errors_on_interface] errors_data = {field: value for field, value in errors_on_interface} self._log_event( @@ -114,9 +110,49 @@ def analyze_data( console_log=True, ) - if error_state: + vendor_error = False + vendor_warning = False + for stat in data.rdma_ethtool_statistics: + if stat.vendor_statistics is None: + continue + + vs = stat.vendor_statistics + error_fields = vs.error_fields + warning_fields = vs.warning_fields + + for field_name in error_fields + warning_fields: + error_value = getattr(vs, field_name, None) + if error_value is not None and error_value > 0: + is_warning_tier = field_name in warning_fields + priority = EventPriority.WARNING if is_warning_tier else EventPriority.ERROR + if is_warning_tier: + vendor_warning = True + else: + vendor_error = True + desc = ( + f"Ethtool warning detected: {field_name}" + if is_warning_tier + else f"Ethtool error detected: {field_name}" + ) + self._log_event( + category=EventCategory.NETWORK, + description=desc, + data={ + "netdev": stat.netdev, + "rdma_ifname": stat.rdma_ifname, + "error_field": field_name, + "error_count": error_value, + }, + priority=priority, + console_log=True, + ) + + if regex_error or vendor_error: self.result.message = "Network errors detected in statistics" self.result.status = ExecutionStatus.ERROR + elif vendor_warning: + self.result.message = "Network vendor ethtool warning counters non-zero" + self.result.status = ExecutionStatus.WARNING else: self.result.message = "No network errors detected in statistics" self.result.status = ExecutionStatus.OK diff --git a/nodescraper/plugins/inband/network/network_collector.py b/nodescraper/plugins/inband/network/network_collector.py index d530bd98..a962ddf7 100644 --- a/nodescraper/plugins/inband/network/network_collector.py +++ b/nodescraper/plugins/inband/network/network_collector.py @@ -23,15 +23,24 @@ # SOFTWARE. # ############################################################################### +import json import re from typing import Dict, List, Optional, Tuple +from pydantic import ValidationError + from nodescraper.base import InBandDataCollector from nodescraper.connection.inband import TextFileArtifact from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily from nodescraper.models import TaskResult +from nodescraper.utils import get_exception_traceback from .collector_args import NetworkCollectorArgs +from .ethtool_vendor import ( + VENDOR_PREFIX_MAP, + EthtoolStatistics, + VendorEthtoolStatisticsModel, +) from .networkdata import ( EthtoolInfo, IpAddress, @@ -53,6 +62,7 @@ class NetworkCollector(InBandDataCollector[NetworkDataModel, NetworkCollectorArg CMD_NEIGHBOR = "ip neighbor show" CMD_ETHTOOL_TEMPLATE = "ethtool {interface}" CMD_ETHTOOL_S_TEMPLATE = "ethtool -S {interface}" + CMD_RDMA_LINK_JSON = "rdma link -j" CMD_PING = "ping" CMD_WGET = "wget" CMD_CURL = "curl" @@ -519,6 +529,151 @@ def _collect_ethtool_info(self, interfaces: List[NetworkInterface]) -> Dict[str, return ethtool_data + def _collect_rdma_link_json(self) -> Optional[list[dict]]: + """Parse JSON from `rdma link -j`. Returns None on failure, [] when no links.""" + res = self._run_sut_cmd(self.CMD_RDMA_LINK_JSON) + if res.exit_code != 0: + self._log_event( + category=EventCategory.NETWORK, + description="rdma link -j failed (RDMA-scoped ethtool collection skipped)", + data={ + "command": self.CMD_RDMA_LINK_JSON, + "exit_code": res.exit_code, + "stderr": res.stderr, + }, + priority=EventPriority.WARNING, + ) + return None + if not res.stdout.strip(): + return [] + try: + parsed = json.loads(res.stdout) + except json.JSONDecodeError as e: + self._log_event( + category=EventCategory.NETWORK, + description="Failed to parse rdma link -j JSON", + data={"exception": get_exception_traceback(e)}, + priority=EventPriority.WARNING, + ) + return None + if not isinstance(parsed, list): + self._log_event( + category=EventCategory.NETWORK, + description="Unexpected rdma link -j JSON type", + data={"data_type": type(parsed).__name__}, + priority=EventPriority.WARNING, + ) + return None + return parsed + + def _collect_rdma_scoped_ethtool_statistic( + self, netdev: str, ifname: str + ) -> Optional[EthtoolStatistics]: + """Run `ethtool -S` for netdev and attach vendor-parsed stats (prefix from RDMA ifname).""" + cmd_s = f"ethtool -S {netdev}" + res = self._run_sut_cmd(cmd_s, sudo=True) + if res.exit_code != 0: + self._log_event( + category=EventCategory.NETWORK, + description=f"Error executing ethtool -S for device {netdev}", + data={ + "command": cmd_s, + "exit_code": res.exit_code, + "stderr": res.stderr, + }, + priority=EventPriority.ERROR, + console_log=True, + ) + return None + + if res.stdout: + self.result.artifacts.append( + TextFileArtifact( + filename=f"rdma-ethtool-{netdev}.log", + contents=res.stdout, + ) + ) + stats_dict = self._parse_ethtool_statistics(res.stdout, netdev) + + vendor_stats: VendorEthtoolStatisticsModel | None = None + for prefix, vendor_cls in VENDOR_PREFIX_MAP.items(): + if ifname.startswith(prefix): + vendor_fields = set(vendor_cls.model_fields.keys()) + stat_fields = set(stats_dict.keys()) - {"netdev"} + + missing_fields = vendor_fields - stat_fields + if missing_fields: + sorted_missing = sorted(missing_fields) + self._log_event( + category=EventCategory.NETWORK, + description=f"Missing fields in ethtool statistic for {netdev}", + data={ + "netdev": netdev, + "ifname": ifname, + "missing_fields_count": len(sorted_missing), + "missing_fields": sorted_missing[:50], + }, + priority=EventPriority.WARNING, + ) + + filtered_stats = {k: v for k, v in stats_dict.items() if k in vendor_fields} + try: + vendor_stats = vendor_cls.model_validate(filtered_stats) + except ValidationError as ve: + self._log_event( + category=EventCategory.NETWORK, + description=f"Failed to build vendor ethtool model for {netdev}", + data={"exception": get_exception_traceback(ve)}, + priority=EventPriority.WARNING, + ) + break + + return EthtoolStatistics( + netdev=netdev, + rdma_ifname=ifname or None, + vendor_statistics=vendor_stats, + ) + + def _collect_rdma_scoped_ethtool(self) -> tuple[List[str], List[EthtoolStatistics]]: + """Collect ethtool -S for netdevs listed on RDMA links (error-scraper EthtoolCollector parity).""" + netdev_list: List[str] = [] + statistics_list: List[EthtoolStatistics] = [] + + link_data = self._collect_rdma_link_json() + if link_data is None: + return netdev_list, statistics_list + + for link in link_data: + if not isinstance(link, dict): + self._log_event( + category=EventCategory.NETWORK, + description="Invalid data type for RDMA link entry", + data={"data_type": type(link).__name__}, + priority=EventPriority.WARNING, + ) + continue + + netdev = link.get("netdev") or "" + ifname = link.get("ifname") or "" + + if netdev: + netdev_list.append(netdev) + stat = self._collect_rdma_scoped_ethtool_statistic(netdev, ifname) + if stat is not None: + statistics_list.append(stat) + + if netdev_list: + self._log_event( + category=EventCategory.NETWORK, + description=( + f"Collected RDMA-scoped ethtool -S for {len(statistics_list)}/" + f"{len(netdev_list)} netdev(s) from rdma link" + ), + priority=EventPriority.INFO, + ) + + return netdev_list, statistics_list + def _collect_lldp_info(self) -> None: """Collect LLDP information using lldpcli and lldpctl commands.""" # Run lldpcli show neighbor @@ -618,6 +773,8 @@ def collect_data( neighbors = [] ethtool_data = {} network_accessible: Optional[bool] = None + rdma_ethtool_netdevs: List[str] = [] + rdma_ethtool_statistics: List[EthtoolStatistics] = [] # Check network connectivity if URL is provided if args and args.url: @@ -662,6 +819,9 @@ def collect_data( priority=EventPriority.INFO, ) + if self.system_info.os_family == OSFamily.LINUX: + rdma_ethtool_netdevs, rdma_ethtool_statistics = self._collect_rdma_scoped_ethtool() + # Collect routing table res_route = self._run_sut_cmd(self.CMD_ROUTE) if res_route.exit_code == 0: @@ -724,6 +884,8 @@ def collect_data( rules=rules, neighbors=neighbors, ethtool_info=ethtool_data, + rdma_ethtool_netdevs=rdma_ethtool_netdevs, + rdma_ethtool_statistics=rdma_ethtool_statistics, accessible=network_accessible, ) self.result.status = ExecutionStatus.OK diff --git a/nodescraper/plugins/inband/network/networkdata.py b/nodescraper/plugins/inband/network/networkdata.py index 20caaeca..c90a1fc1 100644 --- a/nodescraper/plugins/inband/network/networkdata.py +++ b/nodescraper/plugins/inband/network/networkdata.py @@ -29,6 +29,8 @@ from nodescraper.models import DataModel +from .ethtool_vendor import EthtoolStatistics + class IpAddress(BaseModel): """Individual IP address on an interface""" @@ -117,4 +119,7 @@ class NetworkDataModel(DataModel): ethtool_info: Dict[str, EthtoolInfo] = Field( default_factory=dict ) # Interface name -> EthtoolInfo mapping + # RDMA-scoped ethtool -S: netdevs from `rdma link -j` with vendor-parsed counters + rdma_ethtool_netdevs: List[str] = Field(default_factory=list) + rdma_ethtool_statistics: List[EthtoolStatistics] = Field(default_factory=list) accessible: Optional[bool] = None # Network accessibility check via ping diff --git a/test/unit/plugin/test_network_analyzer.py b/test/unit/plugin/test_network_analyzer.py index e886b765..6b7aeff3 100644 --- a/test/unit/plugin/test_network_analyzer.py +++ b/test/unit/plugin/test_network_analyzer.py @@ -27,6 +27,10 @@ from nodescraper.enums import EventPriority, ExecutionStatus from nodescraper.plugins.inband.network.analyzer_args import NetworkAnalyzerArgs +from nodescraper.plugins.inband.network.ethtool_vendor import ( + EthtoolStatistics, + Thor2EthtoolStatistics, +) from nodescraper.plugins.inband.network.network_analyzer import NetworkAnalyzer from nodescraper.plugins.inband.network.networkdata import ( EthtoolInfo, @@ -158,13 +162,55 @@ def test_multiple_interfaces_with_errors(network_analyzer): def test_empty_ethtool_info(network_analyzer): - """Test with empty ethtool_info: WARNING and message logged.""" + """Test with empty ethtool_info and no RDMA ethtool: WARNING and message logged.""" model = NetworkDataModel(ethtool_info={}) result = network_analyzer.analyze_data(model) assert result.status == ExecutionStatus.WARNING assert result.message == "No network devices found" +def test_rdma_ethtool_vendor_error_only(network_analyzer): + """RDMA-scoped vendor ethtool: error-tier counter raises ERROR.""" + stat = EthtoolStatistics( + netdev="eth0", + rdma_ifname="bnxt0", + vendor_statistics=Thor2EthtoolStatistics(tx_pfc_frames=4), + ) + model = NetworkDataModel(ethtool_info={}, rdma_ethtool_statistics=[stat]) + result = network_analyzer.analyze_data(model) + assert result.status == ExecutionStatus.ERROR + assert "Network errors detected" in result.message + assert len(result.events) == 1 + assert result.events[0].data["error_field"] == "tx_pfc_frames" + assert result.events[0].data["error_count"] == 4 + assert result.events[0].priority == EventPriority.ERROR + + +def test_rdma_ethtool_vendor_warning_only(network_analyzer): + """RDMA-scoped vendor ethtool: only warning-tier counters -> WARNING status.""" + stat = EthtoolStatistics( + netdev="eth0", + rdma_ifname="bnxt0", + vendor_statistics=Thor2EthtoolStatistics(rx_pause_frames=2), + ) + model = NetworkDataModel(ethtool_info={}, rdma_ethtool_statistics=[stat]) + result = network_analyzer.analyze_data(model) + assert result.status == ExecutionStatus.WARNING + assert "warning counters" in result.message + assert len(result.events) == 1 + assert result.events[0].data["error_field"] == "rx_pause_frames" + assert result.events[0].priority == EventPriority.WARNING + + +def test_rdma_ethtool_no_vendor_model_ok(network_analyzer): + """RDMA ethtool row without parsed vendor statistics is ignored by vendor path.""" + stat = EthtoolStatistics(netdev="eth0", rdma_ifname="unknown0", vendor_statistics=None) + model = NetworkDataModel(ethtool_info={}, rdma_ethtool_statistics=[stat]) + result = network_analyzer.analyze_data(model) + assert result.status == ExecutionStatus.OK + assert len(result.events) == 0 + + def test_regex_patterns_priority_numbers(network_analyzer): """Test that regex patterns match various priority numbers (0-7 and beyond).""" ethtool = EthtoolInfo( diff --git a/test/unit/plugin/test_network_collector.py b/test/unit/plugin/test_network_collector.py index 6382adeb..a7b1faae 100644 --- a/test/unit/plugin/test_network_collector.py +++ b/test/unit/plugin/test_network_collector.py @@ -648,3 +648,44 @@ def run_sut_cmd_side_effect(cmd, **kwargs): result, accessible = collector.check_network_accessibility() assert result.status == ExecutionStatus.ERRORS_DETECTED assert accessible is False + + +def test_collect_data_includes_rdma_ethtool(collector, conn_mock): + """RDMA-scoped ethtool -S is stored on NetworkDataModel when rdma link succeeds.""" + import json + + collector.system_info.os_family = OSFamily.LINUX + + rdma_link = [{"netdev": "eth0", "ifname": "bnxt0"}] + ethtool_s_bnxt = "NIC statistics:\n tx_pfc_frames: 0\n rx_pause_frames: 0\n" + + def run_sut_cmd_side_effect(cmd, **kwargs): + if "addr show" in cmd: + return MagicMock(exit_code=0, stdout=IP_ADDR_OUTPUT, command=cmd) + elif "route show" in cmd: + return MagicMock(exit_code=0, stdout=IP_ROUTE_OUTPUT, command=cmd) + elif "rule show" in cmd: + return MagicMock(exit_code=0, stdout=IP_RULE_OUTPUT, command=cmd) + elif "neighbor show" in cmd: + return MagicMock(exit_code=0, stdout=IP_NEIGHBOR_OUTPUT, command=cmd) + elif "rdma link -j" in cmd: + return MagicMock(exit_code=0, stdout=json.dumps(rdma_link), command=cmd) + elif "ethtool -S" in cmd and "eth0" in cmd: + return MagicMock(exit_code=0, stdout=ethtool_s_bnxt, command=cmd) + elif "ethtool" in cmd: + return MagicMock(exit_code=1, stdout="", command=cmd) + elif "lldpcli" in cmd or "lldpctl" in cmd: + return MagicMock(exit_code=1, stdout="", command=cmd) + return MagicMock(exit_code=1, stdout="", command=cmd) + + collector._run_sut_cmd = MagicMock(side_effect=run_sut_cmd_side_effect) + + result, data = collector.collect_data() + + assert result.status == ExecutionStatus.OK + assert data is not None + assert "eth0" in data.rdma_ethtool_netdevs + assert len(data.rdma_ethtool_statistics) == 1 + assert data.rdma_ethtool_statistics[0].netdev == "eth0" + assert data.rdma_ethtool_statistics[0].rdma_ifname == "bnxt0" + assert data.rdma_ethtool_statistics[0].vendor_statistics is not None From b7c291368034282fd3f2de4232d320df678da3a7 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Fri, 15 May 2026 11:08:55 -0500 Subject: [PATCH 2/3] utest fix --- nodescraper/plugins/inband/network/ethtool_vendor.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nodescraper/plugins/inband/network/ethtool_vendor.py b/nodescraper/plugins/inband/network/ethtool_vendor.py index 04dbf4a4..47e91be9 100644 --- a/nodescraper/plugins/inband/network/ethtool_vendor.py +++ b/nodescraper/plugins/inband/network/ethtool_vendor.py @@ -622,9 +622,11 @@ class Cx7EthtoolStatistics(BaseModel): ] -VendorEthtoolStatisticsModel = ( - PollaraEthtoolStatistics | Thor2EthtoolStatistics | Cx7EthtoolStatistics -) +VendorEthtoolStatisticsModel = Union[ + PollaraEthtoolStatistics, + Thor2EthtoolStatistics, + Cx7EthtoolStatistics, +] VendorEthtoolStatisticsCls = Union[ type[PollaraEthtoolStatistics], From 825b2e0fe26b08736fb571ebdc9a9cc3d53362e9 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Fri, 15 May 2026 12:19:03 -0500 Subject: [PATCH 3/3] fix --- nodescraper/plugins/inband/network/network_collector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nodescraper/plugins/inband/network/network_collector.py b/nodescraper/plugins/inband/network/network_collector.py index a962ddf7..7e5e4a39 100644 --- a/nodescraper/plugins/inband/network/network_collector.py +++ b/nodescraper/plugins/inband/network/network_collector.py @@ -595,7 +595,7 @@ def _collect_rdma_scoped_ethtool_statistic( ) stats_dict = self._parse_ethtool_statistics(res.stdout, netdev) - vendor_stats: VendorEthtoolStatisticsModel | None = None + vendor_stats: Optional[VendorEthtoolStatisticsModel] = None for prefix, vendor_cls in VENDOR_PREFIX_MAP.items(): if ifname.startswith(prefix): vendor_fields = set(vendor_cls.model_fields.keys())