Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions nodescraper/plugins/inband/nvme/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
###############################################################################
#
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
###############################################################################
# Re-export the plugin class so it can be imported from the package root.
from .nvme_plugin import NvmePlugin

# Public API of this package.
__all__ = ["NvmePlugin"]
153 changes: 153 additions & 0 deletions nodescraper/plugins/inband/nvme/nvme_collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
###############################################################################
#
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
###############################################################################
import os
import re

from pydantic import ValidationError

from nodescraper.base import InBandDataCollector
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily
from nodescraper.models import TaskResult

from .nvmedata import NvmeDataModel


class NvmeCollector(InBandDataCollector[NvmeDataModel, None]):
    """Collect NVMe details from the system.

    NVMe controllers are discovered by listing ``/dev``; a fixed set of
    ``nvme`` CLI commands is then run against each controller and the raw
    stdout of every successful command is stored per device.
    """

    DATA_MODEL = NvmeDataModel

    def collect_data(
        self,
        args=None,
    ) -> tuple[TaskResult, NvmeDataModel | None]:
        """Collect detailed NVMe information from all NVMe devices.

        Args:
            args: Unused by this collector.

        Returns:
            tuple[TaskResult, NvmeDataModel | None]: Task result and data model with NVMe command outputs.
            The data model is None when collection is skipped (Windows), when no
            device is found, when no command succeeds, or when validation fails.
        """
        if self.system_info.os_family == OSFamily.WINDOWS:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="NVMe collection not supported on Windows",
                priority=EventPriority.WARNING,
            )
            self.result.message = "NVMe data collection skipped on Windows"
            self.result.status = ExecutionStatus.NOT_RAN
            return self.result, None

        nvme_devices = self._get_nvme_devices()
        if not nvme_devices:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="No NVMe devices found",
                priority=EventPriority.ERROR,
            )
            self.result.message = "No NVMe devices found"
            self.result.status = ExecutionStatus.ERROR
            return self.result, None

        # Keyed by device basename (e.g. "nvme0"); devices for which every
        # command failed are left out entirely.
        all_device_data = {}
        for dev in nvme_devices:
            device_data = self._collect_device_data(dev)
            if device_data:
                all_device_data[os.path.basename(dev)] = device_data

        if not all_device_data:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="Failed to collect any NVMe data",
                priority=EventPriority.ERROR,
            )
            self.result.message = "No NVMe data collected"
            self.result.status = ExecutionStatus.ERROR
            return self.result, None

        try:
            nvme_data = NvmeDataModel(devices=all_device_data)
        except ValidationError as exp:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="Validation error while building NvmeDataModel",
                data={"errors": exp.errors(include_url=False)},
                priority=EventPriority.ERROR,
            )
            self.result.message = "NVMe data invalid format"
            self.result.status = ExecutionStatus.ERROR
            return self.result, None

        self._log_event(
            category=EventCategory.SW_DRIVER,
            description="Collected NVMe data",
            data=nvme_data.model_dump(),
            priority=EventPriority.INFO,
        )
        self.result.message = "NVMe data successfully collected"
        self.result.status = ExecutionStatus.OK
        return self.result, nvme_data

    def _collect_device_data(self, dev: str) -> dict[str, str]:
        """Run every NVMe query against one controller and gather the outputs.

        Args:
            dev: Controller device path, e.g. ``/dev/nvme0``.

        Returns:
            dict[str, str]: Raw stdout keyed by data-model field name, containing
            only the commands that exited with code 0. Each failing command is
            logged as a WARNING event and omitted.
        """
        commands = {
            "smart_log": f"nvme smart-log {dev}",
            "error_log": f"nvme error-log {dev} --log-entries=256",
            "id_ctrl": f"nvme id-ctrl {dev}",
            # Only namespace 1 of each controller is queried.
            "id_ns": f"nvme id-ns {dev}n1",
            "fw_log": f"nvme fw-log {dev}",
            "self_test_log": f"nvme self-test-log {dev}",
            "get_log": f"nvme get-log {dev} --log-id=6 --log-len=512",
        }

        device_data = {}
        for key, cmd in commands.items():
            res = self._run_sut_cmd(cmd, sudo=True)
            if res.exit_code == 0:
                device_data[key] = res.stdout
                continue

            self._log_event(
                category=EventCategory.SW_DRIVER,
                description=f"Failed to execute NVMe command: '{cmd}'",
                # stderr is included so the failure can be diagnosed from the
                # event alone, matching the /dev listing failure event.
                data={"command": cmd, "exit_code": res.exit_code, "stderr": res.stderr},
                priority=EventPriority.WARNING,
                console_log=True,
            )

        return device_data

    def _get_nvme_devices(self) -> list[str]:
        """List NVMe controller device nodes found under ``/dev``.

        Only entries named ``nvme<digits>`` are kept, so namespace nodes such
        as ``nvme0n1`` are excluded.

        Returns:
            list[str]: Absolute device paths (``/dev/nvmeN``) in the order
            reported by ``ls``; empty if the listing fails or nothing matches.
        """
        res = self._run_sut_cmd("ls /dev", sudo=False)
        if res.exit_code != 0:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="Failed to list /dev directory",
                data={"exit_code": res.exit_code, "stderr": res.stderr},
                priority=EventPriority.ERROR,
            )
            return []

        # Compiled once; fullmatch already anchors both ends of the entry.
        controller_name = re.compile(r"nvme\d+")
        # Strip each entry so stray whitespace around a name cannot hide a device.
        entries = (line.strip() for line in res.stdout.splitlines())
        return [f"/dev/{entry}" for entry in entries if controller_name.fullmatch(entry)]
37 changes: 37 additions & 0 deletions nodescraper/plugins/inband/nvme/nvme_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
###############################################################################
#
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
###############################################################################
from nodescraper.base import InBandDataPlugin

from .nvme_collector import NvmeCollector
from .nvmedata import NvmeDataModel


class NvmePlugin(InBandDataPlugin[NvmeDataModel, None, None]):
    """Plugin for collection of NVMe data.

    Only a data model and a collector are wired up in this class body.
    """

    # Data model produced by this plugin.
    DATA_MODEL = NvmeDataModel

    # Collector class that gathers the NVMe data.
    COLLECTOR = NvmeCollector
43 changes: 43 additions & 0 deletions nodescraper/plugins/inband/nvme/nvmedata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
###############################################################################
#
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
###############################################################################
from pydantic import BaseModel

from nodescraper.models import DataModel


class DeviceNvmeData(BaseModel):
    """Raw NVMe command outputs for a single device.

    Every field is optional and defaults to None.
    """

    # Output of `nvme smart-log <dev>`.
    smart_log: str | None = None
    # Output of `nvme error-log <dev> --log-entries=256`.
    error_log: str | None = None
    # Output of `nvme id-ctrl <dev>`.
    id_ctrl: str | None = None
    # Output of `nvme id-ns <dev>n1`.
    id_ns: str | None = None
    # Output of `nvme fw-log <dev>`.
    fw_log: str | None = None
    # Output of `nvme self-test-log <dev>`.
    self_test_log: str | None = None
    # Output of `nvme get-log <dev> --log-id=6 --log-len=512`.
    get_log: str | None = None
    # NOTE(review): no command in the NVMe collector's command set fills this
    # field, so it presumably always stays None — confirm the intended source.
    telemetry_log: str | None = None


class NvmeDataModel(DataModel):
    """Collected NVMe data for all devices on a system."""

    # Per-device command outputs, keyed by device name.
    # The collector uses the device node's basename as the key (e.g. "nvme0").
    devices: dict[str, DeviceNvmeData]
115 changes: 115 additions & 0 deletions test/unit/plugin/test_nvme_collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
###############################################################################
#
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
###############################################################################
from unittest.mock import MagicMock

import pytest

from nodescraper.enums import EventPriority, ExecutionStatus, OSFamily
from nodescraper.enums.systeminteraction import SystemInteractionLevel
from nodescraper.models import TaskResult
from nodescraper.plugins.inband.nvme.nvme_collector import NvmeCollector
from nodescraper.plugins.inband.nvme.nvmedata import NvmeDataModel


@pytest.fixture
def collector(system_info, conn_mock):
    """Provide an NvmeCollector with mocked event logging and command execution."""
    nvme_collector = NvmeCollector(
        connection=conn_mock,
        system_interaction_level=SystemInteractionLevel.PASSIVE,
        system_info=system_info,
    )
    # Replace the logging and command hooks so no real system is touched.
    for hook_name in ("_log_event", "_run_sut_cmd"):
        setattr(nvme_collector, hook_name, MagicMock())
    nvme_collector.result = TaskResult()
    return nvme_collector


def test_skips_on_windows(collector):
    """On Windows the collector reports NOT_RAN, returns no data and logs once."""
    collector.system_info = MagicMock(os_family=OSFamily.WINDOWS)

    task_result, nvme_data = collector.collect_data()

    assert task_result.status == ExecutionStatus.NOT_RAN
    assert nvme_data is None
    collector._log_event.assert_called_once()
    logged_description = collector._log_event.call_args.kwargs["description"]
    assert "Windows" in logged_description


def test_successful_collection(collector):
    """All commands succeed for one discovered controller.

    The command runner is mocked per command, so no real NVMe hardware is
    needed: `ls /dev` reports a single controller and every nvme command
    returns successfully.
    """
    collector.system_info = MagicMock(os_family=OSFamily.LINUX)

    def fake_cmd(cmd, sudo):
        # Discovery must see a controller node; a blanket "output" stdout
        # would match no device and short-circuit collection.
        stdout = "nvme0" if cmd == "ls /dev" else "output"
        return MagicMock(exit_code=0, stdout=stdout)

    collector._run_sut_cmd.side_effect = fake_cmd

    result, data = collector.collect_data()

    assert result.status == ExecutionStatus.OK
    assert result.message == "NVMe data successfully collected"
    assert isinstance(data, NvmeDataModel)
    assert set(data.devices) == {"nvme0"}
    # One `ls /dev` for discovery plus seven nvme commands for the device.
    assert collector._run_sut_cmd.call_count == 8
    assert any(
        "Collected NVMe data" in call.kwargs["description"]
        for call in collector._log_event.call_args_list
    )


def test_partial_failures(collector):
    """Only `smart-log` succeeds; the remaining six commands fail.

    Collection should still succeed with the partial data, and each failed
    command should be logged as a WARNING event.
    """
    collector.system_info = MagicMock(os_family=OSFamily.LINUX)

    def fake_cmd(cmd, sudo):
        # Discovery has to succeed, otherwise the per-command failure path
        # under test is never reached.
        if cmd == "ls /dev":
            return MagicMock(exit_code=0, stdout="nvme0")
        return MagicMock(exit_code=0 if "smart-log" in cmd else 1, stdout="out")

    collector._run_sut_cmd.side_effect = fake_cmd

    result, data = collector.collect_data()

    assert result.status == ExecutionStatus.OK
    assert isinstance(data, NvmeDataModel)
    device = data.devices["nvme0"]
    assert device.smart_log == "out"
    assert device.error_log is None

    warning_events = [
        call
        for call in collector._log_event.call_args_list
        if call.kwargs["priority"] == EventPriority.WARNING
    ]
    assert len(warning_events) == 6


def test_no_data_collected(collector):
    """A controller is discovered but every nvme command fails.

    The command runner is mocked per command, so no real NVMe hardware is
    needed.
    """
    collector.system_info = MagicMock(os_family=OSFamily.LINUX)

    def fake_cmd(cmd, sudo):
        # Discovery succeeds so the "no data collected" branch is reached
        # rather than the "no devices found" one.
        if cmd == "ls /dev":
            return MagicMock(exit_code=0, stdout="nvme0")
        return MagicMock(exit_code=1, stdout="")

    collector._run_sut_cmd.side_effect = fake_cmd

    result, data = collector.collect_data()

    assert result.status == ExecutionStatus.ERROR
    assert data is None
    assert "No NVMe data collected" in result.message
    assert any(
        call.kwargs["priority"] == EventPriority.ERROR
        for call in collector._log_event.call_args_list
    )


def test_get_nvme_devices_filters_partitions(collector):
    """Only bare `nvmeN` entries from the /dev listing are returned as paths."""
    dev_entries = ["nvme0", "nvme0n1", "nvme1", "nvme1n1", "sda", "loop0", "nvme2"]
    ls_result = MagicMock(exit_code=0, stdout="\n".join(dev_entries))
    collector._run_sut_cmd.return_value = ls_result

    expected_paths = ["/dev/nvme0", "/dev/nvme1", "/dev/nvme2"]
    assert collector._get_nvme_devices() == expected_paths