diff --git a/nodescraper/plugins/inband/nvme/__init__.py b/nodescraper/plugins/inband/nvme/__init__.py new file mode 100644 index 00000000..802e4c50 --- /dev/null +++ b/nodescraper/plugins/inband/nvme/__init__.py @@ -0,0 +1,28 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### +from .nvme_plugin import NvmePlugin + +__all__ = ["NvmePlugin"] diff --git a/nodescraper/plugins/inband/nvme/nvme_collector.py b/nodescraper/plugins/inband/nvme/nvme_collector.py new file mode 100644 index 00000000..9704db48 --- /dev/null +++ b/nodescraper/plugins/inband/nvme/nvme_collector.py @@ -0,0 +1,153 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
import os
import re

from pydantic import ValidationError

from nodescraper.base import InBandDataCollector
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily
from nodescraper.models import TaskResult

from .nvmedata import NvmeDataModel


class NvmeCollector(InBandDataCollector[NvmeDataModel, None]):
    """Collect NVMe details from the system.

    Controllers are discovered by listing ``/dev``; for each one a fixed set of
    ``nvme`` CLI commands is run and the stdout of every successful command is
    stored in the data model.
    """

    DATA_MODEL = NvmeDataModel

    def collect_data(
        self,
        args=None,
    ) -> tuple[TaskResult, NvmeDataModel | None]:
        """Collect detailed NVMe information from all NVMe devices.

        Args:
            args: Not used by this collector.

        Returns:
            tuple[TaskResult, NvmeDataModel | None]: Task result and data model with NVMe
            command outputs. The data model is None when collection is skipped on Windows,
            when no NVMe controller is found, when no command succeeds on any controller,
            or when the collected data fails model validation.
        """
        if self.system_info.os_family == OSFamily.WINDOWS:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="NVMe collection not supported on Windows",
                priority=EventPriority.WARNING,
            )
            self.result.message = "NVMe data collection skipped on Windows"
            self.result.status = ExecutionStatus.NOT_RAN
            return self.result, None

        nvme_devices = self._get_nvme_devices()
        if not nvme_devices:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="No NVMe devices found",
                priority=EventPriority.ERROR,
            )
            self.result.message = "No NVMe devices found"
            self.result.status = ExecutionStatus.ERROR
            return self.result, None

        all_device_data = {}
        for dev in nvme_devices:
            device_data = self._collect_nvme_device_data(dev)
            # A controller for which every command failed is left out entirely.
            if device_data:
                all_device_data[os.path.basename(dev)] = device_data

        if not all_device_data:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="Failed to collect any NVMe data",
                priority=EventPriority.ERROR,
            )
            self.result.message = "No NVMe data collected"
            self.result.status = ExecutionStatus.ERROR
            return self.result, None

        try:
            nvme_data = NvmeDataModel(devices=all_device_data)
        except ValidationError as exp:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="Validation error while building NvmeDataModel",
                data={"errors": exp.errors(include_url=False)},
                priority=EventPriority.ERROR,
            )
            self.result.message = "NVMe data invalid format"
            self.result.status = ExecutionStatus.ERROR
            return self.result, None

        self._log_event(
            category=EventCategory.SW_DRIVER,
            description="Collected NVMe data",
            data=nvme_data.model_dump(),
            priority=EventPriority.INFO,
        )
        self.result.message = "NVMe data successfully collected"
        self.result.status = ExecutionStatus.OK
        return self.result, nvme_data

    def _collect_nvme_device_data(self, dev: str) -> dict[str, str]:
        """Run the NVMe CLI queries against one controller.

        Args:
            dev: Controller device path, e.g. ``/dev/nvme0``.

        Returns:
            dict[str, str]: stdout of each command that exited with code 0, keyed by
            the data-model field name. Failed commands are logged as warnings and
            omitted, so the result may be empty.
        """
        commands = {
            "smart_log": f"nvme smart-log {dev}",
            "error_log": f"nvme error-log {dev} --log-entries=256",
            "id_ctrl": f"nvme id-ctrl {dev}",
            # NOTE(review): assumes namespace 1 exists on every controller — confirm.
            "id_ns": f"nvme id-ns {dev}n1",
            "fw_log": f"nvme fw-log {dev}",
            "self_test_log": f"nvme self-test-log {dev}",
            # NOTE(review): log-id 6 looks like the device self-test log page in the
            # NVMe spec, which would overlap with self_test_log — confirm intent.
            "get_log": f"nvme get-log {dev} --log-id=6 --log-len=512",
        }

        device_data = {}
        for key, cmd in commands.items():
            res = self._run_sut_cmd(cmd, sudo=True)
            if res.exit_code == 0:
                device_data[key] = res.stdout
                continue

            # stderr is included so the failure can be diagnosed from the event alone,
            # matching what the /dev listing failure event reports.
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description=f"Failed to execute NVMe command: '{cmd}'",
                data={"command": cmd, "exit_code": res.exit_code, "stderr": res.stderr},
                priority=EventPriority.WARNING,
                console_log=True,
            )

        return device_data

    def _get_nvme_devices(self) -> list[str]:
        """List NVMe controller device nodes found under ``/dev``.

        Returns:
            list[str]: Device paths such as ``/dev/nvme0``, in the order ``ls`` printed
            them. Empty when the listing command fails or no controller is present.
        """
        res = self._run_sut_cmd("ls /dev", sudo=False)
        if res.exit_code != 0:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="Failed to list /dev directory",
                data={"exit_code": res.exit_code, "stderr": res.stderr},
                priority=EventPriority.ERROR,
            )
            return []

        nvme_devs = []
        for line in res.stdout.splitlines():
            entry = line.strip()
            # fullmatch keeps controller nodes (nvme0) and rejects namespaces and
            # partitions (nvme0n1, nvme0n1p1), which carry a suffix after the digits.
            if re.fullmatch(r"nvme\d+", entry):
                nvme_devs.append(f"/dev/{entry}")

        return nvme_devs
b/nodescraper/plugins/inband/nvme/nvme_plugin.py @@ -0,0 +1,37 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
from nodescraper.base import InBandDataPlugin

from .nvme_collector import NvmeCollector
from .nvmedata import NvmeDataModel


class NvmePlugin(InBandDataPlugin[NvmeDataModel, None, None]):
    """In-band plugin for NVMe data.

    Binds the NVMe collector to the NVMe data model; this class sets only a
    collector and a data model.
    """

    # Collector class that gathers the NVMe command output.
    COLLECTOR = NvmeCollector

    # Data model type produced by the collector.
    DATA_MODEL = NvmeDataModel
from pydantic import BaseModel

from nodescraper.models import DataModel


class DeviceNvmeData(BaseModel):
    """Text outputs captured for a single NVMe device.

    Every field is optional and defaults to None, so an instance can hold only
    the outputs that were actually obtained.
    """

    # Each field below holds one captured output as a string, or None when absent.
    smart_log: str | None = None
    error_log: str | None = None
    id_ctrl: str | None = None
    id_ns: str | None = None
    fw_log: str | None = None
    self_test_log: str | None = None
    get_log: str | None = None
    # NOTE(review): no producer for telemetry_log is visible alongside this model — confirm
    # where it is expected to be filled in.
    telemetry_log: str | None = None


class NvmeDataModel(DataModel):
    """NVMe data for a system, grouped per device."""

    # Maps a device name to that device's captured outputs.
    # NOTE(review): keys look like /dev basenames such as "nvme0" — confirm against the collector.
    devices: dict[str, DeviceNvmeData]
from unittest.mock import MagicMock

import pytest

from nodescraper.enums import EventPriority, ExecutionStatus, OSFamily
from nodescraper.enums.systeminteraction import SystemInteractionLevel
from nodescraper.models import TaskResult
from nodescraper.plugins.inband.nvme.nvme_collector import NvmeCollector
from nodescraper.plugins.inband.nvme.nvmedata import NvmeDataModel

# Number of nvme CLI commands the collector issues for each controller.
NVME_COMMANDS_PER_DEVICE = 7


def _fake_sut(dev_listing, nvme_succeeds=lambda cmd: True, stdout="output"):
    """Build a stand-in for ``_run_sut_cmd`` that distinguishes discovery from queries.

    The collector first runs ``ls /dev`` to find controllers and only then runs the
    nvme commands. A single canned return value would also answer the listing, so no
    controller would ever be discovered and the nvme commands would never run.

    Args:
        dev_listing: Text returned as stdout for ``ls /dev``.
        nvme_succeeds: Predicate on the command string; True means exit code 0.
        stdout: Text returned as stdout for every nvme command.

    Returns:
        A callable suitable for ``MagicMock.side_effect``.
    """

    def run(cmd, sudo=False):
        if cmd == "ls /dev":
            return MagicMock(exit_code=0, stdout=dev_listing)
        return MagicMock(exit_code=0 if nvme_succeeds(cmd) else 1, stdout=stdout)

    return run


@pytest.fixture
def collector(system_info, conn_mock):
    """Return an NvmeCollector whose event logging and command execution are mocked."""
    c = NvmeCollector(
        system_info=system_info,
        system_interaction_level=SystemInteractionLevel.PASSIVE,
        connection=conn_mock,
    )
    c._log_event = MagicMock()
    c._run_sut_cmd = MagicMock()
    c.result = TaskResult()
    return c


def test_skips_on_windows(collector):
    """Collection is skipped, with a single logged event, on Windows."""
    collector.system_info = MagicMock(os_family=OSFamily.WINDOWS)
    result, data = collector.collect_data()

    assert result.status == ExecutionStatus.NOT_RAN
    assert data is None
    collector._log_event.assert_called_once()
    assert "Windows" in collector._log_event.call_args.kwargs["description"]


def test_successful_collection(collector):
    """All commands succeeding on one controller yields OK and a populated model."""
    collector.system_info = MagicMock(os_family=OSFamily.LINUX)
    collector._run_sut_cmd.side_effect = _fake_sut("nvme0\nnvme0n1\nsda")

    result, data = collector.collect_data()

    assert result.status == ExecutionStatus.OK
    assert result.message == "NVMe data successfully collected"
    assert isinstance(data, NvmeDataModel)
    assert list(data.devices) == ["nvme0"]
    # One `ls /dev` call for discovery plus the per-controller commands.
    assert collector._run_sut_cmd.call_count == 1 + NVME_COMMANDS_PER_DEVICE
    assert any(
        "Collected NVMe data" in call.kwargs["description"]
        for call in collector._log_event.call_args_list
    )


def test_partial_failures(collector):
    """A controller with only some successful commands is still reported as OK."""
    collector.system_info = MagicMock(os_family=OSFamily.LINUX)
    collector._run_sut_cmd.side_effect = _fake_sut(
        "nvme0\nnvme0n1",
        nvme_succeeds=lambda cmd: "smart-log" in cmd,
        stdout="out",
    )

    result, data = collector.collect_data()

    assert result.status == ExecutionStatus.OK
    assert isinstance(data, NvmeDataModel)
    device = data.devices["nvme0"]
    assert device.smart_log == "out"
    assert device.error_log is None
    assert device.id_ctrl is None

    # Every failed command is reported as its own warning event.
    warnings = [
        call
        for call in collector._log_event.call_args_list
        if call.kwargs["priority"] == EventPriority.WARNING
    ]
    assert len(warnings) == NVME_COMMANDS_PER_DEVICE - 1


def test_no_data_collected(collector):
    """Every command failing on the only controller results in an error."""
    collector.system_info = MagicMock(os_family=OSFamily.LINUX)
    collector._run_sut_cmd.side_effect = _fake_sut(
        "nvme0\nnvme0n1",
        nvme_succeeds=lambda cmd: False,
        stdout="",
    )

    result, data = collector.collect_data()

    assert result.status == ExecutionStatus.ERROR
    assert data is None
    assert "No NVMe data collected" in result.message
    assert any(
        call.kwargs["priority"] == EventPriority.ERROR
        for call in collector._log_event.call_args_list
    )


def test_no_devices_found(collector):
    """A /dev listing without controller nodes stops before any nvme command runs."""
    collector.system_info = MagicMock(os_family=OSFamily.LINUX)
    collector._run_sut_cmd.side_effect = _fake_sut("sda\nloop0\nnvme-fabrics")

    result, data = collector.collect_data()

    assert result.status == ExecutionStatus.ERROR
    assert data is None
    assert result.message == "No NVMe devices found"
    collector._run_sut_cmd.assert_called_once()


def test_get_nvme_devices_filters_partitions(collector):
    """Only controller nodes are returned; namespaces and other devices are dropped."""
    fake_ls_output = "\n".join(["nvme0", "nvme0n1", "nvme1", "nvme1n1", "sda", "loop0", "nvme2"])
    collector._run_sut_cmd.return_value = MagicMock(exit_code=0, stdout=fake_ls_output)

    devices = collector._get_nvme_devices()

    assert devices == ["/dev/nvme0", "/dev/nvme1", "/dev/nvme2"]