diff --git a/nodescraper/base/inbandcollectortask.py b/nodescraper/base/inbandcollectortask.py index bf551da6..85630592 100644 --- a/nodescraper/base/inbandcollectortask.py +++ b/nodescraper/base/inbandcollectortask.py @@ -27,7 +27,7 @@ from typing import Generic, Optional from nodescraper.connection.inband import InBandConnection -from nodescraper.connection.inband.inband import CommandArtifact, FileArtifact +from nodescraper.connection.inband.inband import BaseFileArtifact, CommandArtifact from nodescraper.enums import EventPriority, OSFamily, SystemInteractionLevel from nodescraper.generictypes import TCollectArg, TDataModel from nodescraper.interfaces import DataCollector, TaskResultHook @@ -99,7 +99,7 @@ def _run_sut_cmd( def _read_sut_file( self, filename: str, encoding="utf-8", strip: bool = True, log_artifact=True - ) -> FileArtifact: + ) -> BaseFileArtifact: """ Read a file from the SUT and return its content. @@ -110,7 +110,7 @@ def _read_sut_file( log_artifact (bool, optional): whether we should log the contents of the file. Defaults to True. Returns: - FileArtifact: The content of the file read from the SUT, which includes the file name and content + BaseFileArtifact: The content of the file read from the SUT, which includes the file name and content """ file_res = self.connection.read_file(filename=filename, encoding=encoding, strip=strip) if log_artifact: diff --git a/nodescraper/connection/inband/__init__.py b/nodescraper/connection/inband/__init__.py index 7979aa76..3d7faa6c 100644 --- a/nodescraper/connection/inband/__init__.py +++ b/nodescraper/connection/inband/__init__.py @@ -23,7 +23,13 @@ # SOFTWARE. # ############################################################################### -from .inband import CommandArtifact, FileArtifact, InBandConnection +from .inband import ( + BaseFileArtifact, + BinaryFileArtifact, + CommandArtifact, + InBandConnection, + TextFileArtifact, +) from .inbandlocal import LocalShell from .inbandmanager import InBandConnectionManager from .sshparams import SSHConnectionParams @@ -33,6 +39,8 @@ "LocalShell", "InBandConnectionManager", "InBandConnection", - "FileArtifact", + "BaseFileArtifact", + "TextFileArtifact", + "BinaryFileArtifact", "CommandArtifact", ] diff --git a/nodescraper/connection/inband/inband.py b/nodescraper/connection/inband/inband.py index 22161243..ef9cb9e2 100644 --- a/nodescraper/connection/inband/inband.py +++ b/nodescraper/connection/inband/inband.py @@ -24,6 +24,8 @@ # ############################################################################### import abc +import os +from typing import Optional from pydantic import BaseModel @@ -37,12 +39,103 @@ class CommandArtifact(BaseModel): exit_code: int -class FileArtifact(BaseModel): - """Artifact to contains contents of file read into memory""" +class BaseFileArtifact(BaseModel, abc.ABC): + """Base class for files""" filename: str + + @abc.abstractmethod + def log_model(self, log_path: str) -> None: + """Write file to path + + Args: + log_path (str): Path for file + """ + pass + + @abc.abstractmethod + def contents_str(self) -> str: + pass + + @classmethod + def from_bytes( + cls, + filename: str, + raw_contents: bytes, + encoding: Optional[str] = "utf-8", + strip: bool = True, + ) -> "BaseFileArtifact": + """factory method + + Args: + filename (str): name of file to be read + raw_contents (bytes): Raw file content + encoding (Optional[str], optional): Optional encoding. Defaults to "utf-8". + strip (bool, optional): Remove padding. Defaults to True. + + Returns: + BaseFileArtifact: _Returns instance of Artifact file + """ + if encoding is None: + return BinaryFileArtifact(filename=filename, contents=raw_contents) + + try: + text = raw_contents.decode(encoding) + return TextFileArtifact(filename=filename, contents=text.strip() if strip else text) + except UnicodeDecodeError: + return BinaryFileArtifact(filename=filename, contents=raw_contents) + + +class TextFileArtifact(BaseFileArtifact): + """Class for text file artifacts""" + contents: str + def log_model(self, log_path: str) -> None: + """Write file to disk + + Args: + log_path (str): Path for file + """ + path = os.path.join(log_path, self.filename) + with open(path, "w", encoding="utf-8") as f: + f.write(self.contents) + + def contents_str(self) -> str: + """Get content as str + + Returns: + str: Str instance of file content + """ + return self.contents + + +class BinaryFileArtifact(BaseFileArtifact): + """Class for binary file artifacts""" + + contents: bytes + + def log_model(self, log_path: str) -> None: + """Write file to disk + + Args: + log_path (str): Path for file + """ + log_name = os.path.join(log_path, self.filename) + with open(log_name, "wb") as f: + f.write(self.contents) + + def contents_str(self) -> str: + """File content + + Returns: + str: Str instance of file content + """ + try: + return self.contents.decode("utf-8") + except UnicodeDecodeError: + return f"" + class InBandConnection(abc.ABC): @@ -63,8 +156,10 @@ def run_command( """ @abc.abstractmethod - def read_file(self, filename: str, encoding: str = "utf-8", strip: bool = True) -> FileArtifact: - """Read a file into a FileArtifact + def read_file( + self, filename: str, encoding: str = "utf-8", strip: bool = True + ) -> BaseFileArtifact: + """Read a file into a BaseFileArtifact Args: filename (str): filename @@ -72,5 +167,5 @@ def read_file(self, filename: str, encoding: str = "utf-8", strip: bool = True) strip (bool): automatically strip file contents Returns: - FileArtifact: file artifact + BaseFileArtifact: file artifact """ diff --git a/nodescraper/connection/inband/inbandlocal.py b/nodescraper/connection/inband/inbandlocal.py index b9cf6aa0..8429366b 100644 --- a/nodescraper/connection/inband/inbandlocal.py +++ b/nodescraper/connection/inband/inbandlocal.py @@ -26,7 +26,11 @@ import os import subprocess -from .inband import CommandArtifact, FileArtifact, InBandConnection +from .inband import ( + BaseFileArtifact, + CommandArtifact, + InBandConnection, +) class LocalShell(InBandConnection): @@ -64,8 +68,10 @@ def run_command( exit_code=res.returncode, ) - def read_file(self, filename: str, encoding: str = "utf-8", strip: bool = True) -> FileArtifact: - """Read a local file into a FileArtifact + def read_file( + self, filename: str, encoding: str = "utf-8", strip: bool = True + ) -> BaseFileArtifact: + """Read a local file into a BaseFileArtifact Args: filename (str): filename @@ -73,13 +79,14 @@ def read_file(self, filename: str, encoding: str = "utf-8", strip: bool = True) strip (bool): automatically strip file contents Returns: - FileArtifact: file artifact + BaseFileArtifact: file artifact """ - contents = "" - with open(filename, "r", encoding=encoding) as local_file: - contents = local_file.read().strip() + with open(filename, "rb") as f: + raw_contents = f.read() - return FileArtifact( + return BaseFileArtifact.from_bytes( filename=os.path.basename(filename), - contents=contents.strip() if strip else contents, + raw_contents=raw_contents, + encoding=encoding, + strip=strip, ) diff --git a/nodescraper/connection/inband/inbandremote.py b/nodescraper/connection/inband/inbandremote.py index dd97b1ab..a32e15ca 100644 --- a/nodescraper/connection/inband/inbandremote.py +++ b/nodescraper/connection/inband/inbandremote.py @@ -34,7 +34,11 @@ SSHException, ) -from .inband import CommandArtifact, FileArtifact, InBandConnection +from .inband import ( + BaseFileArtifact, + CommandArtifact, + InBandConnection, +) from .sshparams import SSHConnectionParams @@ -94,27 +98,26 @@ def connect_ssh(self): def read_file( self, filename: str, - encoding="utf-8", + encoding: str | None = "utf-8", strip: bool = True, - ) -> FileArtifact: - """Read a remote file into a file artifact + ) -> BaseFileArtifact: + """Read a remote file into a BaseFileArtifact. Args: - filename (str): filename - encoding (str, optional): remote file encoding. Defaults to "utf-8". - strip (bool): automatically strip file contents + filename (str): Path to file on remote host + encoding (str | None, optional): If None, file is read as binary. If str, decode using that encoding. Defaults to "utf-8". + strip (bool): Strip whitespace for text files. Ignored for binary. Returns: - FileArtifact: file artifact + BaseFileArtifact: Object representing file contents """ - contents = "" - - with self.client.open_sftp().open(filename) as remote_file: - contents = remote_file.read().decode(encoding=encoding, errors="ignore") - - return FileArtifact( + with self.client.open_sftp().open(filename, "rb") as remote_file: + raw_contents = remote_file.read() + return BaseFileArtifact.from_bytes( filename=os.path.basename(filename), - contents=contents.strip() if strip else contents, + raw_contents=raw_contents, + encoding=encoding, + strip=strip, ) def run_command( diff --git a/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py b/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py index 78f84a27..c374f8a8 100644 --- a/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py +++ b/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py @@ -28,7 +28,7 @@ from typing import Optional from nodescraper.base.regexanalyzer import ErrorRegex, RegexAnalyzer -from nodescraper.connection.inband import FileArtifact +from nodescraper.connection.inband import TextFileArtifact from nodescraper.enums import EventCategory, EventPriority from nodescraper.models import Event, TaskResult @@ -386,7 +386,7 @@ def analyze_data( args.analysis_range_end, ) self.result.artifacts.append( - FileArtifact(filename="filtered_dmesg.log", contents=dmesg_content) + TextFileArtifact(filename="filtered_dmesg.log", contents=dmesg_content) ) else: dmesg_content = data.dmesg_content diff --git a/nodescraper/plugins/inband/nvme/nvme_collector.py b/nodescraper/plugins/inband/nvme/nvme_collector.py index 9704db48..7971ea5e 100644 --- a/nodescraper/plugins/inband/nvme/nvme_collector.py +++ b/nodescraper/plugins/inband/nvme/nvme_collector.py @@ -71,6 +71,7 @@ def collect_data( return self.result, None all_device_data = {} + f_name = "telemetry_log.bin" for dev in nvme_devices: device_data = {} @@ -82,10 +83,13 @@ def collect_data( "fw_log": f"nvme fw-log {dev}", "self_test_log": f"nvme self-test-log {dev}", "get_log": f"nvme get-log {dev} --log-id=6 --log-len=512", + "telemetry_log": f"nvme telemetry-log {dev} --output-file={dev}_{f_name}", } for key, cmd in commands.items(): res = self._run_sut_cmd(cmd, sudo=True) + if "--output-file" in cmd: + _ = self._read_sut_file(filename=f"{dev}_{f_name}", encoding=None) if res.exit_code == 0: device_data[key] = res.stdout else: diff --git a/nodescraper/taskresulthooks/filesystemloghook.py b/nodescraper/taskresulthooks/filesystemloghook.py index 3360b021..34abb9f8 100644 --- a/nodescraper/taskresulthooks/filesystemloghook.py +++ b/nodescraper/taskresulthooks/filesystemloghook.py @@ -27,7 +27,7 @@ import os from typing import Optional -from nodescraper.connection.inband import FileArtifact +from nodescraper.connection.inband import BaseFileArtifact from nodescraper.interfaces.taskresulthook import TaskResultHook from nodescraper.models import DataModel, TaskResult from nodescraper.utils import get_unique_filename, pascal_to_snake @@ -60,10 +60,10 @@ def process_result(self, task_result: TaskResult, data: Optional[DataModel] = No artifact_map = {} for artifact in task_result.artifacts: - if isinstance(artifact, FileArtifact): + if isinstance(artifact, BaseFileArtifact): log_name = get_unique_filename(log_path, artifact.filename) - with open(os.path.join(log_path, log_name), "w", encoding="utf-8") as log_file: - log_file.write(artifact.contents) + artifact.log_model(log_path) + else: name = f"{pascal_to_snake(artifact.__class__.__name__)}s" if name in artifact_map: diff --git a/test/unit/framework/test_file_artifact.py b/test/unit/framework/test_file_artifact.py new file mode 100644 index 00000000..b6741d20 --- /dev/null +++ b/test/unit/framework/test_file_artifact.py @@ -0,0 +1,48 @@ +from pathlib import Path + +from nodescraper.connection.inband.inband import ( + BaseFileArtifact, + BinaryFileArtifact, + TextFileArtifact, +) + + +def test_textfileartifact_contents_str(): + artifact = TextFileArtifact(filename="text.txt", contents="hello") + assert artifact.contents_str() == "hello" + + +def test_binaryfileartifact_contents_str(): + artifact = BinaryFileArtifact(filename="blob.bin", contents=b"\xff\x00\xab") + result = artifact.contents_str() + assert result.startswith("" in result + + +def test_from_bytes_text(): + artifact = BaseFileArtifact.from_bytes("test.txt", b"simple text", encoding="utf-8") + assert isinstance(artifact, TextFileArtifact) + assert artifact.contents == "simple text" + + +def test_from_bytes_binary(): + artifact = BaseFileArtifact.from_bytes("data.bin", b"\xff\x00\xab", encoding="utf-8") + assert isinstance(artifact, BinaryFileArtifact) + assert artifact.contents == b"\xff\x00\xab" + + +def test_log_model_text(tmp_path: Path): + artifact = TextFileArtifact(filename="log.txt", contents="some text") + artifact.log_model(str(tmp_path)) + output_path = tmp_path / "log.txt" + assert output_path.exists() + assert output_path.read_text(encoding="utf-8") == "some text" + + +def test_log_model_binary(tmp_path: Path): + binary_data = b"\x01\x02\xffDATA" + artifact = BinaryFileArtifact(filename="binary.bin", contents=binary_data) + artifact.log_model(str(tmp_path)) + output_path = tmp_path / "binary.bin" + assert output_path.exists() + assert output_path.read_bytes() == binary_data diff --git a/test/unit/plugin/test_nvme_collector.py b/test/unit/plugin/test_nvme_collector.py index f64dec22..f66bc9d7 100644 --- a/test/unit/plugin/test_nvme_collector.py +++ b/test/unit/plugin/test_nvme_collector.py @@ -60,19 +60,22 @@ def test_skips_on_windows(collector): @pytest.mark.skip(reason="No NVME device in testing infrastructure") def test_successful_collection(collector): collector.system_info = MagicMock(os_family=OSFamily.LINUX) - collector._run_sut_cmd.return_value = MagicMock(exit_code=0, stdout="output") + fake_artifact = MagicMock() + fake_artifact.filename = "telemetry_log" + fake_artifact.contents = b"telemetry-raw-binary" + + collector._read_sut_file = MagicMock(return_value=fake_artifact) + result, data = collector.collect_data() assert result.status == ExecutionStatus.OK assert result.message == "NVMe data successfully collected" assert isinstance(data, NvmeDataModel) - assert collector._run_sut_cmd.call_count == 7 - assert any( - "Collected NVMe data" in call.kwargs["description"] - for call in collector._log_event.call_args_list - ) + assert collector._run_sut_cmd.call_count == 8 + + collector._read_sut_file.assert_called_once_with(filename="telemetry_log", encoding=None) def test_partial_failures(collector):