From da097349db8fb7b4bd8e4bcc862727f38ae309f5 Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Wed, 23 Jul 2025 12:46:03 -0500 Subject: [PATCH 01/11] fixed build from model to align with --plugin-configs --- nodescraper/plugins/inband/os/analyzer_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nodescraper/plugins/inband/os/analyzer_args.py b/nodescraper/plugins/inband/os/analyzer_args.py index 56719ebc..351d3fa4 100644 --- a/nodescraper/plugins/inband/os/analyzer_args.py +++ b/nodescraper/plugins/inband/os/analyzer_args.py @@ -59,4 +59,4 @@ def build_from_model(cls, datamodel: OsDataModel) -> "OsAnalyzerArgs": Returns: OsAnalyzerArgs: instance of analyzer args class """ - return cls(exp_os=datamodel.os_version) + return cls(exp_os=datamodel.os_name) From 61c273fd05142b6e434f0891c67a9ee6cd21847c Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Wed, 23 Jul 2025 14:03:46 -0500 Subject: [PATCH 02/11] fix for package plugin to match dumped data --- nodescraper/plugins/inband/package/analyzer_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nodescraper/plugins/inband/package/analyzer_args.py b/nodescraper/plugins/inband/package/analyzer_args.py index c5ab1b6b..a246745d 100644 --- a/nodescraper/plugins/inband/package/analyzer_args.py +++ b/nodescraper/plugins/inband/package/analyzer_args.py @@ -31,7 +31,7 @@ class PackageAnalyzerArgs(AnalyzerArgs): exp_package_ver: dict[str, str | None] = Field(default_factory=dict) - regex_match: bool = True + regex_match: bool = False @classmethod def build_from_model(cls, datamodel: PackageDataModel) -> "PackageAnalyzerArgs": From a70fad811016708850529775bf889d357ab87590 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 24 Jul 2025 13:10:07 -0500 Subject: [PATCH 03/11] added writing file --- nodescraper/connection/inband/inband.py | 28 +++++++++++++++++-- .../plugins/inband/nvme/nvme_collector.py | 9 ++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/nodescraper/connection/inband/inband.py b/nodescraper/connection/inband/inband.py index 22161243..9cfea24a 100644 --- a/nodescraper/connection/inband/inband.py +++ b/nodescraper/connection/inband/inband.py @@ -24,8 +24,10 @@ # ############################################################################### import abc +import io +import os -from pydantic import BaseModel +from pydantic import BaseModel, Field, field_validator class CommandArtifact(BaseModel): @@ -41,7 +43,29 @@ class FileArtifact(BaseModel): """Artifact to contains contents of file read into memory""" filename: str - contents: str + contents: bytes = Field(exclude=True) + + @field_validator("file_contents", mode="before") + @classmethod + def file_contents_conformer(cls, value) -> bytes: + if isinstance(value, io.BytesIO): + return value.getvalue() + if isinstance(value, str): + return value.encode("utf-8") + return value + + def log_model(self, log_path: str): + """Log data model to a file + + Args: + log_path (str): log path + """ + log_name = os.path.join(log_path, self.filename) + with open(log_name, "wb") as log_file: + log_file.write(self.file_contents) + + def file_contents_str(self): + return self.file_contents.decode("utf-8") class InBandConnection(abc.ABC): diff --git a/nodescraper/plugins/inband/nvme/nvme_collector.py b/nodescraper/plugins/inband/nvme/nvme_collector.py index 37ad7b09..02a244d9 100644 --- a/nodescraper/plugins/inband/nvme/nvme_collector.py +++ b/nodescraper/plugins/inband/nvme/nvme_collector.py @@ -58,6 +58,7 @@ def collect_data( self.result.status = ExecutionStatus.NOT_RAN return self.result, None + f_name = "telemtry_log" commands = [ "nvme smart-log /dev/nvme0", "nvme error-log /dev/nvme0 --log-entries=256", @@ -66,10 +67,18 @@ def collect_data( "nvme fw-log /dev/nvme0", "nvme self-test-log /dev/nvme0", "nvme get-log /dev/nvme0 --log-id=6 --log-len=512", + f"nvme telemetry-log /dev/nvme0 --output-file={f_name}", ] for cmd in commands: res = self._run_sut_cmd(cmd, sudo=True) + if "--output-file" in cmd: + f_artifact = self.ib_interface.read_file(f_name, encoding=None, strip=False) + self._log_file_artifact( + f_artifact.filename, + f_artifact.contents, + ) + if res.exit_code == 0: data[cmd] = res.stdout else: From 0e523b5f4f0f456d15d641e04e2931be48bac8ba Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Thu, 24 Jul 2025 18:45:48 -0500 Subject: [PATCH 04/11] added support for copying over binary file through read_sut_file --- nodescraper/connection/inband/inband.py | 38 +++++++++++++------ nodescraper/connection/inband/inbandlocal.py | 16 ++++++-- .../plugins/inband/nvme/nvme_collector.py | 9 ++--- .../taskresulthooks/filesystemloghook.py | 11 +++++- 4 files changed, 51 insertions(+), 23 deletions(-) diff --git a/nodescraper/connection/inband/inband.py b/nodescraper/connection/inband/inband.py index 9cfea24a..0f5af909 100644 --- a/nodescraper/connection/inband/inband.py +++ b/nodescraper/connection/inband/inband.py @@ -26,6 +26,7 @@ import abc import io import os +from typing import Optional from pydantic import BaseModel, Field, field_validator @@ -45,27 +46,42 @@ class FileArtifact(BaseModel): filename: str contents: bytes = Field(exclude=True) - @field_validator("file_contents", mode="before") - @classmethod - def file_contents_conformer(cls, value) -> bytes: + @staticmethod + def coerce_contents(value: io.BytesIO | str | bytes) -> bytes: if isinstance(value, io.BytesIO): return value.getvalue() if isinstance(value, str): return value.encode("utf-8") return value - def log_model(self, log_path: str): - """Log data model to a file + @field_validator("contents", mode="before") + @classmethod + def contents_conformer(cls, value): + return cls.coerce_contents(value) + + def log_model(self, log_path: str, encoding: Optional[str] = None) -> None: + """Log the file contents to disk. Args: - log_path (str): log path + log_path (str): path to write the file + encoding (str | None): if None, writes as binary """ log_name = os.path.join(log_path, self.filename) - with open(log_name, "wb") as log_file: - log_file.write(self.file_contents) - - def file_contents_str(self): - return self.file_contents.decode("utf-8") + contents = self.coerce_contents(self.contents) + + if encoding is None: + with open(log_name, "wb") as f: + f.write(contents) + else: + with open(log_name, "w", encoding=encoding) as f: + f.write(contents.decode(encoding)) + + def contents_str(self) -> str: + """Safe string representation of contents (for logs).""" + try: + return self.contents.decode("utf-8") + except UnicodeDecodeError: + return f"" class InBandConnection(abc.ABC): diff --git a/nodescraper/connection/inband/inbandlocal.py b/nodescraper/connection/inband/inbandlocal.py index b9cf6aa0..cd27ca9d 100644 --- a/nodescraper/connection/inband/inbandlocal.py +++ b/nodescraper/connection/inband/inbandlocal.py @@ -75,11 +75,19 @@ def read_file(self, filename: str, encoding: str = "utf-8", strip: bool = True) Returns: FileArtifact: file artifact """ - contents = "" - with open(filename, "r", encoding=encoding) as local_file: - contents = local_file.read().strip() + + if encoding is None: + # Read as binary + with open(filename, "rb") as f: + contents = f.read() + else: + # Read as text + with open(filename, "r", encoding=encoding) as f: + contents = f.read() + if strip: + contents = contents.strip() return FileArtifact( filename=os.path.basename(filename), - contents=contents.strip() if strip else contents, + contents=contents, ) diff --git a/nodescraper/plugins/inband/nvme/nvme_collector.py b/nodescraper/plugins/inband/nvme/nvme_collector.py index 02a244d9..dad12d59 100644 --- a/nodescraper/plugins/inband/nvme/nvme_collector.py +++ b/nodescraper/plugins/inband/nvme/nvme_collector.py @@ -58,7 +58,7 @@ def collect_data( self.result.status = ExecutionStatus.NOT_RAN return self.result, None - f_name = "telemtry_log" + f_name = "telemetry_log" commands = [ "nvme smart-log /dev/nvme0", "nvme error-log /dev/nvme0 --log-entries=256", @@ -73,11 +73,8 @@ def collect_data( for cmd in commands: res = self._run_sut_cmd(cmd, sudo=True) if "--output-file" in cmd: - f_artifact = self.ib_interface.read_file(f_name, encoding=None, strip=False) - self._log_file_artifact( - f_artifact.filename, - f_artifact.contents, - ) + # should we do something with the returned FileArtifact? + _ = self._read_sut_file(filename=f_name, encoding=None) if res.exit_code == 0: data[cmd] = res.stdout diff --git a/nodescraper/taskresulthooks/filesystemloghook.py b/nodescraper/taskresulthooks/filesystemloghook.py index 3360b021..27cb193f 100644 --- a/nodescraper/taskresulthooks/filesystemloghook.py +++ b/nodescraper/taskresulthooks/filesystemloghook.py @@ -62,8 +62,15 @@ def process_result(self, task_result: TaskResult, data: Optional[DataModel] = No for artifact in task_result.artifacts: if isinstance(artifact, FileArtifact): log_name = get_unique_filename(log_path, artifact.filename) - with open(os.path.join(log_path, log_name), "w", encoding="utf-8") as log_file: - log_file.write(artifact.contents) + file_path = os.path.join(log_path, log_name) + + contents = artifact.contents + if isinstance(contents, bytes): + with open(file_path, "wb") as log_file: + log_file.write(contents) + else: + with open(file_path, "w", encoding="utf-8") as log_file: + log_file.write(contents) else: name = f"{pascal_to_snake(artifact.__class__.__name__)}s" if name in artifact_map: From 5f8b1708b66ede6e55bb779cfad716cb39035b48 Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Fri, 25 Jul 2025 09:54:19 -0500 Subject: [PATCH 05/11] fix --- nodescraper/connection/inband/inband.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nodescraper/connection/inband/inband.py b/nodescraper/connection/inband/inband.py index 0f5af909..11151dc9 100644 --- a/nodescraper/connection/inband/inband.py +++ b/nodescraper/connection/inband/inband.py @@ -44,7 +44,7 @@ class FileArtifact(BaseModel): """Artifact to contains contents of file read into memory""" filename: str - contents: bytes = Field(exclude=True) + contents: str | bytes = Field(exclude=True) @staticmethod def coerce_contents(value: io.BytesIO | str | bytes) -> bytes: From 7bf4953323d59175f30e4f83e27e0b8243fd4671 Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Fri, 25 Jul 2025 10:24:41 -0500 Subject: [PATCH 06/11] added utest --- test/unit/framework/test_file_artifact.py | 60 +++++++++++++++++++++++ test/unit/plugin/test_nvme_collector.py | 15 +++--- 2 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 test/unit/framework/test_file_artifact.py diff --git a/test/unit/framework/test_file_artifact.py b/test/unit/framework/test_file_artifact.py new file mode 100644 index 00000000..028f6d6e --- /dev/null +++ b/test/unit/framework/test_file_artifact.py @@ -0,0 +1,60 @@ +import io + +import pytest + +from nodescraper.connection.inband.inband import ( + FileArtifact, # adjust import path accordingly +) + + +def test_fileartifact_accepts_str_and_encodes(): + artifact = FileArtifact(filename="test.txt", contents="hello") + assert isinstance(artifact.contents, bytes) + assert artifact.contents == b"hello" + + +def test_fileartifact_accepts_bytes(): + artifact = FileArtifact(filename="data.bin", contents=b"\x00\xff") + assert artifact.contents == b"\x00\xff" + + +def test_fileartifact_accepts_bytesio(): + artifact = FileArtifact(filename="stream.txt", contents=io.BytesIO(b"abc123")) + assert artifact.contents == b"abc123" + + +def test_contents_str_decodes_utf8(): + artifact = FileArtifact(filename="text.txt", contents="hello") + assert artifact.contents_str() == "hello" + + +def test_contents_str_handles_binary(): + artifact = FileArtifact(filename="blob.bin", contents=b"\xff\x00\xab") + result = artifact.contents_str() + assert result.startswith("" in result + + +def test_log_model_text(tmp_path): + artifact = FileArtifact(filename="log.txt", contents="text content") + artifact.log_model(str(tmp_path), encoding="utf-8") + + out_path = tmp_path / "log.txt" + assert out_path.exists() + assert out_path.read_text(encoding="utf-8") == "text content" + + +def test_log_model_binary(tmp_path): + binary_data = b"\x01\x02\xffDATA" + artifact = FileArtifact(filename="binary.bin", contents=binary_data) + artifact.log_model(str(tmp_path), encoding=None) + + out_path = tmp_path / "binary.bin" + assert out_path.exists() + assert out_path.read_bytes() == binary_data + + +def test_log_model_raises_on_invalid_encoding(tmp_path): + artifact = FileArtifact(filename="bad.txt", contents=b"\xff\xff") + with pytest.raises(UnicodeDecodeError): + artifact.log_model(str(tmp_path), encoding="utf-8") diff --git a/test/unit/plugin/test_nvme_collector.py b/test/unit/plugin/test_nvme_collector.py index 131608a2..1dedec4d 100644 --- a/test/unit/plugin/test_nvme_collector.py +++ b/test/unit/plugin/test_nvme_collector.py @@ -59,19 +59,22 @@ def test_skips_on_windows(collector): def test_successful_collection(collector): collector.system_info = MagicMock(os_family=OSFamily.LINUX) - collector._run_sut_cmd.return_value = MagicMock(exit_code=0, stdout="output") + fake_artifact = MagicMock() + fake_artifact.filename = "telemetry_log" + fake_artifact.contents = b"telemetry-raw-binary" + + collector._read_sut_file = MagicMock(return_value=fake_artifact) + result, data = collector.collect_data() assert result.status == ExecutionStatus.OK assert result.message == "NVMe data successfully collected" assert isinstance(data, NvmeDataModel) - assert collector._run_sut_cmd.call_count == 7 - assert any( - "Collected NVMe data" in call.kwargs["description"] - for call in collector._log_event.call_args_list - ) + assert collector._run_sut_cmd.call_count == 8 + + collector._read_sut_file.assert_called_once_with(filename="telemetry_log", encoding=None) def test_partial_failures(collector): From 65a6ebe3065464b5b007e17c59a908b300b30fca Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Wed, 30 Jul 2025 14:09:06 -0500 Subject: [PATCH 07/11] addressed reviews --- nodescraper/connection/inband/inband.py | 27 ++++++++++--------- .../plugins/inband/nvme/nvme_collector.py | 4 +-- .../taskresulthooks/filesystemloghook.py | 9 +------ 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/nodescraper/connection/inband/inband.py b/nodescraper/connection/inband/inband.py index 11151dc9..40d76a0c 100644 --- a/nodescraper/connection/inband/inband.py +++ b/nodescraper/connection/inband/inband.py @@ -46,35 +46,36 @@ class FileArtifact(BaseModel): filename: str contents: str | bytes = Field(exclude=True) - @staticmethod - def coerce_contents(value: io.BytesIO | str | bytes) -> bytes: + @field_validator("contents", mode="before") + @classmethod + def validate_contents(cls, value: io.BytesIO | str | bytes): if isinstance(value, io.BytesIO): return value.getvalue() if isinstance(value, str): return value.encode("utf-8") return value - @field_validator("contents", mode="before") - @classmethod - def contents_conformer(cls, value): - return cls.coerce_contents(value) - def log_model(self, log_path: str, encoding: Optional[str] = None) -> None: """Log the file contents to disk. Args: log_path (str): path to write the file - encoding (str | None): if None, writes as binary + encoding (str | None): if None, auto-detect binary or not """ log_name = os.path.join(log_path, self.filename) - contents = self.coerce_contents(self.contents) + contents = self.contents - if encoding is None: - with open(log_name, "wb") as f: - f.write(contents) - else: + if encoding: with open(log_name, "w", encoding=encoding) as f: f.write(contents.decode(encoding)) + else: + try: + decoded = contents.decode("utf-8") + with open(log_name, "w", encoding="utf-8") as f: + f.write(decoded) + except UnicodeDecodeError: + with open(log_name, "wb") as f: + f.write(contents) def contents_str(self) -> str: """Safe string representation of contents (for logs).""" diff --git a/nodescraper/plugins/inband/nvme/nvme_collector.py b/nodescraper/plugins/inband/nvme/nvme_collector.py index 07422b8c..56622f1c 100644 --- a/nodescraper/plugins/inband/nvme/nvme_collector.py +++ b/nodescraper/plugins/inband/nvme/nvme_collector.py @@ -71,7 +71,7 @@ def collect_data( return self.result, None all_device_data = {} - f_name = "telemetry_log" + f_name = "telemetry_log.bin" for dev in nvme_devices: device_data = {} @@ -89,7 +89,7 @@ def collect_data( for key, cmd in commands.items(): res = self._run_sut_cmd(cmd, sudo=True) if "--output-file" in cmd: - _ = self._read_sut_file(filename=f_name, encoding=None) + _ = self._read_sut_file(filename=f"{dev}_{f_name}", encoding=None) if res.exit_code == 0: device_data[key] = res.stdout else: diff --git a/nodescraper/taskresulthooks/filesystemloghook.py b/nodescraper/taskresulthooks/filesystemloghook.py index 27cb193f..a100f253 100644 --- a/nodescraper/taskresulthooks/filesystemloghook.py +++ b/nodescraper/taskresulthooks/filesystemloghook.py @@ -62,15 +62,8 @@ def process_result(self, task_result: TaskResult, data: Optional[DataModel] = No for artifact in task_result.artifacts: if isinstance(artifact, FileArtifact): log_name = get_unique_filename(log_path, artifact.filename) - file_path = os.path.join(log_path, log_name) + artifact.log_model(log_path) - contents = artifact.contents - if isinstance(contents, bytes): - with open(file_path, "wb") as log_file: - log_file.write(contents) - else: - with open(file_path, "w", encoding="utf-8") as log_file: - log_file.write(contents) else: name = f"{pascal_to_snake(artifact.__class__.__name__)}s" if name in artifact_map: From 9a13d8c21308ded97e0e22ecfaac93761c6a7844 Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Mon, 4 Aug 2025 14:31:05 -0500 Subject: [PATCH 08/11] split FileArtifact into Binary/Text --- nodescraper/base/inbandcollectortask.py | 6 +- nodescraper/connection/inband/__init__.py | 12 ++- nodescraper/connection/inband/inband.py | 84 +++++++++++-------- nodescraper/connection/inband/inbandlocal.py | 33 ++++---- nodescraper/connection/inband/inbandremote.py | 33 ++++---- .../plugins/inband/dmesg/dmesg_analyzer.py | 4 +- .../taskresulthooks/filesystemloghook.py | 4 +- test/unit/framework/test_file_artifact.py | 70 +++++++--------- 8 files changed, 128 insertions(+), 118 deletions(-) diff --git a/nodescraper/base/inbandcollectortask.py b/nodescraper/base/inbandcollectortask.py index bf551da6..85630592 100644 --- a/nodescraper/base/inbandcollectortask.py +++ b/nodescraper/base/inbandcollectortask.py @@ -27,7 +27,7 @@ from typing import Generic, Optional from nodescraper.connection.inband import InBandConnection -from nodescraper.connection.inband.inband import CommandArtifact, FileArtifact +from nodescraper.connection.inband.inband import BaseFileArtifact, CommandArtifact from nodescraper.enums import EventPriority, OSFamily, SystemInteractionLevel from nodescraper.generictypes import TCollectArg, TDataModel from nodescraper.interfaces import DataCollector, TaskResultHook @@ -99,7 +99,7 @@ def _run_sut_cmd( def _read_sut_file( self, filename: str, encoding="utf-8", strip: bool = True, log_artifact=True - ) -> FileArtifact: + ) -> BaseFileArtifact: """ Read a file from the SUT and return its content. @@ -110,7 +110,7 @@ def _read_sut_file( log_artifact (bool, optional): whether we should log the contents of the file. Defaults to True. Returns: - FileArtifact: The content of the file read from the SUT, which includes the file name and content + BaseFileArtifact: The content of the file read from the SUT, which includes the file name and content """ file_res = self.connection.read_file(filename=filename, encoding=encoding, strip=strip) if log_artifact: diff --git a/nodescraper/connection/inband/__init__.py b/nodescraper/connection/inband/__init__.py index 7979aa76..3d7faa6c 100644 --- a/nodescraper/connection/inband/__init__.py +++ b/nodescraper/connection/inband/__init__.py @@ -23,7 +23,13 @@ # SOFTWARE. # ############################################################################### -from .inband import CommandArtifact, FileArtifact, InBandConnection +from .inband import ( + BaseFileArtifact, + BinaryFileArtifact, + CommandArtifact, + InBandConnection, + TextFileArtifact, +) from .inbandlocal import LocalShell from .inbandmanager import InBandConnectionManager from .sshparams import SSHConnectionParams @@ -33,6 +39,8 @@ "LocalShell", "InBandConnectionManager", "InBandConnection", - "FileArtifact", + "BaseFileArtifact", + "TextFileArtifact", + "BinaryFileArtifact", "CommandArtifact", ] diff --git a/nodescraper/connection/inband/inband.py b/nodescraper/connection/inband/inband.py index 40d76a0c..2458bb62 100644 --- a/nodescraper/connection/inband/inband.py +++ b/nodescraper/connection/inband/inband.py @@ -24,11 +24,10 @@ # ############################################################################### import abc -import io import os from typing import Optional -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel class CommandArtifact(BaseModel): @@ -40,45 +39,56 @@ class CommandArtifact(BaseModel): exit_code: int -class FileArtifact(BaseModel): - """Artifact to contains contents of file read into memory""" - +class BaseFileArtifact(BaseModel, abc.ABC): filename: str - contents: str | bytes = Field(exclude=True) - @field_validator("contents", mode="before") + @abc.abstractmethod + def log_model(self, log_path: str) -> None: + pass + + @abc.abstractmethod + def contents_str(self) -> str: + pass + @classmethod - def validate_contents(cls, value: io.BytesIO | str | bytes): - if isinstance(value, io.BytesIO): - return value.getvalue() - if isinstance(value, str): - return value.encode("utf-8") - return value + def from_bytes( + cls, + filename: str, + raw_contents: bytes, + encoding: Optional[str] = "utf-8", + strip: bool = True, + ) -> "BaseFileArtifact": + if encoding is None: + return BinaryFileArtifact(filename=filename, contents=raw_contents) - def log_model(self, log_path: str, encoding: Optional[str] = None) -> None: - """Log the file contents to disk. + try: + text = raw_contents.decode(encoding) + return TextFileArtifact(filename=filename, contents=text.strip() if strip else text) + except UnicodeDecodeError: + return BinaryFileArtifact(filename=filename, contents=raw_contents) - Args: - log_path (str): path to write the file - encoding (str | None): if None, auto-detect binary or not - """ + +class TextFileArtifact(BaseFileArtifact): + contents: str + + def log_model(self, log_path: str) -> None: + path = os.path.join(log_path, self.filename) + with open(path, "w", encoding="utf-8") as f: + f.write(self.contents) + + def contents_str(self) -> str: + return self.contents + + +class BinaryFileArtifact(BaseFileArtifact): + contents: bytes + + def log_model(self, log_path: str) -> None: log_name = os.path.join(log_path, self.filename) - contents = self.contents - - if encoding: - with open(log_name, "w", encoding=encoding) as f: - f.write(contents.decode(encoding)) - else: - try: - decoded = contents.decode("utf-8") - with open(log_name, "w", encoding="utf-8") as f: - f.write(decoded) - except UnicodeDecodeError: - with open(log_name, "wb") as f: - f.write(contents) + with open(log_name, "wb") as f: + f.write(self.contents) def contents_str(self) -> str: - """Safe string representation of contents (for logs).""" try: return self.contents.decode("utf-8") except UnicodeDecodeError: @@ -104,8 +114,10 @@ def run_command( """ @abc.abstractmethod - def read_file(self, filename: str, encoding: str = "utf-8", strip: bool = True) -> FileArtifact: - """Read a file into a FileArtifact + def read_file( + self, filename: str, encoding: str = "utf-8", strip: bool = True + ) -> BaseFileArtifact: + """Read a file into a BaseFileArtifact Args: filename (str): filename @@ -113,5 +125,5 @@ def read_file(self, filename: str, encoding: str = "utf-8", strip: bool = True) strip (bool): automatically strip file contents Returns: - FileArtifact: file artifact + BaseFileArtifact: file artifact """ diff --git a/nodescraper/connection/inband/inbandlocal.py b/nodescraper/connection/inband/inbandlocal.py index cd27ca9d..8429366b 100644 --- a/nodescraper/connection/inband/inbandlocal.py +++ b/nodescraper/connection/inband/inbandlocal.py @@ -26,7 +26,11 @@ import os import subprocess -from .inband import CommandArtifact, FileArtifact, InBandConnection +from .inband import ( + BaseFileArtifact, + CommandArtifact, + InBandConnection, +) class LocalShell(InBandConnection): @@ -64,8 +68,10 @@ def run_command( exit_code=res.returncode, ) - def read_file(self, filename: str, encoding: str = "utf-8", strip: bool = True) -> FileArtifact: - """Read a local file into a FileArtifact + def read_file( + self, filename: str, encoding: str = "utf-8", strip: bool = True + ) -> BaseFileArtifact: + """Read a local file into a BaseFileArtifact Args: filename (str): filename @@ -73,21 +79,14 @@ def read_file(self, filename: str, encoding: str = "utf-8", strip: bool = True) strip (bool): automatically strip file contents Returns: - FileArtifact: file artifact + BaseFileArtifact: file artifact """ + with open(filename, "rb") as f: + raw_contents = f.read() - if encoding is None: - # Read as binary - with open(filename, "rb") as f: - contents = f.read() - else: - # Read as text - with open(filename, "r", encoding=encoding) as f: - contents = f.read() - if strip: - contents = contents.strip() - - return FileArtifact( + return BaseFileArtifact.from_bytes( filename=os.path.basename(filename), - contents=contents, + raw_contents=raw_contents, + encoding=encoding, + strip=strip, ) diff --git a/nodescraper/connection/inband/inbandremote.py b/nodescraper/connection/inband/inbandremote.py index dd97b1ab..a32e15ca 100644 --- a/nodescraper/connection/inband/inbandremote.py +++ b/nodescraper/connection/inband/inbandremote.py @@ -34,7 +34,11 @@ SSHException, ) -from .inband import CommandArtifact, FileArtifact, InBandConnection +from .inband import ( + BaseFileArtifact, + CommandArtifact, + InBandConnection, +) from .sshparams import SSHConnectionParams @@ -94,27 +98,26 @@ def connect_ssh(self): def read_file( self, filename: str, - encoding="utf-8", + encoding: str | None = "utf-8", strip: bool = True, - ) -> FileArtifact: - """Read a remote file into a file artifact + ) -> BaseFileArtifact: + """Read a remote file into a BaseFileArtifact. Args: - filename (str): filename - encoding (str, optional): remote file encoding. Defaults to "utf-8". - strip (bool): automatically strip file contents + filename (str): Path to file on remote host + encoding (str | None, optional): If None, file is read as binary. If str, decode using that encoding. Defaults to "utf-8". + strip (bool): Strip whitespace for text files. Ignored for binary. Returns: - FileArtifact: file artifact + BaseFileArtifact: Object representing file contents """ - contents = "" - - with self.client.open_sftp().open(filename) as remote_file: - contents = remote_file.read().decode(encoding=encoding, errors="ignore") - - return FileArtifact( + with self.client.open_sftp().open(filename, "rb") as remote_file: + raw_contents = remote_file.read() + return BaseFileArtifact.from_bytes( filename=os.path.basename(filename), - contents=contents.strip() if strip else contents, + raw_contents=raw_contents, + encoding=encoding, + strip=strip, ) def run_command( diff --git a/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py b/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py index 78f84a27..01d20519 100644 --- a/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py +++ b/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py @@ -28,7 +28,7 @@ from typing import Optional from nodescraper.base.regexanalyzer import ErrorRegex, RegexAnalyzer -from nodescraper.connection.inband import FileArtifact +from nodescraper.connection.inband import BaseFileArtifact from nodescraper.enums import EventCategory, EventPriority from nodescraper.models import Event, TaskResult @@ -386,7 +386,7 @@ def analyze_data( args.analysis_range_end, ) self.result.artifacts.append( - FileArtifact(filename="filtered_dmesg.log", contents=dmesg_content) + BaseFileArtifact(filename="filtered_dmesg.log", contents=dmesg_content) ) else: dmesg_content = data.dmesg_content diff --git a/nodescraper/taskresulthooks/filesystemloghook.py b/nodescraper/taskresulthooks/filesystemloghook.py index a100f253..34abb9f8 100644 --- a/nodescraper/taskresulthooks/filesystemloghook.py +++ b/nodescraper/taskresulthooks/filesystemloghook.py @@ -27,7 +27,7 @@ import os from typing import Optional -from nodescraper.connection.inband import FileArtifact +from nodescraper.connection.inband import BaseFileArtifact from nodescraper.interfaces.taskresulthook import TaskResultHook from nodescraper.models import DataModel, TaskResult from nodescraper.utils import get_unique_filename, pascal_to_snake @@ -60,7 +60,7 @@ def process_result(self, task_result: TaskResult, data: Optional[DataModel] = No artifact_map = {} for artifact in task_result.artifacts: - if isinstance(artifact, FileArtifact): + if isinstance(artifact, BaseFileArtifact): log_name = get_unique_filename(log_path, artifact.filename) artifact.log_model(log_path) diff --git a/test/unit/framework/test_file_artifact.py b/test/unit/framework/test_file_artifact.py index 028f6d6e..b6741d20 100644 --- a/test/unit/framework/test_file_artifact.py +++ b/test/unit/framework/test_file_artifact.py @@ -1,60 +1,48 @@ -import io - -import pytest +from pathlib import Path from nodescraper.connection.inband.inband import ( - FileArtifact, # adjust import path accordingly + BaseFileArtifact, + BinaryFileArtifact, + TextFileArtifact, ) -def test_fileartifact_accepts_str_and_encodes(): - artifact = FileArtifact(filename="test.txt", contents="hello") - assert isinstance(artifact.contents, bytes) - assert artifact.contents == b"hello" - - -def test_fileartifact_accepts_bytes(): - artifact = FileArtifact(filename="data.bin", contents=b"\x00\xff") - assert artifact.contents == b"\x00\xff" - - -def test_fileartifact_accepts_bytesio(): - artifact = FileArtifact(filename="stream.txt", contents=io.BytesIO(b"abc123")) - assert artifact.contents == b"abc123" - - -def test_contents_str_decodes_utf8(): - artifact = FileArtifact(filename="text.txt", contents="hello") +def test_textfileartifact_contents_str(): + artifact = TextFileArtifact(filename="text.txt", contents="hello") assert artifact.contents_str() == "hello" -def test_contents_str_handles_binary(): - artifact = FileArtifact(filename="blob.bin", contents=b"\xff\x00\xab") +def test_binaryfileartifact_contents_str(): + artifact = BinaryFileArtifact(filename="blob.bin", contents=b"\xff\x00\xab") result = artifact.contents_str() assert result.startswith("" in result -def test_log_model_text(tmp_path): - artifact = FileArtifact(filename="log.txt", contents="text content") - artifact.log_model(str(tmp_path), encoding="utf-8") +def test_from_bytes_text(): + artifact = BaseFileArtifact.from_bytes("test.txt", b"simple text", encoding="utf-8") + assert isinstance(artifact, TextFileArtifact) + assert artifact.contents == "simple text" - out_path = tmp_path / "log.txt" - assert out_path.exists() - assert out_path.read_text(encoding="utf-8") == "text content" +def test_from_bytes_binary(): + artifact = BaseFileArtifact.from_bytes("data.bin", b"\xff\x00\xab", encoding="utf-8") + assert isinstance(artifact, BinaryFileArtifact) + assert artifact.contents == b"\xff\x00\xab" -def test_log_model_binary(tmp_path): - binary_data = b"\x01\x02\xffDATA" - artifact = FileArtifact(filename="binary.bin", contents=binary_data) - artifact.log_model(str(tmp_path), encoding=None) - out_path = tmp_path / "binary.bin" - assert out_path.exists() - assert out_path.read_bytes() == binary_data +def test_log_model_text(tmp_path: Path): + artifact = TextFileArtifact(filename="log.txt", contents="some text") + artifact.log_model(str(tmp_path)) + output_path = tmp_path / "log.txt" + assert output_path.exists() + assert output_path.read_text(encoding="utf-8") == "some text" -def test_log_model_raises_on_invalid_encoding(tmp_path): - artifact = FileArtifact(filename="bad.txt", contents=b"\xff\xff") - with pytest.raises(UnicodeDecodeError): - artifact.log_model(str(tmp_path), encoding="utf-8") +def test_log_model_binary(tmp_path: Path): + binary_data = b"\x01\x02\xffDATA" + artifact = BinaryFileArtifact(filename="binary.bin", contents=binary_data) + artifact.log_model(str(tmp_path)) + output_path = tmp_path / "binary.bin" + assert output_path.exists() + assert output_path.read_bytes() == binary_data From 1d64bed858005b417d920f853caf8df091387669 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Mon, 4 Aug 2025 14:44:56 -0500 Subject: [PATCH 09/11] docstrings --- nodescraper/connection/inband/inband.py | 42 +++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/nodescraper/connection/inband/inband.py b/nodescraper/connection/inband/inband.py index 2458bb62..ef9cb9e2 100644 --- a/nodescraper/connection/inband/inband.py +++ b/nodescraper/connection/inband/inband.py @@ -40,10 +40,17 @@ class CommandArtifact(BaseModel): class BaseFileArtifact(BaseModel, abc.ABC): + """Base class for files""" + filename: str @abc.abstractmethod def log_model(self, log_path: str) -> None: + """Write file to path + + Args: + log_path (str): Path for file + """ pass @abc.abstractmethod @@ -58,6 +65,17 @@ def from_bytes( encoding: Optional[str] = "utf-8", strip: bool = True, ) -> "BaseFileArtifact": + """factory method + + Args: + filename (str): name of file to be read + raw_contents (bytes): Raw file content + encoding (Optional[str], optional): Optional encoding. Defaults to "utf-8". + strip (bool, optional): Remove padding. Defaults to True. + + Returns: + BaseFileArtifact: _Returns instance of Artifact file + """ if encoding is None: return BinaryFileArtifact(filename=filename, contents=raw_contents) @@ -69,26 +87,50 @@ def from_bytes( class TextFileArtifact(BaseFileArtifact): + """Class for text file artifacts""" + contents: str def log_model(self, log_path: str) -> None: + """Write file to disk + + Args: + log_path (str): Path for file + """ path = os.path.join(log_path, self.filename) with open(path, "w", encoding="utf-8") as f: f.write(self.contents) def contents_str(self) -> str: + """Get content as str + + Returns: + str: Str instance of file content + """ return self.contents class BinaryFileArtifact(BaseFileArtifact): + """Class for binary file artifacts""" + contents: bytes def log_model(self, log_path: str) -> None: + """Write file to disk + + Args: + log_path (str): Path for file + """ log_name = os.path.join(log_path, self.filename) with open(log_name, "wb") as f: f.write(self.contents) def contents_str(self) -> str: + """File content + + Returns: + str: Str instance of file content + """ try: return self.contents.decode("utf-8") except UnicodeDecodeError: From e396cc16e0b5212e75e45377f9e4cca23dda2a74 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Tue, 12 Aug 2025 13:21:49 -0500 Subject: [PATCH 10/11] addressed reviews --- nodescraper/plugins/inband/dmesg/dmesg_analyzer.py | 4 ++-- nodescraper/taskresulthooks/filesystemloghook.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py b/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py index 01d20519..c374f8a8 100644 --- a/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py +++ b/nodescraper/plugins/inband/dmesg/dmesg_analyzer.py @@ -28,7 +28,7 @@ from typing import Optional from nodescraper.base.regexanalyzer import ErrorRegex, RegexAnalyzer -from nodescraper.connection.inband import BaseFileArtifact +from nodescraper.connection.inband import TextFileArtifact from nodescraper.enums import EventCategory, EventPriority from nodescraper.models import Event, TaskResult @@ -386,7 +386,7 @@ def analyze_data( args.analysis_range_end, ) self.result.artifacts.append( - BaseFileArtifact(filename="filtered_dmesg.log", contents=dmesg_content) + TextFileArtifact(filename="filtered_dmesg.log", contents=dmesg_content) ) else: dmesg_content = data.dmesg_content diff --git a/nodescraper/taskresulthooks/filesystemloghook.py b/nodescraper/taskresulthooks/filesystemloghook.py index 34abb9f8..74cec4cb 100644 --- a/nodescraper/taskresulthooks/filesystemloghook.py +++ b/nodescraper/taskresulthooks/filesystemloghook.py @@ -60,7 +60,7 @@ def process_result(self, task_result: TaskResult, data: Optional[DataModel] = No artifact_map = {} for artifact in task_result.artifacts: - if isinstance(artifact, BaseFileArtifact): + if issubclass(artifact, BaseFileArtifact): log_name = get_unique_filename(log_path, artifact.filename) artifact.log_model(log_path) From 966d3891f3c0b7dc78e009c1d8bf9fca0e4cf1ac Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Tue, 12 Aug 2025 13:32:18 -0500 Subject: [PATCH 11/11] undid issubclass -> isinstance --- nodescraper/taskresulthooks/filesystemloghook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nodescraper/taskresulthooks/filesystemloghook.py b/nodescraper/taskresulthooks/filesystemloghook.py index 74cec4cb..34abb9f8 100644 --- a/nodescraper/taskresulthooks/filesystemloghook.py +++ b/nodescraper/taskresulthooks/filesystemloghook.py @@ -60,7 +60,7 @@ def process_result(self, task_result: TaskResult, data: Optional[DataModel] = No artifact_map = {} for artifact in task_result.artifacts: - if issubclass(artifact, BaseFileArtifact): + if isinstance(artifact, BaseFileArtifact): log_name = get_unique_filename(log_path, artifact.filename) artifact.log_model(log_path)