diff --git a/nodescraper/plugins/inband/rocm/analyzer_args.py b/nodescraper/plugins/inband/rocm/analyzer_args.py index cdc04cce..d76a2421 100644 --- a/nodescraper/plugins/inband/rocm/analyzer_args.py +++ b/nodescraper/plugins/inband/rocm/analyzer_args.py @@ -33,6 +33,7 @@ class RocmAnalyzerArgs(AnalyzerArgs): exp_rocm: Union[str, list] = Field(default_factory=list) + exp_rocm_latest: str = Field(default="") @field_validator("exp_rocm", mode="before") @classmethod diff --git a/nodescraper/plugins/inband/rocm/rocm_analyzer.py b/nodescraper/plugins/inband/rocm/rocm_analyzer.py index 75aad008..1131d665 100644 --- a/nodescraper/plugins/inband/rocm/rocm_analyzer.py +++ b/nodescraper/plugins/inband/rocm/rocm_analyzer.py @@ -61,17 +61,40 @@ def analyze_data( if data.rocm_version == rocm_version: self.result.message = "ROCm version matches expected" self.result.status = ExecutionStatus.OK + break + else: + # No matching version found + self.result.message = ( + f"ROCm version mismatch! Expected: {args.exp_rocm}, actual: {data.rocm_version}" + ) + self.result.status = ExecutionStatus.ERROR + self._log_event( + category=EventCategory.SW_DRIVER, + description=f"{self.result.message}", + data={"expected": args.exp_rocm, "actual": data.rocm_version}, + priority=EventPriority.CRITICAL, + console_log=True, + ) + return self.result + + # validate rocm_latest if provided in args + if args.exp_rocm_latest: + if data.rocm_latest_versioned_path != args.exp_rocm_latest: + self.result.message = f"ROCm latest path mismatch! Expected: {args.exp_rocm_latest}, actual: {data.rocm_latest_versioned_path}" + self.result.status = ExecutionStatus.ERROR + self._log_event( + category=EventCategory.SW_DRIVER, + description=f"{self.result.message}", + data={ + "expected": args.exp_rocm_latest, + "actual": data.rocm_latest_versioned_path, + }, + priority=EventPriority.CRITICAL, + console_log=True, + ) return self.result + else: + # Update message to include rocm_latest validation result + self.result.message = f"ROCm version matches expected. ROCm latest path validated: {data.rocm_latest_versioned_path}" - self.result.message = ( - f"ROCm version mismatch! Expected: {args.exp_rocm}, actual: {data.rocm_version}" - ) - self.result.status = ExecutionStatus.ERROR - self._log_event( - category=EventCategory.SW_DRIVER, - description=f"{self.result.message}", - data={"expected": args.exp_rocm, "actual": data.rocm_version}, - priority=EventPriority.CRITICAL, - console_log=True, - ) return self.result diff --git a/nodescraper/plugins/inband/rocm/rocm_collector.py b/nodescraper/plugins/inband/rocm/rocm_collector.py index 37470f68..f7692e45 100644 --- a/nodescraper/plugins/inband/rocm/rocm_collector.py +++ b/nodescraper/plugins/inband/rocm/rocm_collector.py @@ -26,8 +26,10 @@ from typing import Optional from nodescraper.base import InBandDataCollector +from nodescraper.connection.inband import TextFileArtifact from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily from nodescraper.models import TaskResult +from nodescraper.utils import strip_ansi_codes from .rocmdata import RocmDataModel @@ -42,6 +44,14 @@ class RocmCollector(InBandDataCollector[RocmDataModel, None]): "/opt/rocm/.info/version-rocm", "/opt/rocm/.info/version", ] + CMD_ROCMINFO = "{rocm_path}/bin/rocminfo" + CMD_ROCM_LATEST = "ls -v -d /opt/rocm-[3-7]* | tail -1" + CMD_ROCM_DIRS = "ls -v -d /opt/rocm*" + CMD_LD_CONF = "grep -i -E 'rocm' /etc/ld.so.conf.d/*" + CMD_ROCM_LIBS = "ldconfig -p | grep -i -E 'rocm'" + CMD_ENV_VARS = "env | grep -Ei 'rocm|hsa|hip|mpi|openmp|ucx|miopen'" + CMD_CLINFO = "{rocm_path}/opencl/bin/*/clinfo" + CMD_KFD_PROC = "ls /sys/class/kfd/kfd/proc/" def collect_data(self, args=None) -> tuple[TaskResult, Optional[RocmDataModel]]: """Collect ROCm version data from the system. @@ -49,33 +59,134 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[RocmDataModel]]: Returns: tuple[TaskResult, Optional[RocmDataModel]]: tuple containing the task result and ROCm data model if available. """ - version_paths = [ - "/opt/rocm/.info/version-rocm", - "/opt/rocm/.info/version", - ] - rocm_data = None for path in self.CMD_VERSION_PATHS: res = self._run_sut_cmd(f"grep . {path}") if res.exit_code == 0: - rocm_data = RocmDataModel(rocm_version=res.stdout) - self._log_event( - category="ROCM_VERSION_READ", - description="ROCm version data collected", - data=rocm_data.model_dump(), - priority=EventPriority.INFO, - ) - self.result.message = f"ROCm: {rocm_data.model_dump()}" - self.result.status = ExecutionStatus.OK - break + try: + rocm_data = RocmDataModel(rocm_version=res.stdout) + self._log_event( + category="ROCM_VERSION_READ", + description="ROCm version data collected", + data=rocm_data.model_dump(include={"rocm_version"}), + priority=EventPriority.INFO, + ) + self.result.message = f"ROCm version: {rocm_data.rocm_version}" + self.result.status = ExecutionStatus.OK + break + except ValueError as e: + self._log_event( + category=EventCategory.OS, + description=f"Invalid ROCm version format: {res.stdout}", + data={"version": res.stdout, "error": str(e)}, + priority=EventPriority.ERROR, + console_log=True, + ) + self.result.message = f"Invalid ROCm version format: {res.stdout}" + self.result.status = ExecutionStatus.ERROR + return self.result, None else: self._log_event( category=EventCategory.OS, - description=f"Unable to read ROCm version from {version_paths}", + description=f"Unable to read ROCm version from {self.CMD_VERSION_PATHS}", data={"raw_output": res.stdout}, priority=EventPriority.ERROR, ) + # Collect additional ROCm data if version was found + if rocm_data: + # Collect latest versioned ROCm path (rocm-[3-7]*) + versioned_path_res = self._run_sut_cmd(self.CMD_ROCM_LATEST) + if versioned_path_res.exit_code == 0: + rocm_data.rocm_latest_versioned_path = versioned_path_res.stdout.strip() + + # Collect all ROCm paths as list + all_paths_res = self._run_sut_cmd(self.CMD_ROCM_DIRS) + if all_paths_res.exit_code == 0: + rocm_data.rocm_all_paths = [ + path.strip() + for path in all_paths_res.stdout.strip().split("\n") + if path.strip() + ] + + # Determine ROCm path for commands that need it + rocm_path = rocm_data.rocm_latest_versioned_path or "/opt/rocm" + + # Collect rocminfo output as list of lines with ANSI codes stripped + rocminfo_cmd = self.CMD_ROCMINFO.format(rocm_path=rocm_path) + rocminfo_res = self._run_sut_cmd(rocminfo_cmd) + rocminfo_artifact_content = "" + if rocminfo_res.exit_code == 0: + # Split into lines and strip ANSI codes from each line + rocm_data.rocminfo = [ + strip_ansi_codes(line) for line in rocminfo_res.stdout.strip().split("\n") + ] + rocminfo_artifact_content += "=" * 80 + "\n" + rocminfo_artifact_content += "ROCMNFO OUTPUT\n" + rocminfo_artifact_content += "=" * 80 + "\n\n" + rocminfo_artifact_content += rocminfo_res.stdout + + # Collect ld.so.conf ROCm entries + ld_conf_res = self._run_sut_cmd(self.CMD_LD_CONF) + if ld_conf_res.exit_code == 0: + rocm_data.ld_conf_rocm = [ + line.strip() for line in ld_conf_res.stdout.strip().split("\n") if line.strip() + ] + + # Collect ROCm libraries from ldconfig + rocm_libs_res = self._run_sut_cmd(self.CMD_ROCM_LIBS) + if rocm_libs_res.exit_code == 0: + rocm_data.rocm_libs = [ + line.strip() + for line in rocm_libs_res.stdout.strip().split("\n") + if line.strip() + ] + + # Collect ROCm-related environment variables + env_vars_res = self._run_sut_cmd(self.CMD_ENV_VARS) + if env_vars_res.exit_code == 0: + rocm_data.env_vars = [ + line.strip() for line in env_vars_res.stdout.strip().split("\n") if line.strip() + ] + + # Collect clinfo output + clinfo_cmd = self.CMD_CLINFO.format(rocm_path=rocm_path) + clinfo_res = self._run_sut_cmd(clinfo_cmd) + + # Always append clinfo section to artifact, even if empty or failed + if rocminfo_artifact_content: + rocminfo_artifact_content += "\n\n" + rocminfo_artifact_content += "=" * 80 + "\n" + rocminfo_artifact_content += "CLINFO OUTPUT\n" + rocminfo_artifact_content += "=" * 80 + "\n\n" + + if clinfo_res.exit_code == 0: + rocm_data.clinfo = [ + strip_ansi_codes(line) for line in clinfo_res.stdout.strip().split("\n") + ] + rocminfo_artifact_content += clinfo_res.stdout + else: + # Add error information if clinfo failed + rocminfo_artifact_content += f"Command: {clinfo_res.command}\n" + rocminfo_artifact_content += f"Exit Code: {clinfo_res.exit_code}\n" + if clinfo_res.stderr: + rocminfo_artifact_content += f"Error: {clinfo_res.stderr}\n" + if clinfo_res.stdout: + rocminfo_artifact_content += f"Output: {clinfo_res.stdout}\n" + + # Add combined rocminfo and clinfo output as a text file artifact + if rocminfo_artifact_content: + self.result.artifacts.append( + TextFileArtifact(filename="rocminfo.log", contents=rocminfo_artifact_content) + ) + + # Collect KFD process list + kfd_proc_res = self._run_sut_cmd(self.CMD_KFD_PROC) + if kfd_proc_res.exit_code == 0: + rocm_data.kfd_proc = [ + proc.strip() for proc in kfd_proc_res.stdout.strip().split("\n") if proc.strip() + ] + if not rocm_data: self._log_event( category=EventCategory.OS, diff --git a/nodescraper/plugins/inband/rocm/rocmdata.py b/nodescraper/plugins/inband/rocm/rocmdata.py index 2c5388e8..f0fb2618 100644 --- a/nodescraper/plugins/inband/rocm/rocmdata.py +++ b/nodescraper/plugins/inband/rocm/rocmdata.py @@ -24,6 +24,7 @@ # ############################################################################### import re +from typing import List from pydantic import field_validator @@ -32,6 +33,14 @@ class RocmDataModel(DataModel): rocm_version: str + rocminfo: List[str] = [] + rocm_latest_versioned_path: str = "" + rocm_all_paths: List[str] = [] + ld_conf_rocm: List[str] = [] + rocm_libs: List[str] = [] + env_vars: List[str] = [] + clinfo: List[str] = [] + kfd_proc: List[str] = [] @field_validator("rocm_version") @classmethod diff --git a/nodescraper/utils.py b/nodescraper/utils.py index ceaccea3..c76470de 100644 --- a/nodescraper/utils.py +++ b/nodescraper/utils.py @@ -245,3 +245,17 @@ def nice_rotated_name(path: str, stem: str, prefix: str = "rotated_") -> str: middle = base[:-3] if base.endswith(".gz") else base return f"{prefix}{middle}.log" + + +def strip_ansi_codes(text: str) -> str: + """ + Remove ANSI escape codes from text. + + Args: + text (str): The text string containing ANSI escape codes. + + Returns: + str: The text with ANSI escape codes removed. + """ + ansi_escape = re.compile(r"\x1b\[[0-9;]*m") + return ansi_escape.sub("", text) diff --git a/test/unit/plugin/test_rocm_analyzer.py b/test/unit/plugin/test_rocm_analyzer.py index 18afed9d..9ecc7fb4 100644 --- a/test/unit/plugin/test_rocm_analyzer.py +++ b/test/unit/plugin/test_rocm_analyzer.py @@ -42,7 +42,7 @@ def analyzer(system_info): @pytest.fixture def model_obj(): - return RocmDataModel(rocm_version="6.2.0-66") + return RocmDataModel(rocm_version="6.2.0-66", rocm_latest_versioned_path="/opt/rocm-7.1.0") @pytest.fixture @@ -50,14 +50,16 @@ def config(): return { "rocm_version": ["6.2.0-66"], "invalid": "invalid", + "rocm_latest": "/opt/rocm-7.1.0", } def test_all_good_data(analyzer, model_obj, config): - args = RocmAnalyzerArgs(exp_rocm=config["rocm_version"]) + args = RocmAnalyzerArgs(exp_rocm=config["rocm_version"], exp_rocm_latest=config["rocm_latest"]) result = analyzer.analyze_data(model_obj, args) assert result.status == ExecutionStatus.OK - assert result.message == "ROCm version matches expected" + assert "ROCm version matches expected" in result.message + assert "ROCm latest path validated" in result.message assert all( event.priority not in {EventPriority.WARNING, EventPriority.ERROR, EventPriority.CRITICAL} for event in result.events @@ -94,3 +96,16 @@ def test_unexpected_rocm_version(analyzer, model_obj): def test_invalid_user_config(analyzer, model_obj, config): result = analyzer.analyze_data(model_obj, None) assert result.status == ExecutionStatus.NOT_RAN + + +def test_rocm_latest_path_mismatch(analyzer, model_obj): + """Test that rocm_latest path mismatch is detected and logged""" + args = RocmAnalyzerArgs(exp_rocm=["6.2.0-66"], exp_rocm_latest="/opt/rocm-6.2.0") + result = analyzer.analyze_data(model_obj, args) + assert result.status == ExecutionStatus.ERROR + assert "ROCm latest path mismatch" in result.message + assert "/opt/rocm-6.2.0" in result.message # expected + assert "/opt/rocm-7.1.0" in result.message # actual + for event in result.events: + assert event.priority == EventPriority.CRITICAL + assert event.category == EventCategory.SW_DRIVER.value diff --git a/test/unit/plugin/test_rocm_collector.py b/test/unit/plugin/test_rocm_collector.py index 60e63f28..2b419ad1 100644 --- a/test/unit/plugin/test_rocm_collector.py +++ b/test/unit/plugin/test_rocm_collector.py @@ -23,68 +23,266 @@ # SOFTWARE. # ############################################################################### -import copy +from unittest.mock import MagicMock import pytest from nodescraper.enums.eventcategory import EventCategory -from nodescraper.enums.eventpriority import EventPriority from nodescraper.enums.executionstatus import ExecutionStatus -from nodescraper.plugins.inband.rocm.analyzer_args import RocmAnalyzerArgs -from nodescraper.plugins.inband.rocm.rocm_analyzer import RocmAnalyzer -from nodescraper.plugins.inband.rocm.rocmdata import RocmDataModel +from nodescraper.enums.systeminteraction import SystemInteractionLevel +from nodescraper.plugins.inband.rocm.rocm_collector import RocmCollector @pytest.fixture -def model_obj(): - return RocmDataModel(rocm_version="6.2.0-66") +def collector(system_info, conn_mock): + return RocmCollector( + system_info=system_info, + system_interaction_level=SystemInteractionLevel.PASSIVE, + connection=conn_mock, + ) -@pytest.fixture -def analyzer(system_info): - return RocmAnalyzer(system_info=system_info) +def test_collect_rocm_version_success(collector): + """Test successful collection of ROCm version from version-rocm file""" + collector._run_sut_cmd = MagicMock( + return_value=MagicMock( + exit_code=0, + stdout="6.2.0-66", + command="grep . /opt/rocm/.info/version-rocm", + ) + ) + result, data = collector.collect_data() -def test_all_good_data(analyzer, model_obj): - args = RocmAnalyzerArgs(exp_rocm=["6.2.0-66"]) - result = analyzer.analyze_data(model_obj, args=args) assert result.status == ExecutionStatus.OK - assert result.message == "ROCm version matches expected" - assert all( - event.priority not in [EventPriority.WARNING, EventPriority.ERROR, EventPriority.CRITICAL] - for event in result.events + assert data is not None + assert data.rocm_version == "6.2.0-66" + assert "ROCm version: 6.2.0-66" in result.message + + +def test_collect_rocm_version_fallback(collector): + """Test fallback to version file when version-rocm fails""" + collector._run_sut_cmd = MagicMock( + side_effect=[ + MagicMock(exit_code=1, stdout="", command="grep . /opt/rocm/.info/version-rocm"), + MagicMock(exit_code=0, stdout="6.2.0-66", command="grep . /opt/rocm/.info/version"), + # Additional commands after finding version + MagicMock(exit_code=1, stdout=""), # latest path + MagicMock(exit_code=1, stdout=""), # all paths + MagicMock(exit_code=1, stdout=""), # rocminfo + MagicMock(exit_code=1, stdout=""), # ld.so.conf + MagicMock(exit_code=1, stdout=""), # rocm_libs + MagicMock(exit_code=1, stdout=""), # env_vars + MagicMock(exit_code=1, stdout=""), # clinfo + MagicMock(exit_code=1, stdout=""), # kfd_proc + ] ) + result, data = collector.collect_data() + + assert result.status == ExecutionStatus.OK + assert data is not None + assert data.rocm_version == "6.2.0-66" -def test_no_config_data(analyzer, model_obj): - result = analyzer.analyze_data(model_obj) - assert result.status == ExecutionStatus.NOT_RAN +def test_collect_rocm_version_not_found(collector): + """Test when ROCm version cannot be found""" + collector._run_sut_cmd = MagicMock( + return_value=MagicMock( + exit_code=1, + stdout="", + stderr="No such file or directory", + command="grep . /opt/rocm/.info/version-rocm", + ) + ) -def test_invalid_rocm_version(analyzer, model_obj): - modified_model = copy.deepcopy(model_obj) - modified_model.rocm_version = "some_invalid_version" - args = RocmAnalyzerArgs(exp_rocm=["6.2.0-66"]) - result = analyzer.analyze_data(modified_model, args=args) + result, data = collector.collect_data() assert result.status == ExecutionStatus.ERROR - assert "ROCm version mismatch!" in result.message - for event in result.events: - assert event.priority == EventPriority.CRITICAL - assert event.category == EventCategory.SW_DRIVER.value + assert data is None + assert "ROCm version not found" in result.message + assert any(event.category == EventCategory.OS.value for event in result.events) -def test_unexpected_rocm_version(analyzer, model_obj): - args = RocmAnalyzerArgs(exp_rocm=["9.8.7-65", "1.2.3-45"]) - result = analyzer.analyze_data(model_obj, args=args) +def test_collect_all_rocm_data(collector): + """Test collection of all ROCm data including tech support commands""" + # Mock all command outputs in sequence + collector._run_sut_cmd = MagicMock( + side_effect=[ + # ROCm version + MagicMock(exit_code=0, stdout="6.2.0-66"), + # Latest versioned path + MagicMock(exit_code=0, stdout="/opt/rocm-1.1.0"), + # All ROCm paths + MagicMock(exit_code=0, stdout="/opt/rocm\n/opt/rocm-1.2.3\n/opt/rocm-5.6.0"), + # rocminfo output + MagicMock( + exit_code=0, + stdout="ROCk module is loaded\nAgent 1\n Name: AMD Instinct MI1234XYZ\n Marketing Name: MI1234XYZ", + ), + # ld.so.conf entries + MagicMock( + exit_code=0, + stdout="/etc/ld.so.conf.d/10-rocm-opencl.conf:/opt/rocm-7.0.0/lib\n/etc/ld.so.conf.d/10-rocm-opencl.conf:/opt/rocm-7.0.0/lib64", + ), + # ROCm libraries from ldconfig + MagicMock( + exit_code=0, + stdout="librocm_smi64.so.7 (libc6,x86-64) => /opt/rocm/lib/librocm_smi64.so.7\nlibhsa-runtime64.so.1 (libc6,x86-64) => /opt/rocm/lib/libhsa-runtime64.so.1", + ), + # Environment variables + MagicMock( + exit_code=0, + stdout="ROCM_PATH=/opt/rocm\nSLURM_MPI_TYPE=pmi2\n__LMOD_REF_COUNT_MODULEPATH=/share/contrib-modules/.mfiles/Core:1\nMODULEPATH=/share/contrib-modules/", + ), + # clinfo output + MagicMock( + exit_code=0, + stdout="Number of platforms: 1\nPlatform Name: AMD Accelerated Parallel Processing\nPlatform Vendor: Advanced Micro Devices, Inc.\nPlatform Version: OpenCL 2.0 AMD-APP (XXXX.X)\nPlatform Profile: FULL_PROFILE\nPlatform Extensions: cl_khr_icd cl_khr_il_program", + ), + # KFD process list + MagicMock(exit_code=0, stdout="1234\n5678"), + ] + ) - assert result.status == ExecutionStatus.ERROR - assert "ROCm version mismatch!" in result.message - for event in result.events: - assert event.priority == EventPriority.CRITICAL - assert event.category == EventCategory.SW_DRIVER.value + result, data = collector.collect_data() + + # Verify result status + assert result.status == ExecutionStatus.OK + assert data is not None + + # Verify ROCm version + assert data.rocm_version == "6.2.0-66" + + # Verify ROCm latest path + assert data.rocm_latest_versioned_path == "/opt/rocm-1.1.0" + + # Verify all ROCm paths + assert data.rocm_all_paths == ["/opt/rocm", "/opt/rocm-1.2.3", "/opt/rocm-5.6.0"] + + # Verify rocminfo output + assert len(data.rocminfo) == 4 + assert "ROCk module is loaded" in data.rocminfo[0] + assert "AMD Instinct MI1234XYZ" in data.rocminfo[2] + + # Verify ld.so.conf entries + assert len(data.ld_conf_rocm) == 2 + assert "/etc/ld.so.conf.d/10-rocm-opencl.conf:/opt/rocm-7.0.0/lib" in data.ld_conf_rocm + assert "/etc/ld.so.conf.d/10-rocm-opencl.conf:/opt/rocm-7.0.0/lib64" in data.ld_conf_rocm + + # Verify ROCm libraries + assert len(data.rocm_libs) == 2 + assert any("librocm_smi64" in lib for lib in data.rocm_libs) + assert any("libhsa-runtime64" in lib for lib in data.rocm_libs) + + # Verify environment variables + assert len(data.env_vars) == 4 + assert "ROCM_PATH=/opt/rocm" in data.env_vars + assert "MODULEPATH=/share/contrib-modules/" in data.env_vars + + # Verify clinfo output + assert len(data.clinfo) == 6 + assert "AMD Accelerated Parallel Processing" in data.clinfo[1] + # Verify KFD process list + assert len(data.kfd_proc) == 2 + assert "1234" in data.kfd_proc + assert "5678" in data.kfd_proc -def test_invalid_user_config(analyzer, model_obj): - result = analyzer.analyze_data(model_obj, None) - assert result.status == ExecutionStatus.NOT_RAN + # Verify artifact was created + assert len(result.artifacts) == 1 + assert result.artifacts[0].filename == "rocminfo.log" + assert "ROCMNFO OUTPUT" in result.artifacts[0].contents + assert "CLINFO OUTPUT" in result.artifacts[0].contents + + +def test_collect_with_clinfo_failure(collector): + """Test that clinfo failure is handled gracefully and captured in artifact""" + collector._run_sut_cmd = MagicMock( + side_effect=[ + # ROCm version + MagicMock(exit_code=0, stdout="6.2.0-66"), + # Latest versioned path + MagicMock(exit_code=0, stdout="/opt/rocm-7.1.0"), + # All ROCm paths + MagicMock(exit_code=0, stdout="/opt/rocm"), + # rocminfo success + MagicMock(exit_code=0, stdout="ROCk module loaded"), + # Other commands + MagicMock(exit_code=1, stdout=""), + MagicMock(exit_code=1, stdout=""), + MagicMock(exit_code=1, stdout=""), + # clinfo failure + MagicMock( + exit_code=127, + stdout="", + stderr="No such file or directory", + command="/opt/rocm-7.1.0/opencl/bin/*/clinfo", + ), + # kfd_proc + MagicMock(exit_code=0, stdout=""), + ] + ) + + result, data = collector.collect_data() + + assert result.status == ExecutionStatus.OK + assert data.clinfo == [] + + # Verify artifact contains error information + assert len(result.artifacts) == 1 + artifact_content = result.artifacts[0].contents + assert "CLINFO OUTPUT" in artifact_content + assert "Exit Code: 127" in artifact_content + assert "No such file or directory" in artifact_content + + +def test_collect_minimal_data(collector): + """Test collection when only version is available""" + collector._run_sut_cmd = MagicMock( + side_effect=[ + # ROCm version + MagicMock(exit_code=0, stdout="6.2.0-66"), + # All subsequent commands fail + MagicMock(exit_code=1, stdout=""), # latest path + MagicMock(exit_code=1, stdout=""), # all paths + MagicMock(exit_code=1, stdout=""), # rocminfo + MagicMock(exit_code=1, stdout=""), # ld.so.conf + MagicMock(exit_code=1, stdout=""), # rocm_libs + MagicMock(exit_code=1, stdout=""), # env_vars + MagicMock(exit_code=1, stdout=""), # clinfo + MagicMock(exit_code=1, stdout=""), # kfd_proc + ] + ) + + result, data = collector.collect_data() + + assert result.status == ExecutionStatus.OK + assert data is not None + assert data.rocm_version == "6.2.0-66" + + # Verify optional fields have default values + assert data.rocm_latest_versioned_path == "" + assert data.rocm_all_paths == [] + assert data.rocminfo == [] + assert data.ld_conf_rocm == [] + assert data.rocm_libs == [] + assert data.env_vars == [] + assert data.clinfo == [] + assert data.kfd_proc == [] + + +def test_invalid_rocm_version_format(collector): + """Test that invalid ROCm version format is handled gracefully""" + collector._run_sut_cmd = MagicMock( + return_value=MagicMock( + exit_code=0, + stdout="invalid_version_format", + ) + ) + + result, data = collector.collect_data() + + assert result.status == ExecutionStatus.ERROR + assert data is None + assert len(result.events) >= 1