Skip to content
2 changes: 2 additions & 0 deletions nodescraper/interfaces/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ def _build_event(
data = {"task_name": self.__class__.__name__, "task_type": self.TASK_TYPE}

else:
# Copy to avoid mutating the caller's dict
data = copy.copy(data)
data["task_name"] = self.__class__.__name__
data["task_type"] = self.TASK_TYPE

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from typing import Optional

from nodescraper.base import InBandDataCollector
from nodescraper.connection.inband.inband import CommandArtifact
from nodescraper.connection.inband.inband import CommandArtifact, TextFileArtifact
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily
from nodescraper.models import TaskResult

Expand All @@ -38,9 +38,10 @@ class DeviceEnumerationCollector(InBandDataCollector[DeviceEnumerationDataModel,

DATA_MODEL = DeviceEnumerationDataModel

CMD_CPU_COUNT_LINUX = "lscpu | grep Socket | awk '{ print $2 }'"
CMD_GPU_COUNT_LINUX = "lspci -d {vendorid_ep}: | grep -i 'VGA\\|Display\\|3D' | wc -l"
CMD_VF_COUNT_LINUX = "lspci -d {vendorid_ep}: | grep -i 'Virtual Function' | wc -l"
CMD_LSCPU_LINUX = "lscpu"
CMD_LSHW_LINUX = "lshw"

CMD_CPU_COUNT_WINDOWS = (
'powershell -Command "(Get-WmiObject -Class Win32_Processor | Measure-Object).Count"'
Expand All @@ -61,9 +62,8 @@ def _warning(
description=description,
data={
"command": command.command,
"stdout": command.stdout,
"stderr": command.stderr,
"exit_code": command.exit_code,
"stderr": command.stderr,
},
priority=EventPriority.WARNING,
)
Expand All @@ -75,8 +75,7 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[DeviceEnumeratio
On Windows, use WMI and hyper-v cmdlets
"""
if self.system_info.os_family == OSFamily.LINUX:
# Count CPU sockets
cpu_count_res = self._run_sut_cmd(self.CMD_CPU_COUNT_LINUX)
lscpu_res = self._run_sut_cmd(self.CMD_LSCPU_LINUX, log_artifact=False)

# Count all AMD GPUs
vendor_id = format(self.system_info.vendorid_ep, "x")
Expand All @@ -86,17 +85,42 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[DeviceEnumeratio

# Count AMD Virtual Functions
vf_count_res = self._run_sut_cmd(self.CMD_VF_COUNT_LINUX.format(vendorid_ep=vendor_id))

# Collect lshw output
lshw_res = self._run_sut_cmd(self.CMD_LSHW_LINUX, sudo=True, log_artifact=False)
else:
cpu_count_res = self._run_sut_cmd(self.CMD_CPU_COUNT_WINDOWS)
gpu_count_res = self._run_sut_cmd(self.CMD_GPU_COUNT_WINDOWS)
vf_count_res = self._run_sut_cmd(self.CMD_VF_COUNT_WINDOWS)

device_enum = DeviceEnumerationDataModel()

if cpu_count_res.exit_code == 0:
device_enum.cpu_count = int(cpu_count_res.stdout)
if self.system_info.os_family == OSFamily.LINUX:
if lscpu_res.exit_code == 0 and lscpu_res.stdout:
# Extract socket count from lscpu output
for line in lscpu_res.stdout.splitlines():
if line.startswith("Socket(s):"):
try:
device_enum.cpu_count = int(line.split(":")[1].strip())
break
except (ValueError, IndexError):
self._warning(
description="Cannot parse CPU count from lscpu output",
command=lscpu_res,
)
device_enum.lscpu_output = lscpu_res.stdout
self._log_event(
category=EventCategory.PLATFORM,
description="Collected lscpu output",
priority=EventPriority.INFO,
)
else:
self._warning(description="Cannot collect lscpu output", command=lscpu_res)
else:
self._warning(description="Cannot determine CPU count", command=cpu_count_res)
if cpu_count_res.exit_code == 0:
device_enum.cpu_count = int(cpu_count_res.stdout)
else:
self._warning(description="Cannot determine CPU count", command=cpu_count_res)

if gpu_count_res.exit_code == 0:
device_enum.gpu_count = int(gpu_count_res.stdout)
Expand All @@ -112,14 +136,33 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[DeviceEnumeratio
category=EventCategory.SW_DRIVER,
)

# Collect lshw output on Linux
if self.system_info.os_family == OSFamily.LINUX:
if lshw_res.exit_code == 0 and lshw_res.stdout:
device_enum.lshw_output = lshw_res.stdout
self.result.artifacts.append(
TextFileArtifact(filename="lshw.txt", contents=lshw_res.stdout)
)
self._log_event(
category=EventCategory.PLATFORM,
description="Collected lshw output",
priority=EventPriority.INFO,
)
else:
self._warning(description="Cannot collect lshw output", command=lshw_res)

if device_enum.cpu_count or device_enum.gpu_count or device_enum.vf_count:
log_data = device_enum.model_dump(
exclude_none=True,
exclude={"lscpu_output", "lshw_output", "task_name", "task_type", "parent"},
)
self._log_event(
category=EventCategory.PLATFORM,
description=f"Counted {device_enum.cpu_count} CPUs, {device_enum.gpu_count} GPUs, {device_enum.vf_count} VFs",
data=device_enum.model_dump(exclude_none=True),
data=log_data,
priority=EventPriority.INFO,
)
self.result.message = f"Device Enumeration: {device_enum.model_dump(exclude_none=True)}"
self.result.message = f"Device Enumeration: {log_data}"
self.result.status = ExecutionStatus.OK
return self.result, device_enum
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,5 @@ class DeviceEnumerationDataModel(DataModel):
cpu_count: Optional[int] = None
gpu_count: Optional[int] = None
vf_count: Optional[int] = None
lscpu_output: Optional[str] = None
lshw_output: Optional[str] = None
31 changes: 26 additions & 5 deletions test/unit/plugin/test_device_enumeration_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,16 @@ def test_collect_linux(system_info, device_enumeration_collector):
"""Test linux typical output"""
system_info.os_family = OSFamily.LINUX

lscpu_output = "Architecture: x86_64\nCPU(s): 64\nSocket(s): 2"
lshw_output = "*-cpu\n product: AMD EPYC 1234 64-Core Processor"

device_enumeration_collector._run_sut_cmd = MagicMock(
side_effect=[
MagicMock(
exit_code=0,
stdout="2",
stdout=lscpu_output,
stderr="",
command="lscpu | grep Socket | awk '{ print $2 }'",
command="lscpu",
),
MagicMock(
exit_code=0,
Expand All @@ -71,12 +74,24 @@ def test_collect_linux(system_info, device_enumeration_collector):
stderr="",
command="lspci -d 1002: | grep -i 'Virtual Function' | wc -l",
),
MagicMock(
exit_code=0,
stdout=lshw_output,
stderr="",
command="lshw",
),
]
)

result, data = device_enumeration_collector.collect_data()
assert result.status == ExecutionStatus.OK
assert data == DeviceEnumerationDataModel(cpu_count=2, gpu_count=8, vf_count=0)
assert data == DeviceEnumerationDataModel(
cpu_count=2, gpu_count=8, vf_count=0, lscpu_output=lscpu_output, lshw_output=lshw_output
)
assert (
len([a for a in result.artifacts if hasattr(a, "filename") and a.filename == "lshw.txt"])
== 1
)


def test_collect_windows(system_info, device_enumeration_collector):
Expand Down Expand Up @@ -119,9 +134,9 @@ def test_collect_error(system_info, device_enumeration_collector):
side_effect=[
MagicMock(
exit_code=1,
stdout="some output",
stdout="",
stderr="command failed",
command="lscpu | grep Socket | awk '{ print $2 }'",
command="lscpu",
),
MagicMock(
exit_code=1,
Expand All @@ -135,6 +150,12 @@ def test_collect_error(system_info, device_enumeration_collector):
stderr="command failed",
command="lspci -d 1002: | grep -i 'Virtual Function' | wc -l",
),
MagicMock(
exit_code=1,
stdout="",
stderr="command failed",
command="lshw",
),
]
)

Expand Down