Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ usage: node-scraper [-h] [--sys-name STRING] [--sys-location {LOCAL,REMOTE}] [--
[--sys-sku STRING] [--sys-platform STRING] [--plugin-configs [STRING ...]] [--system-config STRING]
[--connection-config STRING] [--log-path STRING] [--log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}]
[--gen-reference-config]
{run-plugins,describe,gen-plugin-config} ...
{summary,run-plugins,describe,gen-plugin-config} ...

node scraper CLI

positional arguments:
{run-plugins,describe,gen-plugin-config}
{summary,run-plugins,describe,gen-plugin-config}
Subcommands
summary Generates summary csv file
run-plugins Run a series of plugins
describe Display details on a built-in config or plugin
gen-plugin-config Generate a config for a plugin or list of plugins
Expand All @@ -38,7 +39,8 @@ options:
--sys-location {LOCAL,REMOTE}
Location of target system (default: LOCAL)
--sys-interaction-level {PASSIVE,INTERACTIVE,DISRUPTIVE}
Specify system interaction level, used to determine the type of actions that plugins can perform (default: INTERACTIVE)
Specify system interaction level, used to determine the type of actions that plugins can perform (default:
INTERACTIVE)
--sys-sku STRING Manually specify SKU of system (default: None)
--sys-platform STRING
Specify system platform (default: None)
Expand All @@ -54,7 +56,6 @@ options:
--gen-reference-config
Generate reference config from system. Writes to ./reference_config.json. (default: False)


```

### Subcommands
Expand Down Expand Up @@ -167,6 +168,16 @@ This would produce the following config:
}
```

4. **'summary' sub command**
The 'summary' subcommand can be used to combine results from multiple runs of node-scraper to a
single summary.csv file. Sample run:
```sh
node-scraper summary --search-path /<path_to_node-scraper_logs>
```
This will generate a new file '/<path_to_node-scraper_logs>/summary.csv'. This file will
contain the results from all 'nodescraper.csv' files found under '/<path_to_node-scraper_logs>'.


### Plugin Configs
A plugin JSON config should follow the structure of the plugin config model defined here.
The globals field is a dictionary of global key-value pairs; values in globals will be passed to
Expand Down
34 changes: 31 additions & 3 deletions nodescraper/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@
from nodescraper.cli.constants import DEFAULT_CONFIG, META_VAR_MAP
from nodescraper.cli.dynamicparserbuilder import DynamicParserBuilder
from nodescraper.cli.helper import (
dump_results_to_csv,
generate_reference_config,
generate_reference_config_from_logs,
generate_summary,
get_plugin_configs,
get_system_info,
log_system_info,
Expand Down Expand Up @@ -154,6 +156,25 @@ def build_parser(

subparsers = parser.add_subparsers(dest="subcmd", help="Subcommands")

summary_parser = subparsers.add_parser(
"summary",
help="Generates summary csv file",
)

summary_parser.add_argument(
"--search-path",
dest="search_path",
type=log_path_arg,
help="Path to node-scraper previously generated results.",
)

summary_parser.add_argument(
"--output-path",
dest="output_path",
type=log_path_arg,
help="Specifies path for summary.csv.",
)

run_plugin_parser = subparsers.add_parser(
"run-plugins",
help="Run a series of plugins",
Expand Down Expand Up @@ -249,7 +270,7 @@ def setup_logger(log_level: str = "INFO", log_path: str | None = None) -> loggin
handlers = [logging.StreamHandler(stream=sys.stdout)]

if log_path:
log_file_name = os.path.join(log_path, "errorscraper.log")
log_file_name = os.path.join(log_path, "nodescraper.log")
handlers.append(
logging.FileHandler(filename=log_file_name, mode="wt", encoding="utf-8"),
)
Expand Down Expand Up @@ -327,12 +348,13 @@ def main(arg_input: Optional[list[str]] = None):

parsed_args = parser.parse_args(top_level_args)
system_info = get_system_info(parsed_args)
sname = system_info.name.lower().replace("-", "_").replace(".", "_")
timestamp = datetime.datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p")

if parsed_args.log_path and parsed_args.subcmd not in ["gen-plugin-config", "describe"]:
sname = system_info.name.lower().replace("-", "_").replace(".", "_")
log_path = os.path.join(
parsed_args.log_path,
f"scraper_logs_{sname}_{datetime.datetime.now().strftime('%Y_%m_%d-%I_%M_%S_%p')}",
f"scraper_logs_{sname}_{timestamp}",
)
os.makedirs(log_path)
else:
Expand All @@ -342,6 +364,10 @@ def main(arg_input: Optional[list[str]] = None):
if log_path:
logger.info("Log path: %s", log_path)

if parsed_args.subcmd == "summary":
generate_summary(parsed_args.search_path, parsed_args.output_path, logger)
sys.exit(0)

if parsed_args.subcmd == "describe":
parse_describe(parsed_args, plugin_reg, config_reg, logger)

Expand Down Expand Up @@ -407,6 +433,8 @@ def main(arg_input: Optional[list[str]] = None):
try:
results = plugin_executor.run_queue()

dump_results_to_csv(results, sname, log_path, timestamp, logger)

if parsed_args.reference_config:
ref_config = generate_reference_config(results, plugin_reg, logger)
path = os.path.join(os.getcwd(), "reference_config.json")
Expand Down
90 changes: 90 additions & 0 deletions nodescraper/cli/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#
###############################################################################
import argparse
import csv
import glob
import json
import logging
import os
Expand Down Expand Up @@ -424,3 +426,91 @@ def find_datamodel_and_result(base_path: str) -> list[Tuple[str, str]]:
tuple_list.append((datamodel_path, result_path))

return tuple_list


def dump_results_to_csv(
    results: list[PluginResult],
    nodename: str,
    log_path: str,
    timestamp: str,
    logger: logging.Logger,
):
    """Dump node-scraper plugin results to a 'nodescraper.csv' file under log_path.

    Args:
        results (list[PluginResult]): list of PluginResults
        nodename (str): node where results come from
        log_path (str): directory the csv file is written to
        timestamp (str): time when results were taken
        logger (logging.Logger): instance of logger
    """
    fieldnames = ["nodename", "plugin", "status", "timestamp", "message"]
    # os.path.join instead of string concatenation so the path is portable
    filename = os.path.join(log_path, "nodescraper.csv")
    all_rows = [
        {
            "nodename": nodename,
            "plugin": res.source,
            "status": res.status.name,
            "timestamp": timestamp,
            "message": res.message,
        }
        for res in results
    ]
    dump_to_csv(all_rows, filename, fieldnames, logger)


def dump_to_csv(all_rows: list, filename: str, fieldnames: list[str], logger: logging.Logger):
    """Dump rows of data to a csv file.

    Args:
        all_rows (list): rows to be written
        filename (str): name of file to write to
        fieldnames (list[str]): header for csv file
        logger (logging.Logger): instance of logger
    """
    try:
        with open(filename, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_rows)
    except Exception as exp:
        logger.error("Could not dump data to csv file: %s", exp)
    else:
        # only report success when the file was actually written;
        # previously this logged even after a write failure
        logger.info("Data written to csv file: %s", filename)


def generate_summary(search_path: str, output_path: str | None, logger: logging.Logger):
    """Concatenate nodescraper.csv files into one summary csv file.

    Recursively searches search_path for 'nodescraper.csv' files and writes all
    of their rows to a single 'summary.csv'.

    Args:
        search_path (str): path containing previously generated node-scraper runs
        output_path (str | None): directory for the new summary csv file;
            defaults to the current working directory when None
        logger (logging.Logger): instance of logger
    """

    fieldnames = ["nodename", "plugin", "status", "timestamp", "message"]
    all_rows = []

    pattern = os.path.join(search_path, "**", "nodescraper.csv")
    matched_files = glob.glob(pattern, recursive=True)

    if not matched_files:
        # lazy %-style args keep logging consistent with the rest of this module
        logger.error("No nodescraper.csv files found under %s", search_path)
        return

    for filepath in matched_files:
        logger.info("Reading: %s", filepath)
        with open(filepath, newline="") as f:
            all_rows.extend(csv.DictReader(f))

    if not all_rows:
        logger.error("No data rows found in matched CSV files.")
        return

    if not output_path:
        output_path = os.getcwd()

    output_path = os.path.join(output_path, "summary.csv")
    dump_to_csv(all_rows, output_path, fieldnames, logger)
78 changes: 78 additions & 0 deletions test/unit/framework/test_cli_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#
###############################################################################
import argparse
import csv
import json
import logging
import os
Expand All @@ -39,7 +40,10 @@
from nodescraper.cli import cli
from nodescraper.cli.helper import (
build_config,
dump_results_to_csv,
dump_to_csv,
find_datamodel_and_result,
generate_summary,
)
from nodescraper.configregistry import ConfigRegistry
from nodescraper.enums import ExecutionStatus, SystemInteractionLevel
Expand Down Expand Up @@ -181,3 +185,77 @@ def build_from_model(cls, datamodel):
assert isinstance(cfg, PluginConfig)
assert set(cfg.plugins) == {parent}
assert cfg.plugins[parent]["analysis_args"] == {}


def test_dump_to_csv(tmp_path):
    """dump_to_csv round-trips rows through a csv file."""
    logger = logging.getLogger()
    rows = [
        {
            "nodename": "node1",
            "plugin": "TestPlugin",
            "status": "OK",
            "timestamp": "2025_07_16-12_00_00_PM",
            "message": "Success",
        }
    ]
    out_file = tmp_path / "test.csv"
    header = list(rows[0].keys())

    dump_to_csv(rows, str(out_file), header, logger)

    with open(out_file, newline="") as f:
        assert list(csv.DictReader(f)) == rows


def test_dump_results_to_csv(tmp_path):
    """dump_results_to_csv writes one row per PluginResult to nodescraper.csv."""
    # removed unused 'caplog' fixture parameter
    logger = logging.getLogger()

    result = PluginResult(
        source="TestPlugin", status=ExecutionStatus.OK, message="some message", result_data={}
    )

    dump_results_to_csv([result], "node123", str(tmp_path), "2025_07_16-01_00_00_PM", logger)

    out_file = tmp_path / "nodescraper.csv"
    assert out_file.exists()

    with open(out_file, newline="") as f:
        rows = list(csv.DictReader(f))
    assert rows[0]["nodename"] == "node123"
    assert rows[0]["plugin"] == "TestPlugin"
    assert rows[0]["status"] == "OK"
    # also pin the timestamp column the call supplies
    assert rows[0]["timestamp"] == "2025_07_16-01_00_00_PM"
    assert rows[0]["message"] == "some message"


def test_generate_summary(tmp_path):
    """generate_summary collects rows from nested nodescraper.csv files."""
    logger = logging.getLogger()

    run_dir = tmp_path / "sub"
    run_dir.mkdir()

    header = ["nodename", "plugin", "status", "timestamp", "message"]
    csv_path = run_dir / "nodescraper.csv"
    with open(csv_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerow(
            {
                "nodename": "nodeX",
                "plugin": "PluginA",
                "status": "OK",
                "timestamp": "2025_07_16-01_00_00_PM",
                "message": "some message",
            }
        )

    generate_summary(str(tmp_path), str(tmp_path), logger)

    summary_file = tmp_path / "summary.csv"
    assert summary_file.exists()

    with open(summary_file, newline="") as f:
        rows = list(csv.DictReader(f))
    assert len(rows) == 1
    assert rows[0]["plugin"] == "PluginA"