From d81cc26489d77eb050cb26dd5f3640186cf8b3d2 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 17 Jul 2025 11:39:23 -0500 Subject: [PATCH 1/5] first pass + utest --- nodescraper/cli/cli.py | 25 +++++++- nodescraper/cli/helper.py | 75 +++++++++++++++++++++++ test/unit/framework/test_cli_helper.py | 83 +++++++++++++++++++++++++- 3 files changed, 180 insertions(+), 3 deletions(-) diff --git a/nodescraper/cli/cli.py b/nodescraper/cli/cli.py index 5bfdd30d..9265b004 100644 --- a/nodescraper/cli/cli.py +++ b/nodescraper/cli/cli.py @@ -35,8 +35,10 @@ from nodescraper.cli.constants import DEFAULT_CONFIG, META_VAR_MAP from nodescraper.cli.dynamicparserbuilder import DynamicParserBuilder from nodescraper.cli.helper import ( + dump_results_to_csv, generate_reference_config, generate_reference_config_from_logs, + generate_summary, get_plugin_configs, get_system_info, log_system_info, @@ -154,6 +156,18 @@ def build_parser( subparsers = parser.add_subparsers(dest="subcmd", help="Subcommands") + summary_parser = subparsers.add_parser( + "summary", + help="Generates summary csv file", + ) + + summary_parser.add_argument( + "--summary_path", + dest="summary_path", + type=log_path_arg, + help="Path to node-scraper results. 
Generates summary csv file in summary.csv.", + ) + run_plugin_parser = subparsers.add_parser( "run-plugins", help="Run a series of plugins", @@ -327,12 +341,13 @@ def main(arg_input: Optional[list[str]] = None): parsed_args = parser.parse_args(top_level_args) system_info = get_system_info(parsed_args) + sname = system_info.name.lower().replace("-", "_").replace(".", "_") + timestamp = datetime.datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p") if parsed_args.log_path and parsed_args.subcmd not in ["gen-plugin-config", "describe"]: - sname = system_info.name.lower().replace("-", "_").replace(".", "_") log_path = os.path.join( parsed_args.log_path, - f"scraper_logs_{sname}_{datetime.datetime.now().strftime('%Y_%m_%d-%I_%M_%S_%p')}", + f"scraper_logs_{sname}_{timestamp}", ) os.makedirs(log_path) else: @@ -342,6 +357,10 @@ def main(arg_input: Optional[list[str]] = None): if log_path: logger.info("Log path: %s", log_path) + if parsed_args.subcmd == "summary": + generate_summary(parsed_args.summary_path, logger) + sys.exit(0) + if parsed_args.subcmd == "describe": parse_describe(parsed_args, plugin_reg, config_reg, logger) @@ -407,6 +426,8 @@ def main(arg_input: Optional[list[str]] = None): try: results = plugin_executor.run_queue() + dump_results_to_csv(results, sname, log_path, timestamp, logger) + if parsed_args.reference_config: ref_config = generate_reference_config(results, plugin_reg, logger) path = os.path.join(os.getcwd(), "reference_config.json") diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py index aa6ea854..403d5810 100644 --- a/nodescraper/cli/helper.py +++ b/nodescraper/cli/helper.py @@ -24,6 +24,8 @@ # ############################################################################### import argparse +import csv +import glob import json import logging import os @@ -422,3 +424,76 @@ def find_datamodel_and_result(base_path: str) -> list[Tuple[str, str]]: tuple_list.append((datamodel_path, result_path)) return tuple_list + + +def 
dump_results_to_csv( + results: list[PluginResult], + nodename: str, + log_path: str, + timestamp: str, + logger: logging.Logger, +): + """dump node-scraper summary results to csv file + + Args: + results (list[PluginResult]): list of PluginResults + nodename (str): node where results come from + log_path (str): path to results + timestamp (str): time when results were taken + logger (logging.Logger): instance of logger + """ + fieldnames = ["nodename", "plugin", "status", "timestamp", "message"] + filename = log_path + "/errorscraper.csv" + all_rows = [] + for res in results: + row = { + "nodename": nodename, + "plugin": res.source, + "status": res.status.name, + "timestamp": timestamp, + "message": res.message, + } + all_rows.append(row) + dump_to_csv(all_rows, filename, fieldnames, logger) + + +def dump_to_csv(all_rows: list, filename: str, fieldnames: list[str], logger: logging.Logger): + """dump data to csv + + Args: + all_rows (list): rows to be written + filename (str): name of file to write to + fieldnames (list[str]): header for csv file + logger (logging.Logger): instance of logger + """ + try: + with open(filename, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + for row in all_rows: + writer.writerow(row) + except Exception as exp: + logger.error("Could not dump data to csv file: %s", exp) + logger.info("Data written to csv file: %s", filename) + + +def generate_summary(base_path: str, logger: logging.Logger): + """Concatenate csv files into 1 summary csv file + + Args: + base_path (str): base path to look for csv files + logger (logging.Logger): instance of logger + """ + fieldnames = ["nodename", "plugin", "status", "timestamp", "message"] + all_rows = [] + + pattern = os.path.join(base_path, "**", "errorscraper.csv") + for filepath in glob.glob(pattern, recursive=True): + logger.info(f"Reading: {filepath}") + with open(filepath, newline="") as f: + reader = csv.DictReader(f) + for row in reader: + 
all_rows.append(row) + + output_path = os.path.join(base_path, "summary.csv") + dump_to_csv(all_rows, output_path, fieldnames, logger) diff --git a/test/unit/framework/test_cli_helper.py b/test/unit/framework/test_cli_helper.py index 8ff75f5f..e505b1d6 100644 --- a/test/unit/framework/test_cli_helper.py +++ b/test/unit/framework/test_cli_helper.py @@ -24,6 +24,7 @@ # ############################################################################### import argparse +import csv import json import logging import os @@ -35,7 +36,13 @@ from pydantic import BaseModel from nodescraper.cli import cli -from nodescraper.cli.helper import build_config, find_datamodel_and_result +from nodescraper.cli.helper import ( + build_config, + dump_results_to_csv, + dump_to_csv, + find_datamodel_and_result, + generate_summary, +) from nodescraper.configregistry import ConfigRegistry from nodescraper.enums import ExecutionStatus, SystemInteractionLevel from nodescraper.models import PluginConfig, TaskResult @@ -176,3 +183,77 @@ def build_from_model(cls, datamodel): assert isinstance(cfg, PluginConfig) assert set(cfg.plugins) == {parent} assert cfg.plugins[parent]["analysis_args"] == {} + + +def test_dump_to_csv(tmp_path): + logger = logging.getLogger() + data = [ + { + "nodename": "node1", + "plugin": "TestPlugin", + "status": "OK", + "timestamp": "2025_07_16-12_00_00_PM", + "message": "Success", + } + ] + filename = tmp_path / "test.csv" + fieldnames = list(data[0].keys()) + + dump_to_csv(data, str(filename), fieldnames, logger) + + with open(filename, newline="") as f: + reader = list(csv.DictReader(f)) + assert reader == data + + +def test_dump_results_to_csv(tmp_path, caplog): + logger = logging.getLogger() + + result = PluginResult( + source="TestPlugin", status=ExecutionStatus.OK, message="some message", result_data={} + ) + + dump_results_to_csv([result], "node123", str(tmp_path), "2025_07_16-01_00_00_PM", logger) + + out_file = tmp_path / "errorscraper.csv" + assert 
out_file.exists() + + with open(out_file, newline="") as f: + reader = list(csv.DictReader(f)) + assert reader[0]["nodename"] == "node123" + assert reader[0]["plugin"] == "TestPlugin" + assert reader[0]["status"] == "OK" + assert reader[0]["message"] == "some message" + + +def test_generate_summary(tmp_path): + logger = logging.getLogger() + + subdir = tmp_path / "sub" + subdir.mkdir() + + errorscraper_path = subdir / "errorscraper.csv" + with open(errorscraper_path, "w", newline="") as f: + writer = csv.DictWriter( + f, fieldnames=["nodename", "plugin", "status", "timestamp", "message"] + ) + writer.writeheader() + writer.writerow( + { + "nodename": "nodeX", + "plugin": "PluginA", + "status": "OK", + "timestamp": "2025_07_16-01_00_00_PM", + "message": "some message", + } + ) + + generate_summary(str(tmp_path), logger) + + summary_path = tmp_path / "summary.csv" + assert summary_path.exists() + + with open(summary_path, newline="") as f: + rows = list(csv.DictReader(f)) + assert len(rows) == 1 + assert rows[0]["plugin"] == "PluginA" From 6f31b6a72ac6a96a68465908b209627020557e0a Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 17 Jul 2025 11:53:49 -0500 Subject: [PATCH 2/5] added README --- README.md | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 19d77a97..fe65288e 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,14 @@ usage: node-scraper [-h] [--sys-name STRING] [--sys-location {LOCAL,REMOTE}] [-- [--sys-sku STRING] [--sys-platform STRING] [--plugin-configs [STRING ...]] [--system-config STRING] [--connection-config STRING] [--log-path STRING] [--log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}] [--gen-reference-config] - {run-plugins,describe,gen-plugin-config} ... + {summary,run-plugins,describe,gen-plugin-config} ... 
node scraper CLI positional arguments: - {run-plugins,describe,gen-plugin-config} + {summary,run-plugins,describe,gen-plugin-config} Subcommands + summary Generates summary csv file run-plugins Run a series of plugins describe Display details on a built-in config or plugin gen-plugin-config Generate a config for a plugin or list of plugins @@ -38,7 +39,8 @@ options: --sys-location {LOCAL,REMOTE} Location of target system (default: LOCAL) --sys-interaction-level {PASSIVE,INTERACTIVE,DISRUPTIVE} - Specify system interaction level, used to determine the type of actions that plugins can perform (default: INTERACTIVE) + Specify system interaction level, used to determine the type of actions that plugins can perform (default: + INTERACTIVE) --sys-sku STRING Manually specify SKU of system (default: None) --sys-platform STRING Specify system platform (default: None) @@ -54,7 +56,6 @@ options: --gen-reference-config Generate reference config from system. Writes to ./reference_config.json. (default: False) - ``` ### Subcommmands @@ -167,6 +168,16 @@ This would produce the following config: } ``` +4. **'summary' sub command** +The 'summary' subcommand can be used to combine results from multiple runs of node-scraper to a +single summary.csv file. Sample run: +```sh +node-scraper summary --summary_path / +``` +This will generate a new file '//summary.csv' file. This file will +contain the results from all 'errorscraper.csv' files from '/'. + + ### Plugin Configs A plugin JSON config should follow the structure of the plugin config model defined here. 
The globals field is a dictionary of global key-value pairs; values in globals will be passed to From 05703572e9dcdd88bda17618640fb076a60cea0d Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Fri, 25 Jul 2025 10:29:42 -0500 Subject: [PATCH 3/5] erroscraper.csv -> nodescraper.csv --- README.md | 2 +- nodescraper/cli/helper.py | 4 ++-- test/unit/framework/test_cli_helper.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index fe65288e..bb8cd7fd 100644 --- a/README.md +++ b/README.md @@ -175,7 +175,7 @@ single summary.csv file. Sample run: node-scraper summary --summary_path / ``` This will generate a new file '//summary.csv' file. This file will -contain the results from all 'errorscraper.csv' files from '/'. +contain the results from all 'nodescraper.csv' files from '/'. ### Plugin Configs diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py index 403d5810..4f10bca7 100644 --- a/nodescraper/cli/helper.py +++ b/nodescraper/cli/helper.py @@ -443,7 +443,7 @@ def dump_results_to_csv( logger (logging.Logger): instance of logger """ fieldnames = ["nodename", "plugin", "status", "timestamp", "message"] - filename = log_path + "/errorscraper.csv" + filename = log_path + "/nodescraper.csv" all_rows = [] for res in results: row = { @@ -487,7 +487,7 @@ def generate_summary(base_path: str, logger: logging.Logger): fieldnames = ["nodename", "plugin", "status", "timestamp", "message"] all_rows = [] - pattern = os.path.join(base_path, "**", "errorscraper.csv") + pattern = os.path.join(base_path, "**", "nodescraper.csv") for filepath in glob.glob(pattern, recursive=True): logger.info(f"Reading: {filepath}") with open(filepath, newline="") as f: diff --git a/test/unit/framework/test_cli_helper.py b/test/unit/framework/test_cli_helper.py index e505b1d6..04aaa766 100644 --- a/test/unit/framework/test_cli_helper.py +++ b/test/unit/framework/test_cli_helper.py @@ -215,7 +215,7 @@ def test_dump_results_to_csv(tmp_path, caplog): 
dump_results_to_csv([result], "node123", str(tmp_path), "2025_07_16-01_00_00_PM", logger) - out_file = tmp_path / "errorscraper.csv" + out_file = tmp_path / "nodescraper.csv" assert out_file.exists() with open(out_file, newline="") as f: @@ -232,7 +232,7 @@ def test_generate_summary(tmp_path): subdir = tmp_path / "sub" subdir.mkdir() - errorscraper_path = subdir / "errorscraper.csv" + errorscraper_path = subdir / "nodescraper.csv" with open(errorscraper_path, "w", newline="") as f: writer = csv.DictWriter( f, fieldnames=["nodename", "plugin", "status", "timestamp", "message"] From 923aa039dbf491dd03ca9add123b080b6c006c04 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Tue, 29 Jul 2025 10:45:55 -0500 Subject: [PATCH 4/5] added output-path for summary --- nodescraper/cli/cli.py | 17 ++++++++++++----- nodescraper/cli/helper.py | 23 ++++++++++++++++++----- test/unit/framework/test_cli_helper.py | 2 +- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/nodescraper/cli/cli.py b/nodescraper/cli/cli.py index 9265b004..c1e7161f 100644 --- a/nodescraper/cli/cli.py +++ b/nodescraper/cli/cli.py @@ -162,10 +162,17 @@ def build_parser( ) summary_parser.add_argument( - "--summary_path", - dest="summary_path", + "--search-path", + dest="search_path", type=log_path_arg, - help="Path to node-scraper results. 
Generates summary csv file in summary.csv.", + help="Path to node-scraper previously generated results.", + ) + + summary_parser.add_argument( + "--output-path", + dest="output_path", + type=log_path_arg, + help="Specifies path for summary.csv.", ) run_plugin_parser = subparsers.add_parser( @@ -263,7 +270,7 @@ def setup_logger(log_level: str = "INFO", log_path: str | None = None) -> loggin handlers = [logging.StreamHandler(stream=sys.stdout)] if log_path: - log_file_name = os.path.join(log_path, "errorscraper.log") + log_file_name = os.path.join(log_path, "nodescraper.log") handlers.append( logging.FileHandler(filename=log_file_name, mode="wt", encoding="utf-8"), ) @@ -358,7 +365,7 @@ def main(arg_input: Optional[list[str]] = None): logger.info("Log path: %s", log_path) if parsed_args.subcmd == "summary": - generate_summary(parsed_args.summary_path, logger) + generate_summary(parsed_args.search_path, parsed_args.output_path, logger) sys.exit(0) if parsed_args.subcmd == "describe": diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py index fa69955b..e166744a 100644 --- a/nodescraper/cli/helper.py +++ b/nodescraper/cli/helper.py @@ -479,23 +479,36 @@ def dump_to_csv(all_rows: list, filename: str, fieldnames: list[str], logger: lo logger.info("Data written to csv file: %s", filename) -def generate_summary(base_path: str, logger: logging.Logger): +def generate_summary(search_path: str, output_path: str, logger: logging.Logger): """Concatenate csv files into 1 summary csv file Args: - base_path (str): base path to look for csv files + search_path (str): base path to look for csv files logger (logging.Logger): instance of logger """ fieldnames = ["nodename", "plugin", "status", "timestamp", "message"] all_rows = [] - pattern = os.path.join(base_path, "**", "nodescraper.csv") - for filepath in glob.glob(pattern, recursive=True): + pattern = os.path.join(search_path, "**", "nodescraper.csv") + matched_files = glob.glob(pattern, recursive=True) + + if not 
matched_files: + logger.error(f"No nodescraper.csv files found under {search_path}") + return + + for filepath in matched_files: logger.info(f"Reading: {filepath}") with open(filepath, newline="") as f: reader = csv.DictReader(f) for row in reader: all_rows.append(row) - output_path = os.path.join(base_path, "summary.csv") + if not all_rows: + logger.error("No data rows found in matched CSV files.") + return + + if not output_path: + output_path = os.getcwd() + + output_path = os.path.join(output_path, "summary.csv") dump_to_csv(all_rows, output_path, fieldnames, logger) diff --git a/test/unit/framework/test_cli_helper.py b/test/unit/framework/test_cli_helper.py index 50f7a40a..7d008e68 100644 --- a/test/unit/framework/test_cli_helper.py +++ b/test/unit/framework/test_cli_helper.py @@ -250,7 +250,7 @@ def test_generate_summary(tmp_path): } ) - generate_summary(str(tmp_path), logger) + generate_summary(str(tmp_path), str(tmp_path), logger) summary_path = tmp_path / "summary.csv" assert summary_path.exists() From 07eeb3ffcb36960d89b7c59eed9837683c6eb1ce Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Wed, 30 Jul 2025 10:14:31 -0500 Subject: [PATCH 5/5] updated docstring --- nodescraper/cli/helper.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py index 054e4043..994f752c 100644 --- a/nodescraper/cli/helper.py +++ b/nodescraper/cli/helper.py @@ -479,13 +479,15 @@ def dump_to_csv(all_rows: list, filename: str, fieldnames: list[str], logger: lo logger.info("Data written to csv file: %s", filename) -def generate_summary(search_path: str, output_path: str, logger: logging.Logger): +def generate_summary(search_path: str, output_path: str | None, logger: logging.Logger): """Concatenate csv files into 1 summary csv file Args: - search_path (str): base path to look for csv files + search_path (str): Path for previous runs + output_path (str | None): Path for new summary csv file logger 
(logging.Logger): instance of logger """ + fieldnames = ["nodename", "plugin", "status", "timestamp", "message"] all_rows = []