Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ usage: node-scraper [-h] [--sys-name STRING] [--sys-location {LOCAL,REMOTE}] [--
[--sys-sku STRING] [--sys-platform STRING] [--plugin-configs [STRING ...]] [--system-config STRING]
[--connection-config STRING] [--log-path STRING] [--log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}]
[--gen-reference-config]
{run-plugins,describe,gen-plugin-config} ...
{summary,run-plugins,describe,gen-plugin-config} ...

node scraper CLI

positional arguments:
{run-plugins,describe,gen-plugin-config}
{summary,run-plugins,describe,gen-plugin-config}
Subcommands
summary Generates summary csv file
run-plugins Run a series of plugins
describe Display details on a built-in config or plugin
gen-plugin-config Generate a config for a plugin or list of plugins
Expand All @@ -38,7 +39,8 @@ options:
--sys-location {LOCAL,REMOTE}
Location of target system (default: LOCAL)
--sys-interaction-level {PASSIVE,INTERACTIVE,DISRUPTIVE}
Specify system interaction level, used to determine the type of actions that plugins can perform (default: INTERACTIVE)
Specify system interaction level, used to determine the type of actions that plugins can perform (default:
INTERACTIVE)
--sys-sku STRING Manually specify SKU of system (default: None)
--sys-platform STRING
Specify system platform (default: None)
Expand All @@ -54,7 +56,6 @@ options:
--gen-reference-config
Generate reference config from system. Writes to ./reference_config.json. (default: False)


```

### Subcommands
Expand Down Expand Up @@ -167,6 +168,16 @@ This would produce the following config:
}
```

4. **'summary' sub command**
The 'summary' subcommand can be used to combine results from multiple runs of node-scraper to a
single summary.csv file. Sample run:
```sh
node-scraper summary --search-path /<path_to_node-scraper_logs>
```
This will generate a new file '/<path_to_node-scraper_logs>/summary.csv'. This file will
contain the results from all 'nodescraper.csv' files found under '/<path_to_node-scraper_logs>'.


### Plugin Configs
A plugin JSON config should follow the structure of the plugin config model defined here.
The globals field is a dictionary of global key-value pairs; values in globals will be passed to
Expand Down
34 changes: 31 additions & 3 deletions nodescraper/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@
from nodescraper.cli.constants import DEFAULT_CONFIG, META_VAR_MAP
from nodescraper.cli.dynamicparserbuilder import DynamicParserBuilder
from nodescraper.cli.helper import (
dump_results_to_csv,
generate_reference_config,
generate_reference_config_from_logs,
generate_summary,
get_plugin_configs,
get_system_info,
log_system_info,
Expand Down Expand Up @@ -154,6 +156,25 @@ def build_parser(

subparsers = parser.add_subparsers(dest="subcmd", help="Subcommands")

summary_parser = subparsers.add_parser(
"summary",
help="Generates summary csv file",
)

summary_parser.add_argument(
"--search-path",
dest="search_path",
type=log_path_arg,
help="Path to node-scraper previously generated results.",
)

summary_parser.add_argument(
"--output-path",
dest="output_path",
type=log_path_arg,
help="Specifies path for summary.csv.",
)

run_plugin_parser = subparsers.add_parser(
"run-plugins",
help="Run a series of plugins",
Expand Down Expand Up @@ -249,7 +270,7 @@ def setup_logger(log_level: str = "INFO", log_path: str | None = None) -> loggin
handlers = [logging.StreamHandler(stream=sys.stdout)]

if log_path:
log_file_name = os.path.join(log_path, "errorscraper.log")
log_file_name = os.path.join(log_path, "nodescraper.log")
handlers.append(
logging.FileHandler(filename=log_file_name, mode="wt", encoding="utf-8"),
)
Expand Down Expand Up @@ -327,12 +348,13 @@ def main(arg_input: Optional[list[str]] = None):

parsed_args = parser.parse_args(top_level_args)
system_info = get_system_info(parsed_args)
sname = system_info.name.lower().replace("-", "_").replace(".", "_")
timestamp = datetime.datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p")

if parsed_args.log_path and parsed_args.subcmd not in ["gen-plugin-config", "describe"]:
sname = system_info.name.lower().replace("-", "_").replace(".", "_")
log_path = os.path.join(
parsed_args.log_path,
f"scraper_logs_{sname}_{datetime.datetime.now().strftime('%Y_%m_%d-%I_%M_%S_%p')}",
f"scraper_logs_{sname}_{timestamp}",
)
os.makedirs(log_path)
else:
Expand All @@ -342,6 +364,10 @@ def main(arg_input: Optional[list[str]] = None):
if log_path:
logger.info("Log path: %s", log_path)

if parsed_args.subcmd == "summary":
generate_summary(parsed_args.search_path, parsed_args.output_path, logger)
sys.exit(0)

if parsed_args.subcmd == "describe":
parse_describe(parsed_args, plugin_reg, config_reg, logger)

Expand Down Expand Up @@ -407,6 +433,8 @@ def main(arg_input: Optional[list[str]] = None):
try:
results = plugin_executor.run_queue()

dump_results_to_csv(results, sname, log_path, timestamp, logger)

if parsed_args.reference_config:
ref_config = generate_reference_config(results, plugin_reg, logger)
path = os.path.join(os.getcwd(), "reference_config.json")
Expand Down
90 changes: 90 additions & 0 deletions nodescraper/cli/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#
###############################################################################
import argparse
import csv
import glob
import json
import logging
import os
Expand Down Expand Up @@ -424,3 +426,91 @@ def find_datamodel_and_result(base_path: str) -> list[Tuple[str, str]]:
tuple_list.append((datamodel_path, result_path))

return tuple_list


def dump_results_to_csv(
    results: list[PluginResult],
    nodename: str,
    log_path: str,
    timestamp: str,
    logger: logging.Logger,
):
    """Dump node-scraper plugin results to a 'nodescraper.csv' file under log_path.

    Args:
        results (list[PluginResult]): list of PluginResults
        nodename (str): node where results come from
        log_path (str): directory the csv file is written to
        timestamp (str): time when results were taken
        logger (logging.Logger): instance of logger
    """
    fieldnames = ["nodename", "plugin", "status", "timestamp", "message"]
    # os.path.join instead of string concatenation so the path is portable
    filename = os.path.join(log_path, "nodescraper.csv")
    all_rows = [
        {
            "nodename": nodename,
            "plugin": res.source,
            "status": res.status.name,
            "timestamp": timestamp,
            "message": res.message,
        }
        for res in results
    ]
    dump_to_csv(all_rows, filename, fieldnames, logger)


def dump_to_csv(all_rows: list, filename: str, fieldnames: list[str], logger: logging.Logger):
    """Dump rows of data to a csv file.

    Args:
        all_rows (list): rows to be written
        filename (str): name of file to write to
        fieldnames (list[str]): header for csv file
        logger (logging.Logger): instance of logger
    """
    try:
        with open(filename, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_rows)
    except Exception as exp:
        logger.error("Could not dump data to csv file: %s", exp)
    else:
        # only report success when the file was actually written;
        # previously this logged even after a write failure
        logger.info("Data written to csv file: %s", filename)


def generate_summary(search_path: str, output_path: str | None, logger: logging.Logger):
    """Concatenate nodescraper.csv files into one summary csv file.

    Recursively searches search_path for 'nodescraper.csv' files and writes all
    of their rows to a single 'summary.csv'.

    Args:
        search_path (str): path containing previously generated node-scraper runs
        output_path (str | None): directory for the new summary csv file;
            defaults to the current working directory when None
        logger (logging.Logger): instance of logger
    """

    fieldnames = ["nodename", "plugin", "status", "timestamp", "message"]
    all_rows = []

    pattern = os.path.join(search_path, "**", "nodescraper.csv")
    matched_files = glob.glob(pattern, recursive=True)

    if not matched_files:
        # lazy %-style args keep logging consistent with the rest of this module
        logger.error("No nodescraper.csv files found under %s", search_path)
        return

    for filepath in matched_files:
        logger.info("Reading: %s", filepath)
        with open(filepath, newline="") as f:
            all_rows.extend(csv.DictReader(f))

    if not all_rows:
        logger.error("No data rows found in matched CSV files.")
        return

    if not output_path:
        output_path = os.getcwd()

    output_path = os.path.join(output_path, "summary.csv")
    dump_to_csv(all_rows, output_path, fieldnames, logger)
78 changes: 78 additions & 0 deletions test/unit/framework/test_cli_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#
###############################################################################
import argparse
import csv
import json
import logging
import os
Expand All @@ -39,7 +40,10 @@
from nodescraper.cli import cli
from nodescraper.cli.helper import (
build_config,
dump_results_to_csv,
dump_to_csv,
find_datamodel_and_result,
generate_summary,
)
from nodescraper.configregistry import ConfigRegistry
from nodescraper.enums import ExecutionStatus, SystemInteractionLevel
Expand Down Expand Up @@ -181,3 +185,77 @@ def build_from_model(cls, datamodel):
assert isinstance(cfg, PluginConfig)
assert set(cfg.plugins) == {parent}
assert cfg.plugins[parent]["analysis_args"] == {}


def test_dump_to_csv(tmp_path):
    """dump_to_csv round-trips rows through a csv file."""
    logger = logging.getLogger()
    rows = [
        {
            "nodename": "node1",
            "plugin": "TestPlugin",
            "status": "OK",
            "timestamp": "2025_07_16-12_00_00_PM",
            "message": "Success",
        }
    ]
    out_file = tmp_path / "test.csv"
    header = list(rows[0].keys())

    dump_to_csv(rows, str(out_file), header, logger)

    with open(out_file, newline="") as f:
        assert list(csv.DictReader(f)) == rows


def test_dump_results_to_csv(tmp_path):
    """dump_results_to_csv writes one row per PluginResult to nodescraper.csv."""
    # removed unused 'caplog' fixture parameter
    logger = logging.getLogger()

    result = PluginResult(
        source="TestPlugin", status=ExecutionStatus.OK, message="some message", result_data={}
    )

    dump_results_to_csv([result], "node123", str(tmp_path), "2025_07_16-01_00_00_PM", logger)

    out_file = tmp_path / "nodescraper.csv"
    assert out_file.exists()

    with open(out_file, newline="") as f:
        rows = list(csv.DictReader(f))
    assert rows[0]["nodename"] == "node123"
    assert rows[0]["plugin"] == "TestPlugin"
    assert rows[0]["status"] == "OK"
    # also pin the timestamp column the call supplies
    assert rows[0]["timestamp"] == "2025_07_16-01_00_00_PM"
    assert rows[0]["message"] == "some message"


def test_generate_summary(tmp_path):
    """generate_summary collects rows from nested nodescraper.csv files."""
    logger = logging.getLogger()

    run_dir = tmp_path / "sub"
    run_dir.mkdir()

    header = ["nodename", "plugin", "status", "timestamp", "message"]
    csv_path = run_dir / "nodescraper.csv"
    with open(csv_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerow(
            {
                "nodename": "nodeX",
                "plugin": "PluginA",
                "status": "OK",
                "timestamp": "2025_07_16-01_00_00_PM",
                "message": "some message",
            }
        )

    generate_summary(str(tmp_path), str(tmp_path), logger)

    summary_file = tmp_path / "summary.csv"
    assert summary_file.exists()

    with open(summary_file, newline="") as f:
        rows = list(csv.DictReader(f))
    assert len(rows) == 1
    assert rows[0]["plugin"] == "PluginA"