From d81cc26489d77eb050cb26dd5f3640186cf8b3d2 Mon Sep 17 00:00:00 2001
From: Alexandra Bara <alexbara@amd.com>
Date: Thu, 17 Jul 2025 11:39:23 -0500
Subject: [PATCH 1/5] first pass + utest

---
 nodescraper/cli/cli.py                 | 25 +++++++-
 nodescraper/cli/helper.py              | 75 +++++++++++++++++++++++
 test/unit/framework/test_cli_helper.py | 83 +++++++++++++++++++++++++-
 3 files changed, 180 insertions(+), 3 deletions(-)

diff --git a/nodescraper/cli/cli.py b/nodescraper/cli/cli.py
index 5bfdd30d..9265b004 100644
--- a/nodescraper/cli/cli.py
+++ b/nodescraper/cli/cli.py
@@ -35,8 +35,10 @@
 from nodescraper.cli.constants import DEFAULT_CONFIG, META_VAR_MAP
 from nodescraper.cli.dynamicparserbuilder import DynamicParserBuilder
 from nodescraper.cli.helper import (
+    dump_results_to_csv,
     generate_reference_config,
     generate_reference_config_from_logs,
+    generate_summary,
     get_plugin_configs,
     get_system_info,
     log_system_info,
@@ -154,6 +156,18 @@ def build_parser(
 
     subparsers = parser.add_subparsers(dest="subcmd", help="Subcommands")
 
+    summary_parser = subparsers.add_parser(
+        "summary",
+        help="Generates summary csv file",
+    )
+
+    summary_parser.add_argument(
+        "--summary_path",
+        dest="summary_path",
+        type=log_path_arg,
+        help="Path to node-scraper results. Generates summary csv file in summary.csv.",
+    )
+
     run_plugin_parser = subparsers.add_parser(
         "run-plugins",
         help="Run a series of plugins",
@@ -327,12 +341,13 @@ def main(arg_input: Optional[list[str]] = None):
 
         parsed_args = parser.parse_args(top_level_args)
         system_info = get_system_info(parsed_args)
+        sname = system_info.name.lower().replace("-", "_").replace(".", "_")
+        timestamp = datetime.datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p")
 
         if parsed_args.log_path and parsed_args.subcmd not in ["gen-plugin-config", "describe"]:
-            sname = system_info.name.lower().replace("-", "_").replace(".", "_")
             log_path = os.path.join(
                 parsed_args.log_path,
-                f"scraper_logs_{sname}_{datetime.datetime.now().strftime('%Y_%m_%d-%I_%M_%S_%p')}",
+                f"scraper_logs_{sname}_{timestamp}",
             )
             os.makedirs(log_path)
         else:
@@ -342,6 +357,10 @@ def main(arg_input: Optional[list[str]] = None):
         if log_path:
             logger.info("Log path: %s", log_path)
 
+        if parsed_args.subcmd == "summary":
+            generate_summary(parsed_args.summary_path, logger)
+            sys.exit(0)
+
         if parsed_args.subcmd == "describe":
             parse_describe(parsed_args, plugin_reg, config_reg, logger)
 
@@ -407,6 +426,8 @@ def main(arg_input: Optional[list[str]] = None):
     try:
         results = plugin_executor.run_queue()
 
+        dump_results_to_csv(results, sname, log_path, timestamp, logger)
+
         if parsed_args.reference_config:
             ref_config = generate_reference_config(results, plugin_reg, logger)
             path = os.path.join(os.getcwd(), "reference_config.json")
diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py
index aa6ea854..403d5810 100644
--- a/nodescraper/cli/helper.py
+++ b/nodescraper/cli/helper.py
@@ -24,6 +24,8 @@
 #
 ###############################################################################
 import argparse
+import csv
+import glob
 import json
 import logging
 import os
@@ -422,3 +424,76 @@ def find_datamodel_and_result(base_path: str) -> list[Tuple[str, str]]:
                 tuple_list.append((datamodel_path, result_path))
 
     return tuple_list
+
+
+def dump_results_to_csv(
+    results: list[PluginResult],
+    nodename: str,
+    log_path: str,
+    timestamp: str,
+    logger: logging.Logger,
+):
+    """dump node-scraper summary results to csv file
+
+    Args:
+        results (list[PluginResult]): list of PluginResults
+        nodename (str): node where results come from
+        log_path (str): path to results
+        timestamp (str): time when results were taken
+        logger (logging.Logger): instance of logger
+    """
+    fieldnames = ["nodename", "plugin", "status", "timestamp", "message"]
+    filename = log_path + "/errorscraper.csv"
+    all_rows = []
+    for res in results:
+        row = {
+            "nodename": nodename,
+            "plugin": res.source,
+            "status": res.status.name,
+            "timestamp": timestamp,
+            "message": res.message,
+        }
+        all_rows.append(row)
+    dump_to_csv(all_rows, filename, fieldnames, logger)
+
+
+def dump_to_csv(all_rows: list, filename: str, fieldnames: list[str], logger: logging.Logger):
+    """dump data to csv; a failed write is logged as an error instead of being raised
+
+    Args:
+        all_rows (list): rows to be written
+        filename (str): name of file to write to
+        fieldnames (list[str]): header for csv file
+        logger (logging.Logger): instance of logger
+    """
+    try:
+        with open(filename, "w", newline="") as f:
+            writer = csv.DictWriter(f, fieldnames=fieldnames)
+            writer.writeheader()
+            writer.writerows(all_rows)
+    except Exception as exp:
+        logger.error("Could not dump data to csv file: %s", exp)
+        return  # write failed: skip the success message below
+    logger.info("Data written to csv file: %s", filename)
+
+
+def generate_summary(base_path: str, logger: logging.Logger):
+    """Concatenate csv files into 1 summary csv file
+
+    Args:
+        base_path (str): base path to look for csv files
+        logger (logging.Logger): instance of logger
+    """
+    fieldnames = ["nodename", "plugin", "status", "timestamp", "message"]
+    all_rows = []
+
+    pattern = os.path.join(base_path, "**", "errorscraper.csv")
+    for filepath in glob.glob(pattern, recursive=True):
+        logger.info(f"Reading: {filepath}")
+        with open(filepath, newline="") as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                all_rows.append(row)
+
+    output_path = os.path.join(base_path, "summary.csv")
+    dump_to_csv(all_rows, output_path, fieldnames, logger)
diff --git a/test/unit/framework/test_cli_helper.py b/test/unit/framework/test_cli_helper.py
index 8ff75f5f..e505b1d6 100644
--- a/test/unit/framework/test_cli_helper.py
+++ b/test/unit/framework/test_cli_helper.py
@@ -24,6 +24,7 @@
 #
 ###############################################################################
 import argparse
+import csv
 import json
 import logging
 import os
@@ -35,7 +36,13 @@
 from pydantic import BaseModel
 
 from nodescraper.cli import cli
-from nodescraper.cli.helper import build_config, find_datamodel_and_result
+from nodescraper.cli.helper import (
+    build_config,
+    dump_results_to_csv,
+    dump_to_csv,
+    find_datamodel_and_result,
+    generate_summary,
+)
 from nodescraper.configregistry import ConfigRegistry
 from nodescraper.enums import ExecutionStatus, SystemInteractionLevel
 from nodescraper.models import PluginConfig, TaskResult
@@ -176,3 +183,77 @@ def build_from_model(cls, datamodel):
     assert isinstance(cfg, PluginConfig)
     assert set(cfg.plugins) == {parent}
     assert cfg.plugins[parent]["analysis_args"] == {}
+
+
+def test_dump_to_csv(tmp_path):
+    logger = logging.getLogger()
+    data = [
+        {
+            "nodename": "node1",
+            "plugin": "TestPlugin",
+            "status": "OK",
+            "timestamp": "2025_07_16-12_00_00_PM",
+            "message": "Success",
+        }
+    ]
+    filename = tmp_path / "test.csv"
+    fieldnames = list(data[0].keys())
+
+    dump_to_csv(data, str(filename), fieldnames, logger)
+
+    with open(filename, newline="") as f:
+        reader = list(csv.DictReader(f))
+        assert reader == data
+
+
+def test_dump_results_to_csv(tmp_path, caplog):
+    logger = logging.getLogger()
+
+    result = PluginResult(
+        source="TestPlugin", status=ExecutionStatus.OK, message="some message", result_data={}
+    )
+
+    dump_results_to_csv([result], "node123", str(tmp_path), "2025_07_16-01_00_00_PM", logger)
+
+    out_file = tmp_path / "errorscraper.csv"
+    assert out_file.exists()
+
+    with open(out_file, newline="") as f:
+        reader = list(csv.DictReader(f))
+        assert reader[0]["nodename"] == "node123"
+        assert reader[0]["plugin"] == "TestPlugin"
+        assert reader[0]["status"] == "OK"
+        assert reader[0]["message"] == "some message"
+
+
+def test_generate_summary(tmp_path):
+    logger = logging.getLogger()
+
+    subdir = tmp_path / "sub"
+    subdir.mkdir()
+
+    errorscraper_path = subdir / "errorscraper.csv"
+    with open(errorscraper_path, "w", newline="") as f:
+        writer = csv.DictWriter(
+            f, fieldnames=["nodename", "plugin", "status", "timestamp", "message"]
+        )
+        writer.writeheader()
+        writer.writerow(
+            {
+                "nodename": "nodeX",
+                "plugin": "PluginA",
+                "status": "OK",
+                "timestamp": "2025_07_16-01_00_00_PM",
+                "message": "some message",
+            }
+        )
+
+    generate_summary(str(tmp_path), logger)
+
+    summary_path = tmp_path / "summary.csv"
+    assert summary_path.exists()
+
+    with open(summary_path, newline="") as f:
+        rows = list(csv.DictReader(f))
+        assert len(rows) == 1
+        assert rows[0]["plugin"] == "PluginA"

From 6f31b6a72ac6a96a68465908b209627020557e0a Mon Sep 17 00:00:00 2001
From: Alexandra Bara <alexbara@amd.com>
Date: Thu, 17 Jul 2025 11:53:49 -0500
Subject: [PATCH 2/5] added README

---
 README.md | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 19d77a97..fe65288e 100644
--- a/README.md
+++ b/README.md
@@ -21,13 +21,14 @@ usage: node-scraper [-h] [--sys-name STRING] [--sys-location {LOCAL,REMOTE}] [--
                     [--sys-sku STRING] [--sys-platform STRING] [--plugin-configs [STRING ...]] [--system-config STRING]
                     [--connection-config STRING] [--log-path STRING] [--log-level {CRITICAL,FATAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}]
                     [--gen-reference-config]
-                    {run-plugins,describe,gen-plugin-config} ...
+                    {summary,run-plugins,describe,gen-plugin-config} ...
 
 node scraper CLI
 
 positional arguments:
-  {run-plugins,describe,gen-plugin-config}
+  {summary,run-plugins,describe,gen-plugin-config}
                         Subcommands
+    summary             Generates summary csv file
     run-plugins         Run a series of plugins
     describe            Display details on a built-in config or plugin
     gen-plugin-config   Generate a config for a plugin or list of plugins
@@ -38,7 +39,8 @@ options:
   --sys-location {LOCAL,REMOTE}
                         Location of target system (default: LOCAL)
   --sys-interaction-level {PASSIVE,INTERACTIVE,DISRUPTIVE}
-                        Specify system interaction level, used to determine the type of actions that plugins can perform (default: INTERACTIVE)
+                        Specify system interaction level, used to determine the type of actions that plugins can perform (default:
+                        INTERACTIVE)
   --sys-sku STRING      Manually specify SKU of system (default: None)
   --sys-platform STRING
                         Specify system platform (default: None)
@@ -54,7 +56,6 @@ options:
   --gen-reference-config
                         Generate reference config from system. Writes to ./reference_config.json. (default: False)
 
-
 ```
 
 ### Subcommmands
@@ -167,6 +168,16 @@ This would produce the following config:
 }
 ```
 
+4. **'summary' sub command**
+The 'summary' subcommand can be used to combine results from multiple runs of node-scraper to a
+single summary.csv file. Sample run:
+```sh
+node-scraper summary --summary_path /<path_to_node-scraper_logs>
+```
+This will generate a new file '/<path_to_node-scraper_logs>/summary.csv' file. This file will
+contain the results from all 'errorscraper.csv' files from '/<path_to_node-scarper_logs>'.
+
+
 ### Plugin Configs
 A plugin JSON config should follow the structure of the plugin config model defined here.
 The globals field is a dictionary of global key-value pairs; values in globals will be passed to

From 05703572e9dcdd88bda17618640fb076a60cea0d Mon Sep 17 00:00:00 2001
From: Alex Bara <alex.bara@amd.com>
Date: Fri, 25 Jul 2025 10:29:42 -0500
Subject: [PATCH 3/5] errorscraper.csv -> nodescraper.csv

---
 README.md                              | 2 +-
 nodescraper/cli/helper.py              | 4 ++--
 test/unit/framework/test_cli_helper.py | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index fe65288e..bb8cd7fd 100644
--- a/README.md
+++ b/README.md
@@ -175,7 +175,7 @@ single summary.csv file. Sample run:
 node-scraper summary --summary_path /<path_to_node-scraper_logs>
 ```
 This will generate a new file '/<path_to_node-scraper_logs>/summary.csv' file. This file will
-contain the results from all 'errorscraper.csv' files from '/<path_to_node-scarper_logs>'.
+contain the results from all 'nodescraper.csv' files from '/<path_to_node-scraper_logs>'.
 
 
 ### Plugin Configs
diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py
index 403d5810..4f10bca7 100644
--- a/nodescraper/cli/helper.py
+++ b/nodescraper/cli/helper.py
@@ -443,7 +443,7 @@ def dump_results_to_csv(
         logger (logging.Logger): instance of logger
     """
     fieldnames = ["nodename", "plugin", "status", "timestamp", "message"]
-    filename = log_path + "/errorscraper.csv"
+    filename = log_path + "/nodescraper.csv"
     all_rows = []
     for res in results:
         row = {
@@ -487,7 +487,7 @@ def generate_summary(base_path: str, logger: logging.Logger):
     fieldnames = ["nodename", "plugin", "status", "timestamp", "message"]
     all_rows = []
 
-    pattern = os.path.join(base_path, "**", "errorscraper.csv")
+    pattern = os.path.join(base_path, "**", "nodescraper.csv")
     for filepath in glob.glob(pattern, recursive=True):
         logger.info(f"Reading: {filepath}")
         with open(filepath, newline="") as f:
diff --git a/test/unit/framework/test_cli_helper.py b/test/unit/framework/test_cli_helper.py
index e505b1d6..04aaa766 100644
--- a/test/unit/framework/test_cli_helper.py
+++ b/test/unit/framework/test_cli_helper.py
@@ -215,7 +215,7 @@ def test_dump_results_to_csv(tmp_path, caplog):
 
     dump_results_to_csv([result], "node123", str(tmp_path), "2025_07_16-01_00_00_PM", logger)
 
-    out_file = tmp_path / "errorscraper.csv"
+    out_file = tmp_path / "nodescraper.csv"
     assert out_file.exists()
 
     with open(out_file, newline="") as f:
@@ -232,7 +232,7 @@ def test_generate_summary(tmp_path):
     subdir = tmp_path / "sub"
     subdir.mkdir()
 
-    errorscraper_path = subdir / "errorscraper.csv"
+    errorscraper_path = subdir / "nodescraper.csv"
     with open(errorscraper_path, "w", newline="") as f:
         writer = csv.DictWriter(
             f, fieldnames=["nodename", "plugin", "status", "timestamp", "message"]

From 923aa039dbf491dd03ca9add123b080b6c006c04 Mon Sep 17 00:00:00 2001
From: Alexandra Bara <alexbara@amd.com>
Date: Tue, 29 Jul 2025 10:45:55 -0500
Subject: [PATCH 4/5] added output-path for summary

---
 nodescraper/cli/cli.py                 | 17 ++++++++++++-----
 nodescraper/cli/helper.py              | 23 ++++++++++++++++++-----
 test/unit/framework/test_cli_helper.py |  2 +-
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/nodescraper/cli/cli.py b/nodescraper/cli/cli.py
index 9265b004..c1e7161f 100644
--- a/nodescraper/cli/cli.py
+++ b/nodescraper/cli/cli.py
@@ -162,10 +162,17 @@ def build_parser(
     )
 
     summary_parser.add_argument(
-        "--summary_path",
-        dest="summary_path",
+        "--search-path",
+        dest="search_path",
         type=log_path_arg,
-        help="Path to node-scraper results. Generates summary csv file in summary.csv.",
+        help="Path to node-scraper previously generated results.",
+    )
+
+    summary_parser.add_argument(
+        "--output-path",
+        dest="output_path",
+        type=log_path_arg,
+        help="Specifies path for summary.csv.",
     )
 
     run_plugin_parser = subparsers.add_parser(
@@ -263,7 +270,7 @@ def setup_logger(log_level: str = "INFO", log_path: str | None = None) -> loggin
     handlers = [logging.StreamHandler(stream=sys.stdout)]
 
     if log_path:
-        log_file_name = os.path.join(log_path, "errorscraper.log")
+        log_file_name = os.path.join(log_path, "nodescraper.log")
         handlers.append(
             logging.FileHandler(filename=log_file_name, mode="wt", encoding="utf-8"),
         )
@@ -358,7 +365,7 @@ def main(arg_input: Optional[list[str]] = None):
             logger.info("Log path: %s", log_path)
 
         if parsed_args.subcmd == "summary":
-            generate_summary(parsed_args.summary_path, logger)
+            generate_summary(parsed_args.search_path, parsed_args.output_path, logger)
             sys.exit(0)
 
         if parsed_args.subcmd == "describe":
diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py
index fa69955b..e166744a 100644
--- a/nodescraper/cli/helper.py
+++ b/nodescraper/cli/helper.py
@@ -479,23 +479,36 @@ def dump_to_csv(all_rows: list, filename: str, fieldnames: list[str], logger: lo
     logger.info("Data written to csv file: %s", filename)
 
 
-def generate_summary(base_path: str, logger: logging.Logger):
+def generate_summary(search_path: str, output_path: str, logger: logging.Logger):
     """Concatenate csv files into 1 summary csv file
 
     Args:
-        base_path (str): base path to look for csv files
+        search_path (str): base path to look for csv files
         logger (logging.Logger): instance of logger
     """
     fieldnames = ["nodename", "plugin", "status", "timestamp", "message"]
     all_rows = []
 
-    pattern = os.path.join(base_path, "**", "nodescraper.csv")
-    for filepath in glob.glob(pattern, recursive=True):
+    pattern = os.path.join(search_path, "**", "nodescraper.csv")
+    matched_files = glob.glob(pattern, recursive=True)
+
+    if not matched_files:
+        logger.error(f"No nodescraper.csv files found under {search_path}")
+        return
+
+    for filepath in matched_files:
         logger.info(f"Reading: {filepath}")
         with open(filepath, newline="") as f:
             reader = csv.DictReader(f)
             for row in reader:
                 all_rows.append(row)
 
-    output_path = os.path.join(base_path, "summary.csv")
+    if not all_rows:
+        logger.error("No data rows found in matched CSV files.")
+        return
+
+    if not output_path:
+        output_path = os.getcwd()
+
+    output_path = os.path.join(output_path, "summary.csv")
     dump_to_csv(all_rows, output_path, fieldnames, logger)
diff --git a/test/unit/framework/test_cli_helper.py b/test/unit/framework/test_cli_helper.py
index 50f7a40a..7d008e68 100644
--- a/test/unit/framework/test_cli_helper.py
+++ b/test/unit/framework/test_cli_helper.py
@@ -250,7 +250,7 @@ def test_generate_summary(tmp_path):
             }
         )
 
-    generate_summary(str(tmp_path), logger)
+    generate_summary(str(tmp_path), str(tmp_path), logger)
 
     summary_path = tmp_path / "summary.csv"
     assert summary_path.exists()

From 07eeb3ffcb36960d89b7c59eed9837683c6eb1ce Mon Sep 17 00:00:00 2001
From: Alexandra Bara <alexbara@amd.com>
Date: Wed, 30 Jul 2025 10:14:31 -0500
Subject: [PATCH 5/5] updated docstring

---
 nodescraper/cli/helper.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py
index 054e4043..994f752c 100644
--- a/nodescraper/cli/helper.py
+++ b/nodescraper/cli/helper.py
@@ -479,13 +479,15 @@ def dump_to_csv(all_rows: list, filename: str, fieldnames: list[str], logger: lo
     logger.info("Data written to csv file: %s", filename)
 
 
-def generate_summary(search_path: str, output_path: str, logger: logging.Logger):
+def generate_summary(search_path: str, output_path: str | None, logger: logging.Logger):
     """Concatenate csv files into 1 summary csv file
 
     Args:
-        search_path (str): base path to look for csv files
+        search_path (str): Path for previous runs
+        output_path (str | None): Path for new summary csv file
         logger (logging.Logger): instance of logger
     """
+
     fieldnames = ["nodename", "plugin", "status", "timestamp", "message"]
     all_rows = []