From 5da4e03e20eecdb81e0e33b40530382541f4ecc8 Mon Sep 17 00:00:00 2001 From: Arne Tarara Date: Wed, 18 Mar 2026 11:27:33 +0100 Subject: [PATCH 1/3] Adding options to provide second testing file for out of sample testing --- tools/model.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tools/model.py b/tools/model.py index 8c91452..68629d4 100755 --- a/tools/model.py +++ b/tools/model.py @@ -243,9 +243,13 @@ def trim_by_quantile( if quantile >= 1.0: return train_df, test_df q = train_df["target_uj"].quantile(quantile) + low = train_df["target_uj"].quantile(1 - quantile) + high = train_df["target_uj"].quantile(quantile) + + return ( - train_df[train_df["target_uj"] <= q], - test_df[test_df["target_uj"] <= q], + train_df[(train_df["target_uj"] >= low) & (train_df["target_uj"] <= high)], + test_df[(test_df["target_uj"] >= low) & (test_df["target_uj"] <= high)], ) @@ -393,6 +397,7 @@ def main() -> None: description="Train nonlinear PSYS model and distill to kernel-friendly linear+multi-LUT params." ) parser.add_argument("logfiles", nargs="+", type=Path) + parser.add_argument("--test-data", type=Path, help="Optional separate dataset for testing") parser.add_argument("--mode", choices=("delta",), default="delta") parser.add_argument("--alpha", type=float, default=10.0, help="L2 strength for non-negative linear fit") parser.add_argument("--test-frac", type=float, default=0.2) @@ -401,15 +406,26 @@ def main() -> None: parser.add_argument("--random-seed", type=int, default=42) args = parser.parse_args() + # Check all files exist for path in args.logfiles: if not path.exists(): raise FileNotFoundError(path) + if args.test_data and not args.test_data.exists(): + raise FileNotFoundError(args.test_data) - df = gather_rows(args.logfiles, args.mode, args.min_target_uj) - if df.empty or len(df) < 40: + # Gather training data + train_df = gather_rows(args.logfiles, args.mode, args.min_target_uj) + if train_df.empty or len(train_df) < 40: raise RuntimeError("Not enough usable rows (need >= 40 after filtering)") - train_df, test_df = split_random(df, args.test_frac, args.random_seed) + # Gather test data + if args.test_data: + test_df = gather_rows([args.test_data], args.mode, args.min_target_uj) + if test_df.empty: + raise RuntimeError("No usable rows in test-data file") + else: + train_df, test_df = split_random(train_df, args.test_frac, args.random_seed) + train_df, test_df = trim_by_quantile(train_df, test_df, args.trim_upper_quantile) if train_df.empty or test_df.empty: raise RuntimeError("Train/test split produced an empty set") @@ -505,7 +521,7 @@ def main() -> None: ) print(f"# mode={args.mode} alpha={args.alpha}") - print(f"# rows_total={len(df)} rows_train={len(train_df)} rows_test={len(test_df)}") + print(f"# rows_train={len(train_df)} rows_test={len(test_df)}") print_metrics("test metrics (current module defaults)", metric_bundle(y_test, pred_baseline_test)) print_metrics("test metrics (linear non-negative ridge)", metric_bundle(y_test, pred_lin_test)) From 84aa505e22b5d93568955715b6f6fa11b1ea07e5 Mon Sep 17 00:00:00 2001 From: Arne Tarara Date: Sat, 21 Mar 2026 16:50:53 +0100 Subject: [PATCH 2/3] Added test-data argument again --- tools/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/model.py b/tools/model.py index 4566ada..8d65b87 100755 --- a/tools/model.py +++ b/tools/model.py @@ -573,6 +573,7 @@ def main(args) -> None: description="Train nonlinear PSYS model and distill to kernel-friendly linear+multi-LUT params." ) parser.add_argument("logfiles", nargs="+", type=Path) + parser.add_argument("--test-data", type=Path, help="Optional separate dataset for testing") parser.add_argument("--mode", choices=("delta",), default="delta") parser.add_argument("--alpha", type=float, default=10.0, help="L2 strength for non-negative linear fit") parser.add_argument("--test-frac", type=float, default=0.2) From 81f484f26040a0107bdcbcaaa95c4aa426221ba8 Mon Sep 17 00:00:00 2001 From: Arne Tarara Date: Sat, 21 Mar 2026 17:34:31 +0100 Subject: [PATCH 3/3] Added plotting functionality --- tools/model.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/model.py b/tools/model.py index 8d65b87..6f5045d 100755 --- a/tools/model.py +++ b/tools/model.py @@ -11,7 +11,7 @@ from scipy.optimize import lsq_linear from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score - +import plotext as plt NL_LUT_BINS = 8 @@ -408,6 +408,18 @@ def main(args) -> None: if train_df.empty or len(train_df) < 40: raise RuntimeError("Not enough usable rows (need >= 40 after filtering)") + if args.plot: + x = train_df["timestamp"].tolist() + y = train_df["target_uj"].tolist() + + plt.clear_data() + plt.plot(x, y, marker='dot') + plt.title("Energy of time") + plt.xlabel("Time") + plt.ylabel(TARGET_COL) + plt.show() + return + # Gather test data if args.test_data: test_df = gather_rows([args.test_data], args.mode, args.min_target_uj) @@ -581,6 +593,7 @@ def main(args) -> None: parser.add_argument("--trim-upper-quantile", type=float, default=0.999) parser.add_argument("--random-seed", type=int, default=42) parser.add_argument("--target", type=str, choices=["rapl_psys_sum_uj", "rapl_core_sum_uj"], default="rapl_psys_sum_uj") + parser.add_argument("--plot", action='store_true') args = parser.parse_args()