From ccb9cd2dee3fedd3a8f15f98a4d39f082c55af63 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sat, 31 Jan 2026 17:24:23 +0100
Subject: [PATCH 01/11] perf: speed up LP constraint writing by replacing
 concat+sort with join

Replace the vertical concat + sort approach in Constraint.to_polars()
with an inner join, so every row has all columns populated. This removes
the need for the group_by validation step in constraints_to_file() and
simplifies the formatting expressions by eliminating null checks on
coeffs/vars columns.
---
 linopy/constraints.py |  6 +-----
 linopy/io.py          | 37 +++++++------------------------------
 2 files changed, 8 insertions(+), 35 deletions(-)

diff --git a/linopy/constraints.py b/linopy/constraints.py
index 291beb1d..e6216dba 100644
--- a/linopy/constraints.py
+++ b/linopy/constraints.py
@@ -632,11 +632,7 @@ def to_polars(self) -> pl.DataFrame:
         short = filter_nulls_polars(short)
         check_has_nulls_polars(short, name=f"{self.type} {self.name}")

-        df = pl.concat([short, long], how="diagonal_relaxed").sort(["labels", "rhs"])
-        # delete subsequent non-null rhs (happens is all vars per label are -1)
-        is_non_null = df["rhs"].is_not_null()
-        prev_non_is_null = is_non_null.shift(1).fill_null(False)
-        df = df.filter(is_non_null & ~prev_non_is_null | ~is_non_null)
+        df = long.join(short, on="labels", how="inner")
         return df[["labels", "coeffs", "vars", "sign", "rhs"]]

     # Wrapped function which would convert variable to dataarray
diff --git a/linopy/io.py b/linopy/io.py
index 56fe033d..f5bca4ea 100644
--- a/linopy/io.py
+++ b/linopy/io.py
@@ -440,48 +440,25 @@ def constraints_to_file(
         if df.height == 0:
             continue

-        # Ensure each constraint has both coefficient and RHS terms
-        analysis = df.group_by("labels").agg(
-            [
-                pl.col("coeffs").is_not_null().sum().alias("coeff_rows"),
-                pl.col("sign").is_not_null().sum().alias("rhs_rows"),
-            ]
-        )
-
-        valid = analysis.filter(
-            (pl.col("coeff_rows") > 0) & (pl.col("rhs_rows") > 0)
-        )
-
-        if valid.height == 0:
-            continue
-
-        # Keep only constraints that have both parts
-        df = df.join(valid.select("labels"), on="labels", how="inner")
-
         # Sort by labels and mark first/last occurrences
         df = df.sort("labels").with_columns(
             [
-                pl.when(pl.col("labels").is_first_distinct())
-                .then(pl.col("labels"))
-                .otherwise(pl.lit(None))
-                .alias("labels_first"),
+                pl.col("labels").is_first_distinct().alias("is_first_in_group"),
                 (pl.col("labels") != pl.col("labels").shift(-1))
                 .fill_null(True)
                 .alias("is_last_in_group"),
             ]
         )

-        row_labels = print_constraint(pl.col("labels_first"))
+        row_labels = print_constraint(pl.col("labels"))
         col_labels = print_variable(pl.col("vars"))
         columns = [
-            pl.when(pl.col("labels_first").is_not_null()).then(row_labels[0]),
-            pl.when(pl.col("labels_first").is_not_null()).then(row_labels[1]),
-            pl.when(pl.col("labels_first").is_not_null())
-            .then(pl.lit(":\n"))
-            .alias(":"),
+            pl.when(pl.col("is_first_in_group")).then(row_labels[0]),
+            pl.when(pl.col("is_first_in_group")).then(row_labels[1]),
+            pl.when(pl.col("is_first_in_group")).then(pl.lit(":\n")).alias(":"),
             *signed_number(pl.col("coeffs")),
-            pl.when(pl.col("vars").is_not_null()).then(col_labels[0]),
-            pl.when(pl.col("vars").is_not_null()).then(col_labels[1]),
+            col_labels[0],
+            col_labels[1],
             pl.when(pl.col("is_last_in_group")).then(pl.col("sign")),
             pl.when(pl.col("is_last_in_group")).then(pl.lit(" ")),
             pl.when(pl.col("is_last_in_group")).then(pl.col("rhs").cast(pl.String)),
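
To see why the inner join makes the old null handling unnecessary,
consider a toy sketch of the two tables involved (made-up columns, not
linopy's actual data): "long" holds one row per constraint term, "short"
one row per constraint. The old diagonal concat stacked them vertically,
leaving nulls in every row that then had to be sorted and filtered; the
join instead yields rows with all columns populated.

    import polars as pl

    # one row per term: coefficient and variable label
    long = pl.DataFrame(
        {"labels": [0, 0, 1], "coeffs": [1.0, -1.0, 2.0], "vars": [10, 11, 12]}
    )
    # one row per constraint: sign and right-hand side
    short = pl.DataFrame({"labels": [0, 1], "sign": ["<=", ">="], "rhs": [5.0, 0.0]})

    # every term row now carries its constraint's sign/rhs; no nulls remain
    df = long.join(short, on="labels", how="inner")
    print(df)
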
From 8524c29d09113dbeae4cc50ba66dc8f5d4e0fc15 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sat, 31 Jan 2026 17:34:11 +0100
Subject: [PATCH 02/11] fix: missing space in lp file

---
 linopy/io.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/linopy/io.py b/linopy/io.py
index f5bca4ea..f85c4499 100644
--- a/linopy/io.py
+++ b/linopy/io.py
@@ -459,6 +459,7 @@ def constraints_to_file(
             *signed_number(pl.col("coeffs")),
             col_labels[0],
             col_labels[1],
+            pl.when(pl.col("is_last_in_group")).then(pl.lit("\n")),
             pl.when(pl.col("is_last_in_group")).then(pl.col("sign")),
             pl.when(pl.col("is_last_in_group")).then(pl.lit(" ")),
             pl.when(pl.col("is_last_in_group")).then(pl.col("rhs").cast(pl.String)),

From aab95f5487708742fac00f411e6d70db118c4bb1 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sat, 31 Jan 2026 18:04:07 +0100
Subject: [PATCH 03/11] perf: skip group_terms when unnecessary and avoid
 xarray broadcast for short DataFrame
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Skip group_terms_polars when _term dim size is 1 (no duplicate vars)
- Build the short DataFrame (labels, rhs, sign) directly with numpy
  instead of going through xarray.broadcast + to_polars
- Add sign column via pl.lit when uniform (common case), avoiding costly
  numpy string array → polars conversion

Co-Authored-By: Claude Opus 4.5
---
 linopy/constraints.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/linopy/constraints.py b/linopy/constraints.py
index e6216dba..c239be60 100644
--- a/linopy/constraints.py
+++ b/linopy/constraints.py
@@ -42,7 +42,6 @@
     get_label_position,
     group_terms_polars,
     has_optimized_model,
-    infer_schema_polars,
     iterate_slices,
     maybe_replace_signs,
     print_coord,
@@ -622,14 +621,30 @@ def to_polars(self) -> pl.DataFrame:
         long = to_polars(ds[keys])
         long = filter_nulls_polars(long)
-        long = group_terms_polars(long)
+        if ds.sizes.get("_term", 1) > 1:
+            long = group_terms_polars(long)
         check_has_nulls_polars(long, name=f"{self.type} {self.name}")

-        short_ds = ds[[k for k in ds if "_term" not in ds[k].dims]]
-        schema = infer_schema_polars(short_ds)
-        schema["sign"] = pl.Enum(["=", "<=", ">="])
-        short = to_polars(short_ds, schema=schema)
+        # Build short DataFrame (labels, rhs) without xarray broadcast.
+        # Add sign separately to avoid costly numpy string→polars conversion.
+        labels_flat = ds["labels"].values.reshape(-1)
+        rhs_flat = np.broadcast_to(ds["rhs"].values, ds["labels"].shape).reshape(-1)
+        short = pl.DataFrame({"labels": labels_flat, "rhs": rhs_flat})
         short = filter_nulls_polars(short)
+
+        sign_values = ds["sign"].values
+        unique_signs = np.unique(sign_values)
+        if len(unique_signs) == 1:
+            short = short.with_columns(
+                pl.lit(unique_signs[0]).cast(pl.Enum(["=", "<=", ">="])).alias("sign")
+            )
+        else:
+            sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1)
+            # Apply same mask as filter_nulls (labels != -1)
+            sign_flat = sign_flat[labels_flat != -1]
+            short = short.with_columns(
+                pl.Series("sign", sign_flat, dtype=pl.Enum(["=", "<=", ">="]))
+            )
         check_has_nulls_polars(short, name=f"{self.type} {self.name}")

         df = long.join(short, on="labels", how="inner")
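
The numpy path above relies on np.broadcast_to expanding rhs to the
shape of labels as a zero-copy view before flattening. A standalone
sketch with toy shapes (not linopy's real arrays):

    import numpy as np
    import polars as pl

    labels = np.array([[0, 1], [2, 3]])
    rhs = np.array([5.0, 7.0])  # constant along the first axis of labels

    labels_flat = labels.reshape(-1)
    # view-only expansion to labels.shape, flattened alongside labels
    rhs_flat = np.broadcast_to(rhs, labels.shape).reshape(-1)

    short = pl.DataFrame({"labels": labels_flat, "rhs": rhs_flat})
    print(short)  # rows: (0, 5.0), (1, 7.0), (2, 5.0), (3, 7.0)
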
From 77626593d1d2988cc4bd1dd0a62e735f52261c59 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sat, 31 Jan 2026 18:05:11 +0100
Subject: [PATCH 04/11] perf: skip group_terms in LinearExpression.to_polars
 when no duplicate vars

Check n_unique before running the expensive group_by+sum. When all
variable references are unique (common case for objectives), this saves
~31ms per 320k terms.

Co-Authored-By: Claude Opus 4.5
---
 linopy/expressions.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/linopy/expressions.py b/linopy/expressions.py
index 10e243de..7550f2d5 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -1463,7 +1463,13 @@ def to_polars(self) -> pl.DataFrame:
         df = to_polars(self.data)
         df = filter_nulls_polars(df)
-        df = group_terms_polars(df)
+        if df["vars"].n_unique() < df.height:
+            df = group_terms_polars(df)
+        else:
+            # Match column order of group_terms (group-by keys, coeffs, rest)
+            varcols = [c for c in df.columns if c.startswith("vars")]
+            rest = [c for c in df.columns if c not in varcols and c != "coeffs"]
+            df = df.select(varcols + ["coeffs"] + rest)
         check_has_nulls_polars(df, name=self.type)
         return df

From bdbb042beccdba1bf859ac06a1468db8f743efc3 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sat, 31 Jan 2026 18:06:39 +0100
Subject: [PATCH 05/11] perf: pre-cast rhs to String in with_columns instead
 of inside concat_str
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the rhs float→String cast into the with_columns step so it runs
once unconditionally rather than inside a when().then() per row.

Co-Authored-By: Claude Opus 4.5
---
 linopy/io.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/linopy/io.py b/linopy/io.py
index f85c4499..98a0e5f9 100644
--- a/linopy/io.py
+++ b/linopy/io.py
@@ -447,6 +447,7 @@ def constraints_to_file(
                 (pl.col("labels") != pl.col("labels").shift(-1))
                 .fill_null(True)
                 .alias("is_last_in_group"),
+                pl.col("rhs").cast(pl.String).alias("rhs_str"),
             ]
         )

@@ -462,7 +463,7 @@ def constraints_to_file(
             pl.when(pl.col("is_last_in_group")).then(pl.lit("\n")),
             pl.when(pl.col("is_last_in_group")).then(pl.col("sign")),
             pl.when(pl.col("is_last_in_group")).then(pl.lit(" ")),
-            pl.when(pl.col("is_last_in_group")).then(pl.col("rhs").cast(pl.String)),
+            pl.when(pl.col("is_last_in_group")).then(pl.col("rhs_str")),
         ]

         kwargs: Any = dict(
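
How the is_first_in_group/is_last_in_group flags drive the LP layout can
be seen in a reduced sketch (plain string fragments stand in for
linopy's print_constraint/print_variable helpers, which are not shown
here): the constraint name fires only on a group's first row, the sign
and the pre-cast rhs only on its last.

    import polars as pl

    df = pl.DataFrame(
        {
            "labels": [0, 0, 1],
            "coeffs": [1.0, -2.0, 3.0],
            "vars": [4, 5, 6],
            "sign": ["<=", "<=", ">="],
            "rhs": [10.0, 10.0, 0.0],
        }
    ).with_columns(
        pl.col("labels").is_first_distinct().alias("is_first_in_group"),
        (pl.col("labels") != pl.col("labels").shift(-1))
        .fill_null(True)
        .alias("is_last_in_group"),
        pl.col("rhs").cast(pl.String).alias("rhs_str"),
    )

    columns = [
        # constraint name only on the first row of each group
        pl.when(pl.col("is_first_in_group")).then(
            pl.lit("c") + pl.col("labels").cast(pl.String) + pl.lit(":\n")
        ),
        pl.format("{} x{}", pl.col("coeffs"), pl.col("vars")),
        # sign and pre-cast rhs only on the last row of each group
        pl.when(pl.col("is_last_in_group")).then(
            pl.lit("\n") + pl.col("sign") + pl.lit(" ") + pl.col("rhs_str")
        ),
    ]
    out = df.select(pl.concat_str(columns, ignore_nulls=True))
    print("\n".join(out.to_series().to_list()))
    # c0:
    # 1.0 x4
    # -2.0 x5
    # <= 10.0
    # c1:
    # 3.0 x6
    # >= 0.0
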
From e9cffb47ac8556e938af3733d2b21efb774d8a8a Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sat, 31 Jan 2026 18:25:13 +0100
Subject: [PATCH 06/11] bench: add PyPSA and knapsack models to LP writer
 benchmark

Add realistic PyPSA SciGrid-DE network model and knapsack model to the
benchmark script alongside the existing basic_model.
---
 dev-scripts/benchmark_lp_writer.py | 101 +++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 dev-scripts/benchmark_lp_writer.py

diff --git a/dev-scripts/benchmark_lp_writer.py b/dev-scripts/benchmark_lp_writer.py
new file mode 100644
index 00000000..9f6dbae8
--- /dev/null
+++ b/dev-scripts/benchmark_lp_writer.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+"""
+Benchmark script for LP file writing performance.
+
+Benchmarks both synthetic models and a realistic PyPSA network model.
+
+Usage:
+    python dev-scripts/benchmark_lp_writer.py
+"""
+
+import tempfile
+import time
+from pathlib import Path
+
+import numpy as np
+from numpy.random import default_rng
+
+from linopy import Model
+
+rng = default_rng(125)
+
+
+def basic_model(n: int) -> Model:
+    """Create a basic model with 2*n^2 variables and 2*n^2 constraints."""
+    m = Model()
+    N = np.arange(n)
+    x = m.add_variables(coords=[N, N], name="x")
+    y = m.add_variables(coords=[N, N], name="y")
+    m.add_constraints(x - y >= N, name="c1")
+    m.add_constraints(x + y >= 0, name="c2")
+    m.add_objective((2 * x).sum() + y.sum())
+    return m
+
+
+def knapsack_model(n: int) -> Model:
+    """Create a knapsack model with n binary variables and 1 constraint."""
+    m = Model()
+    packages = m.add_variables(coords=[np.arange(n)], binary=True)
+    weight = rng.integers(1, 100, size=n)
+    value = rng.integers(1, 100, size=n)
+    m.add_constraints((weight * packages).sum() <= 200)
+    m.add_objective(-(value * packages).sum())
+    return m
+
+
+def pypsa_model() -> Model | None:
+    """Create a model from the PyPSA SciGrid-DE example network."""
+    try:
+        import pypsa
+    except ImportError:
+        return None
+    n = pypsa.examples.scigrid_de()
+    n.optimize.create_model()
+    return n.model
+
+
+def benchmark_model(label: str, m: Model, iterations: int = 10) -> tuple[float, float]:
+    """Benchmark LP file writing for a single model. Returns (mean, std)."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Warmup
+        m.to_file(Path(tmpdir) / "warmup.lp", progress=False)
+
+        times = []
+        for i in range(iterations):
+            fn = Path(tmpdir) / f"bench_{i}.lp"
+            start = time.perf_counter()
+            m.to_file(fn, progress=False)
+            times.append(time.perf_counter() - start)
+
+        avg = np.mean(times)
+        std = np.std(times)
+        print(
+            f"  {label:55s} ({m.nvars:>9,} vars, {m.ncons:>9,} cons): "
+            f"{avg * 1000:7.1f}ms ± {std * 1000:5.1f}ms"
+        )
+        return avg, std
+
+
+def main() -> None:
+    iterations = 10
+    print(f"LP file writing benchmark ({iterations} iterations each)")
+    print("=" * 90)
+
+    print("\nbasic_model (2 x N^2 vars, 2 x N^2 constraints):")
+    for n in [50, 100, 200, 500, 1000]:
+        benchmark_model(f"N={n}", basic_model(n), iterations)
+
+    print("\nknapsack_model (N binary vars, 1 constraint with N terms):")
+    for n in [100, 500, 1000, 5000, 10000]:
+        benchmark_model(f"N={n}", knapsack_model(n), iterations)
+
+    print("\nPyPSA SciGrid-DE (realistic power system model):")
+    m = pypsa_model()
+    if m is not None:
+        benchmark_model("scigrid-de (24 snapshots)", m, iterations)
+    else:
+        print("  (skipped, pypsa not installed)")
+
+
+if __name__ == "__main__":
+    main()

From 44b115f7b3cae6ea975dd6be6721cf1c87471f3c Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sat, 31 Jan 2026 18:31:00 +0100
Subject: [PATCH 07/11] perf: Use polars streaming engine

---
 linopy/io.py | 63 ++++++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 33 deletions(-)

diff --git a/linopy/io.py b/linopy/io.py
index 98a0e5f9..a36f3995 100644
--- a/linopy/io.py
+++ b/linopy/io.py
@@ -54,6 +54,29 @@ def clean_name(name: str) -> str:
 coord_sanitizer = str.maketrans("[,]", "(,)", " ")


+def _format_and_write(
+    df: pl.DataFrame, columns: list[pl.Expr], f: BufferedWriter
+) -> None:
+    """
+    Format columns via concat_str and write to file.
+
+    Uses Polars streaming engine for better performance when available,
+    with automatic fallback to eager evaluation.
+    """
+    kwargs: Any = dict(
+        separator=" ", null_value="", quote_style="never", include_header=False
+    )
+    try:
+        formatted = (
+            df.lazy()
+            .select(pl.concat_str(columns, ignore_nulls=True))
+            .collect(engine="streaming")
+        )
+    except Exception:
+        formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
+    formatted.write_csv(f, **kwargs)
+
+
 def signed_number(expr: pl.Expr) -> tuple[pl.Expr, pl.Expr]:
     """
     Return polars expressions for a signed number string, handling -0.0 correctly.
@@ -155,10 +178,7 @@ def objective_write_linear_terms(
         *signed_number(pl.col("coeffs")),
         *print_variable(pl.col("vars")),
     ]
-    df = df.select(pl.concat_str(cols, ignore_nulls=True))
-    df.write_csv(
-        f, separator=" ", null_value="", quote_style="never", include_header=False
-    )
+    _format_and_write(df, cols, f)


 def objective_write_quadratic_terms(
@@ -171,10 +191,7 @@ def objective_write_quadratic_terms(
         *print_variable(pl.col("vars2")),
     ]
     f.write(b"+ [\n")
-    df = df.select(pl.concat_str(cols, ignore_nulls=True))
-    df.write_csv(
-        f, separator=" ", null_value="", quote_style="never", include_header=False
-    )
+    _format_and_write(df, cols, f)
     f.write(b"] / 2\n")


@@ -254,11 +271,7 @@ def bounds_to_file(
         *signed_number(pl.col("upper")),
     ]

-    kwargs: Any = dict(
-        separator=" ", null_value="", quote_style="never", include_header=False
-    )
-    formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
-    formatted.write_csv(f, **kwargs)
+    _format_and_write(df, columns, f)


 def binaries_to_file(
@@ -296,11 +309,7 @@ def binaries_to_file(
         *print_variable(pl.col("labels")),
     ]

-    kwargs: Any = dict(
-        separator=" ", null_value="", quote_style="never", include_header=False
-    )
-    formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
-    formatted.write_csv(f, **kwargs)
+    _format_and_write(df, columns, f)


 def integers_to_file(
@@ -339,11 +348,7 @@ def integers_to_file(
         *print_variable(pl.col("labels")),
     ]

-    kwargs: Any = dict(
-        separator=" ", null_value="", quote_style="never", include_header=False
-    )
-    formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
-    formatted.write_csv(f, **kwargs)
+    _format_and_write(df, columns, f)


 def sos_to_file(
@@ -399,11 +404,7 @@ def sos_to_file(
         pl.col("var_weights"),
     ]

-    kwargs: Any = dict(
-        separator=" ", null_value="", quote_style="never", include_header=False
-    )
-    formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
-    formatted.write_csv(f, **kwargs)
+    _format_and_write(df, columns, f)


 def constraints_to_file(
@@ -466,11 +467,7 @@ def constraints_to_file(
             pl.when(pl.col("is_last_in_group")).then(pl.col("rhs_str")),
         ]

-        kwargs: Any = dict(
-            separator=" ", null_value="", quote_style="never", include_header=False
-        )
-        formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
-        formatted.write_csv(f, **kwargs)
+        _format_and_write(df, columns, f)

     # in the future, we could use lazy dataframes when they support appending
     # tp existent files

From 940edc73c1ae1041473ea295a6ef4f5f63ace954 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sat, 31 Jan 2026 18:46:24 +0100
Subject: [PATCH 08/11] bench: increase knapsack model sizes to 100k

Co-Authored-By: Claude Opus 4.5
---
 dev-scripts/benchmark_lp_writer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev-scripts/benchmark_lp_writer.py b/dev-scripts/benchmark_lp_writer.py
index 9f6dbae8..a9656a02 100644
--- a/dev-scripts/benchmark_lp_writer.py
+++ b/dev-scripts/benchmark_lp_writer.py
@@ -86,7 +86,7 @@ def main() -> None:
         benchmark_model(f"N={n}", basic_model(n), iterations)

     print("\nknapsack_model (N binary vars, 1 constraint with N terms):")
-    for n in [100, 500, 1000, 5000, 10000]:
+    for n in [100, 1000, 10000, 50000, 100000]:
         benchmark_model(f"N={n}", knapsack_model(n), iterations)

     print("\nPyPSA SciGrid-DE (realistic power system model):")

From 9ac474b54d6525fcf580ab8d03637cb3e9e8c3cc Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sat, 31 Jan 2026 18:58:53 +0100
Subject: [PATCH 09/11] perf: reduce per-constraint overhead in
 Constraint.to_polars()

Replace np.unique with faster numpy equality check for sign uniformity.
Eliminate redundant filter_nulls_polars and check_has_nulls_polars on
the short DataFrame by applying the labels mask directly during
construction.

Co-Authored-By: Claude Opus 4.5
---
 linopy/constraints.py | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/linopy/constraints.py b/linopy/constraints.py
index c239be60..0bd124f0 100644
--- a/linopy/constraints.py
+++ b/linopy/constraints.py
@@ -625,27 +625,32 @@ def to_polars(self) -> pl.DataFrame:
             long = group_terms_polars(long)
         check_has_nulls_polars(long, name=f"{self.type} {self.name}")

-        # Build short DataFrame (labels, rhs) without xarray broadcast.
-        # Add sign separately to avoid costly numpy string→polars conversion.
+        # Build short DataFrame (labels, rhs, sign) without xarray broadcast.
+        # Apply labels mask directly instead of filter_nulls_polars.
         labels_flat = ds["labels"].values.reshape(-1)
+        mask = labels_flat != -1
+        labels_masked = labels_flat[mask]
         rhs_flat = np.broadcast_to(ds["rhs"].values, ds["labels"].shape).reshape(-1)
-        short = pl.DataFrame({"labels": labels_flat, "rhs": rhs_flat})
-        short = filter_nulls_polars(short)

         sign_values = ds["sign"].values
-        unique_signs = np.unique(sign_values)
-        if len(unique_signs) == 1:
-            short = short.with_columns(
-                pl.lit(unique_signs[0]).cast(pl.Enum(["=", "<=", ">="])).alias("sign")
+        sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1)
+        all_same_sign = (
+            sign_flat[0] == sign_flat[-1] and (sign_flat[0] == sign_flat).all()
+        )
+
+        short_data: dict = {
+            "labels": labels_masked,
+            "rhs": rhs_flat[mask],
+        }
+        if all_same_sign:
+            short = pl.DataFrame(short_data).with_columns(
+                pl.lit(sign_flat[0]).cast(pl.Enum(["=", "<=", ">="])).alias("sign")
             )
         else:
-            sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1)
-            # Apply same mask as filter_nulls (labels != -1)
-            sign_flat = sign_flat[labels_flat != -1]
-            short = short.with_columns(
-                pl.Series("sign", sign_flat, dtype=pl.Enum(["=", "<=", ">="]))
+            short_data["sign"] = pl.Series(
+                "sign", sign_flat[mask], dtype=pl.Enum(["=", "<=", ">="])
             )
-        check_has_nulls_polars(short, name=f"{self.type} {self.name}")
+            short = pl.DataFrame(short_data)

         df = long.join(short, on="labels", how="inner")
         return df[["labels", "coeffs", "vars", "sign", "rhs"]]
From 1919f3f781551a8b8f094ec3240cf0fd1e8713c4 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sat, 31 Jan 2026 19:51:57 +0100
Subject: [PATCH 10/11] Add better benchmark script

---
 dev-scripts/benchmark_lp_writer_commits.py | 237 +++++++++++++++++++++
 1 file changed, 237 insertions(+)
 create mode 100644 dev-scripts/benchmark_lp_writer_commits.py

diff --git a/dev-scripts/benchmark_lp_writer_commits.py b/dev-scripts/benchmark_lp_writer_commits.py
new file mode 100644
index 00000000..ee5184ed
--- /dev/null
+++ b/dev-scripts/benchmark_lp_writer_commits.py
@@ -0,0 +1,237 @@
+#!/usr/bin/env python3
+"""
+Benchmark LP file writing performance across a series of commits.
+
+Creates git worktrees for each commit, installs linopy, runs benchmarks
+in a subprocess, and prints a markdown comparison table.
+
+Usage:
+    python dev-scripts/benchmark_lp_writer_commits.py
+    python dev-scripts/benchmark_lp_writer_commits.py --commits abc1234 def5678
+    python dev-scripts/benchmark_lp_writer_commits.py --commits master_org 9ac474b
+"""
+
+import argparse
+import json
+import subprocess
+import sys
+import tempfile
+import textwrap
+
+# Default commits: the perf/lp-write-speed branch history
+DEFAULT_COMMITS = [
+    "master_org",
+    "ccb9cd2",
+    "8524c29",
+    "aab95f5",
+    "7762659",
+    "bdbb042",
+    "44b115f",
+    "9ac474b",
+]
+
+# The benchmark script that runs inside each worktree
+BENCH_SCRIPT = textwrap.dedent("""\
+    import json
+    import sys
+    import tempfile
+    import time
+    from pathlib import Path
+
+    import numpy as np
+
+    from linopy import Model
+
+    WARMUP = 2
+    ITERATIONS = 8
+
+
+    def basic_model(n):
+        m = Model()
+        N = np.arange(n)
+        x = m.add_variables(coords=[N, N], name="x")
+        y = m.add_variables(coords=[N, N], name="y")
+        m.add_constraints(x - y >= N, name="c1")
+        m.add_constraints(x + y >= 0, name="c2")
+        m.add_objective((2 * x).sum() + y.sum())
+        return m
+
+
+    def pypsa_model():
+        try:
+            import pypsa
+        except ImportError:
+            return None
+        n = pypsa.examples.scigrid_de()
+        n.optimize.create_model()
+        return n.model
+
+
+    def bench(label, m):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            for _ in range(WARMUP):
+                m.to_file(Path(tmpdir) / "warmup.lp", progress=False)
+            times = []
+            for i in range(ITERATIONS):
+                fn = Path(tmpdir) / f"bench_{i}.lp"
+                start = time.perf_counter()
+                m.to_file(fn, progress=False)
+                times.append(time.perf_counter() - start)
+            return {"label": label, "mean": float(np.mean(times)), "std": float(np.std(times)),
+                    "nvars": m.nvars, "ncons": m.ncons}
+
+
+    results = []
+    results.append(bench("basic_model(N=100)", basic_model(100)))
+    results.append(bench("basic_model(N=500)", basic_model(500)))
+
+    m = pypsa_model()
+    if m is not None:
+        results.append(bench("PyPSA scigrid-de", m))
+
+    json.dump(results, sys.stdout)
+""")
+
+
+def resolve_commit(ref: str) -> tuple[str, str]:
+    """Return (short_sha, subject) for a git ref."""
+    out = subprocess.run(
+        ["git", "log", "-1", "--format=%h\t%s", ref],
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+    sha, subject = out.stdout.strip().split("\t", 1)
+    return sha, subject
+
+
+def run_benchmark_at_commit(ref: str) -> list[dict]:
+    """Checkout commit in a worktree, install, run benchmark, return results."""
+    sha, subject = resolve_commit(ref)
+    print(f"\n{'=' * 70}", file=sys.stderr)
+    print(f"Benchmarking: {sha} {subject}", file=sys.stderr)
+    print(f"{'=' * 70}", file=sys.stderr)
+
+    with tempfile.TemporaryDirectory() as worktree_dir:
+        # Create worktree
+        subprocess.run(
+            ["git", "worktree", "add", "--detach", worktree_dir, ref],
+            check=True,
+            capture_output=True,
+        )
+        try:
+            # Install in current environment
+            print(f"  Installing linopy from {sha}...", file=sys.stderr)
+            subprocess.run(
+                [
+                    sys.executable,
+                    "-m",
+                    "pip",
+                    "install",
+                    "-e",
+                    worktree_dir,
+                    "-q",
+                    "--no-deps",
+                ],
+                check=True,
+                capture_output=True,
+            )
+
+            # Run benchmark in subprocess (fresh import)
+            print("  Running benchmarks...", file=sys.stderr)
+            result = subprocess.run(
+                [sys.executable, "-c", BENCH_SCRIPT],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            return json.loads(result.stdout)
+        finally:
+            subprocess.run(
+                ["git", "worktree", "remove", "--force", worktree_dir],
+                capture_output=True,
+            )
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Benchmark LP writer across commits")
+    parser.add_argument(
+        "--commits",
+        nargs="+",
+        default=DEFAULT_COMMITS,
+        help="Git refs to benchmark (first is baseline)",
+    )
+    args = parser.parse_args()
+
+    commits = args.commits
+
+    # Collect results: {commit_ref: {label: {mean, std, ...}}}
+    all_results: dict[str, dict[str, dict]] = {}
+    commit_info: dict[str, tuple[str, str]] = {}  # ref -> (sha, subject)
+
+    for ref in commits:
+        sha, subject = resolve_commit(ref)
+        commit_info[ref] = (sha, subject)
+        results = run_benchmark_at_commit(ref)
+        all_results[ref] = {r["label"]: r for r in results}
+
+    # Reinstall current version
+    print("\nReinstalling current worktree linopy...", file=sys.stderr)
+    subprocess.run(
+        [sys.executable, "-m", "pip", "install", "-e", ".", "-q", "--no-deps"],
+        capture_output=True,
+    )
+
+    # Get benchmark labels from first commit that has results
+    labels = list(next(iter(all_results.values())).keys())
+
+    # Print markdown table per benchmark
+    baseline_ref = commits[0]
+    print()
+    for label in labels:
+        baseline_data = all_results[baseline_ref].get(label)
+        if not baseline_data:
+            continue
+
+        nvars = baseline_data["nvars"]
+        ncons = baseline_data["ncons"]
+        print(f"### {label} ({nvars:,} vars, {ncons:,} cons)\n")
+        print("| Commit | Description | Time (ms) | Δ vs prev | Δ vs baseline |")
+        print("|--------|-------------|-----------|-----------|---------------|")
+
+        prev_mean = None
+        baseline_mean = baseline_data["mean"]
+
+        for ref in commits:
+            sha, subject = commit_info[ref]
+            data = all_results[ref].get(label)
+            if not data:
+                continue
+
+            mean_ms = data["mean"] * 1000
+            std_ms = data["std"] * 1000
+
+            # Delta vs previous
+            if prev_mean is not None:
+                delta_prev = (data["mean"] - prev_mean) / prev_mean * 100
+                delta_prev_str = f"{delta_prev:+.1f}%"
+            else:
+                delta_prev_str = "—"
+
+            # Delta vs baseline
+            delta_base = (data["mean"] - baseline_mean) / baseline_mean * 100
+            delta_base_str = f"{delta_base:+.1f}%"
+
+            print(
+                f"| `{sha}` | {subject[:40]:40s} | "
+                f"{mean_ms:7.1f} ± {std_ms:4.1f} | "
+                f"{delta_prev_str:>9s} | {delta_base_str:>13s} |"
+            )
+
+            prev_mean = data["mean"]
+
+        print()
+
+
+if __name__ == "__main__":
+    main()

From ac761c0888ba8f0305e18fc32e6bfdd24f26ef0d Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sat, 31 Jan 2026 19:55:35 +0100
Subject: [PATCH 11/11] Add better benchmark script

---
 dev-scripts/benchmark_lp_writer_commits.py | 61 +++++++++++++++++++---
 1 file changed, 53 insertions(+), 8 deletions(-)

diff --git a/dev-scripts/benchmark_lp_writer_commits.py b/dev-scripts/benchmark_lp_writer_commits.py
index ee5184ed..3c17a046 100644
--- a/dev-scripts/benchmark_lp_writer_commits.py
+++ b/dev-scripts/benchmark_lp_writer_commits.py
@@ -57,6 +57,18 @@ def basic_model(n):
         return m


+    def knapsack_model(n):
+        from numpy.random import default_rng
+        rng = default_rng(125)
+        m = Model()
+        packages = m.add_variables(coords=[np.arange(n)], binary=True)
+        weight = rng.integers(1, 100, size=n)
+        value = rng.integers(1, 100, size=n)
+        m.add_constraints((weight * packages).sum() <= 200)
+        m.add_objective(-(value * packages).sum())
+        return m
+
+
     def pypsa_model():
         try:
             import pypsa
@@ -67,6 +79,18 @@ def pypsa_model():
         return n.model


+    def pypsa_model_240h():
+        try:
+            import pypsa
+            import pandas as pd
+        except ImportError:
+            return None
+        n = pypsa.examples.scigrid_de()
+        n.set_snapshots(pd.date_range('2011-01-01', periods=240, freq='h'))
+        n.optimize.create_model()
+        return n.model
+
+
     def bench(label, m):
         with tempfile.TemporaryDirectory() as tmpdir:
@@ -82,12 +106,19 @@ def bench(label, m):

     results = []
-    results.append(bench("basic_model(N=100)", basic_model(100)))
-    results.append(bench("basic_model(N=500)", basic_model(500)))
+    for n in [50, 100, 200, 500]:
+        results.append(bench(f"basic_model(N={n})", basic_model(n)))
+
+    for n in [1000, 10000, 100000]:
+        results.append(bench(f"knapsack(N={n})", knapsack_model(n)))

     m = pypsa_model()
     if m is not None:
-        results.append(bench("PyPSA scigrid-de", m))
+        results.append(bench("PyPSA scigrid-de 24h", m))
+
+    m = pypsa_model_240h()
+    if m is not None:
+        results.append(bench("PyPSA scigrid-de 240h", m))

     json.dump(results, sys.stdout)
@@ -120,7 +151,7 @@ def run_benchmark_at_commit(ref: str) -> list[dict]:
             capture_output=True,
         )
         try:
-            # Install in current environment
+            # Install in current environment (non-editable + force to ensure clean)
             print(f"  Installing linopy from {sha}...", file=sys.stderr)
             subprocess.run(
                 [
@@ -128,23 +159,27 @@ def run_benchmark_at_commit(ref: str) -> list[dict]:
                     "-m",
                     "pip",
                     "install",
-                    "-e",
                     worktree_dir,
                     "-q",
                     "--no-deps",
+                    "--force-reinstall",
                 ],
                 check=True,
                 capture_output=True,
             )

-            # Run benchmark in subprocess (fresh import)
+            # Run benchmark in subprocess (fresh import, cwd=/ to avoid
+            # importing linopy from the repo working directory)
             print("  Running benchmarks...", file=sys.stderr)
             result = subprocess.run(
                 [sys.executable, "-c", BENCH_SCRIPT],
                 capture_output=True,
                 text=True,
-                check=True,
+                cwd="/",
             )
+            if result.returncode != 0:
+                print(f"  FAILED! stderr:\n{result.stderr}", file=sys.stderr)
+                return []
             return json.loads(result.stdout)
         finally:
             subprocess.run(
@@ -178,7 +213,17 @@ def main():

     # Reinstall current version
     print("\nReinstalling current worktree linopy...", file=sys.stderr)
     subprocess.run(
-        [sys.executable, "-m", "pip", "install", "-e", ".", "-q", "--no-deps"],
+        [
+            sys.executable,
+            "-m",
+            "pip",
+            "install",
+            "-e",
+            ".",
+            "-q",
+            "--no-deps",
+            "--force-reinstall",
+        ],
         capture_output=True,
     )