From d51cda8153cf625b193c003306f48192c3c7f5a5 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 00:38:33 +0200 Subject: [PATCH 01/13] fix(variables): broadcast and order pandas/DataArray bounds in coords `add_variables` had two related bugs when `lower`/`upper` were arrays: - pandas Series/DataFrame bounds missing a dimension in `coords` had the missing dimension silently dropped (#709), unlike DataArray bounds which were already broadcast. - DataArray bounds missing a dimension were expanded with `DataArray.expand_dims`, which prepends new dimensions and produces a `coords`-mismatched dimension order in the resulting variable (#706). The order depended on the type of the bounds, so scalar bounds worked but two array bounds missing the same dimension did not. Replace `_validate_dataarray_bounds` plus the downstream `as_dataarray(..., coords)` call with a single helper `_as_dataarray_in_coords`. It converts any input (pandas with named axes via `to_xarray`, otherwise via `as_dataarray`), validates the result against `coords`, expands missing dims, transposes to coords order, and reconstructs the coord variables in that order. `expand_dims` and `transpose` are no-ops when the array already matches, so scalar / full-dim DataArray bounds keep their fast path. Also fix `linopy.piecewise._broadcast_points`, which built the `expand_dims` map from a `set`, producing a hash-randomized dimension order across processes. Iterate expressions and dims in declaration order instead. Closes #706 and closes #709. Supersedes #710 and #719. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- doc/release_notes.rst | 3 + linopy/model.py | 94 ++++++++++++++++++++++++------ linopy/piecewise.py | 20 +++---- test/test_piecewise_constraints.py | 17 ++++++ test/test_variable.py | 70 +++++++++++++++++++--- 5 files changed, 167 insertions(+), 37 deletions(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 7883db82..64bfddf6 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -52,6 +52,9 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** +* ``Model.add_variables`` now broadcasts pandas ``Series``/``DataFrame`` bounds that are missing dimensions in ``coords`` to the full coords shape, matching ``DataArray`` behaviour. Previously the missing dimension was silently dropped when both bounds were pandas objects (`#709 `__). +* ``Model.add_variables`` now preserves ``coords`` dimension order when broadcasting ``DataArray``/pandas bounds that are missing dimensions. Previously the result depended on the bound type — scalar bounds kept ``coords`` order, while two array bounds missing the same dimension produced a prepended order (`#706 `__). +* ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 `__; pass ``reformulate_sos=True`` as a workaround. * ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning. 
diff --git a/linopy/model.py b/linopy/model.py index 48a8200b..3aaeec62 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -126,27 +126,70 @@ def _coords_to_dict( return result -def _validate_dataarray_bounds(arr: Any, coords: Any) -> Any: +def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None: """ - Validate and expand DataArray bounds against explicit coords. + Convert a pandas Series or DataFrame with fully named axes to a DataArray. + + Multi-level columns are unstacked so each level becomes its own dimension. + Returns ``None`` if any axis (or MultiIndex level) is unnamed, signalling + that the caller should fall back to ``as_dataarray``. + """ + if isinstance(arr, pd.DataFrame): + while isinstance(arr, pd.DataFrame): + arr = arr.unstack() + if not isinstance(arr, pd.Series): + return None + + index = arr.index + if isinstance(index, pd.MultiIndex): + if any(n is None for n in index.names): + return None + elif index.name is None: + return None + + return arr.to_xarray() - If ``arr`` is not a DataArray, return it unchanged (``as_dataarray`` - will handle conversion). For DataArray inputs: - - Raises ``ValueError`` if the array has dimensions not in coords. - - Raises ``ValueError`` if shared dimension coordinates don't match. - - Expands missing dimensions via ``expand_dims``. +def _as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: """ - if not isinstance(arr, DataArray): - return arr + Convert ``arr`` to a DataArray broadcast against the model ``coords``. + + Folds the conversion (scalars, numpy arrays, pandas, ``DataArray``) and + the coords-driven broadcast into a single step: + + - Pandas inputs with fully named axes are converted via ``to_xarray`` so + their axis names map to dimensions; other inputs go through + ``as_dataarray``. + - Raises ``ValueError`` if the resulting array has dimensions not in + ``coords``. 
+ - Raises ``ValueError`` if shared dimension coordinates differ in values; + same-values-different-order coordinates are reindexed. + - Missing dimensions are expanded; the result is transposed so the + dimension and coordinate order follow ``coords``. Both operations are + no-ops when the array already matches. + """ + if coords is None: + return as_dataarray(arr, coords, **kwargs) expected = _coords_to_dict(coords) if not expected: - return arr + return as_dataarray(arr, coords, **kwargs) + + orig_type_name = type(arr).__name__ + + if isinstance(arr, pd.Series | pd.DataFrame): + converted = _named_pandas_to_dataarray(arr) + if converted is not None: + arr = converted + + if not isinstance(arr, DataArray): + return as_dataarray(arr, coords, **kwargs) extra = set(arr.dims) - set(expected) if extra: - raise ValueError(f"DataArray has extra dimensions not in coords: {extra}") + raise ValueError( + f"{orig_type_name} has extra dimensions not in coords: {extra}" + ) for dim, coord_values in expected.items(): if dim not in arr.dims: @@ -171,11 +214,30 @@ def _validate_dataarray_bounds(arr: Any, coords: Any) -> Any: f"expected {expected_idx.tolist()}, got {actual_idx.tolist()}" ) - # Expand missing dimensions + # expand_dims prepends new dimensions and their coordinate variables; + # the subsequent transpose restores coords order. Both are no-ops when + # the array already matches. Reconstruct so the DataArray's coords + # iteration order also follows coords (a Dataset built from this picks + # up its dim order from coord insertion). 
expand = {k: v for k, v in expected.items() if k not in arr.dims} if expand: arr = arr.expand_dims(expand) + target_dims = tuple(d for d in expected if d in arr.dims) + tuple( + d for d in arr.dims if d not in expected + ) + arr = arr.transpose(*target_dims) + + coord_order = [c for c in target_dims if c in arr.coords] + [ + c for c in arr.coords if c not in target_dims + ] + if list(arr.coords) != coord_order: + arr = DataArray( + arr.variable, + coords={c: arr.coords[c] for c in coord_order}, + name=arr.name, + ) + return arr @@ -765,14 +827,10 @@ def add_variables( "Semi-continuous variables require a positive scalar lower bound." ) - if coords is not None: - lower = _validate_dataarray_bounds(lower, coords) - upper = _validate_dataarray_bounds(upper, coords) - data = Dataset( { - "lower": as_dataarray(lower, coords, **kwargs), - "upper": as_dataarray(upper, coords, **kwargs), + "lower": _as_dataarray_in_coords(lower, coords, **kwargs), + "upper": _as_dataarray_in_coords(upper, coords, **kwargs), "labels": -1, } ) diff --git a/linopy/piecewise.py b/linopy/piecewise.py index ccc265a7..25a0ce17 100644 --- a/linopy/piecewise.py +++ b/linopy/piecewise.py @@ -1006,20 +1006,18 @@ def _broadcast_points( lin_exprs = [_to_linexpr(e) for e in exprs] - target_dims: set[str] = set() - for le in lin_exprs: - target_dims.update(str(d) for d in le.coord_dims) - - missing = target_dims - skip - {str(d) for d in points.dims} - if not missing: - return points + point_dims = {str(d) for d in points.dims} + # Iterate exprs/dims in order; a set would give a hash-dependent, + # run-varying expanded dimension order. 
expand_map: dict[str, list] = {} - for d in missing: - for le in lin_exprs: + for le in lin_exprs: + for dim in le.coord_dims: + d = str(dim) + if d in skip or d in point_dims or d in expand_map: + continue if d in le.coords: - expand_map[str(d)] = list(le.coords[d].values) - break + expand_map[d] = list(le.coords[d].values) if expand_map: points = points.expand_dims(expand_map) diff --git a/test/test_piecewise_constraints.py b/test/test_piecewise_constraints.py index c44af394..72b57265 100644 --- a/test/test_piecewise_constraints.py +++ b/test/test_piecewise_constraints.py @@ -1383,6 +1383,23 @@ def test_broadcast_over_extra_dims(self) -> None: assert "generator" in delta.dims assert "time" in delta.dims + def test_broadcast_points_dim_order_follows_exprs(self) -> None: + """Expanded dims follow the expression dim order, not set ordering.""" + import xarray as xr + + from linopy.piecewise import BREAKPOINT_DIM, _broadcast_points + + m = Model() + coords = [ + pd.Index(["v0", "v1"], name="alpha"), + pd.Index(["w0", "w1"], name="beta"), + pd.Index([0, 1], name="gamma"), + ] + x = m.add_variables(coords=coords, name="x") + points = xr.DataArray([0, 1, 2, 3], dims=[BREAKPOINT_DIM]) + out = _broadcast_points(points, 1 * x) + assert out.dims == ("alpha", "beta", "gamma", BREAKPOINT_DIM) + # =========================================================================== # NaN masking diff --git a/test/test_variable.py b/test/test_variable.py index b14b746e..61bedf73 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -434,17 +434,71 @@ def test_dataarray_extra_dims(self, model: "Model") -> None: # -- Broadcasting missing dims ----------------------------------------- - def test_dataarray_broadcast_missing_dim(self, model: "Model") -> None: + @pytest.mark.parametrize( + "bound", + [ + pytest.param( + DataArray([1, 2, 3], dims=["time"], coords={"time": range(3)}), + id="DataArray", + ), + pytest.param( + pd.Series(index=pd.RangeIndex(3, name="time"), data=[1, 2, 
3]), + id="Series", + ), + pytest.param( + pd.DataFrame( + index=pd.RangeIndex(3, name="time"), + columns=pd.Index(["red"], name="colour"), + data=[[1], [2], [3]], + ), + id="DataFrame", + ), + ], + ) + def test_bound_broadcast_missing_dim( + self, model: "Model", bound: DataArray | pd.Series | pd.DataFrame + ) -> None: + """Pandas / DataArray bounds missing dims are broadcast to coords.""" time = pd.RangeIndex(3, name="time") space = pd.Index(["a", "b"], name="space") - lower = DataArray([1, 2, 3], dims=["time"], coords={"time": range(3)}) - var = model.add_variables(lower=lower, coords=[time, space], name="x") - assert set(var.data.dims) == {"time", "space"} - assert var.data.sizes == {"time": 3, "space": 2} - # Verify broadcast filled with actual values, not NaN + colour = pd.Index(["red"], name="colour") + var = model.add_variables( + lower=-bound, upper=bound, coords=[time, space, colour], name="x" + ) + assert var.dims == ("time", "space", "colour") + assert var.data.lower.dims == ("time", "space", "colour") + assert var.data.upper.dims == ("time", "space", "colour") + assert var.data.sizes == {"time": 3, "space": 2, "colour": 1} assert not var.data.lower.isnull().any() - assert (var.data.lower.sel(space="a") == [1, 2, 3]).all() - assert (var.data.lower.sel(space="b") == [1, 2, 3]).all() + assert (var.data.lower.sel(space="a", colour="red") == [-1, -2, -3]).all() + assert (var.data.lower.sel(space="b", colour="red") == [-1, -2, -3]).all() + assert (var.data.upper.sel(space="a", colour="red") == [1, 2, 3]).all() + + @pytest.mark.parametrize( + "lower, upper", + [ + pytest.param(0, "da", id="scalar-lower+da-upper"), + pytest.param("da", 1, id="da-lower+scalar-upper"), + pytest.param("da", "da", id="da-lower+da-upper"), + ], + ) + def test_dataarray_broadcast_missing_dim_order( + self, model: "Model", lower: Any, upper: Any + ) -> None: + """Dimension order follows coords, not the type of the bounds (#706).""" + x = pd.Index(["a", "b", "c"], name="x") + y = 
pd.Index(["X", "Y"], name="y") + full = DataArray( + np.arange(6).reshape(3, 2), coords={"x": x, "y": y}, dims=["x", "y"] + ) + # bounds are DataArrays missing the 'y' dimension + da = full.sum("y") + lower = da if lower == "da" else lower + upper = da if upper == "da" else upper + var = model.add_variables(lower=lower, upper=upper, coords=[x, y], name="x") + assert var.dims == ("x", "y") + assert var.data.lower.dims == ("x", "y") + assert var.data.upper.dims == ("x", "y") # -- Special coord formats --------------------------------------------- From a246006fb3c57fc37d28a956645df2bbaf9c0b16 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 00:42:53 +0200 Subject: [PATCH 02/13] docs(variables): frame add_variables coords as source of truth Restate #706/#709's fix as a single principle in the docstring, release note, and `_as_dataarray_in_coords` helper docstring: when `coords` is provided to `add_variables`, it is the source of truth for dimensions, dimension order, and coordinate values, and `lower` / `upper` are broadcast and aligned to match. Co-Authored-By: Claude Opus 4.7 (1M context) --- doc/release_notes.rst | 3 +-- linopy/model.py | 40 +++++++++++++++++++++------------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 64bfddf6..1ee464ad 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -52,8 +52,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** -* ``Model.add_variables`` now broadcasts pandas ``Series``/``DataFrame`` bounds that are missing dimensions in ``coords`` to the full coords shape, matching ``DataArray`` behaviour. Previously the missing dimension was silently dropped when both bounds were pandas objects (`#709 `__). -* ``Model.add_variables`` now preserves ``coords`` dimension order when broadcasting ``DataArray``/pandas bounds that are missing dimensions. 
Previously the result depended on the bound type — scalar bounds kept ``coords`` order, while two array bounds missing the same dimension produced a prepended order (`#706 `__). +* ``Model.add_variables``: when ``coords`` is provided, it is now the source of truth for the variable's dimensions, dimension order, and coordinate values. ``lower`` / ``upper`` bounds (scalar, ``numpy``, pandas, ``DataArray``) are broadcast and aligned to match ``coords``. Previously pandas bounds missing a dimension silently dropped it (`#709 `__), and the dimension order of expanded array bounds depended on the type of the bound (`#706 `__). * ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 `__; pass ``reformulate_sos=True`` as a workaround. * ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning. diff --git a/linopy/model.py b/linopy/model.py index 3aaeec62..f699cfa8 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -152,21 +152,21 @@ def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | Non def _as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: """ - Convert ``arr`` to a DataArray broadcast against the model ``coords``. 
- - Folds the conversion (scalars, numpy arrays, pandas, ``DataArray``) and - the coords-driven broadcast into a single step: - - - Pandas inputs with fully named axes are converted via ``to_xarray`` so - their axis names map to dimensions; other inputs go through - ``as_dataarray``. - - Raises ``ValueError`` if the resulting array has dimensions not in + Coerce ``arr`` into a DataArray that matches the model ``coords``. + + ``coords`` is the source of truth: the returned DataArray has the + dimensions, dimension order, and coordinate values of ``coords``, + regardless of the input type. Pandas inputs with fully named axes + are converted via ``to_xarray`` so their axis names map to + dimensions; scalars, numpy arrays, and unnamed pandas go through + ``as_dataarray``. The result is then validated, expanded over + missing dims, and transposed; ``expand_dims`` and ``transpose`` + are no-ops when the array already matches. + + - Raises ``ValueError`` if the input has dimensions not in ``coords``. - - Raises ``ValueError`` if shared dimension coordinates differ in values; - same-values-different-order coordinates are reindexed. - - Missing dimensions are expanded; the result is transposed so the - dimension and coordinate order follow ``coords``. Both operations are - no-ops when the array already matches. + - Raises ``ValueError`` if shared dimension coordinates differ in + values. Same-values-different-order coordinates are reindexed. """ if coords is None: return as_dataarray(arr, coords, **kwargs) @@ -745,11 +745,13 @@ def add_variables( Upper bound of the variable(s). Ignored if `binary` is True. The default is inf. coords : list/xarray.Coordinates, optional - The coords of the variable array. - These are directly passed to the DataArray creation of - `lower` and `upper`. For every single combination of - coordinates a optimization variable is added to the model. - The default is None. + The coords of the variable array. 
When provided, ``coords`` + is the source of truth for the variable's dimensions, + dimension order, and coordinate values; ``lower`` and + ``upper`` are broadcast and aligned to match. One + optimization variable is added per combination of + coordinates. The default is None, in which case the shape + is inferred from the bounds. name : str, optional Reference name of the added variables. The default None results in a name like "var1", "var2" etc. From aa0c80dbc936552eb43227cd564ead2105d2385b Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 00:44:24 +0200 Subject: [PATCH 03/13] docs: frame bounds fix as extending 0.7.0's coords-as-truth fix 0.7.0 already shipped "add_variables no longer ignores coords when lower / upper are DataArrays". Recast the new bullet as extending that fix to the remaining gaps (pandas bounds; dim order across bound types) so the continuity is visible from the release notes. Co-Authored-By: Claude Opus 4.7 (1M context) --- doc/release_notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 1ee464ad..046012ce 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -52,7 +52,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** -* ``Model.add_variables``: when ``coords`` is provided, it is now the source of truth for the variable's dimensions, dimension order, and coordinate values. ``lower`` / ``upper`` bounds (scalar, ``numpy``, pandas, ``DataArray``) are broadcast and aligned to match ``coords``. Previously pandas bounds missing a dimension silently dropped it (`#709 `__), and the dimension order of expanded array bounds depended on the type of the bound (`#706 `__). +* ``Model.add_variables``: extend the 0.7.0 fix that made ``coords`` the source of truth for ``DataArray`` bounds to the remaining gaps. 
Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are now broadcast to ``coords`` (previously silently dropped when both bounds were pandas, `#709 `__), and the variable's dimension order always follows ``coords`` regardless of bound type (previously the order of expanded array bounds depended on the type of the bound, `#706 `__). * ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 `__; pass ``reformulate_sos=True`` as a workaround. * ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning. From 4ddc3c20bc6a74ac3fdc495d079ae0cc3f9fc8fa Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 00:44:47 +0200 Subject: [PATCH 04/13] docs: reword as "extend and finalize", emphasize hardening Co-Authored-By: Claude Opus 4.7 (1M context) --- doc/release_notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 046012ce..c8ba5fc4 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -52,7 +52,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** -* ``Model.add_variables``: extend the 0.7.0 fix that made ``coords`` the source of truth for ``DataArray`` bounds to the remaining gaps. 
Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are now broadcast to ``coords`` (previously silently dropped when both bounds were pandas, `#709 `__), and the variable's dimension order always follows ``coords`` regardless of bound type (previously the order of expanded array bounds depended on the type of the bound, `#706 `__). +* ``Model.add_variables``: extend and finalize the 0.7.0 fix that made ``coords`` the source of truth for ``DataArray`` bounds, hardening it across the remaining bound types and dimension order. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are now broadcast to ``coords`` (previously silently dropped when both bounds were pandas, `#709 `__), and the variable's dimension order always follows ``coords`` regardless of bound type (previously the order of expanded array bounds depended on the type of the bound, `#706 `__). * ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 `__; pass ``reformulate_sos=True`` as a workaround. * ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning. From 5557a9f463ae42413366bde6174ca3f9dd8c72a2 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 00:45:24 +0200 Subject: [PATCH 05/13] docs: rephrase as "0.7.0 made ... 
this release closes the two remaining gaps" Co-Authored-By: Claude Opus 4.7 (1M context) --- doc/release_notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index c8ba5fc4..64074240 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -52,7 +52,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** -* ``Model.add_variables``: extend and finalize the 0.7.0 fix that made ``coords`` the source of truth for ``DataArray`` bounds, hardening it across the remaining bound types and dimension order. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are now broadcast to ``coords`` (previously silently dropped when both bounds were pandas, `#709 `__), and the variable's dimension order always follows ``coords`` regardless of bound type (previously the order of expanded array bounds depended on the type of the bound, `#706 `__). +* ``Model.add_variables``: 0.7.0 made ``coords`` the source of truth for ``DataArray`` bounds; this release closes the two remaining gaps. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 `__), and the variable's dimension order always follows ``coords`` regardless of bound type (`#706 `__). * ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 `__; pass ``reformulate_sos=True`` as a workaround. 
* ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning. From cdc987b1071ddf429c96bfb917a3bce04bafc727 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 00:46:52 +0200 Subject: [PATCH 06/13] docs: spell out dims/order/values in coords-as-truth bullet Co-Authored-By: Claude Opus 4.7 (1M context) --- doc/release_notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 64074240..3edc1c18 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -52,7 +52,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** -* ``Model.add_variables``: 0.7.0 made ``coords`` the source of truth for ``DataArray`` bounds; this release closes the two remaining gaps. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 `__), and the variable's dimension order always follows ``coords`` regardless of bound type (`#706 `__). +* ``Model.add_variables``: 0.7.0 made ``coords`` (dims, order, and values) the source of truth for ``DataArray`` bounds; this release closes the two remaining gaps. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 `__), and the variable's dimension order always follows ``coords`` regardless of bound type (`#706 `__). * ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). 
``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 `__; pass ``reformulate_sos=True`` as a workaround. * ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning. From 001d07152f583a74d7c2b89655bb5743b20ef1b5 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 00:58:16 +0200 Subject: [PATCH 07/13] test(variables): cover pandas MultiIndex bounds and dim reindex - Parametrize test_bound_broadcast_missing_dim with three additional cases: Series with MultiIndex(time, colour), DataFrame with MultiIndex columns(space, colour), and DataFrame with MultiIndex index(time, space). Exercises the `while DataFrame: unstack()` loop and the MultiIndex branch of `_named_pandas_to_dataarray`. - Add test_dataarray_coord_reorder for the same-values-different-order reindex branch (previously only the unequal-values raise was covered). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- test/test_variable.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/test/test_variable.py b/test/test_variable.py index 61bedf73..5ce76237 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -432,6 +432,14 @@ def test_dataarray_extra_dims(self, model: "Model") -> None: with pytest.raises(ValueError, match="extra dimensions"): model.add_variables(lower=lower, coords=self.DICT_COORDS, name="x") + def test_dataarray_coord_reorder(self, model: "Model") -> None: + """A bound whose coords differ only in order is reindexed to coords.""" + lower = DataArray([3, 1, 2], dims=["x"], coords={"x": ["c", "a", "b"]}) + var = model.add_variables( + lower=lower, coords=[pd.Index(["a", "b", "c"], name="x")], name="x" + ) + assert (var.data.lower == [1, 2, 3]).all() + # -- Broadcasting missing dims ----------------------------------------- @pytest.mark.parametrize( @@ -453,6 +461,35 @@ def test_dataarray_extra_dims(self, model: "Model") -> None: ), id="DataFrame", ), + pytest.param( + pd.Series( + index=pd.MultiIndex.from_product( + [pd.RangeIndex(3), ["red"]], names=("time", "colour") + ), + data=[1, 2, 3], + ), + id="Series-multiindex", + ), + pytest.param( + pd.DataFrame( + index=pd.RangeIndex(3, name="time"), + columns=pd.MultiIndex.from_product( + [["a", "b"], ["red"]], names=("space", "colour") + ), + data=[[1, 1], [2, 2], [3, 3]], + ), + id="DataFrame-multicolumns", + ), + pytest.param( + pd.DataFrame( + index=pd.MultiIndex.from_product( + [pd.RangeIndex(3), ["a", "b"]], names=("time", "space") + ), + columns=pd.Index(["red"], name="colour"), + data=[[1], [1], [2], [2], [3], [3]], + ), + id="DataFrame-multiindex", + ), ], ) def test_bound_broadcast_missing_dim( From bca89e7829ab7de209c68a6cd597da46a99a0c50 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 01:10:18 +0200 Subject: [PATCH 08/13] refactor: move 
as_dataarray_in_coords to common.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relocate `_as_dataarray_in_coords` and its helpers (`_coords_to_dict`, `_named_pandas_to_dataarray`) from `model.py` into `common.py`, alongside the existing `as_dataarray` they parallel. Rename to `as_dataarray_in_coords` (no leading underscore) since it is no longer file-local — other modules can import the strict-coords variant when migrating call sites. Pure relocation: no behavior change, no call-site changes beyond `add_variables`'s import. Refs #723. Co-Authored-By: Claude Opus 4.7 (1M context) --- linopy/common.py | 132 +++++++++++++++++++++++++++++++++++++++++++++- linopy/model.py | 134 ++--------------------------------------------- 2 files changed, 134 insertions(+), 132 deletions(-) diff --git a/linopy/common.py b/linopy/common.py index e9a38d29..92a6b0e5 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -9,7 +9,7 @@ import operator import os -from collections.abc import Callable, Generator, Hashable, Iterable, Sequence +from collections.abc import Callable, Generator, Hashable, Iterable, Mapping, Sequence from functools import cached_property, partial, reduce, wraps from pathlib import Path from typing import TYPE_CHECKING, Any, Generic, TypeVar, overload @@ -275,6 +275,136 @@ def as_dataarray( return arr +def _coords_to_dict( + coords: Sequence[Sequence | pd.Index | DataArray] | Mapping, +) -> dict[str, Any]: + """Normalize coords to a dict mapping dim names to coordinate values.""" + if isinstance(coords, Mapping): + return dict(coords) + # Sequence of indexes + result: dict[str, Any] = {} + for c in coords: + if isinstance(c, pd.Index) and c.name: + result[c.name] = c + return result + + +def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None: + """ + Convert a pandas Series or DataFrame with fully named axes to a DataArray. 
+ + Multi-level columns are unstacked so each level becomes its own dimension. + Returns ``None`` if any axis (or MultiIndex level) is unnamed, signalling + that the caller should fall back to ``as_dataarray``. + """ + if isinstance(arr, pd.DataFrame): + while isinstance(arr, pd.DataFrame): + arr = arr.unstack() + if not isinstance(arr, pd.Series): + return None + + index = arr.index + if isinstance(index, pd.MultiIndex): + if any(n is None for n in index.names): + return None + elif index.name is None: + return None + + return arr.to_xarray() + + +def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: + """ + Coerce ``arr`` into a DataArray that matches ``coords``. + + Strict-coords counterpart to ``as_dataarray``: ``coords`` is the + source of truth, so the returned DataArray has the dimensions, + dimension order, and coordinate values of ``coords``, regardless + of the input type. Pandas inputs with fully named axes are + converted via ``to_xarray`` so their axis names map to dimensions; + scalars, numpy arrays, and unnamed pandas go through + ``as_dataarray``. The result is then validated, expanded over + missing dims, and transposed; ``expand_dims`` and ``transpose`` + are no-ops when the array already matches. + + - Raises ``ValueError`` if the input has dimensions not in + ``coords``. + - Raises ``ValueError`` if shared dimension coordinates differ in + values. Same-values-different-order coordinates are reindexed. 
+ """ + if coords is None: + return as_dataarray(arr, coords, **kwargs) + + expected = _coords_to_dict(coords) + if not expected: + return as_dataarray(arr, coords, **kwargs) + + orig_type_name = type(arr).__name__ + + if isinstance(arr, pd.Series | pd.DataFrame): + converted = _named_pandas_to_dataarray(arr) + if converted is not None: + arr = converted + + if not isinstance(arr, DataArray): + return as_dataarray(arr, coords, **kwargs) + + extra = set(arr.dims) - set(expected) + if extra: + raise ValueError( + f"{orig_type_name} has extra dimensions not in coords: {extra}" + ) + + for dim, coord_values in expected.items(): + if dim not in arr.dims: + continue + if isinstance(arr.indexes.get(dim), pd.MultiIndex): + continue + expected_idx = ( + coord_values + if isinstance(coord_values, pd.Index) + else pd.Index(coord_values) + ) + actual_idx = arr.coords[dim].to_index() + if not actual_idx.equals(expected_idx): + # Same values, different order → reindex to match expected order + if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( + expected_idx + ): + arr = arr.reindex({dim: expected_idx}) + else: + raise ValueError( + f"Coordinates for dimension '{dim}' do not match: " + f"expected {expected_idx.tolist()}, got {actual_idx.tolist()}" + ) + + # expand_dims prepends new dimensions and their coordinate variables; + # the subsequent transpose restores coords order. Both are no-ops when + # the array already matches. Reconstruct so the DataArray's coords + # iteration order also follows coords (a Dataset built from this picks + # up its dim order from coord insertion). 
+ expand = {k: v for k, v in expected.items() if k not in arr.dims} + if expand: + arr = arr.expand_dims(expand) + + target_dims = tuple(d for d in expected if d in arr.dims) + tuple( + d for d in arr.dims if d not in expected + ) + arr = arr.transpose(*target_dims) + + coord_order = [c for c in target_dims if c in arr.coords] + [ + c for c in arr.coords if c not in target_dims + ] + if list(arr.coords) != coord_order: + arr = DataArray( + arr.variable, + coords={c: arr.coords[c] for c in coord_order}, + name=arr.name, + ) + + return arr + + def broadcast_mask(mask: DataArray, labels: DataArray) -> DataArray: """ Broadcast a boolean mask to match the shape of labels. diff --git a/linopy/model.py b/linopy/model.py index f699cfa8..2adadf2b 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -29,6 +29,7 @@ from linopy import solvers from linopy.common import ( as_dataarray, + as_dataarray_in_coords, assign_multiindex_safe, best_int, broadcast_mask, @@ -112,135 +113,6 @@ logger = logging.getLogger(__name__) -def _coords_to_dict( - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping, -) -> dict[str, Any]: - """Normalize coords to a dict mapping dim names to coordinate values.""" - if isinstance(coords, Mapping): - return dict(coords) - # Sequence of indexes - result: dict[str, Any] = {} - for c in coords: - if isinstance(c, pd.Index) and c.name: - result[c.name] = c - return result - - -def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None: - """ - Convert a pandas Series or DataFrame with fully named axes to a DataArray. - - Multi-level columns are unstacked so each level becomes its own dimension. - Returns ``None`` if any axis (or MultiIndex level) is unnamed, signalling - that the caller should fall back to ``as_dataarray``. 
- """ - if isinstance(arr, pd.DataFrame): - while isinstance(arr, pd.DataFrame): - arr = arr.unstack() - if not isinstance(arr, pd.Series): - return None - - index = arr.index - if isinstance(index, pd.MultiIndex): - if any(n is None for n in index.names): - return None - elif index.name is None: - return None - - return arr.to_xarray() - - -def _as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: - """ - Coerce ``arr`` into a DataArray that matches the model ``coords``. - - ``coords`` is the source of truth: the returned DataArray has the - dimensions, dimension order, and coordinate values of ``coords``, - regardless of the input type. Pandas inputs with fully named axes - are converted via ``to_xarray`` so their axis names map to - dimensions; scalars, numpy arrays, and unnamed pandas go through - ``as_dataarray``. The result is then validated, expanded over - missing dims, and transposed; ``expand_dims`` and ``transpose`` - are no-ops when the array already matches. - - - Raises ``ValueError`` if the input has dimensions not in - ``coords``. - - Raises ``ValueError`` if shared dimension coordinates differ in - values. Same-values-different-order coordinates are reindexed. 
- """ - if coords is None: - return as_dataarray(arr, coords, **kwargs) - - expected = _coords_to_dict(coords) - if not expected: - return as_dataarray(arr, coords, **kwargs) - - orig_type_name = type(arr).__name__ - - if isinstance(arr, pd.Series | pd.DataFrame): - converted = _named_pandas_to_dataarray(arr) - if converted is not None: - arr = converted - - if not isinstance(arr, DataArray): - return as_dataarray(arr, coords, **kwargs) - - extra = set(arr.dims) - set(expected) - if extra: - raise ValueError( - f"{orig_type_name} has extra dimensions not in coords: {extra}" - ) - - for dim, coord_values in expected.items(): - if dim not in arr.dims: - continue - if isinstance(arr.indexes.get(dim), pd.MultiIndex): - continue - expected_idx = ( - coord_values - if isinstance(coord_values, pd.Index) - else pd.Index(coord_values) - ) - actual_idx = arr.coords[dim].to_index() - if not actual_idx.equals(expected_idx): - # Same values, different order → reindex to match expected order - if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( - expected_idx - ): - arr = arr.reindex({dim: expected_idx}) - else: - raise ValueError( - f"Coordinates for dimension '{dim}' do not match: " - f"expected {expected_idx.tolist()}, got {actual_idx.tolist()}" - ) - - # expand_dims prepends new dimensions and their coordinate variables; - # the subsequent transpose restores coords order. Both are no-ops when - # the array already matches. Reconstruct so the DataArray's coords - # iteration order also follows coords (a Dataset built from this picks - # up its dim order from coord insertion). 
- expand = {k: v for k, v in expected.items() if k not in arr.dims} - if expand: - arr = arr.expand_dims(expand) - - target_dims = tuple(d for d in expected if d in arr.dims) + tuple( - d for d in arr.dims if d not in expected - ) - arr = arr.transpose(*target_dims) - - coord_order = [c for c in target_dims if c in arr.coords] + [ - c for c in arr.coords if c not in target_dims - ] - if list(arr.coords) != coord_order: - arr = DataArray( - arr.variable, - coords={c: arr.coords[c] for c in coord_order}, - name=arr.name, - ) - - return arr - - class Model: """ Linear optimization model. @@ -831,8 +703,8 @@ def add_variables( data = Dataset( { - "lower": _as_dataarray_in_coords(lower, coords, **kwargs), - "upper": _as_dataarray_in_coords(upper, coords, **kwargs), + "lower": as_dataarray_in_coords(lower, coords, **kwargs), + "upper": as_dataarray_in_coords(upper, coords, **kwargs), "labels": -1, } ) From b28f3dfef316f363c36530a4f9f23903cc714099 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 13:15:50 +0200 Subject: [PATCH 09/13] refactor(common): simplify _named_pandas_to_dataarray + cover edge branches Replace the unstack-while-loop / split named-check structure with a single up-front "all axes named" check and a single ``DataFrame.stack(level=list(range(nlevels)), future_stack=True)`` call that collapses all column levels into the row MultiIndex in one shot. Same observable behaviour, fewer moving parts, no defensive unreachable branches. Add tests covering the unnamed-axis fall-through path, the empty-coords short-circuit in ``as_dataarray_in_coords``, and the ``MultiIndex``-on-a-dim ``continue`` in the validation loop. Together with the restructure these bring the new helper code to full patch coverage. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- linopy/common.py | 23 ++++++++++------------- test/test_variable.py | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/linopy/common.py b/linopy/common.py index 92a6b0e5..d7869f24 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -293,23 +293,20 @@ def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | Non """ Convert a pandas Series or DataFrame with fully named axes to a DataArray. - Multi-level columns are unstacked so each level becomes its own dimension. - Returns ``None`` if any axis (or MultiIndex level) is unnamed, signalling - that the caller should fall back to ``as_dataarray``. + DataFrame columns (and column-MultiIndex levels) are stacked into the row + MultiIndex so each axis name becomes its own dimension. Returns ``None`` + if any axis (or MultiIndex level) is unnamed, so the caller can fall back + to ``as_dataarray``. """ + names = list(arr.index.names) if isinstance(arr, pd.DataFrame): - while isinstance(arr, pd.DataFrame): - arr = arr.unstack() - if not isinstance(arr, pd.Series): - return None - - index = arr.index - if isinstance(index, pd.MultiIndex): - if any(n is None for n in index.names): - return None - elif index.name is None: + names += list(arr.columns.names) + if any(n is None for n in names): return None + if isinstance(arr, pd.DataFrame): + arr = arr.stack(list(range(arr.columns.nlevels)), future_stack=True) + return arr.to_xarray() diff --git a/test/test_variable.py b/test/test_variable.py index 5ce76237..1f80a8ed 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -440,6 +440,31 @@ def test_dataarray_coord_reorder(self, model: "Model") -> None: ) assert (var.data.lower == [1, 2, 3]).all() + def test_pandas_bound_with_unnamed_axis_falls_through(self, model: "Model") -> None: + """Pandas bound with any unnamed axis falls through to as_dataarray.""" + unnamed_series = pd.Series([1, 2, 3]) + var = 
model.add_variables( + upper=unnamed_series, + coords=[pd.Index([0, 1, 2], name="dim_0")], + name="x", + ) + assert (var.data.upper.values.flatten() == [1, 2, 3]).all() + + def test_unnamed_coords_short_circuit(self, model: "Model") -> None: + """Coords as a list of unnamed indexes leaves the bound unchanged.""" + bound = DataArray([1, 2, 3], dims=["dim_0"]) + var = model.add_variables(upper=bound, coords=[pd.Index([0, 1, 2])], name="x") + assert (var.data.upper == [1, 2, 3]).all() + + def test_dataarray_bound_with_multiindex_coord(self, model: "Model") -> None: + """A DataArray bound carrying a MultiIndex coord skips the value check.""" + midx = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=("l1", "l2")) + midx.name = "multi" + bound = DataArray([1, 2, 3, 4], dims=["multi"], coords={"multi": midx}) + var = model.add_variables(upper=bound, coords=[midx], name="x") + assert var.shape == (4,) + assert (var.data.upper == [1, 2, 3, 4]).all() + # -- Broadcasting missing dims ----------------------------------------- @pytest.mark.parametrize( From 9b4d7cc55fb494c63323a6baee8dbb57eca36d56 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 13:50:12 +0200 Subject: [PATCH 10/13] fix(common): only accept string axis names in _named_pandas_to_dataarray Pandas allows any hashable in ``pd.Index.names`` (tuples, ints, etc.), but only strings map cleanly to xarray dim names. Reject anything non-string up front so the pandas input falls back to ``as_dataarray`` instead of producing a DataArray with an awkward non-string dim name that downstream validation would reject with a confusing "extra dimensions" error. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- linopy/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/linopy/common.py b/linopy/common.py index d7869f24..a0e0f1a8 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -301,7 +301,9 @@ def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | Non names = list(arr.index.names) if isinstance(arr, pd.DataFrame): names += list(arr.columns.names) - if any(n is None for n in names): + # pd.Index.names entries can be any hashable (tuples, ints, ...). Only + # strings map cleanly to xarray dim names; everything else falls through. + if any(not isinstance(n, str) for n in names): return None if isinstance(arr, pd.DataFrame): From 7705156a517cd1c818bc1aaddfadc748aa98b6a0 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 15:11:53 +0200 Subject: [PATCH 11/13] fix(common): align positional inputs to coords, with clear shape errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inputs without their own meaningful labels — numpy arrays, polars Series, pandas with unnamed axes — fell through ``as_dataarray_in_coords`` via a short-circuit return. That meant: - The default ``dim_0`` / ``dim_1`` axis names from ``as_dataarray`` leaked into the result, so a pandas Series without an index name combined with another bound carrying a named coord produced a spurious 2-D variable. - Shape mismatches surfaced further downstream as confusing "coordinates do not match" errors against the auto-generated ``RangeIndex``. The fall-through now: (a) defaults ``dims`` to coords' keys so axes get labelled correctly; (b) runs the same validate / expand / transpose path as labelled inputs; (c) re-assigns coords from ``expected`` on the resulting DataArray so positional inputs align to coords by position. A shape mismatch surfaces as xarray's clear ``conflicting sizes`` from ``assign_coords``. 
MultiIndex coords are left alone (re-assigning a PandasMultiIndex emits a FutureWarning). Replaces the tautological ``test_pandas_bound_with_unnamed_axis_falls_through`` (which sneaked past by naming the coord ``"dim_0"`` to match the auto-generated dim) with ``test_positional_bound_aligns_to_coords`` that asserts actual positional alignment across numpy / Series / DataFrame, plus ``test_positional_bound_wrong_size_raises_clear_error`` for the shape-mismatch path. Co-Authored-By: Claude Opus 4.7 (1M context) --- linopy/common.py | 20 +++++++++++++++++- test/test_variable.py | 48 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/linopy/common.py b/linopy/common.py index a0e0f1a8..51566079 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -346,7 +346,25 @@ def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: arr = converted if not isinstance(arr, DataArray): - return as_dataarray(arr, coords, **kwargs) + # numpy/polars/unnamed-pandas inputs are positional — their only + # meaningful information is the values; any axis labels are + # auto-generated. Default dims to coords' keys so as_dataarray + # labels axes correctly (instead of dim_0/dim_1), then re-assign + # coords from expected so positional inputs align to coords by + # position. A shape mismatch surfaces here as a clear xarray + # "conflicting sizes" error rather than a confusing + # "coordinates do not match" further down. + kwargs.setdefault("dims", list(expected)) + arr = as_dataarray(arr, coords, **kwargs) + # Skip MultiIndex dims — re-assigning a PandasMultiIndex coord emits + # a FutureWarning and isn't needed (as_dataarray already used it). 
+ arr = arr.assign_coords( + { + d: expected[d] + for d in arr.dims + if d in expected and not isinstance(arr.indexes.get(d), pd.MultiIndex) + } + ) extra = set(arr.dims) - set(expected) if extra: diff --git a/test/test_variable.py b/test/test_variable.py index 1f80a8ed..1a49abd6 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -440,15 +440,47 @@ def test_dataarray_coord_reorder(self, model: "Model") -> None: ) assert (var.data.lower == [1, 2, 3]).all() - def test_pandas_bound_with_unnamed_axis_falls_through(self, model: "Model") -> None: - """Pandas bound with any unnamed axis falls through to as_dataarray.""" - unnamed_series = pd.Series([1, 2, 3]) - var = model.add_variables( - upper=unnamed_series, - coords=[pd.Index([0, 1, 2], name="dim_0")], - name="x", + def test_positional_bound_aligns_to_coords(self, model: "Model") -> None: + """ + Numpy / unnamed-pandas bounds align to coords positionally, + even when the input's auto-generated coord values would not match. + """ + coords = [pd.Index(list("abc"), name="x")] + # numpy array — no labels at all, positional alignment. + v_np = model.add_variables(upper=np.array([1, 2, 3]), coords=coords, name="np") + assert v_np.dims == ("x",) + assert (v_np.data.upper.sel(x="a") == 1).all() + assert (v_np.data.upper.sel(x="c") == 3).all() + # Unnamed Series — pandas index is auto-generated, ignored in favour + # of coords (positional alignment, principle: coords is source of truth). + v_s = model.add_variables( + upper=pd.Series([10, 20, 30]), coords=coords, name="s" + ) + assert v_s.dims == ("x",) + assert (v_s.data.upper.sel(x="a") == 10).all() + assert (v_s.data.upper.sel(x="c") == 30).all() + # Unnamed DataFrame — both axes positional. 
+ v_df = model.add_variables( + upper=pd.DataFrame([[1, 2], [3, 4], [5, 6]]), + coords=[pd.Index(list("abc"), name="x"), pd.Index(list("xy"), name="y")], + name="df", ) - assert (var.data.upper.values.flatten() == [1, 2, 3]).all() + assert v_df.dims == ("x", "y") + assert (v_df.data.upper.sel(x="a", y="x") == 1).all() + assert (v_df.data.upper.sel(x="c", y="y") == 6).all() + + def test_positional_bound_wrong_size_raises_clear_error( + self, model: "Model" + ) -> None: + """ + Shape mismatch on positional inputs surfaces as a size error, + not a 'coordinates do not match' error. + """ + coords = [pd.Index(list("abc"), name="x")] + with pytest.raises(Exception, match="conflicting sizes|do not match"): + model.add_variables(upper=np.array([1, 2]), coords=coords, name="np_bad") + with pytest.raises(Exception, match="conflicting sizes|do not match"): + model.add_variables(upper=pd.Series([1, 2]), coords=coords, name="s_bad") def test_unnamed_coords_short_circuit(self, model: "Model") -> None: """Coords as a list of unnamed indexes leaves the bound unchanged.""" From 26f3e73bebc0d45a665ad0b59c8c3604aedfe942 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 15:23:30 +0200 Subject: [PATCH 12/13] fix(sos): use var.indexes[d] for reformulated bounds; widen _coords_to_dict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``reformulate_sos1`` / ``reformulate_sos2`` built the coords for the indicator variable as ``[var.coords[d] for d in var.dims]``, which is a list of ``xarray.DataArray`` coord objects. The rest of linopy passes ``coords`` as a list of ``pd.Index``. The mix slipped through under the old short-circuit fall-through but broke once the helper started defaulting ``dims`` from ``_coords_to_dict(coords)`` — non-``pd.Index`` entries were silently dropped, so ``len(dims) < len(coords)`` and xarray raised ``different number of dimensions on data and dims: 2 vs 1``. 
Use ``var.indexes[d]`` instead — it returns the actual ``pd.Index`` (regular or MultiIndex) for the dim and preserves structure that ``pd.Index(coord.values, ...)`` would flatten. Also widen ``_coords_to_dict`` to accept any entry with a ``.name`` (xarray DataArrays included) so a future caller passing mixed types doesn't silently lose coords. The reformulator fix removes the only known producer of mixed-type coords; this is belt-and-suspenders. Co-Authored-By: Claude Opus 4.7 (1M context) --- linopy/common.py | 7 ++++--- linopy/sos_reformulation.py | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/linopy/common.py b/linopy/common.py index 51566079..db3d8095 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -281,11 +281,12 @@ def _coords_to_dict( """Normalize coords to a dict mapping dim names to coordinate values.""" if isinstance(coords, Mapping): return dict(coords) - # Sequence of indexes + # Sequence of named coords (pd.Index, xarray DataArray, etc.). result: dict[str, Any] = {} for c in coords: - if isinstance(c, pd.Index) and c.name: - result[c.name] = c + name = getattr(c, "name", None) + if name: + result[name] = c return result diff --git a/linopy/sos_reformulation.py b/linopy/sos_reformulation.py index 1f17ee92..4abfb755 100644 --- a/linopy/sos_reformulation.py +++ b/linopy/sos_reformulation.py @@ -119,7 +119,7 @@ def reformulate_sos1( upper_name = f"{prefix}{name}_upper" card_name = f"{prefix}{name}_card" - coords = [var.coords[d] for d in var.dims] + coords = [var.indexes[d] for d in var.dims] y = model.add_variables(coords=coords, name=y_name, binary=True) model.add_constraints(var <= M * y, name=upper_name) @@ -173,9 +173,9 @@ def reformulate_sos2( card_name = f"{prefix}{name}_card" z_coords = [ - pd.Index(var.coords[sos_dim].values[:-1], name=sos_dim) + pd.Index(var.indexes[sos_dim][:-1], name=sos_dim) if d == sos_dim - else var.coords[d] + else var.indexes[d] for d in var.dims ] z = model.add_variables(coords=z_coords, 
name=z_name, binary=True) From 095b510152d0513086b6f46cf2f98cbadb026d1f Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 15:37:15 +0200 Subject: [PATCH 13/13] fix(common): tighten _coords_to_dict to raise on non-pd.Index entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the permissive ``getattr(c, "name", None)`` check with an explicit allow-list: ``pd.Index`` (named or not — unnamed silently skip as before) and unnamed sequences (``list`` / ``tuple`` / ``range`` / ``numpy.ndarray``). Any other type (notably ``xarray.DataArray``, but also ``pd.Series`` and friends) now raises ``TypeError`` with a hint to pass ``variable.indexes[]`` instead. This would have caught the SOS-reformulator bug at the source instead of letting it surface as a confusing xarray error about mismatched dim counts ten frames down. Drop ``DataArray`` from the matching ``coords`` type hints in ``model.py`` / ``expressions.py`` so the documented and runtime type sets agree. Co-Authored-By: Claude Opus 4.7 (1M context) --- linopy/common.py | 28 ++++++++++++++++++++++------ linopy/expressions.py | 2 +- linopy/model.py | 12 ++++++------ 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/linopy/common.py b/linopy/common.py index db3d8095..dce26a7a 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -276,17 +276,33 @@ def as_dataarray( def _coords_to_dict( - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping, + coords: Sequence[Sequence | pd.Index] | Mapping, ) -> dict[str, Any]: - """Normalize coords to a dict mapping dim names to coordinate values.""" + """ + Normalize coords to a dict mapping dim names to coordinate values. + + Entries must be ``pd.Index`` (named or not) or unnamed sequences + (``list`` / ``tuple`` / ``range`` / ``np.ndarray``). 
Other types — + notably ``xarray.DataArray`` — raise ``TypeError`` rather than + being silently dropped: callers should convert via + ``variable.indexes[]`` (or ``pd.Index(...)``) first. + """ if isinstance(coords, Mapping): return dict(coords) - # Sequence of named coords (pd.Index, xarray DataArray, etc.). result: dict[str, Any] = {} for c in coords: - name = getattr(c, "name", None) - if name: - result[name] = c + if isinstance(c, pd.Index): + if c.name: + result[c.name] = c + elif isinstance(c, list | tuple | range | np.ndarray): + pass # unnamed sequence contributes no named dim + else: + raise TypeError( + f"coords entries must be pd.Index or an unnamed sequence " + f"(list / tuple / range / numpy.ndarray); got " + f"{type(c).__name__}. For an xarray DataArray coord, pass " + f"`variable.indexes[]` (a pd.Index) instead." + ) return result diff --git a/linopy/expressions.py b/linopy/expressions.py index 2ab0b8d3..674c987c 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -1844,7 +1844,7 @@ def from_rule( cls, model: Model, rule: Callable, - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None, + coords: Sequence[Sequence | pd.Index] | Mapping | None = None, ) -> LinearExpression: """ Create a linear expression from a rule and a set of coordinates. 
diff --git a/linopy/model.py b/linopy/model.py index 2adadf2b..6132fb00 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -591,7 +591,7 @@ def add_variables( self, lower: Any = -inf, upper: Any = inf, - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None, + coords: Sequence[Sequence | pd.Index] | Mapping | None = None, name: str | None = None, mask: DataArray | ndarray | Series | None = None, binary: bool = False, @@ -823,7 +823,7 @@ def add_constraints( sign: SignLike | None = ..., rhs: ConstantLike | VariableLike | ExpressionLike | None = ..., name: str | None = ..., - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = ..., + coords: Sequence[Sequence | pd.Index] | Mapping | None = ..., mask: MaskLike | None = ..., freeze: Literal[False] = ..., ) -> Constraint: ... @@ -839,7 +839,7 @@ def add_constraints( sign: SignLike | None = ..., rhs: ConstantLike | VariableLike | ExpressionLike | None = ..., name: str | None = ..., - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = ..., + coords: Sequence[Sequence | pd.Index] | Mapping | None = ..., mask: MaskLike | None = ..., freeze: Literal[True] = ..., ) -> CSRConstraint: ... @@ -854,7 +854,7 @@ def add_constraints( sign: SignLike | None = None, rhs: ConstantLike | VariableLike | ExpressionLike | None = None, name: str | None = None, - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None, + coords: Sequence[Sequence | pd.Index] | Mapping | None = None, mask: MaskLike | None = None, freeze: bool | None = None, ) -> ConstraintBase: @@ -1360,7 +1360,7 @@ def calculate_block_maps(self) -> None: @overload def linexpr( - self, *args: Sequence[Sequence | pd.Index | DataArray] | Mapping + self, *args: Sequence[Sequence | pd.Index] | Mapping ) -> LinearExpression: ... 
@overload @@ -1373,7 +1373,7 @@ def linexpr( *args: tuple[ConstantLike, str | Variable | ScalarVariable] | ConstantLike | Callable - | Sequence[Sequence | pd.Index | DataArray] + | Sequence[Sequence | pd.Index] | Mapping, ) -> LinearExpression: """