Fix expression merge for reordered coordinates under join='override'

Touches three files:
  * doc/release_notes.rst: add a release note for the fix.
  * linopy/expressions.py: in merge(), when the override path is taken and
    more than one dataset is merged, reindex each later dataset to the first
    dataset's coordinates for every non-helper dimension whose labels have the
    same length and the same set of values but a different order.
  * test/test_linear_expression.py: fix the "otherride" comment typo (twice)
    and add test_merge_with_override_and_reordered_coords and
    test_align_with_overlapping_coords.

NOTE(review): this patch was recovered from a copy whose line breaks and
leading whitespace had been collapsed. Blank lines were restored from the hunk
header counts; indentation was restored from Python syntax. Confirm the
context lines against the target tree (or apply with whitespace-tolerant
options) before relying on it.

diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index d9bc95aa..dfa22d21 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -4,6 +4,7 @@ Release Notes
 .. Upcoming Version
 
 * Fix LP file writing for negative zero (-0.0) values that produced invalid syntax like "+-0.0" rejected by Gurobi
+* Fix expression merge to properly reindex coordinates when expressions have the same coordinate values in different order, preventing silent data corruption with ``join='override'``. For expressions with different coordinate subsets, use ``linopy.align(..., join='outer')`` before adding.
 
 Version 0.6.0
 --------------
diff --git a/linopy/expressions.py b/linopy/expressions.py
index 10e243de..a5b995c2 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -2122,6 +2122,40 @@ def merge(
     data = [e.data if isinstance(e, linopy_types) else e for e in exprs]
     data = [fill_missing_coords(ds, fill_helper_dims=True) for ds in data]
 
+    # When using join='override', xr.concat places values positionally instead of
+    # aligning by label. We need to reindex datasets that have the same coordinate
+    # values but in a different order to ensure proper alignment.
+    if override and len(data) > 1:
+        reference = data[0]
+        aligned_data = [reference]
+        for ds_item in data[1:]:
+            reindex_dims = {}
+            for dim_name in reference.dims:
+                if dim_name in HELPER_DIMS or dim_name not in ds_item.dims:
+                    continue
+                if dim_name not in reference.coords or dim_name not in ds_item.coords:
+                    continue  # pragma: no cover
+                ref_coord = reference.coords[dim_name].values
+                ds_coord = ds_item.coords[dim_name].values
+                # Check: same length, same set of values, but different order
+                if len(ref_coord) == len(ds_coord) and not np.array_equal(
+                    ref_coord, ds_coord
+                ):
+                    try:
+                        same_values = set(ref_coord) == set(ds_coord)
+                    except TypeError:  # pragma: no cover
+                        # Unhashable types - convert to strings for comparison
+                        same_values = {str(v) for v in ref_coord} == {
+                            str(v) for v in ds_coord
+                        }
+                    if same_values:
+                        reindex_dims[dim_name] = reference.coords[dim_name]
+            if reindex_dims:
+                aligned_data.append(ds_item.reindex(reindex_dims))
+            else:
+                aligned_data.append(ds_item)
+        data = aligned_data
+
     if not kwargs:
         kwargs = {
             "coords": "minimal",
diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py
index a75ace3f..e66c8975 100644
--- a/test/test_linear_expression.py
+++ b/test/test_linear_expression.py
@@ -441,7 +441,7 @@ def test_linear_expression_sum(
 
     assert_linequal(expr.sum(["dim_0", TERM_DIM]), expr.sum("dim_0"))
 
-    # test special case otherride coords
+    # test special case override coords
     expr = v.loc[:9] + v.loc[10:]
     assert expr.nterm == 2
     assert len(expr.coords["dim_2"]) == 10
@@ -465,7 +465,7 @@ def test_linear_expression_sum_with_const(
 
     assert_linequal(expr.sum(["dim_0", TERM_DIM]), expr.sum("dim_0"))
 
-    # test special case otherride coords
+    # test special case override coords
     expr = v.loc[:9] + v.loc[10:]
     assert expr.nterm == 2
     assert len(expr.coords["dim_2"]) == 10
@@ -1194,6 +1194,66 @@ def test_merge(x: Variable, y: Variable, z: Variable) -> None:
         merge(expr1, expr2)
 
 
+def test_merge_with_override_and_reordered_coords(m: Model) -> None:
+    """Test merge with join='override' when coordinates have same values but different order."""
+    import pandas as pd
+
+    # Create variables with same coordinate values but different order
+    coords_a = pd.Index(["x", "y", "z"], name="dim_0")
+    coords_b = pd.Index(["z", "x", "y"], name="dim_0")  # Same values, different order
+
+    v1 = m.add_variables(coords=[coords_a], name="v1")
+    v2 = m.add_variables(coords=[coords_b], name="v2")
+
+    expr1 = 1 * v1
+    expr2 = 2 * v2
+
+    # Merging along _term (default) triggers the override logic because
+    # both expressions have the same dimension sizes
+    res = merge([expr1, expr2], cls=LinearExpression)
+
+    # Verify that the coordinates match the first expression's order
+    assert list(res.coords["dim_0"].values) == ["x", "y", "z"]
+    # The result should have 2 terms (one from each expression)
+    assert res.nterm == 2
+    # Verify the coefficients are correctly aligned (not mismatched due to positional concat)
+    assert res.sel(dim_0="x").coeffs.values.tolist() == [1.0, 2.0]
+    assert res.sel(dim_0="z").coeffs.values.tolist() == [1.0, 2.0]
+
+
+def test_align_with_overlapping_coords(m: Model) -> None:
+    """
+    Test that linopy.align enables correct addition of expressions with
+    overlapping but different coordinate subsets.
+    """
+    import pandas as pd
+
+    from linopy import align
+
+    coords_a = pd.Index(["alice", "bob"], name="person")
+    coords_b = pd.Index(["bob", "charlie"], name="person")
+
+    v1 = m.add_variables(coords=[coords_a], name="ov1")
+    v2 = m.add_variables(coords=[coords_b], name="ov2")
+
+    expr1, expr2 = align(1 * v1, 2 * v2, join="outer")
+    res = expr1 + expr2
+
+    # Union coords should be alice, bob, charlie
+    assert list(res.coords["person"].values) == ["alice", "bob", "charlie"]
+    assert res.nterm == 2
+    # bob: in both → coeffs [1, 2]
+    assert res.sel(person="bob").coeffs.values.tolist() == [1.0, 2.0]
+    # alice: only in expr1 → first term has coeff 1, second is fill (nan)
+    alice_coeffs = res.sel(person="alice").coeffs.values
+    assert alice_coeffs[0] == 1.0
+    assert np.isnan(alice_coeffs[1])
+    # charlie: only in expr2 → first term is fill (nan), second has coeff 2
+    charlie_coeffs = res.sel(person="charlie").coeffs.values
+    assert np.isnan(charlie_coeffs[0])
+    assert charlie_coeffs[1] == 2.0
+
+
 def test_linear_expression_outer_sum(x: Variable, y: Variable) -> None:
     expr = x + y
     expr2: LinearExpression = sum([x, y])  # type: ignore