From 403ea2870c91c8e86cb2ef53b28d751e5e46705f Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 19 Feb 2026 18:08:38 +0100 Subject: [PATCH 1/6] Start on fixing fix_file --- esmvalcore/cmor/fix.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 4f66ffda25..5c6fb7e240 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -9,15 +9,16 @@ import logging from collections import defaultdict +from pathlib import Path from typing import TYPE_CHECKING, Any from iris.cube import CubeList from esmvalcore.cmor._fixes.fix import Fix +from esmvalcore.io.local import LocalFile if TYPE_CHECKING: from collections.abc import Sequence - from pathlib import Path import ncdata import xarray as xr @@ -84,6 +85,19 @@ def fix_file( # noqa: PLR0913 Fixed data or a path to them. """ + # TODO: the code in `esmvalcore.preprocessor.preprocess` called from + # `esmvalcore.dataset.Dataset.load` currently relies on this function + # returning an esmvalcore.io.local.LocalFile (or an iris.cube.Cube or a + # list of those). Maybe this function could be updated so it returns a + # CubeList instead of a xr.Dataset or ncdata.NcData object? + # All fix_file methods currently seem to return a Path, so this is not a + # problem just yet. + if not isinstance(file, Path): + # Skip this function for anything that is not a path to a file. + # TODO: it would be nice to make this work for any + # `esmvalcore.io.DataElement`. + return file + # Update extra_facets with variable information given as regular arguments # to this function extra_facets.update( @@ -96,6 +110,7 @@ def fix_file( # noqa: PLR0913 }, ) + result = Path(file) for fix in Fix.get_fixes( project=project, dataset=dataset, @@ -105,12 +120,20 @@ def fix_file( # noqa: PLR0913 session=session, frequency=frequency, ): - file = fix.fix_file( - file, + result = fix.fix_file( + result, output_dir, add_unique_suffix=add_unique_suffix, ) - return file + + if isinstance(file, LocalFile): + result = LocalFile(result) + result.facets = file.facets + result.ignore_warnings = file.ignore_warnings + result.to_iris() + file.attributes = result.attributes + + return result def fix_metadata( From 8610779f05a623e2a4b17c3900364afd0f02f05e Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 19 Feb 2026 21:11:51 +0100 Subject: [PATCH 2/6] Improve compatibility with esmvalcore.preprocessor --- esmvalcore/cmor/fix.py | 46 +++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 5c6fb7e240..5f093a436a 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -20,8 +20,6 @@ if TYPE_CHECKING: from collections.abc import Sequence - import ncdata - import xarray as xr from iris.cube import Cube from esmvalcore.config import Session @@ -40,7 +38,7 @@ def fix_file( # noqa: PLR0913 session: Session | None = None, frequency: str | None = None, **extra_facets: Any, -) -> str | Path | xr.Dataset | ncdata.NcData: +) -> Path | CubeList: """Fix files before loading them into a :class:`~iris.cube.CubeList`. This is mainly intended to fix errors that prevent loading the data with @@ -52,7 +50,7 @@ def fix_file( # noqa: PLR0913 ------- A path should only be returned if it points to the original (unchanged) file (i.e., a fix was not necessary). If a fix is necessary, this function - should return a :class:`~ncdata.NcData` or :class:`~xarray.Dataset` object. + should return a :class:`~iris.cube.CubeList`. Under no circumstances a copy of the input data should be created (this is very inefficient). @@ -81,21 +79,13 @@ def fix_file( # noqa: PLR0913 Returns ------- - str | pathlib.Path | xr.Dataset | ncdata.NcData: + : Fixed data or a path to them. """ - # TODO: the code in `esmvalcore.preprocessor.preprocess` called from - # `esmvalcore.dataset.Dataset.load` currently relies on this function - # returning an esmvalcore.io.local.LocalFile (or an iris.cube.Cube or a - # list of those). Maybe this function could be updated so it returns a - # CubeList instead of a xr.Dataset or ncdata.NcData object? - # All fix_file methods currently seem to return a Path, so this is not a - # problem just yet. if not isinstance(file, Path): - # Skip this function for anything that is not a path to a file. - # TODO: it would be nice to make this work for any - # `esmvalcore.io.DataElement`. + # Skip this function for `esmvalcore.io.DataElement` that is not a path + # to a file. return file # Update extra_facets with variable information given as regular arguments @@ -110,7 +100,7 @@ def fix_file( # noqa: PLR0913 }, ) - result = Path(file) + result: Path | CubeList = Path(file) for fix in Fix.get_fixes( project=project, dataset=dataset, @@ -127,11 +117,25 @@ def fix_file( # noqa: PLR0913 ) if isinstance(file, LocalFile): - result = LocalFile(result) - result.facets = file.facets - result.ignore_warnings = file.ignore_warnings - result.to_iris() - file.attributes = result.attributes + # This happens when this function is called from + # `esmvalcore.dataset.Dataset.load`. + if isinstance(result, Path): + if result == file: + # No fixes have been applied, return the original file. + result = file + else: + # The file has been fixed and the result is a path to the fixed + # file. The result needs to be loaded to read the global + # attributes for recording provenance. + fixed_file = LocalFile(result) + fixed_file.facets = file.facets + fixed_file.ignore_warnings = file.ignore_warnings + result = fixed_file.to_iris() + + if isinstance(result, CubeList): + # Set the attributes for recording provenance here because + # to_iris will not be called on the original file. + file.attributes = result[0].attributes.globals.copy() return result From 41d59cd4f9596c2f8698a65f6ef5fcc27336b4ee Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 19 Feb 2026 21:30:50 +0100 Subject: [PATCH 3/6] Use correct type for function argument --- tests/unit/cmor/test_fix.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/cmor/test_fix.py b/tests/unit/cmor/test_fix.py index 7b5fc3d0ba..c54bbba7b7 100644 --- a/tests/unit/cmor/test_fix.py +++ b/tests/unit/cmor/test_fix.py @@ -14,7 +14,7 @@ class TestFixFile: @pytest.fixture(autouse=True) def setUp(self): """Prepare for testing.""" - self.filename = "filename" + self.filename = Path("filename") self.mock_fix = Mock() self.mock_fix.fix_file.return_value = "new_filename" self.expected_get_fixes_call = { @@ -40,7 +40,7 @@ def test_fix(self): return_value=[self.mock_fix], ) as mock_get_fixes: file_returned = fix_file( - file="filename", + file=Path("filename"), short_name="short_name", project="project", dataset="model", @@ -62,7 +62,7 @@ def test_nofix(self): return_value=[], ) as mock_get_fixes: file_returned = fix_file( - file="filename", + file=Path("filename"), short_name="short_name", project="project", dataset="model", From 68ce98aa9f351e6dcfe7abbdaff5a601f9f77df1 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 23 Feb 2026 12:39:10 +0100 Subject: [PATCH 4/6] Further updates to type hints --- esmvalcore/cmor/_fixes/fix.py | 8 +++----- esmvalcore/cmor/fix.py | 13 +++++-------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 2fbd9856fb..cb61fabae6 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -35,8 +35,6 @@ if TYPE_CHECKING: from collections.abc import Sequence - import ncdata - import xarray as xr from iris.coords import Coord from iris.cube import Cube @@ -84,10 +82,10 @@ def __init__( def fix_file( self, - file: str | Path | xr.Dataset | ncdata.NcData, + file: Path, output_dir: Path, # noqa: ARG002 add_unique_suffix: bool = False, # noqa: ARG002 - ) -> str | Path | xr.Dataset | ncdata.NcData: + ) -> Path | Sequence[Cube]: """Fix files before loading them into a :class:`~iris.cube.CubeList`. This is mainly intended to fix errors that prevent loading the data @@ -116,7 +114,7 @@ def fix_file( Returns ------- - str | pathlib.Path | xr.Dataset | ncdata.NcData: + : Fixed data or a path to them. """ diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 5f093a436a..3f6bc4f60d 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -9,19 +9,16 @@ import logging from collections import defaultdict +from collections.abc import Sequence from pathlib import Path from typing import TYPE_CHECKING, Any -from iris.cube import CubeList +from iris.cube import Cube, CubeList from esmvalcore.cmor._fixes.fix import Fix from esmvalcore.io.local import LocalFile if TYPE_CHECKING: - from collections.abc import Sequence - - from iris.cube import Cube - from esmvalcore.config import Session logger = logging.getLogger(__name__) @@ -38,7 +35,7 @@ def fix_file( # noqa: PLR0913 session: Session | None = None, frequency: str | None = None, **extra_facets: Any, -) -> Path | CubeList: +) -> Path | Sequence[Cube]: """Fix files before loading them into a :class:`~iris.cube.CubeList`. This is mainly intended to fix errors that prevent loading the data with @@ -100,7 +97,7 @@ def fix_file( # noqa: PLR0913 }, ) - result: Path | CubeList = Path(file) + result: Path | Sequence[Cube] = Path(file) for fix in Fix.get_fixes( project=project, dataset=dataset, @@ -132,7 +129,7 @@ def fix_file( # noqa: PLR0913 fixed_file.ignore_warnings = file.ignore_warnings result = fixed_file.to_iris() - if isinstance(result, CubeList): + if isinstance(result, Sequence) and isinstance(result[0], Cube): # Set the attributes for recording provenance here because # to_iris will not be called on the original file. file.attributes = result[0].attributes.globals.copy() From cf2bb52d595c787c54b4c2f1324c90312f335fbf Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 23 Feb 2026 12:58:33 +0100 Subject: [PATCH 5/6] Update docs --- esmvalcore/cmor/_fixes/fix.py | 11 ++++++----- esmvalcore/cmor/fix.py | 8 -------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index cb61fabae6..ac25a3a762 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -96,11 +96,12 @@ def fix_file( Warning ------- - A path should only be returned if it points to the original (unchanged) - file (i.e., a fix was not necessary). If a fix is necessary, this - function should return a :class:`~ncdata.NcData` or - :class:`~xarray.Dataset` object. Under no circumstances a copy of the - input data should be created (this is very inefficient). + For fix developers: a path should only be returned if it points to the + original (unchanged) file (i.e., a fix was not necessary). If a fix is + necessary, this function should return a :class:`~iris.cube.CubeList` or + a :class:`~collections.abc.Sequence` of :class:`~iris.cube.Cube` + objects. Under no circumstances a copy of the input data should be + created (this is very inefficient). Parameters ---------- diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 3f6bc4f60d..ef23022846 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -43,14 +43,6 @@ def fix_file( # noqa: PLR0913 operations that are more efficient with other packages (e.g., loading files with lots of variables is much faster with Xarray than Iris). - Warning - ------- - A path should only be returned if it points to the original (unchanged) - file (i.e., a fix was not necessary). If a fix is necessary, this function - should return a :class:`~iris.cube.CubeList`. - Under no circumstances a copy of the input data should be created (this is - very inefficient). - Parameters ---------- file: From f375c69ae232791658466967b292f913b5939653 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 23 Feb 2026 14:44:29 +0100 Subject: [PATCH 6/6] Add more tests --- tests/unit/cmor/test_fix.py | 141 +++++++++++++++++++++++++++--------- 1 file changed, 105 insertions(+), 36 deletions(-) diff --git a/tests/unit/cmor/test_fix.py b/tests/unit/cmor/test_fix.py index c54bbba7b7..80f37f32c8 100644 --- a/tests/unit/cmor/test_fix.py +++ b/tests/unit/cmor/test_fix.py @@ -1,11 +1,22 @@ """Unit tests for :mod:`esmvalcore.cmor.fix`.""" +from __future__ import annotations + +from collections.abc import Sequence from pathlib import Path +from typing import TYPE_CHECKING from unittest.mock import Mock, patch, sentinel +import iris +import iris.cube import pytest from esmvalcore.cmor.fix import Fix, fix_data, fix_file, fix_metadata +from esmvalcore.io.local import LocalFile +from esmvalcore.io.protocol import DataElement + +if TYPE_CHECKING: + from pytest_mock import MockerFixture class TestFixFile: @@ -16,7 +27,7 @@ def setUp(self): """Prepare for testing.""" self.filename = Path("filename") self.mock_fix = Mock() - self.mock_fix.fix_file.return_value = "new_filename" + self.mock_fix.fix_file.return_value = Path("new_filename") self.expected_get_fixes_call = { "project": "project", "dataset": "model", @@ -33,48 +44,106 @@ def setUp(self): "frequency": "frequency", } - def test_fix(self): + def test_fix(self, mocker: MockerFixture) -> None: """Check that the returned fix is applied.""" - with patch( + mock_get_fixes = mocker.patch( "esmvalcore.cmor._fixes.fix.Fix.get_fixes", return_value=[self.mock_fix], - ) as mock_get_fixes: - file_returned = fix_file( - file=Path("filename"), - short_name="short_name", - project="project", - dataset="model", - mip="mip", - output_dir=Path("output_dir"), - session=sentinel.session, - frequency="frequency", - ) - assert file_returned != self.filename - assert file_returned == "new_filename" - mock_get_fixes.assert_called_once_with( - **self.expected_get_fixes_call, - ) + ) + file_returned = fix_file( + file=Path("filename"), + short_name="short_name", + project="project", + dataset="model", + mip="mip", + output_dir=Path("output_dir"), + session=sentinel.session, + frequency="frequency", + ) + assert file_returned != self.filename + assert file_returned == Path("new_filename") + mock_get_fixes.assert_called_once_with( + **self.expected_get_fixes_call, + ) - def test_nofix(self): + def test_fix_returns_cubes( + self, + mocker: MockerFixture, + tmp_path: Path, + ) -> None: + """Check that the returned fix is applied.""" + # Prepare some mock fixed data and save it to a file. + fixed_file = LocalFile(tmp_path / "new_filename.nc") + fixed_cube = iris.cube.Cube([0], var_name="tas") + fixed_cube.attributes.globals = {"a": "b"} + iris.save(fixed_cube, fixed_file) + + # Set up a mock fix to that returns this data. + self.mock_fix.fix_file.return_value = fixed_file + mock_get_fixes = mocker.patch( + "esmvalcore.cmor._fixes.fix.Fix.get_fixes", + return_value=[self.mock_fix], + ) + + mock_input_file = LocalFile(self.filename) + result = fix_file( + file=mock_input_file, + short_name="short_name", + project="project", + dataset="model", + mip="mip", + output_dir=Path("output_dir"), + session=sentinel.session, + frequency="frequency", + ) + # Check that a sequence of cubes is returned and that the attributes + # of the input file have been updated with the global attributes of the + # fixed cube for recording provenance. + assert isinstance(result, Sequence) + assert len(result) == 1 + assert isinstance(result[0], iris.cube.Cube) + assert result[0].var_name == "tas" + assert "a" in mock_input_file.attributes + assert mock_input_file.attributes["a"] == "b" + mock_get_fixes.assert_called_once_with( + **self.expected_get_fixes_call, + ) + + def test_nofix(self, mocker: MockerFixture) -> None: """Check that the same file is returned if no fix is available.""" - with patch( + mock_get_fixes = mocker.patch( "esmvalcore.cmor._fixes.fix.Fix.get_fixes", return_value=[], - ) as mock_get_fixes: - file_returned = fix_file( - file=Path("filename"), - short_name="short_name", - project="project", - dataset="model", - mip="mip", - output_dir=Path("output_dir"), - session=sentinel.session, - frequency="frequency", - ) - assert file_returned == self.filename - mock_get_fixes.assert_called_once_with( - **self.expected_get_fixes_call, - ) + ) + file_returned = fix_file( + file=Path("filename"), + short_name="short_name", + project="project", + dataset="model", + mip="mip", + output_dir=Path("output_dir"), + session=sentinel.session, + frequency="frequency", + ) + assert file_returned == self.filename + mock_get_fixes.assert_called_once_with( + **self.expected_get_fixes_call, + ) + + def test_nofix_if_not_path(self, mocker: MockerFixture) -> None: + """Check that the same object is returned if the input is not a Path.""" + mock_data_element = mocker.create_autospec(DataElement, instance=True) + file_returned = fix_file( + file=mock_data_element, + short_name="short_name", + project="project", + dataset="model", + mip="mip", + output_dir=Path("output_dir"), + session=sentinel.session, + frequency="frequency", + ) + assert file_returned is mock_data_element class TestGetCube: