Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions esmvalcore/cmor/_fixes/fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
if TYPE_CHECKING:
from collections.abc import Sequence

import ncdata
import xarray as xr
from iris.coords import Coord
from iris.cube import Cube

Expand Down Expand Up @@ -84,10 +82,10 @@ def __init__(

def fix_file(
self,
file: str | Path | xr.Dataset | ncdata.NcData,
file: Path,
output_dir: Path, # noqa: ARG002
add_unique_suffix: bool = False, # noqa: ARG002
) -> str | Path | xr.Dataset | ncdata.NcData:
) -> Path | Sequence[Cube]:
"""Fix files before loading them into a :class:`~iris.cube.CubeList`.

This is mainly intended to fix errors that prevent loading the data
Expand All @@ -98,11 +96,12 @@ def fix_file(

Warning
-------
A path should only be returned if it points to the original (unchanged)
file (i.e., a fix was not necessary). If a fix is necessary, this
function should return a :class:`~ncdata.NcData` or
:class:`~xarray.Dataset` object. Under no circumstances a copy of the
input data should be created (this is very inefficient).
For fix developers: a path should only be returned if it points to the
original (unchanged) file (i.e., a fix was not necessary). If a fix is
necessary, this function should return a :class:`~iris.cube.CubeList` or
a :class:`~collections.abc.Sequence` of :class:`~iris.cube.Cube`
objects. Under no circumstances a copy of the input data should be
created (this is very inefficient).

Parameters
----------
Expand All @@ -116,7 +115,7 @@ def fix_file(

Returns
-------
str | pathlib.Path | xr.Dataset | ncdata.NcData:
:
Fixed data or a path to them.

"""
Expand Down
58 changes: 37 additions & 21 deletions esmvalcore/cmor/fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,16 @@

import logging
from collections import defaultdict
from collections.abc import Sequence
from pathlib import Path
from typing import TYPE_CHECKING, Any

from iris.cube import CubeList
from iris.cube import Cube, CubeList

from esmvalcore.cmor._fixes.fix import Fix
from esmvalcore.io.local import LocalFile

if TYPE_CHECKING:
from collections.abc import Sequence
from pathlib import Path

import ncdata
import xarray as xr
from iris.cube import Cube

from esmvalcore.config import Session

logger = logging.getLogger(__name__)
Expand All @@ -39,22 +35,14 @@ def fix_file( # noqa: PLR0913
session: Session | None = None,
frequency: str | None = None,
**extra_facets: Any,
) -> str | Path | xr.Dataset | ncdata.NcData:
) -> Path | Sequence[Cube]:
"""Fix files before loading them into a :class:`~iris.cube.CubeList`.

This is mainly intended to fix errors that prevent loading the data with
Iris (e.g., those related to ``missing_value`` or ``_FillValue``) or
operations that are more efficient with other packages (e.g., loading files
with lots of variables is much faster with Xarray than Iris).

Warning
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed this as users cannot do anything about this, it seems more like a note for developers.

-------
A path should only be returned if it points to the original (unchanged)
file (i.e., a fix was not necessary). If a fix is necessary, this function
should return a :class:`~ncdata.NcData` or :class:`~xarray.Dataset` object.
Under no circumstances a copy of the input data should be created (this is
very inefficient).

Parameters
----------
file:
Expand All @@ -80,10 +68,15 @@ def fix_file( # noqa: PLR0913

Returns
-------
str | pathlib.Path | xr.Dataset | ncdata.NcData:
:
Fixed data or a path to them.

"""
if not isinstance(file, Path):
# Skip this function for `esmvalcore.io.DataElement` that is not a path
# to a file.
return file

# Update extra_facets with variable information given as regular arguments
# to this function
extra_facets.update(
Expand All @@ -96,6 +89,7 @@ def fix_file( # noqa: PLR0913
},
)

result: Path | Sequence[Cube] = Path(file)
for fix in Fix.get_fixes(
project=project,
dataset=dataset,
Expand All @@ -105,12 +99,34 @@ def fix_file( # noqa: PLR0913
session=session,
frequency=frequency,
):
file = fix.fix_file(
file,
result = fix.fix_file(
result,
output_dir,
add_unique_suffix=add_unique_suffix,
)
return file

if isinstance(file, LocalFile):
# This happens when this function is called from
# `esmvalcore.dataset.Dataset.load`.
if isinstance(result, Path):
if result == file:
# No fixes have been applied, return the original file.
result = file
else:
# The file has been fixed and the result is a path to the fixed
# file. The result needs to be loaded to read the global
# attributes for recording provenance.
fixed_file = LocalFile(result)
fixed_file.facets = file.facets
fixed_file.ignore_warnings = file.ignore_warnings
result = fixed_file.to_iris()

if isinstance(result, Sequence) and isinstance(result[0], Cube):
# Set the attributes for recording provenance here because
# to_iris will not be called on the original file.
file.attributes = result[0].attributes.globals.copy()

return result


def fix_metadata(
Expand Down
143 changes: 106 additions & 37 deletions tests/unit/cmor/test_fix.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
"""Unit tests for :mod:`esmvalcore.cmor.fix`."""

from __future__ import annotations

from collections.abc import Sequence
from pathlib import Path
from typing import TYPE_CHECKING
from unittest.mock import Mock, patch, sentinel

import iris
import iris.cube
import pytest

from esmvalcore.cmor.fix import Fix, fix_data, fix_file, fix_metadata
from esmvalcore.io.local import LocalFile
from esmvalcore.io.protocol import DataElement

if TYPE_CHECKING:
from pytest_mock import MockerFixture


class TestFixFile:
Expand All @@ -14,9 +25,9 @@ class TestFixFile:
@pytest.fixture(autouse=True)
def setUp(self):
"""Prepare for testing."""
self.filename = "filename"
self.filename = Path("filename")
self.mock_fix = Mock()
self.mock_fix.fix_file.return_value = "new_filename"
self.mock_fix.fix_file.return_value = Path("new_filename")
self.expected_get_fixes_call = {
"project": "project",
"dataset": "model",
Expand All @@ -33,48 +44,106 @@ def setUp(self):
"frequency": "frequency",
}

def test_fix(self):
def test_fix(self, mocker: MockerFixture) -> None:
"""Check that the returned fix is applied."""
with patch(
mock_get_fixes = mocker.patch(
"esmvalcore.cmor._fixes.fix.Fix.get_fixes",
return_value=[self.mock_fix],
) as mock_get_fixes:
file_returned = fix_file(
file="filename",
short_name="short_name",
project="project",
dataset="model",
mip="mip",
output_dir=Path("output_dir"),
session=sentinel.session,
frequency="frequency",
)
assert file_returned != self.filename
assert file_returned == "new_filename"
mock_get_fixes.assert_called_once_with(
**self.expected_get_fixes_call,
)
)
file_returned = fix_file(
file=Path("filename"),
short_name="short_name",
project="project",
dataset="model",
mip="mip",
output_dir=Path("output_dir"),
session=sentinel.session,
frequency="frequency",
)
assert file_returned != self.filename
assert file_returned == Path("new_filename")
mock_get_fixes.assert_called_once_with(
**self.expected_get_fixes_call,
)

def test_nofix(self):
def test_fix_returns_cubes(
self,
mocker: MockerFixture,
tmp_path: Path,
) -> None:
"""Check that the returned fix is applied."""
# Prepare some mock fixed data and save it to a file.
fixed_file = LocalFile(tmp_path / "new_filename.nc")
fixed_cube = iris.cube.Cube([0], var_name="tas")
fixed_cube.attributes.globals = {"a": "b"}
iris.save(fixed_cube, fixed_file)

# Set up a mock fix to that returns this data.
self.mock_fix.fix_file.return_value = fixed_file
mock_get_fixes = mocker.patch(
"esmvalcore.cmor._fixes.fix.Fix.get_fixes",
return_value=[self.mock_fix],
)

mock_input_file = LocalFile(self.filename)
result = fix_file(
file=mock_input_file,
short_name="short_name",
project="project",
dataset="model",
mip="mip",
output_dir=Path("output_dir"),
session=sentinel.session,
frequency="frequency",
)
# Check that a sequence of cubes is returned and that the attributes
# of the input file have been updated with the global attributes of the
# fixed cube for recording provenance.
assert isinstance(result, Sequence)
assert len(result) == 1
assert isinstance(result[0], iris.cube.Cube)
assert result[0].var_name == "tas"
assert "a" in mock_input_file.attributes
assert mock_input_file.attributes["a"] == "b"
mock_get_fixes.assert_called_once_with(
**self.expected_get_fixes_call,
)

def test_nofix(self, mocker: MockerFixture) -> None:
"""Check that the same file is returned if no fix is available."""
with patch(
mock_get_fixes = mocker.patch(
"esmvalcore.cmor._fixes.fix.Fix.get_fixes",
return_value=[],
) as mock_get_fixes:
file_returned = fix_file(
file="filename",
short_name="short_name",
project="project",
dataset="model",
mip="mip",
output_dir=Path("output_dir"),
session=sentinel.session,
frequency="frequency",
)
assert file_returned == self.filename
mock_get_fixes.assert_called_once_with(
**self.expected_get_fixes_call,
)
)
file_returned = fix_file(
file=Path("filename"),
short_name="short_name",
project="project",
dataset="model",
mip="mip",
output_dir=Path("output_dir"),
session=sentinel.session,
frequency="frequency",
)
assert file_returned == self.filename
mock_get_fixes.assert_called_once_with(
**self.expected_get_fixes_call,
)

def test_nofix_if_not_path(self, mocker: MockerFixture) -> None:
"""Check that the same object is returned if the input is not a Path."""
mock_data_element = mocker.create_autospec(DataElement, instance=True)
file_returned = fix_file(
file=mock_data_element,
short_name="short_name",
project="project",
dataset="model",
mip="mip",
output_dir=Path("output_dir"),
session=sentinel.session,
frequency="frequency",
)
assert file_returned is mock_data_element


class TestGetCube:
Expand Down