diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8b0a5b1c..0a810112 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,6 +23,13 @@ repos: # Run the formatter. - id: ruff-format +# Type checking with mypy +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.15.0 + hooks: + - id: mypy + additional_dependencies: ['types-PyYAML', 'types-requests'] + # Find common spelling mistakes in comments and docstrings - repo: https://github.com/codespell-project/codespell rev: v2.4.2 diff --git a/atlite/__init__.py b/atlite/__init__.py index d21aaf5c..03a78626 100644 --- a/atlite/__init__.py +++ b/atlite/__init__.py @@ -33,16 +33,16 @@ __version__ = version("atlite") # e.g. "0.17.0" # TODO, in the network structure it should use the dev version match = re.match(r"(\d+\.\d+(\.\d+)?)", __version__) -assert match, f"Could not determine release_version of pypsa: {__version__}" +assert match, f"Could not determine release_version of atlite: {__version__}" release_version = match.group(0) __all__ = [ - Cutout, - ExclusionContainer, - compute_indicatormatrix, - regrid, - cspinstallations, - solarpanels, - windturbines, - __version__, + "Cutout", + "ExclusionContainer", + "compute_indicatormatrix", + "regrid", + "cspinstallations", + "solarpanels", + "windturbines", + "__version__", ] diff --git a/atlite/_types.py b/atlite/_types.py new file mode 100644 index 00000000..98bb6a00 --- /dev/null +++ b/atlite/_types.py @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: Contributors to atlite +# +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +from pathlib import Path +from typing import Any, Literal, TypeAlias, TypedDict + +import geopandas as gpd +import numpy as np +import scipy.sparse as sp +import xarray as xr +from pyproj import CRS +from shapely.geometry.base import BaseGeometry + +NDArray: TypeAlias = np.ndarray[Any, np.dtype[np.floating[Any]]] +NDArrayInt: TypeAlias = np.ndarray[Any, np.dtype[np.signedinteger[Any]]] 
+NDArrayBool: TypeAlias = np.ndarray[Any, np.dtype[np.bool_]] +DataArray: TypeAlias = xr.DataArray +Dataset: TypeAlias = xr.Dataset +PathLike: TypeAlias = str | Path +NumericArray: TypeAlias = NDArray | DataArray +Number: TypeAlias = int | float | np.number[Any] +GeoDataFrame: TypeAlias = gpd.GeoDataFrame +GeoSeries: TypeAlias = gpd.GeoSeries +Geometry: TypeAlias = BaseGeometry +CrsLike: TypeAlias = str | int | CRS | dict[str, Any] | None +SparseMatrix: TypeAlias = sp.lil_matrix | sp.csr_matrix + +TrackingType: TypeAlias = ( + Literal["horizontal", "tilted_horizontal", "vertical", "dual"] | None +) +ClearskyModel: TypeAlias = Literal["simple", "enhanced"] +TrigonModel: TypeAlias = Literal["simple", "perez"] +IrradiationType: TypeAlias = Literal["total", "direct", "diffuse", "ground"] +HeatPumpSource: TypeAlias = Literal["air", "soil"] +OrientationName: TypeAlias = Literal["latitude_optimal", "constant", "latitude"] +DataFormat: TypeAlias = Literal["grib", "netcdf"] + + +class ERA5RetrievalParams(TypedDict, total=False): + product: str + area: list[float] + grid: str + chunks: dict[str, int] | None + tmpdir: str | Path | None + lock: Any | None + data_format: Literal["grib", "netcdf"] + year: list[str] + month: list[str] | str + day: list[str] | str + time: str | list[str] + variable: str | list[str] diff --git a/atlite/aggregate.py b/atlite/aggregate.py index e3d3b3a4..2f6b90d2 100644 --- a/atlite/aggregate.py +++ b/atlite/aggregate.py @@ -1,29 +1,58 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for aggregating results. 
-""" +"""Functions for aggregating results.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, cast import dask import xarray as xr +if TYPE_CHECKING: + import pandas as pd + from scipy.sparse import spmatrix + + from atlite._types import DataArray + + +def aggregate_matrix( + da: DataArray, + matrix: spmatrix, + index: pd.Index, +) -> DataArray: + """ + Aggregate spatial data with a sparse matrix. + + Parameters + ---------- + da : xarray.DataArray + DataArray with spatial dimensions ``y`` and ``x``. + matrix : scipy.sparse.spmatrix + Aggregation matrix mapping flattened spatial cells to ``index``. + index : pandas.Index + Index defining the aggregated dimension. -def aggregate_matrix(da, matrix, index): + Returns + ------- + xarray.DataArray + Aggregated data indexed by ``index`` and, if present, time. + """ if index.name is None: index = index.rename("dim_0") if isinstance(da.data, dask.array.core.Array): da = da.stack(spatial=("y", "x")) - da = da.chunk(dict(spatial=-1)) - return xr.apply_ufunc( + da = da.chunk({"spatial": -1}) + result = xr.apply_ufunc( lambda da: da * matrix.T, da, input_core_dims=[["spatial"]], output_core_dims=[[index.name]], dask="parallelized", output_dtypes=[da.dtype], - dask_gufunc_kwargs=dict(output_sizes={index.name: index.size}), + dask_gufunc_kwargs={"output_sizes": {index.name: index.size}}, ).assign_coords(**{index.name: index}) - else: - da = da.stack(spatial=("y", "x")).transpose("spatial", "time") - return xr.DataArray(matrix * da, [index, da.coords["time"]]) + return cast("DataArray", result) + da = da.stack(spatial=("y", "x")).transpose("spatial", "time") + return xr.DataArray(matrix * da, [index, da.coords["time"]]) diff --git a/atlite/convert.py b/atlite/convert.py index 250cb785..14dd99cd 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -All functions for converting weather data into 
energy system model data. -""" +"""All functions for converting weather data into energy system model data.""" from __future__ import annotations @@ -13,7 +11,7 @@ from collections import namedtuple from operator import itemgetter from pathlib import Path -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Any, Literal import geopandas as gpd import numpy as np @@ -44,34 +42,50 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: - from atlite.resource import TurbineConfig + from collections.abc import Callable + + from atlite._types import ( + ClearskyModel, + DataArray, + Dataset, + HeatPumpSource, + IrradiationType, + NumericArray, + OrientationName, + TrackingType, + TrigonModel, + ) + from atlite.cutout import Cutout + from atlite.resource import CSPConfig, PanelConfig, TurbineConfig -def _aggregate_time(da: xr.DataArray, method: str | None) -> xr.DataArray: +def _aggregate_time( + da: xr.DataArray, method: Literal["sum", "mean"] | None +) -> xr.DataArray: if method == "sum": return da.sum("time", keep_attrs=True) - elif method == "mean": + if method == "mean": return da.mean("time", keep_attrs=True) return da def convert_and_aggregate( - cutout, - convert_func, - matrix=None, - index=None, - layout=None, - shapes=None, - shapes_crs=4326, - per_unit=False, - return_capacity=False, + cutout: Cutout, + convert_func: Callable[..., Any], + matrix: Any = None, + index: Any = None, + layout: Any = None, + shapes: Any = None, + shapes_crs: int = 4326, + per_unit: bool = False, + return_capacity: bool = False, aggregate_time: Literal["sum", "mean", "legacy"] | None = "legacy", - capacity_factor=False, - capacity_factor_timeseries=False, - show_progress=False, - dask_kwargs={}, - **convert_kwds, -): + capacity_factor: bool = False, + capacity_factor_timeseries: bool = False, + show_progress: bool = False, + dask_kwargs: dict[str, Any] | None = None, + **convert_kwds: Any, +) -> Any: """ Convert and aggregate a weather-based renewable 
generation time-series. @@ -81,6 +95,10 @@ def convert_and_aggregate( Parameters ---------- + cutout : atlite.Cutout + The cutout to process. + convert_func : callable + Callback like convert_wind, convert_pv. matrix : N x S - xr.DataArray or sp.sparse.csr_matrix or None If given, it is used to aggregate the grid cells to buses. N is the number of buses, S the number of spatial coordinates, in the @@ -117,11 +135,8 @@ def convert_and_aggregate( Whether to show a progress bar. dask_kwargs : dict, default {} Dict with keyword arguments passed to ``dask.compute``. - - Other Parameters - ---------------- - convert_func : Function - Callback like convert_wind, convert_pv + **convert_kwds : Any + Additional keyword arguments passed to ``convert_func``. Returns ------- @@ -150,6 +165,11 @@ def convert_and_aggregate( The installed units per bus in MW corresponding to ``layout`` (only if ``return_capacity`` is True). + Raises + ------ + ValueError + If deprecated parameters conflict or invalid arguments are provided. + See Also -------- wind : Generate wind generation time-series. 
@@ -193,9 +213,10 @@ def convert_and_aggregate( aggregate_time = None func_name = convert_func.__name__.replace("convert_", "") - logger.info(f"Convert and aggregate '{func_name}'.") + logger.info("Convert and aggregate '%s'.", func_name) da = convert_func(cutout.data, **convert_kwds) + dask_kwargs = dask_kwargs or {} no_args = all(v is None for v in [layout, shapes, matrix]) if no_args: @@ -269,13 +290,28 @@ def convert_and_aggregate( if return_capacity: return maybe_progressbar(results, show_progress, **dask_kwargs), capacity - else: - return maybe_progressbar(results, show_progress, **dask_kwargs) + return maybe_progressbar(results, show_progress, **dask_kwargs) -def maybe_progressbar(ds, show_progress, **kwargs): +def maybe_progressbar( + ds: Dataset | DataArray, show_progress: bool, **kwargs: Any +) -> Dataset | DataArray: """ - Load a xr.dataset with dask arrays either with or without progressbar. + Load a dataset or data array, optionally showing a dask progress bar. + + Parameters + ---------- + ds : xr.Dataset or xr.DataArray + Object backed by dask arrays. + show_progress : bool + Whether to display a progress bar while loading. + **kwargs + Keyword arguments passed to ``load``. + + Returns + ------- + xr.Dataset or xr.DataArray + Loaded object. """ if show_progress: with ProgressBar(minimum=2): @@ -286,24 +322,63 @@ def maybe_progressbar(ds, show_progress, **kwargs): # temperature -def convert_temperature(ds): +def convert_temperature(ds: Dataset) -> DataArray: """ - Return outside temperature (useful for e.g. heat pump T-dependent - coefficient of performance). + Convert ambient air temperature from Kelvin to degrees Celsius. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``temperature``. + + Returns + ------- + xr.DataArray + Ambient temperature in degrees Celsius. 
""" # Temperature is in Kelvin return ds["temperature"] - 273.15 -def temperature(cutout, **params): +def temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: + """ + Return ambient air temperature converted from Kelvin to degrees Celsius. + + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Ambient temperature in °C. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. + """ return cutout.convert_and_aggregate(convert_func=convert_temperature, **params) # soil temperature -def convert_soil_temperature(ds): +def convert_soil_temperature(ds: Dataset) -> DataArray: """ - Return soil temperature (useful for e.g. heat pump T-dependent coefficient - of performance). + Convert soil temperature from Kelvin to degrees Celsius. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``soil temperature``. + + Returns + ------- + xr.DataArray + Soil temperature in degrees Celsius with missing values filled by zero. """ # Temperature is in Kelvin @@ -313,26 +388,108 @@ def convert_soil_temperature(ds): return (ds["soil temperature"] - 273.15).fillna(0.0) -def soil_temperature(cutout, **params): +def soil_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: + """ + Return soil temperature converted from Kelvin to degrees Celsius. + + Sea grid cells, where soil temperature is undefined, are filled with 0.0 + so they do not contribute during spatial aggregation. + + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Soil temperature in °C. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. 
+ """ return cutout.convert_and_aggregate(convert_func=convert_soil_temperature, **params) # dewpoint temperature -def convert_dewpoint_temperature(ds): +def convert_dewpoint_temperature(ds: Dataset) -> DataArray: """ - Return dewpoint temperature. + Convert dew point temperature from Kelvin to degrees Celsius. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``dewpoint temperature``. + + Returns + ------- + xr.DataArray + Dew point temperature in degrees Celsius. """ # Temperature is in Kelvin return ds["dewpoint temperature"] - 273.15 -def dewpoint_temperature(cutout, **params): +def dewpoint_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: + """ + Return dew point temperature converted from Kelvin to degrees Celsius. + + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Dew point temperature in °C. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. + """ return cutout.convert_and_aggregate( convert_func=convert_dewpoint_temperature, **params ) -def convert_coefficient_of_performance(ds, source, sink_T, c0, c1, c2): +def convert_coefficient_of_performance( + ds: Dataset, + source: HeatPumpSource, + sink_T: float, + c0: float | None, + c1: float | None, + c2: float | None, +) -> DataArray: + """ + Convert source temperatures to heat pump COP values. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing the required temperature variables. + source : {"air", "soil"} + Heat source used for the heat pump. + sink_T : float + Sink temperature in degrees Celsius. + c0, c1, c2 : float or None + Quadratic regression coefficients. If ``None``, source-specific + defaults are used. + + Returns + ------- + xr.DataArray + Coefficient of performance for each time step and grid cell. 
+ """ assert source in ["air", "soil"], NotImplementedError( "'source' must be one of ['air', 'soil']" ) @@ -360,29 +517,57 @@ def convert_coefficient_of_performance(ds, source, sink_T, c0, c1, c2): def coefficient_of_performance( - cutout, source="air", sink_T=55.0, c0=None, c1=None, c2=None, **params -): + cutout: Cutout, + source: HeatPumpSource = "air", + sink_T: float = 55.0, + c0: float | None = None, + c1: float | None = None, + c2: float | None = None, + **params: Any, +) -> DataArray | NumericArray: """ - Convert ambient or soil temperature to coefficient of performance (COP) of - air- or ground-sourced heat pumps. The COP is a function of temperature - difference from source to sink. The defaults for either source (c0, c1, c2) - are based on a quadratic regression in [1]. - - Paramterers - ----------- - source : str - The heat source. Can be 'air' or 'soil'. - sink_T : float - The temperature of the heat sink. - c0 : float - The constant regression coefficient for the temperature difference. - c1 : float - The linear regression coefficient for the temperature difference. - c2 : float - The quadratic regression coefficient for the temperature difference. - - Reference - --------- + Convert temperature to heat pump coefficient of performance (COP). + + The COP is modelled as a quadratic function of the temperature difference + ``dT = sink_T - source_T``: ``COP = c0 + c1 * dT + c2 * dT**2``. + + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. + source : {"air", "soil"} + Heat source type. Default coefficients per source: + + - ``"air"``: ``c0=6.81, c1=-0.121, c2=0.000630`` + - ``"soil"``: ``c0=8.77, c1=-0.150, c2=0.000734`` + sink_T : float, default 55.0 + Heat sink temperature in °C. + c0 : float or None + Constant regression coefficient. If ``None``, uses source default. + c1 : float or None + Linear regression coefficient. If ``None``, uses source default. + c2 : float or None + Quadratic regression coefficient. 
If ``None``, uses source default. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Coefficient of performance time-series (dimensionless). + + See Also + -------- + heat_demand : Compute heating degree-day demand. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. + + References + ---------- [1] Staffell, Brett, Brandon, Hawkes, A review of domestic heat pumps, Energy & Environmental Science (2012), 5, 9291-9306, https://doi.org/10.1039/C2EE22653G. @@ -399,7 +584,39 @@ def coefficient_of_performance( # heat demand -def convert_heat_demand(ds, threshold, a, constant, hour_shift): +def convert_heat_demand( + ds: Dataset, + threshold: float, + a: float, + constant: float, + hour_shift: float, +) -> DataArray: + """ + Convert ambient temperature to daily heat demand by degree days. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``temperature``. + threshold : float + Heating threshold temperature in degrees Celsius. + a : float + Linear scaling factor. + constant : float + Constant demand component added to the result. + hour_shift : float + Time shift in hours applied before daily averaging. + + Returns + ------- + xr.DataArray + Daily heat demand in degree-day-like units. + + Notes + ----- + The formula is ``max(0, a * (threshold - T_daily_mean)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. 
+ """ # Temperature is in Kelvin; take daily average T = ds["temperature"] T = T.assign_coords( @@ -415,48 +632,63 @@ def convert_heat_demand(ds, threshold, a, constant, hour_shift): return (constant + heat_demand).rename("heat_demand") -def heat_demand(cutout, threshold=15.0, a=1.0, constant=0.0, hour_shift=0.0, **params): +def heat_demand( + cutout: Cutout, + threshold: float = 15.0, + a: float = 1.0, + constant: float = 0.0, + hour_shift: float = 0.0, + **params: Any, +) -> DataArray | NumericArray: """ - Convert outside temperature into daily heat demand using the degree-day - approximation. - - Since "daily average temperature" means different things in - different time zones and since xarray coordinates do not handle - time zones gracefully like pd.DateTimeIndex, you can provide an - hour_shift to redefine when the day starts. - - E.g. for Moscow in winter, hour_shift = 4, for New York in winter, - hour_shift = -5 + Convert outside temperature into daily heat demand using degree-day approximation. - This time shift applies across the entire spatial scope of ds for - all times. More fine-grained control will be built in a some - point, i.e. space- and time-dependent time zones. + The formula is ``max(0, a * (threshold - T_daily_mean)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. Output is in + degree-day-like units (scaled by *a*). - WARNING: Because the original data is provided every month, at the - month boundaries there is untidiness if you use a time shift. The - resulting xarray will have duplicates in the index for the parts - of the day in each month at the boundary. You will have to - re-average these based on the number of hours in each month for - the duplicated day. + Since "daily average temperature" means different things in different time + zones, you can provide *hour_shift* to redefine when the day starts. + E.g. for Moscow in winter ``hour_shift=4``, for New York ``hour_shift=-5``. 
+ The shift applies uniformly across all grid cells and times. Parameters ---------- - threshold : float - Outside temperature in degrees Celsius above which there is no - heat demand. - a : float + cutout : atlite.Cutout + The cutout to process. + threshold : float, default 15.0 + Outside temperature in °C above which there is no heat demand. + a : float, default 1.0 Linear factor relating heat demand to outside temperature. - constant : float - Constant part of heat demand that does not depend on outside - temperature (e.g. due to water heating). - hour_shift : float - Time shift relative to UTC for taking daily average + constant : float, default 0.0 + Constant part of heat demand independent of outside temperature + (e.g. water heating). + hour_shift : float, default 0.0 + Time shift in hours relative to UTC for daily averaging. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Daily heat demand time-series in degree-day-like units. + + Warnings + -------- + Because the original data is provided per month, at month boundaries + there is untidiness when using a time shift. The resulting array will + have duplicate indices for parts of the day at each boundary. You must + re-average these based on the number of hours in each month. + + See Also + -------- + cooling_demand : Degree-day cooling demand. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. - + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. 
""" return cutout.convert_and_aggregate( convert_func=convert_heat_demand, @@ -469,7 +701,39 @@ def heat_demand(cutout, threshold=15.0, a=1.0, constant=0.0, hour_shift=0.0, **p # cooling demand -def convert_cooling_demand(ds, threshold, a, constant, hour_shift): +def convert_cooling_demand( + ds: Dataset, + threshold: float, + a: float, + constant: float, + hour_shift: float, +) -> DataArray: + """ + Convert ambient temperature to daily cooling demand by degree days. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``temperature``. + threshold : float + Cooling threshold temperature in degrees Celsius. + a : float + Linear scaling factor. + constant : float + Constant demand component added to the result. + hour_shift : float + Time shift in hours applied before daily averaging. + + Returns + ------- + xr.DataArray + Daily cooling demand in degree-day-like units. + + Notes + ----- + The formula is ``max(0, a * (T_daily_mean - threshold)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. + """ # Temperature is in Kelvin; take daily average T = ds["temperature"] T = T.assign_coords( @@ -486,52 +750,64 @@ def convert_cooling_demand(ds, threshold, a, constant, hour_shift): def cooling_demand( - cutout, threshold=23.0, a=1.0, constant=0.0, hour_shift=0.0, **params -): + cutout: Cutout, + threshold: float = 23.0, + a: float = 1.0, + constant: float = 0.0, + hour_shift: float = 0.0, + **params: Any, +) -> DataArray | NumericArray: """ - Convert outside temperature into daily cooling demand using the degree-day - approximation. - - Since "daily average temperature" means different things in - different time zones and since xarray coordinates do not handle - time zones gracefully like pd.DateTimeIndex, you can provide an - hour_shift to redefine when the day starts. - - E.g. 
for Moscow in summer, hour_shift = 3, for New York in summer, - hour_shift = -4 + Convert outside temperature into daily cooling demand using degree-day approximation. - This time shift applies across the entire spatial scope of ds for - all times. More fine-grained control will be built in a some - point, i.e. space- and time-dependent time zones. + The formula is ``max(0, a * (T_daily_mean - threshold)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. Output is in + degree-day-like units (scaled by *a*). - WARNING: Because the original data is provided every month, at the - month boundaries there is untidiness if you use a time shift. The - resulting xarray will have duplicates in the index for the parts - of the day in each month at the boundary. You will have to - re-average these based on the number of hours in each month for - the duplicated day. + Since "daily average temperature" means different things in different time + zones, you can provide *hour_shift* to redefine when the day starts. + E.g. for Moscow in summer ``hour_shift=3``, for New York ``hour_shift=-4``. + The shift applies uniformly across all grid cells and times. Parameters ---------- - threshold : float - Outside temperature in degrees Celsius below which there is no - cooling demand. The default 23C is taken as a more liberal - estimation following European computational practices - (e.g. UK Met Office and European commission take as thresholds - 22C and 24C, respectively) - a : float + cutout : atlite.Cutout + The cutout to process. + threshold : float, default 23.0 + Outside temperature in °C below which there is no cooling demand. + The default follows European computational practices (UK Met Office + uses 22 °C, European Commission uses 24 °C). + a : float, default 1.0 Linear factor relating cooling demand to outside temperature. - constant : float - Constant part of cooling demand that does not depend on outside - temperature (e.g. due to ventilation). 
- hour_shift : float - Time shift relative to UTC for taking daily average + constant : float, default 0.0 + Constant part of cooling demand independent of outside temperature + (e.g. ventilation). + hour_shift : float, default 0.0 + Time shift in hours relative to UTC for daily averaging. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Daily cooling demand time-series in degree-day-like units. + + Warnings + -------- + Because the original data is provided per month, at month boundaries + there is untidiness when using a time shift. The resulting array will + have duplicate indices for parts of the day at each boundary. You must + re-average these based on the number of hours in each month. + + See Also + -------- + heat_demand : Degree-day heating demand. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. - + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. """ return cutout.convert_and_aggregate( convert_func=convert_cooling_demand, @@ -545,8 +821,42 @@ def cooling_demand( # solar thermal collectors def convert_solar_thermal( - ds, orientation, trigon_model, clearsky_model, c0, c1, t_store -): + ds: Dataset, + orientation: Callable, + trigon_model: TrigonModel, + clearsky_model: ClearskyModel | None, + c0: float, + c1: float, + t_store: float, +) -> DataArray: + """ + Convert weather data to solar thermal collector output. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing radiation and temperature variables. + orientation : callable + Surface orientation callback. + trigon_model : str + Trigonometric irradiation model. + clearsky_model : str or None + Clear-sky model used for diffuse irradiation. + c0, c1 : float + Collector efficiency parameters. + t_store : float + Storage temperature in degrees Celsius. 
+ + Returns + ------- + xr.DataArray + Specific solar thermal output in W/m². + + Notes + ----- + Collector efficiency is ``eta = c0 - c1 * (T_store - T_amb) / G`` where + *G* is the tilted irradiation. Output is ``max(0, G * eta)``. + """ # convert storage temperature to Kelvin in line with reanalysis data t_store += 273.15 @@ -570,48 +880,68 @@ def convert_solar_thermal( def solar_thermal( - cutout, - orientation={"slope": 45.0, "azimuth": 180.0}, - trigon_model="simple", - clearsky_model="simple", - c0=0.8, - c1=3.0, - t_store=80.0, - **params, -): + cutout: Cutout, + orientation: OrientationName | dict[str, float] | Callable | None = None, + trigon_model: TrigonModel = "simple", + clearsky_model: ClearskyModel = "simple", + c0: float = 0.8, + c1: float = 3.0, + t_store: float = 80.0, + **params: Any, +) -> DataArray | NumericArray: """ - Convert downward short-wave radiation flux and outside temperature into - time series for solar thermal collectors. + Convert radiation and temperature into solar thermal collector time series. - Mathematical model and defaults for c0, c1 based on model in [1]. + Collector efficiency is ``eta = c0 - c1 * (T_store - T_amb) / G``. + Mathematical model and defaults for *c0*, *c1* based on [1]. Parameters ---------- - cutout : cutout - orientation : dict or str or function - Panel orientation with slope and azimuth (units of degrees), or - 'latitude_optimal'. - trigon_model : str - Type of trigonometry model - clearsky_model : str or None - Type of clearsky model for diffuse irradiation. Either - 'simple' or 'enhanced'. - c0, c1 : float - Parameters for model in [1] (defaults to 0.8 and 3., respectively) - t_store : float - Store temperature in degree Celsius + cutout : atlite.Cutout + The cutout to process. + orientation : dict, str, or callable, optional + Panel orientation. 
A dict with ``'slope'`` and ``'azimuth'`` keys + in degrees, the string ``'latitude_optimal'``, or a callable with + the same signature as callbacks from + ``atlite.pv.orientation.make_*``. Default: ``{'slope': 45.0, + 'azimuth': 180.0}``. + trigon_model : {"simple", "perez"}, default "simple" + Trigonometric model for tilted irradiation decomposition. + clearsky_model : {"simple", "enhanced"} or None, default "simple" + Clear-sky model for diffuse irradiation. ``'enhanced'`` also uses + ambient temperature and relative humidity. + c0 : float, default 0.8 + Optical efficiency parameter. + c1 : float, default 3.0 + Thermal loss coefficient in W/(m² K). + t_store : float, default 80.0 + Storage temperature in °C. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Solar thermal generation time-series. + + See Also + -------- + pv : Photovoltaic generation. + irradiation : Tilted surface irradiation. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- [1] Henning and Palzer, Renewable and Sustainable Energy Reviews 30 (2014) 1003-1018 - """ + if orientation is None: + orientation = {"slope": 45.0, "azimuth": 180.0} if not callable(orientation): orientation = get_orientation(orientation) @@ -634,7 +964,21 @@ def convert_wind( interpolation_method: Literal["logarithmic", "power"], ) -> xr.DataArray: """ - Convert wind speeds for turbine to wind energy generation. + Convert wind speeds to turbine-specific generation. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing wind speed data. + turbine : TurbineConfig + Turbine configuration with power curve and hub height. + interpolation_method : {"logarithmic", "power"} + Method used to extrapolate wind speed to hub height. 
+ + Returns + ------- + xr.DataArray + Wind power output as specific yield per unit of installed capacity. """ V, POW, hub_height, P = itemgetter("V", "POW", "hub_height", "P")(turbine) @@ -655,17 +999,16 @@ def apply_power_curve(da): ) da.attrs["units"] = "MWh/MWp" - da = da.rename("specific generation") - return da + return da.rename("specific generation") def wind( - cutout, - turbine: str | Path | dict, - smooth: bool | dict = False, + cutout: Cutout, + turbine: str | Path | dict[str, Any], + smooth: bool | dict[str, Any] = False, add_cutout_windspeed: bool = False, interpolation_method: Literal["logarithmic", "power"] = "logarithmic", - **params, + **params: Any, ) -> xr.DataArray: """ Generate wind generation time-series. @@ -675,6 +1018,8 @@ def wind( Parameters ---------- + cutout : atlite.Cutout + The cutout to process. turbine : str or dict A turbineconfig dictionary with the keys 'hub_height' for the hub height and 'V', 'POW' defining the power curve. @@ -695,17 +1040,29 @@ interpolation_method : {"logarithmic", "power"} Law to interpolate wind speed to turbine hub height. Refer to :py:func:`atlite.wind.extrapolate_wind_speed`. + **params : Any + Additional keyword arguments passed to :py:func:`convert_and_aggregate`. Returns ------- resource : xr.DataArray - Wind generation time-series. See :py:func:`convert_and_aggregate` - for the return value depending on the aggregation arguments. + Wind generation time-series. Without aggregation, values are capacity + factors (MWh/MWp). With aggregation and ``per_unit=False``, values are + in MW. See :py:func:`convert_and_aggregate` for details. + + See Also + -------- + pv : Photovoltaic generation. Note ---- - You can also specify all of the general conversion arguments - documented in the :py:func:`convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. + + References + ---------- + ..
[1] Andresen G B, Søndergaard A A and Greiner M 2015 Energy 93, Part 1 + 1074 – 1088. doi:10.1016/j.energy.2015.09.071 Examples -------- @@ -722,20 +1079,17 @@ def wind( ('time', 'y', 'x') >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") - References - ---------- - .. [1] Andresen G B, Søndergaard A A and Greiner M 2015 Energy 93, Part 1 - 1074 – 1088. doi:10.1016/j.energy.2015.09.071 - """ - turbine = get_windturbineconfig(turbine, add_cutout_windspeed=add_cutout_windspeed) + turbine_config = get_windturbineconfig( + turbine, add_cutout_windspeed=add_cutout_windspeed + ) if smooth: - turbine = windturbine_smooth(turbine, params=smooth) + turbine_config = windturbine_smooth(turbine_config, params=smooth) return cutout.convert_and_aggregate( convert_func=convert_wind, - turbine=turbine, + turbine=turbine_config, interpolation_method=interpolation_method, **params, ) @@ -743,16 +1097,39 @@ def wind( # irradiation def convert_irradiation( - ds, - orientation, - tracking=None, - irradiation="total", - trigon_model="simple", - clearsky_model="simple", -): + ds: Dataset, + orientation: Callable, + tracking: TrackingType = None, + irradiation: IrradiationType = "total", + trigon_model: TrigonModel = "simple", + clearsky_model: ClearskyModel | None = "simple", +) -> DataArray: + """ + Convert weather data to irradiation on a tilted surface. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing radiation and meteorological variables. + orientation : callable + Surface orientation callback. + tracking : {None, "horizontal", "tilted_horizontal", "vertical", "dual"}, optional + Tracking mode of the surface. + irradiation : {"total", "direct", "diffuse", "ground"}, default "total" + Irradiation component to return. + trigon_model : str, default "simple" + Trigonometric irradiation model. + clearsky_model : str or None, default "simple" + Clear-sky model used for diffuse irradiation. + + Returns + ------- + xr.DataArray + Tilted surface irradiation in W/m². 
+ """ solar_position = SolarPosition(ds) surface_orientation = SurfaceOrientation(ds, solar_position, orientation, tracking) - irradiation = TiltedIrradiation( + return TiltedIrradiation( ds, solar_position, surface_orientation, @@ -761,23 +1138,23 @@ def convert_irradiation( tracking=tracking, irradiation=irradiation, ) - return irradiation def irradiation( - cutout, - orientation, - irradiation="total", - tracking=None, - clearsky_model=None, - **params, -): + cutout: Cutout, + orientation: OrientationName | dict[str, float] | Callable, + irradiation: IrradiationType = "total", + tracking: TrackingType = None, + clearsky_model: ClearskyModel | None = None, + **params: Any, +) -> DataArray | NumericArray: """ - Calculate the total, direct, diffuse, or ground irradiation on a tilted - surface. + Calculate irradiation on a tilted surface. Parameters ---------- + cutout : atlite.Cutout + The cutout to process. orientation : str, dict or callback Panel orientation can be chosen from either 'latitude_optimal', a constant orientation {'slope': 0.0, @@ -802,23 +1179,32 @@ def irradiation( model. The default choice of None will choose dependending on data availability, since the 'enhanced' model also incorporates ambient air temperature and relative humidity. + **params : Any + Additional keyword arguments passed to `convert_and_aggregate`. Returns ------- irradiation : xr.DataArray - The desired irradiation quantity on the tilted surface. Defaults to - "total". + Irradiation on the tilted surface in W/m². + + See Also + -------- + pv : Photovoltaic generation. + solar_thermal : Solar thermal collector output. + + Notes + ----- + The ``trigon_model`` is fixed to ``'simple'`` internally. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- [1] D.T. Reindl, W.A. Beckman, and J.A. Duffie. 
Diffuse fraction correla- tions. Solar Energy, 45(1):1 – 7, 1990. - """ if not callable(orientation): orientation = get_orientation(orientation) @@ -835,8 +1221,36 @@ def irradiation( # solar PV def convert_pv( - ds, panel, orientation, tracking, trigon_model="simple", clearsky_model="simple" -): + ds: Dataset, + panel: dict[str, Any], + orientation: Callable, + tracking: TrackingType, + trigon_model: TrigonModel = "simple", + clearsky_model: ClearskyModel | None = "simple", +) -> DataArray: + """ + Convert weather data to photovoltaic specific generation. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing radiation and temperature variables. + panel : dict + Solar panel configuration. + orientation : callable + Surface orientation callback. + tracking : {None, "horizontal", "tilted_horizontal", "vertical", "dual"} + Tracking mode of the panel. + trigon_model : str, default "simple" + Trigonometric irradiation model. + clearsky_model : str or None, default "simple" + Clear-sky model used for diffuse irradiation. + + Returns + ------- + xr.DataArray + PV power output as capacity factors (unitless, 0–1). + """ solar_position = SolarPosition(ds) surface_orientation = SurfaceOrientation(ds, solar_position, orientation, tracking) irradiation = TiltedIrradiation( @@ -847,21 +1261,28 @@ def convert_pv( clearsky_model=clearsky_model, tracking=tracking, ) - solar_panel = SolarPanelModel(ds, irradiation, panel) - return solar_panel + return SolarPanelModel(ds, irradiation, panel) -def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params): +def pv( + cutout: Cutout, + panel: str | PanelConfig, + orientation: OrientationName | dict[str, float] | Callable, + tracking: TrackingType = None, + clearsky_model: ClearskyModel | None = None, + **params: Any, +) -> DataArray | NumericArray: """ - Convert downward-shortwave, upward-shortwave radiation flux and ambient - temperature into a pv generation time-series. 
+ Convert radiation and temperature into PV generation time series. Parameters ---------- + cutout : atlite.Cutout + The cutout to process. panel : str or dict Panel config dictionary with the parameters for the electrical - model in [3]. Alternatively, name of yaml file stored in - atlite.config.solarpanel_dir. + model in [3]. Alternatively, a name accepted by + :py:func:`atlite.resource.get_solarpanelconfig`. orientation : str, dict or callback Panel orientation can be chosen from either 'latitude_optimal', a constant orientation {'slope': 0.0, @@ -879,30 +1300,26 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) model. The default choice of None will choose dependending on data availability, since the 'enhanced' model also incorporates ambient air temperature and relative humidity. + **params : Any + Additional keyword arguments passed to `convert_and_aggregate`. Returns ------- pv : xr.DataArray - PV generation time-series. See :py:func:`convert_and_aggregate` - for the return value depending on the aggregation arguments. + PV generation time-series. Without aggregation, values are capacity + factors (unitless, 0–1). With aggregation and ``per_unit=False``, + values are in MW. See :py:func:`convert_and_aggregate` for details. - Note - ---- - You can also specify all of the general conversion arguments - documented in the :py:func:`convert_and_aggregate` function. - - Examples + See Also -------- - Aggregate PV generation to bus regions: + wind : Wind generation. + irradiation : Tilted surface irradiation. + solar_thermal : Solar thermal collector output. - >>> pv = cutout.pv(panel="CSi", orientation="latitude_optimal", - ... matrix=matrix, index=buses, per_unit=True) - - Get per-cell capacity factor time series (no aggregation): - - >>> cf = cutout.pv(panel="CSi", orientation="latitude_optimal", - ... 
aggregate_time=None) - >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- @@ -917,6 +1334,19 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) the Performance Check of Grid Connected Systems, Freiburg, June 2004. Eurosun (ISES Europe Solar Congress). + + Examples + -------- + Aggregate PV generation to bus regions: + + >>> pv = cutout.pv(panel="CSi", orientation="latitude_optimal", + ... matrix=matrix, index=buses, per_unit=True) + + Get per-cell capacity factor time series (no aggregation): + + >>> cf = cutout.pv(panel="CSi", orientation="latitude_optimal", + ... aggregate_time=None) + >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") """ if isinstance(panel, (str | Path)): panel = get_solarpanelconfig(panel) @@ -935,6 +1365,26 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) # solar CSP def convert_csp(ds, installation): + """ + Convert direct solar radiation to CSP specific generation. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing direct radiation variables. + installation : dict + CSP installation configuration. + + Returns + ------- + xr.DataArray + CSP output as specific yield (kWh/kW_ref), clipped to [0, 1]. + + Raises + ------ + ValueError + If the CSP technology option is not recognized. 
+ """ solar_position = SolarPosition(ds) tech = installation["technology"] @@ -963,39 +1413,46 @@ def convert_csp(ds, installation): da = da.fillna(0.0) da.attrs["units"] = "kWh/kW_ref" - da = da.rename("specific generation") - - return da + return da.rename("specific generation") -def csp(cutout, installation, technology=None, **params): +def csp( + cutout: Cutout, + installation: str | CSPConfig, + technology: Literal["parabolic trough", "solar tower"] | None = None, + **params: Any, +) -> DataArray | NumericArray: """ - Convert downward shortwave direct radiation into a csp generation time- - series. + Convert direct radiation into CSP generation time series. Parameters ---------- - installation: str or xr.DataArray - CSP installation details determining the solar field efficiency dependent on - the local solar position. Can be either the name of one of the standard - installations provided through `atlite.cspinstallationsPanel` or an - xarray.DataArray with 'azimuth' (in rad) and 'altitude' (in rad) coordinates - and an 'efficiency' (in p.u.) entry. - technology: str - Overwrite CSP technology from the installation configuration. The technology - affects which direct radiation is considered. Either 'parabolic trough' (DHI) - or 'solar tower' (DNI). + cutout : atlite.Cutout + The cutout to process. + installation : str or xr.DataArray + CSP installation details determining the solar field efficiency + dependent on the local solar position. Can be a name accepted by + :py:func:`atlite.resource.get_cspinstallationconfig` or an + ``xr.DataArray`` with ``'azimuth'`` (rad) and ``'altitude'`` (rad) + coordinates and an ``'efficiency'`` (p.u.) entry. + technology : {"parabolic trough", "solar tower"} or None + Overwrite CSP technology from the installation configuration. + ``'parabolic trough'`` uses direct horizontal irradiance (DHI), + ``'solar tower'`` uses direct normal irradiance (DNI). + **params + Additional keyword arguments passed to `convert_and_aggregate`. 
Returns ------- csp : xr.DataArray - Time-series or capacity factors based on additional general - conversion arguments. + CSP generation time-series in specific yield (kWh/kW_ref), clipped + to [0, 1]. See :py:func:`convert_and_aggregate` for details on + aggregation behaviour. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- @@ -1023,6 +1480,21 @@ def csp(cutout, installation, technology=None, **params): # hydro def convert_runoff(ds, weight_with_height=True): + """ + Convert runoff data, optionally weighting by surface height. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``runoff`` and, if needed, ``height``. + weight_with_height : bool, default True + Whether to weight runoff by terrain height. + + Returns + ------- + xr.DataArray + Runoff field, optionally weighted by surface height. + """ runoff = ds["runoff"] if weight_with_height: @@ -1038,12 +1510,47 @@ def runoff( normalize_using_yearly=None, **params, ): + """ + Compute aggregated surface runoff with optional smoothing and normalization. + + Parameters + ---------- + cutout : atlite.Cutout + Cutout providing weather data with runoff variables. + smooth : bool or int, optional + If ``True``, apply a rolling mean with the default window of + ``24 * 7`` time steps. If an integer, use it as the rolling window + size. Default ``None`` (no smoothing). + lower_threshold_quantile : bool or float, optional + If ``True``, use the default quantile ``5e-3``. If a float, set + values below that quantile to zero. Default ``None`` (no + thresholding). + normalize_using_yearly : pd.Series, optional + Annual country totals used to scale ``countries``-indexed results over + overlapping full years. One factor per country is derived from the + summed reference values across the overlap. 
+ **params + Additional keyword arguments passed to ``convert_and_aggregate()``, + including ``weight_with_height`` for the underlying runoff + conversion. + + Returns + ------- + xr.DataArray or tuple[xr.DataArray, xr.DataArray] + Runoff output from ``convert_and_aggregate``. Smoothing also supports + the tuple return form used with ``return_capacity=True``. Thresholding + and normalization are only supported for ``xr.DataArray`` results. + + See Also + -------- + convert_and_aggregate : General conversion/aggregation arguments. + """ result = cutout.convert_and_aggregate(convert_func=convert_runoff, **params) if smooth is not None: if smooth is True: smooth = 24 * 7 - if "return_capacity" in params.keys(): + if "return_capacity" in params: result = result[0].rolling(time=smooth, min_periods=1).mean(), result[1] else: result = result.rolling(time=smooth, min_periods=1).mean() @@ -1064,7 +1571,8 @@ def runoff( normalize_using_yearly_i = normalize_using_yearly_i.astype(int) years = ( - pd.Series(pd.to_datetime(result.coords["time"].values).year) + pd + .Series(pd.to_datetime(result.coords["time"].values).year) .value_counts() .loc[lambda x: x > 8700] .index.intersection(normalize_using_yearly_i) @@ -1091,11 +1599,12 @@ def hydro( **kwargs, ): """ - Compute inflow time-series for `plants` by aggregating over catchment - basins from `hydrobasins` + Compute inflow time series for plants by aggregating over catchment basins. Parameters ---------- + cutout : atlite.Cutout + The cutout to process. plants : pd.DataFrame Run-of-river plants or dams with lon, lat columns. hydrobasins : str|gpd.GeoDataFrame @@ -1108,6 +1617,13 @@ def hydro( better for coarser resolution). show_progress : bool Whether to display progressbars. + **kwargs + Additional keyword arguments passed to `convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Inflow time-series for each plant. 
References ---------- @@ -1139,7 +1655,7 @@ def hydro( # The hydrological parameters are in units of "m of water per day" and so # they should be multiplied by 1000 and the basin area to convert to m3 # d-1 = m3 h-1 / 24 - runoff *= xr.DataArray(basins.shapes.to_crs(dict(proj="cea")).area) + runoff *= xr.DataArray(basins.shapes.to_crs({"proj": "cea"}).area) return hydrom.shift_and_aggregate_runoff_for_plants( basins, runoff, flowspeed, show_progress @@ -1150,45 +1666,33 @@ def convert_line_rating( ds, psi, R, D=0.028, Ts=373, epsilon=0.6, alpha=0.6, per_unit=False ): """ - Convert the cutout data to dynamic line rating time series. - - The formulation is based on: - - [1]“IEEE Std 738™-2012 (Revision of IEEE Std 738-2006/Incorporates IEEE Std - 738-2012/Cor 1-2013), IEEE Standard for Calculating the Current-Temperature - Relationship of Bare Overhead Conductors,” p. 72. - - The following simplifications/assumptions were made: - 1. Wind speed are taken at height 100 meters above ground. However, ironmen - and transmission lines are typically at 50-60 meters. - 2. Solar heat influx is set proportionally to solar short wave influx. - 3. The incidence angle of the solar heat influx is assumed to be 90 degree. - + Convert weather data to dynamic line rating time series. Parameters ---------- ds : xr.Dataset - Subset of the cutout data including all weather cells overlapping with - the line. - psi : int/float - Azimuth angle of the line in degree, that is the incidence angle of the line - with a pointer directing north (90 is east, 180 is south, 270 is west). + Dataset for the cells intersecting a line. + psi : float + Line azimuth in degrees clockwise from north. R : float - Resistance of the conductor in [Ω/m] at maximally allowed temperature Ts. - D : float, - Conductor diameter. - Ts : float - Maximally allowed surface temperature (typically 100°C). - epsilon : float - Conductor emissivity. - alpha : float - Conductor absorptivity. 
+ Conductor resistance in Ω/m at temperature *Ts*. + D : float, default 0.028 + Conductor diameter in meters. + Ts : float, default 373 + Maximum conductor surface temperature in Kelvin. + epsilon : float, default 0.6 + Conductor emissivity (dimensionless). + alpha : float, default 0.6 + Conductor absorptivity (dimensionless). + per_unit : bool, default False + Unused compatibility parameter. Returns ------- - Imax - xr.DataArray giving the maximal current capacity per timestep in Ampere. - + xr.DataArray or numpy.ndarray + Maximum current per time step in ampere. When *ds* is an + ``xr.Dataset`` the result is aggregated across intersecting cells + via ``.min("spatial")``. """ Ta = ds["temperature"] Tfilm = (Ta + Ts) / 2 @@ -1235,7 +1739,7 @@ def convert_line_rating( A = D * 1 # projected area of conductor in square meters if isinstance(ds, dict): - Position = namedtuple("solarposition", ["altitude", "azimuth"]) + Position = namedtuple("Position", ["altitude", "azimuth"]) solar_position = Position(ds["solar_altitude"], ds["solar_azimuth"]) else: solar_position = SolarPosition(ds) @@ -1250,7 +1754,7 @@ def convert_line_rating( def line_rating( - cutout, shapes, line_resistance, show_progress=False, dask_kwargs={}, **params + cutout, shapes, line_resistance, show_progress=False, dask_kwargs=None, **params ): """ Create a dynamic line rating time series based on the IEEE-738 standard. @@ -1271,6 +1775,7 @@ def line_rating( Parameters ---------- cutout : atlite.Cutout + The cutout to process. shapes : geopandas.GeoSeries Line shapes of the lines. line_resistance : float/series @@ -1280,17 +1785,28 @@ def line_rating( Whether to show a progress bar. dask_kwargs : dict, default {} Dict with keyword arguments passed to `dask.compute`. - params : keyword arguments as float/series - Arguments to tweak/modify the line rating calculations based on [1]. 
- Defaults are: - * D : 0.028 (conductor diameter) - * Ts : 373 (maximally allowed surface temperature) - * epsilon : 0.6 (conductor emissivity) - * alpha : 0.6 (conductor absorptivity) + D : float, default 0.028 + Conductor diameter in meters. + Ts : float, default 373 + Maximum allowed conductor surface temperature in Kelvin. + epsilon : float, default 0.6 + Conductor emissivity (dimensionless). + alpha : float, default 0.6 + Conductor absorptivity (dimensionless). + **params : Any + Additional keyword arguments passed to + :py:func:`convert_line_rating`. Returns ------- - Current thermal limit timeseries with dimensions time x lines in Ampere. + xr.DataArray + Thermal current limit time-series with dimensions + ``(time, lines)`` in ampere. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. Example ------- @@ -1319,6 +1835,8 @@ def line_rating( >>> s = np.sqrt(3) * i * v / 1e3 # in MW """ + if dask_kwargs is None: + dask_kwargs = {} if not isinstance(shapes, gpd.GeoSeries): shapes = gpd.GeoSeries(shapes).rename_axis("dim_0") @@ -1328,13 +1846,26 @@ def line_rating( data = cutout.data.stack(spatial=["y", "x"]) def get_azimuth(shape): + """ + Return the line azimuth in degrees from its end points. + + Parameters + ---------- + shape : shapely.geometry.base.BaseGeometry + Line geometry. + + Returns + ------- + float + Azimuth angle in degrees computed from the line end points. 
+ """ coords = np.array(shape.coords) start = coords[0] end = coords[-1] - return np.arctan2(start[0] - end[0], start[1] - end[1]) + return np.degrees(np.arctan2(start[0] - end[0], start[1] - end[1])) azimuth = shapes.apply(get_azimuth) - azimuth = azimuth.where(azimuth >= 0, azimuth + np.pi) + azimuth = azimuth.where(azimuth >= 0, azimuth + 180.0) params.setdefault("D", 0.028) params.setdefault("Ts", 373) diff --git a/atlite/csp.py b/atlite/csp.py index df23a0ad..bfd678d1 100644 --- a/atlite/csp.py +++ b/atlite/csp.py @@ -1,21 +1,36 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for use in conjunction with csp data generation. -""" +"""Functions for use in conjunction with csp data generation.""" + +from __future__ import annotations import logging +from typing import TYPE_CHECKING, Literal, TypeAlias import numpy as np +import xarray as xr from dask.array import radians, sin from atlite.pv.solar_position import SolarPosition +if TYPE_CHECKING: + from dask.array import Array + logger = logging.getLogger(__name__) +NDArray: TypeAlias = np.ndarray +DataArray: TypeAlias = xr.DataArray +Dataset: TypeAlias = xr.Dataset +CSPTechnology = Literal["parabolic trough", "solar tower"] +FieldOrientation = Literal["horizontal", "tilted", "single-axis", "two-axis"] -def calculate_dni(ds, solar_position=None, altitude_threshold=3.75): + +def calculate_dni( + ds: Dataset, + solar_position: Dataset | None = None, + altitude_threshold: float = 3.75, +) -> DataArray: """ Calculate DNI on a perpendicular plane. @@ -24,35 +39,34 @@ def calculate_dni(ds, solar_position=None, altitude_threshold=3.75): Parameters ---------- - ds : xarray.Dataset + ds : xr.Dataset Dataset containing the direct influx (influx_direct) into a horizontal plane. - solar_position : xarray.Dataset (optional) - solar_position containing a solar 'altitude' (in rad, 0 to pi/2) for the 'ds' dataset. - Is calculated using atlite.pv.SolarPosition if omitted. 
- altitude_threshold : float (default: 3.75 degrees) + solar_position : xr.Dataset | None + Dataset containing solar altitude (in rad, 0 to pi/2) for the input dataset. + Calculated using atlite.pv.SolarPosition if not provided. + altitude_threshold : float Threshold for solar altitude in degrees. Values in range (0, altitude_threshold] - will be set to the altitude_threshold to avoid numerical issues when dividing by - the sine of very low solar altitude. - The default values '3.75 deg' corresponds to - the solar altitude traversed by the sun within about 15 minutes in a location with - maximum solar altitude of 60 deg and 10h day time. + are set to altitude_threshold to prevent numerical issues when dividing by + the sine of very low solar altitude. Default: 3.75 degrees corresponds to + approximately 15 minutes of solar movement at 60 deg maximum altitude. + + Returns + ------- + xr.DataArray + Direct Normal Irradiance (DNI) in W/m^2 on a plane perpendicular to solar rays. """ if solar_position is None: solar_position = SolarPosition(ds) - # solar altitude expected in rad, convert degrees (easier to specifcy) to match - altitude_threshold = radians(altitude_threshold) + altitude_threshold_rad: Array = radians(altitude_threshold) - # Sanitation of altitude values: - # Prevent high calculated DNI values during low solar altitudes (sunset / dawn) - # where sin() results in a very low denominator in the DNI calculation - altitude = solar_position["altitude"] + altitude: DataArray = solar_position["altitude"] altitude = altitude.where(lambda x: x > 0, np.nan) - altitude = altitude.where(lambda x: x > altitude_threshold, altitude_threshold) + altitude = altitude.where( + lambda x: x > altitude_threshold_rad, altitude_threshold_rad + ) - # Calculate DNI and remove NaNs introduced during altitude sanitation - # DNI is determined either by dividing by cos(azimuth) or sin(altitude) - dni = ds["influx_direct"] / sin(altitude) + dni: DataArray = ds["influx_direct"] / 
sin(altitude) return dni diff --git a/atlite/cutout.py b/atlite/cutout.py index db747869..b48342ff 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Base class for atlite. -""" +"""Base class for atlite.""" # There is a binary incompatibility between the pip wheels of netCDF4 and # rasterio, which leads to the first one to work correctly while the second @@ -14,9 +12,12 @@ # https://github.com/pydata/xarray/issues/2535, # https://github.com/rasterio/rasterio-wheels/issues/12 +from __future__ import annotations + import logging from pathlib import Path from tempfile import mktemp +from typing import TYPE_CHECKING, Any from warnings import warn import geopandas as gpd @@ -28,6 +29,22 @@ from pyproj import CRS from shapely.geometry import box +if TYPE_CHECKING: + from collections.abc import Sequence + + from atlite._types import ( + CrsLike, + DataArray, + GeoDataFrame, + Geometry, + NDArray, + Number, + PathLike, + SparseMatrix, + ) + + pass + from atlite.convert import ( coefficient_of_performance, convert_and_aggregate, @@ -66,7 +83,7 @@ class Cutout: functionalities. """ - def __init__(self, path, **cutoutparams): + def __init__(self, path: PathLike, **cutoutparams: Any) -> None: """ Provide an atlite cutout object. @@ -118,6 +135,8 @@ def __init__(self, path, **cutoutparams): data : xr.Dataset User provided cutout data. Save the cutout using `Cutout.to_file()` afterwards. + **cutoutparams + Additional keyword arguments. See Other Parameters below. Other Parameters ---------------- @@ -131,13 +150,20 @@ def __init__(self, path, **cutoutparams): Whether to interpolate NaN's in the SARAH data. This takes effect for sarah data which has missing data for areas where dawn and nightfall happens (ca. 30 min gap). - gebco_path: str + gebco_path : str Path to find the gebco NetCDF file. Only necessary when including the gebco module. 
parallel : bool, default False Whether to open dataset in parallel mode. Take effect for all xr.open_mfdataset usages. + Raises + ------ + TypeError + If required arguments are missing when building a new cutout. + ValueError + If ``bounds`` has an invalid format. + """ path = Path(path).with_suffix(".nc") chunks = cutoutparams.pop("chunks", {"time": 100}) @@ -155,12 +181,13 @@ def __init__(self, path, **cutoutparams): if cutoutparams: warn( f"Arguments {', '.join(cutoutparams)} are ignored, since " - "cutout is already built." + "cutout is already built.", + stacklevel=2, ) elif "data" in cutoutparams: data = cutoutparams.pop("data") else: - logger.info(f"Building new cutout {path}") + logger.info("Building new cutout %s", path) if "bounds" in cutoutparams: bounds = cutoutparams.pop("bounds") @@ -202,45 +229,35 @@ def __init__(self, path, **cutoutparams): # Check compatibility of CRS modules = atleast_1d(data.attrs.get("module")) - crs = set(CRS(datamodules[m].crs) for m in modules) + crs = {CRS(datamodules[m].crs) for m in modules} assert len(crs) == 1, f"CRS of {module} not compatible" self.path = path self.data = data @property - def name(self): - """ - Name of the cutout. - """ + def name(self) -> str: + """Name of the cutout.""" return self.path.stem @property - def module(self): - """ - Data module of the cutout. - """ - return self.data.attrs.get("module") + def module(self) -> str | list[str]: + """Data module of the cutout.""" + return self.data.attrs.get("module") # type: ignore[no-any-return] @property - def crs(self): - """ - Coordinate Reference System of the cutout. - """ + def crs(self) -> CRS: + """Coordinate Reference System of the cutout.""" return CRS(datamodules[atleast_1d(self.module)[0]].crs) @property - def available_features(self): - """ - List of available weather data features for the cutout. 
- """ + def available_features(self) -> pd.Index: + """List of available weather data features for the cutout.""" return available_features(self.module) @property - def chunks(self): - """ - Chunking of the cutout data used by dask. - """ + def chunks(self) -> dict[str, int] | None: + """Chunking of the cutout data used by dask.""" chunks = { k.lstrip("chunksize_"): v for k, v in self.data.attrs.items() @@ -249,42 +266,35 @@ def chunks(self): return None if chunks == {} else chunks @property - def coords(self): - """ - Geographic coordinates of the cutout. - """ + def coords(self) -> xr.Coordinates: + """Geographic coordinates of the cutout.""" return self.data.coords @property - def shape(self): - """ - Size of spatial dimensions (y, x) of the cutout data. - """ + def shape(self) -> tuple[int, int]: + """Size of spatial dimensions (y, x) of the cutout data.""" return len(self.coords["y"]), len(self.coords["x"]) @property - def extent(self): - """ - Total extent of the area covered by the cutout (x, X, y, Y). - """ + def extent(self) -> NDArray: + """Total extent of the area covered by the cutout (x, X, y, Y).""" xs, ys = self.coords["x"].values, self.coords["y"].values dx, dy = self.dx, self.dy - return np.array( - [xs[0] - dx / 2, xs[-1] + dx / 2, ys[0] - dy / 2, ys[-1] + dy / 2] - ) + return np.array([ + xs[0] - dx / 2, + xs[-1] + dx / 2, + ys[0] - dy / 2, + ys[-1] + dy / 2, + ]) @property - def bounds(self): - """ - Total bounds of the area covered by the cutout (x, y, X, Y). - """ + def bounds(self) -> NDArray: + """Total bounds of the area covered by the cutout (x, y, X, Y).""" return self.extent[[0, 2, 1, 3]] @property - def transform(self): - """ - Get the affine transform of the cutout. - """ + def transform(self) -> rio.Affine: + """Get the affine transform of the cutout.""" return rio.Affine( self.dx, 0, @@ -295,10 +305,8 @@ def transform(self): ) @property - def transform_r(self): - """ - Get the affine transform of the cutout with reverse y-order. 
- """ + def transform_r(self) -> rio.Affine: + """Get the affine transform of the cutout with reverse y-order.""" return rio.Affine( self.dx, 0, @@ -309,42 +317,32 @@ def transform_r(self): ) @property - def dx(self): - """ - Spatial resolution on the x coordinates. - """ + def dx(self) -> float: + """Spatial resolution on the x coordinates.""" x = self.coords["x"] - return round((x[-1] - x[0]).item() / (x.size - 1), 8) + return round((x[-1] - x[0]).item() / (x.size - 1), 8) # type: ignore[no-any-return] @property - def dy(self): - """ - Spatial resolution on the y coordinates. - """ + def dy(self) -> float: + """Spatial resolution on the y coordinates.""" y = self.coords["y"] - return round((y[-1] - y[0]).item() / (y.size - 1), 8) + return round((y[-1] - y[0]).item() / (y.size - 1), 8) # type: ignore[no-any-return] @property - def dt(self): - """ - Time resolution of the cutout. - """ - return pd.infer_freq(self.coords["time"].to_index()) + def dt(self) -> str | None: + """Time resolution of the cutout.""" + return pd.infer_freq(self.coords["time"].to_index()) # type: ignore[no-any-return] @property - def prepared(self): - """ - Boolean indicating whether all available features are prepared. - """ - return self.prepared_features.sort_index().equals( + def prepared(self) -> bool: + """Boolean indicating whether all available features are prepared.""" + return self.prepared_features.sort_index().equals( # type: ignore[no-any-return] self.available_features.sort_index() ) @property - def prepared_features(self): - """ - Get the list of prepared features in the cutout. 
- """ + def prepared_features(self) -> pd.Series[Any]: + """Get the list of prepared features in the cutout.""" index = [ (self.data[v].attrs["module"], self.data[v].attrs["feature"]) for v in self.data @@ -353,7 +351,7 @@ def prepared_features(self): return pd.Series(list(self.data), index, dtype=object) @CachedAttribute - def grid(self): + def grid(self) -> GeoDataFrame: """ Cutout grid with coordinates and geometries. @@ -375,7 +373,13 @@ def grid(self): crs=self.crs, ) - def sel(self, path=None, bounds=None, buffer=0, **kwargs): + def sel( + self, + path: PathLike | None = None, + bounds: Sequence[float] | None = None, + buffer: float = 0, + **kwargs: Any, + ) -> Cutout: """ Select parts of the cutout. @@ -407,12 +411,15 @@ def sel(self, path=None, bounds=None, buffer=0, **kwargs): if bounds is not None: if buffer > 0: bounds = box(*bounds).buffer(buffer).bounds + assert bounds is not None x1, y1, x2, y2 = bounds kwargs.update(x=slice(x1, x2), y=slice(y1, y2)) data = self.data.sel(**kwargs) return Cutout(path, data=data) - def merge(self, other, path=None, **kwargs): + def merge( + self, other: Cutout, path: PathLike | None = None, **kwargs: Any + ) -> Cutout: """ Merge two cutouts into a single cutout. @@ -450,7 +457,7 @@ def merge(self, other, path=None, **kwargs): return Cutout(path, data=data) - def to_file(self, fn=None): + def to_file(self, fn: PathLike | None = None) -> None: """ Save cutout to a NetCDF file. @@ -464,7 +471,14 @@ def to_file(self, fn=None): fn = self.path self.data.to_netcdf(fn) - def __repr__(self): + def __repr__(self) -> str: + """Return string representation of the cutout. + + Returns + ------- + str + Human-readable summary of the cutout. 
+ """ start = np.datetime_as_string(self.coords["time"].values[0], unit="D") end = np.datetime_as_string(self.coords["time"].values[-1], unit="D") return ( @@ -489,7 +503,9 @@ def __repr__(self): ) ) - def indicatormatrix(self, shapes, shapes_crs=4326): + def indicatormatrix( + self, shapes: Sequence[Geometry], shapes_crs: CrsLike = 4326 + ) -> SparseMatrix: """ Compute the indicatormatrix. @@ -505,6 +521,9 @@ def indicatormatrix(self, shapes, shapes_crs=4326): Parameters ---------- shapes : Collection of shapely polygons + Shapes to compute the indicator matrix for. + shapes_crs : int or CRS, default 4326 + CRS of the shapes. Returns ------- @@ -514,7 +533,9 @@ def indicatormatrix(self, shapes, shapes_crs=4326): """ return compute_indicatormatrix(self.grid, shapes, self.crs, shapes_crs) - def intersectionmatrix(self, shapes, shapes_crs=4326): + def intersectionmatrix( + self, shapes: Sequence[Geometry], shapes_crs: CrsLike = 4326 + ) -> SparseMatrix: """ Compute the intersectionmatrix. @@ -525,8 +546,10 @@ def intersectionmatrix(self, shapes, shapes_crs=4326): Parameters ---------- - orig : Collection of shapely polygons - dest : Collection of shapely polygons + shapes : Collection of shapely polygons + Shapes to compute the intersection matrix for. + shapes_crs : int or CRS, default 4326 + CRS of the shapes. Returns ------- @@ -536,7 +559,7 @@ def intersectionmatrix(self, shapes, shapes_crs=4326): """ return compute_intersectionmatrix(self.grid, shapes, self.crs, shapes_crs) - def area(self, crs=None): + def area(self, crs: CrsLike = None) -> DataArray: """ Get the area per grid cell as a DataArray with coords (x,y). @@ -561,13 +584,20 @@ def area(self, crs=None): [self.coords["y"], self.coords["x"]], ) - def uniform_layout(self): + def uniform_layout(self) -> DataArray: """ Get a uniform capacity layout for all grid cells. + + Returns + ------- + xr.DataArray + Layout with value 1 for all grid cells. 
""" return xr.DataArray(1, [self.coords["y"], self.coords["x"]]) - def uniform_density_layout(self, capacity_density, crs=None): + def uniform_density_layout( + self, capacity_density: Number, crs: CrsLike = None + ) -> DataArray: """ Get a capacity layout from a uniform capacity density. @@ -588,14 +618,18 @@ def uniform_density_layout(self, capacity_density, crs=None): """ return capacity_density * self.area(crs) - def equals(self, other): + def equals(self, other: Any) -> bool: """ - It overrides xarray.Dataset.equals and ignores the path attribute in the comparison + Compare equality with another cutout, ignoring the path attribute. + + Returns + ------- + bool + Whether the two cutouts are equal. """ if not isinstance(other, Cutout): - return NotImplemented - # Compare cutouts data attributes - return self.data.equals(other.data) + return NotImplemented # type: ignore[no-any-return] + return bool(self.data.equals(other.data)) def layout_from_capacity_list(self, data, col="Capacity"): """ @@ -632,7 +666,6 @@ def layout_from_capacity_list(self, data, col="Capacity"): >>> pv.plot() """ - x_grid = self.data.x.values y_grid = self.data.y.values diff --git a/atlite/data.py b/atlite/data.py index fece0cf6..9776b2f9 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -1,9 +1,9 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Management of data retrieval and structure. 
-""" +"""Management of data retrieval and structure.""" + +from __future__ import annotations import logging import os @@ -11,41 +11,69 @@ from pathlib import Path from shutil import rmtree from tempfile import mkdtemp, mkstemp +from typing import TYPE_CHECKING, Any +import numpy as np import pandas as pd import xarray as xr -from dask import compute, delayed +from dask import compute as dask_compute +from dask import delayed from dask.diagnostics import ProgressBar from dask.utils import SerializableLock from numpy import atleast_1d from atlite.datasets import modules as datamodules +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Sequence + + from atlite._types import DataArray, DataFormat, Dataset, PathLike + from atlite.cutout import Cutout + logger = logging.getLogger(__name__) def get_features( - cutout, - module, - features, - data_format, - tmpdir=None, - monthly_requests=False, - concurrent_requests=False, -): + cutout: Cutout, + module: str, + features: Iterable[str], + data_format: DataFormat, + tmpdir: PathLike | None = None, + monthly_requests: bool = False, + concurrent_requests: bool = False, +) -> Dataset: """ - Load the feature data for a given module. + Load feature datasets for a cutout module. + + Parameters + ---------- + cutout : atlite.Cutout + Cutout for which data is retrieved. + module : str + Name of the dataset module. + features : iterable of str + Feature names to retrieve from the module. + data_format : str + Data format requested from the dataset backend. + tmpdir : str or pathlib.Path, optional + Directory for intermediate files. + monthly_requests : bool, optional + Whether to split requests into monthly chunks. + concurrent_requests : bool, optional + Whether to issue monthly requests concurrently. - This get the data for a set of features from a module. All modules - in `atlite.datasets` are allowed. + Returns + ------- + xarray.Dataset + Merged dataset containing the requested features. 
""" - parameters = cutout.data.attrs - lock = SerializableLock() - datasets = [] - get_data = datamodules[module].get_data + parameters: dict[str, Any] = cutout.data.attrs + lock: SerializableLock = SerializableLock() + datasets: list[Any] = [] + get_data: Callable[..., Any] = datamodules[module].get_data for feature in features: - feature_data = delayed(get_data)( + feature_data: Any = delayed(get_data)( cutout, feature, tmpdir=tmpdir, @@ -57,23 +85,23 @@ def get_features( ) datasets.append(feature_data) - datasets = compute(*datasets) + datasets = dask_compute(*datasets) - ds = xr.merge(datasets, compat="equals") + ds: Dataset = xr.merge(datasets, compat="equals") for v in ds: - da = ds[v] + da: DataArray = ds[v] da.attrs["module"] = module - fd = datamodules[module].features.items() + fd: Iterable[tuple[str, Any]] = datamodules[module].features.items() da.attrs["feature"] = [k for k, l in fd if v in l].pop() if da.chunks is not None: - chunksizes = [c[0] for c in da.chunks] + chunksizes: list[int] = [c[0] for c in da.chunks] da.encoding["chunksizes"] = chunksizes return ds -def available_features(module=None): +def available_features(module: str | Sequence[str] | None = None) -> pd.Series[str]: """ Inspect the available features of all or a selection of modules. @@ -91,33 +119,59 @@ def available_features(module=None): obtained. 
""" - features = {name: m.features for name, m in datamodules.items()} - features = ( - pd.DataFrame(features) + features: dict[str, Any] = {name: m.features for name, m in datamodules.items()} + features_frame: pd.Series[Any] = ( + pd + .DataFrame(features) .unstack() .dropna() .rename_axis(index=["module", "feature"]) .rename("variables") ) if module is not None: - features = features.reindex(atleast_1d(module), level="module") - return features.explode() + features_frame = features_frame.reindex(atleast_1d(module), level="module") + return features_frame.explode() -def non_bool_dict(d): +def non_bool_dict(d: dict[str, Any]) -> dict[str, Any]: """ - Convert bool to int for netCDF4 storing. + Convert boolean dictionary values to integers. + + Parameters + ---------- + d : dict + Dictionary to convert. + + Returns + ------- + dict + Dictionary with boolean values replaced by ``0`` or ``1``. """ return {k: v if not isinstance(v, bool) else int(v) for k, v in d.items()} -def maybe_remove_tmpdir(func): - """Use this wrapper to make tempfile deletion compatible with windows machines.""" +def maybe_remove_tmpdir( + func: Callable[..., Any], +) -> Callable[..., Any]: + """ + Wrap a function to manage a temporary directory. + + Parameters + ---------- + func : callable + Function accepting a ``tmpdir`` keyword argument. + + Returns + ------- + callable + Wrapped function that creates and removes a temporary directory when + ``tmpdir`` is not provided. 
+ """ @wraps(func) - def wrapper(*args, **kwargs): - if kwargs.get("tmpdir", None): - res = func(*args, **kwargs) + def wrapper(*args: Any, **kwargs: Any) -> Any: + if kwargs.get("tmpdir"): + res: Any = func(*args, **kwargs) else: kwargs["tmpdir"] = mkdtemp() try: @@ -131,17 +185,17 @@ def wrapper(*args, **kwargs): @maybe_remove_tmpdir def cutout_prepare( - cutout, - features=None, - tmpdir=None, - data_format="grib", - overwrite=False, - compression={"zlib": True, "complevel": 9, "shuffle": True}, - show_progress=False, - dask_kwargs=None, - monthly_requests=False, - concurrent_requests=False, -): + cutout: Cutout, + features: str | Sequence[str] | None = None, + tmpdir: PathLike | None = None, + data_format: DataFormat = "grib", + overwrite: bool = False, + compression: dict[str, Any] | None = None, + show_progress: bool = False, + dask_kwargs: dict[str, Any] | None = None, + monthly_requests: bool = False, + concurrent_requests: bool = False, +) -> Cutout: """ Prepare all or a selection of features in a cutout. @@ -156,6 +210,7 @@ def cutout_prepare( Parameters ---------- cutout : atlite.Cutout + The cutout to process. features : str/list, optional Feature(s) to be prepared. The default slice(None) results in all available features. @@ -195,70 +250,81 @@ def cutout_prepare( Raises ------ - NotADirectoryError - The argument `tmpdir` is not a valid path. + ValueError + If ``tmpdir`` is None. + FileNotFoundError + If ``tmpdir`` does not exist. 
""" if dask_kwargs is None: dask_kwargs = {} + if compression is None: + compression = {"zlib": True, "complevel": 9, "shuffle": True} + if cutout.prepared and not overwrite: logger.info("Cutout already prepared.") return cutout - # ensure that the tmpdir actually exists - temp_dir_path = Path(tmpdir) + if tmpdir is None: + raise ValueError("tmpdir cannot be None") + temp_dir_path: Path = Path(tmpdir) if not temp_dir_path.is_dir(): raise FileNotFoundError(f"The tmpdir: {temp_dir_path} does not exist.") - logger.info(f"Storing temporary files in {tmpdir}") + logger.info("Storing temporary files in %s", tmpdir) - modules = atleast_1d(cutout.module) - features = atleast_1d(features) if features else slice(None) - prepared = set(atleast_1d(cutout.data.attrs["prepared_features"])) + modules_array: np.ndarray[Any, np.dtype[Any]] = atleast_1d(cutout.module) + modules_list: list[str] = modules_array.tolist() + features_normalized: np.ndarray[Any, np.dtype[Any]] | slice = ( + atleast_1d(features) if features else slice(None) + ) + prepared: set[str] = set(atleast_1d(cutout.data.attrs["prepared_features"])) - # target is series of all available variables for given module and features - target = available_features(modules).loc[:, features].drop_duplicates() + target: pd.Series[str] = ( + available_features(modules_list).loc[:, features_normalized].drop_duplicates() + ) for module in target.index.unique("module"): - missing_vars = target[module] + missing_vars: pd.Series[str] = target[module] if not overwrite: missing_vars = missing_vars[lambda v: ~v.isin(cutout.data)] if missing_vars.empty: continue - logger.info(f"Calculating and writing with module {module}:") - missing_features = missing_vars.index.unique("feature") - ds = get_features( + logger.info("Calculating and writing with module %s:", module) + missing_features: np.ndarray[Any, np.dtype[Any]] = missing_vars.index.unique( + "feature" + ) + ds: Dataset = get_features( cutout, module, missing_features, - 
tmpdir=tmpdir, data_format=data_format, + tmpdir=tmpdir, monthly_requests=monthly_requests, concurrent_requests=concurrent_requests, ) prepared |= set(missing_features) - cutout.data.attrs.update(dict(prepared_features=list(prepared))) - attrs = non_bool_dict(cutout.data.attrs) + cutout.data.attrs.update({"prepared_features": list(prepared)}) + attrs: dict[str, Any] = non_bool_dict(cutout.data.attrs) attrs.update(ds.attrs) - # Add optional compression to the newly prepared features if compression: for v in missing_vars: ds[v].encoding.update(compression) ds = cutout.data.merge(ds[missing_vars.values]).assign_attrs(**attrs) - # write data to tmp file, copy it to original data, this is much safer - # than appending variables + directory: str + filename: str directory, filename = os.path.split(str(cutout.path)) + fd: int + tmp: str fd, tmp = mkstemp(suffix=filename, dir=directory) os.close(fd) logger.debug("Writing cutout to file...") - # Delayed writing for large cutout - # cf. https://stackoverflow.com/questions/69810367/python-how-to-write-large-netcdf-with-xarray - write_job = ds.to_netcdf(tmp, compute=False) + write_job: Any = ds.to_netcdf(tmp, compute=False) if show_progress: with ProgressBar(minimum=2): write_job.compute(**dask_kwargs) @@ -267,7 +333,7 @@ def cutout_prepare( if cutout.path.exists(): cutout.data.close() cutout.path.unlink() - os.rename(tmp, cutout.path) + Path(tmp).rename(cutout.path) cutout.data = xr.open_dataset(cutout.path, chunks=cutout.chunks) diff --git a/atlite/datasets/__init__.py b/atlite/datasets/__init__.py index 045c59d8..7a1ba209 100644 --- a/atlite/datasets/__init__.py +++ b/atlite/datasets/__init__.py @@ -2,10 +2,15 @@ # # SPDX-License-Identifier: MIT -""" -atlite datasets. 
-""" +"""atlite datasets.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING from atlite.datasets import era5, gebco, sarah -modules = {"era5": era5, "sarah": sarah, "gebco": gebco} +if TYPE_CHECKING: + from types import ModuleType + +modules: dict[str, ModuleType] = {"era5": era5, "sarah": sarah, "gebco": gebco} diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index 0e95b4f2..f7ecfe32 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -2,8 +2,7 @@ # # SPDX-License-Identifier: MIT """ -Module containing specific operations for creating cutouts from the CORDEX -dataset. +Module for creating cutouts from the CORDEX dataset. DEPRECATED ---------- @@ -12,14 +11,24 @@ for the time being! """ +from __future__ import annotations + import glob import os from itertools import groupby from operator import itemgetter +from typing import TYPE_CHECKING, Any import pandas as pd import xarray as xr +if TYPE_CHECKING: + from collections.abc import Generator + + import numpy as np + + from atlite._types import PathLike + # Model and CRS Settings model = "MPI-M-MPI-ESM-LR" @@ -29,30 +38,71 @@ # RotProj(dict(proj='ob_tran', o_proj='latlong', lon_0=180, o_lon_p=-162, o_lat_p=39.25)) -def rename_and_clean_coords(ds): +def rename_and_clean_coords(ds: xr.Dataset) -> xr.Dataset: + """Rename rotated coordinates and drop auxiliary variables. + + Parameters + ---------- + ds : xr.Dataset + CORDEX dataset with rotated lon/lat coordinates. + + Returns + ------- + xr.Dataset + Dataset with ``rlon``/``rlat`` renamed to ``x``/``y`` and + ``bnds``, ``height``, ``rotated_pole`` removed if present. 
+ """ ds = ds.rename({"rlon": "x", "rlat": "y"}) - # drop some coordinates and variables we do not use - ds = ds.drop( + return ds.drop( (set(ds.coords) | set(ds.data_vars)) & {"bnds", "height", "rotated_pole"} ) - return ds -def prepare_data_cordex(fn, year, months, oldname, newname, xs, ys): +def prepare_data_cordex( + fn: PathLike, + year: int, + months: list[int], + oldname: str, + newname: str, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Load and prepare time-varying CORDEX data, yielding per-month slices. + + Parameters + ---------- + fn : PathLike + Path to the NetCDF file. + year : int + Year to extract. + months : list of int + Months to extract. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + + Yields + ------ + tuple of ((int, int), xr.Dataset) + ``(year, month)`` key and the corresponding monthly dataset slice. 
+ """ with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.rename({oldname: newname}) ds = ds.sel(x=xs, y=ys) if newname in {"influx", "outflux"}: - # shift averaged data to beginning of bin ds = ds.assign_coords( time=( pd.to_datetime(ds.coords["time"].values) - pd.Timedelta(hours=1.5) ) ) elif newname in {"runoff"}: - # shift and fill 6hr average data to beginning of 3hr bins t = pd.to_datetime(ds.coords["time"].values) ds = ds.reindex(method="bfill", time=(t - pd.Timedelta(hours=3.0)).union(t)) @@ -60,7 +110,39 @@ def prepare_data_cordex(fn, year, months, oldname, newname, xs, ys): yield (year, m), ds.sel(time=f"{year}-{m}") -def prepare_static_data_cordex(fn, year, months, oldname, newname, xs, ys): +def prepare_static_data_cordex( + fn: PathLike, + year: int, + months: list[int], + oldname: str, + newname: str, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Load and prepare static (time-invariant) CORDEX data. + + Parameters + ---------- + fn : PathLike + Path to the NetCDF file. + year : int + Year key for the yielded tuples. + months : list of int + Months to yield entries for. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + + Yields + ------ + tuple of ((int, int), xr.Dataset) + ``(year, month)`` key and the static dataset (same for each month). 
+ """ with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.rename({oldname: newname}) @@ -70,7 +152,39 @@ def prepare_static_data_cordex(fn, year, months, oldname, newname, xs, ys): yield (year, m), ds -def prepare_weather_types_cordex(fn, year, months, oldname, newname, xs, ys): +def prepare_weather_types_cordex( + fn: PathLike, + year: int, + months: list[int], + oldname: str, + newname: str, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Load and prepare CORDEX weather type classification data. + + Parameters + ---------- + fn : PathLike + Path to the NetCDF file. + year : int + Year to extract. + months : list of int + Months to extract. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + xs : slice or np.ndarray + Unused, kept for interface consistency. + ys : slice or np.ndarray + Unused, kept for interface consistency. + + Yields + ------ + tuple of ((int, int), xr.Dataset) + ``(year, month)`` key and the corresponding monthly dataset slice. + """ with xr.open_dataset(fn) as ds: ds = ds.rename({oldname: newname}) for m in months: @@ -78,9 +192,42 @@ def prepare_weather_types_cordex(fn, year, months, oldname, newname, xs, ys): def prepare_meta_cordex( - xs, ys, year, month, template, height_config, module, model=model -): - fn = next(glob.iglob(template.format(year=year, model=model))) + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + year: int, + month: int, + template: str, + height_config: dict[str, Any], + module: Any, + model: str = "MPI-M-MPI-ESM-LR", +) -> xr.Dataset: + """Build metadata dataset for a CORDEX cutout including height. + + Parameters + ---------- + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + year : int + Reference year. + month : int + Reference month. 
+ template : str + Glob template for locating NetCDF files. + height_config : dict + Configuration for height data retrieval. + module : Any + Dataset module reference. + model : str, optional + Climate model identifier. + + Returns + ------- + xr.Dataset + Coordinate metadata dataset with height variable. + """ + fn = next(glob.iglob(template.format(year=year, model=model))) # noqa: PTH207 with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.coords.to_dataset() @@ -103,8 +250,41 @@ def prepare_meta_cordex( def tasks_yearly_cordex( - xs, ys, yearmonths, prepare_func, template, oldname, newname, meta_attrs -): + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + prepare_func: Any, + template: str, + oldname: str, + newname: str, + meta_attrs: dict[str, Any], +) -> list[dict[str, Any]]: + """Create yearly preparation task dicts for CORDEX data retrieval. + + Parameters + ---------- + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + yearmonths : list of (int, int) + ``(year, month)`` pairs to process. + prepare_func : callable + Function to call for data preparation. + template : str + Glob template for locating NetCDF files. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + meta_attrs : dict + Cutout metadata attributes; must contain ``"model"`` key. + + Returns + ------- + list of dict + One task dict per year with keys needed by ``prepare_func``. 
+ """ model = meta_attrs["model"] if not isinstance(xs, slice): @@ -115,138 +295,150 @@ def tasks_yearly_cordex( ys = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) return [ - dict( - prepare_func=prepare_func, - xs=xs, - ys=ys, - oldname=oldname, - newname=newname, - fn=next(glob.iglob(template.format(year=year, model=model))), - year=year, - months=list(map(itemgetter(1), yearmonths)), - ) + { + "prepare_func": prepare_func, + "xs": xs, + "ys": ys, + "oldname": oldname, + "newname": newname, + "fn": next(glob.iglob(template.format(year=year, model=model))), # noqa: PTH207 + "year": year, + "months": list(map(itemgetter(1), yearmonths)), + } for year, yearmonths in groupby(yearmonths, itemgetter(0)) ] -weather_data_config = { - "influx": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="rsds", - newname="influx", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "influx", - "rsds_*_{year}*.nc", - ), - ), - "outflux": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="rsus", - newname="outflux", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "outflux", - "rsus_*_{year}*.nc", - ), - ), - "temperature": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="tas", - newname="temperature", - template=os.path.join( - config.cordex_dir, # noqa: F821 +weather_data_config: dict[str, dict[str, Any]] = {} +try: + from atlite import config # type: ignore[attr-defined] + + weather_data_config = { + "influx": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "rsds", + "newname": "influx", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, + "{model}", + "influx", + "rsds_*_{year}*.nc", + ), + }, + "outflux": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "rsus", + "newname": "outflux", + "template": 
os.path.join( # noqa: PTH118 + config.cordex_dir, + "{model}", + "outflux", + "rsus_*_{year}*.nc", + ), + }, + "temperature": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "tas", + "newname": "temperature", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, + "{model}", + "temperature", + "tas_*_{year}*.nc", + ), + }, + "humidity": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "hurs", + "newname": "humidity", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, + "{model}", + "humidity", + "hurs_*_{year}*.nc", + ), + }, + "wnd10m": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "sfcWind", + "newname": "wnd10m", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, + "{model}", + "wind", + "sfcWind_*_{year}*.nc", + ), + }, + "roughness": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_static_data_cordex, + "oldname": "rlst", + "newname": "roughness", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, + "{model}", + "roughness", + "rlst_*.nc", + ), + }, + "runoff": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "mrro", + "newname": "runoff", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, + "{model}", + "runoff", + "mrro_*_{year}*.nc", + ), + }, + "height": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_static_data_cordex, + "oldname": "orog", + "newname": "height", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, + "{model}", + "altitude", + "orog_*.nc", + ), + }, + "CWT": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_weather_types_cordex, + "oldname": "CWT", + "newname": "CWT", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, + "{model}", + "weather_types", + "CWT_*_{year}*.nc", + ), + }, + } +except ImportError: + pass + +meta_data_config: 
dict[str, Any] = {} +try: + from atlite import config # type: ignore[attr-defined] + + meta_data_config = { + "prepare_func": prepare_meta_cordex, + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, "{model}", "temperature", "tas_*_{year}*.nc", ), - ), - "humidity": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="hurs", - newname="humidity", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "humidity", - "hurs_*_{year}*.nc", - ), - ), - "wnd10m": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="sfcWind", - newname="wnd10m", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "wind", - "sfcWind_*_{year}*.nc", - ), - ), - "roughness": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_static_data_cordex, - oldname="rlst", - newname="roughness", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "roughness", - "rlst_*.nc", - ), - ), - "runoff": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="mrro", - newname="runoff", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "runoff", - "mrro_*_{year}*.nc", - ), - ), - "height": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_static_data_cordex, - oldname="orog", - newname="height", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "altitude", - "orog_*.nc", - ), - ), - "CWT": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_weather_types_cordex, - oldname="CWT", - newname="CWT", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "weather_types", - "CWT_*_{year}*.nc", - ), - ), -} - -meta_data_config = dict( - prepare_func=prepare_meta_cordex, - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "temperature", - "tas_*_{year}*.nc", - ), - height_config=weather_data_config["height"], -) + 
"height_config": weather_data_config["height"], + } +except (ImportError, KeyError): + pass diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index f2f957a7..2f5cd7aa 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -8,12 +8,15 @@ https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation """ +from __future__ import annotations + import logging import os import warnings import weakref from pathlib import Path from tempfile import mkstemp +from typing import TYPE_CHECKING, Any, Literal import cdsapi import numpy as np @@ -21,12 +24,18 @@ import xarray as xr from dask import compute, delayed from dask.array import arctan2, sqrt -from dask.utils import SerializableLock from numpy import atleast_1d from atlite.gis import maybe_swap_spatial_dims from atlite.pv.solar_position import SolarPosition +if TYPE_CHECKING: + from collections.abc import Callable + + from dask.utils import SerializableLock + + from atlite._types import ERA5RetrievalParams, PathLike + # Null context for running a with statements wihout any context try: from contextlib import nullcontext @@ -34,8 +43,8 @@ # for Python verions < 3.7: import contextlib - @contextlib.contextmanager - def nullcontext(): + @contextlib.contextmanager # type: ignore[no-redef] + def nullcontext(): # noqa: D103 yield @@ -62,48 +71,73 @@ def nullcontext(): static_features = {"height"} -def _add_height(ds): +def _add_height(ds: xr.Dataset) -> xr.Dataset: """ - Convert geopotential 'z' to geopotential height following [1]. + Convert geopotential to height and replace the 'z' variable. - References + Parameters ---------- - [1] ERA5: surface elevation and orography, retrieved: 10.02.2019 - https://confluence.ecmwf.int/display/CKB/ERA5%3A+surface+elevation+and+orography + ds : xr.Dataset + Dataset containing geopotential variable 'z'. + Returns + ------- + xr.Dataset + Dataset with 'height' variable in meters, 'z' removed. 
""" g0 = 9.80665 z = ds["z"] if "time" in z.coords: z = z.isel(time=0, drop=True) ds["height"] = z / g0 - ds = ds.drop_vars("z") - return ds + return ds.drop_vars("z") -def _rename_and_clean_coords(ds, add_lon_lat=True): +def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dataset: """ - Rename 'longitude' and 'latitude' columns to 'x' and 'y' and fix roundings. + Standardize coordinate names and clean up auxiliary variables. + + Renames longitude/latitude/valid_time to x/y/time, rounds spatial + coordinates, and drops 'expver'/'number' if present. + + Parameters + ---------- + ds : xr.Dataset + Raw ERA5 dataset with original coordinate names. + add_lon_lat : bool, optional + Whether to add 'lon'/'lat' as coordinate aliases. Default True. - Optionally (add_lon_lat, default:True) preserves latitude and - longitude columns as 'lat' and 'lon'. + Returns + ------- + xr.Dataset + Dataset with standardized coordinates. """ ds = ds.rename({"longitude": "x", "latitude": "y", "valid_time": "time"}) - # round coords since cds coords are float32 which would lead to mismatches ds = ds.assign_coords( x=np.round(ds.x.astype(float), 5), y=np.round(ds.y.astype(float), 5) ) ds = maybe_swap_spatial_dims(ds) if add_lon_lat: ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) - ds = ds.drop_vars(["expver", "number"], errors="ignore") - - return ds + return ds.drop_vars(["expver", "number"], errors="ignore") -def get_data_wind(retrieval_params): +def get_data_wind(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: """ - Get wind data for given retrieval parameters. + Retrieve and compute wind speed variables from ERA5. + + Downloads u/v wind components at 10m and 100m, computes wind speed, + shear exponent, azimuth angle, and surface roughness. + + Parameters + ---------- + retrieval_params : ERA5RetrievalParams + CDS API retrieval parameters including area, time, and format. 
+ + Returns + ------- + xr.Dataset + Dataset with variables: wnd100m, wnd_shear_exp, wnd_azimuth, roughness. """ ds = retrieve_data( variable=[ @@ -125,27 +159,48 @@ def get_data_wind(retrieval_params): np.log(ds["wnd10m"] / ds["wnd100m"]) / np.log(10 / 100) ).assign_attrs(units="", long_name="wind shear exponent") - # span the whole circle: 0 is north, π/2 is east, -π is south, 3π/2 is west azimuth = arctan2(ds["u100"], ds["v100"]) ds["wnd_azimuth"] = azimuth.where(azimuth >= 0, azimuth + 2 * np.pi) ds = ds.drop_vars(["u100", "v100", "u10", "v10", "wnd10m"]) - ds = ds.rename({"fsr": "roughness"}) - - return ds + return ds.rename({"fsr": "roughness"}) -def sanitize_wind(ds): +def sanitize_wind(ds: xr.Dataset) -> xr.Dataset: """ - Sanitize retrieved wind data. + Clip negative roughness values to a minimum of 2e-4. + + Parameters + ---------- + ds : xr.Dataset + Wind dataset containing 'roughness' variable. + + Returns + ------- + xr.Dataset + Dataset with corrected roughness values. """ ds["roughness"] = ds["roughness"].where(ds["roughness"] >= 0.0, 2e-4) return ds -def get_data_influx(retrieval_params): +def get_data_influx(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: """ - Get influx data for given retrieval parameters. + Retrieve and compute solar radiation variables from ERA5. + + Downloads radiation components, converts from J/m² to W/m², computes + albedo, diffuse radiation, and solar position (altitude/azimuth). + + Parameters + ---------- + retrieval_params : ERA5RetrievalParams + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with variables: influx_toa, influx_direct, influx_diffuse, + albedo, solar_altitude, solar_azimuth. 
""" ds = retrieve_data( variable=[ @@ -170,38 +225,53 @@ def get_data_influx(retrieval_params): ) ds = ds.drop_vars(["ssrd", "ssr"]) - # Convert from energy to power J m**-2 -> W m**-2 and clip negative fluxes for a in ("influx_direct", "influx_diffuse", "influx_toa"): ds[a] = ds[a] / (60.0 * 60.0) ds[a].attrs["units"] = "W m**-2" - # ERA5 variables are mean values for previous hour, i.e. 13:01 to 14:00 are labelled as "14:00" - # account by calculating the SolarPosition for the center of the interval for aggregation happens - # see https://github.com/PyPSA/atlite/issues/158 - # Do not show DeprecationWarning from new SolarPosition calculation (#199) with warnings.catch_warnings(): warnings.simplefilter("ignore", DeprecationWarning) time_shift = pd.to_timedelta("-30 minutes") sp = SolarPosition(ds, time_shift=time_shift) sp = sp.rename({v: f"solar_{v}" for v in sp.data_vars}) - ds = xr.merge([ds, sp]) - - return ds + return xr.merge([ds, sp]) -def sanitize_influx(ds): +def sanitize_influx(ds: xr.Dataset) -> xr.Dataset: """ - Sanitize retrieved influx data. + Clip negative radiation values to zero. + + Parameters + ---------- + ds : xr.Dataset + Influx dataset with influx_direct, influx_diffuse, influx_toa. + + Returns + ------- + xr.Dataset + Dataset with non-negative radiation values. """ for a in ("influx_direct", "influx_diffuse", "influx_toa"): ds[a] = ds[a].clip(min=0.0) return ds -def get_data_temperature(retrieval_params): +def get_data_temperature(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: """ - Get wind temperature for given retrieval parameters. + Retrieve temperature variables from ERA5. + + Downloads 2m temperature, soil temperature (level 4), and 2m dewpoint. + + Parameters + ---------- + retrieval_params : ERA5RetrievalParams + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with variables: temperature, soil temperature, dewpoint temperature. 
""" ds = retrieve_data( variable=[ @@ -213,79 +283,115 @@ def get_data_temperature(retrieval_params): ) ds = _rename_and_clean_coords(ds) - ds = ds.rename( - { - "t2m": "temperature", - "stl4": "soil temperature", - "d2m": "dewpoint temperature", - } - ) + return ds.rename({ + "t2m": "temperature", + "stl4": "soil temperature", + "d2m": "dewpoint temperature", + }) - return ds - -def get_data_runoff(retrieval_params): +def get_data_runoff(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: """ - Get runoff data for given retrieval parameters. + Retrieve runoff data from ERA5. + + Parameters + ---------- + retrieval_params : ERA5RetrievalParams + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with 'runoff' variable. """ ds = retrieve_data(variable=["runoff"], **retrieval_params) ds = _rename_and_clean_coords(ds) - ds = ds.rename({"ro": "runoff"}) - - return ds + return ds.rename({"ro": "runoff"}) -def sanitize_runoff(ds): +def sanitize_runoff(ds: xr.Dataset) -> xr.Dataset: """ - Sanitize retrieved runoff data. + Clip negative runoff values to zero. + + Parameters + ---------- + ds : xr.Dataset + Runoff dataset containing 'runoff' variable. + + Returns + ------- + xr.Dataset + Dataset with non-negative runoff values. """ ds["runoff"] = ds["runoff"].clip(min=0.0) return ds -def get_data_height(retrieval_params): +def get_data_height(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: """ - Get height data for given retrieval parameters. + Retrieve geopotential and convert to terrain height. + + Parameters + ---------- + retrieval_params : ERA5RetrievalParams + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with 'height' variable in meters. 
""" ds = retrieve_data(variable="geopotential", **retrieval_params) ds = _rename_and_clean_coords(ds) - ds = _add_height(ds) + return _add_height(ds) - return ds +def _area(coords: dict[str, xr.DataArray]) -> list[float]: + """ + Extract CDS API bounding box from coordinates. -def _area(coords): - # North, West, South, East. Default: global + Parameters + ---------- + coords : dict[str, xr.DataArray] + Coordinate arrays with 'x' (longitude) and 'y' (latitude). + + Returns + ------- + list[float] + Bounding box as [north, west, south, east]. + """ x0, x1 = coords["x"].min().item(), coords["x"].max().item() y0, y1 = coords["y"].min().item(), coords["y"].max().item() return [y1, x0, y0, x1] -def retrieval_times(coords, static=False, monthly_requests=False): +def retrieval_times( + coords: dict[str, xr.DataArray], + static: bool = False, + monthly_requests: bool = False, +) -> dict[str, Any] | list[dict[str, Any]]: """ - Get list of retrieval cdsapi arguments for time dimension in coordinates. + Generate time parameter chunks for CDS API requests. - If static is False, this function creates a query for each month and year - in the time axis in coords. This ensures not running into size query limits - of the cdsapi even with very (spatially) large cutouts. - If static is True, the function return only one set of parameters - for the very first time point. + Splits the time coordinate into year-based (or year-month-based) chunks + suitable for the CDS API query format. Parameters ---------- - coords : atlite.Cutout.coords + coords : dict[str, xr.DataArray] + Coordinate arrays with 'time' dimension. static : bool, optional + If True, return a single time point (for time-invariant fields). monthly_requests : bool, optional - If True, the data is requested on a monthly basis. This is useful for - large cutouts, where the data is requested in smaller chunks. The - default is False + If True, split requests by month within each year. 
Returns ------- - list of dicts witht retrieval arguments - + dict[str, Any] or list[dict[str, Any]] + Single dict if static, otherwise list of dicts with + 'year', 'month', 'day', 'time' keys. """ time = coords["time"].to_index() if static: @@ -296,8 +402,7 @@ def retrieval_times(coords, static=False, monthly_requests=False): "time": time[0].strftime("%H:00"), } - # Prepare request for all months and years - times = [] + times: list[dict[str, Any]] = [] for year in time.year.unique(): t = time[time.year == year] if monthly_requests: @@ -320,26 +425,62 @@ def retrieval_times(coords, static=False, monthly_requests=False): return times -def noisy_unlink(path): +def noisy_unlink(path: PathLike) -> None: """ - Delete file at given path. + Remove a file with debug logging, handling PermissionError gracefully. + + Parameters + ---------- + path : PathLike + Path to the file to delete. """ - logger.debug(f"Deleting file {path}") + logger.debug("Deleting file %s", path) try: - os.unlink(path) + Path(path).unlink() except PermissionError: - logger.error(f"Unable to delete file {path}, as it is still in use.") + logger.error("Unable to delete file %s, as it is still in use.", path) + +def add_finalizer(ds: xr.Dataset, target: PathLike) -> None: + """ + Register a weak-reference callback to delete a temp file on garbage collection. -def add_finalizer(ds: xr.Dataset, target: str | Path): - logger.debug(f"Adding finalizer for {target}") + Parameters + ---------- + ds : xr.Dataset + Dataset whose lifetime controls the temp file. + target : PathLike + Path to the temporary file to clean up. + """ + logger.debug("Adding finalizer for %s", target) weakref.finalize(ds._close.__self__.ds, noisy_unlink, target) -def sanitize_chunks(chunks, **dim_mapping): - dim_mapping = dict(time="valid_time", x="longitude", y="latitude") | dim_mapping +def sanitize_chunks(chunks: Any, **dim_mapping: str) -> Any: + """ + Remap internal dimension names to ERA5/CDS dimension names in chunk specs. 
+ + Translates atlite dimension names (time, x, y) to the corresponding + ERA5 names (valid_time, longitude, latitude). + + Parameters + ---------- + chunks : Any + Chunk specification. If not a dict, returned as-is. + **dim_mapping : str + Additional or override dimension name mappings. + + Returns + ------- + Any + Remapped chunk dict, or original value if not a dict. + """ + dim_mapping = { + "time": "valid_time", + "x": "longitude", + "y": "latitude", + } | dim_mapping if not isinstance(chunks, dict): - # preserve "auto" or None return chunks return { @@ -350,40 +491,35 @@ def sanitize_chunks(chunks, **dim_mapping): def open_with_grib_conventions( - grib_file: str | Path, chunks=None, tmpdir: str | Path | None = None + grib_file: PathLike, + chunks: dict[str, int] | None = None, + tmpdir: PathLike | None = None, ) -> xr.Dataset: """ - Convert grib file of ERA5 data from the CDS to netcdf file. + Open a GRIB file using cfgrib with standardized coordinate conventions. - The function does the same thing as the CDS backend does, but locally. - This is needed, as the grib file is the recommended download file type for CDS, with conversion to netcdf locally. - The routine is a reduced version based on the documentation here: - https://confluence.ecmwf.int/display/CKB/GRIB+to+netCDF+conversion+on+new+CDS+and+ADS+systems#GRIBtonetCDFconversiononnewCDSandADSsystems-jupiternotebook + Renames forecast/pressure/model dimensions and expands missing dimensions. + If ``tmpdir`` is None, registers a finalizer to delete the file on GC. Parameters ---------- - grib_file : str | Path - Path to the grib file to be converted. - chunks - Chunks - tmpdir : Path, optional - If None adds a finalizer to the dataset object + grib_file : PathLike + Path to the GRIB file. + chunks : dict[str, int] or None, optional + Dask chunk specification for lazy loading. + tmpdir : PathLike or None, optional + If set, the file is kept (managed externally). 
Returns ------- xr.Dataset + Opened dataset with standardized dimensions. """ - # - # Open grib file as dataset - # Options to open different datasets into a datasets of consistent hypercubes which are compatible netCDF - # There are options that might be relevant for e.g. for wave model data, that have been removed here - # to keep the code cleaner and shorter ds = xr.open_dataset( grib_file, engine="cfgrib", time_dims=["valid_time"], ignore_keys=["edition"], - # extra_coords={"expver": "valid_time"}, coords_as_attributes=[ "surface", "depthBelowLandLayer", @@ -397,10 +533,6 @@ def open_with_grib_conventions( add_finalizer(ds, grib_file) def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Dataset: - """ - Expand dimensions in an xarray dataset, ensuring that the new dimensions are not already in the dataset - and that the order of dimensions is preserved. - """ dims_required = [ c for c in dataset.coords if c in expand_dims + list(dataset.dims) ] @@ -413,7 +545,6 @@ def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Datase return dataset logger.debug("Converting grib file to netcdf format") - # Variables and dimensions to rename if they exist in the dataset rename_vars = { "time": "forecast_reference_time", "step": "forecast_period", @@ -423,7 +554,6 @@ def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Datase rename_vars = {k: v for k, v in rename_vars.items() if k in ds} ds = ds.rename(rename_vars) - # safely expand dimensions in an xarray dataset to ensure that data for the new dimensions are in the dataset ds = safely_expand_dims(ds, ["valid_time", "pressure_level", "model_level"]) return ds @@ -432,140 +562,122 @@ def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Datase def retrieve_data( product: str, chunks: dict[str, int] | None = None, - tmpdir: str | Path | None = None, + tmpdir: PathLike | None = None, lock: SerializableLock | None = None, - **updates, + 
**updates: Any, ) -> xr.Dataset: """ - Download data like ERA5 from the Climate Data Store (CDS). - - If you want to track the state of your request go to - https://cds-beta.climate.copernicus.eu/requests?tab=all + Download ERA5 data from the CDS API and return as an xarray Dataset. Parameters ---------- product : str - Product name, e.g. 'reanalysis-era5-single-levels'. - chunks : dict, optional - Chunking for xarray dataset, e.g. {'time': 1, 'x': 100, 'y': 100}. - Default is None. - tmpdir : str, optional - Directory where the downloaded data is temporarily stored. - Default is None, which uses the system's temporary directory. - lock : dask.utils.SerializableLock, optional - Lock for thread-safe file writing. Default is None. - updates : dict - Additional parameters for the request. - Must include 'year', 'month', and 'variable'. - Can include e.g. 'data_format'. + CDS product name (e.g. 'reanalysis-era5-single-levels'). + chunks : dict[str, int] or None, optional + Dask chunk specification for lazy loading. + tmpdir : PathLike or None, optional + Directory for temporary download files. If None, files are + cleaned up via finalizer on GC. + lock : SerializableLock or None, optional + Lock for thread-safe file creation. + **updates : Any + Additional CDS API request parameters. Must include at least + 'variable', 'year', and 'month'. Returns ------- - xarray.Dataset - Dataset with the retrieved variables. - - Examples - -------- - >>> ds = retrieve_data( - ... product='reanalysis-era5-single-levels', - ... chunks={'time': 1, 'x': 100, 'y': 100}, - ... tmpdir='/tmp', - ... lock=None, - ... year='2020', - ... month='01', - ... variable=['10m_u_component_of_wind', '10m_v_component_of_wind'], - ... data_format='netcdf' - ... ) - """ - request = {"product_type": ["reanalysis"], "download_format": "unarchived"} + xr.Dataset + Downloaded ERA5 data. 
+ + """ + request: dict[str, Any] = { + "product_type": ["reanalysis"], + "download_format": "unarchived", + } request.update(updates) assert {"year", "month", "variable"}.issubset(request), ( "Need to specify at least 'variable', 'year' and 'month'" ) - logger.debug(f"Requesting {product} with API request: {request}") + logger.debug("Requesting %s with API request: %s", product, request) client = cdsapi.Client( - info_callback=logger.debug, debug=logging.DEBUG >= logging.root.level + info_callback=logger.debug, debug=logging.root.level <= logging.DEBUG ) result = client.retrieve(product, request) if lock is None: lock = nullcontext() - suffix = f".{request['data_format']}" # .netcdf or .grib + suffix = f".{request['data_format']}" with lock: fd, target = mkstemp(suffix=suffix, dir=tmpdir) os.close(fd) - # Inform user about data being downloaded as "* variable (year-month)" timestr = f"{request['year']}-{request['month']}" variables = atleast_1d(request["variable"]) varstr = "\n\t".join([f"{v} ({timestr})" for v in variables]) - logger.info(f"CDS: Downloading variables\n\t{varstr}\n") + logger.info("CDS: Downloading variables\n\t%s\n", varstr) result.download(target) - # Convert from grib to netcdf locally, same conversion as in CDS backend if request["data_format"] == "grib": ds = open_with_grib_conventions(target, chunks=chunks, tmpdir=tmpdir) else: ds = xr.open_dataset(target, chunks=sanitize_chunks(chunks)) if tmpdir is None: - add_finalizer(target) + add_finalizer(ds, target) return ds def get_data( - cutout, - feature, - tmpdir, - lock=None, - data_format="grib", - monthly_requests=False, - concurrent_requests=False, - **creation_parameters, -): + cutout: Any, + feature: str, + tmpdir: PathLike, + lock: SerializableLock | None = None, + data_format: Literal["grib", "netcdf"] = "grib", + monthly_requests: bool = False, + concurrent_requests: bool = False, + **creation_parameters: Any, +) -> xr.Dataset: """ - Retrieve data from ECMWFs ERA5 dataset (via CDS). 
+ Download ERA5 data for a given feature. - This front-end function downloads data for a specific feature and formats - it to match the given Cutout. + Dispatches to feature-specific ``get_data_{feature}`` functions, + optionally applies ``sanitize_{feature}``, and concatenates time chunks. Parameters ---------- - cutout : atlite.Cutout + cutout : Cutout + Cutout object defining the spatial and temporal extent. feature : str - Name of the feature data to retrieve. Must be in - `atlite.datasets.era5.features` - tmpdir : str/Path - Directory where the temporary netcdf files are stored. + Feature to retrieve (e.g. 'wind', 'influx', 'temperature', + 'runoff', 'height'). + tmpdir : PathLike + Directory for temporary download files. + lock : SerializableLock or None, optional + Lock for thread-safe file creation. + data_format : {{'grib', 'netcdf'}}, optional + Download format. Default 'grib'. monthly_requests : bool, optional - If True, the data is requested on a monthly basis in ERA5. This is useful for - large cutouts, where the data is requested in smaller chunks. The - default is False - data_format : str, optional - The format of the data to be downloaded. Can be either 'grib' or 'netcdf', - 'grib' highly recommended because CDSAPI limits request size for netcdf. + If True, split API requests by month. Default False. concurrent_requests : bool, optional - If True, the monthly data requests are posted concurrently. - Only has an effect if `monthly_requests` is True. - **creation_parameters : - Additional keyword arguments. The only effective argument is 'sanitize' - (default True) which sets sanitization of the data on or off. + If True, use dask.delayed for parallel downloads. Default False. + **creation_parameters : Any + Additional parameters; 'sanitize' (bool, default True) controls + whether post-processing is applied. Returns ------- - xarray.Dataset - Dataset of dask arrays of the retrieved variables. 
- + xr.Dataset + ERA5 data for the requested feature, aligned to cutout coordinates. """ coords = cutout.coords sanitize = creation_parameters.get("sanitize", True) - retrieval_params = { + retrieval_params: ERA5RetrievalParams = { "product": "reanalysis-era5-single-levels", "area": _area(coords), "chunks": cutout.chunks, @@ -575,21 +687,28 @@ def get_data( "data_format": data_format, } - func = globals().get(f"get_data_{feature}") - sanitize_func = globals().get(f"sanitize_{feature}") + func: Callable[[ERA5RetrievalParams], xr.Dataset] | None = globals().get( + f"get_data_{feature}" + ) + sanitize_func: Callable[[xr.Dataset], xr.Dataset] | None = globals().get( + f"sanitize_{feature}" + ) - logger.info(f"Requesting data for feature {feature}...") + logger.info("Requesting data for feature %s...", feature) - def retrieve_once(time): - ds = func({**retrieval_params, **time}) + def retrieve_once(time: dict[str, Any]) -> xr.Dataset: + ds = func({**retrieval_params, **time}) # type: ignore[misc, typeddict-item] if sanitize and sanitize_func is not None: ds = sanitize_func(ds) return ds if feature in static_features: - return retrieve_once(retrieval_times(coords, static=True)).squeeze() + static_times = retrieval_times(coords, static=True) + assert isinstance(static_times, dict) + return retrieve_once(static_times).squeeze() time_chunks = retrieval_times(coords, monthly_requests=monthly_requests) + assert isinstance(time_chunks, list) if concurrent_requests: delayed_datasets = [delayed(retrieve_once)(chunk) for chunk in time_chunks] datasets = compute(*delayed_datasets) diff --git a/atlite/datasets/gebco.py b/atlite/datasets/gebco.py index 948e862c..4d51dedd 100755 --- a/atlite/datasets/gebco.py +++ b/atlite/datasets/gebco.py @@ -3,24 +3,46 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module for loading gebco data. 
-""" +"""Module for loading gebco data.""" + +from __future__ import annotations import logging +from typing import TYPE_CHECKING, Any import rasterio as rio import xarray as xr from pandas import to_numeric from rasterio.warp import Resampling +if TYPE_CHECKING: + from atlite._types import PathLike + logger = logging.getLogger(__name__) crs = 4326 features = {"height": ["height"]} -def get_data_gebco_height(xs, ys, gebco_path): +def get_data_gebco_height( + xs: xr.DataArray, ys: xr.DataArray, gebco_path: PathLike +) -> xr.DataArray: + """Read and resample GEBCO bathymetry/elevation to the target grid. + + Parameters + ---------- + xs : xr.DataArray + Target x (longitude) coordinates. + ys : xr.DataArray + Target y (latitude) coordinates. + gebco_path : PathLike + Path to the GEBCO GeoTIFF or NetCDF file. + + Returns + ------- + xr.DataArray + Height data resampled to the target grid with dimensions ``(y, x)``. + """ x, X = xs.data[[0, -1]] y, Y = ys.data[[0, -1]] @@ -35,7 +57,7 @@ def get_data_gebco_height(xs, ys, gebco_path): out_shape=(len(ys), len(xs)), resampling=Resampling.average, ) - gebco = gebco[::-1] # change inversed y-axis + gebco = gebco[::-1] tags = dataset.tags(bidx=1) tags = {k: to_numeric(v, errors="ignore") for k, v in tags.items()} @@ -45,41 +67,40 @@ def get_data_gebco_height(xs, ys, gebco_path): def get_data( - cutout, - feature, - tmpdir, - monthly_requests=False, - concurrent_requests=False, - **creation_parameters, -): - """ - Get the gebco height data. + cutout: Any, + feature: str, + tmpdir: PathLike, + monthly_requests: bool = False, + concurrent_requests: bool = False, + **creation_parameters: Any, +) -> xr.Dataset: + """Retrieve GEBCO height data for a cutout. Parameters ---------- - cutout : atlite.Cutout + cutout : Any + Cutout instance providing target coordinates. feature : str - Takes no effect, only here for consistency with other dataset modules. 
- tmpdir : str - Takes no effect, only here for consistency with other dataset modules. - monthly_requests : bool - Takes no effect, only here for consistency with other dataset modules. - concurrent_requests : bool - Takes no effect, only here for consistency with other dataset modules. - **creation_parameters : - Must include `gebco_path`. + Feature name (expected ``"height"``). + tmpdir : PathLike + Temporary directory (unused, kept for interface consistency). + monthly_requests : bool, optional + Unused, kept for interface consistency. + concurrent_requests : bool, optional + Unused, kept for interface consistency. + **creation_parameters : Any + Must include ``"gebco_path"`` pointing to the GEBCO data file. Returns ------- xr.Dataset - + Dataset with height variable on the cutout grid. """ if "gebco_path" not in creation_parameters: logger.error('Argument "gebco_path" not defined') path = creation_parameters["gebco_path"] coords = cutout.coords - # assign time dimension even if not used return ( get_data_gebco_height(coords["x"], coords["y"], path) .to_dataset() diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 115e4d24..87cdcea2 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -2,8 +2,7 @@ # # SPDX-License-Identifier: MIT """ -Module containing specific operations for creating cutouts from the NCEP -dataset. +Module for creating cutouts from the NCEP dataset. DEPRECATED ---------- @@ -12,18 +11,44 @@ for the time being! 
""" +from __future__ import annotations + import glob import os +from typing import TYPE_CHECKING, Any import numpy as np import pandas as pd import xarray as xr -engine = "pynio" -crs = 4326 +if TYPE_CHECKING: + from collections.abc import Generator + + from atlite._types import PathLike + +engine: str = "pynio" +crs: int = 4326 -def convert_lons_lats_ncep(ds, xs, ys): +def convert_lons_lats_ncep( + ds: xr.Dataset, xs: slice | np.ndarray[Any, Any], ys: slice | np.ndarray[Any, Any] +) -> xr.Dataset: + """Select and rename NCEP longitude/latitude coordinates, handling wraparound. + + Parameters + ---------- + ds : xr.Dataset + Dataset with ``lon_0`` and ``lat_0`` coordinates. + xs : slice or np.ndarray + Longitude selection range or array. + ys : slice or np.ndarray + Latitude selection range or array. + + Returns + ------- + xr.Dataset + Dataset with coordinates renamed to ``x``/``y`` and ``lon``/``lat``. + """ if not isinstance(xs, slice): first, second, last = np.asarray(xs)[[0, 1, -1]] xs = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) @@ -33,7 +58,6 @@ def convert_lons_lats_ncep(ds, xs, ys): ds = ds.sel(lat_0=ys) - # Lons should go from -180. to +180. if len(ds.coords["lon_0"].sel(lon_0=slice(xs.start + 360.0, xs.stop + 360.0))): ds = xr.concat( [ds.sel(lon_0=slice(xs.start + 360.0, xs.stop + 360.0)), ds.sel(lon_0=xs)], @@ -50,12 +74,24 @@ def convert_lons_lats_ncep(ds, xs, ys): ds = ds.sel(lon_0=xs) ds = ds.rename({"lon_0": "x", "lat_0": "y"}) - ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) - return ds + return ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) + + +def convert_time_hourly_ncep(ds: xr.Dataset, drop_time_vars: bool = True) -> xr.Dataset: + """Stack initial and forecast times into a single hourly time dimension. + Parameters + ---------- + ds : xr.Dataset + Dataset with ``initial_time0_hours`` and ``forecast_time0`` dimensions. 
+ drop_time_vars : bool, optional + Drop auxiliary time variables (default ``True``). -def convert_time_hourly_ncep(ds, drop_time_vars=True): - # Combine initial_time0 and forecast_time0 + Returns + ------- + xr.Dataset + Dataset with a unified ``time`` coordinate. + """ ds = ds.stack(time=("initial_time0_hours", "forecast_time0")).assign_coords( time=np.ravel( ds.coords["initial_time0_hours"] @@ -68,13 +104,21 @@ def convert_time_hourly_ncep(ds, drop_time_vars=True): return ds -def convert_unaverage_ncep(ds): - # the fields ending in _avg contain averages which have to be unaveraged by using - # \begin{equation} - # \tilde x_1 = x_1 \quad \tilde x_i = i \cdot x_i - (i - 1) \cdot x_{i-1} \quad \forall i > 1 - # \end{equation} +def convert_unaverage_ncep(ds: xr.Dataset) -> xr.Dataset: + """Convert running-average variables (``*_avg``) to instantaneous values. + + Parameters + ---------- + ds : xr.Dataset + Dataset with variables ending in ``_avg``. - def unaverage(da, dim="forecast_time0"): + Returns + ------- + xr.Dataset + Dataset with un-averaged variables replacing the originals. 
+ """ + + def unaverage(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: coords = da.coords[dim] y = da * xr.DataArray( np.arange(1, len(coords) + 1), dims=[dim], coords={dim: coords} @@ -82,6 +126,7 @@ def unaverage(da, dim="forecast_time0"): return y - y.shift(**{dim: 1}).fillna(0.0) for k, da in ds.items(): + assert isinstance(k, str) if k.endswith("_avg"): ds[k[: -len("_avg")]] = unaverage(da) ds = ds.drop(k) @@ -89,20 +134,25 @@ def unaverage(da, dim="forecast_time0"): return ds -def convert_unaccumulate_ncep(ds): - # the fields ending in _acc contain values that are accumulated over the - # forecast_time which have to be unaccumulated by using: - # \begin{equation} - # \tilde x_1 = x_1 - # \tilde x_i = x_i - x_{i-1} \forall 1 < i <= 6 - # \end{equation} - # Source: - # http://rda.ucar.edu/datasets/ds094.1/#docs/FAQs_hrly_timeseries.html +def convert_unaccumulate_ncep(ds: xr.Dataset) -> xr.Dataset: + """Convert accumulated variables (``*_acc``) to per-timestep values. + + Parameters + ---------- + ds : xr.Dataset + Dataset with variables ending in ``_acc``. + + Returns + ------- + xr.Dataset + Dataset with de-accumulated variables replacing the originals. + """ - def unaccumulate(da, dim="forecast_time0"): + def unaccumulate(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: return da - da.shift(**{dim: 1}).fillna(0.0) for k, da in ds.items(): + assert isinstance(k, str) if k.endswith("_acc"): ds[k[: -len("_acc")]] = unaccumulate(da) ds = ds.drop(k) @@ -110,17 +160,58 @@ def unaccumulate(da, dim="forecast_time0"): return ds -def convert_clip_lower(ds, variable, a_min, value): - """ - Set values of `variable` that are below `a_min` to `value`. - - Similar to `numpy.clip`. +def convert_clip_lower( + ds: xr.Dataset, variable: str, a_min: float, value: float +) -> xr.Dataset: + """Replace values at or below a threshold with a fill value. + + Parameters + ---------- + ds : xr.Dataset + Input dataset. 
+ variable : str + Name of the variable to clip. + a_min : float + Threshold; values <= ``a_min`` are replaced. + value : float + Replacement value. + + Returns + ------- + xr.Dataset + Dataset with clipped variable. """ ds[variable] = ds[variable].where(ds[variable] > a_min).fillna(value) return ds -def prepare_wnd10m_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_wnd10m_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare 10-m wind speed from NCEP U/V components. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``wnd10m`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -131,31 +222,107 @@ def prepare_wnd10m_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_influx_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_influx_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare downward shortwave radiation flux from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. 
+ + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``influx`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_unaverage_ncep(ds) ds = convert_time_hourly_ncep(ds) ds = ds.rename({"DSWRF_P8_L1_GGA0": "influx"}) - # clipping random fluctuations around zero ds = convert_clip_lower(ds, "influx", a_min=0.1, value=0.0) yield yearmonth, ds -def prepare_outflux_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_outflux_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare upward shortwave radiation flux from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``outflux`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_unaverage_ncep(ds) ds = convert_time_hourly_ncep(ds) ds = ds.rename({"USWRF_P8_L1_GGA0": "outflux"}) - # clipping random fluctuations around zero ds = convert_clip_lower(ds, "outflux", a_min=3.0, value=0.0) yield yearmonth, ds -def prepare_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_temperature_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare 2-m air temperature from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. 
+ yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``temperature`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -164,7 +331,33 @@ def prepare_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_soil_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_soil_temperature_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare soil temperature from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``soil temperature`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -173,10 +366,35 @@ def prepare_soil_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_runoff_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_runoff_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare surface runoff from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. 
+ yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``runoff`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) - # runoff has missing values: set nans to 0 ds = ds.fillna(0.0) ds = convert_unaccumulate_ncep(ds) ds = convert_time_hourly_ncep(ds) @@ -185,7 +403,33 @@ def prepare_runoff_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_height_ncep(fn, xs, ys, yearmonths, engine=engine): +def prepare_height_ncep( + fn: PathLike, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare geopotential height from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + yearmonths : list of tuple of (int, int) + Year-month pairs to yield the same height data for. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``height`` variable for each yearmonth. 
+ """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = ds.rename({"HGT_P0_L105_GGA0": "height"}) @@ -193,7 +437,33 @@ def prepare_height_ncep(fn, xs, ys, yearmonths, engine=engine): yield ym, ds -def prepare_roughness_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_roughness_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare surface roughness from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``roughness`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = ds.rename({"SFCR_P8_L1_GGA0": "roughness"}) @@ -202,9 +472,42 @@ def prepare_roughness_ncep(fn, yearmonth, xs, ys, engine=engine): def prepare_meta_ncep( - xs, ys, year, month, template, height_config, module, engine=engine -): - fn = next(glob.iglob(template.format(year=year, month=month))) + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + year: int, + month: int, + template: str, + height_config: dict[str, Any], + module: Any, + engine: str = engine, +) -> xr.Dataset: + """Prepare cutout metadata including coordinates and height. + + Parameters + ---------- + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + year : int + Reference year. + month : int + Reference month. + template : str + Glob-able file path template with ``{year}`` and ``{month}`` placeholders. 
+ height_config : dict + Configuration dict for height preparation (must include ``tasks_func``). + module : Any + Dataset module reference. + engine : str, optional + xarray backend engine. + + Returns + ------- + xr.Dataset + Metadata dataset with coordinates, time, and ``height``. + """ + fn = next(glob.iglob(template.format(year=year, month=month))) # noqa: PTH207 with xr.open_dataset(fn, engine=engine) as ds: ds = ds.coords.to_dataset() ds = convert_lons_lats_ncep(ds, xs, ys) @@ -227,107 +530,178 @@ def prepare_meta_ncep( return meta -def tasks_monthly_ncep(xs, ys, yearmonths, prepare_func, template, meta_attrs): +def tasks_monthly_ncep( + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + prepare_func: Any, + template: str, + meta_attrs: dict[str, Any], +) -> list[dict[str, Any]]: + """Build per-month task dicts for NCEP data preparation. + + Parameters + ---------- + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + yearmonths : list of tuple of (int, int) + Year-month pairs to create tasks for. + prepare_func : callable + Preparation function to invoke per task. + template : str + Glob-able file path template with ``{year}`` and ``{month}`` placeholders. + meta_attrs : dict + Metadata attributes (unused, kept for interface consistency). + + Returns + ------- + list of dict + Task dictionaries keyed for the preparation function. 
+ """ return [ - dict( - prepare_func=prepare_func, - xs=xs, - ys=ys, - fn=next(glob.iglob(template.format(year=ym[0], month=ym[1]))), - engine=engine, - yearmonth=ym, - ) + { + "prepare_func": prepare_func, + "xs": xs, + "ys": ys, + "fn": next(glob.iglob(template.format(year=ym[0], month=ym[1]))), # noqa: PTH207 + "engine": engine, + "yearmonth": ym, + } for ym in yearmonths ] def tasks_height_ncep( - xs, ys, yearmonths, prepare_func, template, meta_attrs, **extra_args -): + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + prepare_func: Any, + template: str, + meta_attrs: dict[str, Any], + **extra_args: Any, +) -> list[dict[str, Any]]: + """Build a single task dict for NCEP height data preparation. + + Parameters + ---------- + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + yearmonths : list of tuple of (int, int) + Year-month pairs passed through to the preparation function. + prepare_func : callable + Preparation function to invoke. + template : str + Glob-able file path to the height data. + meta_attrs : dict + Metadata attributes (unused, kept for interface consistency). + **extra_args + Additional keyword arguments forwarded to the task dict. + + Returns + ------- + list of dict + Single-element list with the height task dictionary. 
+ """ return [ dict( prepare_func=prepare_func, xs=xs, ys=ys, yearmonths=yearmonths, - fn=next(glob.iglob(template)), + fn=next(glob.iglob(template)), # noqa: PTH207 **extra_args, ) ] -weather_data_config = { - "influx": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_influx_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/dswsfc.*.grb2", - ), - ), - "outflux": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_outflux_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/uswsfc.*.grb2", - ), - ), - "temperature": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_temperature_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 +weather_data_config: dict[str, dict[str, Any]] = {} +try: + from atlite import config # type: ignore[attr-defined] + + weather_data_config = { + "influx": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_influx_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, + "{year}{month:0>2}/dswsfc.*.grb2", + ), + }, + "outflux": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_outflux_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, + "{year}{month:0>2}/uswsfc.*.grb2", + ), + }, + "temperature": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_temperature_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, + "{year}{month:0>2}/tmp2m.*.grb2", + ), + }, + "soil temperature": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_soil_temperature_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, + "{year}{month:0>2}/soilt1.*.grb2", + ), + }, + "wnd10m": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_wnd10m_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, + "{year}{month:0>2}/wnd10m.*.grb2", + ), + }, + "runoff": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": 
prepare_runoff_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, + "{year}{month:0>2}/runoff.*.grb2", + ), + }, + "roughness": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_roughness_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, + "{year}{month:0>2}/flxf.gdas.*.grb2", + ), + }, + "height": { + "tasks_func": tasks_height_ncep, + "prepare_func": prepare_height_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, + "height/cdas1.20130101.splgrbanl.grb2", + ), + }, + } +except ImportError: + pass + +meta_data_config: dict[str, Any] = {} +try: + from atlite import config # type: ignore[attr-defined] + + meta_data_config = { + "prepare_func": prepare_meta_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, "{year}{month:0>2}/tmp2m.*.grb2", ), - ), - "soil temperature": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_soil_temperature_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/soilt1.*.grb2", - ), - ), - "wnd10m": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_wnd10m_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/wnd10m.*.grb2", - ), - ), - "runoff": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_runoff_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/runoff.*.grb2", - ), - ), - "roughness": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_roughness_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/flxf.gdas.*.grb2", - ), - ), - "height": dict( - tasks_func=tasks_height_ncep, - prepare_func=prepare_height_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "height/cdas1.20130101.splgrbanl.grb2", - ), - ), -} - -meta_data_config = dict( - prepare_func=prepare_meta_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/tmp2m.*.grb2", - ), - 
height_config=weather_data_config["height"], -) + "height_config": weather_data_config["height"], + } +except (ImportError, KeyError): + pass diff --git a/atlite/datasets/sarah.py b/atlite/datasets/sarah.py index ec7851ff..3ade934f 100644 --- a/atlite/datasets/sarah.py +++ b/atlite/datasets/sarah.py @@ -1,16 +1,15 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module containing specific operations for creating cutouts from the SARAH2 -dataset. -""" +"""Module for creating cutouts from the SARAH2 dataset.""" + +from __future__ import annotations -import glob import logging -import os import warnings from functools import partial +from pathlib import Path +from typing import TYPE_CHECKING, Any import numpy as np import pandas as pd @@ -20,6 +19,9 @@ from atlite.gis import regrid from atlite.pv.solar_position import SolarPosition +if TYPE_CHECKING: + from atlite._types import PathLike + logger = logging.getLogger(__name__) @@ -36,31 +38,30 @@ "solar_azimuth", ], } -static_features = {} +static_features: dict[str, list[str]] = {} -def get_filenames(sarah_dir, coords): +def get_filenames(sarah_dir: str | PathLike, coords: dict[str, Any]) -> pd.DataFrame: """ - Get all files in directory `sarah_dir` relevent for coordinates `coords`. - - This function parses all files in the sarah directory which lay in the time - span of the coordinates. + Build a DataFrame of SIS and SID file paths for the requested time range. Parameters ---------- - sarah_dir : str - coords : atlite.Cutout.coords + sarah_dir : str or PathLike + Root directory containing SARAH NetCDF files. + coords : dict + Coordinate mapping with a ``"time"`` key whose index defines the + requested temporal range. Returns ------- - pd.DataFrame with two columns `sis` and `sid` for and timeindex for all - relevant files. - + pd.DataFrame + DataFrame with columns ``"sis"`` and ``"sid"`` indexed by date. 
""" - def _filenames_starting_with(name): - pattern = os.path.join(sarah_dir, "**", f"{name}*.nc") - files = pd.Series(glob.glob(pattern, recursive=True)) + def _filenames_starting_with(name: str) -> pd.Series[str]: + pattern = str(Path(sarah_dir) / "**" / f"{name}*.nc") + files = pd.Series([str(f) for f in Path(sarah_dir).rglob(f"{name}*.nc")]) assert not files.empty, ( f"No files found at {pattern}. Make sure " f"sarah_dir points to the correct directory!" @@ -70,48 +71,62 @@ def _filenames_starting_with(name): return files.sort_index() files = pd.concat( - dict(sis=_filenames_starting_with("SIS"), sid=_filenames_starting_with("SID")), + { + "sis": _filenames_starting_with("SIS"), + "sid": _filenames_starting_with("SID"), + }, join="inner", axis=1, ) - # SARAH files are named based on day, need to .floor("D") to compare correctly start = coords["time"].to_index()[0].floor("D") end = coords["time"].to_index()[-1].floor("D") if (start < files.index[0]) or (end > files.index[-1]): logger.error( - f"Files in {sarah_dir} do not cover the whole time span:" - f"\t{start} until {end}" + "Files in %s do not cover the whole time span:\t%s until %s", + sarah_dir, + start, + end, ) return files.loc[(files.index >= start) & (files.index <= end)].sort_index() -def interpolate(ds, dim="time"): +def interpolate( + ds: xr.Dataset | xr.DataArray, dim: str = "time" +) -> xr.Dataset | xr.DataArray: """ - Interpolate NaNs in a dataset along a chunked dimension. + Linearly interpolate NaN values along a dimension. + + Parameters + ---------- + ds : xr.Dataset or xr.DataArray + Input data with potential NaN gaps. + dim : str, default "time" + Dimension along which to interpolate. - This function is similar to xr.Dataset.interpolate_na but can be - used for interpolating along a chunked dimensions (default 'time''). - As the sarah data has mulitple NaN's in the areas of dawn and - nightfall and the data is per default chunked along the time axis, - use this function to interpolate. 
+ Returns + ------- + xr.Dataset or xr.DataArray + Data with NaN values filled by linear interpolation. """ - def _interpolate1d(y): + def _interpolate1d( + y: np.ndarray[Any, np.dtype[np.floating[Any]]], + ) -> np.ndarray[Any, np.dtype[np.floating[Any]]]: nan = np.isnan(y) if nan.all() or not nan.any(): return y - def x(z): + def x(z: np.ndarray[Any, np.dtype[Any]]) -> np.ndarray[Any, np.dtype[np.intp]]: return z.nonzero()[0] y = np.array(y) y[nan] = np.interp(x(nan), x(~nan), y[~nan]) return y - def _interpolate(a): + def _interpolate(a: Any) -> Any: return a.map_blocks( partial(np.apply_along_axis, _interpolate1d, -1), dtype=a.dtype ) @@ -132,22 +147,41 @@ def _interpolate(a): ) -def as_slice(bounds, pad=True): +def as_slice(bounds: slice | tuple[float, float], pad: bool = True) -> slice: """ - Convert coordinate bounds to slice and pad by 0.01. + Convert coordinate bounds to a slice, optionally with small padding. + + Parameters + ---------- + bounds : slice or tuple of float + Existing slice or ``(start, stop)`` tuple. + pad : bool, default True + If *bounds* is a tuple, add ±0.01 padding. + + Returns + ------- + slice + Slice suitable for ``.sel()`` indexing. """ if not isinstance(bounds, slice): - bounds = bounds + (-0.01, 0.01) + bounds = bounds + (-0.01, 0.01) # type: ignore[assignment] bounds = slice(*bounds) return bounds -def hourly_mean(ds): +def hourly_mean(ds: xr.Dataset) -> xr.Dataset: """ - Resample time data to one hour frequency. + Compute hourly means from 30-minute data by averaging consecutive pairs. + + Parameters + ---------- + ds : xr.Dataset + Dataset with 30-minute temporal resolution. - In contrast to the standard xarray resample function this preserves - chunks sizes along the time dimension. + Returns + ------- + xr.Dataset + Dataset resampled to hourly resolution. 
""" ds1 = ds.isel(time=slice(None, None, 2)) ds2 = ds.isel(time=slice(1, None, 2)) @@ -160,39 +194,37 @@ def hourly_mean(ds): def get_data( - cutout, feature, tmpdir, lock=None, monthly_requests=False, **creation_parameters -): + cutout: Any, + feature: str, + tmpdir: PathLike, + lock: Any = None, + monthly_requests: bool = False, + **creation_parameters: Any, +) -> xr.Dataset: """ - Load stored SARAH data and reformat to matching the given cutout. - - This function loads and resamples the stored SARAH data for a given - `atlite.Cutout`. + Retrieve and process SARAH solar irradiance data for a cutout. Parameters ---------- - cutout : atlite.Cutout + cutout : Cutout + Target cutout defining the spatial and temporal extent. feature : str - Name of the feature data to retrieve. Must be in - `atlite.datasets.sarah.features` - monthly_requests : bool - Takes no effect, only here for consistency with other dataset modules. - concurrent_requests : bool - Takes no effect, only here for consistency with other dataset modules. - **creation_parameters : - Mandatory arguments are: - * 'sarah_dir', str. Directory of the stored SARAH data. - Possible arguments are: - * 'parallel', bool. Whether to load stored files in parallel - mode. Default is False. - * 'sarah_interpolate', bool. Whether to interpolate areas of dawn - and nightfall. This might slow down the loading process if only - a few cores are available. Default is True. + Feature name (e.g. ``"influx"``). + tmpdir : PathLike + Temporary directory for intermediate files. + lock : lock-like, optional + Lock for thread-safe file access. + monthly_requests : bool, default False + Whether to split requests by month. + **creation_parameters + Must include ``sarah_dir``. Optional keys: ``parallel`` (bool), + ``sarah_interpolate`` (bool). Returns ------- - xarray.Dataset - Dataset of dask arrays of the retrieved variables. 
- + xr.Dataset + Dataset with ``influx_direct``, ``influx_diffuse``, + ``solar_altitude``, and ``solar_azimuth`` variables. """ assert cutout.dt in ("30min", "30T", "h", "1h") @@ -204,21 +236,16 @@ def get_data( creation_parameters.setdefault("sarah_interpolate", True) files = get_filenames(sarah_dir, coords) - open_kwargs = dict(chunks=chunks, parallel=creation_parameters["parallel"]) + open_kwargs = {"chunks": chunks, "parallel": creation_parameters["parallel"]} ds_sis = xr.open_mfdataset(files.sis, combine="by_coords", **open_kwargs)[["SIS"]] ds_sid = xr.open_mfdataset(files.sid, combine="by_coords", **open_kwargs)[["SID"]] ds = xr.merge([ds_sis, ds_sid]) ds = ds.sel(lon=as_slice(cutout.extent[:2]), lat=as_slice(cutout.extent[2:])) - # fix float (im)precission ds = ds.assign_coords( lon=ds.lon.astype(float).round(4), lat=ds.lat.astype(float).round(4) ) - # Interpolate, resample and possible regrid - if creation_parameters["sarah_interpolate"]: - ds = interpolate(ds) - else: - ds = ds.fillna(0) + ds = interpolate(ds) if creation_parameters["sarah_interpolate"] else ds.fillna(0) if cutout.dt not in ["30min", "30T"]: ds = hourly_mean(ds) @@ -226,19 +253,16 @@ def get_data( if (cutout.dx != dx) or (cutout.dy != dy): ds = regrid(ds, coords["lon"], coords["lat"], resampling=Resampling.average) - dif_attrs = dict(long_name="Surface Diffuse Shortwave Flux", units="W m-2") + dif_attrs = {"long_name": "Surface Diffuse Shortwave Flux", "units": "W m-2"} ds["influx_diffuse"] = (ds["SIS"] - ds["SID"]).assign_attrs(**dif_attrs) ds = ds.rename({"SID": "influx_direct"}).drop_vars("SIS") ds = ds.assign_coords(x=ds.coords["lon"], y=ds.coords["lat"]) ds = ds.swap_dims({"lon": "x", "lat": "y"}) - # Do not show DeprecationWarning from new SolarPosition calculation (#199) with warnings.catch_warnings(): warnings.simplefilter("ignore", DeprecationWarning) sp = SolarPosition(ds, time_shift="0H") sp = sp.rename({v: f"solar_{v}" for v in sp.data_vars}) - ds = xr.merge([ds, sp]) - - 
return ds + return xr.merge([ds, sp]) diff --git a/atlite/gis.py b/atlite/gis.py index 9cf9aa49..b22b7c36 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -1,14 +1,15 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for Geographic Information System. -""" +"""Functions for Geographic Information System.""" + +from __future__ import annotations import logging import multiprocessing as mp from collections import OrderedDict from pathlib import Path +from typing import TYPE_CHECKING, Any, cast from warnings import catch_warnings, simplefilter import geopandas as gpd @@ -30,78 +31,131 @@ from shapely.strtree import STRtree from tqdm import tqdm +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Sequence + + from matplotlib.axes import Axes + + from atlite._types import ( + CrsLike, + DataArray, + Dataset, + GeoDataFrame, + Geometry, + GeoSeries, + NDArray, + PathLike, + ) + logger = logging.getLogger(__name__) -def get_coords(x, y, time, dx=0.25, dy=0.25, dt="h", **kwargs): +def get_coords( + x: slice, + y: slice, + time: slice, + dx: float = 0.25, + dy: float = 0.25, + dt: str = "h", + **kwargs: Any, +) -> Dataset: """ - Create an cutout coordinate system on the basis of slices and step sizes. + Create cutout coordinates from slices and resolutions. Parameters ---------- x : slice - Numerical slices with lower and upper bound of the x dimension. + Bounds of the x dimension. y : slice - Numerical slices with lower and upper bound of the y dimension. + Bounds of the y dimension. time : slice - Slice with strings with lower and upper bound of the time dimension. + Bounds of the time dimension. dx : float, optional - Step size of the x coordinate. The default is 0.25. + Step size of the x coordinate. dy : float, optional - Step size of the y coordinate. The default is 0.25. + Step size of the y coordinate. dt : str, optional - Frequency of the time coordinate. The default is 'h'. 
Valid are all - pandas offset aliases. + Frequency of the time coordinate. + **kwargs + Unused keyword arguments. Returns ------- - ds : xarray.Dataset - Dataset with x, y and time variables, representing the whole coordinate - system. - + xarray.Dataset + Dataset containing ``x``, ``y``, and ``time`` coordinates. """ x = slice(*sorted([x.start, x.stop])) y = slice(*sorted([y.start, y.stop])) - ds = xr.Dataset( - { - "x": np.round(np.arange(-180, 180, dx), 9), - "y": np.round(np.arange(-90, 90, dy), 9), - "time": pd.date_range(start="1940", end="now", freq=dt), - } - ) + ds = xr.Dataset({ + "x": np.round(np.arange(-180, 180, dx), 9), + "y": np.round(np.arange(-90, 90, dy), 9), + "time": pd.date_range(start="1940", end="now", freq=dt), + }) ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) ds = ds.sel(x=x, y=y, time=time) - return ds + return cast("Dataset", ds) -def spdiag(v): +def spdiag(v: NDArray | Sequence[float]) -> sp.sparse.csr_matrix: """ - Create a sparse diagonal matrix from a 1-dimensional array. + Create a sparse diagonal matrix. + + Parameters + ---------- + v : array-like + Values placed on the diagonal. + + Returns + ------- + scipy.sparse.csr_matrix + Sparse diagonal matrix with ``v`` on the diagonal. """ N = len(v) inds = np.arange(N + 1, dtype=np.int32) return sp.sparse.csr_matrix((v, inds[:-1], inds), (N, N)) -def reproject_shapes(shapes, crs1, crs2): +def reproject_shapes( + shapes: Iterable[Geometry] | pd.Series | dict[Any, Geometry], + crs1: CrsLike, + crs2: CrsLike, +) -> Iterable[Geometry] | pd.Series | OrderedDict[Any, Geometry]: """ - Project a collection of shapes from one crs to another. + Reproject a collection of geometries. + + Parameters + ---------- + shapes : iterable, pandas.Series, or dict + Shapes to reproject. + crs1 : any + Source coordinate reference system. + crs2 : any + Target coordinate reference system. 
+ + Returns + ------- + iterable, pandas.Series, or collections.OrderedDict + Reprojected shapes with the same container type where applicable. """ transformer = Transformer.from_crs(crs1, crs2, always_xy=True) - def _reproject_shape(shape): + def _reproject_shape(shape: Geometry) -> Geometry: return transform(transformer.transform, shape) if isinstance(shapes, pd.Series): return shapes.map(_reproject_shape) - elif isinstance(shapes, dict): + if isinstance(shapes, dict): return OrderedDict((k, _reproject_shape(v)) for k, v in shapes.items()) - else: - return list(map(_reproject_shape, shapes)) + return list(map(_reproject_shape, shapes)) -def compute_indicatormatrix(orig, dest, orig_crs=4326, dest_crs=4326): +def compute_indicatormatrix( + orig: GeoDataFrame | GeoSeries | Iterable[Geometry], + dest: GeoDataFrame | GeoSeries | Iterable[Geometry], + orig_crs: CrsLike = 4326, + dest_crs: CrsLike = 4326, +) -> sp.sparse.lil_matrix: """ Compute the indicatormatrix. @@ -117,7 +171,13 @@ def compute_indicatormatrix(orig, dest, orig_crs=4326, dest_crs=4326): Parameters ---------- orig : Collection of shapely polygons + Origin polygons. dest : Collection of shapely polygons + Destination polygons. + orig_crs : int or CRS, default 4326 + CRS of the origin polygons. + dest_crs : int or CRS, default 4326 + CRS of the destination polygons. 
Returns ------- @@ -128,15 +188,21 @@ def compute_indicatormatrix(orig, dest, orig_crs=4326, dest_crs=4326): orig = orig.geometry if isinstance(orig, gpd.GeoDataFrame) else orig dest = dest.geometry if isinstance(dest, gpd.GeoDataFrame) else dest dest = reproject_shapes(dest, dest_crs, orig_crs) - indicator = sp.sparse.lil_matrix((len(dest), len(orig)), dtype=float) - tree = STRtree(orig) - idx = dict((hash(o.wkt), i) for i, o in enumerate(orig)) + orig_list: list[Any] | pd.Series = ( + list(orig) if not isinstance(orig, pd.Series) else orig + ) + dest_list: list[Any] | pd.Series = ( + list(dest) if not isinstance(dest, pd.Series) else dest + ) + indicator = sp.sparse.lil_matrix((len(dest_list), len(orig_list)), dtype=float) + tree = STRtree(orig_list) + idx = {hash(o.wkt): i for i, o in enumerate(orig_list)} - for i, d in enumerate(dest): + for i, d in enumerate(dest_list): for o in tree.query(d): # STRtree query returns a list of indices for shapely >= v2.0 if isinstance(o, (int | np.integer)): - o = orig[o] + o = orig_list[o] if o.intersects(d): j = idx[hash(o.wkt)] area = d.intersection(o).area @@ -145,7 +211,12 @@ def compute_indicatormatrix(orig, dest, orig_crs=4326, dest_crs=4326): return indicator -def compute_intersectionmatrix(orig, dest, orig_crs=4326, dest_crs=4326): +def compute_intersectionmatrix( + orig: GeoDataFrame | GeoSeries | Iterable[Geometry], + dest: GeoDataFrame | GeoSeries | Iterable[Geometry], + orig_crs: CrsLike = 4326, + dest_crs: CrsLike = 4326, +) -> sp.sparse.lil_matrix: """ Compute the intersectionmatrix. @@ -157,7 +228,13 @@ def compute_intersectionmatrix(orig, dest, orig_crs=4326, dest_crs=4326): Parameters ---------- orig : Collection of shapely polygons + Origin polygons. dest : Collection of shapely polygons + Destination polygons. + orig_crs : int or CRS, default 4326 + CRS of the origin polygons. + dest_crs : int or CRS, default 4326 + CRS of the destination polygons. 
Returns ------- @@ -168,25 +245,44 @@ def compute_intersectionmatrix(orig, dest, orig_crs=4326, dest_crs=4326): orig = orig.geometry if isinstance(orig, gpd.GeoDataFrame) else orig dest = dest.geometry if isinstance(dest, gpd.GeoDataFrame) else dest dest = reproject_shapes(dest, dest_crs, orig_crs) - intersection = sp.sparse.lil_matrix((len(dest), len(orig)), dtype=float) - tree = STRtree(orig) - idx = dict((hash(o.wkt), i) for i, o in enumerate(orig)) + orig_list: list[Any] | pd.Series = ( + list(orig) if not isinstance(orig, pd.Series) else orig + ) + dest_list: list[Any] | pd.Series = ( + list(dest) if not isinstance(dest, pd.Series) else dest + ) + intersection = sp.sparse.lil_matrix((len(dest_list), len(orig_list)), dtype=float) + tree = STRtree(orig_list) + idx = {hash(o.wkt): i for i, o in enumerate(orig_list)} - for i, d in enumerate(dest): + for i, d in enumerate(dest_list): for o in tree.query(d): # STRtree query returns a list of indices for shapely >= v2.0 if isinstance(o, (int | np.integer)): - o = orig[o] + o = orig_list[o] j = idx[hash(o.wkt)] intersection[i, j] = o.intersects(d) return intersection -def padded_transform_and_shape(bounds, res): +def padded_transform_and_shape( + bounds: tuple[float, float, float, float], res: float +) -> tuple[rio.Affine, tuple[int, int]]: """ - Get the (transform, shape) tuple of a raster with resolution `res` and - bounds `bounds`. + Return a padded raster transform and shape. + + Parameters + ---------- + bounds : tuple + Bounding box as ``(left, bottom, right, top)``. + res : float + Raster resolution. + + Returns + ------- + tuple + Affine transform and raster shape covering the padded bounds. 
""" left, bottom = ((b // res) * res for b in bounds[:2]) right, top = ((b // res + 1) * res for b in bounds[2:]) @@ -195,10 +291,38 @@ def padded_transform_and_shape(bounds, res): def projected_mask( - raster, geom, transform=None, shape=None, crs=None, allow_no_overlap=False, **kwargs -): + raster: rio.DatasetReader, + geom: GeoSeries, + transform: rio.Affine | None = None, + shape: tuple[int, int] | None = None, + crs: CrsLike = None, + allow_no_overlap: bool = False, + **kwargs: Any, +) -> tuple[NDArray, rio.Affine]: """ - Load a mask and optionally project it to target resolution and shape. + Load a raster mask and optionally reproject it. + + Parameters + ---------- + raster : rasterio.DatasetReader + Raster source used to build the mask. + geom : geopandas.GeoSeries + Geometry used for masking. + transform : rasterio.Affine, optional + Target transform. + shape : tuple, optional + Target array shape. + crs : any, optional + Target coordinate reference system. + allow_no_overlap : bool, optional + Whether to return a nodata mask when geometry and raster do not overlap. + **kwargs + Additional keyword arguments passed to ``rasterio.mask.mask``. + + Returns + ------- + tuple + Masked array and its affine transform. """ nodata = kwargs.get("nodata", 255) kwargs.setdefault("indexes", 1) @@ -219,7 +343,7 @@ def projected_mask( return masked, transform_ assert shape is not None and crs is not None - return rio.warp.reproject( + return rio.warp.reproject( # type: ignore[no-any-return] masked, empty(shape), src_crs=raster.crs, @@ -230,19 +354,42 @@ def projected_mask( ) -def pad_extent(src, src_transform, dst_transform, src_crs, dst_crs, **kwargs): +def pad_extent( + src: NDArray, + src_transform: rio.Affine, + dst_transform: rio.Affine, + src_crs: CrsLike, + dst_crs: CrsLike, + **kwargs: Any, +) -> tuple[NDArray, rio.Affine]: """ - Pad the extent of `src` by an equivalent of one cell of the target raster. + Pad an array before reprojection. 
- This ensures that the array is large enough to not be treated as - nodata in all cells of the destination raster. If src.ndim > 2, the - function expects the last two dimensions to be y,x. Additional - keyword arguments are used in `np.pad()`. + Parameters + ---------- + src : numpy.ndarray + Source array with spatial axes in the last two dimensions. + src_transform : rasterio.Affine + Transform of the source array. + dst_transform : rasterio.Affine + Transform of the destination raster. + src_crs : any + Source coordinate reference system. + dst_crs : any + Destination coordinate reference system. + **kwargs + Keyword arguments passed to ``numpy.pad``. + + Returns + ------- + tuple + Padded array and updated affine transform. """ if src.size == 0: return src, src_transform - left, top, right, bottom = *(src_transform * (0, 0)), *(src_transform * (1, 1)) + left, top = src_transform * (0, 0) + right, bottom = src_transform * (1, 1) covered = transform_bounds(src_crs, dst_crs, left, bottom, right, top) covered_res = min(abs(covered[2] - covered[0]), abs(covered[3] - covered[1])) pad = int(dst_transform[0] // covered_res * 1.1) @@ -250,7 +397,7 @@ def pad_extent(src, src_transform, dst_transform, src_crs, dst_crs, **kwargs): kwargs.setdefault("mode", "constant") if src.ndim == 2: - return rio.pad(src, src_transform, pad, **kwargs) + return rio.pad(src, src_transform, pad, **kwargs) # type: ignore[no-any-return] npad = ((0, 0),) * (src.ndim - 2) + ((pad, pad), (pad, pad)) padded = np.pad(src, npad, **kwargs) @@ -260,7 +407,9 @@ def pad_extent(src, src_transform, dst_transform, src_crs, dst_crs, **kwargs): return padded, rio.Affine(*transform[:6]) -def shape_availability(geometry, excluder): +def shape_availability( + geometry: GeoSeries, excluder: ExclusionContainer +) -> tuple[NDArray, rio.Affine]: """ Compute the eligible area in one or more geometries. 
@@ -268,7 +417,7 @@ def shape_availability(geometry, excluder): ---------- geometry : geopandas.Series Geometry of which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is + more than one geometry, the eligible area of the combined geometries is computed. excluder : atlite.gis.ExclusionContainer Container of all meta data or objects which to exclude, i.e. @@ -278,7 +427,7 @@ def shape_availability(geometry, excluder): ------- masked : np.array Mask whith eligible raster cells indicated by 1 and excluded cells by 0. - transform : rasterion.Affine + transform : rasterio.Affine Affine transform of the mask. """ @@ -326,43 +475,38 @@ def shape_availability(geometry, excluder): def shape_availability_reprojected( - geometry, excluder, dst_transform, dst_crs, dst_shape -): + geometry: GeoSeries, + excluder: ExclusionContainer, + dst_transform: rio.Affine, + dst_crs: CrsLike, + dst_shape: tuple[int, int], +) -> tuple[NDArray, rio.Affine]: """ - Compute and reproject the eligible area of one or more geometries. - - The function executes `shape_availability` and reprojects the calculated - mask onto a new raster defined by (dst_transform, dst_crs, dst_shape). - Before reprojecting, the function pads the mask such all non-nodata data - points are projected in full cells of the target raster. The ensures that - all data within the mask are projected correclty (GDAL inherent 'problem'). + Compute availability and reproject it to a target raster. + Parameters ---------- - geometry : geopandas.Series - Geometry in which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is - computed. + geometry : geopandas.GeoSeries + Geometry for which availability is computed. excluder : atlite.gis.ExclusionContainer - Container of all meta data or objects which to exclude, i.e. - rasters and geometries. 
+ Exclusion container defining masked areas. dst_transform : rasterio.Affine Transform of the target raster. - dst_crs : rasterio.CRS/proj.CRS - CRS of the target raster. + dst_crs : any + Coordinate reference system of the target raster. dst_shape : tuple Shape of the target raster. - masked : np.array - Average share of available area per grid cell. 0 indicates excluded, - 1 is fully included. - transform : rasterio.Affine - Affine transform of the mask. + Returns + ------- + tuple + Reprojected availability array and destination transform. """ masked, transform = shape_availability(geometry, excluder) masked, transform = pad_extent( masked, transform, dst_transform, excluder.crs, dst_crs ) - return rio.warp.reproject( + return rio.warp.reproject( # type: ignore[no-any-return] masked.astype(np.uint8), empty(dst_shape), resampling=rio.warp.Resampling.average, @@ -374,11 +518,9 @@ def shape_availability_reprojected( class ExclusionContainer: - """ - Container for exclusion objects and meta data. - """ + """Container for exclusion objects and meta data.""" - def __init__(self, crs=3035, res=100): + def __init__(self, crs: CrsLike = 3035, res: float = 100) -> None: """ Initialize a container for excluded areas. @@ -394,21 +536,25 @@ def __init__(self, crs=3035, res=100): The default is 100. """ - self.rasters = [] - self.geometries = [] - self.crs = crs - self.res = res + self.rasters: list[dict[str, Any]] = [] + self.geometries: list[dict[str, Any]] = [] + self.crs: CrsLike = crs + self.res: float = res def add_raster( self, - raster, - codes=None, - buffer=0, - invert=False, - nodata=255, - allow_no_overlap=False, - crs=None, - ): + raster: PathLike | rio.DatasetReader, + codes: int + | list[int] + | Sequence[int] + | Callable[[NDArray], NDArray] + | None = None, + buffer: float = 0, + invert: bool = False, + nodata: int = 255, + allow_no_overlap: bool = False, + crs: CrsLike = None, + ) -> None: """ Register a raster to the ExclusionContainer. 
@@ -426,6 +572,8 @@ def add_raster( Buffer around the excluded areas in units of ExclusionContainer.crs. Use this to create a buffer around the excluded/included area. The default is 0. + nodata : int, optional + Value to use for nodata pixels. The default is 255. invert : bool, optional Whether to exclude (False) or include (True) the specified areas of the raster. The default is False. @@ -437,18 +585,23 @@ def add_raster( CRS of the raster. Specify this if the raster has invalid crs. """ - d = dict( - raster=raster, - codes=codes, - buffer=buffer, - invert=invert, - nodata=nodata, - allow_no_overlap=allow_no_overlap, - crs=crs, - ) + d: dict[str, Any] = { + "raster": raster, + "codes": codes, + "buffer": buffer, + "invert": invert, + "nodata": nodata, + "allow_no_overlap": allow_no_overlap, + "crs": crs, + } self.rasters.append(d) - def add_geometry(self, geometry, buffer=0, invert=False): + def add_geometry( + self, + geometry: PathLike | GeoDataFrame | GeoSeries, + buffer: float = 0, + invert: bool = False, + ) -> None: """ Register a collection of geometries to the ExclusionContainer. @@ -464,12 +617,17 @@ def add_geometry(self, geometry, buffer=0, invert=False): of the geometries. The default is False. """ - d = dict(geometry=geometry, buffer=buffer, invert=invert) + d: dict[str, Any] = {"geometry": geometry, "buffer": buffer, "invert": invert} self.geometries.append(d) - def open_files(self): + def open_files(self) -> None: """ Open rasters and load geometries. + + Raises + ------ + ValueError + If a raster has an invalid CRS and none is provided. """ for d in self.rasters: raster = d["raster"] @@ -501,24 +659,27 @@ def open_files(self): d["geometry"] = geometry @property - def all_closed(self): - """ - Check whether all files in the raster container are closed. 
- """ + def all_closed(self) -> bool: + """Check whether all files in the raster container are closed.""" return all(isinstance(d["raster"], (str | Path)) for d in self.rasters) and all( isinstance(d["geometry"], (str | Path)) for d in self.geometries ) @property - def all_open(self): - """ - Check whether all files in the raster container are open. - """ + def all_open(self) -> bool: + """Check whether all files in the raster container are open.""" return all( isinstance(d["raster"], rio.DatasetReader) for d in self.rasters ) and all(isinstance(d["geometry"], gpd.GeoSeries) for d in self.geometries) - def __repr__(self): + def __repr__(self) -> str: + """Return string representation of the exclusion container. + + Returns + ------- + str + Human-readable summary of the exclusion container. + """ return ( f"Exclusion Container" f"\n registered rasters: {len(self.rasters)} " @@ -527,17 +688,20 @@ def __repr__(self): ) def compute_shape_availability( - self, geometry, dst_transform=None, dst_crs=None, dst_shape=None - ): + self, + geometry: GeoDataFrame | GeoSeries, + dst_transform: rio.Affine | None = None, + dst_crs: CrsLike = None, + dst_shape: tuple[int, int] | None = None, + ) -> tuple[NDArray, rio.Affine]: """ - Compute the eligible area in one or more geometries and optionally - reproject. + Compute the eligible area in one or more geometries. Parameters ---------- geometry : geopandas.Series Geometry of which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is + more than one geometry, the eligible area of the combined geometries is computed. dst_transform : rasterio.Affine Transform of the target raster. Define if the availability @@ -553,9 +717,14 @@ def compute_shape_availability( ------- masked : np.array Mask whith eligible raster cells indicated by 1 and excluded cells by 0. - transform : rasterion.Affine + transform : rasterio.Affine Affine transform of the mask. 
+ Raises + ------ + ValueError + If only some of ``dst_transform``, ``dst_crs``, ``dst_shape`` are given. + """ if isinstance(geometry, gpd.GeoDataFrame): geometry = geometry.geometry @@ -571,40 +740,44 @@ def compute_shape_availability( "Arguments dst_transform, dst_crs, dst_shape " "should be all None or all defined." ) + assert ( + dst_transform is not None + and dst_crs is not None + and dst_shape is not None + ) return shape_availability_reprojected( geometry, self, dst_transform, dst_crs, dst_shape ) - else: - return shape_availability(geometry, self) + return shape_availability(geometry, self) def plot_shape_availability( self, - geometry, - ax=None, - set_title=True, - dst_transform=None, - dst_crs=None, - dst_shape=None, - show_kwargs={}, - plot_kwargs={}, - ): + geometry: GeoDataFrame | GeoSeries, + ax: Axes | None = None, + set_title: bool = True, + dst_transform: rio.Affine | None = None, + dst_crs: CrsLike = None, + dst_shape: tuple[int, int] | None = None, + show_kwargs: dict[str, Any] | None = None, + plot_kwargs: dict[str, Any] | None = None, + ) -> Axes: """ - Plot the eligible area for one or more geometries and optionally - reproject. + Plot the eligible area for one or more geometries. This function uses its own default values for ``rasterio.plot.show`` and ``geopandas.GeoSeries.plot``. Therefore eligible land is drawn in green - Note that this funtion will likely fail if another CRS than the one of the + Note that this function will likely fail if another CRS than the one of the ExclusionContainer is used in the axis (e.g. cartopy projections). Parameters ---------- geometry : geopandas.Series Geometry of which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is + more than one geometry, the eligible area of the combined geometries is computed. - ax : matplotlib Axis, optional - set_title: boolean, optional + ax : matplotlib.axes.Axes, optional + Axes to plot on. 
If None, a new figure is created. + set_title : boolean, optional Whether to set the title with additional information on the share of eligible land. dst_transform : rasterio.Affine @@ -623,10 +796,14 @@ def plot_shape_availability( Returns ------- - _type_ - _description_ + matplotlib.axes.Axes + Axes with the plotted availability. """ + if show_kwargs is None: + show_kwargs = {} + if plot_kwargs is None: + plot_kwargs = {} import matplotlib.pyplot as plt if isinstance(geometry, gpd.GeoDataFrame): @@ -653,22 +830,43 @@ def plot_shape_availability( return ax -def _init_process(shapes_, excluder_, dst_transform_, dst_crs_, dst_shapes_): - global shapes, excluder, dst_transform, dst_crs, dst_shapes - shapes, excluder = shapes_, excluder_ - dst_transform, dst_crs, dst_shapes = dst_transform_, dst_crs_, dst_shapes_ +_mp_shapes: GeoSeries +_mp_excluder: ExclusionContainer +_mp_dst_transform: rio.Affine +_mp_dst_crs: CrsLike +_mp_dst_shapes: tuple[int, int] + + +def _init_process( + shapes_: GeoSeries, + excluder_: ExclusionContainer, + dst_transform_: rio.Affine, + dst_crs_: CrsLike, + dst_shapes_: tuple[int, int], +) -> None: + global _mp_shapes, _mp_excluder, _mp_dst_transform, _mp_dst_crs, _mp_dst_shapes + _mp_shapes, _mp_excluder = shapes_, excluder_ + _mp_dst_transform, _mp_dst_crs, _mp_dst_shapes = ( + dst_transform_, + dst_crs_, + dst_shapes_, + ) -def _process_func(i): - args = (excluder, dst_transform, dst_crs, dst_shapes) +def _process_func(i: Any) -> NDArray: + args = (_mp_excluder, _mp_dst_transform, _mp_dst_crs, _mp_dst_shapes) with catch_warnings(): simplefilter("ignore") - return shape_availability_reprojected(shapes.loc[[i]], *args)[0] + return shape_availability_reprojected(_mp_shapes.loc[[i]], *args)[0] def compute_availabilitymatrix( - cutout, shapes, excluder, nprocesses=None, disable_progressbar=True -): + cutout: Any, + shapes: GeoDataFrame | GeoSeries, + excluder: ExclusionContainer, + nprocesses: int | None = None, + disable_progressbar: bool = 
True, +) -> DataArray: """ Compute the eligible share within cutout cells in the overlap with shapes. @@ -714,12 +912,12 @@ def compute_availabilitymatrix( shapes = shapes.to_crs(excluder.crs) args = (excluder, cutout.transform_r, cutout.crs, cutout.shape) - tqdm_kwargs = dict( - ascii=False, - unit=" gridcells", - total=len(shapes), - desc="Compute availability matrix", - ) + tqdm_kwargs = { + "ascii": False, + "unit": " gridcells", + "total": len(shapes), + "desc": "Compute availability matrix", + } if nprocesses is None: if not disable_progressbar: @@ -736,13 +934,12 @@ def compute_availabilitymatrix( assert excluder.all_closed, ( "For parallelization all raster files in excluder must be closed" ) - kwargs = { - "initializer": _init_process, - "initargs": (shapes, *args), - "maxtasksperchild": 20, - "processes": nprocesses, - } - with mp.get_context("spawn").Pool(**kwargs) as pool: + with mp.get_context("spawn").Pool( + processes=nprocesses, + initializer=_init_process, + initargs=(shapes, *args), + maxtasksperchild=20, + ) as pool: if disable_progressbar: availability = list(pool.map(_process_func, shapes.index)) else: @@ -750,16 +947,32 @@ def compute_availabilitymatrix( tqdm(pool.imap(_process_func, shapes.index), **tqdm_kwargs) ) - availability = np.stack(availability)[:, ::-1] # flip axis, see Notes - if availability.ndim == 4: - availability = availability.squeeze(axis=1) + availability_arr = np.stack(availability)[:, ::-1] # flip axis, see Notes + if availability_arr.ndim == 4: + availability_arr = availability_arr.squeeze(axis=1) coords = [(shapes.index), ("y", cutout.data.y.data), ("x", cutout.data.x.data)] - return xr.DataArray(availability, coords=coords) + return xr.DataArray(availability_arr, coords=coords) -def maybe_swap_spatial_dims(ds, namex="x", namey="y"): +def maybe_swap_spatial_dims( + ds: Dataset | DataArray, namex: str = "x", namey: str = "y" +) -> Dataset | DataArray: """ - Swap order of spatial dimensions according to atlite concention. 
+ Ensure spatial coordinates follow atlite's axis ordering. + + Parameters + ---------- + ds : xarray.Dataset or xarray.DataArray + Object with spatial coordinates. + namex : str, optional + Name of the x dimension. + namey : str, optional + Name of the y dimension. + + Returns + ------- + xarray.Dataset or xarray.DataArray + Input object with spatial dimensions reversed if needed. """ swaps = {} lx, rx = ds.indexes[namex][[0, -1]] @@ -770,10 +983,10 @@ def maybe_swap_spatial_dims(ds, namex="x", namey="y"): if uy < ly: swaps[namey] = slice(None, None, -1) - return ds.isel(**swaps) if swaps else ds + return ds.isel(swaps) if swaps else ds -def _as_transform(x, y): +def _as_transform(x: pd.Index, y: pd.Index) -> rio.Affine: lx, rx = x[[0, -1]] ly, uy = y[[0, -1]] @@ -783,28 +996,30 @@ def _as_transform(x, y): return rio.Affine(dx, 0, lx - dx / 2, 0, dy, ly - dy / 2) -def regrid(ds, dimx, dimy, **kwargs): +def regrid( + ds: Dataset | DataArray, + dimx: pd.Index, + dimy: pd.Index, + **kwargs: Any, +) -> Dataset | DataArray: """ - Interpolate Dataset or DataArray `ds` to a new grid, using rasterio's - reproject facility. - - See also: https://mapbox.github.io/rasterio/topics/resampling.html + Reproject data to a new spatial grid. Parameters ---------- - ds : xr.Dataset|xr.DataArray - N-dim data on a spatial grid - dimx : pd.Index - New x-coordinates in destination crs. - dimx.name MUST refer to x-coord of ds. - dimy : pd.Index - New y-coordinates in destination crs. - dimy.name MUST refer to y-coord of ds. - **kwargs : - Arguments passed to rio.wrap.reproject; of note: - - resampling is one of gis.Resampling.{average,cubic,bilinear,nearest} - - src_crs, dst_crs define the different crs (default: EPSG 4326, ie latlong) + ds : xarray.Dataset or xarray.DataArray + Data on a spatial grid. + dimx : pandas.Index + Target x coordinates. ``dimx.name`` must match the source x dimension. + dimy : pandas.Index + Target y coordinates. 
``dimy.name`` must match the source y dimension. + **kwargs + Keyword arguments passed to ``rasterio.warp.reproject``. + Returns + ------- + xarray.Dataset or xarray.DataArray + Regridded object on the target coordinates. """ namex = dimx.name namey = dimy.name @@ -819,7 +1034,7 @@ def regrid(ds, dimx, dimy, **kwargs): kwargs.setdefault("src_crs", CRS.from_epsg(4326)) kwargs.setdefault("dst_crs", CRS.from_epsg(4326)) - def _reproject(src, **kwargs): + def _reproject(src: NDArray, **kwargs: Any) -> NDArray: shape = src.shape[:-2] + dst_shape src, trans = pad_extent( src, @@ -836,31 +1051,33 @@ def _reproject(src, **kwargs): if reprojected.ndim != src.ndim: reprojected = reprojected.squeeze(axis=0) - return reprojected + return cast("NDArray", reprojected) data_vars = ds.data_vars.values() if isinstance(ds, xr.Dataset) else (ds,) dtypes = {da.dtype for da in data_vars} assert len(dtypes) == 1, "regrid can only reproject datasets with homogeneous dtype" - return ( - xr.apply_ufunc( - _reproject, - ds, - input_core_dims=[[namey, namex]], - output_core_dims=[["yout", "xout"]], - output_dtypes=[dtypes.pop()], - dask_gufunc_kwargs=dict( - output_sizes={"yout": dst_shape[0], "xout": dst_shape[1]} - ), - dask="parallelized", - kwargs=kwargs, - ) - .rename({"yout": namey, "xout": namex}) - .assign_coords( - **{ + return cast( + "Dataset | DataArray", + ( + xr + .apply_ufunc( + _reproject, + ds, + input_core_dims=[[namey, namex]], + output_core_dims=[["yout", "xout"]], + output_dtypes=[dtypes.pop()], + dask_gufunc_kwargs={ + "output_sizes": {"yout": dst_shape[0], "xout": dst_shape[1]} + }, + dask="parallelized", + kwargs=kwargs, + ) + .rename({"yout": namey, "xout": namex}) + .assign_coords(**{ namey: (namey, dimy.data, ds.coords[namey].attrs), namex: (namex, dimx.data, ds.coords[namex].attrs), - } - ) - .assign_attrs(**ds.attrs) + }) + .assign_attrs(**ds.attrs) + ), ) diff --git a/atlite/hydro.py b/atlite/hydro.py index df08aacc..e449d1cf 100644 --- a/atlite/hydro.py +++ 
b/atlite/hydro.py @@ -1,9 +1,9 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module involving hydro operations in atlite. -""" +"""Module involving hydro operations in atlite.""" + +from __future__ import annotations import logging from collections import namedtuple @@ -20,17 +20,58 @@ Basins = namedtuple("Basins", ["plants", "meta", "shapes"]) -def find_basin(shapes, lon, lat): +def find_basin( + shapes: gpd.GeoSeries, + lon: float, + lat: float, +) -> int: + """ + Find the basin containing a point. + + Parameters + ---------- + shapes : geopandas.GeoSeries + Basin geometries indexed by basin id. + lon : float + Longitude of the point. + lat : float + Latitude of the point. + + Returns + ------- + int + Basin id containing the point. + """ hids = shapes.index[shapes.intersects(Point(lon, lat))] if len(hids) > 1: logger.warning( - f"The point ({lon}, {lat}) is in several basins: {hids}. " - "Assuming the first one." + "The point (%s, %s) is in several basins: %s. Assuming the first one.", + lon, + lat, + hids, ) - return hids[0] - - -def find_upstream_basins(meta, hid): + return int(hids[0]) + + +def find_upstream_basins( + meta: pd.DataFrame, + hid: int, +) -> list[int]: + """ + Collect all upstream basins of a basin. + + Parameters + ---------- + meta : pandas.DataFrame + Basin metadata with a ``NEXT_DOWN`` column. + hid : int + Basin id from which to start. + + Returns + ------- + list[int] + Basin ids including the selected basin and all upstream basins. + """ hids = [hid] i = 0 while i < len(hids): @@ -39,7 +80,28 @@ def find_upstream_basins(meta, hid): return hids -def determine_basins(plants, hydrobasins, show_progress=False): +def determine_basins( + plants: pd.DataFrame, + hydrobasins: str | gpd.GeoDataFrame, + show_progress: bool = False, +) -> Basins: + """ + Determine local and upstream basins for hydro plants. 
+ + Parameters + ---------- + plants : pandas.DataFrame + Plant table with ``lon`` and ``lat`` columns. + hydrobasins : str or geopandas.GeoDataFrame + HydroBASINS data source or loaded basin geometries. + show_progress : bool, default False + Whether to show a progress bar. + + Returns + ------- + Basins + Basin assignments, metadata, and geometries for the plants. + """ if isinstance(hydrobasins, str): hydrobasins = gpd.read_file(hydrobasins) @@ -48,9 +110,12 @@ def determine_basins(plants, hydrobasins, show_progress=False): f"but received `type(hydrobasins) = {type(hydrobasins)}`" ) - missing_columns = pd.Index( - ["HYBAS_ID", "DIST_MAIN", "NEXT_DOWN", "geometry"] - ).difference(hydrobasins.columns) + missing_columns = pd.Index([ + "HYBAS_ID", + "DIST_MAIN", + "NEXT_DOWN", + "geometry", + ]).difference(hydrobasins.columns) assert missing_columns.empty, ( "Couldn't find the column(s) {} in the hydrobasins dataset.".format( ", ".join(missing_columns) @@ -62,7 +127,7 @@ def determine_basins(plants, hydrobasins, show_progress=False): meta = hydrobasins[hydrobasins.columns.difference(("geometry",))] shapes = hydrobasins["geometry"] - plant_basins = [] + plant_basins: list[tuple[int, list[int]]] = [] for p in tqdm( plants.itertuples(), disable=not show_progress, @@ -70,18 +135,40 @@ def determine_basins(plants, hydrobasins, show_progress=False): ): hid = find_basin(shapes, p.lon, p.lat) plant_basins.append((hid, find_upstream_basins(meta, hid))) - plant_basins = pd.DataFrame( + plant_basins_df = pd.DataFrame( plant_basins, columns=["hid", "upstream"], index=plants.index ) - unique_basins = pd.Index(plant_basins["upstream"].sum()).unique().rename("hid") - return Basins(plant_basins, meta.loc[unique_basins], shapes.loc[unique_basins]) + unique_basins = pd.Index(plant_basins_df["upstream"].sum()).unique().rename("hid") + return Basins(plant_basins_df, meta.loc[unique_basins], shapes.loc[unique_basins]) def shift_and_aggregate_runoff_for_plants( - basins, runoff, 
flowspeed=1, show_progress=False -): - inflow = xr.DataArray( + basins: Basins, + runoff: xr.DataArray, + flowspeed: float = 1, + show_progress: bool = False, +) -> xr.DataArray: + """ + Shift basin runoff in time and aggregate it per plant. + + Parameters + ---------- + basins : Basins + Basin mappings and metadata for the plants. + runoff : xarray.DataArray + Runoff time series indexed by ``hid`` and ``time``. + flowspeed : float, default 1 + Flow speed in m/s used to convert distance to travel time. + show_progress : bool, default False + Whether to show a progress bar. + + Returns + ------- + xarray.DataArray + Plant inflow time series indexed by ``plant`` and ``time``. + """ + inflow: xr.DataArray = xr.DataArray( np.zeros((len(basins.plants), runoff.indexes["time"].size)), [("plant", basins.plants.index), runoff.coords["time"]], ) @@ -91,12 +178,12 @@ def shift_and_aggregate_runoff_for_plants( disable=not show_progress, desc="Shift and aggregate runoff by plant", ): - inflow_plant = inflow.loc[dict(plant=ppl.Index)] - distances = ( + inflow_plant: xr.DataArray = inflow.loc[{"plant": ppl.Index}] + distances: pd.Series = ( basins.meta.loc[ppl.upstream, "DIST_MAIN"] - basins.meta.at[ppl.hid, "DIST_MAIN"] ) - nhours = (distances / (flowspeed * 3.6) + 0.5).astype(int) + nhours: pd.Series = (distances / (flowspeed * 3.6) + 0.5).astype(int) for b in ppl.upstream: inflow_plant += runoff.sel(hid=b).roll(time=nhours.at[b]) diff --git a/atlite/pv/__init__.py b/atlite/pv/__init__.py index c5528ebf..e05416cb 100644 --- a/atlite/pv/__init__.py +++ b/atlite/pv/__init__.py @@ -1,6 +1,38 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -atlite pv module. 
-""" +"""Photovoltaic modeling functions.""" + +from __future__ import annotations + +from atlite.pv.irradiation import ( + DiffuseHorizontalIrrad, + TiltedDiffuseIrrad, + TiltedDirectIrrad, + TiltedGroundIrrad, + TiltedIrradiation, +) +from atlite.pv.orientation import ( + SurfaceOrientation, + get_orientation, + make_constant, + make_latitude, + make_latitude_optimal, +) +from atlite.pv.solar_panel_model import SolarPanelModel +from atlite.pv.solar_position import SolarPosition + +__all__: list[str] = [ + "DiffuseHorizontalIrrad", + "TiltedDirectIrrad", + "TiltedDiffuseIrrad", + "TiltedGroundIrrad", + "TiltedIrradiation", + "SurfaceOrientation", + "get_orientation", + "make_constant", + "make_latitude", + "make_latitude_optimal", + "SolarPanelModel", + "SolarPosition", +] diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index 8275d58f..52941d39 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -2,20 +2,61 @@ # # SPDX-License-Identifier: MIT +"""Solar irradiation decomposition and transposition models.""" + +from __future__ import annotations + import logging +from typing import TYPE_CHECKING import numpy as np from dask.array import cos, fmax, fmin, radians, sin, sqrt +if TYPE_CHECKING: + from atlite._types import ( + ClearskyModel, + DataArray, + Dataset, + IrradiationType, + TrackingType, + TrigonModel, + ) + logger = logging.getLogger(__name__) -def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): - # Clearsky model from Reindl 1990 to split downward radiation into direct - # and diffuse contributions. Should switch to more up-to-date model, f.ex. - # Ridley et al. (2010) http://dx.doi.org/10.1016/j.renene.2009.07.018 , - # Lauret et al. 
(2013):http://dx.doi.org/10.1016/j.renene.2012.01.049 +def DiffuseHorizontalIrrad( + ds: Dataset, + solar_position: Dataset, + clearsky_model: ClearskyModel | None, + influx: DataArray, +) -> DataArray: + """ + Estimate diffuse horizontal irradiation from total horizontal irradiation. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing top-of-atmosphere irradiation and, for the enhanced + model, temperature and humidity. + solar_position : xarray.Dataset + Solar position with an ``altitude`` variable in radians. + clearsky_model : str or None + Reindl clearsky model to use, either ``"simple"`` or ``"enhanced"``. + If None, the model is chosen from the available data. + influx : xarray.DataArray + Total horizontal irradiation. + + Returns + ------- + xarray.DataArray + Diffuse horizontal irradiation. + Raises + ------ + KeyError + If ``clearsky_model`` is not ``'simple'`` or ``'enhanced'``. + """ sinaltitude = sin(solar_position["altitude"]) influx_toa = ds["influx_toa"] @@ -24,15 +65,9 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): "enhanced" if "temperature" in ds and "humidity" in ds else "simple" ) - # Reindl 1990 clearsky model - - k = influx / influx_toa # clearsky index - # k.values[k.values > 1.0] = 1.0 - # k = k.rename('clearsky index') + k = influx / influx_toa if clearsky_model == "simple": - # Simple Reindl model without ambient air temperature and - # relative humidity fraction = ( ((k > 0.0) & (k <= 0.3)) * fmin(1.0, 1.020 - 0.254 * k + 0.0123 * sinaltitude) @@ -41,8 +76,6 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): + (k >= 0.78) * fmax(0.1, 0.486 * k - 0.182 * sinaltitude) ) elif clearsky_model == "enhanced": - # Enhanced Reindl model with ambient air temperature and relative - # humidity T = ds["temperature"] rh = ds["humidity"] @@ -66,16 +99,37 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): else: raise KeyError("`clearsky model` must be chosen 
from 'simple' and 'enhanced'") - # Set diffuse fraction to one when the sun isn't up - # fraction = fraction.where(sinaltitude >= sin(radians(threshold))).fillna(1.0) - # fraction = fraction.rename('fraction index') - return (influx * fraction).rename("diffuse horizontal") -def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse): - # Hay-Davies Model +def TiltedDiffuseIrrad( + ds: Dataset, + solar_position: Dataset, + surface_orientation: Dataset, + direct: DataArray, + diffuse: DataArray, +) -> DataArray: + """ + Calculate diffuse irradiation on a tilted surface. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing top-of-atmosphere irradiation. + solar_position : xarray.Dataset + Solar position with an ``altitude`` variable in radians. + surface_orientation : xarray.Dataset + Surface orientation including ``cosincidence`` and ``slope``. + direct : xarray.DataArray + Direct horizontal irradiation. + diffuse : xarray.DataArray + Diffuse horizontal irradiation. + Returns + ------- + xarray.DataArray + Diffuse tilted irradiation. + """ sinaltitude = sin(solar_position["altitude"]) influx_toa = ds["influx_toa"] @@ -85,13 +139,9 @@ def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse) influx = direct + diffuse with np.errstate(divide="ignore", invalid="ignore"): - # brightening factor f = sqrt(direct / influx).fillna(0.0) - - # anisotropy factor A = direct / influx_toa - # geometric factor R_b = cosincidence / sinaltitude diffuse_t = ( @@ -101,13 +151,11 @@ def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse) + A * R_b ) * diffuse - # fixup: clip all negative values (unclear why it gets negative) - # note: REatlas does not do the fixup - if logger.isEnabledFor(logging.WARNING): - if ((diffuse_t < 0.0) & (sinaltitude > sin(radians(1.0)))).any(): - logger.warning( - "diffuse_t exhibits negative values above altitude threshold." 
- ) + if ( + logger.isEnabledFor(logging.WARNING) + and ((diffuse_t < 0.0) & (sinaltitude > sin(radians(1.0)))).any() + ): + logger.warning("diffuse_t exhibits negative values above altitude threshold.") with np.errstate(invalid="ignore"): diffuse_t = diffuse_t.clip(min=0).fillna(0) @@ -115,46 +163,105 @@ def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse) return diffuse_t.rename("diffuse tilted") -def TiltedDirectIrrad(solar_position, surface_orientation, direct): +def TiltedDirectIrrad( + solar_position: Dataset, surface_orientation: Dataset, direct: DataArray +) -> DataArray: + """ + Calculate direct irradiation on a tilted surface. + + Parameters + ---------- + solar_position : xarray.Dataset + Solar position with an ``altitude`` variable in radians. + surface_orientation : xarray.Dataset + Surface orientation including ``cosincidence``. + direct : xarray.DataArray + Direct horizontal irradiation. + + Returns + ------- + xarray.DataArray + Direct tilted irradiation. + """ sinaltitude = sin(solar_position["altitude"]) cosincidence = surface_orientation["cosincidence"] - # geometric factor R_b = cosincidence / sinaltitude return (R_b * direct).rename("direct tilted") -def _albedo(ds, influx): - if "albedo" in ds: - albedo = ds["albedo"] - elif "outflux" in ds: - albedo = (ds["outflux"] / influx.where(influx != 0)).fillna(0).clip(max=1) - else: - raise AssertionError( - "Need either albedo or outflux as a variable in the dataset. " - "Check your cutout and dataset module." - ) +def _albedo(ds: Dataset, influx: DataArray) -> DataArray: + """ + Retrieve or derive surface albedo from the dataset. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing either ``albedo`` or ``outflux``. + influx : xarray.DataArray + Downward surface irradiation used when deriving albedo from outflux. + + Returns + ------- + xarray.DataArray + Surface albedo. 
- return albedo + Raises + ------ + AssertionError + If the dataset lacks both ``albedo`` and ``outflux`` variables. + """ + if "albedo" in ds: + return ds["albedo"] + if "outflux" in ds: + return (ds["outflux"] / influx.where(influx != 0)).fillna(0).clip(max=1) + raise AssertionError( + "Need either albedo or outflux as a variable in the dataset. " + "Check your cutout and dataset module." + ) + + +def TiltedGroundIrrad( + ds: Dataset, + solar_position: Dataset, + surface_orientation: Dataset, + influx: DataArray, +) -> DataArray: + """ + Calculate ground-reflected irradiation on a tilted surface. + Parameters + ---------- + ds : xarray.Dataset + Dataset containing albedo information or reflected outflux. + solar_position : xarray.Dataset + Solar position dataset. + surface_orientation : xarray.Dataset + Surface orientation including ``slope``. + influx : xarray.DataArray + Total horizontal irradiation. -def TiltedGroundIrrad(ds, solar_position, surface_orientation, influx): + Returns + ------- + xarray.DataArray + Ground-reflected tilted irradiation. + """ surface_slope = surface_orientation["slope"] ground_t = influx * _albedo(ds, influx) * (1.0 - cos(surface_slope)) / 2.0 return ground_t.rename("ground tilted") def TiltedIrradiation( - ds, - solar_position, - surface_orientation, - trigon_model, - clearsky_model, - tracking=0, - altitude_threshold=1.0, - irradiation="total", -): + ds: Dataset, + solar_position: Dataset, + surface_orientation: Dataset, + trigon_model: TrigonModel, + clearsky_model: ClearskyModel | None, + tracking: TrackingType | int = 0, + altitude_threshold: float = 1.0, + irradiation: IrradiationType = "total", +) -> DataArray: """ Calculate the irradiation on a tilted surface. @@ -168,7 +275,8 @@ def TiltedIrradiation( surface_orientation : xarray.Dataset Surface orientation calculated using atlite.orientation.SurfaceOrientation. trigon_model : str - Type of trigonometry model. Defaults to 'simple'if used via `convert_irradiation`. 
+ Type of trigonometry model. Defaults to 'simple' if used via + `convert_irradiation`. clearsky_model : str or None Either the 'simple' or the 'enhanced' Reindl clearsky model. The default choice of None will choose dependending on @@ -176,6 +284,8 @@ def TiltedIrradiation( incorporates ambient air temperature and relative humidity. NOTE: this option is only used if the used climate dataset doesn't provide direct and diffuse irradiation separately! + tracking : int or str, default 0 + Type of solar tracking. 0 for fixed, other values for tracking modes. altitude_threshold : float Threshold for solar altitude in degrees. Values in range (0, altitude_threshold] will be set to zero. Default value equals 1.0 degrees. @@ -192,11 +302,16 @@ def TiltedIrradiation( result : xarray.DataArray The desired irradiation quantity on the tilted surface. + Raises + ------ + AssertionError + If the dataset lacks required irradiation variables. + ValueError + If ``irradiation`` is not a recognized type. """ influx_toa = ds["influx_toa"] - def clip(influx, influx_max): - # use .data in clip due to dask-xarray incompatibilities + def clip(influx: DataArray, influx_max: DataArray) -> DataArray: return influx.clip(min=0, max=influx_max.transpose(*influx.dims).data) if "influx" in ds: @@ -211,6 +326,7 @@ def clip(influx, influx_max): "Need either influx or influx_direct and influx_diffuse in the " "dataset. Check your cutout and dataset module." ) + if trigon_model == "simple": k = surface_orientation["cosincidence"] / sin(solar_position["altitude"]) if tracking != "dual": @@ -243,10 +359,9 @@ def clip(influx, influx_max): result = diffuse_t.rename("diffuse tilted") elif irradiation == "ground": result = ground_t.rename("ground tilted") - - # The solar_position algorithms have a high error for small solar altitude - # values, leading to big overall errors from the 1/sinaltitude factor. - # => Suppress irradiation below solar altitudes of 1 deg. 
+ else: + msg = f"Unknown irradiation type: {irradiation}" + raise ValueError(msg) cap_alt = solar_position["altitude"] < radians(altitude_threshold) result = result.where(~(cap_alt | (direct + diffuse <= 0.01)), 0) diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index d5240546..27e645d9 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -2,31 +2,56 @@ # # SPDX-License-Identifier: MIT +"""Panel orientation and tilt angle utilities.""" + +from __future__ import annotations + import sys +from typing import TYPE_CHECKING, Any import numpy as np import xarray as xr from dask.array import arccos, arcsin, arctan, cos, logical_and, radians, sin from numpy import pi +if TYPE_CHECKING: + from collections.abc import Callable + + from atlite._types import Dataset, NumericArray, OrientationName, TrackingType + -def get_orientation(name, **params): +def get_orientation( + name: OrientationName | dict[str, Any], **params: Any +) -> Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]]: """ - Definitions: - -`slope` is the angle between ground and panel. - -`azimuth` is the clockwise angle from North. - i.e. azimuth = 180 faces exactly South + Return an orientation factory by name. + + Parameters + ---------- + name : str or dict + Orientation name or parameter dictionary containing ``name``. + **params + Parameters passed to the orientation factory. + + Returns + ------- + callable + Orientation function returning ``slope`` and ``azimuth``. 
""" if isinstance(name, dict): params = name name = params.pop("name", "constant") - return getattr(sys.modules[__name__], f"make_{name}")(**params) + result: Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]] = ( + getattr(sys.modules[__name__], f"make_{name}")(**params) + ) + return result -def make_latitude_optimal(): +def make_latitude_optimal() -> Callable[ + [NumericArray, NumericArray, Dataset], dict[str, xr.DataArray] +]: """ - Returns an optimal tilt angle for the given ``lat``, assuming that the - panel is facing towards the equator, using a simple method from [1]. + Return an optimal tilt angle assuming the panel faces the equator. This method only works for latitudes between 0 and 50. For higher latitudes, a static 40 degree angle is returned. @@ -40,14 +65,32 @@ def make_latitude_optimal(): [2] http://dx.doi.org/10.1016/j.solener.2010.12.014 [3] https://github.com/renewables-ninja/gsee/blob/master/gsee/pv.py - Parameters - ---------- - lat : float - Latitude in degrees. - + Returns + ------- + callable + Orientation function returning latitude-optimal ``slope`` and ``azimuth``. """ - def latitude_optimal(lon, lat, solar_position): + def latitude_optimal( + lon: NumericArray, lat: NumericArray, solar_position: Dataset + ) -> dict[str, xr.DataArray]: + """ + Build an orientation with latitude-dependent optimal tilt. + + Parameters + ---------- + lon : xarray.DataArray + Longitudes in radians. + lat : xarray.DataArray + Latitudes in radians. + solar_position : xarray.Dataset + Solar position dataset. + + Returns + ------- + dict + Mapping with ``slope`` and ``azimuth``. 
+ """ slope = np.empty_like(lat.values) below_25 = np.abs(lat.values) <= np.radians(25) @@ -59,39 +102,142 @@ def latitude_optimal(lon, lat, solar_position): ) + np.radians(0.31) slope[~below_50] = np.radians(40.0) - # South orientation for panels on northern hemisphere and vice versa azimuth = np.where(lat.values < 0, 0, pi) - return dict( - slope=xr.DataArray(slope, coords=lat.coords), - azimuth=xr.DataArray(azimuth, coords=lat.coords), - ) + return { + "slope": xr.DataArray(slope, coords=lat.coords), + "azimuth": xr.DataArray(azimuth, coords=lat.coords), + } return latitude_optimal -def make_constant(slope, azimuth): - slope = radians(slope) - azimuth = radians(azimuth) +def make_constant( + slope: float, azimuth: float +) -> Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]]: + """ + Create an orientation function with constant slope and azimuth. - def constant(lon, lat, solar_position): - return dict(slope=slope, azimuth=azimuth) + Parameters + ---------- + slope : float + Surface slope in degrees. + azimuth : float + Surface azimuth in degrees clockwise from north. + + Returns + ------- + callable + Orientation function returning constant ``slope`` and ``azimuth``. + """ + slope_rad = radians(slope) + azimuth_rad = radians(azimuth) + + def constant( + lon: NumericArray, lat: NumericArray, solar_position: Dataset + ) -> dict[str, NumericArray]: + """ + Return the configured constant panel orientation. + + Parameters + ---------- + lon : xarray.DataArray + Longitudes in radians. + lat : xarray.DataArray + Latitudes in radians. + solar_position : xarray.Dataset + Solar position dataset. + + Returns + ------- + dict + Mapping with constant ``slope`` and ``azimuth``. 
+ """ + return {"slope": slope_rad, "azimuth": azimuth_rad} return constant -def make_latitude(azimuth=180): - azimuth = radians(azimuth) +def make_latitude( + azimuth: float = 180, +) -> Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]]: + """ + Create an orientation function with slope equal to latitude. - def latitude(lon, lat, solar_position): - return dict(slope=lat, azimuth=azimuth) + Parameters + ---------- + azimuth : float, default 180 + Surface azimuth in degrees clockwise from north. + + Returns + ------- + callable + Orientation function using latitude as slope. + """ + azimuth_rad = radians(azimuth) + + def latitude( + lon: NumericArray, lat: NumericArray, solar_position: Dataset + ) -> dict[str, NumericArray]: + """ + Return an orientation with slope equal to latitude. + + Parameters + ---------- + lon : xarray.DataArray + Longitudes in radians. + lat : xarray.DataArray + Latitudes in radians. + solar_position : xarray.Dataset + Solar position dataset. + + Returns + ------- + dict + Mapping with latitude-based ``slope`` and constant ``azimuth``. + """ + return {"slope": lat, "azimuth": azimuth_rad} return latitude -def SurfaceOrientation(ds, solar_position, orientation, tracking=None): +def SurfaceOrientation( + ds: Dataset, + solar_position: Dataset, + orientation: Callable[ + [NumericArray, NumericArray, Dataset], dict[str, NumericArray] + ], + tracking: TrackingType = None, +) -> Dataset: """ Compute cos(incidence) for slope and panel azimuth. + Parameters + ---------- + ds : xarray.Dataset + Weather dataset containing ``lon`` and ``lat`` coordinates in degrees. + solar_position : xarray.Dataset + Dataset with solar position variables ``altitude`` and ``azimuth`` + in radians. + orientation : callable + Function returning a dict with ``slope`` and ``azimuth`` (in radians) + given ``(lon, lat, solar_position)``. Typically produced by + :func:`get_orientation`. 
+ tracking : {None, 'horizontal', 'tilted_horizontal', 'vertical', 'dual'}, optional + Tracking type. ``None`` for fixed panels, ``'horizontal'`` for 1-axis + horizontal tracking, ``'tilted_horizontal'`` for 1-axis horizontal + tracking of a tilted panel, ``'vertical'`` for 1-axis vertical + tracking, or ``'dual'`` for 2-axis tracking. + + Returns + ------- + xarray.Dataset + Dataset with ``cosincidence``, ``slope``, and ``azimuth``. + + Raises + ------ + AssertionError + If ``tracking`` is not a recognized tracking type. + References ---------- [1] Sproul, A. B., Derivation of the solar geometric relationships using @@ -104,9 +250,9 @@ def SurfaceOrientation(ds, solar_position, orientation, tracking=None): lon = radians(ds["lon"]) lat = radians(ds["lat"]) - orientation = orientation(lon, lat, solar_position) - surface_slope = orientation["slope"] - surface_azimuth = orientation["azimuth"] + orientation_dict = orientation(lon, lat, solar_position) + surface_slope = orientation_dict["slope"] + surface_azimuth = orientation_dict["azimuth"] sun_altitude = solar_position["altitude"] sun_azimuth = solar_position["azimuth"] @@ -116,24 +262,19 @@ def SurfaceOrientation(ds, solar_position, orientation, tracking=None): surface_azimuth - sun_azimuth ) + cos(surface_slope) * sin(sun_altitude) - elif tracking == "horizontal": # horizontal tracking with horizontal axis - axis_azimuth = orientation[ - "azimuth" - ] # here orientation['azimuth'] refers to the azimuth of the tracker axis. 
+ elif tracking == "horizontal": + axis_azimuth = orientation_dict["azimuth"] rotation = arctan( (cos(sun_altitude) / sin(sun_altitude)) * sin(sun_azimuth - axis_azimuth) ) surface_slope = abs(rotation) surface_azimuth = axis_azimuth + arcsin(sin(rotation) / sin(surface_slope)) - # the 2nd part yields +/-1 and determines if the panel is facing east or west cosincidence = cos(surface_slope) * sin(sun_altitude) + sin( surface_slope ) * cos(sun_altitude) * cos(sun_azimuth - surface_azimuth) - elif tracking == "tilted_horizontal": # horizontal tracking with tilted axis' - axis_tilt = orientation[ - "slope" - ] # here orientation['slope'] refers to the tilt of the tracker axis. + elif tracking == "tilted_horizontal": + axis_tilt = orientation_dict["slope"] rotation = arctan( (cos(sun_altitude) * sin(sun_azimuth - surface_azimuth)) @@ -168,29 +309,25 @@ def SurfaceOrientation(ds, solar_position, orientation, tracking=None): + cos(axis_tilt) * sin(sun_altitude) ) + sin(rotation) * cos(sun_altitude) * sin(sun_azimuth - surface_azimuth) - elif tracking == "vertical": # vertical tracking, surface azimuth = sun_azimuth + elif tracking == "vertical": cosincidence = sin(surface_slope) * cos(sun_altitude) + cos( surface_slope ) * sin(sun_altitude) - elif tracking == "dual": # both vertical and horizontal tracking + elif tracking == "dual": cosincidence = np.float64(1.0) else: - assert False, ( + msg = ( "Values describing tracking system must be None for no tracking," + "'horizontal' for 1-axis horizontal tracking," - + "tilted_horizontal' for 1-axis horizontal tracking of tilted panle," - + "vertical' for 1-axis vertical tracking, or 'dual' for 2-axis tracking" + + "'tilted_horizontal' for 1-axis horizontal tracking of tilted panel," + + "'vertical' for 1-axis vertical tracking, or 'dual' for 2-axis tracking" ) + raise AssertionError(msg) - # fixup incidence angle: if the panel is badly oriented and the sun shines - # on the back of the panel (incidence angle > 90degree), the 
irradiation - # would be negative instead of 0; this is prevented here. cosincidence = cosincidence.clip(min=0) - return xr.Dataset( - { - "cosincidence": cosincidence, - "slope": surface_slope, - "azimuth": surface_azimuth, - } - ) + return xr.Dataset({ + "cosincidence": cosincidence, + "slope": surface_slope, + "azimuth": surface_azimuth, + }) diff --git a/atlite/pv/solar_panel_model.py b/atlite/pv/solar_panel_model.py index e713635d..2fc02004 100644 --- a/atlite/pv/solar_panel_model.py +++ b/atlite/pv/solar_panel_model.py @@ -2,14 +2,23 @@ # # SPDX-License-Identifier: MIT +"""Solar panel electrical performance models.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal + import numpy as np -# Huld model was copied from gsee -- global solar energy estimator -# by Stefan Pfenninger -# https://github.com/renewables-ninja/gsee/blob/master/gsee/pv.py +if TYPE_CHECKING: + import xarray as xr + from atlite._types import DataArray -def _power_huld(irradiance, t_amb, pc): + +def _power_huld( + irradiance: DataArray, t_amb: DataArray, pc: dict[str, Any] +) -> DataArray: """ AC power per capacity predicted by Huld model, based on W/m2 irradiance. @@ -18,6 +27,11 @@ def _power_huld(irradiance, t_amb, pc): [1] Huld, T. et al., 2010. Mapping the performance of PV modules, effects of module type and data averaging. Solar Energy, 84(2), p.324-338. DOI: 10.1016/j.solener.2009.12.002 + + Returns + ------- + xr.DataArray + Specific generation in kWh/kWp. 
""" # normalized module temperature T_ = (pc["c_temp_amb"] * t_amb + pc["c_temp_irrad"] * irradiance) - pc["r_tmod"] @@ -39,21 +53,25 @@ def _power_huld(irradiance, t_amb, pc): da = G_ * eff * pc.get("inverter_efficiency", 1.0) da.attrs["units"] = "kWh/kWp" - da = da.rename("specific generation") + return da.rename("specific generation") - return da - -def _power_bofinger(irradiance, t_amb, pc): +def _power_bofinger( + irradiance: DataArray, t_amb: DataArray, pc: dict[str, Any] +) -> DataArray: """ - AC power per capacity predicted by bofinger model, based on W/m2 - irradiance. + Predict AC power per capacity using the Bofinger model. Maximum power point tracking is assumed. [2] Hans Beyer, Gerd Heilscher and Stefan Bofinger, 2004. A robust model for the MPP performance of different types of PV-modules applied for the performance check of grid connected systems. + + Returns + ------- + xr.DataArray + Specific generation in kWh/kWp. """ fraction = (pc["NOCT"] - pc["Tamb"]) / pc["Intc"] @@ -74,12 +92,35 @@ def _power_bofinger(irradiance, t_amb, pc): return power.rename("AC power") -def SolarPanelModel(ds, irradiance, pc): - model = pc.get("model", "huld") +def SolarPanelModel( + ds: xr.Dataset, irradiance: DataArray, pc: dict[str, Any] +) -> DataArray: + """ + Compute PV power output for the selected panel model. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing ambient temperature. + irradiance : xarray.DataArray + Plane-of-array irradiation. + pc : dict + Panel configuration including the model parameters. + + Returns + ------- + xarray.DataArray + Specific PV power output. + + Raises + ------ + AssertionError + If the panel model is unknown. 
+ """ + model: Literal["huld", "bofinger"] = pc.get("model", "huld") if model == "huld": return _power_huld(irradiance, ds["temperature"], pc) - elif model == "bofinger": + if model == "bofinger": return _power_bofinger(irradiance, ds["temperature"], pc) - else: - AssertionError(f"Unknown panel model: {model}") + raise AssertionError(f"Unknown panel model: {model}") diff --git a/atlite/pv/solar_position.py b/atlite/pv/solar_position.py index 494cd433..30bf3813 100644 --- a/atlite/pv/solar_position.py +++ b/atlite/pv/solar_position.py @@ -2,6 +2,11 @@ # # SPDX-License-Identifier: MIT +"""Solar position calculation utilities.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING from warnings import warn import pandas as pd @@ -9,8 +14,11 @@ from dask.array import arccos, arcsin, arctan2, cos, radians, sin from numpy import pi +if TYPE_CHECKING: + from atlite._types import Dataset -def SolarPosition(ds, time_shift="0H"): + +def SolarPosition(ds: Dataset, time_shift: str | pd.Timedelta = "0H") -> Dataset: """ Compute solar azimuth and altitude. @@ -27,16 +35,18 @@ def SolarPosition(ds, time_shift="0H"): instantenous data (e.g. SARAH). Must be parseable by pandas.to_timedelta(). Default: "0H" + Returns + ------- + xarray.Dataset + Dataset with ``altitude`` and ``azimuth`` in radians. + References ---------- - [1] Michalsky, J. J., The astronomical almanac’s algorithm for approximate + [1] Michalsky, J. J., The astronomical almanac's algorithm for approximate solar position (1950–2050), Solar Energy, 40(3), 227–235 (1988). [2] Sproul, A. B., Derivation of the solar geometric relationships using vector analysis, Renewable Energy, 32(7), 1187–1205 (2007). [3] Kalogirou, Solar Energy Engineering (2009). - - More accurate algorithms would be - --------------------------------- [4] I. Reda and A. Andreas, Solar position algorithm for solar radiation applications. Solar Energy, vol. 76, no. 5, pp. 577-589, 2004. [5] I. Reda and A. 
Andreas, Corrigendum to Solar position algorithm for @@ -44,11 +54,6 @@ def SolarPosition(ds, time_shift="0H"): [6] Blanc, P., & Wald, L., The SG2 algorithm for a fast and accurate computation of the position of the sun for multi-decadal time period, Solar Energy, 86(10), 3072–3083 (2012). - - The unfortunately quite computationally intensive SPA algorithm [4,5] has - been implemented using numba or plain numpy for a single location at - https://github.com/pvlib/pvlib-python/blob/master/pvlib/spa.py. - """ # Act like a getter if these return variables are already in ds rvs = { @@ -60,10 +65,14 @@ def SolarPosition(ds, time_shift="0H"): return ds[rvs].rename({v: v.replace("solar_", "") for v in rvs}) warn( - """The calculation method and handling of solar position variables will change. - The solar position will in the future be a permanent variables of a cutout. - Recreate your cutout to remove this warning and permanently include the solar position variables into your cutout.""", + ( + "The calculation method and handling of solar position variables will " + "change. The solar position will in the future be a permanent variable of " + "a cutout. Recreate your cutout to remove this warning and permanently " + "include the solar position variables into your cutout." + ), DeprecationWarning, + stacklevel=2, ) # up to h and dec from [1] @@ -116,6 +125,4 @@ def SolarPosition(ds, time_shift="0H"): az.attrs["units"] = "rad" vars = {da.name: da for da in [alt, az]} - solar_position = xr.Dataset(vars) - - return solar_position + return xr.Dataset(vars) diff --git a/atlite/resource.py b/atlite/resource.py index 0cc0443a..3177d9ed 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -1,10 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module for providing access to external ressources, like windturbine or pv -panel configurations. 
-""" +"""Module for accessing external resources like wind turbine and PV panel configurations.""" from __future__ import annotations @@ -13,7 +10,7 @@ import re from operator import itemgetter from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np import pandas as pd @@ -26,7 +23,6 @@ logger = logging.getLogger(name=__name__) - RESOURCE_DIRECTORY = Path(__file__).parent / "resources" WINDTURBINE_DIRECTORY = RESOURCE_DIRECTORY / "windturbine" SOLARPANEL_DIRECTORY = RESOURCE_DIRECTORY / "solarpanel" @@ -37,18 +33,42 @@ from typing_extensions import NotRequired + from atlite._types import DataArray, NDArray, PathLike + class TurbineConfig(TypedDict): - V: np.ndarray - POW: np.ndarray + """Wind turbine configuration dictionary.""" + + V: NDArray + POW: NDArray P: float hub_height: float | int name: NotRequired[str] manufacturer: NotRequired[str] source: NotRequired[str] + class PanelConfig(TypedDict): + """Solar panel configuration dictionary.""" + + model: NotRequired[Literal["huld", "bofinger"]] + efficiency: NotRequired[float] + A: NotRequired[float] + B: NotRequired[float] + C: NotRequired[float] + name: NotRequired[str] + source: NotRequired[str] + + class CSPConfig(TypedDict): + """CSP installation configuration dictionary.""" + + efficiency: DataArray + path: PathLike + technology: NotRequired[str] + name: NotRequired[str] + source: NotRequired[str] + def get_windturbineconfig( - turbine: str | Path | dict, add_cutout_windspeed: bool = True + turbine: str | PathLike | dict[str, Any], add_cutout_windspeed: bool = True ) -> TurbineConfig: """ Load the wind 'turbine' configuration. @@ -78,38 +98,47 @@ def get_windturbineconfig( config : dict Config with details on the turbine + Raises + ------ + KeyError + If ``turbine`` is not a str, Path, or dict. 
+ """ - if not isinstance(turbine, (str | Path | dict)): + if not isinstance(turbine, (str, Path, dict)): raise KeyError( f"`turbine` must be a str, pathlib.Path or dict, but is {type(turbine)}." ) if isinstance(turbine, str) and turbine.startswith("oedb:"): - conf = get_oedb_windturbineconfig(turbine[len("oedb:") :]) + conf = cast( + "dict[str, Any]", get_oedb_windturbineconfig(turbine[len("oedb:") :]) + ) - elif isinstance(turbine, (str | Path)): + elif isinstance(turbine, (str, Path)): if isinstance(turbine, str): turbine_path = windturbines[turbine.replace(".yaml", "")] elif isinstance(turbine, Path): turbine_path = turbine - with open(turbine_path) as f: + with Path(turbine_path).open() as f: conf = yaml.safe_load(f) - conf = dict( - V=np.array(conf["V"]), - POW=np.array(conf["POW"]), - hub_height=conf["HUB_HEIGHT"], - P=np.max(conf["POW"]), - ) + conf = { + "V": np.array(conf["V"]), + "POW": np.array(conf["POW"]), + "hub_height": conf["HUB_HEIGHT"], + "P": np.max(conf["POW"]), + } elif isinstance(turbine, dict): conf = turbine - return _validate_turbine_config_dict(conf, add_cutout_windspeed) + return _validate_turbine_config_dict( + cast("dict[str, Any]", conf), add_cutout_windspeed + ) -def get_solarpanelconfig(panel): +def get_solarpanelconfig(panel: str | PathLike) -> PanelConfig: """ Load the 'panel'.yaml file from local disk and provide a solar panel dict. 
@@ -127,7 +156,7 @@ def get_solarpanelconfig(panel): Config with details on the solarpanel """ - assert isinstance(panel, (str | Path)) + assert isinstance(panel, (str, Path)) if isinstance(panel, str): panel_path = solarpanels[panel.replace(".yaml", "")] @@ -135,16 +164,13 @@ def get_solarpanelconfig(panel): elif isinstance(panel, Path): panel_path = panel - with open(panel_path) as f: - conf = yaml.safe_load(f) + with Path(panel_path).open() as f: + return cast("PanelConfig", yaml.safe_load(f)) - return conf - -def get_cspinstallationconfig(installation): +def get_cspinstallationconfig(installation: str | PathLike) -> CSPConfig: """ - Load the 'installation'.yaml file from local disk to provide the system - efficiencies. + Load a CSP installation configuration from a YAML file. Parameters ---------- @@ -160,7 +186,7 @@ def get_cspinstallationconfig(installation): Config with details on the CSP installation. """ - assert isinstance(installation, (str | Path)) + assert isinstance(installation, (str, Path)) if isinstance(installation, str): installation_path = cspinstallations[installation.replace(".yaml", "")] @@ -168,63 +194,86 @@ def get_cspinstallationconfig(installation): elif isinstance(installation, Path): installation_path = installation - # Load and set expected index columns - with open(installation_path) as f: - config = yaml.safe_load(f) + with Path(installation_path).open() as f: + config = cast("dict[str, Any]", yaml.safe_load(f)) config["path"] = installation_path - ## Convert efficiency dict to xr.DataArray and convert units to deg -> rad, % -> p.u. 
da = pd.DataFrame(config["efficiency"]).set_index(["altitude", "azimuth"]) - # Handle as xarray DataArray early - da will be 'return'-ed da = da.to_xarray()["value"] - # Solar altitude + azimuth expected in deg for better readibility - # calculations use solar position in rad - # Convert da to new coordinates and drop old da = da.rename({"azimuth": "azimuth [deg]", "altitude": "altitude [deg]"}) - da = da.assign_coords( - { - "altitude": radians(da["altitude [deg]"]), - "azimuth": radians(da["azimuth [deg]"]), - } - ) + da = da.assign_coords({ + "altitude": radians(da["altitude [deg]"]), + "azimuth": radians(da["azimuth [deg]"]), + }) da = da.swap_dims({"altitude [deg]": "altitude", "azimuth [deg]": "azimuth"}) da = da.chunk("auto") - # Efficiency unit from % to p.u. da /= 1.0e2 config["efficiency"] = da - return config + return cast("CSPConfig", config) + + +def solarpanel_rated_capacity_per_unit(panel: str | PathLike | PanelConfig) -> float: + """ + Return the rated capacity per unit of a solar panel configuration. + Parameters + ---------- + panel : str or pathlib.Path or dict + Solar panel configuration or reference to one. -def solarpanel_rated_capacity_per_unit(panel): - # unit is m^2 here + Returns + ------- + float + Rated capacity per unit area or per panel, depending on the model. - if isinstance(panel, (str | Path)): + Raises + ------ + ValueError + If the panel model is unknown. 
+ """ + if isinstance(panel, (str, Path)): panel = get_solarpanelconfig(panel) model = panel.get("model", "huld") if model == "huld": - return panel["efficiency"] - elif model == "bofinger": - # one unit in the capacity layout is interpreted as one panel of a - # capacity (A + 1000 * B + log(1000) * C) * 1000W/m^2 * (k / 1000) + return cast("float", panel["efficiency"]) + if model == "bofinger": A, B, C = itemgetter("A", "B", "C")(panel) - return (A + B * 1000.0 + C * np.log(1000.0)) * 1e3 + return cast("float", (A + B * 1000.0 + C * np.log(1000.0)) * 1e3) + raise ValueError(f"Unknown panel model: {model}") + +def windturbine_rated_capacity_per_unit( + turbine: str | PathLike | TurbineConfig, +) -> float: + """ + Return the rated capacity of a wind turbine configuration. -def windturbine_rated_capacity_per_unit(turbine): - if isinstance(turbine, (str | Path)): + Parameters + ---------- + turbine : str or pathlib.Path or dict + Wind turbine configuration or reference to one. + + Returns + ------- + float + Rated turbine capacity. + """ + if isinstance(turbine, (str, Path)): turbine = get_windturbineconfig(turbine) return turbine["P"] -def windturbine_smooth(turbine, params=None): +def windturbine_smooth( + turbine: TurbineConfig, params: dict[str, float] | None | bool = None +) -> TurbineConfig: """ Smooth the powercurve in `turbine` with a gaussian kernel. 
@@ -252,30 +301,25 @@ def windturbine_smooth(turbine, params=None): if params is None or params is True: params = {} - eta = params.get("eta", 0.95) - Delta_v = params.get("Delta_v", 1.27) - sigma = params.get("sigma", 2.29) + params = cast("dict[str, float]", params) + eta: float = params.get("eta", 0.95) + Delta_v: float = params.get("Delta_v", 1.27) + sigma: float = params.get("sigma", 2.29) - def kernel(v_0): - # all velocities in m/s + def kernel(v_0: NDArray) -> NDArray: return ( 1.0 / np.sqrt(2 * np.pi * sigma * sigma) * np.exp(-(v_0 - Delta_v) * (v_0 - Delta_v) / (2 * sigma * sigma)) ) - def smooth(velocities, power): - # interpolate kernel and power curve to the same, regular velocity grid + def smooth(velocities: NDArray, power: NDArray) -> tuple[NDArray, NDArray]: velocities_reg = np.linspace(-50.0, 50.0, 1001) power_reg = np.interp(velocities_reg, velocities, power) kernel_reg = kernel(velocities_reg) - # convolve power and kernel - # the downscaling is necessary because scipy expects the velocity - # increments to be 1., but here, they are 0.1 convolution = 0.1 * fftconvolve(power_reg, kernel_reg, mode="same") - # sample down so power curve doesn't get too long velocities_new = np.linspace(0.0, 35.0, 72) power_new = eta * np.interp(velocities_new, velocities_reg, convolution) @@ -283,7 +327,7 @@ def smooth(velocities, power): turbine = turbine.copy() turbine["V"], turbine["POW"] = smooth(turbine["V"], turbine["POW"]) - turbine["P"] = np.max(turbine["POW"]) + turbine["P"] = cast("float", float(np.max(turbine["POW"]))) if any(turbine["POW"][np.where(turbine["V"] == 0.0)] > 1e-2): logger.warning( @@ -297,15 +341,17 @@ def smooth(velocities, power): return turbine -def _max_v_is_zero_pow(turbine): - return np.any(turbine["POW"][turbine["V"] == turbine["V"].max()] == 0) +def _max_v_is_zero_pow(turbine: TurbineConfig) -> bool: + return cast( + "bool", bool(np.any(turbine["POW"][turbine["V"] == turbine["V"].max()] == 0)) + ) def _validate_turbine_config_dict( 
- turbine: dict, add_cutout_windspeed: bool + turbine: dict[str, Any], add_cutout_windspeed: bool ) -> TurbineConfig: """ - Checks the turbine config dict format and power curve. + Check the turbine config dict format and power curve. Parameters ---------- @@ -322,6 +368,11 @@ def _validate_turbine_config_dict( dict validated and potentially modified turbine config dict + Raises + ------ + ValueError + If the turbine config dict is missing required keys or has invalid values. + """ if not all(key in turbine for key in ("POW", "V", "P", "hub_height")): err_msg = ( @@ -330,11 +381,10 @@ def _validate_turbine_config_dict( ) raise ValueError(err_msg) - if not all(isinstance(turbine[p], (np.ndarray | list)) for p in ("POW", "V")): + if not all(isinstance(turbine[p], (np.ndarray, list)) for p in ("POW", "V")): err_msg = "turbine entries 'POW' and 'V' must be np.ndarray or list" raise ValueError(err_msg) - # convert lists from user provided turbine dicts to numpy arrays if any(isinstance(turbine[p], list) for p in ("POW", "V")): turbine["V"] = np.array(turbine["V"]) turbine["POW"] = np.array(turbine["POW"]) @@ -344,36 +394,34 @@ def _validate_turbine_config_dict( raise ValueError(err_msg) if not np.all(np.diff(turbine["V"]) >= 0): - # This check is not strict as it uses `>=` instead of `>` and thus allows equal - # wind speeds in the array. However, many power curves have two entries for the - # same wind speed at the cut-in and cut-out speeds which would make them fail if - # using `>` only. 
err_msg = ( "wind speed 'V' in the turbine config dict is expected to be increasing, " f"but is currently not in ascending order:\n{turbine['V']}" ) raise ValueError(err_msg) - if add_cutout_windspeed is True and not _max_v_is_zero_pow(turbine): + if add_cutout_windspeed is True and not _max_v_is_zero_pow( + cast("TurbineConfig", turbine) + ): turbine["V"] = np.pad(turbine["V"], (0, 1), "maximum") turbine["POW"] = np.pad(turbine["POW"], (0, 1), "constant", constant_values=0) logger.info( - "adding a cut-out wind speed to the turbine power curve at " - f"V={turbine['V'][-1]} m/s." + "adding a cut-out wind speed to the turbine power curve at V=%s m/s.", + turbine["V"][-1], ) - if not _max_v_is_zero_pow(turbine): + if not _max_v_is_zero_pow(cast("TurbineConfig", turbine)): logger.warning( "The power curve does not have a cut-out wind speed, i.e. the power" " output corresponding to the\nhighest wind speed is not zero. You can" " either change the power curve manually or set\n" "'add_cutout_windspeed=True' in the Cutout.wind conversion method." ) - return turbine + return cast("TurbineConfig", turbine) def get_oedb_windturbineconfig( - search: int | str | None = None, **search_params + search: int | str | None = None, **search_params: Any ) -> TurbineConfig: """ Download a windturbine configuration from the OEDB database. @@ -406,6 +454,10 @@ def get_oedb_windturbineconfig( >>> get_oedb_windturbineconfig(name="E-53/800", manufacturer="Enercon") {'V': ..., 'POW': ..., ...} + Raises + ------ + RuntimeError + If no turbine or multiple turbines match the search. """ # Parse information of different allowed 'turbine' values if isinstance(search, int): @@ -428,9 +480,8 @@ def get_oedb_windturbineconfig( _oedb_turbines = df[df.has_power_curve] logger.info( - "Searching turbine power curve in OEDB database using " - + ", ".join(f"{k}='{v}'" for (k, v) in search_params.items()) - + "." 
+ "Searching turbine power curve in OEDB database using %s.", + ", ".join(f"{k}='{v}'" for (k, v) in search_params.items()), ) # Working copy @@ -455,7 +506,7 @@ def get_oedb_windturbineconfig( if len(df) < 1: raise RuntimeError("No turbine found.") - elif len(df) > 1: + if len(df) > 1: raise RuntimeError( f"Provided information corresponds to {len(df)} turbines," " use `id` for an unambiguous search.\n" @@ -506,13 +557,13 @@ def get_oedb_windturbineconfig( name = "{manufacturer}_{name}".format(**turbineconf).translate(charmap) windturbines[name] = turbineconf - return turbineconf + return turbineconf # type: ignore[return-value] # Global caches _oedb_turbines = None windturbines = arrowdict({p.stem: p for p in WINDTURBINE_DIRECTORY.glob("*.yaml")}) solarpanels = arrowdict({p.stem: p for p in SOLARPANEL_DIRECTORY.glob("*.yaml")}) -cspinstallations = arrowdict( - {p.stem: p for p in CSPINSTALLATION_DIRECTORY.glob("*.yaml")} -) +cspinstallations = arrowdict({ + p.stem: p for p in CSPINSTALLATION_DIRECTORY.glob("*.yaml") +}) diff --git a/atlite/utils.py b/atlite/utils.py index 86bd5bc3..f0ee9865 100644 --- a/atlite/utils.py +++ b/atlite/utils.py @@ -1,14 +1,15 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -General utility functions for internal use. 
-""" +"""General utility functions for internal use.""" + +from __future__ import annotations import logging import re import textwrap from pathlib import Path +from typing import TYPE_CHECKING, Any, TypeAlias, cast import pandas as pd import xarray as xr @@ -16,12 +17,40 @@ from atlite.datasets import modules as datamodules from atlite.gis import maybe_swap_spatial_dims +if TYPE_CHECKING: + from collections.abc import Callable + + pass + logger = logging.getLogger(__name__) +PathLike: TypeAlias = str | Path +NDArray: TypeAlias = Any +DataArray: TypeAlias = xr.DataArray +Dataset: TypeAlias = xr.Dataset + -def migrate_from_cutout_directory(old_cutout_dir, path): +def migrate_from_cutout_directory(old_cutout_dir: PathLike, path: PathLike) -> Dataset: """ - Convert an old style cutout directory to new style netcdf file. + Convert an old-style cutout directory to a new-style netCDF file. + + Parameters + ---------- + old_cutout_dir : str or Path + Path to the legacy cutout directory containing ``meta.nc``. + path : str or Path + Output path for the migrated ``.nc`` file. + + Returns + ------- + xr.Dataset + The migrated cutout data. + + Raises + ------ + MergeError + If automatic migration of multi-file datasets fails. + """ old_cutout_dir = Path(old_cutout_dir) with xr.open_dataset(old_cutout_dir / "meta.nc") as meta: @@ -61,7 +90,7 @@ def migrate_from_cutout_directory(old_cutout_dir, path): ) raise - data = maybe_swap_spatial_dims(data) + data = cast("Dataset", maybe_swap_spatial_dims(data)) module = data.attrs["module"] data.attrs["prepared_features"] = list(datamodules[module].features) for v in data: @@ -72,34 +101,48 @@ def migrate_from_cutout_directory(old_cutout_dir, path): path = Path(path).with_suffix(".nc") logger.info( - f"Writing cutout data to {path}. When done, load it again using" - f"\n\n\tatlite.Cutout('{path}')" + "Writing cutout data to %s. 
When done, load it again using" + "\n\n\tatlite.Cutout('%s')", + path, + path, ) data.to_netcdf(path) return data -def timeindex_from_slice(timeslice): +def timeindex_from_slice(timeslice: Any) -> pd.DatetimeIndex: + """ + Create an hourly DatetimeIndex from a slice with start/end month strings. + + Parameters + ---------- + timeslice : slice + Slice with start and end as month strings (e.g. ``"2013-01"``). + + Returns + ------- + pd.DatetimeIndex + Hourly index spanning the given months. + """ end = pd.Timestamp(timeslice.end) + pd.offsets.DateOffset(months=1) return pd.date_range(timeslice.start, end, freq="1h", closed="left") -class arrowdict(dict): - """ - A subclass of dict, which allows you to get items in the dict using the - attribute syntax! - """ +class arrowdict(dict[str, Any]): + """Dict subclass enabling attribute-style access to items.""" - def __getattr__(self, item): + def __getattr__(self, item: str) -> Any: + """Retrieve a dict value as an attribute, raising AttributeError on missing keys.""" # noqa: DOC201, DOC501 try: return self.__getitem__(item) except KeyError as e: - raise AttributeError(e.args[0]) + raise AttributeError(e.args[0]) from e _re_pattern = re.compile("[a-zA-Z_][a-zA-Z0-9_]*") - def __dir__(self): - dict_keys = [] + def __dir__(self) -> list[str]: + """List keys that are valid Python identifiers for tab-completion.""" # noqa: DOC201 + dict_keys: list[str] = [] for k in self.keys(): if isinstance(k, str): m = self._re_pattern.match(k) @@ -110,29 +153,41 @@ def __dir__(self): class CachedAttribute: """ - Computes attribute value and caches it in the instance. + Descriptor that computes an attribute value once and caches it on the instance. - From the Python Cookbook (Denis Otkidach) This decorator allows you - to create a property which can be computed once and accessed many - times. Sort of like memoization. + Based on the Python Cookbook recipe by Denis Otkidach. """ - # For python 3.8 >= use functoolts.cached_property instead. 
+ method: Callable[[Any], Any] + name: str + __doc__: str | None - def __init__(self, method, name=None, doc=None): - # record the unbound-method and the name + def __init__( + self, + method: Callable[[Any], Any], + name: str | None = None, + doc: str | None = None, + ) -> None: + """ + Initialize the cached attribute descriptor. + + Parameters + ---------- + method : callable + Method whose return value will be cached. + name : str, optional + Attribute name for caching. Defaults to ``method.__name__``. + doc : str, optional + Docstring override. Defaults to ``method.__doc__``. + """ self.method = method self.name = name or method.__name__ self.__doc__ = doc or method.__doc__ - def __get__(self, inst, cls): + def __get__(self, inst: Any, cls: type[Any] | None) -> Any: + """Compute on first access, cache the result, and return it.""" # noqa: DOC201 if inst is None: - # instance attribute accessed on class, return self - # You get here if you write `Foo.bar` return self - # compute, cache and return the instance's attribute value result = self.method(inst) - # setattr redefines the instance's attribute so this doesn't get called - # again setattr(inst, self.name, result) return result diff --git a/atlite/wind.py b/atlite/wind.py index 92dd1f19..c2e95e1d 100644 --- a/atlite/wind.py +++ b/atlite/wind.py @@ -1,32 +1,28 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for use in conjunction with wind data generation. 
-""" +"""Functions for use in conjunction with wind data generation.""" from __future__ import annotations import logging import re -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal, cast import numpy as np -import xarray as xr - -logger = logging.getLogger(__name__) - if TYPE_CHECKING: - from typing import Literal + from atlite._types import DataArray, Dataset, NDArray + +logger = logging.getLogger(__name__) def extrapolate_wind_speed( - ds: xr.Dataset, + ds: Dataset, to_height: int | float, from_height: int | None = None, method: Literal["logarithmic", "power"] = "logarithmic", -) -> xr.DataArray: +) -> DataArray: """ Extrapolate the wind speed from a given height above ground to another. @@ -59,8 +55,12 @@ def extrapolate_wind_speed( Raises ------ + AssertionError + If no wind speed variables are found in the dataset. RuntimeError - If the cutout is missing the data for the chosen `method` + If the cutout is missing the data for the chosen `method`. + ValueError + If ``method`` is not ``'logarithmic'`` or ``'power'``. References ---------- @@ -72,42 +72,42 @@ def extrapolate_wind_speed( Wind Resource Assessment: A Comparison against Tall Towers' https://doi.org/10.3390/en14144169 . 
""" - # Fast lane - to_name = f"wnd{int(to_height):0d}m" + to_name: str = f"wnd{int(to_height):0d}m" if to_name in ds: return ds[to_name] if from_height is None: - # Determine closest height to to_name - heights = np.asarray([int(s[3:-1]) for s in ds if re.match(r"wnd\d+m", s)]) + heights: NDArray = np.asarray([ + int(str(s)[3:-1]) for s in ds if re.match(r"wnd\d+m", str(s)) + ]) if len(heights) == 0: raise AssertionError("Wind speed is not in dataset") - from_height = heights[np.argmin(np.abs(heights - to_height))] + from_height = int(heights[np.argmin(np.abs(heights - to_height))]) - from_name = f"wnd{int(from_height):0d}m" + from_name: str = f"wnd{int(from_height):0d}m" if method == "logarithmic": try: - roughness = ds["roughness"] + roughness: DataArray = ds["roughness"] except KeyError: raise RuntimeError( "The logarithmic interpolation method requires surface roughness (roughness);\n" "make sure you choose a compatible dataset like ERA5" - ) - wnd_spd = ds[from_name] * ( + ) from None + wnd_spd: DataArray = ds[from_name] * ( np.log(to_height / roughness) / np.log(from_height / roughness) ) - method_desc = "logarithmic method with roughness" + method_desc: str = "logarithmic method with roughness" elif method == "power": try: - wnd_shear_exp = ds["wnd_shear_exp"] + wnd_shear_exp: DataArray = ds["wnd_shear_exp"] except KeyError: raise RuntimeError( "The power law interpolation method requires a wind shear exponent (wnd_shear_exp);\n" "make sure you choose a compatible dataset like ERA5 and update your cutout" - ) + ) from None wnd_spd = ds[from_name] * (to_height / from_height) ** wnd_shear_exp method_desc = "power method with wind shear exponent" else: @@ -115,14 +115,12 @@ def extrapolate_wind_speed( f"Interpolation method must be 'logarithmic' or 'power', but is: {method}" ) - wnd_spd.attrs.update( - { - "long name": ( - f"extrapolated {to_height} m wind speed using {method_desc} " - f" and {from_height} m wind speed" - ), - "units": "m s**-1", - } - ) - - 
return wnd_spd.rename(to_name) + wnd_spd.attrs.update({ + "long name": ( + f"extrapolated {to_height} m wind speed using {method_desc} " + f" and {from_height} m wind speed" + ), + "units": "m s**-1", + }) + + return cast("DataArray", wnd_spd.rename(to_name)) diff --git a/doc/chart.py b/doc/chart.py index e03a0e0b..4d08befe 100755 --- a/doc/chart.py +++ b/doc/chart.py @@ -7,6 +7,8 @@ This is a temporary script file. """ +from typing import Any + import matplotlib.pyplot as plt fig, ax = plt.subplots(figsize=(12, 5)) @@ -37,16 +39,20 @@ processedstr = "\n" + "\n\n\n".join([" ◦ " + s for s in processeddata]) + "\n" # defaults for boxes and arrows -kwargs = dict(verticalalignment="center", fontsize=14, color="#545454") -arrowkwargs = dict( - head_width=0.2, - width=0.13, - head_length=0.05, - edgecolor="white", - length_includes_head=True, - color="lightgray", - alpha=1, -) +kwargs: dict[str, Any] = { + "verticalalignment": "center", + "fontsize": 14, + "color": "#545454", +} +arrowkwargs = { + "head_width": 0.2, + "width": 0.13, + "head_length": 0.05, + "edgecolor": "white", + "length_includes_head": True, + "color": "lightgray", + "alpha": 1, +} y = 0.5 # First arrow @@ -61,7 +67,12 @@ y, climatestr, **kwargs, - bbox=dict(facecolor="indianred", alpha=0.5, edgecolor="None", boxstyle="round"), + bbox={ + "facecolor": "indianred", + "alpha": 0.5, + "edgecolor": "None", + "boxstyle": "round", + }, ) # Second arrow @@ -74,7 +85,12 @@ y, processedstr, **kwargs, - bbox=dict(facecolor="olivedrab", alpha=0.5, edgecolor="None", boxstyle="round"), + bbox={ + "facecolor": "olivedrab", + "alpha": 0.5, + "edgecolor": "None", + "boxstyle": "round", + }, ) diff --git a/doc/conf.py b/doc/conf.py index 64119206..15e6e9d2 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +"""Sphinx configuration for atlite documentation.""" + # # atlite documentation build configuration file, created by # sphinx-quickstart on Tue Jan 5 10:04:42 2016. 
@@ -15,7 +17,6 @@ # All configuration values have a default; values that are commented out # serve to show the default. - from importlib.metadata import version as get_version # If extensions (or modules to document with autodoc) are in another directory, @@ -33,7 +34,7 @@ # ones. extensions = [ "sphinx.ext.autodoc", - #'sphinx.ext.autosummary', + # 'sphinx.ext.autosummary', "sphinx.ext.intersphinx", "sphinx.ext.todo", "sphinx.ext.mathjax", @@ -41,8 +42,8 @@ "nbsphinx", "nbsphinx_link", # 'sphinx.ext.pngmath', - #'sphinxcontrib.tikz', - #'rinoh.frontend.sphinx', + # 'sphinxcontrib.tikz', + # 'rinoh.frontend.sphinx', "sphinx.ext.imgconverter", # for SVG conversion ] @@ -253,15 +254,15 @@ # -- Options for LaTeX output --------------------------------------------- -latex_elements = { +latex_elements: dict[str, str] = { # The paper size ('letterpaper' or 'a4paper'). - #'papersize': 'letterpaper', + # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). - #'pointsize': '10pt', + # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. - #'preamble': '', + # 'preamble': '', # Latex figure (float) alignment - #'figure_align': 'htbp', + # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. 
List of tuples diff --git a/examples/building_stock_weather_aggregation.ipynb b/examples/building_stock_weather_aggregation.ipynb index fafc2373..727847c5 100644 --- a/examples/building_stock_weather_aggregation.ipynb +++ b/examples/building_stock_weather_aggregation.ipynb @@ -307,7 +307,8 @@ "# Clip the raster data and reproject the result back into EPSG:4326 to match the cutout,\n", "# also remove some unnecessary dimensions via `squeeze()`\n", "layout = (\n", - " population_density.rio.clip(\n", + " population_density.rio\n", + " .clip(\n", " finland_3035.geometry, from_disk=True\n", " ) # Clip the population density raster data with the reprojected Finland shape.\n", " .rio.reproject( # Reproject and resample the population density raster to match the cutout.\n", @@ -445,7 +446,8 @@ "dirs = {\"north\": 0.0, \"east\": 90.0, \"south\": 180.0, \"west\": 270.0}\n", "for name, lout in layouts.items():\n", " irr_total[name] = {\n", - " d: cutout.irradiation(\n", + " d: cutout\n", + " .irradiation(\n", " orientation={\"slope\": 90.0, \"azimuth\": az}, layout=lout.fillna(0.0)\n", " )\n", " .squeeze()\n", @@ -453,7 +455,8 @@ " for d, az in dirs.items()\n", " }\n", " irr_direct[name] = {\n", - " d: cutout.irradiation(\n", + " d: cutout\n", + " .irradiation(\n", " orientation={\"slope\": 90.0, \"azimuth\": az},\n", " layout=lout.fillna(0.0),\n", " irradiation=\"direct\",\n", @@ -463,7 +466,8 @@ " for d, az in dirs.items()\n", " }\n", " irr_diffuse[name] = {\n", - " d: cutout.irradiation(\n", + " d: cutout\n", + " .irradiation(\n", " orientation={\"slope\": 90.0, \"azimuth\": az},\n", " layout=lout.fillna(0.0),\n", " irradiation=\"diffuse\",\n", diff --git a/examples/historic-comparison-germany.ipynb b/examples/historic-comparison-germany.ipynb index dda326ef..64dbe871 100644 --- a/examples/historic-comparison-germany.ipynb +++ b/examples/historic-comparison-germany.ipynb @@ -67,17 +67,17 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "import zipfile\n", + 
"from pathlib import Path\n", "\n", "import requests\n", "\n", "\n", "def download_file(url, local_filename):\n", " # variant of http://stackoverflow.com/a/16696317\n", - " if not os.path.exists(local_filename):\n", + " if not Path(local_filename).exists():\n", " r = requests.get(url, stream=True)\n", - " with open(local_filename, \"wb\") as f:\n", + " with Path(local_filename).open(\"wb\") as f:\n", " for chunk in r.iter_content(chunk_size=1024):\n", " if chunk:\n", " f.write(chunk)\n", @@ -121,7 +121,7 @@ "opsd.index = opsd.index.tz_convert(None)\n", "\n", "# We are only interested in the 2012 data\n", - "opsd = opsd[(\"2011\" < opsd.index) & (opsd.index < \"2013\")]" + "opsd = opsd[(opsd.index > \"2011\") & (opsd.index < \"2013\")]" ] }, { @@ -261,31 +261,33 @@ "\n", " Parameters\n", " ----------\n", - " typ : str\n", - " Type of energy source, e.g. \"Solarstrom\" (PV), \"Windenergie\" (wind).\n", - " cap_range : (optional) list-like\n", - " Two entries, limiting the lower and upper range of capacities (in kW)\n", - " to include. Left-inclusive, right-exclusive.\n", - " until : str\n", - " String representation of a datetime object understood by pandas.to_datetime()\n", - " for limiting to installations existing until this datetime.\n", + " typ : str\n", + " Type of energy source, e.g. \"Solarstrom\" (PV), \"Windenergie\" (wind).\n", + " cap_range : (optional) list-like\n", + " Two entries, limiting the lower and upper range of capacities (in kW)\n", + " to include. 
Left-inclusive, right-exclusive.\n", + " until : str\n", + " String representation of a datetime object understood by pandas.to_datetime()\n", + " for limiting to installations existing until this datetime.\n", "\n", - " \"\"\"\n", + " Returns\n", + " -------\n", + " pandas.DataFrame\n", + " Filtered capacities.\n", "\n", + " \"\"\"\n", " # Load locations of installed capacities and remove incomplete entries\n", - " cols = OrderedDict(\n", - " (\n", - " (\"installation_date\", 0),\n", - " (\"plz\", 2),\n", - " (\"city\", 3),\n", - " (\"type\", 6),\n", - " (\"capacity\", 8),\n", - " (\"level\", 9),\n", - " (\"lat\", 19),\n", - " (\"lon\", 20),\n", - " (\"validation\", 22),\n", - " )\n", - " )\n", + " cols = OrderedDict((\n", + " (\"installation_date\", 0),\n", + " (\"plz\", 2),\n", + " (\"city\", 3),\n", + " (\"type\", 6),\n", + " (\"capacity\", 8),\n", + " (\"level\", 9),\n", + " (\"lat\", 19),\n", + " (\"lon\", 20),\n", + " (\"validation\", 22),\n", + " ))\n", " database = pd.read_csv(\n", " \"eeg_anlagenregister_2015.08.utf8.csv\",\n", " sep=\";\",\n", @@ -455,9 +457,10 @@ ], "source": [ "compare = (\n", - " pd.DataFrame(\n", - " dict(atlite=pv.squeeze().to_series(), opsd=opsd[\"DE_solar_generation_actual\"])\n", - " )\n", + " pd.DataFrame({\n", + " \"atlite\": pv.squeeze().to_series(),\n", + " \"opsd\": opsd[\"DE_solar_generation_actual\"],\n", + " })\n", " / 1e3\n", ") # in GW\n", "compare.resample(\"1W\").mean().plot(figsize=(8, 5))\n", @@ -492,11 +495,10 @@ "source": [ "pv_opt = cutout.pv(panel=\"CSi\", orientation=\"latitude_optimal\", layout=solar_layout)\n", "compare_opt = (\n", - " pd.DataFrame(\n", - " dict(\n", - " atlite=pv_opt.squeeze().to_series(), opsd=opsd[\"DE_solar_generation_actual\"]\n", - " )\n", - " )\n", + " pd.DataFrame({\n", + " \"atlite\": pv_opt.squeeze().to_series(),\n", + " \"opsd\": opsd[\"DE_solar_generation_actual\"],\n", + " })\n", " / 1e3\n", ") # in GW\n", "compare_opt.resample(\"1W\").mean().plot(figsize=(8, 5))\n", @@ -685,14 
+687,14 @@ "outputs": [], "source": [ "turbine_categories = [\n", - " dict(name=\"Vestas_V25_200kW\", up=400.0),\n", - " dict(name=\"Vestas_V47_660kW\", up=700.0),\n", - " dict(name=\"Bonus_B1000_1000kW\", up=1100.0),\n", - " dict(name=\"Suzlon_S82_1.5_MW\", up=1600.0),\n", - " dict(name=\"Vestas_V66_1750kW\", up=1900.0),\n", - " dict(name=\"Vestas_V80_2MW_gridstreamer\", up=2200.0),\n", - " dict(name=\"Siemens_SWT_2300kW\", up=2500.0),\n", - " dict(name=\"Vestas_V90_3MW\", up=50000.0),\n", + " {\"name\": \"Vestas_V25_200kW\", \"up\": 400.0},\n", + " {\"name\": \"Vestas_V47_660kW\", \"up\": 700.0},\n", + " {\"name\": \"Bonus_B1000_1000kW\", \"up\": 1100.0},\n", + " {\"name\": \"Suzlon_S82_1.5_MW\", \"up\": 1600.0},\n", + " {\"name\": \"Vestas_V66_1750kW\", \"up\": 1900.0},\n", + " {\"name\": \"Vestas_V80_2MW_gridstreamer\", \"up\": 2200.0},\n", + " {\"name\": \"Siemens_SWT_2300kW\", \"up\": 2500.0},\n", + " {\"name\": \"Vestas_V90_3MW\", \"up\": 50000.0},\n", "]" ] }, @@ -1235,13 +1237,11 @@ }, "outputs": [], "source": [ - "compare = pd.DataFrame(\n", - " {\n", - " \"atlite\": wind[\"total\"].squeeze().to_series(),\n", - " \"< 1600 kW\": wind[\"< 1600.0 kW\"].squeeze().to_series(),\n", - " \"opsd\": opsd[\"DE_wind_generation_actual\"],\n", - " }\n", - ")\n", + "compare = pd.DataFrame({\n", + " \"atlite\": wind[\"total\"].squeeze().to_series(),\n", + " \"< 1600 kW\": wind[\"< 1600.0 kW\"].squeeze().to_series(),\n", + " \"opsd\": opsd[\"DE_wind_generation_actual\"],\n", + "})\n", "\n", "compare = compare / 1e3 # in GW" ] @@ -1399,7 +1399,8 @@ " filter(lambda r: r.attributes[\"iso_3166_2\"].startswith(\"DE\"), shp.records())\n", ")\n", "laender = (\n", - " gpd.GeoDataFrame([{**r.attributes, \"geometry\": r.geometry} for r in de_records])\n", + " gpd\n", + " .GeoDataFrame([{**r.attributes, \"geometry\": r.geometry} for r in de_records])\n", " .rename(columns={\"iso_3166_2\": \"state\"})\n", " .set_index(\"state\")\n", " .set_crs(4236)\n", diff --git 
a/examples/plotting_with_atlite.ipynb b/examples/plotting_with_atlite.ipynb index eba004ac..51843928 100644 --- a/examples/plotting_with_atlite.ipynb +++ b/examples/plotting_with_atlite.ipynb @@ -263,14 +263,14 @@ "gs = GridSpec(3, 3, figure=fig)\n", "\n", "ax = fig.add_subplot(gs[:, 0:2], projection=projection)\n", - "plot_grid_dict = dict(\n", - " alpha=0.1,\n", - " edgecolor=\"k\",\n", - " zorder=4,\n", - " aspect=\"equal\",\n", - " facecolor=\"None\",\n", - " transform=plate(),\n", - ")\n", + "plot_grid_dict = {\n", + " \"alpha\": 0.1,\n", + " \"edgecolor\": \"k\",\n", + " \"zorder\": 4,\n", + " \"aspect\": \"equal\",\n", + " \"facecolor\": \"None\",\n", + " \"transform\": plate(),\n", + "}\n", "UkIr.plot(ax=ax, zorder=1, transform=plate())\n", "cells.plot(ax=ax, **plot_grid_dict)\n", "country_bound.plot(ax=ax, edgecolor=\"orange\", facecolor=\"None\", transform=plate())\n", @@ -401,7 +401,8 @@ "cells_generation = sites.merge(cells, how=\"inner\").rename(pd.Series(sites.index))\n", "\n", "layout = (\n", - " xr.DataArray(cells_generation.set_index([\"y\", \"x\"]).capacity.unstack())\n", + " xr\n", + " .DataArray(cells_generation.set_index([\"y\", \"x\"]).capacity.unstack())\n", " .reindex_like(cap_factors)\n", " .rename(\"Installed Capacity [MW]\")\n", ")\n", @@ -519,7 +520,8 @@ " spine.set_edgecolor(\"white\")\n", "\n", "power_generation = (\n", - " cutout.wind(\"Vestas_V112_3MW\", layout=layout.fillna(0), shapes=UkIr)\n", + " cutout\n", + " .wind(\"Vestas_V112_3MW\", layout=layout.fillna(0), shapes=UkIr)\n", " .to_pandas()\n", " .rename_axis(index=\"\", columns=\"shapes\")\n", ")\n", diff --git a/examples/solarpv_tracking_options.ipynb b/examples/solarpv_tracking_options.ipynb index e31d345e..50a49205 100644 --- a/examples/solarpv_tracking_options.ipynb +++ b/examples/solarpv_tracking_options.ipynb @@ -408,7 +408,9 @@ "source": [ "day_profiles = [ds.loc[day, point].squeeze() for ds in data]\n", "\n", - "df = pd.DataFrame({k: v.to_series() for k, v in 
zip(labels, day_profiles)})\n", + "df = pd.DataFrame({\n", + " k: v.to_series() for k, v in zip(labels, day_profiles, strict=False)\n", + "})\n", "df.plot(figsize=(10, 5))\n", "plt.title(\"PV Tracking: Portugal @(-9°, 40°), May 1, 2019\")" ] @@ -452,7 +454,7 @@ ], "source": [ "average = [ds.mean(\"dim_0\").mean().item() for ds in data]\n", - "df = pd.Series({k: v for k, v in zip(labels, average)})\n", + "df = pd.Series(dict(zip(labels, average, strict=False)))\n", "df.mul(100).plot.barh(figsize=(10, 5), zorder=2)\n", "plt.grid(axis=\"x\", zorder=1)\n", "plt.title(\"PV Tracking: Average Capacity Factor per Cell [%]\")" diff --git a/examples/working-with-csp.ipynb b/examples/working-with-csp.ipynb index ca9eaf72..301af585 100644 --- a/examples/working-with-csp.ipynb +++ b/examples/working-with-csp.ipynb @@ -27,6 +27,8 @@ "metadata": {}, "outputs": [], "source": [ + "from pathlib import Path\n", + "\n", "import geopandas as gpd\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -521,7 +523,7 @@ "}\n", "\n", "layout = xr.zeros_like(cf)\n", - "layout.loc[dict(x=nearest_location[\"x\"], y=nearest_location[\"y\"])] = installed_power" + "layout.loc[{\"x\": nearest_location[\"x\"], \"y\": nearest_location[\"y\"]}] = installed_power" ] }, { @@ -586,18 +588,14 @@ ], "source": [ "# Calculate time-series for layout with both installation configurations\n", - "time_series = xr.merge(\n", - " [\n", - " cutout.csp(\n", - " installation=\"lossless_installation\",\n", - " technology=\"solar tower\",\n", - " layout=layout,\n", - " ).rename(\"lossless_installation\"),\n", - " cutout.csp(installation=\"SAM_solar_tower\", layout=layout).rename(\n", - " \"SAM_solar_tower\"\n", - " ),\n", - " ]\n", - ")\n", + "time_series = xr.merge([\n", + " cutout.csp(\n", + " installation=\"lossless_installation\",\n", + " technology=\"solar tower\",\n", + " layout=layout,\n", + " ).rename(\"lossless_installation\"),\n", + " cutout.csp(installation=\"SAM_solar_tower\", 
layout=layout).rename(\"SAM_solar_tower\"),\n", + "])\n", "\n", "# Load reference time-series from file\n", "df = pd.read_csv(\"../profiles_and_efficiencies_from_sam/ST-salt_time-series_spain.csv\")\n", @@ -764,21 +762,19 @@ "\n", "# installed power = 950 W/m^2 * area = 1205.0 MW\n", "installed_power = config[\"r_irradiance\"] * area / 1.0e6\n", - "layout.loc[dict(x=nearest_location[\"x\"], y=nearest_location[\"y\"])] = installed_power\n", + "layout.loc[{\"x\": nearest_location[\"x\"], \"y\": nearest_location[\"y\"]}] = installed_power\n", "\n", "# Calculate time-series for layout with both installation configurations\n", - "time_series = xr.merge(\n", - " [\n", - " cutout.csp(\n", - " installation=\"lossless_installation\",\n", - " technology=\"parabolic trough\",\n", - " layout=layout,\n", - " ).rename(\"lossless_installation\"),\n", - " cutout.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", - " \"SAM_parabolic_trough\"\n", - " ),\n", - " ]\n", - ")\n", + "time_series = xr.merge([\n", + " cutout.csp(\n", + " installation=\"lossless_installation\",\n", + " technology=\"parabolic trough\",\n", + " layout=layout,\n", + " ).rename(\"lossless_installation\"),\n", + " cutout.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", + " \"SAM_parabolic_trough\"\n", + " ),\n", + "])\n", "\n", "# Load reference time-series from file\n", "df = pd.read_csv(\n", @@ -951,17 +947,15 @@ ], "source": [ "# Calculate time-series for layout with both installation configurations\n", - "time_series = xr.merge(\n", - " [\n", - " cutout_sarah.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", - " \"SARAH\"\n", - " ),\n", - " cutout.csp(\n", - " installation=\"SAM_parabolic_trough\",\n", - " layout=layout,\n", - " ).rename(\"ERA5\"),\n", - " ]\n", - ")\n", + "time_series = xr.merge([\n", + " cutout_sarah.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", + " \"SARAH\"\n", + " ),\n", + " cutout.csp(\n", + " 
installation=\"SAM_parabolic_trough\",\n", + " layout=layout,\n", + " ).rename(\"ERA5\"),\n", + "])\n", "\n", "# Load reference NREL SAM time-series from file\n", "df = pd.read_csv(\n", @@ -1187,14 +1181,16 @@ "# Interpolate values to a finer grid and fill missing values by extrapolation\n", "# Order is relevant: Start with Azimuth (where we have sufficient values) and then continue with altitude\n", "da = (\n", - " da.interpolate_na(\"azimuth\")\n", + " da\n", + " .interpolate_na(\"azimuth\")\n", " .interpolate_na(\"altitude\")\n", " .interpolate_na(\"azimuth\", fill_value=\"extrapolate\")\n", ")\n", "\n", "# Use rolling horizon to smooth values, average over 3x3 adjacent values per pixel\n", "da = (\n", - " da.rolling(azimuth=3, altitude=3)\n", + " da\n", + " .rolling(azimuth=3, altitude=3)\n", " .mean()\n", " .interpolate_na(\"altitude\", fill_value=\"extrapolate\")\n", " .interpolate_na(\"azimuth\", fill_value=\"extrapolate\")\n", @@ -1286,7 +1282,7 @@ " \"efficiency\": df,\n", "}\n", "\n", - "with open(\"SAM_solar_tower.yaml\", \"w\") as f:\n", + "with Path(\"SAM_solar_tower.yaml\").open(\"w\") as f:\n", " yaml.safe_dump(config, f, default_flow_style=False, sort_keys=False)" ] } diff --git a/pyproject.toml b/pyproject.toml index d7d3c952..5bfa288b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ Documentation = "https://atlite.readthedocs.io/en/latest/" [project.optional-dependencies] -dev = ["pre-commit", "pytest", "pytest-cov", "matplotlib", "ruff"] +dev = ["pre-commit", "pytest", "pytest-cov", "matplotlib", "ruff", "mypy", "types-PyYAML"] docs = [ "numpydoc==1.8.0", @@ -81,42 +81,48 @@ include = ["atlite"] [tool.ruff] extend-include = ['*.ipynb'] +preview = true [tool.ruff.lint] select = [ - 'F', # pyflakes - 'E', # pycodestyle: Error - 'W', # pycodestyle: Warning - 'I', # isort - 'D', # pydocstyle - 'UP', # pyupgrade - 'TID', # flake8-tidy-imports - # 'NPY', # numpy - 'RUF013', # ruff + 'F', # pyflakes + 'E', # pycodestyle: Error + 'W', # 
pycodestyle: Warning + 'I', # isort + 'D', # pydocstyle + 'UP', # pyupgrade + 'TID', # flake8-tidy-imports + 'B', # flake8-bugbear + 'SIM', # flake8-simplify + 'RET', # flake8-return + 'C4', # flake8-comprehensions + 'TC', # flake8-type-checking + 'NPY', # numpy + 'G', # flake8-logging-format + 'PTH', # flake8-use-pathlib + 'DOC', # pydoclint: docstring-signature alignment + 'RUF013', # ruff: implicit-optional + 'RUF100', # ruff: unused-noqa ] ignore = [ 'E501', # line too long 'E741', # ambiguous variable names - 'D105', # Missing docstring in magic method - 'D212', # Multi-line docstring summary should start at the second line - 'D200', # One-line docstring should fit on one line with quotes - 'D401', # First line should be in imperative mood - 'D404', # First word of the docstring should not be "This - 'D413', # Missing blank line after last section - - # # pydocstyle ignores, which could be enabled in future when existing - # # issues are fixed - 'D100', # Missing docstring in public module - 'D101', # Missing docstring in public class - 'D102', # Missing docstring in public method - 'D103', # Missing docstring in public function - 'D107', # Missing docstring in __init__ - 'D202', # No blank lines allowed after function docstring - 'D203', # 1 blank line required before class docstring - 'D205', # 1 blank line required between summary line and description - 'D400', # First line should end with a period - 'D415', # First line should end with a period, question mark, or exclamation point - 'D417', # Missing argument descriptions in the docstring - ] + +[tool.ruff.lint.per-file-ignores] +"test/**" = ["D101", "D102", "D103", "D205"] +"examples/**" = ["D103"] + +[tool.ruff.lint.pydocstyle] +convention = "numpy" + +[tool.mypy] +python_version = "3.11" +warn_return_any = true +warn_unused_configs = true +ignore_missing_imports = true +warn_unused_ignores = true +disallow_incomplete_defs = true +check_untyped_defs = true +exclude = [".venv/", "build/"] diff --git 
a/test/conftest.py b/test/conftest.py index 9dfb99a6..77235d36 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -2,12 +2,14 @@ # # SPDX-License-Identifier: MIT +"""Shared pytest fixtures for atlite tests.""" + import os from datetime import date from pathlib import Path import pytest -from dateutil.relativedelta import relativedelta +from dateutil.relativedelta import relativedelta # type: ignore[import-untyped] from atlite import Cutout @@ -35,8 +37,7 @@ def cutouts_path(tmp_path_factory, pytestconfig): path = Path(custom_path) path.mkdir(parents=True, exist_ok=True) return path - else: - return tmp_path_factory.mktemp("atlite_cutouts") + return tmp_path_factory.mktemp("atlite_cutouts") def _prepare_era5_cutout(path, prepare_kwargs=None, **kwargs): @@ -110,18 +111,16 @@ def cutout_era5_weird_resolution(cutouts_path): @pytest.fixture(scope="session") def cutout_era5_reduced(cutouts_path): tmp_path = cutouts_path / "cutout_era5_reduced.nc" - cutout = Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) - return cutout + return Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) @pytest.fixture(scope="session") def cutout_era5_overwrite(cutouts_path, cutout_era5_reduced): tmp_path = cutouts_path / "cutout_era5_overwrite.nc" - cutout = Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) + return Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) # cutout.data = cutout.data.drop_vars("influx_direct") # cutout.prepare("influx", overwrite=True) # TODO Needs to be fixed - return cutout @pytest.fixture(scope="session") diff --git a/test/test_aggregate_time.py b/test/test_aggregate_time.py index 86f0f469..270fae91 100644 --- a/test/test_aggregate_time.py +++ b/test/test_aggregate_time.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +"""Tests for time aggregation functionality.""" + import numpy as np import pandas as pd import pytest @@ -23,21 +25,19 @@ def identity_convert(ds, **kwargs): @pytest.fixture def cutout(): - 
np.random.seed(42) + rng = np.random.default_rng(42) times = xr.date_range("2020-01-01", periods=24, freq="h") - data = xr.Dataset( - { - "var": xr.DataArray( - np.random.rand(24, 3, 4), - dims=["time", "y", "x"], - coords={ - "time": times, - "y": [50.0, 51.0, 52.0], - "x": [5.0, 6.0, 7.0, 8.0], - }, - ) - } - ) + data = xr.Dataset({ + "var": xr.DataArray( + rng.random((24, 3, 4)), + dims=["time", "y", "x"], + coords={ + "time": times, + "y": [50.0, 51.0, 52.0], + "x": [5.0, 6.0, 7.0, 8.0], + }, + ) + }) return MockCutout(data) @@ -158,12 +158,12 @@ def test_capacity_factor_with_aggregate_time_raises(self, cutout): class TestInvalidArgs: def test_invalid_aggregate_time_value(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): - convert_and_aggregate(cutout, identity_convert, aggregate_time="invalid") + convert_and_aggregate(cutout, identity_convert, aggregate_time="invalid") # type: ignore[arg-type] def test_aggregate_time_false_raises(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): - convert_and_aggregate(cutout, identity_convert, aggregate_time=False) + convert_and_aggregate(cutout, identity_convert, aggregate_time=False) # type: ignore[arg-type] def test_aggregate_time_true_raises(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): - convert_and_aggregate(cutout, identity_convert, aggregate_time=True) + convert_and_aggregate(cutout, identity_convert, aggregate_time=True) # type: ignore[arg-type] diff --git a/test/test_creation.py b/test/test_creation.py index 52d95c7a..055b089a 100755 --- a/test/test_creation.py +++ b/test/test_creation.py @@ -50,12 +50,12 @@ def test_shape(ref): def test_extent(ref): reference_extent = [-4.125, 1.625, 55.875, 61.125] - assert all([x == y for x, y in zip(ref.extent, reference_extent)]) + assert all(x == y for x, y in zip(ref.extent, reference_extent, strict=False)) def test_bounds(ref): reference_extent = [-4.125, 55.875, 1.625, 61.125] - 
assert all([x == y for x, y in zip(ref.bounds, reference_extent)]) + assert all(x == y for x, y in zip(ref.bounds, reference_extent, strict=False)) def test_transform(ref): @@ -147,7 +147,7 @@ def test_dx_dy_dt(): ) assert dx == cutout.dx assert dy == cutout.dy - assert "h" == cutout.dt + assert cutout.dt == "h" def test_available_features(ref): diff --git a/test/test_dynamic_line_rating.py b/test/test_dynamic_line_rating.py index 58ea591d..3532a415 100644 --- a/test/test_dynamic_line_rating.py +++ b/test/test_dynamic_line_rating.py @@ -103,9 +103,7 @@ def test_suedkabel_sample_case(): def test_right_angle_in_different_configuration(): - """ - Test different configurations of angle difference of 90 degree. - """ + """Test different configurations of angle difference of 90 degree.""" ds = { "temperature": 313, "wnd100m": 0.61, @@ -149,9 +147,7 @@ def test_right_angle_in_different_configuration(): def test_angle_increase(): - """ - Test an increasing angle which should lead to an increasing capacity. 
- """ + """Test an increasing angle which should lead to an increasing capacity.""" ds = { "temperature": 313, "wnd100m": 0.61, diff --git a/test/test_gis.py b/test/test_gis.py index 23c28fe7..6d003f6b 100755 --- a/test/test_gis.py +++ b/test/test_gis.py @@ -72,7 +72,8 @@ def raster(tmp_path_factory): bounds = (X0, Y0, X1, Y1) # same as in test_gis.py res = 0.01 transform, shape = padded_transform_and_shape(bounds, res) - mask = np.random.rand(*shape) < raster_clip + rng = np.random.default_rng(42) + mask = rng.random(shape) < raster_clip mask = mask.astype(rio.int32) path = tmp_path / "raster.tif" with rio.open( @@ -96,7 +97,8 @@ def raster_reproject(tmp_path_factory): bounds = rio.warp.transform_bounds(4326, 3035, X0, Y0, X1, Y1) res = 1000 transform, shape = padded_transform_and_shape(bounds, res) - mask = np.random.rand(*shape) < raster_clip + rng = np.random.default_rng(42) + mask = rng.random(shape) < raster_clip mask = mask.astype(rio.int32) path = tmp_path / "raster_reproject.tif" with rio.open( @@ -120,7 +122,8 @@ def raster_codes(tmp_path_factory): bounds = (X0, Y0, X1, Y1) # same as in test_gis.py res = 0.01 transform, shape = padded_transform_and_shape(bounds, res) - mask = (np.random.rand(*shape) * 100).astype(int) + rng = np.random.default_rng(42) + mask = (rng.random(shape) * 100).astype(int) mask = mask.astype(rio.int32) path = tmp_path / "raster_codes.tif" with rio.open( @@ -139,9 +142,7 @@ def raster_codes(tmp_path_factory): def test_exclusioncontainer_repr(ref): - """ - Test ExclusionContainer.__repr__. - """ + """Test ExclusionContainer.__repr__.""" excluder = ExclusionContainer(ref.crs, res=0.01) assert "Exclusion Container" in repr(excluder) @@ -199,9 +200,7 @@ def test_open_closed_checks(ref, geometry, raster): def test_area(ref): - """ - Test the area of the cutout. 
- """ + """Test the area of the cutout.""" area = ref.area(crs=3035) assert isinstance(area, xr.DataArray) assert area.dims == ("y", "x") @@ -249,9 +248,7 @@ def test_bounds(ref): def test_regrid(): - """ - Test the atlite.gis.regrid function with average resampling. - """ + """Test the atlite.gis.regrid function with average resampling.""" # define blocks A = 0.25 B = 0.5 @@ -293,9 +290,7 @@ def test_regrid(): def test_pad_extent(): - """ - Test whether padding works with arrays of dimension > 2. - """ + """Test whether padding works with arrays of dimension > 2.""" src = np.ones((3, 2)) src_trans = rio.Affine(1, 0, 0, 0, 1, 0) dst_trans = rio.Affine(2, 0, 0, 0, 2, 0) @@ -379,9 +374,7 @@ def test_availability_matrix_flat_parallel_anonymous_function(ref, raster_codes) def test_availability_matrix_flat_wo_progressbar(ref): - """ - Same as `test_availability_matrix_flat` but without progressbar. - """ + """Same as `test_availability_matrix_flat` but without progressbar.""" shapes = gpd.GeoSeries( [box(X0 + 1, Y0 + 1, X1 - 1, Y1 - 1)], crs=ref.crs ).rename_axis("shape") @@ -410,9 +403,7 @@ def test_availability_matrix_flat_parallel_wo_progressbar(ref): def test_shape_availability_area(ref): - """ - Area of the mask and the shape must be close. - """ + """Area of the mask and the shape must be close.""" shapes = gpd.GeoSeries([box(X0 + 1, Y0 + 1, X1 - 1, Y1 - 1)], crs=ref.crs) res = 100 excluder = ExclusionContainer(res=res) @@ -481,9 +472,7 @@ def test_shape_availability_exclude_geometry(ref): def test_shape_availability_exclude_raster(ref, raster): - """ - When excluding the half of the geometry, the eligible area must be half. 
- """ + """When excluding the half of the geometry, the eligible area must be half.""" shapes = gpd.GeoSeries([box(X0, Y0, X1, Y1)], crs=ref.crs) res = 0.01 @@ -518,9 +507,7 @@ def test_shape_availability_exclude_raster(ref, raster): def test_shape_availability_excluder_partial_overlap(ref, raster): - """ - Test behavior, when a raster only overlaps half of the geometry. - """ + """Test behavior, when a raster only overlaps half of the geometry.""" bounds = X0 - 2, Y0, X0 + 2, Y1 area = abs((bounds[2] - bounds[0]) * (bounds[3] - bounds[1])) shapes = gpd.GeoSeries([box(*bounds)], crs=ref.crs) @@ -543,9 +530,7 @@ def test_shape_availability_excluder_partial_overlap(ref, raster): def test_shape_availability_excluder_raster_no_overlap(ref, raster): - """ - Check if the allow_no_overlap flag works. - """ + """Check if the allow_no_overlap flag works.""" bounds = X0 - 10.0, Y0 - 10.0, X0 - 2.0, Y0 - 2.0 area = abs((bounds[2] - bounds[0]) * (bounds[3] - bounds[1])) shapes = gpd.GeoSeries([box(*bounds)], crs=ref.crs) @@ -627,9 +612,7 @@ def test_availability_matrix_rastered_repro(ref, raster_reproject): def test_shape_availability_exclude_raster_codes(ref, raster_codes): - """ - Test exclusion of multiple raster codes. - """ + """Test exclusion of multiple raster codes.""" shapes = gpd.GeoSeries([box(X0, Y0, X1, Y1)], crs=ref.crs) res = 0.01 @@ -653,9 +636,7 @@ def test_shape_availability_exclude_raster_codes(ref, raster_codes): def test_plot_shape_availability(ref, raster): - """ - Test plotting of shape availability. 
- """ + """Test plotting of shape availability.""" shapes = gpd.GeoSeries([box(X0, Y0, X1, Y1)], crs=ref.crs) res = 0.01 diff --git a/test/test_preparation_and_conversion.py b/test/test_preparation_and_conversion.py index 1ed9bdf1..a9e698b7 100644 --- a/test/test_preparation_and_conversion.py +++ b/test/test_preparation_and_conversion.py @@ -12,13 +12,14 @@ import os import sys from datetime import date +from pathlib import Path import geopandas as gpd import numpy as np import pandas as pd import pytest import urllib3 -from dateutil.relativedelta import relativedelta +from dateutil.relativedelta import relativedelta # type: ignore[import-untyped] from shapely.geometry import LineString as Line from shapely.geometry import Point @@ -36,16 +37,12 @@ def all_notnull_test(cutout): - """ - Test if no nan's in the prepared data occur. - """ + """Test if no nan's in the prepared data occur.""" assert cutout.data.notnull().all() def prepared_features_test(cutout): - """ - The prepared features series should contain all variables in cutout.data. - """ + """Verify that prepared features contain all variables in cutout.data.""" assert set(cutout.prepared_features) == set(cutout.data) @@ -106,7 +103,8 @@ def pv_test(cutout, time=TIME, skip_optimal_sum_test=False): return_capacity=True, ) cap_per_region = ( - cells.assign(cap_factor=cap_factor.stack(spatial=["y", "x"]).values) + cells + .assign(cap_factor=cap_factor.stack(spatial=["y", "x"]).values) .groupby("regions") .cap_factor.sum() ) @@ -228,7 +226,7 @@ def csp_test(cutout): Test the atlite.Cutout.csp function with different for different settings and technologies. 
""" - ## Test technology = "solar tower" + # Test technology = "solar tower" st = cutout.csp(atlite.cspinstallations.SAM_solar_tower, capacity_factor=True) assert st.notnull().all() @@ -240,7 +238,7 @@ def csp_test(cutout): ll = cutout.csp(atlite.cspinstallations.lossless_installation) assert (st <= ll).all() - ## Test technology = "parabolic trough" + # Test technology = "parabolic trough" pt = cutout.csp(atlite.cspinstallations.SAM_parabolic_trough, capacity_factor=True) assert pt.notnull().all() @@ -254,27 +252,21 @@ def csp_test(cutout): def solar_thermal_test(cutout): - """ - Test the atlite.Cutout.solar_thermal function with different settings. - """ + """Test the atlite.Cutout.solar_thermal function with different settings.""" cap_factor = cutout.solar_thermal() assert cap_factor.notnull().all() assert cap_factor.sum() > 0 def heat_demand_test(cutout): - """ - Test the atlite.Cutout.heat_demand function with different settings. - """ + """Test the atlite.Cutout.heat_demand function with different settings.""" demand = cutout.heat_demand() assert demand.notnull().all() assert demand.sum() > 0 def soil_temperature_test(cutout): - """ - Test the atlite.Cutout.soil_temperature function with different settings. - """ + """Test the atlite.Cutout.soil_temperature function with different settings.""" demand = cutout.soil_temperature() assert demand.notnull().all() assert demand.sum() > 0 @@ -358,19 +350,17 @@ def runoff_test(cutout): def hydro_test(cutout): - """ - Test the atlite.Cutout.hydro function. 
- """ + """Test the atlite.Cutout.hydro function.""" plants = pd.DataFrame( cutout.grid.loc[[0], ["x", "y"]].values, columns=["lon", "lat"] ) basins = gpd.GeoDataFrame( - dict( - geometry=[cutout.grid.geometry[0]], - HYBAS_ID=[0], - DIST_MAIN=10, - NEXT_DOWN=None, - ), + { + "geometry": [cutout.grid.geometry[0]], + "HYBAS_ID": [0], + "DIST_MAIN": 10, + "NEXT_DOWN": None, + }, index=[0], crs=cutout.crs, ) @@ -386,9 +376,7 @@ def line_rating_test(cutout): def coefficient_of_performance_test(cutout): - """ - Test the coefficient_of_performance function. - """ + """Test the coefficient_of_performance function.""" cap_factor = cutout.coefficient_of_performance(source="air") assert cap_factor.notnull().all() assert cap_factor.sum() > 0 @@ -405,21 +393,17 @@ def test_data_module_arguments_era5(cutout_era5): All data variables should have an attribute to which module they belong. """ - for v in cutout_era5.data: + for _v in cutout_era5.data: assert cutout_era5.data.attrs["module"] == "era5" @staticmethod def test_all_non_na_era5(cutout_era5): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_era5.data).all() @staticmethod def test_all_non_na_era5_coarse(cutout_era5_coarse): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_era5_coarse.data).all() @staticmethod @@ -428,24 +412,18 @@ def test_all_non_na_era5_coarse(cutout_era5_coarse): reason="This test breaks on windows machine on CI due to unknown reasons.", ) def test_all_non_na_era5_weird_resolution(cutout_era5_weird_resolution): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_era5_weird_resolution.data).all() @staticmethod def test_dx_dy_preservation_era5(cutout_era5): - """ - The coordinates should be the same after preparation. 
- """ + """The coordinates should be the same after preparation.""" assert np.allclose(np.diff(cutout_era5.data.x), 0.25) assert np.allclose(np.diff(cutout_era5.data.y), 0.25) @staticmethod def test_dx_dy_preservation_era5_coarse(cutout_era5_coarse): - """ - The coordinates should be the same after preparation. - """ + """The coordinates should be the same after preparation.""" assert np.allclose( np.diff(cutout_era5_coarse.data.x), cutout_era5_coarse.data.attrs["dx"] ) @@ -459,9 +437,7 @@ def test_dx_dy_preservation_era5_coarse(cutout_era5_coarse): reason="This test breaks on windows machine on CI due to unknown reasons.", ) def test_dx_dy_preservation_era5_weird_resolution(cutout_era5_weird_resolution): - """ - The coordinates should be the same after preparation. - """ + """The coordinates should be the same after preparation.""" assert np.allclose( np.diff(cutout_era5_weird_resolution.data.x), cutout_era5_weird_resolution.data.attrs["dx"], @@ -510,9 +486,7 @@ def test_pv_tracking_era5(cutout_era5): @staticmethod def test_pv_era5_2days_crossing_months(cutout_era5_2days_crossing_months): - """ - See https://github.com/PyPSA/atlite/issues/256. - """ + """See https://github.com/PyPSA/atlite/issues/256.""" # noqa: DOC201 return pv_test(cutout_era5_2days_crossing_months, time="2013-03-01") @staticmethod @@ -532,6 +506,11 @@ def test_pv_era5_and_era5t(cutout_era5t): Note: the above page says that ERA5 data are made available with a *3* month delay, but experience shows that it's with a *2* month delay. Hence the test with previous vs. second-previous month. + + Returns + ------- + object + PV test result. 
""" today = date.today() first_day_this_month = today.replace(day=1) @@ -588,35 +567,27 @@ def test_line_rating_era5(cutout_era5): @pytest.mark.skipif( - not os.path.exists(SARAH_DIR), reason="'sarah_dir' is not a valid path" + not Path(SARAH_DIR).exists(), reason="'sarah_dir' is not a valid path" ) class TestSarah: @staticmethod def test_all_non_na_sarah(cutout_sarah): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_sarah.data).all() @staticmethod def test_all_non_na_sarah_fine(cutout_sarah_fine): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_sarah_fine.data).all() @staticmethod def test_all_non_na_sarah_weird_resolution(cutout_sarah_weird_resolution): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_sarah_weird_resolution.data).all() @staticmethod def test_dx_dy_preservation_sarah(cutout_sarah): - """ - The coordinates should be the same after preparation. - """ + """The coordinates should be the same after preparation.""" assert np.allclose(np.diff(cutout_sarah.data.x), 0.25) assert np.allclose(np.diff(cutout_sarah.data.y), 0.25) @@ -642,12 +613,10 @@ def test_runoff_sarah(cutout_sarah): @pytest.mark.skipif( - not os.path.exists(GEBCO_PATH), reason="'gebco_path' is not a valid path" + not Path(GEBCO_PATH).exists(), reason="'gebco_path' is not a valid path" ) class TestGebco: @staticmethod def test_all_non_na_gebco(cutout_gebco): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_gebco.data).all()