diff --git a/datareservoirio/client.py b/datareservoirio/client.py index 35a53b42..7e7ab839 100644 --- a/datareservoirio/client.py +++ b/datareservoirio/client.py @@ -431,6 +431,7 @@ def get( df = pd.DataFrame(columns=("index", "values")).astype({"index": "int64"}) try: + # NOTE: with pandas 3, the .loc slice below breaks when start and end are None; the root cause has not been investigated yet. series = ( df.set_index("index").squeeze("columns").loc[start:end].copy(deep=True) ) @@ -466,6 +467,7 @@ def get_samples_aggregate( aggregation_period=None, aggregation_function=None, max_page_size=_DEFAULT_MAX_PAGE_SIZE, + include_empty_aggregations=False, ): """ Retrieve a series from DataReservoir.io using the samples/aggregate endpoint. @@ -489,6 +491,8 @@ def get_samples_aggregate( max_page_size : optional Maximum number of samples to return per page. The method automatically follows links to next pages and returns the entire series. For advanced usage. + include_empty_aggregations : optional + Whether to include aggregation periods that contain no data in the returned series. Default is False. Returns ------- pandas.Series @@ -550,6 +554,7 @@ def get_samples_aggregate( params["aggregationFunction"] = aggregation_function params["start"] = start.isoformat() params["end"] = end.isoformat() + params["includeEmptyAggregations"] = include_empty_aggregations next_page_link = f"{environment.api_base_url}reservoir/timeseries/{series_id}/samples/aggregate?{urlencode(params)}" diff --git a/docs/user_guide/access_data.rst b/docs/user_guide/access_data.rst index d0846f6c..e5edb961 100644 --- a/docs/user_guide/access_data.rst +++ b/docs/user_guide/access_data.rst @@ -29,12 +29,19 @@ is *"tick"* (100 nanoseconds). 
aggregation_period='15m', aggregation_function='mean') - # Get all data for selected time period + # Get all available data for the selected time period timeseries = client.get_samples_aggregate(series_id, start='2024-01-01', end='2024-01-02', aggregation_period='tick', aggregation_function='mean') + # Get data aggregated into 1-minute intervals, including intervals with no data. Empty intervals are returned as NaN. + timeseries = client.get_samples_aggregate(series_id, + start='2024-01-01', end='2024-01-02', + aggregation_period='1m', + aggregation_function='mean', + include_empty_aggregations=True) + .. note:: :py:meth:`Client.get_samples_aggregate` returns a :py:class:`pandas.Series`. The :py:mod:`start`, :py:mod:`end`, :py:mod:`aggregation_period` and :py:mod:`aggregation_function` parameters are required. diff --git a/docs/user_guide/advanced_config.rst b/docs/user_guide/advanced_config.rst index 39086e75..4f19c2b4 100644 --- a/docs/user_guide/advanced_config.rst +++ b/docs/user_guide/advanced_config.rst @@ -151,4 +151,84 @@ Using the :py:mod:`max_page_size` parameter in :py:mod:`get_samples_aggregate` m The :py:meth:`Client.get_samples_aggregate` method uses an endpoint that has support for paging of responses. This means that instead of making one big request, it might make a series of smaller requests traversing links to next pages returned in each partial response. -Normally this is something you don't have to think about. In case you do want to change the maximum number of results returned in one page, you can use the parameter called ``max_page_size`` to alter this number. \ No newline at end of file +Normally this is something you don't have to think about. In case you do want to change the maximum number of results returned in one page, you can use the parameter called ``max_page_size`` to alter this number. 
+ +Using the :py:mod:`include_empty_aggregations` parameter in :py:mod:`get_samples_aggregate` method +--------------------------------------------------------------------------------------------------- + +The :py:meth:`Client.get_samples_aggregate` method aggregates data into fixed intervals based on the ``aggregation_period`` parameter. By default, the method only returns aggregations that contain data. + +The ``include_empty_aggregations`` parameter controls whether to include aggregation intervals that have no data points. This is useful when you need a complete time series with regular intervals, even for periods where no measurements were recorded. + +**Default behavior (include_empty_aggregations=False):** + +When ``include_empty_aggregations`` is ``False`` (default), only aggregations with data are returned. This results in a sparse series that may have gaps. + +.. code-block:: python + + import datareservoirio as drio + + auth = drio.Authenticator() + client = drio.Client(auth) + + # Returns only aggregations with data + timeseries = client.get_samples_aggregate( + 'your-series-id', + start='2026-02-23', + end='2026-02-24', + aggregation_period='1m', + aggregation_function='mean', + include_empty_aggregations=False # Default + ) + + print(timeseries) + + # Result will only include time intervals that have data. + # 2026-02-23 00:03:00+00:00 2.2 + # 2026-02-23 23:56:00+00:00 1.0 + +**With empty aggregations (include_empty_aggregations=True):** + +When ``include_empty_aggregations`` is ``True``, all aggregation intervals within the specified time range are returned, with ``NaN`` (Not a Number) values for intervals that contain no data. + +.. 
code-block:: python + + import datareservoirio as drio + + auth = drio.Authenticator() + client = drio.Client(auth) + + # Returns all aggregations, including those with no data + timeseries = client.get_samples_aggregate( + 'your-series-id', + start='2026-02-23', + end='2026-02-24', + aggregation_period='1m', + aggregation_function='mean', + include_empty_aggregations=True + ) + + print(timeseries) + + # Result has a complete time series with NaN values where data is missing + # 2026-02-23 00:00:00+00:00 NaN + # 2026-02-23 00:01:00+00:00 NaN + # 2026-02-23 00:02:00+00:00 NaN + # 2026-02-23 00:03:00+00:00 2.2 + # 2026-02-23 00:04:00+00:00 NaN + # .. + # 2026-02-23 23:55:00+00:00 NaN + # 2026-02-23 23:56:00+00:00 1.0 + # 2026-02-23 23:57:00+00:00 NaN + # 2026-02-23 23:58:00+00:00 NaN + # 2026-02-23 23:59:00+00:00 NaN + +**Use Cases:** + +* **Analysis requiring regular intervals:** Set ``include_empty_aggregations=True`` when your analysis requires evenly-spaced data points (e.g., time-series forecasting models that expect regular intervals). + +* **Detecting data gaps:** Set ``include_empty_aggregations=True`` if you need to identify periods with missing measurements. + +* **Visualization:** Set ``include_empty_aggregations=True`` when creating time-series plots that should display the full time range uniformly. + +* **Memory efficiency:** Use ``include_empty_aggregations=False`` (default) if storage or memory is a concern and you only need data-bearing intervals. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 4c616762..1b226bc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ dependencies = [ "numpy", "oauthlib", - "pandas", + "pandas < 3", "pyarrow", "requests", "requests-oauthlib",