From 4cb5c181bed995f52e449dfa24ebcf6aa04eb871 Mon Sep 17 00:00:00 2001 From: Wojciech Bochniarz Date: Tue, 24 Feb 2026 13:10:06 +0100 Subject: [PATCH 1/8] Added new parameter `include_empty_aggregations` to get_samples_aggregate function --- datareservoirio/client.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datareservoirio/client.py b/datareservoirio/client.py index 35a53b42..1fd2ffad 100644 --- a/datareservoirio/client.py +++ b/datareservoirio/client.py @@ -466,6 +466,7 @@ def get_samples_aggregate( aggregation_period=None, aggregation_function=None, max_page_size=_DEFAULT_MAX_PAGE_SIZE, + include_empty_aggregations=False, ): """ Retrieve a series from DataReservoir.io using the samples/aggregate endpoint. @@ -489,6 +490,8 @@ def get_samples_aggregate( max_page_size : optional Maximum number of samples to return per page. The method automatically follows links to next pages and returns the entire series. For advanced usage. + include_empty_aggregations : optional + Whether to include empty aggregations with no data in the returned series. Default is False. Returns ------- pandas.Series @@ -550,6 +553,7 @@ def get_samples_aggregate( params["aggregationFunction"] = aggregation_function params["start"] = start.isoformat() params["end"] = end.isoformat() + params["includeEmptyAggregations"] = include_empty_aggregations next_page_link = f"{environment.api_base_url}reservoir/timeseries/{series_id}/samples/aggregate?{urlencode(params)}" From f3e11b8f09b2d72e594ec3e40875e511bd518d53 Mon Sep 17 00:00:00 2001 From: Wojciech Bochniarz Date: Tue, 24 Feb 2026 14:19:36 +0100 Subject: [PATCH 2/8] Updated documentation --- docs/user_guide/access_data.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/user_guide/access_data.rst b/docs/user_guide/access_data.rst index d0846f6c..d0dd6feb 100644 --- a/docs/user_guide/access_data.rst +++ b/docs/user_guide/access_data.rst @@ -29,12 +29,19 @@ is *"tick"* (100 nanoseconds). 
aggregation_period='15m', aggregation_function='mean') - # Get all data for selected time period + # Get all available data for selected time period timeseries = client.get_samples_aggregate(series_id, start='2024-01-01', end='2024-01-02', aggregation_period='tick', aggregation_function='mean') + # Get all datapoints resampled to 1 minute even if there is no data. Empty values will be filled with null. + timeseries = client.get_samples_aggregate(series_id, + start='2024-01-01', end='2024-01-02', + aggregation_period='1m', + aggregation_function='mean', + include_aggregated_data=True) + .. note:: :py:meth:`Client.get_samples_aggregate` returns a :py:class:`pandas.Series`. The :py:mod:`start`, :py:mod:`end`, :py:mod:`aggregation_period` and :py:mod:`aggregation_function` parameters are required. From 18036c8625a5eceb2823e9ad81b396b8e3f922be Mon Sep 17 00:00:00 2001 From: Wojciech Bochniarz Date: Tue, 24 Feb 2026 14:44:36 +0100 Subject: [PATCH 3/8] Fix documentation --- docs/user_guide/access_data.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/access_data.rst b/docs/user_guide/access_data.rst index d0dd6feb..ee1954cf 100644 --- a/docs/user_guide/access_data.rst +++ b/docs/user_guide/access_data.rst @@ -40,7 +40,7 @@ is *"tick"* (100 nanoseconds). start='2024-01-01', end='2024-01-02', aggregation_period='1m', aggregation_function='mean', - include_aggregated_data=True) + include_empty_aggregations=True) .. note:: From eaf2c39a70c40b0c2d79d5752319d8155504796a Mon Sep 17 00:00:00 2001 From: Wojciech Bochniarz Date: Tue, 24 Feb 2026 14:49:02 +0100 Subject: [PATCH 4/8] Updated doc yet again --- docs/user_guide/access_data.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/access_data.rst b/docs/user_guide/access_data.rst index ee1954cf..e5edb961 100644 --- a/docs/user_guide/access_data.rst +++ b/docs/user_guide/access_data.rst @@ -35,7 +35,7 @@ is *"tick"* (100 nanoseconds). 
aggregation_period='tick', aggregation_function='mean') - # Get all datapoints resampled to 1 minute even if there is no data. Empty values will be filled with null. + # Get data aggregated into 1-minute intervals, including intervals that contain no data. Empty intervals will be filled with NaN. timeseries = client.get_samples_aggregate(series_id, start='2024-01-01', end='2024-01-02', aggregation_period='1m', From ce5e5f988907fa083df798369f671ce2052e948a Mon Sep 17 00:00:00 2001 From: Wojciech Bochniarz Date: Tue, 24 Feb 2026 16:02:59 +0100 Subject: [PATCH 5/8] Further documentation improvement --- docs/user_guide/advanced_config.rst | 82 ++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/docs/user_guide/advanced_config.rst b/docs/user_guide/advanced_config.rst index 39086e75..4f19c2b4 100644 --- a/docs/user_guide/advanced_config.rst +++ b/docs/user_guide/advanced_config.rst @@ -151,4 +151,84 @@ Using the :py:mod:`max_page_size` parameter in :py:mod:`get_samples_aggregate` m The :py:meth:`Client.get_samples_aggregate` method uses an endpoint that has support for paging of responses. This means that instead of making one big request, it might make a series of smaller requests traversing links to next pages returned in each partial response. -Normally this is something you don't have to think about. In case you do want to change the maximum number of results returned in one page, you can use the parameter called ``max_page_size`` to alter this number. \ No newline at end of file +Normally this is something you don't have to think about. In case you do want to change the maximum number of results returned in one page, you can use the parameter called ``max_page_size`` to alter this number. 
+ +Using the :py:mod:`include_empty_aggregations` parameter in :py:mod:`get_samples_aggregate` method +--------------------------------------------------------------------------------------------------- + +The :py:meth:`Client.get_samples_aggregate` method aggregates data into fixed intervals based on the ``aggregation_period`` parameter. By default, the method only returns aggregations that contain data. + +The ``include_empty_aggregations`` parameter controls whether to include aggregation intervals that have no data points. This is useful when you need a complete time series with regular intervals, even for periods where no measurements were recorded. + +**Default behavior (include_empty_aggregations=False):** + +When ``include_empty_aggregations`` is ``False`` (default), only aggregations with data are returned. This results in a sparse series that may have gaps. + +.. code-block:: python + + import datareservoirio as drio + + auth = drio.Authenticator() + client = drio.Client(auth) + + # Returns only aggregations with data + timeseries = client.get_samples_aggregate( + 'your-series-id', + start='2026-02-23', + end='2026-02-24', + aggregation_period='1m', + aggregation_function='mean', + include_empty_aggregations=False # Default + ) + + print(timeseries) + + # Result will only include time intervals that have data. + # 2026-02-23 00:03:00+00:00 2.2 + # 2026-02-23 23:56:00+00:00 1.0 + +**With empty aggregations (include_empty_aggregations=True):** + +When ``include_empty_aggregations`` is ``True``, all aggregation intervals within the specified time range are returned, with ``NaN`` (Not a Number) values for intervals that contain no data. + +.. 
code-block:: python + + import datareservoirio as drio + + auth = drio.Authenticator() + client = drio.Client(auth) + + # Returns all aggregations, including those with no data + timeseries = client.get_samples_aggregate( + 'your-series-id', + start='2026-02-23', + end='2026-02-24', + aggregation_period='1m', + aggregation_function='mean', + include_empty_aggregations=True + ) + + print(timeseries) + + # Result has a complete time series with NaN values where data is missing + # 2026-02-23 00:00:00+00:00 NaN + # 2026-02-23 00:01:00+00:00 NaN + # 2026-02-23 00:02:00+00:00 NaN + # 2026-02-23 00:03:00+00:00 2.2 + # 2026-02-23 00:04:00+00:00 NaN + # .. + # 2026-02-23 23:55:00+00:00 NaN + # 2026-02-23 23:56:00+00:00 1.0 + # 2026-02-23 23:57:00+00:00 NaN + # 2026-02-23 23:58:00+00:00 NaN + # 2026-02-23 23:59:00+00:00 NaN + +**Use Cases:** + +* **Analysis requiring regular intervals:** Set ``include_empty_aggregations=True`` when your analysis requires evenly-spaced data points (e.g., time-series forecasting models that expect regular intervals). + +* **Detecting data gaps:** Set ``include_empty_aggregations=True`` if you need to identify periods with missing measurements. + +* **Visualization:** Set ``include_empty_aggregations=True`` when creating time-series plots that should display the full time range uniformly. + +* **Memory efficiency:** Use ``include_empty_aggregations=False`` (default) if storage or memory is a concern and you only need data-bearing intervals. 
\ No newline at end of file From 44f736ff617aeb4a72786a52ad2fc228f2c5338f Mon Sep 17 00:00:00 2001 From: Wojciech Bochniarz Date: Thu, 26 Feb 2026 15:37:15 +0100 Subject: [PATCH 6/8] Fix dataframe slicing when start or end are not provided --- datareservoirio/client.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/datareservoirio/client.py b/datareservoirio/client.py index 1fd2ffad..0f73628e 100644 --- a/datareservoirio/client.py +++ b/datareservoirio/client.py @@ -395,6 +395,9 @@ def get( pandas.Series Series data """ + start_provided = start is not None + end_provided = end is not None + if not start: start = _START_DEFAULT if not end: @@ -430,21 +433,19 @@ def get( else: df = pd.DataFrame(columns=("index", "values")).astype({"index": "int64"}) - try: - series = ( - df.set_index("index").squeeze("columns").loc[start:end].copy(deep=True) - ) - except KeyError as e: + s = df.set_index("index").squeeze("columns") + + # Ensure sorted (cheap if already sorted) + if not s.index.is_monotonic_increasing: logging.warning( "The time series you requested is not properly ordered. The data will be sorted to attempt to resolve the issue. Please note that this operation may take some time." 
) - series = ( - df.set_index("index") - .sort_index() - .squeeze("columns") - .loc[start:end] - .copy(deep=True) - ) + s = s.sort_index() + + series = s.loc[ + start if start_provided else None : end if end_provided else None + ].copy(deep=True) + series.index.name = None if series.empty and raise_empty: # may become empty after slicing From 99b9bdf3b06149a25746e1ec6f2827ca188eb165 Mon Sep 17 00:00:00 2001 From: Branislav Jenco Date: Fri, 27 Feb 2026 11:58:02 +0100 Subject: [PATCH 7/8] Locking pandas to v2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4c616762..1b226bc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ dependencies = [ "numpy", "oauthlib", - "pandas", + "pandas < 3", "pyarrow", "requests", "requests-oauthlib", From e7771793bbbd1619af2fbece66d9c9649e36fd8c Mon Sep 17 00:00:00 2001 From: Branislav Jenco Date: Fri, 27 Feb 2026 11:58:10 +0100 Subject: [PATCH 8/8] Putting back the implementation --- datareservoirio/client.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/datareservoirio/client.py b/datareservoirio/client.py index 0f73628e..7e7ab839 100644 --- a/datareservoirio/client.py +++ b/datareservoirio/client.py @@ -395,9 +395,6 @@ def get( pandas.Series Series data """ - start_provided = start is not None - end_provided = end is not None - if not start: start = _START_DEFAULT if not end: @@ -433,19 +430,22 @@ def get( else: df = pd.DataFrame(columns=("index", "values")).astype({"index": "int64"}) - s = df.set_index("index").squeeze("columns") - - # Ensure sorted (cheap if already sorted) - if not s.index.is_monotonic_increasing: + try: + # When we move to pandas 3, the .loc here breaks with None start and end, haven't dug into why yet + series = ( + df.set_index("index").squeeze("columns").loc[start:end].copy(deep=True) + ) + except KeyError as e: logging.warning( "The time series you 
requested is not properly ordered. The data will be sorted to attempt to resolve the issue. Please note that this operation may take some time." ) - s = s.sort_index() - - series = s.loc[ - start if start_provided else None : end if end_provided else None - ].copy(deep=True) - + series = ( + df.set_index("index") + .sort_index() + .squeeze("columns") + .loc[start:end] + .copy(deep=True) + ) series.index.name = None if series.empty and raise_empty: # may become empty after slicing