Skip to content

Commit 7210d42

Browse files
authored
Merge pull request #1264 from hkad98/jkd/paging
feat: enable paging customization
2 parents 6a58801 + 80a9dd9 commit 7210d42

File tree

3 files changed

+39
-2
lines changed

3 files changed

+39
-2
lines changed

packages/gooddata-pandas/src/gooddata_pandas/data_access.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ def _extract_from_attributes_and_maybe_metrics(
358358
col_to_attr_idx: dict[str, int],
359359
col_to_metric_idx: dict[str, int],
360360
index_to_attr_idx: Optional[dict[str, int]] = None,
361+
result_page_len: Optional[int] = None,
361362
) -> tuple[dict, dict]:
362363
"""
363364
Internal function that extracts data from execution response with attributes columns and
@@ -371,6 +372,8 @@ def _extract_from_attributes_and_maybe_metrics(
371372
col_to_metric_idx (dict[str, int]): A mapping of pandas column names to metric dimension indices.
372373
index_to_attr_idx (Optional[dict[str, int]]):
373374
An optional mapping of pandas index names to attribute dimension indices.
375+
result_page_len (Optional[int]): Optional page size for result pagination.
376+
Defaults to _RESULT_PAGE_LEN (1000). Larger values can improve performance for large result sets.
374377
375378
Returns:
376379
tuple: A tuple containing the following dictionaries:
@@ -379,7 +382,8 @@ def _extract_from_attributes_and_maybe_metrics(
379382
"""
380383
exec_def = execution.exec_def
381384
offset = [0 for _ in exec_def.dimensions]
382-
limit = [len(exec_def.metrics), _RESULT_PAGE_LEN] if exec_def.has_metrics() else [_RESULT_PAGE_LEN]
385+
page_len = result_page_len if result_page_len is not None else _RESULT_PAGE_LEN
386+
limit = [len(exec_def.metrics), page_len] if exec_def.has_metrics() else [page_len]
383387
attribute_dim = 1 if exec_def.has_metrics() else 0
384388
result = execution.read_result(limit=limit, offset=offset)
385389
safe_index_to_attr_idx = index_to_attr_idx if index_to_attr_idx is not None else dict()
@@ -421,6 +425,7 @@ def compute_and_extract(
421425
filter_by: Optional[Union[Filter, list[Filter]]] = None,
422426
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
423427
is_cancellable: bool = False,
428+
result_page_len: Optional[int] = None,
424429
) -> tuple[dict, dict]:
425430
"""
426431
Convenience function that computes and extracts data from the execution response.
@@ -435,6 +440,8 @@ def compute_and_extract(
435440
submitted to the backend.
436441
is_cancellable (bool, optional): Whether the execution of this definition should be cancelled when
437442
the connection is interrupted.
443+
result_page_len (Optional[int]): Optional page size for result pagination.
444+
Defaults to 1000. Larger values can improve performance for large result sets.
438445
439446
Returns:
440447
tuple: A tuple containing the following dictionaries:
@@ -472,4 +479,5 @@ def compute_and_extract(
472479
col_to_attr_idx,
473480
col_to_metric_idx,
474481
index_to_attr_idx,
482+
result_page_len=result_page_len,
475483
)

packages/gooddata-pandas/src/gooddata_pandas/dataframe.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ def indexed(
7575
filter_by: Optional[Union[Filter, list[Filter]]] = None,
7676
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
7777
is_cancellable: bool = False,
78+
result_page_len: Optional[int] = None,
7879
) -> pandas.DataFrame:
7980
"""
8081
Creates a data frame indexed by values of the label. The data frame columns will be created from either
@@ -90,6 +91,8 @@ def indexed(
9091
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
9192
submitted to the backend.
9293
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
94+
result_page_len (Optional[int]): Optional page size for result pagination.
95+
Defaults to 1000. Larger values can improve performance for large result sets.
9396
9497
Returns:
9598
pandas.DataFrame: A DataFrame instance.
@@ -102,6 +105,7 @@ def indexed(
102105
filter_by=filter_by,
103106
on_execution_submitted=on_execution_submitted,
104107
is_cancellable=is_cancellable,
108+
result_page_len=result_page_len,
105109
)
106110

107111
_idx = make_pandas_index(index)
@@ -114,6 +118,7 @@ def not_indexed(
114118
filter_by: Optional[Union[Filter, list[Filter]]] = None,
115119
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
116120
is_cancellable: bool = False,
121+
result_page_len: Optional[int] = None,
117122
) -> pandas.DataFrame:
118123
"""
119124
Creates a data frame with columns created from metrics and or labels.
@@ -125,6 +130,8 @@ def not_indexed(
125130
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
126131
submitted to the backend.
127132
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
133+
result_page_len (Optional[int]): Optional page size for result pagination.
134+
Defaults to 1000. Larger values can improve performance for large result sets.
128135
129136
Returns:
130137
pandas.DataFrame: A DataFrame instance.
@@ -137,6 +144,7 @@ def not_indexed(
137144
filter_by=filter_by,
138145
on_execution_submitted=on_execution_submitted,
139146
is_cancellable=is_cancellable,
147+
result_page_len=result_page_len,
140148
)
141149

142150
return pandas.DataFrame(data=data)
@@ -148,6 +156,7 @@ def for_items(
148156
auto_index: bool = True,
149157
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
150158
is_cancellable: bool = False,
159+
result_page_len: Optional[int] = None,
151160
) -> pandas.DataFrame:
152161
"""
153162
Creates a data frame for named items. This is a convenience method that will create DataFrame with or
@@ -162,6 +171,8 @@ def for_items(
162171
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
163172
submitted to the backend.
164173
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
174+
result_page_len (Optional[int]): Optional page size for result pagination.
175+
Defaults to 1000. Larger values can improve performance for large result sets.
165176
166177
Returns:
167178
pandas.DataFrame: A DataFrame instance.
@@ -184,14 +195,19 @@ def for_items(
184195
if not auto_index or not has_measures or not has_attributes:
185196
columns: ColumnsDef = {**resolved_attr_cols, **resolved_measure_cols}
186197

187-
return self.not_indexed(columns=columns, filter_by=filter_by)
198+
return self.not_indexed(
199+
columns=columns,
200+
filter_by=filter_by,
201+
result_page_len=result_page_len,
202+
)
188203

189204
return self.indexed(
190205
index_by=resolved_attr_cols,
191206
columns=resolved_measure_cols,
192207
filter_by=filter_by,
193208
on_execution_submitted=on_execution_submitted,
194209
is_cancellable=is_cancellable,
210+
result_page_len=result_page_len,
195211
)
196212

197213
def for_visualization(
@@ -200,6 +216,7 @@ def for_visualization(
200216
auto_index: bool = True,
201217
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
202218
is_cancellable: bool = False,
219+
result_page_len: Optional[int] = None,
203220
) -> pandas.DataFrame:
204221
"""
205222
Creates a data frame with columns based on the content of the visualization with the provided identifier.
@@ -211,6 +228,8 @@ def for_visualization(
211228
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
212229
submitted to the backend.
213230
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
231+
result_page_len (Optional[int]): Optional page size for result pagination.
232+
Defaults to 1000. Larger values can improve performance for large result sets.
214233
215234
Returns:
216235
pandas.DataFrame: A DataFrame instance.
@@ -231,6 +250,7 @@ def for_visualization(
231250
auto_index=auto_index,
232251
on_execution_submitted=on_execution_submitted,
233252
is_cancellable=is_cancellable,
253+
result_page_len=result_page_len,
234254
)
235255

236256
def for_created_visualization(

packages/gooddata-pandas/src/gooddata_pandas/series.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def indexed(
3030
filter_by: Optional[Union[Filter, list[Filter]]] = None,
3131
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
3232
is_cancellable: bool = False,
33+
result_page_len: Optional[int] = None,
3334
) -> pandas.Series:
3435
"""Creates pandas Series from data points calculated from a single `data_by`.
3536
@@ -68,6 +69,9 @@ def indexed(
6869
6970
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
7071
72+
result_page_len (Optional[int]): Optional page size for result pagination.
73+
Defaults to 1000. Larger values can improve performance for large result sets.
74+
7175
Returns:
7276
pandas.Series: pandas series instance
7377
"""
@@ -80,6 +84,7 @@ def indexed(
8084
filter_by=filter_by,
8185
on_execution_submitted=on_execution_submitted,
8286
is_cancellable=is_cancellable,
87+
result_page_len=result_page_len,
8388
)
8489

8590
_idx = make_pandas_index(index)
@@ -93,6 +98,7 @@ def not_indexed(
9398
filter_by: Optional[Union[Filter, list[Filter]]] = None,
9499
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
95100
is_cancellable: bool = False,
101+
result_page_len: Optional[int] = None,
96102
) -> pandas.Series:
97103
"""
98104
Creates a pandas.Series from data points calculated from a single `data_by` without constructing an index.
@@ -122,6 +128,8 @@ def not_indexed(
122128
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
123129
submitted to the backend.
124130
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
131+
result_page_len (Optional[int]): Optional page size for result pagination.
132+
Defaults to 1000. Larger values can improve performance for large result sets.
125133
126134
Returns:
127135
pandas.Series: The resulting pandas Series instance.
@@ -140,6 +148,7 @@ def not_indexed(
140148
filter_by=filter_by,
141149
on_execution_submitted=on_execution_submitted,
142150
is_cancellable=is_cancellable,
151+
result_page_len=result_page_len,
143152
)
144153

145154
return pandas.Series(data=data["_series"])

0 commit comments

Comments
 (0)