Skip to content

Commit 54d8f64

Browse files
Gayathri Srividya Rajavarapuebyhr
authored andcommitted
REST catalog: implement lazy pagination generators for list_* methods
Replace the collect-then-return approach with proper generator functions that yield results page by page. Extract per-page fetch logic into dedicated helper methods (_fetch_tables_page, _fetch_views_page, _fetch_namespaces_page) decorated with @Retry so authentication retries work correctly per page. Co-authored-by: Yuya Ebihara <ebyhry@gmail.com>
1 parent b79fff7 commit 54d8f64

1 file changed

Lines changed: 31 additions & 36 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 31 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,14 @@ def register_table(self, identifier: str | Identifier, metadata_location: str, o
10381038
return self._response_to_table(self.identifier_to_tuple(identifier), table_response)
10391039

10401040
@retry(**_RETRY_ARGS)
1041+
def _fetch_tables_page(self, url: str, params: dict[str, str]) -> ListTablesResponse:
1042+
response = self._session.get(url, params=params)
1043+
try:
1044+
response.raise_for_status()
1045+
except HTTPError as exc:
1046+
_handle_non_200_response(exc, {404: NoSuchNamespaceError})
1047+
return ListTablesResponse.model_validate_json(response.text)
1048+
10411049
@override
10421050
def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]:
10431051
self._check_endpoint(Capability.V1_LIST_TABLES)
@@ -1052,27 +1060,18 @@ def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]:
10521060
raise ValueError(f"{PAGE_SIZE} must be a positive integer")
10531061
params["pageSize"] = str(page_size)
10541062

1055-
tables: list[Identifier] = []
10561063
page_token: str | None = None
10571064

10581065
while True:
10591066
if page_token:
10601067
params["pageToken"] = page_token
1061-
response = self._session.get(url, params=params)
1062-
try:
1063-
response.raise_for_status()
1064-
except HTTPError as exc:
1065-
_handle_non_200_response(exc, {404: NoSuchNamespaceError})
1066-
1067-
parsed = ListTablesResponse.model_validate_json(response.text)
1068-
tables.extend([(*table.namespace, table.name) for table in parsed.identifiers])
1068+
parsed = self._fetch_tables_page(url, params)
1069+
yield from [(*table.namespace, table.name) for table in parsed.identifiers]
10691070

10701071
if not parsed.next_page_token:
10711072
break
10721073
page_token = parsed.next_page_token
10731074

1074-
return iter(tables)
1075-
10761075
@retry(**_RETRY_ARGS)
10771076
@override
10781077
def load_table(self, identifier: str | Identifier) -> Table:
@@ -1151,10 +1150,18 @@ def _remove_catalog_name_from_table_request_identifier(self, table_request: Comm
11511150
return table_request
11521151

11531152
@retry(**_RETRY_ARGS)
1153+
def _fetch_views_page(self, url: str, params: dict[str, str]) -> ListViewsResponse:
1154+
response = self._session.get(url, params=params)
1155+
try:
1156+
response.raise_for_status()
1157+
except HTTPError as exc:
1158+
_handle_non_200_response(exc, {404: NoSuchNamespaceError})
1159+
return ListViewsResponse.model_validate_json(response.text)
1160+
11541161
@override
11551162
def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]:
11561163
if Capability.V1_LIST_VIEWS not in self._supported_endpoints:
1157-
return []
1164+
return
11581165
namespace_tuple = self._check_valid_namespace_identifier(namespace)
11591166
namespace_concat = self._encode_namespace_path(namespace_tuple)
11601167
url = self.url(Endpoints.list_views, namespace=namespace_concat)
@@ -1166,28 +1173,18 @@ def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]:
11661173
raise ValueError(f"{PAGE_SIZE} must be a positive integer")
11671174
params["pageSize"] = str(page_size)
11681175

1169-
views: list[Identifier] = []
11701176
page_token: str | None = None
11711177

11721178
while True:
11731179
if page_token:
11741180
params["pageToken"] = page_token
1175-
1176-
response = self._session.get(url, params=params)
1177-
try:
1178-
response.raise_for_status()
1179-
except HTTPError as exc:
1180-
_handle_non_200_response(exc, {404: NoSuchNamespaceError})
1181-
1182-
parsed = ListViewsResponse.model_validate_json(response.text)
1183-
views.extend([(*view.namespace, view.name) for view in parsed.identifiers])
1181+
parsed = self._fetch_views_page(url, params)
1182+
yield from [(*view.namespace, view.name) for view in parsed.identifiers]
11841183

11851184
if not parsed.next_page_token:
11861185
break
11871186
page_token = parsed.next_page_token
11881187

1189-
return iter(views)
1190-
11911188
@retry(**_RETRY_ARGS)
11921189
@override
11931190
def load_view(self, identifier: str | Identifier) -> View:
@@ -1276,6 +1273,14 @@ def drop_namespace(self, namespace: str | Identifier) -> None:
12761273
_handle_non_200_response(exc, {404: NoSuchNamespaceError, 409: NamespaceNotEmptyError})
12771274

12781275
@retry(**_RETRY_ARGS)
1276+
def _fetch_namespaces_page(self, params: dict[str, str]) -> ListNamespaceResponse:
1277+
response = self._session.get(self.url(Endpoints.list_namespaces), params=params)
1278+
try:
1279+
response.raise_for_status()
1280+
except HTTPError as exc:
1281+
_handle_non_200_response(exc, {404: NoSuchNamespaceError})
1282+
return ListNamespaceResponse.model_validate_json(response.text)
1283+
12791284
@override
12801285
def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifier]:
12811286
self._check_endpoint(Capability.V1_LIST_NAMESPACES)
@@ -1288,30 +1293,20 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifi
12881293
raise ValueError(f"{PAGE_SIZE} must be a positive integer")
12891294
params["pageSize"] = str(page_size)
12901295

1291-
namespaces: list[Identifier] = []
12921296
page_token: str | None = None
12931297

12941298
while True:
12951299
if namespace_tuple:
12961300
params["parent"] = self._encode_namespace_path(namespace_tuple)
12971301
if page_token:
12981302
params["pageToken"] = page_token
1299-
response = self._session.get(self.url(Endpoints.list_namespaces), params=params)
1300-
1301-
try:
1302-
response.raise_for_status()
1303-
except HTTPError as exc:
1304-
_handle_non_200_response(exc, {404: NoSuchNamespaceError})
1305-
1306-
parsed = ListNamespaceResponse.model_validate_json(response.text)
1307-
namespaces.extend(parsed.namespaces)
1303+
parsed = self._fetch_namespaces_page(params)
1304+
yield from parsed.namespaces
13081305

13091306
if not parsed.next_page_token:
13101307
break
13111308
page_token = parsed.next_page_token
13121309

1313-
return iter(namespaces)
1314-
13151310
@retry(**_RETRY_ARGS)
13161311
@override
13171312
def load_namespace_properties(self, namespace: str | Identifier) -> Properties:

0 commit comments

Comments
 (0)