Skip to content

Commit fb95038

Browse files
committed
Bug: HiveCatalog's _commit_table refresh and update the metadata within transaction (#607)
* make refresh and update metadata in a transaction * fix integration tests
1 parent 8d52993 commit fb95038

File tree

1 file changed

+20
-18
lines changed

1 file changed

+20
-18
lines changed

pyiceberg/catalog/hive.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -369,22 +369,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
369369
identifier_tuple = self.identifier_to_tuple_without_catalog(
370370
tuple(table_request.identifier.namespace.root + [table_request.identifier.name])
371371
)
372-
current_table = self.load_table(identifier_tuple)
373372
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
374-
base_metadata = current_table.metadata
375-
for requirement in table_request.requirements:
376-
requirement.validate(base_metadata)
377-
378-
updated_metadata = update_table_metadata(base_metadata, table_request.updates)
379-
if updated_metadata == base_metadata:
380-
# no changes, do nothing
381-
return CommitTableResponse(metadata=base_metadata, metadata_location=current_table.metadata_location)
382-
383-
# write new metadata
384-
new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1
385-
new_metadata_location = self._get_metadata_location(current_table.metadata.location, new_metadata_version)
386-
self._write_metadata(updated_metadata, current_table.io, new_metadata_location)
387-
388373
# commit to hive
389374
# https://github.com/apache/hive/blob/master/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift#L1232
390375
with self._client as open_client:
@@ -394,11 +379,28 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
394379
if lock.state != LockState.ACQUIRED:
395380
raise CommitFailedException(f"Failed to acquire lock for {table_request.identifier}, state: {lock.state}")
396381

397-
tbl = open_client.get_table(dbname=database_name, tbl_name=table_name)
398-
tbl.parameters = _construct_parameters(
382+
hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name)
383+
io = load_file_io({**self.properties, **hive_table.parameters}, hive_table.sd.location)
384+
current_table = self._convert_hive_into_iceberg(hive_table, io)
385+
386+
base_metadata = current_table.metadata
387+
for requirement in table_request.requirements:
388+
requirement.validate(base_metadata)
389+
390+
updated_metadata = update_table_metadata(base_metadata, table_request.updates)
391+
if updated_metadata == base_metadata:
392+
# no changes, do nothing
393+
return CommitTableResponse(metadata=base_metadata, metadata_location=current_table.metadata_location)
394+
395+
# write new metadata
396+
new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1
397+
new_metadata_location = self._get_metadata_location(current_table.metadata.location, new_metadata_version)
398+
self._write_metadata(updated_metadata, current_table.io, new_metadata_location)
399+
400+
hive_table.parameters = _construct_parameters(
399401
metadata_location=new_metadata_location, previous_metadata_location=current_table.metadata_location
400402
)
401-
open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=tbl)
403+
open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=hive_table)
402404
except NoSuchObjectException as e:
403405
raise NoSuchTableError(f"Table does not exist: {table_name}") from e
404406
finally:

0 commit comments

Comments
 (0)