Skip to content

Commit 8b198f5

Browse files
committed
fix positional deletes
1 parent efa91e7 commit 8b198f5

File tree

2 files changed

+19
-16
lines changed

2 files changed

+19
-16
lines changed

dev/provision.py

Lines changed: 11 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
# Create SparkSession against the remote Spark Connect server
2626
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
2727

28-
2928
catalogs = {
3029
"rest": load_catalog(
3130
"rest",
@@ -128,10 +127,8 @@
128127
"""
129128
)
130129

131-
spark.sql(
132-
f"""
133-
INSERT INTO {identifier}
134-
VALUES
130+
spark.sql("""
131+
SELECT * FROM VALUES
135132
(CAST('2023-03-01' AS date), 1, 'a'),
136133
(CAST('2023-03-02' AS date), 2, 'b'),
137134
(CAST('2023-03-03' AS date), 3, 'c'),
@@ -143,9 +140,9 @@
143140
(CAST('2023-03-09' AS date), 9, 'i'),
144141
(CAST('2023-03-10' AS date), 10, 'j'),
145142
(CAST('2023-03-11' AS date), 11, 'k'),
146-
(CAST('2023-03-12' AS date), 12, 'l');
147-
"""
148-
)
143+
(CAST('2023-03-12' AS date), 12, 'l')
144+
AS t(dt, number, letter)
145+
""").coalesce(1).writeTo(identifier).append()
149146

150147
spark.sql(f"ALTER TABLE {identifier} CREATE TAG tag_12")
151148

@@ -169,15 +166,13 @@
169166
'write.delete.mode'='merge-on-read',
170167
'write.update.mode'='merge-on-read',
171168
'write.merge.mode'='merge-on-read',
172-
'format-version'='2'
169+
'format-version'='{format_version}'
173170
);
174171
"""
175172
)
176173

177-
spark.sql(
178-
f"""
179-
INSERT INTO {identifier}
180-
VALUES
174+
spark.sql("""
175+
SELECT * FROM VALUES
181176
(CAST('2023-03-01' AS date), 1, 'a'),
182177
(CAST('2023-03-02' AS date), 2, 'b'),
183178
(CAST('2023-03-03' AS date), 3, 'c'),
@@ -189,9 +184,9 @@
189184
(CAST('2023-03-09' AS date), 9, 'i'),
190185
(CAST('2023-03-10' AS date), 10, 'j'),
191186
(CAST('2023-03-11' AS date), 11, 'k'),
192-
(CAST('2023-03-12' AS date), 12, 'l');
193-
"""
194-
)
187+
(CAST('2023-03-12' AS date), 12, 'l')
188+
AS t(dt, number, letter)
189+
""").coalesce(1).writeTo(identifier).append()
195190

196191
# Perform two deletes, should produce:
197192
# v2: two positional delete files in v2

tests/integration/test_reads.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -432,6 +432,8 @@ def test_pyarrow_deletes(catalog: Catalog, format_version: int) -> None:
432432
# (11, 'k'),
433433
# (12, 'l')
434434
test_positional_mor_deletes = catalog.load_table(f"default.test_positional_mor_deletes_v{format_version}")
435+
if format_version == 2:
436+
assert len(test_positional_mor_deletes.inspect.delete_files()) > 0, "Table should produce position delete files"
435437
arrow_table = test_positional_mor_deletes.scan().to_arrow()
436438
assert arrow_table["number"].to_pylist() == [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12]
437439

@@ -470,6 +472,8 @@ def test_pyarrow_deletes_double(catalog: Catalog, format_version: int) -> None:
470472
# (11, 'k'),
471473
# (12, 'l')
472474
test_positional_mor_double_deletes = catalog.load_table(f"default.test_positional_mor_double_deletes_v{format_version}")
475+
if format_version == 2:
476+
assert len(test_positional_mor_double_deletes.inspect.delete_files()) > 0, "Table should produce position delete files"
473477
arrow_table = test_positional_mor_double_deletes.scan().to_arrow()
474478
assert arrow_table["number"].to_pylist() == [1, 2, 3, 4, 5, 7, 8, 10, 11, 12]
475479

@@ -508,6 +512,8 @@ def test_pyarrow_batches_deletes(catalog: Catalog, format_version: int) -> None:
508512
# (11, 'k'),
509513
# (12, 'l')
510514
test_positional_mor_deletes = catalog.load_table(f"default.test_positional_mor_deletes_v{format_version}")
515+
if format_version == 2:
516+
assert len(test_positional_mor_deletes.inspect.delete_files()) > 0, "Table should produce position delete files"
511517
arrow_table = test_positional_mor_deletes.scan().to_arrow_batch_reader().read_all()
512518
assert arrow_table["number"].to_pylist() == [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12]
513519

@@ -550,6 +556,8 @@ def test_pyarrow_batches_deletes_double(catalog: Catalog, format_version: int) -
550556
# (11, 'k'),
551557
# (12, 'l')
552558
test_positional_mor_double_deletes = catalog.load_table(f"default.test_positional_mor_double_deletes_v{format_version}")
559+
if format_version == 2:
560+
assert len(test_positional_mor_double_deletes.inspect.delete_files()) > 0, "Table should produce position delete files"
553561
arrow_table = test_positional_mor_double_deletes.scan().to_arrow_batch_reader().read_all()
554562
assert arrow_table["number"].to_pylist() == [1, 2, 3, 4, 5, 7, 8, 10, 11, 12]
555563

0 commit comments

Comments
 (0)