Skip to content

Commit 75184e2

Browse files
tests for string and binary views (#221)
Ref: #187. Land only after duckdb/duckdb#20165 is merged. Adds tests for Arrow string view and binary view filters.
2 parents 5005219 + 0a20de1 commit 75184e2

File tree

1 file changed

+41
-3
lines changed

1 file changed

+41
-3
lines changed

tests/fast/arrow/test_filter_pushdown.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88
import duckdb
99

1010
pa = pytest.importorskip("pyarrow")
11+
pd = pytest.importorskip("pyarrow.dataset")
12+
pa_lib = pytest.importorskip("pyarrow.lib")
1113
pq = pytest.importorskip("pyarrow.parquet")
12-
ds = pytest.importorskip("pyarrow.dataset")
1314
np = pytest.importorskip("numpy")
1415
re = pytest.importorskip("re")
1516

@@ -26,7 +27,7 @@ def create_pyarrow_table(rel):
2627

2728
def create_pyarrow_dataset(rel):
2829
table = create_pyarrow_table(rel)
29-
return ds.dataset(table)
30+
return pd.dataset(table)
3031

3132

3233
def test_decimal_filter_pushdown(duckdb_cursor):
@@ -549,7 +550,7 @@ def test_9371(self, duckdb_cursor, tmp_path):
549550
df = df.set_index("ts") # SET INDEX! (It all works correctly when the index is not set)
550551
df.to_parquet(str(file_path))
551552

552-
my_arrow_dataset = ds.dataset(str(file_path))
553+
my_arrow_dataset = pd.dataset(str(file_path))
553554
res = duckdb_cursor.execute("SELECT * FROM my_arrow_dataset WHERE ts = ?", parameters=[dt]).fetch_arrow_table()
554555
output = duckdb_cursor.sql("select * from res").fetchall()
555556
expected = [(1, dt), (2, dt), (3, dt)]
@@ -1018,3 +1019,40 @@ def test_dynamic_filter(self, duckdb_cursor):
10181019
duckdb_cursor.register("t", t)
10191020
res = duckdb_cursor.sql("SELECT a FROM t ORDER BY a LIMIT 11").fetchall()
10201021
assert len(res) == 11
1022+
1023+
def test_binary_view_filter(self, duckdb_cursor):
    """Check that an equality filter on a binary view column returns the matching row.

    The filter is expected to work without pushdown, because pyarrow does not
    support filters on view types yet.
    """
    view_values = pa.array([b"abc", b"efg"], type=pa.binary_view())
    # The query text below refers to this dataset by the name `dset`,
    # so the local variable must keep that exact name.
    dset = pd.dataset(pa.table({"col": view_values}))
    matching = duckdb_cursor.sql("select * from dset where col = 'abc'::binary")
    # Only the b"abc" row should survive the filter.
    assert len(matching) == 1
1029+
1030+
def test_string_view_filter(self, duckdb_cursor):
    """Check that an equality filter on a string view column returns the matching row.

    The filter is expected to work without pushdown, because pyarrow does not
    support filters on view types yet.
    """
    view_values = pa.array(["abc", "efg"], type=pa.string_view())
    # The query text below refers to this dataset by the name `dset`,
    # so the local variable must keep that exact name.
    dset = pd.dataset(pa.table({"col": view_values}))
    matching = duckdb_cursor.sql("select * from dset where col = 'abc'")
    # Only the "abc" row should survive the filter.
    assert len(matching) == 1
1036+
1037+
@pytest.mark.xfail(raises=pa_lib.ArrowNotImplementedError)
def test_canary_for_pyarrow_string_view_filter_support(self, duckdb_cursor):
    """Canary that will xpass once pyarrow implements string view filter support."""
    # Dataset holding a single string view column.
    view_values = pa.array(["val1", "val2"], type=pa.string_view())
    dataset = pd.dataset(pa.table({"col": view_values}))
    # Predicate: field == "string value".
    predicate = pd.field("col") == pd.scalar("val1")
    # Building the scanner is the step that is expected to fail today.
    dataset.scanner(columns=["col"], filter=predicate)
1047+
1048+
@pytest.mark.xfail(raises=pa_lib.ArrowNotImplementedError)
def test_canary_for_pyarrow_binary_view_filter_support(self, duckdb_cursor):
    """Canary that will xpass once pyarrow implements binary view filter support."""
    view_type = pa.binary_view()
    # Dataset holding a single binary view column.
    view_values = pa.array([b"bin1", b"bin2"], type=view_type)
    dataset = pd.dataset(pa.table({"col": view_values}))
    # Predicate: field == binary view constant.
    predicate = pd.field("col") == pd.scalar(pa.scalar(b"bin1", view_type))
    # Building the scanner is the step that is expected to fail today.
    dataset.scanner(columns=["col"], filter=predicate)

0 commit comments

Comments
 (0)