88import duckdb
99
1010pa = pytest .importorskip ("pyarrow" )
11+ pd = pytest .importorskip ("pyarrow.dataset" )
12+ pa_lib = pytest .importorskip ("pyarrow.lib" )
1113pq = pytest .importorskip ("pyarrow.parquet" )
12- ds = pytest .importorskip ("pyarrow.dataset" )
1314np = pytest .importorskip ("numpy" )
1415re = pytest .importorskip ("re" )
1516
@@ -26,7 +27,7 @@ def create_pyarrow_table(rel):
2627
2728def create_pyarrow_dataset (rel ):  # builds a table via create_pyarrow_table(rel) and returns dataset(table)
2829 table = create_pyarrow_table (rel )
29- return ds .dataset (table )
30+ return pd .dataset (table )  # `pd` is the pyarrow.dataset module in this file (alias renamed from `ds`); NOTE(review): the alias reads like pandas - confirm the rename is intended
3031
3132
3233def test_decimal_filter_pushdown (duckdb_cursor ):
@@ -549,7 +550,7 @@ def test_9371(self, duckdb_cursor, tmp_path):
549550 df = df .set_index ("ts" ) # SET INDEX! (It all works correctly when the index is not set)
550551 df .to_parquet (str (file_path ))
551552
552- my_arrow_dataset = ds .dataset (str (file_path ))
553+ my_arrow_dataset = pd .dataset (str (file_path ))
553554 res = duckdb_cursor .execute ("SELECT * FROM my_arrow_dataset WHERE ts = ?" , parameters = [dt ]).fetch_arrow_table ()
554555 output = duckdb_cursor .sql ("select * from res" ).fetchall ()
555556 expected = [(1 , dt ), (2 , dt ), (3 , dt )]
@@ -1018,3 +1019,40 @@ def test_dynamic_filter(self, duckdb_cursor):
10181019 duckdb_cursor .register ("t" , t )
10191020 res = duckdb_cursor .sql ("SELECT a FROM t ORDER BY a LIMIT 11" ).fetchall ()
10201021 assert len (res ) == 11
1022+
1023+ def test_binary_view_filter (self , duckdb_cursor ):
1024+ """Filters on a view column work (without pushdown because pyarrow does not support view filters yet)."""
1025+ table = pa .table ({"col" : pa .array ([b"abc" , b"efg" ], type = pa .binary_view ())})  # two rows in a single binary_view column
1026+ dset = pd .dataset (table )  # `pd` is pyarrow.dataset in this file, not pandas
1027+ res = duckdb_cursor .sql ("select * from dset where col = 'abc'::binary" )  # the SQL text refers to the local `dset` by name
1028+ assert len (res ) == 1  # only the b"abc" row should satisfy the equality filter
1029+
1030+ def test_string_view_filter (self , duckdb_cursor ):
1031+ """Filters on a view column work (without pushdown because pyarrow does not support view filters yet)."""
1032+ table = pa .table ({"col" : pa .array (["abc" , "efg" ], type = pa .string_view ())})  # two rows in a single string_view column
1033+ dset = pd .dataset (table )  # `pd` is pyarrow.dataset in this file, not pandas
1034+ res = duckdb_cursor .sql ("select * from dset where col = 'abc'" )  # the SQL text refers to the local `dset` by name
1035+ assert len (res ) == 1  # only the "abc" row should satisfy the equality filter
1036+
1037+ @pytest .mark .xfail (raises = pa_lib .ArrowNotImplementedError )  # NOTE(review): without strict=True an XPASS is reported but does not fail the run - confirm xfail_strict is set in the pytest config
1038+ def test_canary_for_pyarrow_string_view_filter_support (self , duckdb_cursor ):  # duckdb_cursor is not used in the body; only pyarrow is exercised
1039+ """This canary will xpass when pyarrow implements string view filter support."""
1040+ # predicate: col == "val1", built from pyarrow.dataset field/scalar expressions
1041+ filter_expr = pd .field ("col" ) == pd .scalar ("val1" )
1042+ # in-memory dataset with a single string_view column
1043+ table = pa .table ({"col" : pa .array (["val1" , "val2" ], type = pa .string_view ())})
1044+ dset = pd .dataset (table )
1045+ # creating the scanner is expected to raise ArrowNotImplementedError (the xfail condition above)
1046+ dset .scanner (columns = ["col" ], filter = filter_expr )
1047+
1048+ @pytest .mark .xfail (raises = pa_lib .ArrowNotImplementedError )  # NOTE(review): without strict=True an XPASS is reported but does not fail the run - confirm xfail_strict is set in the pytest config
1049+ def test_canary_for_pyarrow_binary_view_filter_support (self , duckdb_cursor ):  # duckdb_cursor is not used in the body; only pyarrow is exercised
1050+ """This canary will xpass when pyarrow implements binary view filter support."""
1051+ # predicate: col == const, where const is a binary_view scalar
1052+ const = pd .scalar (pa .scalar (b"bin1" , pa .binary_view ()))
1053+ filter_expr = pd .field ("col" ) == const
1054+ # in-memory dataset with a single binary_view column
1055+ table = pa .table ({"col" : pa .array ([b"bin1" , b"bin2" ], type = pa .binary_view ())})
1056+ dset = pd .dataset (table )
1057+ # creating the scanner is expected to raise ArrowNotImplementedError (the xfail condition above)
1058+ dset .scanner (columns = ["col" ], filter = filter_expr )
0 commit comments