|
20 | 20 | import re |
21 | 21 | import threading |
22 | 22 | import time |
| 23 | +import tracemalloc |
23 | 24 | from typing import Any |
24 | 25 |
|
25 | 26 | import pyarrow as pa |
@@ -252,13 +253,6 @@ def test_filter(df): |
252 | 253 | assert result.column(2) == pa.array([5]) |
253 | 254 |
|
254 | 255 |
|
def test_show_empty(df, capsys):
    """Check that ``show()`` on a filtered frame reports that it has no rows."""
    # Presumably no row of the `df` fixture satisfies `a > 3` -- verify against
    # the fixture definition.
    df.filter(column("a") > literal(3)).show()
    printed = capsys.readouterr().out
    assert "DataFrame has no rows" in printed
260 | | - |
261 | | - |
262 | 256 | def test_sort(df): |
263 | 257 | df = df.sort(column("b").sort(ascending=False)) |
264 | 258 |
|
@@ -1390,6 +1384,27 @@ def test_collect_partitioned(): |
1390 | 1384 | assert [[batch]] == ctx.create_dataframe([[batch]]).collect_partitioned() |
1391 | 1385 |
|
1392 | 1386 |
|
def test_collect_multiple_batches_to_pyarrow():
    """Collect a two-partition DataFrame and rebuild a pyarrow Table from it."""
    ctx = SessionContext()

    # Build one single-column batch per partition.
    first, second = (
        pa.RecordBatch.from_arrays([pa.array(values)], names=["a"])
        for values in ([1, 2], [3, 4])
    )
    df = ctx.create_dataframe([[first], [second]])

    collected = df.collect()
    assert len(collected) == 2

    # The collected batches must combine into a single table with all rows.
    combined = pa.Table.from_batches(collected)
    assert combined.column("a").to_pylist() == [1, 2, 3, 4]
| 1406 | + |
| 1407 | + |
1393 | 1408 | def test_union(ctx): |
1394 | 1409 | batch = pa.RecordBatch.from_arrays( |
1395 | 1410 | [pa.array([1, 2, 3]), pa.array([4, 5, 6])], |
@@ -1470,6 +1485,24 @@ def test_empty_to_pandas(df): |
1470 | 1485 | assert set(pandas_df.columns) == {"a", "b", "c"} |
1471 | 1486 |
|
1472 | 1487 |
|
def test_show_no_batches(capsys):
    """Verify that a query yielding no rows still renders its column header."""
    SessionContext().sql("SELECT 1 AS a WHERE 1=0").show()
    output = capsys.readouterr().out
    # The header for column `a` is printed, and the empty-frame message is not.
    assert "| a |" in output
    assert "Empty DataFrame" not in output
| 1496 | + |
| 1497 | + |
def test_show_empty_dataframe(df, capsys):
    """Verify that ``show()`` on a ``limit(0)`` frame prints "Empty DataFrame"."""
    df.limit(0).show()
    output = capsys.readouterr().out
    assert "Empty DataFrame" in output
| 1504 | + |
| 1505 | + |
1473 | 1506 | def test_to_polars(df): |
1474 | 1507 | # Skip test if polars is not installed |
1475 | 1508 | pl = pytest.importorskip("polars") |
@@ -1574,6 +1607,23 @@ async def test_execute_stream_partitioned_async(df): |
1574 | 1607 | assert not remaining_batches |
1575 | 1608 |
|
1576 | 1609 |
|
def test_arrow_c_stream_streaming(large_df):
    """Consume ``__arrow_c_stream__`` batch by batch while tracking peak memory.

    The frame is repartitioned into 4 partitions, exported as an Arrow C stream
    PyCapsule, and re-imported as a ``pa.RecordBatchReader``. The test requires
    more than one batch and a traced peak below ``50 * MB``.
    """
    df = large_df.repartition(4)
    capsule = df.__arrow_c_stream__()

    # Declare the C signature explicitly: ctypes assumes an ``int`` return type
    # by default, which is the wrong type for a pointer.
    ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p
    ctypes.pythonapi.PyCapsule_GetPointer.argtypes = [ctypes.py_object, ctypes.c_char_p]
    ptr = ctypes.pythonapi.PyCapsule_GetPointer(capsule, b"arrow_array_stream")
    reader = pa.RecordBatchReader._import_from_c(ptr)

    tracemalloc.start()
    try:
        batch_count = sum(1 for _ in reader)
        _current, peak = tracemalloc.get_traced_memory()
    finally:
        # Always stop tracing, even if reading the stream raises; otherwise
        # tracing stays enabled for every test that runs afterwards.
        tracemalloc.stop()

    assert batch_count > 1
    # NOTE(review): tracemalloc only tracks memory obtained through Python's
    # allocators; buffers allocated natively by Arrow are presumably not
    # counted -- confirm this bound measures what is intended.
    assert peak < 50 * MB
| 1625 | + |
| 1626 | + |
1577 | 1627 | def test_empty_to_arrow_table(df): |
1578 | 1628 | # Convert empty datafusion dataframe to pyarrow Table |
1579 | 1629 | pyarrow_table = df.limit(0).to_arrow_table() |
@@ -2664,19 +2714,3 @@ def trigger_interrupt(): |
2664 | 2714 |
|
2665 | 2715 | # Make sure the interrupt thread has finished |
2666 | 2716 | interrupt_thread.join(timeout=1.0) |
2667 | | - |
2668 | | - |
def test_show_select_where_no_rows(capsys) -> None:
    """Check that showing a SQL query with no matching rows reports no rows."""
    SessionContext().sql("SELECT 1 WHERE 1=0").show()
    captured = capsys.readouterr()
    assert "DataFrame has no rows" in captured.out
2675 | | - |
2676 | | - |
def test_show_from_empty_batch(capsys) -> None:
    """Check that a frame built from a zero-length batch still prints its header."""
    ctx = SessionContext()
    empty_column = pa.array([], type=pa.int32())
    batch = pa.record_batch([empty_column], names=["a"])

    df = ctx.create_dataframe([[batch]])
    df.show()

    assert "| a |" in capsys.readouterr().out
|
0 commit comments