Skip to content

Commit 03092ed

Browse files
timsaucer and claude committed
Add examples to docstrings for new register/read methods
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4a2d7ba commit 03092ed

File tree

2 files changed, with 54 additions and 0 deletions.

2 files changed, with 54 additions and 0 deletions.

conftest.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919

2020
import datafusion as dfn
2121
import numpy as np
22+
import pyarrow as pa
2223
import pytest
2324
from datafusion import col, lit
2425
from datafusion import functions as F
@@ -29,6 +30,8 @@ def _doctest_namespace(doctest_namespace: dict) -> None:
2930
"""Add common imports to the doctest namespace."""
3031
doctest_namespace["dfn"] = dfn
3132
doctest_namespace["np"] = np
33+
doctest_namespace["pa"] = pa
3234
doctest_namespace["col"] = col
3335
doctest_namespace["lit"] = lit
3436
doctest_namespace["F"] = F
37+
doctest_namespace["ctx"] = dfn.SessionContext()

python/datafusion/context.py

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -900,6 +900,17 @@ def register_batch(self, name: str, batch: pa.RecordBatch) -> None:
900900
Args:
901901
name: Name of the resultant table.
902902
batch: Record batch to register as a table.
903+
904+
Examples:
905+
>>> batch = pa.RecordBatch.from_pydict({"a": [1, 2, 3]})
906+
>>> ctx.register_batch("batch_tbl", batch)
907+
>>> ctx.sql("SELECT * FROM batch_tbl").collect()[0].column(0)
908+
<pyarrow.lib.Int64Array object at ...>
909+
[
910+
1,
911+
2,
912+
3
913+
]
903914
"""
904915
self.ctx.register_batch(name, batch)
905916

@@ -1120,6 +1131,22 @@ def register_arrow(
11201131
schema: The data source schema.
11211132
file_extension: File extension to select.
11221133
table_partition_cols: Partition columns.
1134+
1135+
Examples:
1136+
>>> import tempfile, os
1137+
>>> table = pa.table({"x": [10, 20, 30]})
1138+
>>> with tempfile.TemporaryDirectory() as tmpdir:
1139+
... path = os.path.join(tmpdir, "data.arrow")
1140+
... with pa.ipc.new_file(path, table.schema) as writer:
1141+
... writer.write_table(table)
1142+
... ctx.register_arrow("arrow_tbl", path)
1143+
... ctx.sql("SELECT * FROM arrow_tbl").collect()[0].column(0)
1144+
<pyarrow.lib.Int64Array object at ...>
1145+
[
1146+
10,
1147+
20,
1148+
30
1149+
]
11231150
"""
11241151
if table_partition_cols is None:
11251152
table_partition_cols = []
@@ -1381,6 +1408,22 @@ def read_arrow(
13811408
13821409
Returns:
13831410
DataFrame representation of the read Arrow IPC file.
1411+
1412+
Examples:
1413+
>>> import tempfile, os
1414+
>>> table = pa.table({"a": [1, 2, 3]})
1415+
>>> with tempfile.TemporaryDirectory() as tmpdir:
1416+
... path = os.path.join(tmpdir, "data.arrow")
1417+
... with pa.ipc.new_file(path, table.schema) as writer:
1418+
... writer.write_table(table)
1419+
... df = ctx.read_arrow(path)
1420+
... df.collect()[0].column(0)
1421+
<pyarrow.lib.Int64Array object at ...>
1422+
[
1423+
1,
1424+
2,
1425+
3
1426+
]
13841427
"""
13851428
if file_partition_cols is None:
13861429
file_partition_cols = []
@@ -1396,6 +1439,14 @@ def read_empty(self) -> DataFrame:
13961439
13971440
Returns:
13981441
An empty DataFrame.
1442+
1443+
Examples:
1444+
>>> df = ctx.read_empty()
1445+
>>> result = df.collect()
1446+
>>> len(result)
1447+
1
1448+
>>> result[0].num_columns
1449+
0
13991450
"""
14001451
return self.empty_table()
14011452

0 commit comments

Comments
 (0)