Skip to content

Commit 07d2718

Browse files
committed
UNPICK changes to review
1 parent 13c484e commit 07d2718

File tree

18 files changed

+156
-485
lines changed

18 files changed

+156
-485
lines changed

docs/source/contributor-guide/ffi.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ as performant as possible and to utilize the features of DataFusion, you may dec
3434
your source in Rust and then expose it through `PyO3 <https://pyo3.rs>`_ as a Python library.
3535

3636
At first glance, it may appear the best way to do this is to add the ``datafusion-python``
37-
crate as a dependency, produce a DataFusion table in Rust, and then register it with the
37+
crate as a dependency, provide a ``PyTable``, and then register it with the
3838
``SessionContext``. Unfortunately, this will not work.
3939

4040
When you produce your code as a Python library and it needs to interact with the DataFusion

docs/source/user-guide/data-sources.rst

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,9 @@ as Delta Lake. This will require a recent version of
152152
.. code-block:: python
153153
154154
from deltalake import DeltaTable
155-
from datafusion import TableProvider
156155
157156
delta_table = DeltaTable("path_to_table")
158-
provider = TableProvider.from_capsule(delta_table.__datafusion_table_provider__())
159-
ctx.register_table("my_delta_table", provider)
157+
ctx.register_table_provider("my_delta_table", delta_table)
160158
df = ctx.table("my_delta_table")
161159
df.show()
162160

docs/source/user-guide/io/table_provider.rst

Lines changed: 7 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -39,40 +39,20 @@ A complete example can be found in the `examples folder <https://github.com/apac
3939
) -> PyResult<Bound<'py, PyCapsule>> {
4040
let name = CString::new("datafusion_table_provider").unwrap();
4141
42-
let provider = Arc::new(self.clone());
43-
let provider = FFI_TableProvider::new(provider, false, None);
42+
let provider = Arc::new(self.clone())
43+
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
44+
let provider = FFI_TableProvider::new(Arc::new(provider), false);
4445
4546
PyCapsule::new_bound(py, provider, Some(name.clone()))
4647
}
4748
}
4849
49-
Once you have this library available, you can construct a
50-
:py:class:`~datafusion.TableProvider` in Python and register it with the
51-
``SessionContext``. Table providers can be created either from the PyCapsule exposed by
52-
your Rust provider or from an existing :py:class:`~datafusion.dataframe.DataFrame`.
53-
Call the provider's ``__datafusion_table_provider__()`` method to obtain the capsule
54-
before constructing a ``TableProvider``. The ``TableProvider.from_view()`` helper is
55-
deprecated; instead use ``TableProvider.from_dataframe()`` or ``DataFrame.into_view()``.
50+
Once you have this library available, in Python you can register your table provider
51+
with the ``SessionContext``.
5652

5753
.. code-block:: python
5854
59-
from datafusion import SessionContext, TableProvider
60-
61-
ctx = SessionContext()
6255
provider = MyTableProvider()
56+
ctx.register_table_provider("my_table", provider)
6357
64-
capsule = provider.__datafusion_table_provider__()
65-
capsule_provider = TableProvider.from_capsule(capsule)
66-
67-
df = ctx.from_pydict({"a": [1]})
68-
view_provider = TableProvider.from_dataframe(df)
69-
# or: view_provider = df.into_view()
70-
71-
ctx.register_table("capsule_table", capsule_provider)
72-
ctx.register_table("view_table", view_provider)
73-
74-
ctx.table("capsule_table").show()
75-
ctx.table("view_table").show()
76-
77-
Both ``TableProvider.from_capsule()`` and ``TableProvider.from_dataframe()`` create
78-
table providers that can be registered with the SessionContext using ``register_table()``.
58+
ctx.table("my_table").show()

examples/datafusion-ffi-example/python/tests/_test_table_function.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def test_ffi_table_function_call_directly():
5353
table_udtf = udtf(table_func, "my_table_func")
5454

5555
my_table = table_udtf()
56-
ctx.register_table("t", my_table)
56+
ctx.register_table_provider("t", my_table)
5757
result = ctx.table("t").collect()
5858

5959
assert len(result) == 2

examples/datafusion-ffi-example/python/tests/_test_table_provider.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,14 @@
1818
from __future__ import annotations
1919

2020
import pyarrow as pa
21-
from datafusion import SessionContext, TableProvider
21+
from datafusion import SessionContext
2222
from datafusion_ffi_example import MyTableProvider
2323

2424

2525
def test_table_loading():
2626
ctx = SessionContext()
2727
table = MyTableProvider(3, 2, 4)
28-
ctx.register_table(
29-
"t", TableProvider.from_capsule(table.__datafusion_table_provider__())
30-
)
28+
ctx.register_table_provider("t", table)
3129
result = ctx.table("t").collect()
3230

3331
assert len(result) == 4

python/datafusion/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
from .io import read_avro, read_csv, read_json, read_parquet
5555
from .plan import ExecutionPlan, LogicalPlan
5656
from .record_batch import RecordBatch, RecordBatchStream
57-
from .table_provider import TableProvider
5857
from .user_defined import (
5958
Accumulator,
6059
AggregateUDF,
@@ -91,7 +90,6 @@
9190
"SessionContext",
9291
"Table",
9392
"TableFunction",
94-
"TableProvider",
9593
"WindowFrame",
9694
"WindowUDF",
9795
"catalog",

python/datafusion/catalog.py

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,6 @@
2727
if TYPE_CHECKING:
2828
import pyarrow as pa
2929

30-
from datafusion import TableProvider
31-
from datafusion.context import TableProviderExportable
32-
3330
try:
3431
from warnings import deprecated # Python 3.13+
3532
except ImportError:
@@ -85,11 +82,7 @@ def database(self, name: str = "public") -> Schema:
8582
"""Returns the database with the given ``name`` from this catalog."""
8683
return self.schema(name)
8784

88-
def register_schema(
89-
self,
90-
name: str,
91-
schema: Schema | SchemaProvider | SchemaProviderExportable,
92-
) -> Schema | None:
85+
def register_schema(self, name, schema) -> Schema | None:
9386
"""Register a schema with this catalog."""
9487
if isinstance(schema, Schema):
9588
return self.catalog.register_schema(name, schema._raw_schema)
@@ -129,14 +122,8 @@ def table(self, name: str) -> Table:
129122
"""Return the table with the given ``name`` from this schema."""
130123
return Table(self._raw_schema.table(name))
131124

132-
def register_table(
133-
self, name: str, table: Table | TableProvider | TableProviderExportable
134-
) -> None:
135-
"""Register a table or table provider in this schema.
136-
137-
Objects implementing ``__datafusion_table_provider__`` are also supported
138-
and treated as :class:`TableProvider` instances.
139-
"""
125+
def register_table(self, name, table) -> None:
126+
"""Register a table provider in this schema."""
140127
if isinstance(table, Table):
141128
return self._raw_schema.register_table(name, table.table)
142129
return self._raw_schema.register_table(name, table)
@@ -232,19 +219,14 @@ def table(self, name: str) -> Table | None:
232219
"""Retrieve a specific table from this schema."""
233220
...
234221

235-
def register_table( # noqa: B027
236-
self, name: str, table: Table | TableProvider | TableProviderExportable
237-
) -> None:
238-
"""Add a table to this schema.
222+
def register_table(self, name: str, table: Table) -> None: # noqa: B027
223+
"""Add a table to this schema.
239224
240225
This method is optional. If your schema provides a fixed list of tables, you do
241226
not need to implement this method.
242-
243-
Objects implementing ``__datafusion_table_provider__`` are also supported
244-
and treated as :class:`TableProvider` instances.
245227
"""
246228

247-
def deregister_table(self, name: str, cascade: bool) -> None: # noqa: B027
229+
def deregister_table(self, name, cascade: bool) -> None: # noqa: B027
248230
"""Remove a table from this schema.
249231
250232
This method is optional. If your schema provides a fixed list of tables, you do

python/datafusion/context.py

Lines changed: 10 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
import pandas as pd
4747
import polars as pl
4848

49-
from datafusion import TableProvider
5049
from datafusion.plan import ExecutionPlan, LogicalPlan
5150

5251

@@ -735,7 +734,7 @@ def from_polars(self, data: pl.DataFrame, name: str | None = None) -> DataFrame:
735734
# https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116
736735
# is the discussion on how we arrived at adding register_view
737736
def register_view(self, name: str, df: DataFrame) -> None:
738-
"""Register a :py:class:`~datafusion.dataframe.DataFrame` as a view.
737+
"""Register a :py:class:`~datafusion.dataframe.DataFrame` as a view.
739738
740739
Args:
741740
name (str): The name to register the view under.
@@ -744,31 +743,16 @@ def register_view(self, name: str, df: DataFrame) -> None:
744743
view = df.into_view()
745744
self.ctx.register_table(name, view)
746745

747-
def register_table(
748-
self, name: str, table: Table | TableProvider | TableProviderExportable
749-
) -> None:
750-
"""Register a :py:class:`~datafusion.catalog.Table` or ``TableProvider``.
751-
752-
The registered table can be referenced from SQL statements executed against
753-
this context.
754-
755-
Plain :py:class:`~datafusion.dataframe.DataFrame` objects are not supported;
756-
convert them first with :meth:`datafusion.dataframe.DataFrame.into_view` or
757-
:meth:`datafusion.catalog.TableProvider.from_dataframe`.
746+
def register_table(self, name: str, table: Table) -> None:
747+
"""Register a :py:class:`~datafusion.catalog.Table` as a table.
758748
759-
Objects implementing ``__datafusion_table_provider__`` are also supported
760-
and treated as :class:`~datafusion.catalog.TableProvider` instances.
749+
The registered table can be referenced from SQL statements run in this context.
761750
762751
Args:
763752
name: Name of the resultant table.
764-
table: DataFusion :class:`Table`, :class:`TableProvider`, or any object
765-
implementing ``__datafusion_table_provider__`` to add to the session
766-
context.
753+
table: DataFusion table to add to the session context.
767754
"""
768-
if isinstance(table, Table):
769-
self.ctx.register_table(name, table.table)
770-
else:
771-
self.ctx.register_table(name, table)
755+
self.ctx.register_table(name, table.table)
772756

773757
def deregister_table(self, name: str) -> None:
774758
"""Remove a table from the session."""
@@ -788,21 +772,14 @@ def register_catalog_provider(
788772
self.ctx.register_catalog_provider(name, provider)
789773

790774
def register_table_provider(
791-
self, name: str, provider: Table | TableProvider | TableProviderExportable
775+
self, name: str, provider: TableProviderExportable
792776
) -> None:
793777
"""Register a table provider.
794778
795-
Deprecated: use :meth:`register_table` instead.
796-
797-
Objects implementing ``__datafusion_table_provider__`` are also supported
798-
and treated as :class:`~datafusion.catalog.TableProvider` instances.
779+
This table provider must have a method called ``__datafusion_table_provider__``
780+
which returns a PyCapsule that exposes a ``FFI_TableProvider``.
799781
"""
800-
warnings.warn(
801-
"register_table_provider is deprecated; use register_table",
802-
DeprecationWarning,
803-
stacklevel=2,
804-
)
805-
self.register_table(name, provider)
782+
self.ctx.register_table_provider(name, provider)
806783

807784
def register_udtf(self, func: TableFunction) -> None:
808785
"""Register a user defined table function."""

python/datafusion/dataframe.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
import pyarrow as pa
5454

5555
from datafusion._internal import expr as expr_internal
56-
from datafusion.table_provider import TableProvider
5756

5857
from enum import Enum
5958

@@ -308,17 +307,9 @@ def __init__(self, df: DataFrameInternal) -> None:
308307
"""
309308
self.df = df
310309

311-
def into_view(self) -> TableProvider:
312-
"""Convert ``DataFrame`` into a ``TableProvider`` view for registration.
313-
314-
This is the preferred way to obtain a view for
315-
:py:meth:`~datafusion.context.SessionContext.register_table`.
316-
``TableProvider.from_dataframe`` calls this method under the hood,
317-
and the older ``TableProvider.from_view`` helper is deprecated.
318-
"""
319-
from datafusion.table_provider import TableProvider as _TableProvider
320-
321-
return _TableProvider(self.df.into_view())
310+
def into_view(self) -> pa.Table:
311+
"""Convert DataFrame into a ViewTable which can be used in register_table."""
312+
return self.df.into_view()
322313

323314
def __getitem__(self, key: str | list[str]) -> DataFrame:
324315
"""Return a new :py:class:`DataFrame` with the specified column or columns.

python/datafusion/table_provider.py

Lines changed: 0 additions & 104 deletions
This file was deleted.

0 commit comments

Comments
 (0)