kosiew
diff --git a/‎docs/source/conf.py‎
Lines changed: 0 additions & 8 deletions b/‎docs/source/conf.py‎
Lines changed: 0 additions & 8 deletions
diff --git a/‎docs/source/contributor-guide/ffi.rst‎
Lines changed: 2 additions & 2 deletions b/‎docs/source/contributor-guide/ffi.rst‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/source/user-guide/dataframe/index.rst‎
Lines changed: 1 addition & 35 deletions b/‎docs/source/user-guide/dataframe/index.rst‎
Lines changed: 1 addition & 35 deletions
diff --git a/‎docs/source/user-guide/io/table_provider.rst‎
Lines changed: 2 additions & 2 deletions b/‎docs/source/user-guide/io/table_provider.rst‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎python/datafusion/__init__.py‎
Lines changed: 1 addition & 2 deletions b/‎python/datafusion/__init__.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎python/datafusion/dataframe.py‎
Lines changed: 7 additions & 44 deletions b/‎python/datafusion/dataframe.py‎
Lines changed: 7 additions & 44 deletions
diff --git a/‎python/datafusion/record_batch.py‎
Lines changed: 9 additions & 36 deletions b/‎python/datafusion/record_batch.py‎
Lines changed: 9 additions & 36 deletions
diff --git a/‎python/tests/conftest.py‎
Lines changed: 1 addition & 10 deletions b/‎python/tests/conftest.py‎
Lines changed: 1 addition & 10 deletions
@@ -72,14 +72,6 @@
 suppress_warnings = ["autoapi.python_import_resolution"]
 autoapi_python_class_content = "both"
 autoapi_keep_files = False  # set to True for debugging generated files
-autoapi_options = [
-    "members",
-    "undoc-members",
-    "special-members",
-    "show-inheritance",
-    "show-module-summary",
-    "imported-members",
-]
 
 
 def autoapi_skip_member_fn(app, what, name, obj, skip, options) -> bool:  # noqa: ARG001
 
@@ -161,8 +161,8 @@ for our provider thusly:
 
 .. code-block:: rust
 
-    let name = pyo3::ffi::c_str!("datafusion_table_provider");
-    let my_capsule = PyCapsule::new_bound(py, provider, Some(name.to_owned()))?;
+    let name = CString::new("datafusion_table_provider")?;
+    let my_capsule = PyCapsule::new_bound(py, provider, Some(name))?;
 
 On the receiving side, turn this pycapsule object into the ``FFI_TableProvider``, which
 can then be turned into a ``ForeignTableProvider``. The associated code is:
 
@@ -145,44 +145,10 @@ To materialize the results of your DataFrame operations:
     
     # Display results
     df.show()                         # Print tabular format to console
-
+    
     # Count rows
     count = df.count()
 
-PyArrow Streaming
------------------
-
-DataFusion DataFrames implement the ``__arrow_c_stream__`` protocol, enabling
-zero-copy streaming into libraries like `PyArrow <https://arrow.apache.org/>`_.
-Earlier versions eagerly converted the entire DataFrame when exporting to
-PyArrow, which could exhaust memory on large datasets. With streaming, batches
-are produced lazily so you can process arbitrarily large results without
-out-of-memory errors.
-
-.. code-block:: python
-
-    import pyarrow as pa
-
-    # Create a PyArrow RecordBatchReader without materializing all batches
-    reader = pa.RecordBatchReader.from_stream(df)
-    for batch in reader:
-        ...  # process each batch as it is produced
-
-Note that streams retain the originating ``SessionContext`` internally, so the
-context can be safely dropped once the stream has been obtained.
-
-DataFrames are also iterable, yielding :class:`datafusion.RecordBatch` objects
-that implement the Arrow C data interface. These batches can be consumed by
-libraries like PyArrow without copying:
-
-.. code-block:: python
-
-    for batch in df:
-        pa_batch = batch.to_pyarrow()  # optional conversion
-        ...  # process each batch as it is produced
-
-See :doc:`../io/arrow` for additional details on the Arrow interface.
-
 HTML Rendering
 --------------
 
 
@@ -37,13 +37,13 @@ A complete example can be found in the `examples folder <https://github.com/apac
             &self,
             py: Python<'py>,
         ) -> PyResult<Bound<'py, PyCapsule>> {
-            let name = pyo3::ffi::c_str!("datafusion_table_provider");
+            let name = CString::new("datafusion_table_provider").unwrap();
 
             let provider = Arc::new(self.clone())
                 .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
             let provider = FFI_TableProvider::new(Arc::new(provider), false);
 
-            PyCapsule::new_bound(py, provider, Some(name.to_owned()))
+            PyCapsule::new_bound(py, provider, Some(name.clone()))
         }
     }
 
 
@@ -53,7 +53,7 @@
 )
 from .io import read_avro, read_csv, read_json, read_parquet
 from .plan import ExecutionPlan, LogicalPlan
-from .record_batch import RecordBatch, RecordBatchStream, to_record_batch_stream
+from .record_batch import RecordBatch, RecordBatchStream
 from .user_defined import (
     Accumulator,
     AggregateUDF,
@@ -107,7 +107,6 @@
     "read_json",
     "read_parquet",
     "substrait",
-    "to_record_batch_stream",
     "udaf",
     "udf",
     "udtf",
 
@@ -25,9 +25,7 @@
 from typing import (
     TYPE_CHECKING,
     Any,
-    AsyncIterator,
     Iterable,
-    Iterator,
     Literal,
     Optional,
     Union,
@@ -44,11 +42,7 @@
 from datafusion._internal import ParquetWriterOptions as ParquetWriterOptionsInternal
 from datafusion.expr import Expr, SortExpr, sort_or_default
 from datafusion.plan import ExecutionPlan, LogicalPlan
-from datafusion.record_batch import (
-    RecordBatch,
-    RecordBatchStream,
-    to_record_batch_stream,
-)
+from datafusion.record_batch import RecordBatchStream
 
 if TYPE_CHECKING:
     import pathlib
@@ -59,7 +53,6 @@
     import pyarrow as pa
 
     from datafusion._internal import expr as expr_internal
-    from datafusion.record_batch import RecordBatch
 
 from enum import Enum
 
@@ -296,9 +289,6 @@ def __init__(
 class DataFrame:
     """Two dimensional table representation of data.
 
-    DataFrame objects are iterable; iterating over a DataFrame yields
-    :class:`pyarrow.RecordBatch` instances lazily.
-
     See :ref:`user_guide_concepts` in the online documentation for more information.
     """
 
@@ -1108,48 +1098,21 @@ def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFram
         return DataFrame(self.df.unnest_columns(columns, preserve_nulls=preserve_nulls))
 
     def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
-        """Export the DataFrame as an Arrow C Stream.
-
-        The DataFrame is executed using DataFusion's streaming APIs and exposed via
-        Arrow's C Stream interface. Record batches are produced incrementally, so the
-        full result set is never materialized in memory. When ``requested_schema`` is
-        provided, only straightforward projections such as column selection or
-        reordering are applied.
+        """Export an Arrow PyCapsule Stream.
 
-        The returned capsule holds a reference to the originating
-        :class:`SessionContext`, keeping it alive until the stream is fully
-        consumed. The stream is explicitly closed before the context is
-        released, so it is safe to drop the original context after obtaining the
-        stream.
+        This will execute and collect the DataFrame. We will attempt to respect the
+        requested schema, but only trivial transformations will be applied such as only
+        returning the fields listed in the requested schema if their data types match
+        those in the DataFrame.
 
         Args:
             requested_schema: Attempt to provide the DataFrame using this schema.
 
         Returns:
-            Arrow PyCapsule object representing an ``ArrowArrayStream``.
+            Arrow PyCapsule object.
         """
-        # ``DataFrame.__arrow_c_stream__`` in the Rust extension leverages
-        # ``execute_stream_partitioned`` under the hood to stream batches while
-        # preserving the original partition order.
         return self.df.__arrow_c_stream__(requested_schema)
 
-    def __iter__(self) -> Iterator[pa.RecordBatch]:
-        """Iterate over :class:`pyarrow.RecordBatch` objects.
-
-        Results are streamed without materializing the full DataFrame. This
-        implementation delegates to :func:`to_record_batch_stream`, which executes
-        the :class:`DataFrame` and returns a :class:`RecordBatchStream`.
-        """
-        return to_record_batch_stream(self).__iter__()
-
-    def __aiter__(self) -> AsyncIterator[RecordBatch]:
-        """Asynchronously yield record batches from the DataFrame.
-
-        This delegates to :func:`to_record_batch_stream` to obtain a
-        :class:`RecordBatchStream` and returns its asynchronous iterator.
-        """
-        return to_record_batch_stream(self).__aiter__()
-
     def transform(self, func: Callable[..., DataFrame], *args: Any) -> DataFrame:
         """Apply a function to the current DataFrame which returns another DataFrame.
 
 
@@ -25,13 +25,11 @@
 
 from typing import TYPE_CHECKING
 
-import datafusion._internal as df_internal
-
 if TYPE_CHECKING:
     import pyarrow as pa
     import typing_extensions
 
-    from datafusion.dataframe import DataFrame
+    import datafusion._internal as df_internal
 
 
 class RecordBatch:
@@ -54,28 +52,25 @@ class RecordBatchStream:
 
     These are typically the result of a
     :py:func:`~datafusion.dataframe.DataFrame.execute_stream` operation.
-
-    Call :py:meth:`close` when finished consuming the stream to avoid
-    lingering background tasks.
     """
 
     def __init__(self, record_batch_stream: df_internal.RecordBatchStream) -> None:
         """This constructor is typically not called by the end user."""
         self.rbs = record_batch_stream
 
-    def next(self) -> pa.RecordBatch:
-        """Retrieve the next :py:class:`pa.RecordBatch`."""
+    def next(self) -> RecordBatch:
+        """See :py:func:`__next__` for the iterator function."""
         return next(self)
 
-    async def __anext__(self) -> pa.RecordBatch:
-        """Async iterator returning :py:class:`pa.RecordBatch`."""
+    async def __anext__(self) -> RecordBatch:
+        """Async iterator function."""
         next_batch = await self.rbs.__anext__()
-        return next_batch.to_pyarrow()
+        return RecordBatch(next_batch)
 
-    def __next__(self) -> pa.RecordBatch:
-        """Iterator returning :py:class:`pa.RecordBatch`."""
+    def __next__(self) -> RecordBatch:
+        """Iterator function."""
         next_batch = next(self.rbs)
-        return next_batch.to_pyarrow()
+        return RecordBatch(next_batch)
 
     def __aiter__(self) -> typing_extensions.Self:
         """Async iterator function."""
@@ -84,25 +79,3 @@ def __aiter__(self) -> typing_extensions.Self:
     def __iter__(self) -> typing_extensions.Self:
         """Iterator function."""
         return self
-
-    def close(self) -> None:
-        """Close the stream and release associated resources.
-
-        This drains any remaining batches and allows the underlying
-        :class:`SessionContext` to be released. Call this when you are
-        done consuming the stream to avoid leaving tasks running in the
-        background.
-        """
-        self.rbs.close()
-
-
-def to_record_batch_stream(df: DataFrame) -> RecordBatchStream:
-    """Convert a DataFrame into a RecordBatchStream.
-
-    Args:
-        df: DataFrame to convert.
-
-    Returns:
-        A RecordBatchStream representing the DataFrame.
-    """
-    return df.execute_stream()
@@ -17,7 +17,7 @@
 
 import pyarrow as pa
 import pytest
-from datafusion import DataFrame, SessionContext
+from datafusion import SessionContext
 from pyarrow.csv import write_csv
 
 
@@ -49,12 +49,3 @@ def database(ctx, tmp_path):
         delimiter=",",
         schema_infer_max_records=10,
     )
-
-
-@pytest.fixture
-def fail_collect(monkeypatch):
-    def _fail_collect(self, *args, **kwargs):  # pragma: no cover - failure path
-        msg = "collect should not be called"
-        raise AssertionError(msg)
-
-    monkeypatch.setattr(DataFrame, "collect", _fail_collect)
Original file line number	Diff line number	Diff line change
@@ -37,13 +37,13 @@ A complete example can be found in the `examples folder <https://github.com/apac
`37`	`37`	`&self,`
`38`	`38`	`py: Python<'py>,`
`39`	`39`	`) -> PyResult<Bound<'py, PyCapsule>> {`
`40`		`- let name = pyo3::ffi::c_str!("datafusion_table_provider");`
	`40`	`+ let name = CString::new("datafusion_table_provider").unwrap();`
`41`	`41`
`42`	`42`	`let provider = Arc::new(self.clone())`
`43`	`43`	`.map_err(\|e\| PyRuntimeError::new_err(e.to_string()))?;`
`44`	`44`	`let provider = FFI_TableProvider::new(Arc::new(provider), false);`
`45`	`45`
`46`		`- PyCapsule::new_bound(py, provider, Some(name.to_owned()))`
	`46`	`+ PyCapsule::new_bound(py, provider, Some(name.clone()))`
`47`	`47`	`}`
`48`	`48`	`}`
`49`	`49`