Commit 2c87e18 ("UNPICK")

1 parent 3db159a, commit 2c87e18

36 files changed: +609 −1272 lines

README.md (0 additions, 7 deletions)

@@ -187,23 +187,16 @@ See [examples](examples/README.md) for more information.
 
 ## How to install
 
-DataFusion works with any library exposing the Arrow PyCapsule interface. If you
-need `pyarrow`, install the optional extra.
-
 ### uv
 
 ```bash
 uv add datafusion
-# or with PyArrow support
-uv add "datafusion[pyarrow]"
 ```
 
 ### Pip
 
 ```bash
 pip install datafusion
-# or with PyArrow support
-pip install "datafusion[pyarrow]"
 # or
 python -m pip install datafusion
 ```

docs/mdbook/src/installation.md (0 additions, 11 deletions)

@@ -18,13 +18,6 @@
 
 DataFusion is easy to install, just like any other Python library.
 
-DataFusion works with any library exposing the Arrow PyCapsule interface. If
-you rely on `pyarrow`, install the optional extra:
-
-```bash
-uv pip install "datafusion[pyarrow]"
-```
-
 ## Using uv
 
 If you do not yet have a virtual environment, create one:

@@ -43,16 +36,12 @@ Or, to add to a project:
 
 ```bash
 uv add datafusion
-# or with PyArrow support
-uv add "datafusion[pyarrow]"
 ```
 
 ## Using pip
 
 ``` bash
 pip install datafusion
-# or with PyArrow support
-pip install "datafusion[pyarrow]"
 ```
 
 ## uv & JupyterLab setup

docs/source/conf.py (0 additions, 4 deletions)

@@ -72,10 +72,6 @@
 suppress_warnings = ["autoapi.python_import_resolution"]
 autoapi_python_class_content = "both"
 autoapi_keep_files = False  # set to True for debugging generated files
-autoapi_options = ["members", "undoc-members", "special-members"]
-autoapi_member_options = {
-    "special-members": "__iter__,__aiter__,__arrow_c_array__,__arrow_c_stream__"
-}
 
 
 def autoapi_skip_member_fn(app, what, name, obj, skip, options) -> bool:  # noqa: ARG001

docs/source/user-guide/common-operations/functions.rst (0 additions, 4 deletions)

@@ -109,10 +109,6 @@ Casting
 
 Casting expressions to different data types using :py:func:`~datafusion.functions.arrow_cast`
 
-DataFusion's :class:`~datafusion.types.DataType` can be constructed from any
-object implementing ``__arrow_c_schema__`` and passed to ``arrow_cast`` without
-requiring :mod:`pyarrow`.
-
 .. ipython:: python
 
     df.select(

docs/source/user-guide/data-sources.rst (1 addition, 6 deletions)

@@ -158,12 +158,7 @@ as Delta Lake. This will require a recent version of
     df = ctx.table("my_delta_table")
     df.show()
 
-Any Python object that implements the
-``__arrow_c_stream__`` protocol can be registered with
-``register_dataset``. This includes scanners from libraries such as
-``nanoarrow``, ``Polars``, or ``DuckDB``.
-
-On older versions of ``deltalake`` (prior to 0.22) you can use the
+On older versions of ``deltalake`` (prior to 0.22) you can use the
 `Arrow DataSet <https://arrow.apache.org/docs/python/generated/pyarrow.dataset.Dataset.html>`_
 interface to import to DataFusion, but this does not support features such as filter push down
 which can lead to a significant performance difference.
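For reference, a minimal sketch of the older-`deltalake` path that the retained paragraph describes. It assumes `DeltaTable.to_pyarrow_dataset()` is available and uses a placeholder table path:

```python
from deltalake import DeltaTable

from datafusion import SessionContext

ctx = SessionContext()

# Export the Delta table through the PyArrow Dataset interface.
# Unlike the native integration in deltalake >= 0.22, this route does not
# support filter push down.
dt = DeltaTable("/path/to/delta_table")  # placeholder path
ctx.register_dataset("my_delta_table", dt.to_pyarrow_dataset())

df = ctx.table("my_delta_table")
df.show()
```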

docs/source/user-guide/dataframe/index.rst (1 addition, 40 deletions)

@@ -145,49 +145,10 @@ To materialize the results of your DataFrame operations:
 
     # Display results
     df.show()  # Print tabular format to console
-
+
     # Count rows
     count = df.count()
 
-PyArrow Streaming
------------------
-
-DataFusion DataFrames implement the ``__arrow_c_stream__`` protocol, enabling
-zero-copy streaming into libraries like `PyArrow <https://arrow.apache.org/>`_.
-Earlier versions eagerly converted the entire DataFrame when exporting to
-PyArrow, which could exhaust memory on large datasets. With streaming, batches
-are produced lazily so you can process arbitrarily large results without
-out-of-memory errors.
-
-.. code-block:: python
-
-    import pyarrow as pa
-
-    # Create a PyArrow RecordBatchReader without materializing all batches
-    reader = pa.RecordBatchReader._import_from_c_capsule(df.__arrow_c_stream__())
-    for batch in reader:
-        ...  # process each batch as it is produced
-
-DataFrames expose :py:meth:`~datafusion.DataFrame.to_stream`, which returns a
-``RecordBatchStream`` for lazily processing results without materializing them
-all at once:
-
-.. code-block:: python
-
-    stream = df.to_stream()
-    for batch in stream:
-        ...  # process each batch as it is produced
-
-DataFrames themselves are also iterable and delegate to ``to_stream()`` under
-the hood:
-
-.. code-block:: python
-
-    for batch in df:
-        ...  # process each batch as it is produced
-
-See :doc:`../io/arrow` for additional details on the Arrow interface.
-
 HTML Rendering
 --------------
 

docs/source/user-guide/introduction.rst (1 addition, 6 deletions)

@@ -26,16 +26,11 @@ DataFusion through various examples and highlight the most effective ways of using it.
 Installation
 ------------
 
-DataFusion is a Python library and, as such, can be installed via pip from
-`PyPI <https://pypi.org/project/datafusion>`__. DataFusion works with any
-library exposing the Arrow PyCapsule interface. If you need ``pyarrow``,
-install the optional extra.
+DataFusion is a Python library and, as such, can be installed via pip from `PyPI <https://pypi.org/project/datafusion>`__.
 
 .. code-block:: shell
 
     pip install datafusion
-    # or with PyArrow support
-    pip install "datafusion[pyarrow]"
 
 You can verify the installation by running:
 

pyproject.toml (2 additions, 5 deletions)

@@ -38,22 +38,19 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
     "Programming Language :: Python",
     "Programming Language :: Rust",
 ]
-dependencies = ["typing-extensions;python_version<'3.13'"]
+dependencies = ["pyarrow>=11.0.0", "typing-extensions;python_version<'3.13'"]
 dynamic = ["version"]
 
 [project.urls]
 homepage = "https://datafusion.apache.org/python"
 documentation = "https://datafusion.apache.org/python"
 repository = "https://github.com/apache/datafusion-python"
 
-[project.optional-dependencies]
-pyarrow = ["pyarrow>=11.0.0"]
-
 [tool.isort]
 profile = "black"
 
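As a quick illustration of the dependency change above: with `pyarrow` moved from an optional extra into the required `dependencies` list, downstream code can import it unconditionally. A trivial sanity check:

```python
# pyarrow is now a hard dependency of datafusion, so no "[pyarrow]" extra
# is needed at install time
import pyarrow as pa

print(pa.__version__)  # the pin above requires 11.0.0 or newer
```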

python/datafusion/catalog.py (2 additions, 2 deletions)

@@ -150,8 +150,8 @@ def __repr__(self) -> str:
         return self.table.__repr__()
 
     @staticmethod
-    def from_dataset(dataset: object) -> Table:
-        """Turn any ``__arrow_c_stream__`` source into a Table."""
+    def from_dataset(dataset: pa.dataset.Dataset) -> Table:
+        """Turn a pyarrow Dataset into a Table."""
         return Table(df_internal.catalog.RawTable.from_dataset(dataset))
 
     @property
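A minimal usage sketch of the narrowed `from_dataset` signature above, assuming `Table` is importable from `datafusion.catalog` (the module shown in this diff) and using a placeholder data directory:

```python
import pyarrow.dataset as ds

from datafusion.catalog import Table

# Build a pyarrow Dataset from a directory of Parquet files (placeholder path)
dataset = ds.dataset("data/", format="parquet")

# After this commit, from_dataset expects a pyarrow Dataset rather than any
# object exposing __arrow_c_stream__
table = Table.from_dataset(dataset)
```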

python/datafusion/context.py (17 additions, 24 deletions)

@@ -22,8 +22,7 @@
 import warnings
 from typing import TYPE_CHECKING, Any, Protocol
 
-from datafusion.common import DataTypeMap
-from datafusion.types import ensure_pyarrow_type
+import pyarrow as pa
 
 try:
     from warnings import deprecated  # Python 3.13+

@@ -46,7 +45,6 @@
 
     import pandas as pd
     import polars as pl
-    import pyarrow as pa
 
     from datafusion.plan import ExecutionPlan, LogicalPlan
 

@@ -552,7 +550,7 @@ def register_listing_table(
         self,
         name: str,
         path: str | pathlib.Path,
-        table_partition_cols: list[tuple[str, str | DataTypeMap | Any]] | None = None,
+        table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_extension: str = ".parquet",
         schema: pa.Schema | None = None,
         file_sort_order: list[list[Expr | SortExpr]] | None = None,

@@ -805,7 +803,7 @@ def register_parquet(
         self,
         name: str,
         path: str | pathlib.Path,
-        table_partition_cols: list[tuple[str, str | DataTypeMap | Any]] | None = None,
+        table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         parquet_pruning: bool = True,
         file_extension: str = ".parquet",
         skip_metadata: bool = True,

@@ -897,7 +895,7 @@ def register_json(
         schema: pa.Schema | None = None,
         schema_infer_max_records: int = 1000,
         file_extension: str = ".json",
-        table_partition_cols: list[tuple[str, str | DataTypeMap | Any]] | None = None,
+        table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_compression_type: str | None = None,
     ) -> None:
         """Register a JSON file as a table.

@@ -935,7 +933,7 @@ def register_avro(
         path: str | pathlib.Path,
         schema: pa.Schema | None = None,
         file_extension: str = ".avro",
-        table_partition_cols: list[tuple[str, str | DataTypeMap | Any]] | None = None,
+        table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
     ) -> None:
         """Register an Avro file as a table.
 

@@ -956,16 +954,12 @@ def register_avro(
             name, str(path), schema, file_extension, table_partition_cols
         )
 
-    def register_dataset(self, name: str, dataset: object) -> None:
-        """Register any ``__arrow_c_stream__`` source as a table.
-
-        Any Python object implementing the Arrow ``__arrow_c_stream__`` protocol
-        can be registered, including objects from libraries such as nanoarrow,
-        Polars, DuckDB, or :py:mod:`pyarrow.dataset`.
+    def register_dataset(self, name: str, dataset: pa.dataset.Dataset) -> None:
+        """Register a :py:class:`pa.dataset.Dataset` as a table.
 
         Args:
             name: Name of the table to register.
-            dataset: Object exposing ``__arrow_c_stream__``.
+            dataset: PyArrow dataset.
         """
         self.ctx.register_dataset(name, dataset)
 
@@ -1015,7 +1009,7 @@ def read_json(
         schema: pa.Schema | None = None,
         schema_infer_max_records: int = 1000,
         file_extension: str = ".json",
-        table_partition_cols: list[tuple[str, str | DataTypeMap | Any]] | None = None,
+        table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_compression_type: str | None = None,
     ) -> DataFrame:
         """Read a line-delimited JSON data source.

@@ -1055,7 +1049,7 @@ def read_csv(
         delimiter: str = ",",
         schema_infer_max_records: int = 1000,
         file_extension: str = ".csv",
-        table_partition_cols: list[tuple[str, str | DataTypeMap | Any]] | None = None,
+        table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_compression_type: str | None = None,
     ) -> DataFrame:
         """Read a CSV data source.

@@ -1100,7 +1094,7 @@ def read_csv(
     def read_parquet(
         self,
         path: str | pathlib.Path,
-        table_partition_cols: list[tuple[str, str | DataTypeMap | Any]] | None = None,
+        table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         parquet_pruning: bool = True,
         file_extension: str = ".parquet",
         skip_metadata: bool = True,

@@ -1151,7 +1145,7 @@ def read_avro(
         self,
         path: str | pathlib.Path,
         schema: pa.Schema | None = None,
-        file_partition_cols: list[tuple[str, str | DataTypeMap | Any]] | None = None,
+        file_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_extension: str = ".avro",
     ) -> DataFrame:
         """Create a :py:class:`DataFrame` for reading Avro data source.

@@ -1187,27 +1181,26 @@ def execute(self, plan: ExecutionPlan, partitions: int) -> RecordBatchStream:
 
     @staticmethod
     def _convert_table_partition_cols(
-        table_partition_cols: list[tuple[str, str | DataTypeMap | Any]],
-    ) -> list[tuple[str, Any]]:
+        table_partition_cols: list[tuple[str, str | pa.DataType]],
+    ) -> list[tuple[str, pa.DataType]]:
         warn = False
         converted_table_partition_cols = []
 
         for col, data_type in table_partition_cols:
             if isinstance(data_type, str):
                 warn = True
                 if data_type == "string":
-                    mapped = DataTypeMap.py_map_from_arrow_type_str("utf8")
+                    converted_data_type = pa.string()
                 elif data_type == "int":
-                    mapped = DataTypeMap.py_map_from_arrow_type_str("int32")
+                    converted_data_type = pa.int32()
                 else:
                     message = (
                         f"Unsupported literal data type '{data_type}' for partition "
                         "column. Supported types are 'string' and 'int'"
                     )
                     raise ValueError(message)
-                converted_data_type = ensure_pyarrow_type(mapped)
             else:
-                converted_data_type = ensure_pyarrow_type(data_type)
+                converted_data_type = data_type
 
             converted_table_partition_cols.append((col, converted_data_type))
 
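To show what the conversion above accepts after this change, a brief sketch; the directory and column names are hypothetical:

```python
import pyarrow as pa

from datafusion import SessionContext

ctx = SessionContext()

# Preferred: pass pyarrow DataTypes for partition columns directly
df = ctx.read_parquet(
    "data/",  # hypothetical hive-partitioned directory
    table_partition_cols=[("year", pa.int32()), ("month", pa.string())],
)

# Plain strings are still accepted; _convert_table_partition_cols maps
# "string" -> pa.string() and "int" -> pa.int32(), and sets its warn flag
df_legacy = ctx.read_parquet(
    "data/",
    table_partition_cols=[("year", "int"), ("month", "string")],
)
```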
