Skip to content

Commit 3475086

Browse files
committed
docs: update Arrow C Array export documentation to clarify return values
1 parent a3194ba commit 3475086

File tree

5 files changed: +24 -5 lines changed

docs/source/user-guide/io/arrow.rst

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,12 @@ To import an Arrow table, use :py:func:`datafusion.context.SessionContext.from_a
3535
This will accept any Python object that implements
3636
`__arrow_c_stream__ <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowstream-export>`_
3737
or `__arrow_c_array__ <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export>`_
38-
and returns a ``StructArray``. Common pyarrow sources you can use are:
38+
and returns a ``StructArray``.
39+
40+
.. note::
41+
``__arrow_c_array__`` must return ``(schema_capsule, array_capsule)``.
42+
43+
Common pyarrow sources you can use are:
3944

4045
- `Array <https://arrow.apache.org/docs/python/generated/pyarrow.Array.html>`_ (but it must return a Struct Array)
4146
- `Record Batch <https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatch.html>`_

python/datafusion/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,12 @@
3030
except ImportError:
3131
import importlib_metadata
3232

33-
from . import functions, object_store, substrait, unparser
33+
from . import functions, object_store, unparser
34+
35+
try: # pragma: no cover - optional dependency
36+
from . import substrait
37+
except ImportError: # pragma: no cover - substrait not built
38+
substrait = None
3439

3540
# The following imports are okay to remain as opaque to the user.
3641
from ._internal import Config

python/datafusion/context.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,8 @@ class ArrowArrayExportable(Protocol):
6464
"""Type hint for object exporting Arrow C Array via Arrow PyCapsule Interface.
6565
6666
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
67+
68+
The method should return ``(schema_capsule, array_capsule)``.
6769
"""
6870

6971
def __arrow_c_array__( # noqa: D105

python/datafusion/record_batch.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,15 @@ def __arrow__(self, *args: object, **kwargs: object) -> pa.RecordBatch:
5757
def __arrow_c_array__(
5858
self, requested_schema: object | None = None
5959
) -> tuple[object, object]:
60-
"""Arrow C Data Interface export."""
61-
return self.record_batch.__arrow_c_array__(requested_schema)
60+
"""Arrow C Data Interface export.
61+
62+
Returns:
63+
tuple[object, object]: ``(schema_capsule, array_capsule)``
64+
"""
65+
schema_capsule, array_capsule = self.record_batch.__arrow_c_array__(
66+
requested_schema
67+
)
68+
return schema_capsule, array_capsule
6269

6370

6471
class RecordBatchStream:

src/record_batch.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ impl PyRecordBatch {
9898
let data = struct_array.to_data();
9999
let array = FFI_ArrowArray::new(&data);
100100
let schema =
101-
FFI_ArrowSchema::try_from(data.data_type()).map_err(PyDataFusionError::from)?;
101+
FFI_ArrowSchema::try_from(self.batch.schema().as_ref()).map_err(PyDataFusionError::from)?;
102102

103103
let array_ptr = Box::into_raw(Box::new(array));
104104
let schema_ptr = Box::into_raw(Box::new(schema));

0 commit comments

Comments
 (0)