Skip to content

Commit a10ee5a

Browse files
committed
More user documentation
1 parent 27fa92a commit a10ee5a

File tree

3 files changed

+27
-13
lines changed

3 files changed

+27
-13
lines changed

python/datafusion/user_defined.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,16 @@ class Accumulator(metaclass=ABCMeta):
298298

299299
@abstractmethod
300300
def state(self) -> list[pa.Scalar]:
301-
"""Return the current state."""
301+
"""Return the current state.
302+
303+
While this function template expects PyArrow Scalar values as its return type,
304+
you can return any value that can be converted into a Scalar. This
305+
includes basic Python data types such as integers and strings. In
306+
addition to primitive types, we currently support PyArrow, nanoarrow,
307+
and arro3 objects. Other objects
308+
that support the Arrow FFI standard will be given a "best attempt" at
309+
conversion to scalar objects.
310+
"""
302311

303312
@abstractmethod
304313
def update(self, *values: pa.Array) -> None:
@@ -312,18 +321,13 @@ def merge(self, states: list[pa.Array]) -> None:
312321
def evaluate(self) -> pa.Scalar:
313322
"""Return the resultant value.
314323
315-
If you need to return a list, wrap it in a scalar with the correct
316-
list type, for example::
317-
318-
import pyarrow as pa
319-
320-
return pa.scalar(
321-
[pa.scalar("2024-01-01T00:00:00Z")],
322-
type=pa.list_(pa.timestamp("ms")),
323-
)
324-
325-
Returning a ``pyarrow.Array`` from ``evaluate`` is not supported unless
326-
you explicitly convert it to a list-valued scalar.
324+
While this function template expects a PyArrow Scalar value as its return type,
325+
you can return any value that can be converted into a Scalar. This
326+
includes basic Python data types such as integers and strings. In
327+
addition to primitive types, we currently support PyArrow, nanoarrow,
328+
and arro3 objects. Other objects
329+
that support the Arrow FFI standard will be given a "best attempt" at
330+
conversion to scalar objects.
327331
"""
328332

329333

src/common/data_type.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ use datafusion::logical_expr::expr::NullTreatment as DFNullTreatment;
2222
use pyo3::exceptions::{PyNotImplementedError, PyValueError};
2323
use pyo3::prelude::*;
2424

25+
/// A [`ScalarValue`] wrapped in a Python object. This struct allows for conversion
26+
/// from a variety of Python objects into a [`ScalarValue`]. See
27+
/// ``FromPyArrow::from_pyarrow_bound`` for conversion details.
2528
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
2629
pub struct PyScalarValue(pub ScalarValue);
2730

src/pyarrow_util.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ use pyo3::{Bound, FromPyObject, PyAny, PyResult, Python};
3131
use crate::common::data_type::PyScalarValue;
3232
use crate::errors::PyDataFusionError;
3333

34+
/// Helper function to turn an Array into a ScalarValue. If ``as_list_array`` is true,
35+
/// the array will be turned into a ``ListArray``. Otherwise, we extract the first value
36+
/// from the array.
3437
fn array_to_scalar_value(array: ArrayRef, as_list_array: bool) -> PyResult<PyScalarValue> {
3538
if as_list_array {
3639
let field = Arc::new(Field::new_list_field(
@@ -46,6 +49,10 @@ fn array_to_scalar_value(array: ArrayRef, as_list_array: bool) -> PyResult<PySca
4649
}
4750
}
4851

52+
/// Helper function to take any Python object that contains an Arrow PyCapsule
53+
/// interface and attempt to extract a scalar value from it. If ``as_list_array``
54+
/// is true, the array will be turned into a ``ListArray``. Otherwise, we extract
55+
/// the first value from the array.
4956
fn pyobj_extract_scalar_via_capsule(
5057
value: &Bound<'_, PyAny>,
5158
as_list_array: bool,

0 commit comments

Comments
 (0)