|
17 | 17 |
|
18 | 18 | use std::sync::Arc; |
19 | 19 |
|
20 | | -use datafusion::arrow::array::{Array, ArrayRef}; |
| 20 | +use datafusion::arrow::array::ArrayRef; |
21 | 21 | use datafusion::arrow::datatypes::DataType; |
22 | | -use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; |
| 22 | +use datafusion::arrow::pyarrow::PyArrowType; |
23 | 23 | use datafusion::common::ScalarValue; |
24 | 24 | use datafusion::error::{DataFusionError, Result}; |
25 | 25 | use datafusion::logical_expr::{ |
26 | 26 | create_udaf, Accumulator, AccumulatorFactoryFunction, AggregateUDF, |
27 | 27 | }; |
28 | 28 | use datafusion_ffi::udaf::{FFI_AggregateUDF, ForeignAggregateUDF}; |
29 | 29 | use pyo3::prelude::*; |
30 | | -use pyo3::types::{PyCapsule, PyDict, PyTuple, PyType}; |
| 30 | +use pyo3::types::{PyCapsule, PyTuple}; |
31 | 31 |
|
32 | 32 | use crate::common::data_type::PyScalarValue; |
33 | 33 | use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionResult}; |
34 | 34 | use crate::expr::PyExpr; |
35 | | -use crate::utils::{parse_volatility, validate_pycapsule}; |
| 35 | +use crate::utils::{parse_volatility, py_obj_to_scalar_value, validate_pycapsule}; |
36 | 36 |
|
37 | 37 | #[derive(Debug)] |
38 | 38 | struct RustAccumulator { |
39 | 39 | accum: Py<PyAny>, |
40 | | - return_type: DataType, |
41 | | - pyarrow_array_type: Option<Py<PyType>>, |
42 | | - pyarrow_chunked_array_type: Option<Py<PyType>>, |
43 | 40 | } |
44 | 41 |
|
45 | 42 | impl RustAccumulator { |
46 | | - fn new(accum: Py<PyAny>, return_type: DataType) -> Self { |
47 | | - Self { |
48 | | - accum, |
49 | | - return_type, |
50 | | - pyarrow_array_type: None, |
51 | | - pyarrow_chunked_array_type: None, |
52 | | - } |
53 | | - } |
54 | | - |
55 | | - fn ensure_pyarrow_types(&mut self, py: Python<'_>) -> PyResult<(Py<PyType>, Py<PyType>)> { |
56 | | - if self.pyarrow_array_type.is_none() || self.pyarrow_chunked_array_type.is_none() { |
57 | | - let pyarrow = PyModule::import(py, "pyarrow")?; |
58 | | - let array_attr = pyarrow.getattr("Array")?; |
59 | | - let array_type = array_attr.downcast::<PyType>()?; |
60 | | - let chunked_array_attr = pyarrow.getattr("ChunkedArray")?; |
61 | | - let chunked_array_type = chunked_array_attr.downcast::<PyType>()?; |
62 | | - self.pyarrow_array_type = Some(array_type.clone().unbind()); |
63 | | - self.pyarrow_chunked_array_type = Some(chunked_array_type.clone().unbind()); |
64 | | - } |
65 | | - Ok(( |
66 | | - self.pyarrow_array_type |
67 | | - .as_ref() |
68 | | - .expect("array type set") |
69 | | - .clone_ref(py), |
70 | | - self.pyarrow_chunked_array_type |
71 | | - .as_ref() |
72 | | - .expect("chunked array type set") |
73 | | - .clone_ref(py), |
74 | | - )) |
75 | | - } |
76 | | - |
77 | | - fn is_pyarrow_array_like( |
78 | | - &mut self, |
79 | | - py: Python<'_>, |
80 | | - value: &Bound<'_, PyAny>, |
81 | | - ) -> PyResult<bool> { |
82 | | - let (array_type, chunked_array_type) = self.ensure_pyarrow_types(py)?; |
83 | | - let array_type = array_type.bind(py); |
84 | | - let chunked_array_type = chunked_array_type.bind(py); |
85 | | - Ok(value.is_instance(array_type)? || value.is_instance(chunked_array_type)?) |
| 43 | + fn new(accum: Py<PyAny>) -> Self { |
| 44 | + Self { accum } |
86 | 45 | } |
87 | 46 | } |
88 | 47 |
|
89 | 48 | impl Accumulator for RustAccumulator { |
90 | 49 | fn state(&mut self) -> Result<Vec<ScalarValue>> { |
91 | 50 | Python::attach(|py| { |
92 | | - self.accum |
93 | | - .bind(py) |
94 | | - .call_method0("state")? |
95 | | - .extract::<Vec<PyScalarValue>>() |
| 51 | + let values = self.accum.bind(py).call_method0("state")?; |
| 52 | + let mut scalars = Vec::new(); |
| 53 | + for item in values.iter()? { |
| 54 | + let item = item?; |
| 55 | + let scalar = match item.extract::<PyScalarValue>() { |
| 56 | + Ok(py_scalar) => py_scalar.0, |
| 57 | + Err(_) => py_obj_to_scalar_value(py, item.into_py(py))?, |
| 58 | + }; |
| 59 | + scalars.push(scalar); |
| 60 | + } |
| 61 | + Ok(scalars) |
96 | 62 | }) |
97 | | - .map(|v| v.into_iter().map(|x| x.0).collect()) |
98 | 63 | .map_err(|e| DataFusionError::Execution(format!("{e}"))) |
99 | 64 | } |
100 | 65 |
|
101 | 66 | fn evaluate(&mut self) -> Result<ScalarValue> { |
102 | 67 | Python::attach(|py| { |
103 | 68 | let value = self.accum.bind(py).call_method0("evaluate")?; |
104 | | - let is_list_type = matches!( |
105 | | - self.return_type, |
106 | | - DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) |
107 | | - ); |
108 | | - if is_list_type && self.is_pyarrow_array_like(py, &value)? { |
109 | | - let pyarrow = PyModule::import(py, "pyarrow")?; |
110 | | - let list_value = value.call_method0("to_pylist")?; |
111 | | - let py_type = self.return_type.to_pyarrow(py)?; |
112 | | - let kwargs = PyDict::new(py); |
113 | | - kwargs.set_item("type", py_type)?; |
114 | | - return pyarrow |
115 | | - .getattr("scalar")? |
116 | | - .call((list_value,), Some(&kwargs))? |
117 | | - .extract::<PyScalarValue>(); |
| 69 | + match value.extract::<PyScalarValue>() { |
| 70 | + Ok(py_scalar) => Ok(py_scalar.0), |
| 71 | + Err(_) => py_obj_to_scalar_value(py, value.into_py(py)), |
118 | 72 | } |
119 | | - value.extract::<PyScalarValue>() |
120 | 73 | }) |
121 | | - .map(|v| v.0) |
122 | 74 | .map_err(|e| DataFusionError::Execution(format!("{e}"))) |
123 | 75 | } |
124 | 76 |
|
@@ -204,10 +156,7 @@ pub fn to_rust_accumulator(accum: Py<PyAny>) -> AccumulatorFactoryFunction { |
204 | 156 | .call0(py) |
205 | 157 | .map_err(|e| DataFusionError::Execution(format!("{e}"))) |
206 | 158 | })?; |
207 | | - Ok(Box::new(RustAccumulator::new( |
208 | | - accum, |
209 | | - args.return_type().clone(), |
210 | | - ))) |
| 159 | + Ok(Box::new(RustAccumulator::new(accum))) |
211 | 160 | }) |
212 | 161 | } |
213 | 162 |
|
|
0 commit comments