From 37c70fb87bc274427cdebe7910d2368ecbea6c68 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Fri, 22 Jan 2021 17:52:38 +0100 Subject: [PATCH 1/5] Add from_iter_values to create arrays from (non null) values --- rust/arrow/src/array/array_primitive.rs | 41 +++++++++++++++++++++++++ rust/arrow/src/array/array_string.rs | 41 +++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs index 3dcb187495f..f03b24858da 100644 --- a/rust/arrow/src/array/array_primitive.rs +++ b/rust/arrow/src/array/array_primitive.rs @@ -94,6 +94,35 @@ impl PrimitiveArray { let offset = i + self.offset(); unsafe { *self.raw_values.as_ptr().add(offset) } } + + /// Creates a PrimitiveArray based on an iterator of values without nulls + pub fn from_iter_values>(iter: I) -> Self + where + Ptr: Borrow<::Native>, + { + let iter = iter.into_iter(); + let (_, data_len) = iter.size_hint(); + let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound. + + let mut val_buf = MutableBuffer::new( + data_len * mem::size_of::<::Native>(), + ); + + iter.for_each(|item| { + val_buf.push(*item.borrow()); + }); + + let data = ArrayData::new( + T::DATA_TYPE, + data_len, + None, + None, + 0, + vec![val_buf.into()], + vec![], + ); + PrimitiveArray::from(Arc::new(data)) + } } impl Array for PrimitiveArray { @@ -820,6 +849,18 @@ mod tests { } } + #[test] + fn test_primitive_from_iter_values() { + // Test building a primitive array with from_iter_values + + let arr: PrimitiveArray = PrimitiveArray::from_iter_values(0..10); + assert_eq!(10, arr.len()); + assert_eq!(0, arr.null_count()); + for i in 0..10i32 { + assert_eq!(i, arr.value(i as usize)); + } + } + #[test] #[should_panic(expected = "PrimitiveArray data should contain a single buffer only \ (values buffer)")] diff --git a/rust/arrow/src/array/array_string.rs b/rust/arrow/src/array/array_string.rs index f1523e35c3b..7d097157b4a 100644 --- a/rust/arrow/src/array/array_string.rs +++ b/rust/arrow/src/array/array_string.rs @@ -146,6 +146,36 @@ impl GenericStringArray { pub(crate) fn from_opt_vec(v: Vec>) -> Self { v.into_iter().collect() } + + /// Creates a `GenericStringArray` based on an iterator of values without nulls + pub fn from_iter_values>(iter: I) -> Self + where + Ptr: AsRef, + { + let iter = iter.into_iter(); + let (_, data_len) = iter.size_hint(); + let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound. + + let mut offsets = + MutableBuffer::new((data_len + 1) * std::mem::size_of::()); + let mut values = MutableBuffer::new(0); + + let mut length_so_far = OffsetSize::zero(); + offsets.push(length_so_far); + + for i in iter { + let s = i.as_ref(); + length_so_far += OffsetSize::from_usize(s.len()).unwrap(); + offsets.push(length_so_far); + values.extend_from_slice(s.as_bytes()); + } + let array_data = ArrayData::builder(OffsetSize::DATA_TYPE) + .len(data_len) + .add_buffer(offsets.into()) + .add_buffer(values.into()) + .build(); + Self::from(array_data) + } } impl<'a, Ptr, OffsetSize: StringOffsetSizeTrait> FromIterator> @@ -411,6 +441,7 @@ mod tests { ); } + #[test] fn test_string_array_from_iter() { let data = vec![Some("hello"), None, Some("arrow")]; // from Vec> @@ -424,4 +455,14 @@ mod tests { assert_eq!(array1, array2); assert_eq!(array2, array3); } + + #[test] + fn test_string_array_from_iter_values() { + let data = vec!["hello", "hello2"]; + // from Vec> + let array1 = StringArray::from_iter_values(data.iter()); + + assert_eq!(array1.value(0), "hello"); + assert_eq!(array1.value(1), "hello2"); + } } From 8cd118d3bcffdca209aa082b701ab5b0a7cead18 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Fri, 22 Jan 2021 18:02:29 +0100 Subject: [PATCH 2/5] Remove borrow (they are primitive types anyway) --- rust/arrow/src/array/array_primitive.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs index f03b24858da..775d36239be 100644 --- a/rust/arrow/src/array/array_primitive.rs +++ b/rust/arrow/src/array/array_primitive.rs @@ -96,10 +96,7 @@ impl PrimitiveArray { } /// Creates a PrimitiveArray based on an iterator of values without nulls - pub fn from_iter_values>(iter: I) -> Self - where - Ptr: Borrow<::Native>, - { + pub fn from_iter_values>(iter: I) -> Self { let iter = iter.into_iter(); let (_, data_len) = iter.size_hint(); let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound. @@ -109,7 +106,7 @@ impl PrimitiveArray { ); iter.for_each(|item| { - val_buf.push(*item.borrow()); + val_buf.push(item); }); let data = ArrayData::new( From 3a6397425da5c790d0c8b2f1ff10f65dcf4d5af4 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Fri, 22 Jan 2021 18:03:31 +0100 Subject: [PATCH 3/5] Fix comment --- rust/arrow/src/array/array_string.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rust/arrow/src/array/array_string.rs b/rust/arrow/src/array/array_string.rs index 7d097157b4a..4b98fafdeaf 100644 --- a/rust/arrow/src/array/array_string.rs +++ b/rust/arrow/src/array/array_string.rs @@ -459,7 +459,6 @@ mod tests { #[test] fn test_string_array_from_iter_values() { let data = vec!["hello", "hello2"]; - // from Vec> let array1 = StringArray::from_iter_values(data.iter()); assert_eq!(array1.value(0), "hello"); From 941ee5d16abc8dea540e8ffaef73df498b47ea01 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 23 Jan 2021 13:33:39 +0100 Subject: [PATCH 4/5] Use extend --- rust/arrow/src/array/array_primitive.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs index 775d36239be..e03bd4b5a14 100644 --- a/rust/arrow/src/array/array_primitive.rs +++ b/rust/arrow/src/array/array_primitive.rs @@ -101,13 +101,9 @@ impl PrimitiveArray { let (_, data_len) = iter.size_hint(); let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound. - let mut val_buf = MutableBuffer::new( - data_len * mem::size_of::<::Native>(), - ); + let mut val_buf = MutableBuffer::new(data_len); - iter.for_each(|item| { - val_buf.push(item); - }); + val_buf.extend(iter); let data = ArrayData::new( T::DATA_TYPE, From a37941c77713a1d883b1851b140c426e01f65438 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 23 Jan 2021 14:06:32 +0100 Subject: [PATCH 5/5] Use .collect() api --- rust/arrow/src/array/array_primitive.rs | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs index e03bd4b5a14..984454ce779 100644 --- a/rust/arrow/src/array/array_primitive.rs +++ b/rust/arrow/src/array/array_primitive.rs @@ -97,21 +97,14 @@ impl PrimitiveArray { /// Creates a PrimitiveArray based on an iterator of values without nulls pub fn from_iter_values>(iter: I) -> Self { - let iter = iter.into_iter(); - let (_, data_len) = iter.size_hint(); - let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound. - - let mut val_buf = MutableBuffer::new(data_len); - - val_buf.extend(iter); - + let val_buf: Buffer = iter.into_iter().collect(); let data = ArrayData::new( T::DATA_TYPE, - data_len, + val_buf.len() / mem::size_of::<::Native>(), None, None, 0, - vec![val_buf.into()], + vec![val_buf], vec![], ); PrimitiveArray::from(Arc::new(data))