From 3f07ecfd975359689524c9f9cbc52d0ecd01a607 Mon Sep 17 00:00:00 2001 From: Ben Chambers Date: Wed, 27 Jan 2021 09:06:18 -0800 Subject: [PATCH 1/7] add tests for slices and structs --- rust/arrow/src/compute/kernels/concat.rs | 122 +++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/rust/arrow/src/compute/kernels/concat.rs b/rust/arrow/src/compute/kernels/concat.rs index 08240424e82..d1b16dbcbb3 100644 --- a/rust/arrow/src/compute/kernels/concat.rs +++ b/rust/arrow/src/compute/kernels/concat.rs @@ -157,6 +157,38 @@ mod tests { Ok(()) } + #[test] + fn test_concat_primitive_array_slices() -> Result<()> { + let input_1 = PrimitiveArray::::from(vec![ + Some(-1), + Some(-1), + Some(2), + None, + None, + ]).slice(1, 3); + + let input_2 = PrimitiveArray::::from(vec![ + Some(101), + Some(102), + Some(103), + None, + ]).slice(1, 3); + let arr = concat(&[input_1.as_ref(), input_2.as_ref()])?; + + let expected_output = Arc::new(PrimitiveArray::::from(vec![ + Some(-1), + Some(2), + None, + Some(102), + Some(103), + None, + ])) as ArrayRef; + + assert_eq!(&arr, &expected_output); + + Ok(()) + } + #[test] fn test_concat_boolean_primitive_arrays() -> Result<()> { let arr = concat(&[ @@ -254,4 +286,94 @@ mod tests { Ok(()) } + + #[test] + fn test_concat_struct_arrays() -> Result<()> { + let field = Field::new("field", DataType::Int64, true); + let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::::from(vec![ + Some(-1), + Some(-1), + Some(2), + None, + None, + ])); + let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]); + + let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::::from(vec![ + Some(101), + Some(102), + Some(103), + None, + ])); + let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]); + + let input_primitive_3: ArrayRef = Arc::new(PrimitiveArray::::from(vec![Some(256), Some(512), Some(1024)])); + let input_struct_3 = StructArray::from(vec![(field.clone(), input_primitive_3)]); + + let arr = concat(&[ + &input_struct_1, + &input_struct_2, + &input_struct_3, + ])?; + + let expected_primitive_output = Arc::new(PrimitiveArray::::from(vec![ + Some(-1), + Some(-1), + Some(2), + None, + None, + Some(101), + Some(102), + Some(103), + None, + Some(256), + Some(512), + Some(1024), + ])) as ArrayRef; + + let actual_primitive = arr.as_any().downcast_ref::().unwrap().column(0); + assert_eq!(actual_primitive, &expected_primitive_output); + + Ok(()) + } + + #[test] + fn test_concat_struct_array_slices() -> Result<()> { + let field = Field::new("field", DataType::Int64, true); + let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::::from(vec![ + Some(-1), + Some(-1), + Some(2), + None, + None, + ])); + let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]); + + let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::::from(vec![ + Some(101), + Some(102), + Some(103), + None, + ])); + let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]); + + let arr = concat(&[ + input_struct_1.slice(1, 3).as_ref(), + input_struct_2.slice(1, 2).as_ref(), + ])?; + + let expected_primitive_output = Arc::new(PrimitiveArray::::from(vec![ + Some(-1), + Some(2), + None, + Some(102), + Some(103), + None, + ])) as ArrayRef; + + let actual_primitive = arr.as_any().downcast_ref::().unwrap().column(0); + assert_eq!(actual_primitive, &expected_primitive_output); + + Ok(()) + } } From bb89b199678f52cf58563581cad645792ca91db1 Mon Sep 17 00:00:00 2001 From: Ben Chambers Date: Wed, 27 Jan 2021 09:20:08 -0800 Subject: [PATCH 2/7] attempt at fixing struct concat --- rust/arrow/src/array/transform/mod.rs | 42 +++++++++++++++++++++ rust/arrow/src/array/transform/structure.rs | 4 +- rust/arrow/src/compute/kernels/concat.rs | 1 - 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs index 2505068f10f..22a32ff33f3 100644 --- a/rust/arrow/src/array/transform/mod.rs +++ b/rust/arrow/src/array/transform/mod.rs @@ -714,6 +714,48 @@ mod tests { assert_eq!(array, expected) } + #[test] + fn test_struct_offset() { + let strings: ArrayRef = Arc::new(StringArray::from(vec![ + Some("joe"), + None, + None, + Some("mark"), + Some("doe"), + ])); + let ints: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(1), + Some(2), + Some(3), + Some(4), + Some(5), + ])); + + let array = + StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) + .unwrap() + .slice(1, 3) + .data(); + let arrays = vec![array.as_ref()]; + let mut mutable = MutableArrayData::new(arrays, false, 0); + + mutable.extend(0, 1, 3); + let data = mutable.freeze(); + let array = StructArray::from(Arc::new(data)); + + // Struct equality doesn't seem to work when using slices? + + // let expected = StructArray::try_from(vec![ + // ("f1", strings.slice(2, 2)), + // ("f2", ints.slice(2, 2)), + // ]) + // .unwrap(); + // + // assert_eq!(array, expected) + // assert_eq!(array.column(0), &strings.slice(2, 2)); + assert_eq!(array.column(1), &ints.slice(2, 2)); + } + #[test] fn test_struct_nulls() { let strings: ArrayRef = Arc::new(StringArray::from(vec![ diff --git a/rust/arrow/src/array/transform/structure.rs b/rust/arrow/src/array/transform/structure.rs index 5c41d76a7f1..1b2260c0b0f 100644 --- a/rust/arrow/src/array/transform/structure.rs +++ b/rust/arrow/src/array/transform/structure.rs @@ -29,7 +29,7 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend { mutable .child_data .iter_mut() - .for_each(|child| child.extend(index, start, start + len)) + .for_each(|child| child.extend(index, array.offset() + start, array.offset() + start + len)) }, ) } else { @@ -38,7 +38,7 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend { index: usize, start: usize, len: usize| { - (start..start + len).for_each(|i| { + (array.offset() + start..array.offset() + start + len).for_each(|i| { if array.is_valid(i) { mutable .child_data diff --git a/rust/arrow/src/compute/kernels/concat.rs b/rust/arrow/src/compute/kernels/concat.rs index d1b16dbcbb3..2b362321269 100644 --- a/rust/arrow/src/compute/kernels/concat.rs +++ b/rust/arrow/src/compute/kernels/concat.rs @@ -368,7 +368,6 @@ mod tests { None, Some(102), Some(103), - None, ])) as ArrayRef; let actual_primitive = arr.as_any().downcast_ref::().unwrap().column(0); From eb82fdbe97ecfe2acfd1e8b14151b54ee4577833 Mon Sep 17 00:00:00 2001 From: Ben Chambers Date: Wed, 27 Jan 2021 09:46:48 -0800 Subject: [PATCH 3/7] format --- rust/arrow/src/array/transform/structure.rs | 11 ++- rust/arrow/src/compute/kernels/concat.rs | 89 ++++++++++++--------- 2 files changed, 59 insertions(+), 41 deletions(-) diff --git a/rust/arrow/src/array/transform/structure.rs b/rust/arrow/src/array/transform/structure.rs index 1b2260c0b0f..c019f5ac6a9 100644 --- a/rust/arrow/src/array/transform/structure.rs +++ b/rust/arrow/src/array/transform/structure.rs @@ -26,10 +26,13 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend { index: usize, start: usize, len: usize| { - mutable - .child_data - .iter_mut() - .for_each(|child| child.extend(index, array.offset() + start, array.offset() + start + len)) + mutable.child_data.iter_mut().for_each(|child| { + child.extend( + index, + array.offset() + start, + array.offset() + start + len, + ) + }) }, ) } else { diff --git a/rust/arrow/src/compute/kernels/concat.rs b/rust/arrow/src/compute/kernels/concat.rs index 2b362321269..2db9e373d3c 100644 --- a/rust/arrow/src/compute/kernels/concat.rs +++ b/rust/arrow/src/compute/kernels/concat.rs @@ -165,14 +165,16 @@ mod tests { Some(2), None, None, - ]).slice(1, 3); + ]) + .slice(1, 3); - let input_2 = PrimitiveArray::::from(vec![ + let input_2 = PrimitiveArray::::from(vec![ Some(101), Some(102), Some(103), None, - ]).slice(1, 3); + ]) + .slice(1, 3); let arr = concat(&[input_1.as_ref(), input_2.as_ref()])?; let expected_output = Arc::new(PrimitiveArray::::from(vec![ @@ -290,31 +292,34 @@ mod tests { #[test] fn test_concat_struct_arrays() -> Result<()> { let field = Field::new("field", DataType::Int64, true); - let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::::from(vec![ - Some(-1), - Some(-1), - Some(2), - None, - None, - ])); + let input_primitive_1: ArrayRef = + Arc::new(PrimitiveArray::::from(vec![ + Some(-1), + Some(-1), + Some(2), + None, + None, + ])); let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]); - let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::::from(vec![ - Some(101), - Some(102), - Some(103), - None, - ])); + let input_primitive_2: ArrayRef = + Arc::new(PrimitiveArray::::from(vec![ + Some(101), + Some(102), + Some(103), + None, + ])); let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]); - let input_primitive_3: ArrayRef = Arc::new(PrimitiveArray::::from(vec![Some(256), Some(512), Some(1024)])); + let input_primitive_3: ArrayRef = + Arc::new(PrimitiveArray::::from(vec![ + Some(256), + Some(512), + Some(1024), + ])); let input_struct_3 = StructArray::from(vec![(field.clone(), input_primitive_3)]); - let arr = concat(&[ - &input_struct_1, - &input_struct_2, - &input_struct_3, - ])?; + let arr = concat(&[&input_struct_1, &input_struct_2, &input_struct_3])?; let expected_primitive_output = Arc::new(PrimitiveArray::::from(vec![ Some(-1), @@ -331,7 +336,11 @@ mod tests { Some(1024), ])) as ArrayRef; - let actual_primitive = arr.as_any().downcast_ref::().unwrap().column(0); + let actual_primitive = arr + .as_any() + .downcast_ref::() + .unwrap() + .column(0); assert_eq!(actual_primitive, &expected_primitive_output); Ok(()) @@ -340,21 +349,23 @@ mod tests { #[test] fn test_concat_struct_array_slices() -> Result<()> { let field = Field::new("field", DataType::Int64, true); - let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::::from(vec![ - Some(-1), - Some(-1), - Some(2), - None, - None, - ])); + let input_primitive_1: ArrayRef = + Arc::new(PrimitiveArray::::from(vec![ + Some(-1), + Some(-1), + Some(2), + None, + None, + ])); let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]); - let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::::from(vec![ - Some(101), - Some(102), - Some(103), - None, - ])); + let input_primitive_2: ArrayRef = + Arc::new(PrimitiveArray::::from(vec![ + Some(101), + Some(102), + Some(103), + None, + ])); let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]); let arr = concat(&[ @@ -370,7 +381,11 @@ mod tests { Some(103), ])) as ArrayRef; - let actual_primitive = arr.as_any().downcast_ref::().unwrap().column(0); + let actual_primitive = arr + .as_any() + .downcast_ref::() + .unwrap() + .column(0); assert_eq!(actual_primitive, &expected_primitive_output); Ok(()) From c22bc4d08015f20e641e86782c83d0f5ea21ae27 Mon Sep 17 00:00:00 2001 From: Ben Chambers Date: Wed, 27 Jan 2021 10:00:52 -0800 Subject: [PATCH 4/7] construct expected strings directly --- rust/arrow/src/array/transform/mod.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs index 22a32ff33f3..08672bc3170 100644 --- a/rust/arrow/src/array/transform/mod.rs +++ b/rust/arrow/src/array/transform/mod.rs @@ -745,15 +745,14 @@ mod tests { // Struct equality doesn't seem to work when using slices? - // let expected = StructArray::try_from(vec![ - // ("f1", strings.slice(2, 2)), - // ("f2", ints.slice(2, 2)), - // ]) - // .unwrap(); - // - // assert_eq!(array, expected) - // assert_eq!(array.column(0), &strings.slice(2, 2)); - assert_eq!(array.column(1), &ints.slice(2, 2)); + let expected_strings: ArrayRef = Arc::new(StringArray::from(vec![None, Some("mark")])); + let expected = StructArray::try_from(vec![ + ("f1", expected_strings), + ("f2", ints.slice(2, 2)), + ]) + .unwrap(); + + assert_eq!(array, expected); } #[test] From 3943a708f353a2900d4c09274911cbdb76231c05 Mon Sep 17 00:00:00 2001 From: Ben Chambers Date: Wed, 27 Jan 2021 11:17:33 -0800 Subject: [PATCH 5/7] format --- rust/arrow/src/array/transform/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs index 08672bc3170..029af511f2f 100644 --- a/rust/arrow/src/array/transform/mod.rs +++ b/rust/arrow/src/array/transform/mod.rs @@ -745,12 +745,13 @@ mod tests { // Struct equality doesn't seem to work when using slices? - let expected_strings: ArrayRef = Arc::new(StringArray::from(vec![None, Some("mark")])); + let expected_strings: ArrayRef = + Arc::new(StringArray::from(vec![None, Some("mark")])); let expected = StructArray::try_from(vec![ ("f1", expected_strings), ("f2", ints.slice(2, 2)), ]) - .unwrap(); + .unwrap(); assert_eq!(array, expected); } From 97969a0241d862bd7470ae40f9b9260bccd77137 Mon Sep 17 00:00:00 2001 From: Ben Chambers Date: Wed, 27 Jan 2021 11:31:31 -0800 Subject: [PATCH 6/7] remove defunct comment --- rust/arrow/src/array/transform/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs index 029af511f2f..6ca52b321e8 100644 --- a/rust/arrow/src/array/transform/mod.rs +++ b/rust/arrow/src/array/transform/mod.rs @@ -743,8 +743,6 @@ mod tests { let data = mutable.freeze(); let array = StructArray::from(Arc::new(data)); - // Struct equality doesn't seem to work when using slices? - let expected_strings: ArrayRef = Arc::new(StringArray::from(vec![None, Some("mark")])); let expected = StructArray::try_from(vec![ From 0b7415d1d804ee47ddd2af0db8fcc8104d9592ce Mon Sep 17 00:00:00 2001 From: Ben Chambers Date: Wed, 27 Jan 2021 16:26:07 -0800 Subject: [PATCH 7/7] fix clippy --- rust/arrow/src/compute/kernels/concat.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/arrow/src/compute/kernels/concat.rs b/rust/arrow/src/compute/kernels/concat.rs index 2db9e373d3c..a51831c744e 100644 --- a/rust/arrow/src/compute/kernels/concat.rs +++ b/rust/arrow/src/compute/kernels/concat.rs @@ -317,7 +317,7 @@ mod tests { Some(512), Some(1024), ])); - let input_struct_3 = StructArray::from(vec![(field.clone(), input_primitive_3)]); + let input_struct_3 = StructArray::from(vec![(field, input_primitive_3)]); let arr = concat(&[&input_struct_1, &input_struct_2, &input_struct_3])?; @@ -366,7 +366,7 @@ mod tests { Some(103), None, ])); - let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]); + let input_struct_2 = StructArray::from(vec![(field, input_primitive_2)]); let arr = concat(&[ input_struct_1.slice(1, 3).as_ref(),