diff --git a/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
index 8aaba2d9..5552d59f 100644
--- a/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
+++ b/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
@@ -107,6 +107,8 @@ public static IArrowArray BuildArray(ArrayData data)
                     return new FixedSizeListArray(data);
                 case ArrowTypeId.Interval:
                     return IntervalArray.Create(data);
+                case ArrowTypeId.RunEndEncoded:
+                    return new RunEndEncodedArray(data);
                 default:
                     throw new NotSupportedException($"An ArrowArray cannot be built for type {data.DataType.TypeId}.");
             }
diff --git a/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs b/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
new file mode 100644
index 00000000..08e2683d
--- /dev/null
+++ b/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
@@ -0,0 +1,216 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow;
+
+/// <summary>
+/// Represents a run-end encoded array.
+/// A run-end encoded array stores consecutive runs of the same value more efficiently.
+/// It contains two child arrays: run_ends (Int16/Int32/Int64) and values (any type).
+/// The run_ends array stores the cumulative end positions of each run.
+/// </summary>
+public class RunEndEncodedArray : Array
+{
+    /// <summary>
+    /// Gets the run ends array (Int16Array, Int32Array, or Int64Array).
+    /// This array contains the cumulative end indices for each run.
+    /// </summary>
+    public IArrowArray RunEnds { get; }
+
+    /// <summary>
+    /// Gets the values array.
+    /// This array contains the actual values that are run-length encoded.
+    /// </summary>
+    public IArrowArray Values { get; }
+
+    /// <summary>
+    /// Creates a new RunEndEncodedArray from ArrayData.
+    /// </summary>
+    /// <param name="data">The array data containing run ends and values as children.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
+    /// <exception cref="ArgumentException"><paramref name="data"/> does not have exactly two children.</exception>
+    public RunEndEncodedArray(ArrayData data)
+        : this(data, BuildChild(data, 0), BuildChild(data, 1))
+    {
+    }
+
+    /// <summary>
+    /// Creates a new RunEndEncodedArray with specified run ends and values arrays.
+    /// </summary>
+    /// <param name="runEnds">The run ends array (must be Int16Array, Int32Array, or Int64Array).</param>
+    /// <param name="values">The values array (can be any type).</param>
+    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
+    /// <exception cref="ArgumentException">The run ends type is unsupported, the children differ in length, or the last run end is out of range.</exception>
+    public RunEndEncodedArray(IArrowArray runEnds, IArrowArray values)
+        : this(CreateArrayData(runEnds, values), runEnds, values)
+    {
+    }
+
+    private RunEndEncodedArray(ArrayData data, IArrowArray runEnds, IArrowArray values)
+        : base(data)
+    {
+        data.EnsureBufferCount(0); // REE arrays have no buffers, only children
+        data.EnsureDataType(ArrowTypeId.RunEndEncoded);
+
+        ValidateRunEndsType(runEnds);
+        RunEnds = runEnds;
+        Values = values;
+    }
+
+    // Validates the child layout before indexing into it, so malformed ArrayData fails with a
+    // descriptive argument exception instead of a null-reference or index-out-of-range error.
+    private static IArrowArray BuildChild(ArrayData data, int childIndex)
+    {
+        if (data is null)
+        {
+            throw new ArgumentNullException(nameof(data));
+        }
+
+        if (data.Children is null || data.Children.Length != 2)
+        {
+            throw new ArgumentException(
+                "Run-end encoded array data must have exactly two children (run_ends and values).",
+                nameof(data));
+        }
+
+        return ArrowArrayFactory.BuildArray(data.Children[childIndex]);
+    }
+
+    private static ArrayData CreateArrayData(IArrowArray runEnds, IArrowArray values)
+    {
+        if (runEnds is null)
+        {
+            throw new ArgumentNullException(nameof(runEnds));
+        }
+
+        if (values is null)
+        {
+            throw new ArgumentNullException(nameof(values));
+        }
+
+        ValidateRunEndsType(runEnds);
+
+        // Each run end pairs with exactly one value.
+        if (runEnds.Length != values.Length)
+        {
+            throw new ArgumentException(
+                $"Run ends and values must have the same length, but got {runEnds.Length} and {values.Length}.");
+        }
+
+        // The logical length of a REE array is determined by the last value in run_ends
+        int logicalLength = GetLogicalLength(runEnds);
+
+        var dataType = new RunEndEncodedType(runEnds.Data.DataType, values.Data.DataType);
+
+        return new ArrayData(
+            dataType,
+            logicalLength,
+            nullCount: 0, // REE arrays don't have a validity bitmap
+            offset: 0,
+            buffers: [],
+            children: [runEnds.Data, values.Data]);
+    }
+
+    private static void ValidateRunEndsType(IArrowArray runEnds)
+    {
+        ArrowTypeId typeId = runEnds.Data.DataType.TypeId;
+        if (typeId != ArrowTypeId.Int16 &&
+            typeId != ArrowTypeId.Int32 &&
+            typeId != ArrowTypeId.Int64)
+        {
+            throw new ArgumentException(
+                $"Run ends array must be Int16, Int32, or Int64, but got {typeId}",
+                nameof(runEnds));
+        }
+    }
+
+    // Reads one run end widened to long, so callers need a single code path for all three widths.
+    // A null slot is read as 0, matching the previous per-type helpers.
+    private static long GetRunEnd(IArrowArray runEnds, int physicalIndex)
+    {
+        return runEnds switch
+        {
+            Int16Array int16Array => int16Array.GetValue(physicalIndex) ?? 0,
+            Int32Array int32Array => int32Array.GetValue(physicalIndex) ?? 0,
+            Int64Array int64Array => int64Array.GetValue(physicalIndex) ?? 0,
+            _ => throw new InvalidOperationException($"Unexpected run ends array type: {runEnds.GetType()}"),
+        };
+    }
+
+    private static int GetLogicalLength(IArrowArray runEnds)
+    {
+        if (runEnds.Length == 0)
+        {
+            return 0;
+        }
+
+        // Get the last run end value which represents the logical length
+        long lastRunEnd = GetRunEnd(runEnds, runEnds.Length - 1);
+        if (lastRunEnd < 0)
+        {
+            throw new ArgumentException("Run ends value must not be negative.");
+        }
+
+        if (lastRunEnd > int.MaxValue)
+        {
+            throw new ArgumentException("Run ends value exceeds maximum supported length.");
+        }
+
+        return (int)lastRunEnd;
+    }
+
+    /// <summary>
+    /// Finds the physical index in the run_ends array that contains the specified logical index.
+    /// </summary>
+    /// <param name="logicalIndex">The logical index in the decoded array.</param>
+    /// <returns>The physical index in the run_ends/values arrays.</returns>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="logicalIndex"/> is negative or not less than the array length.</exception>
+    public int FindPhysicalIndex(int logicalIndex)
+    {
+        if (logicalIndex < 0 || logicalIndex >= Length)
+        {
+            throw new ArgumentOutOfRangeException(nameof(logicalIndex));
+        }
+
+        // Run ends count from the start of the unsliced sequence while logicalIndex is relative
+        // to this array, so shift by the data offset. Widen first: offset + index may exceed int.
+        long target = (long)Data.Offset + logicalIndex;
+
+        // Binary search for the first run whose end lies beyond the target position
+        int left = 0;
+        int right = RunEnds.Length - 1;
+
+        while (left < right)
+        {
+            int mid = left + (right - left) / 2;
+
+            if (target < GetRunEnd(RunEnds, mid))
+            {
+                right = mid;
+            }
+            else
+            {
+                left = mid + 1;
+            }
+        }
+
+        return left;
+    }
+
+    public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);
+}
diff --git a/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
index d5d3758d..3bb24abc 100644
--- a/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
+++ b/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
@@ -269,6 +269,9 @@ private ArrayData LoadField(
             {
                 case ArrowTypeId.Null:
                     return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, System.Array.Empty());
+                case ArrowTypeId.RunEndEncoded:
+                    buffers = 0;
+                    break;
                 case ArrowTypeId.Union:
                     if (version < MetadataVersion.V5)
                     {
diff --git a/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 6c58c154..479b8841 100644
--- a/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -73,7 +73,8 @@ private class ArrowRecordBatchFlatBufferBuilder :
             IArrowArrayVisitor,
             IArrowArrayVisitor,
             IArrowArrayVisitor,
-            IArrowArrayVisitor
+            IArrowArrayVisitor,
+            IArrowArrayVisitor<RunEndEncodedArray>
         {
             public readonly struct FieldNode
             {
@@ -345,6 +346,15 @@ public void Visit(NullArray array)
                 // There are no buffers for a NullArray
             }
 
+            public void Visit(RunEndEncodedArray array)
+            {
+                // REE arrays have no buffers at the top level, only child arrays
+                // Visit the run_ends array
+                VisitArray(array.RunEnds);
+                // Visit the values array
+                VisitArray(array.Values);
+            }
+
             private ArrowBuffer GetZeroBasedValueOffsets(ArrowBuffer valueOffsetsBuffer, int arrayOffset, int arrayLength)
             {
                 var requiredBytes = CalculatePaddedBufferLength(sizeof(int) * (arrayLength + 1));
diff --git a/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
index 503680a2..050c563e 100644
--- a/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
+++ b/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
@@ -81,7 +81,8 @@ class TypeVisitor :
             IArrowTypeVisitor,
             IArrowTypeVisitor,
             IArrowTypeVisitor,
-            IArrowTypeVisitor
+            IArrowTypeVisitor,
+            IArrowTypeVisitor<RunEndEncodedType>
         {
             private FlatBufferBuilder Builder { get; }
 
@@ -343,6 +344,14 @@ public void Visit(NullType type)
                     Flatbuf.Null.EndNull(Builder));
             }
 
+            public void Visit(RunEndEncodedType type)
+            {
+                Flatbuf.RunEndEncoded.StartRunEndEncoded(Builder);
+                Result = FieldType.Build(
+                    Flatbuf.Type.RunEndEncoded,
+                    Flatbuf.RunEndEncoded.EndRunEndEncoded(Builder));
+            }
+
             public void Visit(IArrowType type)
             {
                 throw new NotImplementedException($"Cannot visit type {type}");
diff --git a/src/Apache.Arrow/Ipc/MessageSerializer.cs b/src/Apache.Arrow/Ipc/MessageSerializer.cs
index 7c7f7a38..ab7f3a75 100644
--- a/src/Apache.Arrow/Ipc/MessageSerializer.cs
+++ b/src/Apache.Arrow/Ipc/MessageSerializer.cs
@@ -240,6 +240,12 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c
                     }
                     Flatbuf.Map meta = field.Type().Value;
                     return new Types.MapType(childFields[0], meta.KeysSorted);
+                case Flatbuf.Type.RunEndEncoded:
+                    if (childFields == null || childFields.Length != 2)
+                    {
+                        throw new InvalidDataException("Run-end encoded type must have exactly two children (run_ends and values).");
+                    }
+                    return new Types.RunEndEncodedType(childFields[0], childFields[1]);
                 default:
                     throw new InvalidDataException($"Arrow primitive '{field.TypeType}' is unsupported.");
             }
diff --git a/src/Apache.Arrow/Types/IArrowType.cs b/src/Apache.Arrow/Types/IArrowType.cs
index 657b234b..39e98a49 100644
--- a/src/Apache.Arrow/Types/IArrowType.cs
+++ b/src/Apache.Arrow/Types/IArrowType.cs
@@ -58,6 +58,7 @@ public enum ArrowTypeId
         LargeString,
         Decimal32,
         Decimal64,
+        RunEndEncoded,
     }
 
     public interface IArrowType
diff --git a/src/Apache.Arrow/Types/RunEndEncodedType.cs b/src/Apache.Arrow/Types/RunEndEncodedType.cs
new file mode 100644
index 00000000..f8ed4c40
--- /dev/null
+++ b/src/Apache.Arrow/Types/RunEndEncodedType.cs
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+
+namespace Apache.Arrow.Types;
+
+/// <summary>
+/// Represents a run-end encoded array type.
+/// Contains two child arrays: run_ends and values.
+/// The run_ends child array must be a 16/32/64-bit signed integer array
+/// which encodes the indices at which the run with the value in
+/// each corresponding index in the values child array ends.
+/// </summary>
+public sealed class RunEndEncodedType : NestedType
+{
+    public override ArrowTypeId TypeId => ArrowTypeId.RunEndEncoded;
+    public override string Name => "run_end_encoded";
+
+    /// <summary>
+    /// Gets the run ends field (must be Int16, Int32, or Int64).
+    /// </summary>
+    public Field RunEndsField => Fields[0];
+
+    /// <summary>
+    /// Gets the values field (can be any type).
+    /// </summary>
+    public Field ValuesField => Fields[1];
+
+    /// <summary>
+    /// Gets the data type of the run ends array.
+    /// </summary>
+    public IArrowType RunEndsDataType => RunEndsField.DataType;
+
+    /// <summary>
+    /// Gets the data type of the values array.
+    /// </summary>
+    public IArrowType ValuesDataType => ValuesField.DataType;
+
+    /// <summary>
+    /// Creates a new RunEndEncodedType with the specified run ends and values fields.
+    /// </summary>
+    /// <param name="runEndsField">The run ends field (must be Int16, Int32, or Int64).</param>
+    /// <param name="valuesField">The values field (can be any type).</param>
+    /// <exception cref="ArgumentNullException">Either field, or the run ends field's data type, is null.</exception>
+    /// <exception cref="ArgumentException">The run ends field is not Int16, Int32, or Int64.</exception>
+    public RunEndEncodedType(Field runEndsField, Field valuesField)
+        : base([RequireField(runEndsField, nameof(runEndsField)), RequireField(valuesField, nameof(valuesField))])
+    {
+        ValidateRunEndsType(runEndsField.DataType, nameof(runEndsField));
+    }
+
+    /// <summary>
+    /// Creates a new RunEndEncodedType with the specified run ends and values data types.
+    /// Uses default field names "run_ends" and "values".
+    /// </summary>
+    /// <param name="runEndsDataType">The run ends data type (must be Int16, Int32, or Int64).</param>
+    /// <param name="valuesDataType">The values data type (can be any type).</param>
+    /// <exception cref="ArgumentNullException"><paramref name="runEndsDataType"/> is null.</exception>
+    /// <exception cref="ArgumentException"><paramref name="runEndsDataType"/> is not Int16, Int32, or Int64.</exception>
+    public RunEndEncodedType(IArrowType runEndsDataType, IArrowType valuesDataType)
+        : this(new Field("run_ends", ValidateRunEndsType(runEndsDataType, nameof(runEndsDataType)), nullable: false),
+               new Field("values", valuesDataType, nullable: true))
+    {
+    }
+
+    // Null-checks a child field inside the base-constructor argument list, which is the only
+    // place a check can run before the base class receives the fields.
+    private static Field RequireField(Field field, string paramName)
+    {
+        return field ?? throw new ArgumentNullException(paramName);
+    }
+
+    // Returns the type unchanged so the check can run inline in a constructor initializer.
+    // paramName is the calling constructor's own argument name, so the exception points at it.
+    private static IArrowType ValidateRunEndsType(IArrowType runEndsDataType, string paramName)
+    {
+        if (runEndsDataType is null)
+        {
+            throw new ArgumentNullException(paramName);
+        }
+
+        if (runEndsDataType.TypeId != ArrowTypeId.Int16 &&
+            runEndsDataType.TypeId != ArrowTypeId.Int32 &&
+            runEndsDataType.TypeId != ArrowTypeId.Int64)
+        {
+            throw new ArgumentException(
+                $"Run ends type must be Int16, Int32, or Int64, but got {runEndsDataType.TypeId}",
+                paramName);
+        }
+
+        return runEndsDataType;
+    }
+
+    public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
+}
diff --git a/test/Apache.Arrow.Tests/RunEndEncodedArrayTests.cs b/test/Apache.Arrow.Tests/RunEndEncodedArrayTests.cs
new file mode 100644
index 00000000..669aaac5
--- /dev/null
+++ b/test/Apache.Arrow.Tests/RunEndEncodedArrayTests.cs
@@ -0,0 +1,321 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.IO;
+using Apache.Arrow.Ipc;
+using Apache.Arrow.Types;
+using Xunit;
+
+namespace Apache.Arrow.Tests;
+
+public class RunEndEncodedArrayTests
+{
+    [Fact]
+    public void TestRunEndEncodedTypeCreation()
+    {
+        // Test with explicit fields
+        var runEndsField = new Field("run_ends", Int32Type.Default, nullable: false);
+        var valuesField = new Field("values", StringType.Default, nullable: true);
+        var reeType = new RunEndEncodedType(runEndsField, valuesField);
+
+        Assert.Equal(ArrowTypeId.RunEndEncoded, reeType.TypeId);
+        Assert.Equal("run_end_encoded", reeType.Name);
+        Assert.Equal(runEndsField, reeType.RunEndsField);
+        Assert.Equal(valuesField, reeType.ValuesField);
+        Assert.Equal(Int32Type.Default.TypeId, reeType.RunEndsDataType.TypeId);
+        Assert.Equal(StringType.Default.TypeId, reeType.ValuesDataType.TypeId);
+    }
+
+    [Fact]
+    public void TestRunEndEncodedTypeCreationWithDataTypes()
+    {
+        // Test with data types (uses default field names)
+        var reeType = new RunEndEncodedType(Int32Type.Default, StringType.Default);
+
+        Assert.Equal(ArrowTypeId.RunEndEncoded, reeType.TypeId);
+        Assert.Equal("run_ends", reeType.RunEndsField.Name);
+        Assert.Equal("values", reeType.ValuesField.Name);
+    }
+
+    [Fact]
+    public void TestRunEndEncodedTypeValidation()
+    {
+        // Invalid run ends type (must be Int16, Int32, or Int64)
+        Assert.Throws<ArgumentException>(() => new RunEndEncodedType(Int8Type.Default, StringType.Default));
+        Assert.Throws<ArgumentException>(() => new RunEndEncodedType(FloatType.Default, StringType.Default));
+        Assert.Throws<ArgumentException>(() => new RunEndEncodedType(StringType.Default, StringType.Default));
+
+        // Valid run ends types
+        Assert.NotNull(new RunEndEncodedType(Int16Type.Default, StringType.Default)); // Should not throw
+        Assert.NotNull(new RunEndEncodedType(Int32Type.Default, StringType.Default)); // Should not throw
+        Assert.NotNull(new RunEndEncodedType(Int64Type.Default, StringType.Default)); // Should not throw
+    }
+
+    [Fact]
+    public void TestRunEndEncodedArrayWithInt32RunEnds()
+    {
+        // Create run ends: [3, 7, 10, 15]
+        // This represents: 3 'A's, 4 'B's, 3 'C's, 5 'D's
+        var runEndsBuilder = new Int32Array.Builder();
+        runEndsBuilder.AppendRange([3, 7, 10, 15]);
+        Int32Array runEnds = runEndsBuilder.Build();
+
+        // Create values: ['A', 'B', 'C', 'D']
+        var valuesBuilder = new StringArray.Builder();
+        valuesBuilder.AppendRange(["A", "B", "C", "D"]);
+        StringArray values = valuesBuilder.Build();
+
+        // Create REE array
+        var reeArray = new RunEndEncodedArray(runEnds, values);
+
+        Assert.Equal(15, reeArray.Length); // Logical length is the last run end value
+        Assert.Equal(0, reeArray.NullCount); // REE arrays don't have nulls at the top level
+        Assert.Equal(runEnds, reeArray.RunEnds);
+        Assert.Equal(values, reeArray.Values);
+    }
+
+    [Fact]
+    public void TestRunEndEncodedArrayWithInt16RunEnds()
+    {
+        var runEndsBuilder = new Int16Array.Builder();
+        runEndsBuilder.AppendRange([2, 5, 8]);
+        Int16Array runEnds = runEndsBuilder.Build();
+
+        var valuesBuilder = new Int32Array.Builder();
+        valuesBuilder.AppendRange([100, 200, 300]);
+        Int32Array values = valuesBuilder.Build();
+
+        var reeArray = new RunEndEncodedArray(runEnds, values);
+
+        Assert.Equal(8, reeArray.Length);
+        Assert.Equal(runEnds, reeArray.RunEnds);
+        Assert.Equal(values, reeArray.Values);
+    }
+
+    [Fact]
+    public void TestRunEndEncodedArrayWithInt64RunEnds()
+    {
+        var runEndsBuilder = new Int64Array.Builder();
+        runEndsBuilder.AppendRange([1000, 2000, 3000]);
+        Int64Array runEnds = runEndsBuilder.Build();
+
+        var valuesBuilder = new DoubleArray.Builder();
+        valuesBuilder.AppendRange([1.5, 2.5, 3.5]);
+        DoubleArray values = valuesBuilder.Build();
+
+        var reeArray = new RunEndEncodedArray(runEnds, values);
+
+        Assert.Equal(3000, reeArray.Length);
+        Assert.Equal(runEnds, reeArray.RunEnds);
+        Assert.Equal(values, reeArray.Values);
+    }
+
+    [Fact]
+    public void TestRunEndEncodedArrayInvalidRunEndsType()
+    {
+        Int8Array invalidRunEnds = new Int8Array.Builder().AppendRange([1, 2, 3]).Build();
+        StringArray values = new StringArray.Builder().AppendRange(["A", "B", "C"]).Build();
+
+        Assert.Throws<ArgumentException>(() => new RunEndEncodedArray(invalidRunEnds, values));
+    }
+
+    [Fact]
+    public void TestRunEndEncodedArrayEmpty()
+    {
+        Int32Array runEnds = new Int32Array.Builder().Build();
+        StringArray values = new StringArray.Builder().Build();
+
+        var reeArray = new RunEndEncodedArray(runEnds, values);
+
+        Assert.Equal(0, reeArray.Length);
+    }
+
+    [Fact]
+    public void TestFindPhysicalIndexInt32()
+    {
+        // Run ends: [3, 7, 10, 15] means:
+        // Logical indices 0-2 map to physical index 0 (value 'A')
+        // Logical indices 3-6 map to physical index 1 (value 'B')
+        // Logical indices 7-9 map to physical index 2 (value 'C')
+        // Logical indices 10-14 map to physical index 3 (value 'D')
+        Int32Array runEnds = new Int32Array.Builder()
+            .AppendRange([3, 7, 10, 15])
+            .Build();
+        StringArray values = new StringArray.Builder()
+            .AppendRange(["A", "B", "C", "D"])
+            .Build();
+
+        var reeArray = new RunEndEncodedArray(runEnds, values);
+
+        Assert.Equal(0, reeArray.FindPhysicalIndex(0));
+        Assert.Equal(0, reeArray.FindPhysicalIndex(1));
+        Assert.Equal(0, reeArray.FindPhysicalIndex(2));
+        Assert.Equal(1, reeArray.FindPhysicalIndex(3));
+        Assert.Equal(1, reeArray.FindPhysicalIndex(4));
+        Assert.Equal(1, reeArray.FindPhysicalIndex(5));
+        Assert.Equal(1, reeArray.FindPhysicalIndex(6));
+        Assert.Equal(2, reeArray.FindPhysicalIndex(7));
+        Assert.Equal(2, reeArray.FindPhysicalIndex(8));
+        Assert.Equal(2, reeArray.FindPhysicalIndex(9));
+        Assert.Equal(3, reeArray.FindPhysicalIndex(10));
+        Assert.Equal(3, reeArray.FindPhysicalIndex(11));
+        Assert.Equal(3, reeArray.FindPhysicalIndex(14));
+    }
+
+    [Fact]
+    public void TestFindPhysicalIndexOutOfRange()
+    {
+        Int32Array runEnds = new Int32Array.Builder().AppendRange([3, 7]).Build();
+        StringArray values = new StringArray.Builder().AppendRange(["A", "B"]).Build();
+        var reeArray = new RunEndEncodedArray(runEnds, values);
+
+        Assert.Throws<ArgumentOutOfRangeException>(() => reeArray.FindPhysicalIndex(-1));
+        Assert.Throws<ArgumentOutOfRangeException>(() => reeArray.FindPhysicalIndex(7));
+        Assert.Throws<ArgumentOutOfRangeException>(() => reeArray.FindPhysicalIndex(100));
+    }
+
+    [Fact]
+    public void TestRunEndEncodedArraySerialization()
+    {
+        // Create a REE array
+        Int32Array runEnds = new Int32Array.Builder().AppendRange([3, 7, 10]).Build();
+        StringArray values = new StringArray.Builder().AppendRange(["foo", "bar", "baz"]).Build();
+        var reeArray = new RunEndEncodedArray(runEnds, values);
+
+        // Create a record batch with the REE array
+        var reeField = new Field("ree_column", reeArray.Data.DataType, nullable: false);
+        var schema = new Schema([reeField], null);
+        var recordBatch = new RecordBatch(schema, [reeArray], reeArray.Length);
+
+        // Serialize and deserialize
+        using var stream = new MemoryStream();
+        using (var writer = new ArrowStreamWriter(stream, schema, leaveOpen: true))
+        {
+            writer.WriteRecordBatch(recordBatch);
+            writer.WriteEnd();
+        }
+
+        stream.Position = 0;
+
+        using var reader = new ArrowStreamReader(stream);
+        RecordBatch readBatch = reader.ReadNextRecordBatch();
+
+        Assert.NotNull(readBatch);
+        Assert.Equal(1, readBatch.ColumnCount);
+        Assert.Equal(10, readBatch.Length);
+
+        var readArray = readBatch.Column(0) as RunEndEncodedArray;
+        Assert.NotNull(readArray);
+        Assert.Equal(10, readArray.Length);
+        Assert.Equal(ArrowTypeId.RunEndEncoded, readArray.Data.DataType.TypeId);
+
+        // Verify run ends
+        var readRunEnds = readArray.RunEnds as Int32Array;
+        Assert.NotNull(readRunEnds);
+        Assert.Equal(3, readRunEnds.Length);
+        Assert.Equal(3, readRunEnds.GetValue(0));
+        Assert.Equal(7, readRunEnds.GetValue(1));
+        Assert.Equal(10, readRunEnds.GetValue(2));
+
+        // Verify values
+        var readValues = readArray.Values as StringArray;
+        Assert.NotNull(readValues);
+        Assert.Equal(3, readValues.Length);
+        Assert.Equal("foo", readValues.GetString(0));
+        Assert.Equal("bar", readValues.GetString(1));
+        Assert.Equal("baz", readValues.GetString(2));
+    }
+
+    [Fact]
+    public void TestRunEndEncodedArrayWithDifferentValueTypes()
+    {
+        // Test with boolean values
+        Int32Array runEnds1 = new Int32Array.Builder().AppendRange([5, 10]).Build();
+        BooleanArray values1 = new BooleanArray.Builder().AppendRange([true, false]).Build();
+        var reeArray1 = new RunEndEncodedArray(runEnds1, values1);
+        Assert.Equal(10, reeArray1.Length);
+
+        // Test with double values
+        Int32Array runEnds2 = new Int32Array.Builder().AppendRange([3, 8]).Build();
+        DoubleArray values2 = new DoubleArray.Builder().AppendRange([1.5, 2.5]).Build();
+        var reeArray2 = new RunEndEncodedArray(runEnds2, values2);
+        Assert.Equal(8, reeArray2.Length);
+
+        // Test with list values
+        var listBuilder = new ListArray.Builder(Int32Type.Default);
+        var int32Builder = (Int32Array.Builder)listBuilder.ValueBuilder;
+        listBuilder.Append();
+        int32Builder.Append(1);
+        int32Builder.Append(2);
+        listBuilder.Append();
+        int32Builder.Append(3);
+        int32Builder.Append(4);
+        ListArray listValues = listBuilder.Build();
+
+        Int32Array runEnds3 = new Int32Array.Builder().AppendRange([2, 5]).Build();
+        var reeArray3 = new RunEndEncodedArray(runEnds3, listValues);
+        Assert.Equal(5, reeArray3.Length);
+    }
+
+    [Fact]
+    public void TestRunEndEncodedArrayFromArrayData()
+    {
+        // Create arrays
+        Int32Array runEnds = new Int32Array.Builder().AppendRange([2, 5]).Build();
+        StringArray values = new StringArray.Builder().AppendRange(["X", "Y"]).Build();
+
+        // Create ArrayData manually
+        var reeType = new RunEndEncodedType(Int32Type.Default, StringType.Default);
+        var arrayData = new ArrayData(
+            reeType,
+            length: 5,
+            nullCount: 0,
+            offset: 0,
+            buffers: [],
+            children: [runEnds.Data, values.Data]);
+
+        // Create REE array from ArrayData
+        var reeArray = new RunEndEncodedArray(arrayData);
+
+        Assert.Equal(5, reeArray.Length);
+        Assert.Equal(0, reeArray.NullCount);
+        Assert.IsType<Int32Array>(reeArray.RunEnds);
+        Assert.IsType<StringArray>(reeArray.Values);
+    }
+
+    [Fact]
+    public void TestRunEndEncodedArrayFactoryBuild()
+    {
+        // Test that ArrowArrayFactory can build REE arrays
+        Int32Array runEnds = new Int32Array.Builder().AppendRange([3, 6]).Build();
+        Int64Array values = new Int64Array.Builder().AppendRange([100, 200]).Build();
+
+        var reeType = new RunEndEncodedType(Int32Type.Default, Int64Type.Default);
+        var arrayData = new ArrayData(
+            reeType,
+            length: 6,
+            nullCount: 0,
+            offset: 0,
+            buffers: [],
+            children: [runEnds.Data, values.Data]);
+
+        IArrowArray array = ArrowArrayFactory.BuildArray(arrayData);
+
+        Assert.IsType<RunEndEncodedArray>(array);
+        var reeArray = (RunEndEncodedArray)array;
+        Assert.Equal(6, reeArray.Length);
+    }
+}