diff --git a/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
index 8aaba2d9..5552d59f 100644
--- a/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
+++ b/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
@@ -107,6 +107,8 @@ public static IArrowArray BuildArray(ArrayData data)
return new FixedSizeListArray(data);
case ArrowTypeId.Interval:
return IntervalArray.Create(data);
+ case ArrowTypeId.RunEndEncoded:
+ return new RunEndEncodedArray(data);
default:
throw new NotSupportedException($"An ArrowArray cannot be built for type {data.DataType.TypeId}.");
}
diff --git a/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs b/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
new file mode 100644
index 00000000..08e2683d
--- /dev/null
+++ b/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
@@ -0,0 +1,222 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow;
+
+///
+/// Represents a run-end encoded array.
+/// A run-end encoded array stores consecutive runs of the same value more efficiently.
+/// It contains two child arrays: run_ends (Int16/Int32/Int64) and values (any type).
+/// The run_ends array stores the cumulative end positions of each run.
+///
+public class RunEndEncodedArray : Array
+{
+ ///
+ /// Gets the run ends array (Int16Array, Int32Array, or Int64Array).
+ /// This array contains the cumulative end indices for each run.
+ ///
+ public IArrowArray RunEnds { get; }
+
+ ///
+ /// Gets the values array.
+ /// This array contains the actual values that are run-length encoded.
+ ///
+ public IArrowArray Values { get; }
+
+ ///
+ /// Creates a new RunEndEncodedArray from ArrayData.
+ ///
+ /// The array data containing run ends and values as children.
+ public RunEndEncodedArray(ArrayData data)
+ : this(data, ArrowArrayFactory.BuildArray(data.Children[0]), ArrowArrayFactory.BuildArray(data.Children[1]))
+ {
+ }
+
+ ///
+ /// Creates a new RunEndEncodedArray with specified run ends and values arrays.
+ ///
+ /// The run ends array (must be Int16Array, Int32Array, or Int64Array).
+ /// The values array (can be any type).
+ public RunEndEncodedArray(IArrowArray runEnds, IArrowArray values)
+ : this(CreateArrayData(runEnds, values), runEnds, values)
+ {
+ }
+
+ private RunEndEncodedArray(ArrayData data, IArrowArray runEnds, IArrowArray values)
+ : base(data)
+ {
+ data.EnsureBufferCount(0); // REE arrays have no buffers, only children
+ data.EnsureDataType(ArrowTypeId.RunEndEncoded);
+
+ ValidateRunEndsType(runEnds);
+ RunEnds = runEnds;
+ Values = values;
+ }
+
+ private static ArrayData CreateArrayData(IArrowArray runEnds, IArrowArray values)
+ {
+ ValidateRunEndsType(runEnds);
+
+ // The logical length of a REE array is determined by the last value in run_ends
+ int logicalLength = GetLogicalLength(runEnds);
+
+ var dataType = new RunEndEncodedType(runEnds.Data.DataType, values.Data.DataType);
+
+ return new ArrayData(
+ dataType,
+ logicalLength,
+ nullCount: 0, // REE arrays don't have a validity bitmap
+ offset: 0,
+ buffers: [],
+ children: [runEnds.Data, values.Data]);
+ }
+
+ private static void ValidateRunEndsType(IArrowArray runEnds)
+ {
+ ArrowTypeId typeId = runEnds.Data.DataType.TypeId;
+ if (typeId != ArrowTypeId.Int16 &&
+ typeId != ArrowTypeId.Int32 &&
+ typeId != ArrowTypeId.Int64)
+ {
+ throw new ArgumentException(
+ $"Run ends array must be Int16, Int32, or Int64, but got {typeId}",
+ nameof(runEnds));
+ }
+ }
+
+ private static int GetLogicalLength(IArrowArray runEnds)
+ {
+ if (runEnds.Length == 0)
+ {
+ return 0;
+ }
+
+ // Get the last run end value which represents the logical length
+ switch (runEnds)
+ {
+ case Int16Array int16Array:
+ return int16Array.GetValue(int16Array.Length - 1) ?? 0;
+ case Int32Array int32Array:
+ return int32Array.GetValue(int32Array.Length - 1) ?? 0;
+ case Int64Array int64Array:
+ {
+ long? lastValue = int64Array.GetValue(int64Array.Length - 1);
+ if (lastValue.HasValue && lastValue.Value > int.MaxValue)
+ {
+ throw new ArgumentException("Run ends value exceeds maximum supported length.");
+ }
+ return (int)(lastValue ?? 0);
+ }
+ default:
+ throw new InvalidOperationException($"Unexpected run ends array type: {runEnds.GetType()}");
+ }
+ }
+
+ ///
+ /// Finds the physical index in the run_ends array that contains the specified logical index.
+ ///
+ /// The logical index in the decoded array.
+ /// The physical index in the run_ends/values arrays.
+ public int FindPhysicalIndex(int logicalIndex)
+ {
+ if (logicalIndex < 0 || logicalIndex >= Length)
+ {
+ throw new ArgumentOutOfRangeException(nameof(logicalIndex));
+ }
+
+ // Binary search to find the run that contains this logical index
+ return RunEnds switch
+ {
+ Int16Array int16Array => BinarySearchRunEnds(int16Array, logicalIndex),
+ Int32Array int32Array => BinarySearchRunEnds(int32Array, logicalIndex),
+ Int64Array int64Array => BinarySearchRunEnds(int64Array, logicalIndex),
+ _ => throw new InvalidOperationException($"Unexpected run ends array type: {RunEnds.GetType()}"),
+ };
+ }
+
+ private static int BinarySearchRunEnds(Int16Array runEnds, int logicalIndex)
+ {
+ int left = 0;
+ int right = runEnds.Length - 1;
+
+ while (left < right)
+ {
+ int mid = left + (right - left) / 2;
+ int runEnd = runEnds.GetValue(mid) ?? 0;
+
+ if (logicalIndex < runEnd)
+ {
+ right = mid;
+ }
+ else
+ {
+ left = mid + 1;
+ }
+ }
+
+ return left;
+ }
+
+ private static int BinarySearchRunEnds(Int32Array runEnds, int logicalIndex)
+ {
+ int left = 0;
+ int right = runEnds.Length - 1;
+
+ while (left < right)
+ {
+ int mid = left + (right - left) / 2;
+ int runEnd = runEnds.GetValue(mid) ?? 0;
+
+ if (logicalIndex < runEnd)
+ {
+ right = mid;
+ }
+ else
+ {
+ left = mid + 1;
+ }
+ }
+
+ return left;
+ }
+
+ private static int BinarySearchRunEnds(Int64Array runEnds, int logicalIndex)
+ {
+ int left = 0;
+ int right = runEnds.Length - 1;
+
+ while (left < right)
+ {
+ int mid = left + (right - left) / 2;
+ long runEnd = runEnds.GetValue(mid) ?? 0;
+
+ if (logicalIndex < runEnd)
+ {
+ right = mid;
+ }
+ else
+ {
+ left = mid + 1;
+ }
+ }
+
+ return left;
+ }
+
+ public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);
+}
diff --git a/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
index d5d3758d..3bb24abc 100644
--- a/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
+++ b/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
@@ -269,6 +269,9 @@ private ArrayData LoadField(
{
case ArrowTypeId.Null:
return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, System.Array.Empty());
+ case ArrowTypeId.RunEndEncoded:
+ buffers = 0;
+ break;
case ArrowTypeId.Union:
if (version < MetadataVersion.V5)
{
diff --git a/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 6c58c154..479b8841 100644
--- a/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -73,7 +73,8 @@ private class ArrowRecordBatchFlatBufferBuilder :
IArrowArrayVisitor,
IArrowArrayVisitor,
IArrowArrayVisitor,
- IArrowArrayVisitor
+ IArrowArrayVisitor,
+ IArrowArrayVisitor
{
public readonly struct FieldNode
{
@@ -345,6 +346,15 @@ public void Visit(NullArray array)
// There are no buffers for a NullArray
}
+ public void Visit(RunEndEncodedArray array)
+ {
+ // An REE array contributes no buffers of its own; only its two children are visited, run_ends first.
+ // NOTE(review): array.Offset is not consulted here — confirm that sliced REE arrays serialize as intended.
+ VisitArray(array.RunEnds);
+ // The values child follows the run_ends child.
+ VisitArray(array.Values);
+ }
+
private ArrowBuffer GetZeroBasedValueOffsets(ArrowBuffer valueOffsetsBuffer, int arrayOffset, int arrayLength)
{
var requiredBytes = CalculatePaddedBufferLength(sizeof(int) * (arrayLength + 1));
diff --git a/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
index 503680a2..050c563e 100644
--- a/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
+++ b/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
@@ -81,7 +81,8 @@ class TypeVisitor :
IArrowTypeVisitor,
IArrowTypeVisitor,
IArrowTypeVisitor,
- IArrowTypeVisitor
+ IArrowTypeVisitor,
+ IArrowTypeVisitor
{
private FlatBufferBuilder Builder { get; }
@@ -343,6 +344,14 @@ public void Visit(NullType type)
Flatbuf.Null.EndNull(Builder));
}
+ public void Visit(RunEndEncodedType type)
+ {
+     // Empty table: nothing is added between Start and End.
+     Flatbuf.RunEndEncoded.StartRunEndEncoded(Builder);
+     var tableOffset = Flatbuf.RunEndEncoded.EndRunEndEncoded(Builder);
+     Result = FieldType.Build(Flatbuf.Type.RunEndEncoded, tableOffset);
+ }
+
public void Visit(IArrowType type)
{
throw new NotImplementedException($"Cannot visit type {type}");
diff --git a/src/Apache.Arrow/Ipc/MessageSerializer.cs b/src/Apache.Arrow/Ipc/MessageSerializer.cs
index 7c7f7a38..ab7f3a75 100644
--- a/src/Apache.Arrow/Ipc/MessageSerializer.cs
+++ b/src/Apache.Arrow/Ipc/MessageSerializer.cs
@@ -240,6 +240,12 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c
}
Flatbuf.Map meta = field.Type().Value;
return new Types.MapType(childFields[0], meta.KeysSorted);
+ case Flatbuf.Type.RunEndEncoded:
+ if (childFields == null || childFields.Length != 2)
+ {
+ throw new InvalidDataException($"Run-end encoded type must have exactly two children (run_ends and values).");
+ }
+ return new Types.RunEndEncodedType(childFields[0], childFields[1]);
default:
throw new InvalidDataException($"Arrow primitive '{field.TypeType}' is unsupported.");
}
diff --git a/src/Apache.Arrow/Types/IArrowType.cs b/src/Apache.Arrow/Types/IArrowType.cs
index 657b234b..39e98a49 100644
--- a/src/Apache.Arrow/Types/IArrowType.cs
+++ b/src/Apache.Arrow/Types/IArrowType.cs
@@ -58,6 +58,7 @@ public enum ArrowTypeId
LargeString,
Decimal32,
Decimal64,
+ RunEndEncoded,
}
public interface IArrowType
diff --git a/src/Apache.Arrow/Types/RunEndEncodedType.cs b/src/Apache.Arrow/Types/RunEndEncodedType.cs
new file mode 100644
index 00000000..f8ed4c40
--- /dev/null
+++ b/src/Apache.Arrow/Types/RunEndEncodedType.cs
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+
+namespace Apache.Arrow.Types;
+
+///
+/// Represents a run-end encoded array type.
+/// Contains two child arrays: run_ends and values.
+/// The run_ends child array must be a 16/32/64-bit signed integer array
+/// which encodes the indices at which the run with the value in
+/// each corresponding index in the values child array ends.
+///
+public sealed class RunEndEncodedType : NestedType
+{
+ public override ArrowTypeId TypeId => ArrowTypeId.RunEndEncoded;
+ public override string Name => "run_end_encoded";
+
+ ///
+ /// Gets the run ends field (must be Int16, Int32, or Int64).
+ ///
+ public Field RunEndsField => Fields[0];
+
+ ///
+ /// Gets the values field (can be any type).
+ ///
+ public Field ValuesField => Fields[1];
+
+ ///
+ /// Gets the data type of the run ends array.
+ ///
+ public IArrowType RunEndsDataType => RunEndsField.DataType;
+
+ ///
+ /// Gets the data type of the values array.
+ ///
+ public IArrowType ValuesDataType => ValuesField.DataType;
+
+ ///
+ /// Creates a new RunEndEncodedType with the specified run ends and values fields.
+ ///
+ /// The run ends field (must be Int16, Int32, or Int64).
+ /// The values field (can be any type).
+ public RunEndEncodedType(Field runEndsField, Field valuesField)
+ : base([runEndsField, valuesField])
+ {
+ ValidateRunEndsType(runEndsField.DataType);
+ }
+
+ ///
+ /// Creates a new RunEndEncodedType with the specified run ends and values data types.
+ /// Uses default field names "run_ends" and "values".
+ ///
+ /// The run ends data type (must be Int16, Int32, or Int64).
+ /// The values data type (can be any type).
+ public RunEndEncodedType(IArrowType runEndsDataType, IArrowType valuesDataType)
+ : this(new Field("run_ends", runEndsDataType, nullable: false),
+ new Field("values", valuesDataType, nullable: true))
+ {
+ }
+
+ private static void ValidateRunEndsType(IArrowType runEndsDataType)
+ {
+ if (runEndsDataType.TypeId != ArrowTypeId.Int16 &&
+ runEndsDataType.TypeId != ArrowTypeId.Int32 &&
+ runEndsDataType.TypeId != ArrowTypeId.Int64)
+ {
+ throw new ArgumentException(
+ $"Run ends type must be Int16, Int32, or Int64, but got {runEndsDataType.TypeId}",
+ nameof(runEndsDataType));
+ }
+ }
+
+ public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
+}
diff --git a/test/Apache.Arrow.Tests/RunEndEncodedArrayTests.cs b/test/Apache.Arrow.Tests/RunEndEncodedArrayTests.cs
new file mode 100644
index 00000000..669aaac5
--- /dev/null
+++ b/test/Apache.Arrow.Tests/RunEndEncodedArrayTests.cs
@@ -0,0 +1,321 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.IO;
+using Apache.Arrow.Ipc;
+using Apache.Arrow.Types;
+using Xunit;
+
+namespace Apache.Arrow.Tests;
+
+ // Covers RunEndEncodedType and RunEndEncodedArray: construction, validation, index lookup, and IPC round-trip.
+ public class RunEndEncodedArrayTests
+ {
+     [Fact]
+     public void TestRunEndEncodedTypeCreation()
+     {
+         // Test with explicit fields
+         var runEndsField = new Field("run_ends", Int32Type.Default, nullable: false);
+         var valuesField = new Field("values", StringType.Default, nullable: true);
+         var reeType = new RunEndEncodedType(runEndsField, valuesField);
+
+         Assert.Equal(ArrowTypeId.RunEndEncoded, reeType.TypeId);
+         Assert.Equal("run_end_encoded", reeType.Name);
+         Assert.Equal(runEndsField, reeType.RunEndsField);
+         Assert.Equal(valuesField, reeType.ValuesField);
+         Assert.Equal(Int32Type.Default.TypeId, reeType.RunEndsDataType.TypeId);
+         Assert.Equal(StringType.Default.TypeId, reeType.ValuesDataType.TypeId);
+     }
+
+     [Fact]
+     public void TestRunEndEncodedTypeCreationWithDataTypes()
+     {
+         // Test with data types (uses default field names)
+         var reeType = new RunEndEncodedType(Int32Type.Default, StringType.Default);
+
+         Assert.Equal(ArrowTypeId.RunEndEncoded, reeType.TypeId);
+         Assert.Equal("run_ends", reeType.RunEndsField.Name);
+         Assert.Equal("values", reeType.ValuesField.Name);
+     }
+
+     [Fact]
+     public void TestRunEndEncodedTypeValidation()
+     {
+         // Invalid run ends type (must be Int16, Int32, or Int64)
+         Assert.Throws<ArgumentException>(() => new RunEndEncodedType(Int8Type.Default, StringType.Default));
+         Assert.Throws<ArgumentException>(() => new RunEndEncodedType(FloatType.Default, StringType.Default));
+         Assert.Throws<ArgumentException>(() => new RunEndEncodedType(StringType.Default, StringType.Default));
+
+         // Valid run ends types
+         Assert.NotNull(new RunEndEncodedType(Int16Type.Default, StringType.Default)); // Should not throw
+         Assert.NotNull(new RunEndEncodedType(Int32Type.Default, StringType.Default)); // Should not throw
+         Assert.NotNull(new RunEndEncodedType(Int64Type.Default, StringType.Default)); // Should not throw
+     }
+
+     [Fact]
+     public void TestRunEndEncodedArrayWithInt32RunEnds()
+     {
+         // Create run ends: [3, 7, 10, 15]
+         // This represents: 3 'A's, 4 'B's, 3 'C's, 5 'D's
+         var runEndsBuilder = new Int32Array.Builder();
+         runEndsBuilder.AppendRange([3, 7, 10, 15]);
+         Int32Array runEnds = runEndsBuilder.Build();
+
+         // Create values: ['A', 'B', 'C', 'D']
+         var valuesBuilder = new StringArray.Builder();
+         valuesBuilder.AppendRange(["A", "B", "C", "D"]);
+         StringArray values = valuesBuilder.Build();
+
+         // Create REE array
+         var reeArray = new RunEndEncodedArray(runEnds, values);
+
+         Assert.Equal(15, reeArray.Length); // Logical length is the last run end value
+         Assert.Equal(0, reeArray.NullCount); // REE arrays don't have nulls at the top level
+         Assert.Equal(runEnds, reeArray.RunEnds);
+         Assert.Equal(values, reeArray.Values);
+     }
+
+     [Fact]
+     public void TestRunEndEncodedArrayWithInt16RunEnds()
+     {
+         var runEndsBuilder = new Int16Array.Builder();
+         runEndsBuilder.AppendRange([2, 5, 8]);
+         Int16Array runEnds = runEndsBuilder.Build();
+
+         var valuesBuilder = new Int32Array.Builder();
+         valuesBuilder.AppendRange([100, 200, 300]);
+         Int32Array values = valuesBuilder.Build();
+
+         var reeArray = new RunEndEncodedArray(runEnds, values);
+
+         Assert.Equal(8, reeArray.Length);
+         Assert.Equal(runEnds, reeArray.RunEnds);
+         Assert.Equal(values, reeArray.Values);
+     }
+
+     [Fact]
+     public void TestRunEndEncodedArrayWithInt64RunEnds()
+     {
+         var runEndsBuilder = new Int64Array.Builder();
+         runEndsBuilder.AppendRange([1000, 2000, 3000]);
+         Int64Array runEnds = runEndsBuilder.Build();
+
+         var valuesBuilder = new DoubleArray.Builder();
+         valuesBuilder.AppendRange([1.5, 2.5, 3.5]);
+         DoubleArray values = valuesBuilder.Build();
+
+         var reeArray = new RunEndEncodedArray(runEnds, values);
+
+         Assert.Equal(3000, reeArray.Length);
+         Assert.Equal(runEnds, reeArray.RunEnds);
+         Assert.Equal(values, reeArray.Values);
+     }
+
+     [Fact]
+     public void TestRunEndEncodedArrayInvalidRunEndsType()
+     {
+         Int8Array invalidRunEnds = new Int8Array.Builder().AppendRange([1, 2, 3]).Build();
+         StringArray values = new StringArray.Builder().AppendRange(["A", "B", "C"]).Build();
+
+         Assert.Throws<ArgumentException>(() => new RunEndEncodedArray(invalidRunEnds, values));
+     }
+
+     [Fact]
+     public void TestRunEndEncodedArrayEmpty()
+     {
+         Int32Array runEnds = new Int32Array.Builder().Build();
+         StringArray values = new StringArray.Builder().Build();
+
+         var reeArray = new RunEndEncodedArray(runEnds, values);
+
+         Assert.Equal(0, reeArray.Length);
+     }
+
+     [Fact]
+     public void TestFindPhysicalIndexInt32()
+     {
+         // Run ends: [3, 7, 10, 15] means:
+         // Logical indices 0-2 map to physical index 0 (value 'A')
+         // Logical indices 3-6 map to physical index 1 (value 'B')
+         // Logical indices 7-9 map to physical index 2 (value 'C')
+         // Logical indices 10-14 map to physical index 3 (value 'D')
+         Int32Array runEnds = new Int32Array.Builder()
+             .AppendRange([3, 7, 10, 15])
+             .Build();
+         StringArray values = new StringArray.Builder()
+             .AppendRange(["A", "B", "C", "D"])
+             .Build();
+
+         var reeArray = new RunEndEncodedArray(runEnds, values);
+
+         Assert.Equal(0, reeArray.FindPhysicalIndex(0));
+         Assert.Equal(0, reeArray.FindPhysicalIndex(1));
+         Assert.Equal(0, reeArray.FindPhysicalIndex(2));
+         Assert.Equal(1, reeArray.FindPhysicalIndex(3));
+         Assert.Equal(1, reeArray.FindPhysicalIndex(4));
+         Assert.Equal(1, reeArray.FindPhysicalIndex(5));
+         Assert.Equal(1, reeArray.FindPhysicalIndex(6));
+         Assert.Equal(2, reeArray.FindPhysicalIndex(7));
+         Assert.Equal(2, reeArray.FindPhysicalIndex(8));
+         Assert.Equal(2, reeArray.FindPhysicalIndex(9));
+         Assert.Equal(3, reeArray.FindPhysicalIndex(10));
+         Assert.Equal(3, reeArray.FindPhysicalIndex(11));
+         Assert.Equal(3, reeArray.FindPhysicalIndex(14));
+     }
+
+     [Fact]
+     public void TestFindPhysicalIndexOutOfRange()
+     {
+         Int32Array runEnds = new Int32Array.Builder().AppendRange([3, 7]).Build();
+         StringArray values = new StringArray.Builder().AppendRange(["A", "B"]).Build();
+         var reeArray = new RunEndEncodedArray(runEnds, values);
+
+         Assert.Throws<ArgumentOutOfRangeException>(() => reeArray.FindPhysicalIndex(-1));
+         Assert.Throws<ArgumentOutOfRangeException>(() => reeArray.FindPhysicalIndex(7));
+         Assert.Throws<ArgumentOutOfRangeException>(() => reeArray.FindPhysicalIndex(100));
+     }
+
+     [Fact]
+     public void TestRunEndEncodedArraySerialization()
+     {
+         // Create a REE array
+         Int32Array runEnds = new Int32Array.Builder().AppendRange([3, 7, 10]).Build();
+         StringArray values = new StringArray.Builder().AppendRange(["foo", "bar", "baz"]).Build();
+         var reeArray = new RunEndEncodedArray(runEnds, values);
+
+         // Create a record batch with the REE array
+         var reeField = new Field("ree_column", reeArray.Data.DataType, nullable: false);
+         var schema = new Schema([reeField], null);
+         var recordBatch = new RecordBatch(schema, [reeArray], reeArray.Length);
+
+         // Serialize and deserialize
+         using var stream = new MemoryStream();
+         using (var writer = new ArrowStreamWriter(stream, schema, leaveOpen: true))
+         {
+             writer.WriteRecordBatch(recordBatch);
+             writer.WriteEnd();
+         }
+
+         stream.Position = 0;
+
+         using var reader = new ArrowStreamReader(stream);
+         RecordBatch readBatch = reader.ReadNextRecordBatch();
+
+         Assert.NotNull(readBatch);
+         Assert.Equal(1, readBatch.ColumnCount);
+         Assert.Equal(10, readBatch.Length);
+
+         var readArray = readBatch.Column(0) as RunEndEncodedArray;
+         Assert.NotNull(readArray);
+         Assert.Equal(10, readArray.Length);
+         Assert.Equal(ArrowTypeId.RunEndEncoded, readArray.Data.DataType.TypeId);
+
+         // Verify run ends
+         var readRunEnds = readArray.RunEnds as Int32Array;
+         Assert.NotNull(readRunEnds);
+         Assert.Equal(3, readRunEnds.Length);
+         Assert.Equal(3, readRunEnds.GetValue(0));
+         Assert.Equal(7, readRunEnds.GetValue(1));
+         Assert.Equal(10, readRunEnds.GetValue(2));
+
+         // Verify values
+         var readValues = readArray.Values as StringArray;
+         Assert.NotNull(readValues);
+         Assert.Equal(3, readValues.Length);
+         Assert.Equal("foo", readValues.GetString(0));
+         Assert.Equal("bar", readValues.GetString(1));
+         Assert.Equal("baz", readValues.GetString(2));
+     }
+
+     [Fact]
+     public void TestRunEndEncodedArrayWithDifferentValueTypes()
+     {
+         // Test with boolean values
+         Int32Array runEnds1 = new Int32Array.Builder().AppendRange([5, 10]).Build();
+         BooleanArray values1 = new BooleanArray.Builder().AppendRange([true, false]).Build();
+         var reeArray1 = new RunEndEncodedArray(runEnds1, values1);
+         Assert.Equal(10, reeArray1.Length);
+
+         // Test with double values
+         Int32Array runEnds2 = new Int32Array.Builder().AppendRange([3, 8]).Build();
+         DoubleArray values2 = new DoubleArray.Builder().AppendRange([1.5, 2.5]).Build();
+         var reeArray2 = new RunEndEncodedArray(runEnds2, values2);
+         Assert.Equal(8, reeArray2.Length);
+
+         // Test with list values
+         var listBuilder = new ListArray.Builder(Int32Type.Default);
+         var int32Builder = (Int32Array.Builder)listBuilder.ValueBuilder;
+         listBuilder.Append();
+         int32Builder.Append(1);
+         int32Builder.Append(2);
+         listBuilder.Append();
+         int32Builder.Append(3);
+         int32Builder.Append(4);
+         ListArray listValues = listBuilder.Build();
+
+         Int32Array runEnds3 = new Int32Array.Builder().AppendRange([2, 5]).Build();
+         var reeArray3 = new RunEndEncodedArray(runEnds3, listValues);
+         Assert.Equal(5, reeArray3.Length);
+     }
+
+     [Fact]
+     public void TestRunEndEncodedArrayFromArrayData()
+     {
+         Int32Array runEnds = new Int32Array.Builder().AppendRange([2, 5]).Build();
+         StringArray values = new StringArray.Builder().AppendRange(["X", "Y"]).Build();
+
+         // Create ArrayData manually
+         var reeType = new RunEndEncodedType(Int32Type.Default, StringType.Default);
+         var arrayData = new ArrayData(
+             reeType,
+             length: 5,
+             nullCount: 0,
+             offset: 0,
+             buffers: [],
+             children: [runEnds.Data, values.Data]);
+
+         // Create REE array from ArrayData
+         var reeArray = new RunEndEncodedArray(arrayData);
+
+         Assert.Equal(5, reeArray.Length);
+         Assert.Equal(0, reeArray.NullCount);
+         Assert.IsType<Int32Array>(reeArray.RunEnds);
+         Assert.IsType<StringArray>(reeArray.Values);
+     }
+
+     [Fact]
+     public void TestRunEndEncodedArrayFactoryBuild()
+     {
+         // Test that ArrowArrayFactory can build REE arrays
+         Int32Array runEnds = new Int32Array.Builder().AppendRange([3, 6]).Build();
+         Int64Array values = new Int64Array.Builder().AppendRange([100, 200]).Build();
+
+         var reeType = new RunEndEncodedType(Int32Type.Default, Int64Type.Default);
+         var arrayData = new ArrayData(
+             reeType,
+             length: 6,
+             nullCount: 0,
+             offset: 0,
+             buffers: [],
+             children: [runEnds.Data, values.Data]);
+
+         IArrowArray array = ArrowArrayFactory.BuildArray(arrayData);
+
+         Assert.IsType<RunEndEncodedArray>(array);
+         var reeArray = (RunEndEncodedArray)array;
+         Assert.Equal(6, reeArray.Length);
+     }
+ }