From a9f48e34506a2ad9964b34394f776e65b894bda2 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 20 Mar 2025 18:32:01 -0400 Subject: [PATCH 1/5] feat(arrow/memory): experimenting with addcleanup --- arrow/array/array_test.go | 277 +++++++++++++------------- arrow/array/binary_test.go | 73 +++---- arrow/memory/buffer.go | 16 +- arrow/memory/buffer_cleanup.go | 27 +++ arrow/memory/buffer_cleanup_go1_23.go | 27 +++ arrow/memory/buffer_test.go | 59 +++--- 6 files changed, 282 insertions(+), 197 deletions(-) create mode 100644 arrow/memory/buffer_cleanup.go create mode 100644 arrow/memory/buffer_cleanup_go1_23.go diff --git a/arrow/array/array_test.go b/arrow/array/array_test.go index 9509e314..dab77225 100644 --- a/arrow/array/array_test.go +++ b/arrow/array/array_test.go @@ -17,6 +17,7 @@ package array_test import ( + "runtime" "testing" "github.com/apache/arrow-go/v18/arrow" @@ -202,147 +203,153 @@ func TestArraySlice(t *testing.T) { vs = []float64{1, 2, 3, 0, 4, 5} ) - b := array.NewFloat64Builder(pool) - defer b.Release() - - for _, tc := range []struct { - i, j int - panics bool - len int - }{ - {i: 0, j: len(valids), panics: false, len: len(valids)}, - {i: len(valids), j: len(valids), panics: false, len: 0}, - {i: 0, j: 1, panics: false, len: 1}, - {i: 1, j: 1, panics: false, len: 0}, - {i: 0, j: len(valids) + 1, panics: true}, - {i: 2, j: 1, panics: true}, - {i: len(valids) + 1, j: len(valids) + 1, panics: true}, - } { - t.Run("", func(t *testing.T) { - b.AppendValues(vs, valids) - - arr := b.NewFloat64Array() - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if tc.panics { - defer func() { - e := recover() - if e == nil { - t.Fatalf("this should have panicked, but did not") - } - }() - } - - slice := array.NewSlice(arr, int64(tc.i), int64(tc.j)).(*array.Float64) - defer slice.Release() - - if got, want := slice.Len(), tc.len; got != want { - t.Fatalf("invalid slice length: got=%d, want=%d", got, want) - } - }) + { + b := array.NewFloat64Builder(pool) + defer b.Release() + + for _, tc := range []struct { + i, j int + panics bool + len int + }{ + {i: 0, j: len(valids), panics: false, len: len(valids)}, + {i: len(valids), j: len(valids), panics: false, len: 0}, + {i: 0, j: 1, panics: false, len: 1}, + {i: 1, j: 1, panics: false, len: 0}, + {i: 0, j: len(valids) + 1, panics: true}, + {i: 2, j: 1, panics: true}, + {i: len(valids) + 1, j: len(valids) + 1, panics: true}, + } { + t.Run("", func(t *testing.T) { + b.AppendValues(vs, valids) + + arr := b.NewFloat64Array() + defer arr.Release() + + if got, want := arr.Len(), len(valids); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if tc.panics { + defer func() { + e := recover() + if e == nil { + t.Fatalf("this should have panicked, but did not") + } + }() + } + + slice := array.NewSlice(arr, int64(tc.i), int64(tc.j)).(*array.Float64) + defer slice.Release() + + if got, want := slice.Len(), tc.len; got != want { + t.Fatalf("invalid slice length: got=%d, want=%d", got, want) + } + }) + } } + runtime.GC() } func TestArraySliceTypes(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) - valids := []bool{true, true, true, false, true, true} - - for _, tc := range []struct { - values interface{} - builder array.Builder - append func(b array.Builder, vs interface{}) - }{ - { - values: []bool{true, false, true, false, true, false}, - builder: array.NewBooleanBuilder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.BooleanBuilder).AppendValues(vs.([]bool), valids) }, - }, - { - values: []uint8{1, 2, 3, 0, 4, 5}, - builder: array.NewUint8Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint8Builder).AppendValues(vs.([]uint8), valids) }, - }, - { - values: []uint16{1, 2, 3, 0, 4, 5}, - builder: array.NewUint16Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint16Builder).AppendValues(vs.([]uint16), valids) }, - }, - { - values: []uint32{1, 2, 3, 0, 4, 5}, - builder: array.NewUint32Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint32Builder).AppendValues(vs.([]uint32), valids) }, - }, - { - values: []uint64{1, 2, 3, 0, 4, 5}, - builder: array.NewUint64Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint64Builder).AppendValues(vs.([]uint64), valids) }, - }, - { - values: []int8{1, 2, 3, 0, 4, 5}, - builder: array.NewInt8Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int8Builder).AppendValues(vs.([]int8), valids) }, - }, - { - values: []int16{1, 2, 3, 0, 4, 5}, - builder: array.NewInt16Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int16Builder).AppendValues(vs.([]int16), valids) }, - }, - { - values: []int32{1, 2, 3, 0, 4, 5}, - builder: array.NewInt32Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int32Builder).AppendValues(vs.([]int32), valids) }, - }, - { - values: []int64{1, 2, 3, 0, 4, 5}, - builder: array.NewInt64Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int64Builder).AppendValues(vs.([]int64), valids) }, - }, - { - values: []float32{1, 2, 3, 0, 4, 5}, - builder: array.NewFloat32Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Float32Builder).AppendValues(vs.([]float32), valids) }, - }, - { - values: []float64{1, 2, 3, 0, 4, 5}, - builder: array.NewFloat64Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Float64Builder).AppendValues(vs.([]float64), valids) }, - }, - } { - t.Run("", func(t *testing.T) { - defer tc.builder.Release() - - b := tc.builder - tc.append(b, tc.values) - - arr := b.NewArray() - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("invalid length: got=%d, want=%d", got, want) - } - - slice := array.NewSlice(arr, 2, 5) - defer slice.Release() - - if got, want := slice.Len(), 3; got != want { - t.Fatalf("invalid slice length: got=%d, want=%d", got, want) - } - - shortSlice := array.NewSlice(arr, 2, 3) - defer shortSlice.Release() - - sliceOfShortSlice := array.NewSlice(shortSlice, 0, 1) - defer sliceOfShortSlice.Release() - - if got, want := sliceOfShortSlice.Len(), 1; got != want { - t.Fatalf("invalid short slice length: got=%d, want=%d", got, want) - } - }) + { + valids := []bool{true, true, true, false, true, true} + + for _, tc := range []struct { + values interface{} + builder array.Builder + append func(b array.Builder, vs interface{}) + }{ + { + values: []bool{true, false, true, false, true, false}, + builder: array.NewBooleanBuilder(pool), + append: func(b array.Builder, vs interface{}) { b.(*array.BooleanBuilder).AppendValues(vs.([]bool), valids) }, + }, + { + values: []uint8{1, 2, 3, 0, 4, 5}, + builder: array.NewUint8Builder(pool), + append: func(b array.Builder, vs interface{}) { b.(*array.Uint8Builder).AppendValues(vs.([]uint8), valids) }, + }, + { + values: []uint16{1, 2, 3, 0, 4, 5}, + builder: array.NewUint16Builder(pool), + append: func(b array.Builder, vs interface{}) { b.(*array.Uint16Builder).AppendValues(vs.([]uint16), valids) }, + }, + { + values: []uint32{1, 2, 3, 0, 4, 5}, + builder: array.NewUint32Builder(pool), + append: func(b array.Builder, vs interface{}) { b.(*array.Uint32Builder).AppendValues(vs.([]uint32), valids) }, + }, + { + values: []uint64{1, 2, 3, 0, 4, 5}, + builder: array.NewUint64Builder(pool), + append: func(b array.Builder, vs interface{}) { b.(*array.Uint64Builder).AppendValues(vs.([]uint64), valids) }, + }, + { + values: []int8{1, 2, 3, 0, 4, 5}, + builder: array.NewInt8Builder(pool), + append: func(b array.Builder, vs interface{}) { b.(*array.Int8Builder).AppendValues(vs.([]int8), valids) }, + }, + { + values: []int16{1, 2, 3, 0, 4, 5}, + builder: array.NewInt16Builder(pool), + append: func(b array.Builder, vs interface{}) { b.(*array.Int16Builder).AppendValues(vs.([]int16), valids) }, + }, + { + values: []int32{1, 2, 3, 0, 4, 5}, + builder: array.NewInt32Builder(pool), + append: func(b array.Builder, vs interface{}) { b.(*array.Int32Builder).AppendValues(vs.([]int32), valids) }, + }, + { + values: []int64{1, 2, 3, 0, 4, 5}, + builder: array.NewInt64Builder(pool), + append: func(b array.Builder, vs interface{}) { b.(*array.Int64Builder).AppendValues(vs.([]int64), valids) }, + }, + { + values: []float32{1, 2, 3, 0, 4, 5}, + builder: array.NewFloat32Builder(pool), + append: func(b array.Builder, vs interface{}) { b.(*array.Float32Builder).AppendValues(vs.([]float32), valids) }, + }, + { + values: []float64{1, 2, 3, 0, 4, 5}, + builder: array.NewFloat64Builder(pool), + append: func(b array.Builder, vs interface{}) { b.(*array.Float64Builder).AppendValues(vs.([]float64), valids) }, + }, + } { + t.Run("", func(t *testing.T) { + defer tc.builder.Release() + + b := tc.builder + tc.append(b, tc.values) + + arr := b.NewArray() + defer arr.Release() + + if got, want := arr.Len(), len(valids); got != want { + t.Fatalf("invalid length: got=%d, want=%d", got, want) + } + + slice := array.NewSlice(arr, 2, 5) + defer slice.Release() + + if got, want := slice.Len(), 3; got != want { + t.Fatalf("invalid slice length: got=%d, want=%d", got, want) + } + + shortSlice := array.NewSlice(arr, 2, 3) + defer shortSlice.Release() + + sliceOfShortSlice := array.NewSlice(shortSlice, 0, 1) + defer sliceOfShortSlice.Release() + + if got, want := sliceOfShortSlice.Len(), 1; got != want { + t.Fatalf("invalid short slice length: got=%d, want=%d", got, want) + } + }) + } } + runtime.GC() } diff --git a/arrow/array/binary_test.go b/arrow/array/binary_test.go index 15cf8692..75ba5c18 100644 --- a/arrow/array/binary_test.go +++ b/arrow/array/binary_test.go @@ -18,6 +18,7 @@ package array import ( "reflect" + "runtime" "testing" "github.com/apache/arrow-go/v18/arrow" @@ -29,43 +30,45 @@ import ( func TestBinary(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) + { + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - - values := [][]byte{ - []byte("AAA"), - nil, - []byte("BBBB"), + values := [][]byte{ + []byte("AAA"), + nil, + []byte("BBBB"), + } + valid := []bool{true, false, true} + b.AppendValues(values, valid) + + b.Retain() + b.Release() + + a := b.NewBinaryArray() + assert.Equal(t, 3, a.Len()) + assert.Equal(t, 1, a.NullN()) + assert.Equal(t, []byte("AAA"), a.Value(0)) + assert.Equal(t, []byte{}, a.Value(1)) + assert.Equal(t, []byte("BBBB"), a.Value(2)) + assert.Equal(t, "QUFB", a.ValueStr(0)) + assert.Equal(t, NullValueStr, a.ValueStr(1)) + a.Release() + + // Test builder reset and NewArray API. + b.AppendValues(values, valid) + a = b.NewArray().(*Binary) + assert.Equal(t, 3, a.Len()) + assert.Equal(t, 1, a.NullN()) + assert.Equal(t, []byte("AAA"), a.Value(0)) + assert.Equal(t, []byte{}, a.Value(1)) + assert.Equal(t, []byte("BBBB"), a.Value(2)) + assert.Equal(t, "QUFB", a.ValueStr(0)) + assert.Equal(t, NullValueStr, a.ValueStr(1)) + a.Release() + + b.Release() } - valid := []bool{true, false, true} - b.AppendValues(values, valid) - - b.Retain() - b.Release() - - a := b.NewBinaryArray() - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("AAA"), a.Value(0)) - assert.Equal(t, []byte{}, a.Value(1)) - assert.Equal(t, []byte("BBBB"), a.Value(2)) - assert.Equal(t, "QUFB", a.ValueStr(0)) - assert.Equal(t, NullValueStr, a.ValueStr(1)) - a.Release() - - // Test builder reset and NewArray API. - b.AppendValues(values, valid) - a = b.NewArray().(*Binary) - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("AAA"), a.Value(0)) - assert.Equal(t, []byte{}, a.Value(1)) - assert.Equal(t, []byte("BBBB"), a.Value(2)) - assert.Equal(t, "QUFB", a.ValueStr(0)) - assert.Equal(t, NullValueStr, a.ValueStr(1)) - a.Release() - - b.Release() + runtime.GC() } func TestLargeBinary(t *testing.T) { diff --git a/arrow/memory/buffer.go b/arrow/memory/buffer.go index 592da70c..c059ef1b 100644 --- a/arrow/memory/buffer.go +++ b/arrow/memory/buffer.go @@ -31,6 +31,8 @@ type Buffer struct { mem Allocator parent *Buffer + + cleanupHndl cleanup } // NewBufferWithAllocator returns a buffer with the mutable flag set @@ -85,6 +87,7 @@ func (b *Buffer) Release() { if b.refCount.Add(-1) == 0 { if b.mem != nil { + b.cleanupHndl.Stop() b.mem.Free(b.buf) } else { b.parent.Release() @@ -124,11 +127,15 @@ func (b *Buffer) Cap() int { return len(b.buf) } func (b *Buffer) Reserve(capacity int) { if capacity > len(b.buf) { newCap := roundUpToMultipleOf64(capacity) + var buf []byte if len(b.buf) == 0 { - b.buf = b.mem.Allocate(newCap) + buf = b.mem.Allocate(newCap) } else { - b.buf = b.mem.Reallocate(newCap, b.buf) + b.cleanupHndl.Stop() + buf = b.mem.Reallocate(newCap, b.buf) } + b.cleanupHndl = addCleanup(b, b.mem.Free, buf) + b.buf = buf } } @@ -151,11 +158,14 @@ func (b *Buffer) resize(newSize int, shrink bool) { // excess space. newCap := roundUpToMultipleOf64(newSize) if len(b.buf) != newCap { + b.cleanupHndl.Stop() if newSize == 0 { b.mem.Free(b.buf) b.buf = nil } else { - b.buf = b.mem.Reallocate(newCap, b.buf) + buf := b.mem.Reallocate(newCap, b.buf) + b.cleanupHndl = addCleanup(b, b.mem.Free, buf) + b.buf = buf } } } diff --git a/arrow/memory/buffer_cleanup.go b/arrow/memory/buffer_cleanup.go new file mode 100644 index 00000000..85e3a222 --- /dev/null +++ b/arrow/memory/buffer_cleanup.go @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build go1.24 + +package memory + +import "runtime" + +type cleanup = runtime.Cleanup + +func addCleanup[T, S any](ptr *T, cleanup func(S), arg S) cleanup { + return runtime.AddCleanup(ptr, cleanup, arg) +} diff --git a/arrow/memory/buffer_cleanup_go1_23.go b/arrow/memory/buffer_cleanup_go1_23.go new file mode 100644 index 00000000..a11a67bb --- /dev/null +++ b/arrow/memory/buffer_cleanup_go1_23.go @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !go1.24 + +package memory + +type cleanup struct{} + +func (cleanup) Stop() {} + +func addCleanup[T, S any](*T, func(S), S) cleanup { + return cleanup{} +} diff --git a/arrow/memory/buffer_test.go b/arrow/memory/buffer_test.go index f8dccf0b..7dbb93bf 100644 --- a/arrow/memory/buffer_test.go +++ b/arrow/memory/buffer_test.go @@ -17,6 +17,7 @@ package memory_test import ( + "runtime" "testing" "github.com/apache/arrow-go/v18/arrow/memory" @@ -27,45 +28,55 @@ func TestNewResizableBuffer(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) - buf := memory.NewResizableBuffer(mem) - buf.Retain() // refCount == 2 + { + buf := memory.NewResizableBuffer(mem) + buf.Retain() // refCount == 2 - exp := 10 - buf.Resize(exp) - assert.NotNil(t, buf.Bytes()) - assert.Equal(t, exp, len(buf.Bytes())) - assert.Equal(t, exp, buf.Len()) + exp := 10 + buf.Resize(exp) + assert.NotNil(t, buf.Bytes()) + assert.Equal(t, exp, len(buf.Bytes())) + assert.Equal(t, exp, buf.Len()) - buf.Release() // refCount == 1 - assert.NotNil(t, buf.Bytes()) + buf.Release() // refCount == 1 + assert.NotNil(t, buf.Bytes()) - buf.Release() // refCount == 0 - assert.Nil(t, buf.Bytes()) - assert.Zero(t, buf.Len()) + buf.Release() // refCount == 0 + assert.Nil(t, buf.Bytes()) + assert.Zero(t, buf.Len()) + } + runtime.GC() } func TestBufferReset(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) - buf := memory.NewResizableBuffer(mem) + { + buf := memory.NewResizableBuffer(mem) - newBytes := []byte("some-new-bytes") - buf.Reset(newBytes) - assert.Equal(t, newBytes, buf.Bytes()) - assert.Equal(t, len(newBytes), buf.Len()) + newBytes := []byte("some-new-bytes") + buf.Reset(newBytes) + assert.Equal(t, newBytes, buf.Bytes()) + assert.Equal(t, len(newBytes), buf.Len()) + } + runtime.GC() } func TestBufferSlice(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) - buf := memory.NewResizableBuffer(mem) - buf.Resize(1024) - assert.Equal(t, 1024, mem.CurrentAlloc()) + { + buf := memory.NewResizableBuffer(mem) + buf.Resize(1024) + assert.Equal(t, 1024, mem.CurrentAlloc()) - slice := memory.SliceBuffer(buf, 512, 256) - buf.Release() - assert.Equal(t, 1024, mem.CurrentAlloc()) - slice.Release() + slice := memory.SliceBuffer(buf, 512, 256) + buf.Release() + assert.Equal(t, 1024, mem.CurrentAlloc()) + assert.Equal(t, 256, slice.Len()) + slice.Release() + } + runtime.GC() } From 4072fa8f4874cdc26b34372f9cc833e4141fb7ed Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 20 Mar 2025 18:40:02 -0400 Subject: [PATCH 2/5] use RecordEqual and ArrayEqual --- arrow/extensions/bool8_test.go | 4 ++-- arrow/extensions/uuid_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arrow/extensions/bool8_test.go b/arrow/extensions/bool8_test.go index e9c6ebbc..6d9dcbf6 100644 --- a/arrow/extensions/bool8_test.go +++ b/arrow/extensions/bool8_test.go @@ -60,7 +60,7 @@ func TestBool8ExtensionBuilder(t *testing.T) { require.NoError(t, err) defer arr1.Release() - require.Equal(t, arr, arr1) + require.True(t, array.Equal(arr, arr1)) } func TestBool8ExtensionRecordBuilder(t *testing.T) { @@ -83,7 +83,7 @@ func TestBool8ExtensionRecordBuilder(t *testing.T) { require.NoError(t, err) defer record1.Release() - require.Equal(t, record, record1) + require.True(t, array.RecordEqual(record, record1)) require.NoError(t, builder.UnmarshalJSON([]byte(`{"bool8":true}`))) record = builder.NewRecordBatch() diff --git a/arrow/extensions/uuid_test.go b/arrow/extensions/uuid_test.go index a76b77a9..df1f62e8 100644 --- a/arrow/extensions/uuid_test.go +++ b/arrow/extensions/uuid_test.go @@ -74,7 +74,7 @@ func TestUUIDExtensionRecordBuilder(t *testing.T) { require.Equal(t, "[{\"uuid\":\""+testUUID.String()+"\"}\n,{\"uuid\":null}\n,{\"uuid\":\""+testUUID.String()+"\"}\n]", string(b)) record1, _, err := array.RecordFromJSON(memory.DefaultAllocator, schema, bytes.NewReader(b)) require.NoError(t, err) - require.Equal(t, record, record1) + require.True(t, array.RecordEqual(record, record1)) } func TestUUIDStringRoundTrip(t *testing.T) { From 5c1a547e1a56449e702be2bcaad1a7de27cec591 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 20 Mar 2025 19:00:22 -0400 Subject: [PATCH 3/5] fix arraydata equal func --- arrow/array.go | 2 ++ arrow/array/data.go | 54 +++++++++++++++++++++++++++++++++ arrow/compute/exec/span_test.go | 3 +- 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/arrow/array.go b/arrow/array.go index d42ca6d0..8902cec9 100644 --- a/arrow/array.go +++ b/arrow/array.go @@ -83,6 +83,8 @@ type ArrayData interface { Dictionary() ArrayData // SizeInBytes returns the size of the ArrayData buffers and any children and/or dictionary in bytes. SizeInBytes() uint64 + + Equal(ArrayData) bool } // Array represents an immutable sequence of values using the Arrow in-memory format. diff --git a/arrow/array/data.go b/arrow/array/data.go index 6dafd8a9..5df7178a 100644 --- a/arrow/array/data.go +++ b/arrow/array/data.go @@ -17,6 +17,7 @@ package array import ( + "bytes" "hash/maphash" "math/bits" "sync/atomic" @@ -78,6 +79,59 @@ func NewDataWithDictionary(dtype arrow.DataType, length int, buffers []*memory.B return data } +func (d *Data) Equal(other arrow.ArrayData) bool { + rhs, ok := other.(*Data) + if !ok { + return false + } + + if d == rhs { + return true + } + + switch { + case !arrow.TypeEqual(d.dtype, rhs.dtype): + return false + case d.length != rhs.length || d.nulls != rhs.nulls || d.offset != rhs.offset: + return false + case len(d.buffers) != len(rhs.buffers): + return false + case len(d.childData) != len(rhs.childData): + return false + case d.dictionary != nil && rhs.dictionary == nil: + return false + case d.dictionary == nil && rhs.dictionary != nil: + return false + } + + if d.dictionary != nil { + if !d.dictionary.Equal(rhs.dictionary) { + return false + } + } + + for i := range d.childData { + if !d.childData[i].Equal(rhs.childData[i]) { + return false + } + } + + for i, b := range d.buffers { + switch { + case b == nil: + if rhs.buffers[i] != nil { + return false + } + case rhs.buffers[i] == nil: + return false + case !bytes.Equal(b.Bytes(), rhs.buffers[i].Bytes()): + return false + } + } + + return true +} + func (d *Data) Copy() *Data { // don't pass the slices directly, otherwise it retains the connection // we need to make new slices and populate them with the same pointers diff --git a/arrow/compute/exec/span_test.go b/arrow/compute/exec/span_test.go index 6b93da7d..263c3d7c 100644 --- a/arrow/compute/exec/span_test.go +++ b/arrow/compute/exec/span_test.go @@ -406,7 +406,8 @@ func TestArraySpan_MakeData(t *testing.T) { } got := a.MakeData() want := tt.want(mem) - if !reflect.DeepEqual(got, want) { + + if !got.Equal(want) { t.Errorf("ArraySpan.MakeData() = %v, want %v", got, want) } want.Release() From 5de26d6a5acf768c874583f2e4d527111a11116b Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Mon, 3 Nov 2025 15:33:26 -0500 Subject: [PATCH 4/5] upgrade tinygo version --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d062e2ed..50324566 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -316,7 +316,7 @@ jobs: name: TinyGo runs-on: ubuntu-latest env: - TINYGO_VERSION: 0.38.0 + TINYGO_VERSION: 0.39.0 timeout-minutes: 20 steps: - name: Checkout From 3191f1b488c031062d568e708b7321a3fea8a5dc Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Mon, 3 Nov 2025 15:37:39 -0500 Subject: [PATCH 5/5] fix tinygo handling --- .github/workflows/test.yml | 2 +- arrow/memory/buffer_cleanup.go | 2 +- arrow/memory/buffer_cleanup_go1_23.go | 2 +- parquet/metadata/cleanup_bloom_filter.go | 2 +- parquet/metadata/cleanup_bloom_filter_go1.23.go | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 50324566..d062e2ed 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -316,7 +316,7 @@ jobs: name: TinyGo runs-on: ubuntu-latest env: - TINYGO_VERSION: 0.39.0 + TINYGO_VERSION: 0.38.0 timeout-minutes: 20 steps: - name: Checkout diff --git a/arrow/memory/buffer_cleanup.go b/arrow/memory/buffer_cleanup.go index 85e3a222..c529bb72 100644 --- a/arrow/memory/buffer_cleanup.go +++ b/arrow/memory/buffer_cleanup.go @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.24 +//go:build go1.24 && !tinygo package memory diff --git a/arrow/memory/buffer_cleanup_go1_23.go b/arrow/memory/buffer_cleanup_go1_23.go index a11a67bb..950d0f56 100644 --- a/arrow/memory/buffer_cleanup_go1_23.go +++ b/arrow/memory/buffer_cleanup_go1_23.go @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !go1.24 +//go:build !go1.24 || tinygo package memory diff --git a/parquet/metadata/cleanup_bloom_filter.go b/parquet/metadata/cleanup_bloom_filter.go index ed835c3b..4779e686 100644 --- a/parquet/metadata/cleanup_bloom_filter.go +++ b/parquet/metadata/cleanup_bloom_filter.go @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.24 +//go:build go1.24 && !tinygo package metadata diff --git a/parquet/metadata/cleanup_bloom_filter_go1.23.go b/parquet/metadata/cleanup_bloom_filter_go1.23.go index b4bffbe7..c7007727 100644 --- a/parquet/metadata/cleanup_bloom_filter_go1.23.go +++ b/parquet/metadata/cleanup_bloom_filter_go1.23.go @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !go1.24 +//go:build !go1.24 || tinygo package metadata