Skip to content

Commit 384ea25

Browse files
authored
GH-49071: [Ruby] Add support for writing list and large list arrays (#49072)
### Rationale for this change

List arrays and large list arrays use different offset sizes (32-bit and 64-bit, respectively), so each needs its own writing support.

### What changes are included in this PR?

* Add `ArrowFormat::ListType#to_flatbuffers`
* Add `ArrowFormat::LargeListType#to_flatbuffers`
* Add `ArrowFormat::VariableSizeListArray#child`
* Add `ArrowFormat::VariableSizeListArray#each_buffer`
* `garrow_array_get_null_bitmap()` returns `NULL` when the null bitmap doesn't exist
* Add `garrow_list_array_get_value_offsets_buffer()`
* Add `garrow_large_list_array_get_value_offsets_buffer()`

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #49071

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
1 parent acb6288 commit 384ea25

File tree

8 files changed

+123
-4
lines changed

8 files changed

+123
-4
lines changed

c_glib/arrow-glib/basic-array.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1114,7 +1114,11 @@ garrow_array_get_null_bitmap(GArrowArray *array)
11141114

11151115
auto arrow_array = garrow_array_get_raw(array);
11161116
auto arrow_null_bitmap = arrow_array->null_bitmap();
1117-
return garrow_buffer_new_raw(&arrow_null_bitmap);
1117+
if (arrow_null_bitmap) {
1118+
return garrow_buffer_new_raw(&arrow_null_bitmap);
1119+
} else {
1120+
return nullptr;
1121+
}
11181122
}
11191123

11201124
/**

c_glib/arrow-glib/composite-array.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,22 @@ garrow_base_list_array_get_value_offsets(GArrowArray *array, gint64 *n_offsets)
188188
return arrow_list_array->raw_value_offsets();
189189
};
190190

191+
template <typename LIST_ARRAY_CLASS>
192+
GArrowBuffer *
193+
garrow_base_list_array_get_value_offsets_buffer(GArrowArray *array)
194+
{
195+
GArrowBuffer *buffer = nullptr;
196+
g_object_get(array, "buffer1", &buffer, nullptr);
197+
if (buffer) {
198+
return buffer;
199+
}
200+
201+
auto arrow_array = garrow_array_get_raw(array);
202+
auto arrow_list_array = std::static_pointer_cast<LIST_ARRAY_CLASS>(arrow_array);
203+
auto arrow_buffer = arrow_list_array->value_offsets();
204+
return garrow_buffer_new_raw(&arrow_buffer);
205+
};
206+
191207
G_BEGIN_DECLS
192208

193209
static void
@@ -385,6 +401,21 @@ garrow_list_array_get_value_offsets(GArrowListArray *array, gint64 *n_offsets)
385401
n_offsets);
386402
}
387403

404+
/**
405+
* garrow_list_array_get_value_offsets_buffer:
406+
* @array: A #GArrowListArray.
407+
*
408+
* Returns: (transfer full) (nullable): The value offsets buffer.
409+
*
410+
* Since: 24.0.0
411+
*/
412+
GArrowBuffer *
413+
garrow_list_array_get_value_offsets_buffer(GArrowListArray *array)
414+
{
415+
return garrow_base_list_array_get_value_offsets_buffer<arrow::ListArray>(
416+
GARROW_ARRAY(array));
417+
}
418+
388419
typedef struct GArrowLargeListArrayPrivate_
389420
{
390421
GArrowArray *raw_values;
@@ -602,6 +633,21 @@ garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array, gint64 *n
602633
return reinterpret_cast<const gint64 *>(value_offsets);
603634
}
604635

636+
/**
637+
* garrow_large_list_array_get_value_offsets_buffer:
638+
* @array: A #GArrowLargeListArray.
639+
*
640+
* Returns: (transfer full) (nullable): The value offsets buffer.
641+
*
642+
* Since: 24.0.0
643+
*/
644+
GArrowBuffer *
645+
garrow_large_list_array_get_value_offsets_buffer(GArrowLargeListArray *array)
646+
{
647+
return garrow_base_list_array_get_value_offsets_buffer<arrow::LargeListArray>(
648+
GARROW_ARRAY(array));
649+
}
650+
605651
typedef struct GArrowFixedSizeListArrayPrivate_
606652
{
607653
GArrowArray *raw_values;

c_glib/arrow-glib/composite-array.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ GARROW_AVAILABLE_IN_2_0
6868
const gint32 *
6969
garrow_list_array_get_value_offsets(GArrowListArray *array, gint64 *n_offsets);
7070

71+
GARROW_AVAILABLE_IN_24_0
72+
GArrowBuffer *
73+
garrow_list_array_get_value_offsets_buffer(GArrowListArray *array);
74+
7175
#define GARROW_TYPE_LARGE_LIST_ARRAY (garrow_large_list_array_get_type())
7276
GARROW_AVAILABLE_IN_0_16
7377
G_DECLARE_DERIVABLE_TYPE(
@@ -110,6 +114,10 @@ GARROW_AVAILABLE_IN_2_0
110114
const gint64 *
111115
garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array, gint64 *n_offsets);
112116

117+
GARROW_AVAILABLE_IN_24_0
118+
GArrowBuffer *
119+
garrow_large_list_array_get_value_offsets_buffer(GArrowLargeListArray *array);
120+
113121
#define GARROW_TYPE_FIXED_SIZE_LIST_ARRAY (garrow_fixed_size_list_array_get_type())
114122
GARROW_AVAILABLE_IN_23_0
115123
G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeListArray,

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,12 +370,20 @@ class Decimal256Array < DecimalArray
370370
end
371371

372372
class VariableSizeListArray < Array
373+
attr_reader :child
373374
def initialize(type, size, validity_buffer, offsets_buffer, child)
374375
super(type, size, validity_buffer)
375376
@offsets_buffer = offsets_buffer
376377
@child = child
377378
end
378379

380+
def each_buffer(&block)
381+
return to_enum(__method__) unless block_given?
382+
383+
yield(@validity_buffer)
384+
yield(@offsets_buffer)
385+
end
386+
379387
def to_a
380388
child_values = @child.to_a
381389
values = @offsets_buffer.

ruby/red-arrow-format/lib/arrow-format/field.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ def to_flatbuffers
4949
else
5050
fb_field.type = @type.to_flatbuffers
5151
end
52-
if @type.respond_to?(:children)
52+
if @type.respond_to?(:child)
53+
fb_field.children = [@type.child.to_flatbuffers]
54+
elsif @type.respond_to?(:children)
5355
fb_field.children = @type.children.collect(&:to_flatbuffers)
5456
end
5557
# fb_field.custom_metadata = @custom_metadata

ruby/red-arrow-format/lib/arrow-format/record-batch.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ def all_columns_enumerator
7070
Enumerator.new do |yielder|
7171
traverse = lambda do |array|
7272
yielder << array
73-
if array.respond_to?(:children)
73+
if array.respond_to?(:child)
74+
traverse.call(array.child)
75+
elsif array.respond_to?(:children)
7476
array.children.each do |child_array|
7577
traverse.call(child_array)
7678
end

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,6 @@ def initialize(child)
707707
super()
708708
@child = child
709709
end
710-
711710
end
712711

713712
class ListType < VariableSizeListType
@@ -718,6 +717,10 @@ def name
718717
def build_array(size, validity_buffer, offsets_buffer, child)
719718
ListArray.new(self, size, validity_buffer, offsets_buffer, child)
720719
end
720+
721+
def to_flatbuffers
722+
FB::List::Data.new
723+
end
721724
end
722725

723726
class LargeListType < VariableSizeListType
@@ -728,6 +731,10 @@ def name
728731
def build_array(size, validity_buffer, offsets_buffer, child)
729732
LargeListArray.new(self, size, validity_buffer, offsets_buffer, child)
730733
end
734+
735+
def to_flatbuffers
736+
FB::LargeList::Data.new
737+
end
731738
end
732739

733740
class StructType < Type

ruby/red-arrow-format/test/test-writer.rb

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,22 @@ def convert_type(red_arrow_type)
8383
red_arrow_type.scale)
8484
when Arrow::FixedSizeBinaryDataType
8585
ArrowFormat::FixedSizeBinaryType.new(red_arrow_type.byte_width)
86+
when Arrow::ListDataType
87+
ArrowFormat::ListType.new(convert_field(red_arrow_type.field))
88+
when Arrow::LargeListDataType
89+
ArrowFormat::LargeListType.new(convert_field(red_arrow_type.field))
8690
else
8791
raise "Unsupported type: #{red_arrow_type.inspect}"
8892
end
8993
end
9094

95+
def convert_field(red_arrow_field)
96+
ArrowFormat::Field.new(red_arrow_field.name,
97+
convert_type(red_arrow_field.data_type),
98+
red_arrow_field.nullable?,
99+
nil)
100+
end
101+
91102
def convert_buffer(buffer)
92103
return nil if buffer.nil?
93104
IO::Buffer.for(buffer.data.to_s)
@@ -111,6 +122,11 @@ def convert_array(red_arrow_array)
111122
type.build_array(red_arrow_array.size,
112123
convert_buffer(red_arrow_array.null_bitmap),
113124
convert_buffer(red_arrow_array.data_buffer))
125+
when ArrowFormat::VariableSizeListType
126+
type.build_array(red_arrow_array.size,
127+
convert_buffer(red_arrow_array.null_bitmap),
128+
convert_buffer(red_arrow_array.value_offsets_buffer),
129+
convert_array(red_arrow_array.values_raw))
114130
else
115131
raise "Unsupported array #{red_arrow_array.inspect}"
116132
end
@@ -706,6 +722,32 @@ def test_write
706722
@values)
707723
end
708724
end
725+
726+
sub_test_case("List") do
727+
def build_array
728+
data_type = Arrow::ListDataType.new(name: "count", type: :int8)
729+
Arrow::ListArray.new(data_type, [[-128, 127], nil, [-1, 0, 1]])
730+
end
731+
732+
def test_write
733+
assert_equal([[-128, 127], nil, [-1, 0, 1]],
734+
@values)
735+
end
736+
end
737+
738+
sub_test_case("LargeList") do
739+
def build_array
740+
data_type = Arrow::LargeListDataType.new(name: "count",
741+
type: :int8)
742+
Arrow::LargeListArray.new(data_type,
743+
[[-128, 127], nil, [-1, 0, 1]])
744+
end
745+
746+
def test_write
747+
assert_equal([[-128, 127], nil, [-1, 0, 1]],
748+
@values)
749+
end
750+
end
709751
end
710752
end
711753
end

0 commit comments

Comments
 (0)