Skip to content

Commit 3a0d927

Browse files
committed
rebase, introduce arrow-variant module, add complex tests
1 parent 27ee846 commit 3a0d927

File tree

15 files changed

+1287
-6
lines changed

15 files changed

+1287
-6
lines changed

arrow-variant/pom.xml

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Licensed to the Apache Software Foundation (ASF) under one
4+
or more contributor license agreements. See the NOTICE file
5+
distributed with this work for additional information
6+
regarding copyright ownership. The ASF licenses this file
7+
to you under the Apache License, Version 2.0 (the
8+
"License"); you may not use this file except in compliance
9+
with the License. You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing,
14+
software distributed under the License is distributed on an
15+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
KIND, either express or implied. See the License for the
17+
specific language governing permissions and limitations
18+
under the License.
19+
-->
<!-- Maven build descriptor for the arrow-variant module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <!-- Group id and version are not declared on this module; Maven inherits them from the parent. -->
  <parent>
    <groupId>org.apache.arrow</groupId>
    <artifactId>arrow-java-root</artifactId>
    <version>19.0.0-SNAPSHOT</version>
  </parent>
  <artifactId>arrow-variant</artifactId>
  <name>Arrow Variant</name>
  <description>Arrow Variant type support.</description>

  <!-- No dependency below declares a version; presumably they are managed by the parent POM (TODO confirm). -->
  <dependencies>
    <dependency>
      <groupId>org.apache.arrow</groupId>
      <artifactId>arrow-memory-core</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.parquet</groupId>
      <artifactId>parquet-variant</artifactId>
    </dependency>
    <!-- Needed on the test classpath only. -->
    <dependency>
      <groupId>org.apache.arrow</groupId>
      <artifactId>arrow-memory-unsafe</artifactId>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- The test-jar goal additionally packages this module's test classes as a jar. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>test-jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
@SuppressWarnings("requires-automatic")
19+
module org.apache.arrow.variant {
20+
exports org.apache.arrow.vector.variant;
21+
22+
requires org.apache.arrow.memory.core;
23+
requires parquet.variant;
24+
}
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.arrow.vector.variant;
18+
19+
import java.math.BigDecimal;
20+
import java.nio.ByteBuffer;
21+
import java.util.Objects;
22+
import java.util.UUID;
23+
import org.apache.arrow.memory.ArrowBuf;
24+
25+
/**
26+
* Wrapper around parquet-variant's Variant implementation.
27+
*
28+
* <p>This wrapper exists to isolate the parquet-variant dependency from Arrow's public API,
29+
* allowing the vector module to expose variant functionality without requiring users to depend on
30+
* parquet-variant directly. It also ensures that nested variant values (from arrays and objects)
31+
* are consistently wrapped.
32+
*/
33+
public class Variant {
34+
35+
private final org.apache.parquet.variant.Variant delegate;
36+
37+
/** Creates a Variant from raw metadata and value byte arrays. */
38+
public Variant(byte[] metadata, byte[] value) {
39+
this.delegate = new org.apache.parquet.variant.Variant(value, metadata);
40+
}
41+
42+
/** Creates a Variant by copying data from ArrowBuf instances. */
43+
public Variant(
44+
ArrowBuf metadataBuffer,
45+
int metadataStart,
46+
int metadataEnd,
47+
ArrowBuf valueBuffer,
48+
int valueStart,
49+
int valueEnd) {
50+
byte[] metadata = new byte[metadataEnd - metadataStart];
51+
byte[] value = new byte[valueEnd - valueStart];
52+
metadataBuffer.getBytes(metadataStart, metadata);
53+
valueBuffer.getBytes(valueStart, value);
54+
this.delegate = new org.apache.parquet.variant.Variant(value, metadata);
55+
}
56+
57+
private Variant(org.apache.parquet.variant.Variant delegate) {
58+
this.delegate = delegate;
59+
}
60+
61+
public ByteBuffer getValueBuffer() {
62+
return delegate.getValueBuffer();
63+
}
64+
65+
public ByteBuffer getMetadataBuffer() {
66+
return delegate.getMetadataBuffer();
67+
}
68+
69+
public boolean getBoolean() {
70+
return delegate.getBoolean();
71+
}
72+
73+
public byte getByte() {
74+
return delegate.getByte();
75+
}
76+
77+
public short getShort() {
78+
return delegate.getShort();
79+
}
80+
81+
public int getInt() {
82+
return delegate.getInt();
83+
}
84+
85+
public long getLong() {
86+
return delegate.getLong();
87+
}
88+
89+
public double getDouble() {
90+
return delegate.getDouble();
91+
}
92+
93+
public BigDecimal getDecimal() {
94+
return delegate.getDecimal();
95+
}
96+
97+
public float getFloat() {
98+
return delegate.getFloat();
99+
}
100+
101+
public ByteBuffer getBinary() {
102+
return delegate.getBinary();
103+
}
104+
105+
public UUID getUUID() {
106+
return delegate.getUUID();
107+
}
108+
109+
public String getString() {
110+
return delegate.getString();
111+
}
112+
113+
public Type getType() {
114+
return Type.fromParquet(delegate.getType());
115+
}
116+
117+
public int numObjectElements() {
118+
return delegate.numObjectElements();
119+
}
120+
121+
public Variant getFieldByKey(String key) {
122+
org.apache.parquet.variant.Variant result = delegate.getFieldByKey(key);
123+
return result != null ? wrap(result) : null;
124+
}
125+
126+
public ObjectField getFieldAtIndex(int idx) {
127+
org.apache.parquet.variant.Variant.ObjectField field = delegate.getFieldAtIndex(idx);
128+
return new ObjectField(field.key, wrap(field.value));
129+
}
130+
131+
public int numArrayElements() {
132+
return delegate.numArrayElements();
133+
}
134+
135+
public Variant getElementAtIndex(int index) {
136+
org.apache.parquet.variant.Variant result = delegate.getElementAtIndex(index);
137+
return result != null ? wrap(result) : null;
138+
}
139+
140+
private static Variant wrap(org.apache.parquet.variant.Variant parquetVariant) {
141+
return new Variant(parquetVariant);
142+
}
143+
144+
@Override
145+
public boolean equals(Object o) {
146+
if (this == o) {
147+
return true;
148+
}
149+
if (o == null || getClass() != o.getClass()) {
150+
return false;
151+
}
152+
Variant variant = (Variant) o;
153+
return delegate.getMetadataBuffer().equals(variant.delegate.getMetadataBuffer())
154+
&& delegate.getValueBuffer().equals(variant.delegate.getValueBuffer());
155+
}
156+
157+
@Override
158+
public int hashCode() {
159+
return Objects.hash(delegate.getMetadataBuffer(), delegate.getValueBuffer());
160+
}
161+
162+
@Override
163+
public String toString() {
164+
return "Variant{type=" + getType() + '}';
165+
}
166+
167+
public enum Type {
168+
OBJECT,
169+
ARRAY,
170+
NULL,
171+
BOOLEAN,
172+
BYTE,
173+
SHORT,
174+
INT,
175+
LONG,
176+
STRING,
177+
DOUBLE,
178+
DECIMAL4,
179+
DECIMAL8,
180+
DECIMAL16,
181+
DATE,
182+
TIMESTAMP_TZ,
183+
TIMESTAMP_NTZ,
184+
FLOAT,
185+
BINARY,
186+
TIME,
187+
TIMESTAMP_NANOS_TZ,
188+
TIMESTAMP_NANOS_NTZ,
189+
UUID;
190+
191+
static Type fromParquet(org.apache.parquet.variant.Variant.Type parquetType) {
192+
return Type.valueOf(parquetType.name());
193+
}
194+
}
195+
196+
public static final class ObjectField {
197+
public final String key;
198+
public final Variant value;
199+
200+
public ObjectField(String key, Variant value) {
201+
this.key = key;
202+
this.value = value;
203+
}
204+
}
205+
}

0 commit comments

Comments
 (0)