diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java index f327851e5db..0098d8da5e0 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java @@ -546,7 +546,7 @@ public String toString() { @Override public int compareTo(Fixed that) { - return Arrays.compare(this.bytes, 0, this.bytes.length, that.bytes, 0, that.bytes.length); + return BinaryData.compareBytes(this.bytes, 0, this.bytes.length, that.bytes, 0, that.bytes.length); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java index 9ce68e9f4cf..0df469d4957 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java @@ -156,8 +156,7 @@ private static int compare(Decoders d, Schema schema) throws IOException { } case FIXED: { int size = schema.getFixedSize(); - int c = Arrays.compare(d.d1.getBuf(), d.d1.getPos(), d.d1.getPos() + size, d.d2.getBuf(), d.d2.getPos(), - d.d2.getPos() + size); + int c = compareBytes(d.d1.getBuf(), d.d1.getPos(), size, d.d2.getBuf(), d.d2.getPos(), size); d.d1.skipFixed(size); d.d2.skipFixed(size); return c; @@ -166,8 +165,7 @@ private static int compare(Decoders d, Schema schema) throws IOException { case BYTES: { int l1 = d1.readInt(); int l2 = d2.readInt(); - int c = Arrays.compare(d.d1.getBuf(), d.d1.getPos(), d.d1.getPos() + l1, d.d2.getBuf(), d.d2.getPos(), - d.d2.getPos() + l2); + int c = compareBytes(d.d1.getBuf(), d.d1.getPos(), l1, d.d2.getBuf(), d.d2.getPos(), l2); d.d1.skipFixed(l1); d.d2.skipFixed(l2); return c; @@ -181,10 +179,10 @@ private static int compare(Decoders d, Schema schema) throws IOException { /** * Lexicographically compare bytes. If equal, return zero. If greater-than, - * return a positive value, if less than return a negative value. + * return a positive value, if less than, return a negative value. */ public static int compareBytes(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { - return Arrays.compare(b1, s1, s1 + l1, b2, s2, s2 + l2); + return Arrays.compareUnsigned(b1, s1, s1 + l1, b2, s2, s2 + l2); } private static class HashData { diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java index 916c15efbf9..1810b27e06a 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java @@ -30,6 +30,7 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericFixed; import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.io.BinaryData; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.specific.FixedSize; @@ -995,7 +996,7 @@ protected int compare(Object o1, Object o2, Schema s, boolean equals) { break; byte[] b1 = (byte[]) o1; byte[] b2 = (byte[]) o2; - return Arrays.compare(b1, 0, b1.length, b2, 0, b2.length); + return BinaryData.compareBytes(b1, 0, b1.length, b2, 0, b2.length); } return super.compare(o1, o2, s, equals); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java index 98a553eecbf..9984acb5689 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java @@ -24,6 +24,7 @@ import java.util.Arrays; import org.apache.avro.Schema; import org.apache.avro.generic.GenericFixed; +import org.apache.avro.io.BinaryData; /** Base class for generated fixed-sized data classes. */ public abstract class SpecificFixed implements GenericFixed, Comparable, Externalizable { @@ -69,7 +70,7 @@ public String toString() { @Override public int compareTo(SpecificFixed that) { - return Arrays.compare(this.bytes, 0, this.bytes.length, that.bytes, 0, that.bytes.length); + return BinaryData.compareBytes(this.bytes, 0, this.bytes.length, that.bytes, 0, that.bytes.length); } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java index 5e5af1a1114..a5c4ece29d7 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java @@ -25,6 +25,7 @@ import java.util.Arrays; import org.apache.avro.SystemLimitException; +import org.apache.avro.io.BinaryData; /** * A Utf8 string. Unlike {@link String}, instances are mutable. This is more @@ -195,7 +196,7 @@ public int hashCode() { @Override public int compareTo(Utf8 that) { - return Arrays.compare(this.bytes, 0, this.length, that.bytes, 0, that.length); + return BinaryData.compareBytes(this.bytes, 0, this.length, that.bytes, 0, that.length); } // CharSequence implementation diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java index 167cd724630..595f8c31280 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java @@ -18,10 +18,11 @@ package org.apache.avro.io; +import org.junit.jupiter.api.Test; + import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertTrue; public class TestBinaryData { @@ -60,4 +61,34 @@ void testIntLongVleEquality() { BinaryData.encodeLong(Integer.MIN_VALUE, longResult, 0); assertArrayEquals(intResult, longResult); } + + @Test + void testCompareBytesUnsigned() { + // Test case: byte value 0xFF (-1 as signed, 255 as unsigned) + // should be greater than 0x7F (127) + byte[] b1 = new byte[] { (byte) 0xFF }; + byte[] b2 = new byte[] { (byte) 0x7F }; + int result = BinaryData.compareBytes(b1, 0, 1, b2, 0, 1); + assertTrue(result > 0, "0xFF (255 unsigned) should be greater than 0x7F (127)"); + result = BinaryData.compareBytes(b2, 0, 1, b1, 0, 1); + assertTrue(result < 0, "0x7F (127) should be less than 0xFF (255 unsigned)"); + result = BinaryData.compareBytes(b1, 0, 1, b1, 0, 1); + assertEquals(0, result, "Equal byte arrays should return 0"); + + // Test with multiple bytes: {0x00, 0xFF} vs {0x00, 0x7F} + byte[] b3 = new byte[] { 0x00, (byte) 0xFF }; + byte[] b4 = new byte[] { 0x00, (byte) 0x7F }; + byte[] b5 = new byte[] { (byte) 0xFF, 0x00 }; + byte[] b6 = new byte[] { (byte) 0x7F, 0x00 }; + result = BinaryData.compareBytes(b3, 0, 2, b4, 0, 2); + assertTrue(result > 1, "{0x00, 0xFF} should be greater than {0x00, 0x7F}"); + result = BinaryData.compareBytes(b5, 0, 2, b6, 0, 2); + assertTrue(result > 1, "{0xFF, 0x00} should be greater than {0x7F, 0x00}"); + + // Test with negative byte values: -1 (0xFF) should be greater than -128 (0x80) + byte[] b7 = new byte[] { (byte) -1 }; + byte[] b8 = new byte[] { (byte) -128 }; + result = BinaryData.compareBytes(b7, 0, 1, b8, 0, 1); + assertTrue(result > 0, "-1 (0xFF=255 unsigned) should be greater than -128 (0x80=128 unsigned)"); + } }