diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
index 05f0e2e3a4..8eb5f7f17b 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
@@ -700,6 +700,11 @@ public SELF withByteStreamSplitEncoding(boolean enableByteStreamSplit) {
     return self();
   }
 
+  public SELF withByteStreamSplitEncoding(String columnPath, boolean enableByteStreamSplit) {
+    encodingPropsBuilder.withByteStreamSplitEncoding(columnPath, enableByteStreamSplit);
+    return self();
+  }
+
   /**
    * Enable or disable dictionary encoding of the specified column for the constructed writer.
    *
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
index 9a69ee478a..03cd98ac6e 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
@@ -626,6 +626,44 @@ public void testSizeStatisticsAndStatisticsControl() throws Exception {
     }
   }
 
+  @Test
+  public void testByteStreamSplitEncodingControl() throws Exception {
+    MessageType schema = Types.buildMessage()
+        .required(FLOAT)
+        .named("float_field")
+        .required(INT32)
+        .named("int32_field")
+        .named("test_schema");
+
+    File file = temp.newFile();
+    temp.delete();
+
+    Path path = new Path(file.getAbsolutePath());
+    SimpleGroupFactory factory = new SimpleGroupFactory(schema);
+    try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
+        .withType(schema)
+        .withByteStreamSplitEncoding(true)
+        .withByteStreamSplitEncoding("int32_field", true)
+        .build()) {
+      writer.write(factory.newGroup().append("float_field", 0.3f).append("int32_field", 42));
+    }
+
+    try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(path, new Configuration()))) {
+      for (BlockMetaData block : reader.getFooter().getBlocks()) {
+        for (ColumnChunkMetaData column : block.getColumns()) {
+          assertTrue(column.getEncodings().contains(Encoding.BYTE_STREAM_SPLIT));
+        }
+      }
+    }
+
+    try (ParquetReader<Group> reader =
+        ParquetReader.builder(new GroupReadSupport(), path).build()) {
+      Group group = reader.read();
+      assertEquals(0.3f, group.getFloat("float_field", 0), 0.0);
+      assertEquals(42, group.getInteger("int32_field", 0));
+    }
+  }
+
   @Test
   public void testV2WriteAllNullValues() throws Exception {
     testV2WriteAllNullValues(null, null);
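
For context, a minimal sketch of how the new per-column overload might be used from application code. The schema, column names, output path, and values below are hypothetical, not taken from this PR; only withByteStreamSplitEncoding(String, boolean) is the builder method the diff adds, alongside the existing writer-wide boolean overload.

    import org.apache.hadoop.fs.Path;
    import org.apache.parquet.example.data.Group;
    import org.apache.parquet.example.data.simple.SimpleGroupFactory;
    import org.apache.parquet.hadoop.ParquetWriter;
    import org.apache.parquet.hadoop.example.ExampleParquetWriter;
    import org.apache.parquet.schema.MessageType;
    import org.apache.parquet.schema.Types;

    import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
    import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;

    public class PerColumnByteStreamSplitSketch {
      public static void main(String[] args) throws Exception {
        // Hypothetical two-column schema for illustration.
        MessageType schema = Types.buildMessage()
            .required(DOUBLE)
            .named("temperature")
            .required(INT32)
            .named("station_id")
            .named("measurements");

        SimpleGroupFactory factory = new SimpleGroupFactory(schema);

        // Opt only the "temperature" column into BYTE_STREAM_SPLIT; the
        // "station_id" column keeps the writer-wide default, which the
        // existing boolean overload controls.
        try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(new Path("/tmp/measurements.parquet"))
            .withType(schema)
            .withByteStreamSplitEncoding("temperature", true)
            .build()) {
          writer.write(factory.newGroup().append("temperature", 21.5).append("station_id", 7));
        }
      }
    }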