@@ -700,6 +700,11 @@ public SELF withByteStreamSplitEncoding(boolean enableByteStreamSplit) {
return self();
}

public SELF withByteStreamSplitEncoding(String columnPath, boolean enableByteStreamSplit) {
encodingPropsBuilder.withByteStreamSplitEncoding(columnPath, enableByteStreamSplit);
Member: Thanks for adding this! Could you please check if there is any test case covering it? If not, it would be good to add one to make sure it takes effect.

Contributor Author: Added the corresponding test. Thanks for the reminder.

return self();
}
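
As a usage sketch (not part of the diff): the new overload can be combined with the existing global toggle. The identifiers below mirror the test added in this PR; `path` and `schema` are assumed to be set up as in that test, and dictionary encoding is disabled because it otherwise takes precedence over byte-stream-split.

    // Enable byte-stream-split globally (floating-point columns) and opt a
    // specific INT32 column in via the new per-column overload.
    ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
        .withType(schema)
        .withDictionaryEncoding(false)
        .withByteStreamSplitEncoding(true)                // existing global toggle
        .withByteStreamSplitEncoding("int32_field", true) // new per-column overload
        .build();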

/**
* Enable or disable dictionary encoding of the specified column for the constructed writer.
*
@@ -626,6 +626,44 @@ public void testSizeStatisticsAndStatisticsControl() throws Exception {
}
}

@Test
public void testByteStreamSplitEncodingControl() throws Exception {
MessageType schema = Types.buildMessage()
.required(FLOAT)
.named("float_field")
.required(INT32)
.named("int32_field")
.named("test_schema");

File file = temp.newFile();
file.delete(); // the writer's default CREATE mode fails if the file already exists

Path path = new Path(file.getAbsolutePath());
SimpleGroupFactory factory = new SimpleGroupFactory(schema);
try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
.withType(schema)
// dictionary encoding takes precedence over byte-stream-split and would be
// chosen for this small data set, so disable it for the assertions below
.withDictionaryEncoding(false)
.withByteStreamSplitEncoding(true)
.withByteStreamSplitEncoding("int32_field", true)
.build()) {
writer.write(factory.newGroup().append("float_field", 0.3f).append("int32_field", 42));
}

try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(path, new Configuration()))) {
for (BlockMetaData block : reader.getFooter().getBlocks()) {
for (ColumnChunkMetaData column : block.getColumns()) {
assertTrue(column.getEncodings().contains(Encoding.BYTE_STREAM_SPLIT));
}
}
}

try (ParquetReader<Group> reader =
ParquetReader.builder(new GroupReadSupport(), path).build()) {
Group group = reader.read();
assertEquals(0.3f, group.getFloat("float_field", 0), 0.0);
assertEquals(42, group.getInteger("int32_field", 0));
}
}

@Test
public void testV2WriteAllNullValues() throws Exception {
testV2WriteAllNullValues(null, null);