@@ -64,6 +64,7 @@ public class ParquetProperties {
6464 public static final boolean DEFAULT_ADAPTIVE_BLOOM_FILTER_ENABLED = false ;
6565 public static final int DEFAULT_BLOOM_FILTER_CANDIDATES_NUMBER = 5 ;
6666 public static final boolean DEFAULT_STATISTICS_ENABLED = true ;
67+ public static final boolean DEFAULT_SIZE_STATISTICS_ENABLED = true ;
6768
6869 public static final boolean DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED = true ;
6970
@@ -112,6 +113,7 @@ public static WriterVersion fromString(String name) {
112113 private final int columnIndexTruncateLength ;
113114 private final int statisticsTruncateLength ;
114115 private final boolean statisticsEnabled ;
116+ private final boolean sizeStatisticsEnabled ;
115117
116118 // The expected NDV (number of distinct values) for each columns
117119 private final ColumnProperty <Long > bloomFilterNDVs ;
@@ -125,6 +127,7 @@ public static WriterVersion fromString(String name) {
125127 private final ColumnProperty <ByteStreamSplitMode > byteStreamSplitEnabled ;
126128 private final Map <String , String > extraMetaData ;
127129 private final ColumnProperty <Boolean > statistics ;
130+ private final ColumnProperty <Boolean > sizeStatistics ;
128131
129132 private ParquetProperties (Builder builder ) {
130133 this .pageSizeThreshold = builder .pageSize ;
@@ -143,6 +146,7 @@ private ParquetProperties(Builder builder) {
143146 this .columnIndexTruncateLength = builder .columnIndexTruncateLength ;
144147 this .statisticsTruncateLength = builder .statisticsTruncateLength ;
145148 this .statisticsEnabled = builder .statisticsEnabled ;
149+ this .sizeStatisticsEnabled = builder .sizeStatisticsEnabled ;
146150 this .bloomFilterNDVs = builder .bloomFilterNDVs .build ();
147151 this .bloomFilterFPPs = builder .bloomFilterFPPs .build ();
148152 this .bloomFilterEnabled = builder .bloomFilterEnabled .build ();
@@ -154,6 +158,7 @@ private ParquetProperties(Builder builder) {
154158 this .byteStreamSplitEnabled = builder .byteStreamSplitEnabled .build ();
155159 this .extraMetaData = builder .extraMetaData ;
156160 this .statistics = builder .statistics .build ();
161+ this .sizeStatistics = builder .sizeStatistics .build ();
157162 }
158163
159164 public static Builder builder () {
@@ -345,6 +350,14 @@ public boolean getStatisticsEnabled(ColumnDescriptor column) {
345350 return statisticsEnabled ;
346351 }
347352
353+ public boolean getSizeStatisticsEnabled (ColumnDescriptor column ) {
354+ Boolean columnSetting = sizeStatistics .getValue (column );
355+ if (columnSetting != null ) {
356+ return columnSetting ;
357+ }
358+ return sizeStatisticsEnabled ;
359+ }
360+
348361 @ Override
349362 public String toString () {
350363 return "Parquet page size to " + getPageSizeThreshold () + '\n'
@@ -361,7 +374,9 @@ public String toString() {
361374 + "Bloom filter expected number of distinct values are: " + bloomFilterNDVs + '\n'
362375 + "Bloom filter false positive probabilities are: " + bloomFilterFPPs + '\n'
363376 + "Page row count limit to " + getPageRowCountLimit () + '\n'
364- + "Writing page checksums is: " + (getPageWriteChecksumEnabled () ? "on" : "off" );
377+ + "Writing page checksums is: " + (getPageWriteChecksumEnabled () ? "on" : "off" ) + '\n'
378+ + "Statistics enabled: " + statisticsEnabled + '\n'
379+ + "Size statistics enabled: " + sizeStatisticsEnabled ;
365380 }
366381
367382 public static class Builder {
@@ -378,6 +393,7 @@ public static class Builder {
378393 private int columnIndexTruncateLength = DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH ;
379394 private int statisticsTruncateLength = DEFAULT_STATISTICS_TRUNCATE_LENGTH ;
380395 private boolean statisticsEnabled = DEFAULT_STATISTICS_ENABLED ;
396+ private boolean sizeStatisticsEnabled = DEFAULT_SIZE_STATISTICS_ENABLED ;
381397 private final ColumnProperty .Builder <Long > bloomFilterNDVs ;
382398 private final ColumnProperty .Builder <Double > bloomFilterFPPs ;
383399 private int maxBloomFilterBytes = DEFAULT_MAX_BLOOM_FILTER_BYTES ;
@@ -389,6 +405,7 @@ public static class Builder {
389405 private final ColumnProperty .Builder <ByteStreamSplitMode > byteStreamSplitEnabled ;
390406 private Map <String , String > extraMetaData = new HashMap <>();
391407 private final ColumnProperty .Builder <Boolean > statistics ;
408+ private final ColumnProperty .Builder <Boolean > sizeStatistics ;
392409
393410 private Builder () {
394411 enableDict = ColumnProperty .<Boolean >builder ().withDefaultValue (DEFAULT_IS_DICTIONARY_ENABLED );
@@ -405,6 +422,7 @@ private Builder() {
405422 numBloomFilterCandidates =
406423 ColumnProperty .<Integer >builder ().withDefaultValue (DEFAULT_BLOOM_FILTER_CANDIDATES_NUMBER );
407424 statistics = ColumnProperty .<Boolean >builder ().withDefaultValue (DEFAULT_STATISTICS_ENABLED );
425+ sizeStatistics = ColumnProperty .<Boolean >builder ().withDefaultValue (DEFAULT_SIZE_STATISTICS_ENABLED );
408426 }
409427
410428 private Builder (ParquetProperties toCopy ) {
@@ -428,6 +446,7 @@ private Builder(ParquetProperties toCopy) {
428446 this .byteStreamSplitEnabled = ColumnProperty .builder (toCopy .byteStreamSplitEnabled );
429447 this .extraMetaData = toCopy .extraMetaData ;
430448 this .statistics = ColumnProperty .builder (toCopy .statistics );
449+ this .sizeStatistics = ColumnProperty .builder (toCopy .sizeStatistics );
431450 }
432451
433452 /**
@@ -693,6 +712,30 @@ public Builder withStatisticsEnabled(boolean enabled) {
693712 return this ;
694713 }
695714
715+ /**
716+ * Sets whether size statistics are enabled globally. When disabled, size statistics will not be collected
717+ * for any column unless explicitly enabled for specific columns.
718+ *
719+ * @param enabled whether to collect size statistics globally
720+ * @return this builder for method chaining
721+ */
722+ public Builder withSizeStatisticsEnabled (boolean enabled ) {
723+ this .sizeStatistics .withDefaultValue (enabled );
724+ return this ;
725+ }
726+
727+ /**
728+ * Sets the size statistics enabled/disabled for the specified column. All column size statistics are enabled by default.
729+ *
730+ * @param columnPath the path of the column (dot-string)
731+ * @param enabled whether to collect size statistics for the column
732+ * @return this builder for method chaining
733+ */
734+ public Builder withSizeStatisticsEnabled (String columnPath , boolean enabled ) {
735+ this .sizeStatistics .withValue (columnPath , enabled );
736+ return this ;
737+ }
738+
696739 public ParquetProperties build () {
697740 ParquetProperties properties = new ParquetProperties (this );
698741 // we pass a constructed but uninitialized factory to ParquetProperties above as currently
0 commit comments