@@ -192,7 +192,12 @@ private RowRanges applyPredicate(
192192 return allRows ();
193193 }
194194
195- return RowRanges .create (rowCount , func .apply (ci ), oi );
195+ if (!isValidIndexSize (ci , oi , columnPath )) {
196+ return allRows ();
197+ }
198+
199+ PrimitiveIterator .OfInt pageIndexes = func .apply (ci );
200+ return RowRanges .create (rowCount , pageIndexes , oi );
196201 }
197202
198203 @ Override
@@ -220,4 +225,32 @@ public RowRanges visit(Not not) {
220225 throw new IllegalArgumentException (
221226 "Predicates containing a NOT must be run through LogicalInverseRewriter. " + not );
222227 }
228+
229+ /**
230+ * Validates that column index and offset index metadata are consistent and can be used safely.
231+ *
232+ * @param columnIndex the column index to validate
233+ * @param offsetIndex the offset index to validate
234+ * @param columnPath the column path for error reporting
235+ * @return true if metadata is valid and safe to use, false if corrupt and should be ignored
236+ */
237+ private static boolean isValidIndexSize (ColumnIndex columnIndex , OffsetIndex offsetIndex , ColumnPath columnPath ) {
238+
239+ int columnIndexSize = columnIndex .getMinValues ().size ();
240+ int offsetIndexSize = offsetIndex .getPageCount ();
241+
242+ if (columnIndexSize != offsetIndexSize ) {
243+ LOGGER .warn (
244+ "Column index and offset index size mismatch for column {}: "
245+ + "column index has {} entries but offset index has {} pages. "
246+ + "This indicates corrupted metadata from the writer. "
247+ + "Ignoring column index for filtering to avoid errors." ,
248+ columnPath ,
249+ columnIndexSize ,
250+ offsetIndexSize );
251+ return false ;
252+ }
253+
254+ return true ;
255+ }
223256}
0 commit comments