apache
diff --git a/pyiceberg/expressions/bloom_filter.py b/pyiceberg/expressions/bloom_filter.py
Lines changed: 0 additions & 216 deletions
diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
Lines changed: 0 additions & 38 deletions
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
Lines changed: 0 additions & 28 deletions
@@ -288,13 +288,6 @@ def __repr__(self) -> str:
             required=False,
             doc="ID representing sort order for this file",
         ),
-        NestedField(
-            field_id=146,
-            name="bloom_filter_bytes",
-            field_type=MapType(key_id=147, key_type=IntegerType(), value_id=148, value_type=BinaryType()),
-            required=False,
-            doc="Map of column id to bloom filter",
-        ),
     ),
     3: StructType(
         NestedField(
@@ -418,13 +411,6 @@ def __repr__(self) -> str:
             required=False,
             doc="The length of a referenced content stored in the file; required if content_offset is present",
         ),
-        NestedField(
-            field_id=146,
-            name="bloom_filter_bytes",
-            field_type=MapType(key_id=147, key_type=IntegerType(), value_id=148, value_type=BinaryType()),
-            required=False,
-            doc="Map of column id to bloom filter",
-        ),
     ),
 }
 
@@ -528,17 +514,6 @@ def equality_ids(self) -> List[int] | None:
     def sort_order_id(self) -> int | None:
         return self._data[15]
 
-    @property
-    def bloom_filter_bytes(self) -> Dict[int, bytes] | None:
-        """Get bloom filter bytes for all columns.
-
-        Returns a dict mapping column ID to bloom filter bytes.
-        """
-        # Get bloom_filter_bytes which is the last field in the struct
-        if len(self._data) > 16:
-            return self._data[16]
-        return None
-
     # Spec ID should not be stored in the file
     _spec_id: int
 
@@ -561,19 +536,6 @@ def __hash__(self) -> int:
         """Return the hash of the file path."""
         return hash(self.file_path)
 
-    def get_bloom_filter(self, column_id: int) -> bytes | None:
-        """Get bloom filter bytes for a specific column.
-
-        Args:
-            column_id: The column ID to get the bloom filter for.
-
-        Returns:
-            Bloom filter bytes for the column, or None if not available.
-        """
-        if self.bloom_filter_bytes and column_id in self.bloom_filter_bytes:
-            return self.bloom_filter_bytes[column_id]
-        return None
-
     def __eq__(self, other: Any) -> bool:
         """Compare the datafile with another object.
 
 
@@ -1922,30 +1922,6 @@ def _build_residual_evaluator(self, spec_id: int) -> Callable[[DataFile], Residu
             )
         )
 
-    def _should_keep_file_with_bloom_filter(self, data_file: DataFile) -> bool:
-        """Check if a data file should be kept based on bloom filter evaluation.
-
-        Args:
-            data_file: The data file to evaluate.
-
-        Returns:
-            True if the file should be kept, False if it can be pruned.
-        """
-        if data_file.bloom_filter_bytes is None:
-            # No bloom filter for this file
-            return True
-
-        try:
-            from pyiceberg.expressions.bloom_filter import BloomFilterEvaluator
-            from pyiceberg.expressions.visitors import visit
-
-            # Use the bloom filter evaluator to check if the file might contain matching rows
-            evaluator = BloomFilterEvaluator(data_file, self.table_metadata.schema())
-            return visit(self.row_filter, evaluator)
-        except Exception:
-            # If there's any error evaluating bloom filters, be conservative and keep the file
-            return True
-
     @staticmethod
     def _check_sequence_number(min_sequence_number: int, manifest: ManifestFile) -> bool:
         """Ensure that no manifests are loaded that contain deletes that are older than the data.
@@ -2021,10 +1997,6 @@ def plan_files(self) -> Iterable[FileScanTask]:
         for manifest_entry in chain.from_iterable(self.scan_plan_helper()):
             data_file = manifest_entry.data_file
             if data_file.content == DataFileContent.DATA:
-                # Apply bloom filter evaluation to prune files that definitely don't match the filter
-                if not self._should_keep_file_with_bloom_filter(data_file):
-                    # Skip this file as it cannot contain matching rows
-                    continue
                 data_entries.append(manifest_entry)
             elif data_file.content == DataFileContent.POSITION_DELETES:
                 positional_delete_entries.add(manifest_entry)