diff --git a/be/src/exec/scan/file_scanner.cpp b/be/src/exec/scan/file_scanner.cpp
index 64a0616674f040..e71b4a451562a7 100644
--- a/be/src/exec/scan/file_scanner.cpp
+++ b/be/src/exec/scan/file_scanner.cpp
@@ -58,6 +58,7 @@
 #include "exprs/vexpr_fwd.h"
 #include "exprs/vslot_ref.h"
 #include "format/arrow/arrow_stream_reader.h"
+#include "format/count_reader.h"
 #include "format/csv/csv_reader.h"
 #include "format/json/new_json_reader.h"
 #include "format/native/native_reader.h"
@@ -125,6 +126,25 @@ FileScanner::FileScanner(RuntimeState* state, FileScanLocalState* local_state, i
     _input_tuple_desc = state->desc_tbl().get_tuple_descriptor(_params->src_tuple_id);
     _real_tuple_desc = _input_tuple_desc == nullptr ? _output_tuple_desc : _input_tuple_desc;
     _is_load = (_input_tuple_desc != nullptr);
+    _configure_file_scan_handlers();
+}
+
+void FileScanner::_configure_file_scan_handlers() {
+    if (_is_load) {
+        _init_src_block_handler = &FileScanner::_init_src_block_for_load;
+        _process_src_block_after_read_handler =
+                &FileScanner::_process_src_block_after_read_for_load;
+        _should_push_down_predicates_handler = &FileScanner::_should_push_down_predicates_for_load;
+        _should_enable_condition_cache_handler =
+                &FileScanner::_should_enable_condition_cache_for_load;
+    } else {
+        _init_src_block_handler = &FileScanner::_init_src_block_for_query;
+        _process_src_block_after_read_handler =
+                &FileScanner::_process_src_block_after_read_for_query;
+        _should_push_down_predicates_handler = &FileScanner::_should_push_down_predicates_for_query;
+        _should_enable_condition_cache_handler =
+                &FileScanner::_should_enable_condition_cache_for_query;
+    }
 }
 
 Status FileScanner::init(RuntimeState* state, const VExprContextSPtrs& conjuncts) {
@@ -457,12 +477,6 @@ Status FileScanner::_get_block_wrapped(RuntimeState* state, Block* block, bool*
         // For query job, simply set _src_block_ptr to block.
         size_t read_rows = 0;
         RETURN_IF_ERROR(_init_src_block(block));
-        if (_need_iceberg_rowid_column && _current_range.__isset.table_format_params &&
-            _current_range.table_format_params.table_format_type == "iceberg") {
-            if (auto* iceberg_reader = dynamic_cast<IcebergTableReader*>(_cur_reader.get())) {
-                iceberg_reader->set_row_id_column_position(_iceberg_rowid_column_pos);
-            }
-        }
         {
             SCOPED_TIMER(_get_block_timer);
 
@@ -480,23 +494,7 @@ Status FileScanner::_get_block_wrapped(RuntimeState* state, Block* block, bool*
             // If the push_down_agg_type is COUNT, no need to do the rest,
             // because we only save a number in block.
             if (_get_push_down_agg_type() != TPushAggOp::type::COUNT) {
-                // Convert the src block columns type to string in-place.
-                RETURN_IF_ERROR(_cast_to_input_block(block));
-                // FileReader can fill partition and missing columns itself
-                if (!_cur_reader->fill_all_columns()) {
-                    // Fill rows in src block with partition columns from path. (e.g. Hive partition columns)
-                    RETURN_IF_ERROR(_fill_columns_from_path(read_rows));
-                    // Fill columns not exist in file with null or default value
-                    RETURN_IF_ERROR(_fill_missing_columns(read_rows));
-                }
-                // Apply _pre_conjunct_ctxs to filter src block.
-                RETURN_IF_ERROR(_pre_filter_src_block());
-
-                // Convert src block to output block (dest block), string to dest data type and apply filters.
-                RETURN_IF_ERROR(_convert_to_output_block(block));
-                // Truncate char columns or varchar columns if size is smaller than file columns
-                // or not found in the file column schema.
-                RETURN_IF_ERROR(_truncate_char_or_varchar_columns(block));
+                RETURN_IF_ERROR(_process_src_block_after_read(block));
             }
         }
         break;
@@ -517,16 +515,15 @@ Status FileScanner::_get_block_wrapped(RuntimeState* state, Block* block, bool*
  * This is a temporary method, and will be replaced by tvf.
  */
 Status FileScanner::_check_output_block_types() {
-    if (_is_load) {
-        TFileFormatType::type format_type = _params->format_type;
-        if (format_type == TFileFormatType::FORMAT_PARQUET ||
-            format_type == TFileFormatType::FORMAT_ORC) {
-            for (auto slot : _output_tuple_desc->slots()) {
-                if (is_complex_type(slot->type()->get_primitive_type())) {
-                    return Status::InternalError(
-                            "Parquet/orc doesn't support complex types in broker/stream load, "
-                            "please use tvf(table value function) to insert complex types.");
-                }
+    // Only called from _init_src_block_for_load, so _is_load is always true.
+    TFileFormatType::type format_type = _params->format_type;
+    if (format_type == TFileFormatType::FORMAT_PARQUET ||
+        format_type == TFileFormatType::FORMAT_ORC) {
+        for (auto slot : _output_tuple_desc->slots()) {
+            if (is_complex_type(slot->type()->get_primitive_type())) {
+                return Status::InternalError(
+                        "Parquet/orc doesn't support complex types in broker/stream load, "
+                        "please use tvf(table value function) to insert complex types.");
             }
         }
     }
@@ -534,29 +531,22 @@ Status FileScanner::_check_output_block_types() {
 }
 
 Status FileScanner::_init_src_block(Block* block) {
-    if (!_is_load) {
-        _src_block_ptr = block;
-
-        bool update_name_to_idx = _src_block_name_to_idx.empty();
-        _iceberg_rowid_column_pos = -1;
-        if (_need_iceberg_rowid_column && _current_range.__isset.table_format_params &&
-            _current_range.table_format_params.table_format_type == "iceberg") {
-            int row_id_idx = block->get_position_by_name(BeConsts::ICEBERG_ROWID_COL);
-            if (row_id_idx >= 0) {
-                _iceberg_rowid_column_pos = row_id_idx;
-                if (!update_name_to_idx &&
-                    !_src_block_name_to_idx.contains(BeConsts::ICEBERG_ROWID_COL)) {
-                    update_name_to_idx = true;
-                }
-            }
-        }
+    DCHECK(_init_src_block_handler != nullptr);
+    return (this->*_init_src_block_handler)(block);
+}
 
-        // Build name to index map only once on first call
-        if (update_name_to_idx) {
-            _src_block_name_to_idx = block->get_name_to_pos_map();
-        }
-        return Status::OK();
+Status FileScanner::_init_src_block_for_query(Block* block) {
+    _src_block_ptr = block;
+
+    // Build name to index map only once on first call.
+    if (_src_block_name_to_idx.empty()) {
+        _src_block_name_to_idx = block->get_name_to_pos_map();
     }
+    return Status::OK();
+}
+
+Status FileScanner::_init_src_block_for_load(Block* block) {
+    static_cast<void>(block);
     RETURN_IF_ERROR(_check_output_block_types());
 
     // if (_src_block_init) {
@@ -605,9 +595,7 @@ Status FileScanner::_init_src_block(Block* block) {
 }
 
 Status FileScanner::_cast_to_input_block(Block* block) {
-    if (!_is_load) {
-        return Status::OK();
-    }
+    // Only called from _process_src_block_after_read_for_load, so _is_load is always true.
     SCOPED_TIMER(_cast_to_input_block_timer);
     // cast primitive type(PT0) to primitive type(PT1)
     uint32_t idx = 0;
@@ -640,84 +628,8 @@ Status FileScanner::_cast_to_input_block(Block* block) {
     return Status::OK();
 }
 
-Status FileScanner::_fill_columns_from_path(size_t rows) {
-    if (!_fill_partition_from_path) {
-        return Status::OK();
-    }
-    DataTypeSerDe::FormatOptions _text_formatOptions;
-    for (auto& kv : _partition_col_descs) {
-        auto doris_column =
-                _src_block_ptr->get_by_position(_src_block_name_to_idx[kv.first]).column;
-        // _src_block_ptr points to a mutable block created by this class itself, so const_cast can be used here.
-        IColumn* col_ptr = const_cast<IColumn*>(doris_column.get());
-        auto& [value, slot_desc] = kv.second;
-        auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
-        Slice slice(value.data(), value.size());
-        uint64_t num_deserialized = 0;
-        if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows,
-                                                            &num_deserialized,
-                                                            _text_formatOptions) != Status::OK()) {
-            return Status::InternalError("Failed to fill partition column: {}={}",
-                                         slot_desc->col_name(), value);
-        }
-        if (num_deserialized != rows) {
-            return Status::InternalError(
-                    "Failed to fill partition column: {}={} ."
-                    "Number of rows expected to be written : {}, number of rows actually written : "
-                    "{}",
-                    slot_desc->col_name(), value, num_deserialized, rows);
-        }
-    }
-    return Status::OK();
-}
-
-Status FileScanner::_fill_missing_columns(size_t rows) {
-    if (_missing_cols.empty()) {
-        return Status::OK();
-    }
-
-    SCOPED_TIMER(_fill_missing_columns_timer);
-    for (auto& kv : _missing_col_descs) {
-        if (kv.second == nullptr) {
-            // no default column, fill with null
-            auto mutable_column = _src_block_ptr->get_by_position(_src_block_name_to_idx[kv.first])
-                                          .column->assume_mutable();
-            auto* nullable_column = static_cast<ColumnNullable*>(mutable_column.get());
-            nullable_column->insert_many_defaults(rows);
-        } else {
-            // fill with default value
-            auto& ctx = kv.second;
-            ColumnPtr result_column_ptr;
-            // PT1 => dest primitive type
-            RETURN_IF_ERROR(ctx->execute(_src_block_ptr, result_column_ptr));
-            if (result_column_ptr->use_count() == 1) {
-                // call resize because the first column of _src_block_ptr may not be filled by reader,
-                // so _src_block_ptr->rows() may return wrong result, cause the column created by `ctx->execute()`
-                // has only one row.
-                auto mutable_column = result_column_ptr->assume_mutable();
-                mutable_column->resize(rows);
-                // result_column_ptr maybe a ColumnConst, convert it to a normal column
-                result_column_ptr = result_column_ptr->convert_to_full_column_if_const();
-                auto origin_column_type =
-                        _src_block_ptr->get_by_position(_src_block_name_to_idx[kv.first]).type;
-                bool is_nullable = origin_column_type->is_nullable();
-                if (!_src_block_name_to_idx.contains(kv.first)) {
-                    return Status::InternalError("Column {} not found in src block {}", kv.first,
-                                                 _src_block_ptr->dump_structure());
-                }
-                _src_block_ptr->replace_by_position(
-                        _src_block_name_to_idx[kv.first],
-                        is_nullable ? make_nullable(result_column_ptr) : result_column_ptr);
-            }
-        }
-    }
-    return Status::OK();
-}
-
 Status FileScanner::_pre_filter_src_block() {
-    if (!_is_load) {
-        return Status::OK();
-    }
+    // Only called from _process_src_block_after_read_for_load, so _is_load is always true.
     if (!_pre_conjunct_ctxs.empty()) {
         SCOPED_TIMER(_pre_filter_timer);
         auto origin_column_num = _src_block_ptr->columns();
@@ -730,9 +642,7 @@ Status FileScanner::_pre_filter_src_block() {
 }
 
 Status FileScanner::_convert_to_output_block(Block* block) {
-    if (!_is_load) {
-        return Status::OK();
-    }
+    // Only called from _process_src_block_after_read_for_load, so _is_load is always true.
     SCOPED_TIMER(_convert_to_output_block_timer);
     // The block is passed from scanner context's free blocks,
     // which is initialized by output columns
@@ -852,6 +762,118 @@ Status FileScanner::_convert_to_output_block(Block* block) {
     return Status::OK();
 }
 
+Status FileScanner::_process_src_block_after_read(Block* block) {
+    DCHECK(_process_src_block_after_read_handler != nullptr);
+    return (this->*_process_src_block_after_read_handler)(block);
+}
+
+Status FileScanner::_process_src_block_after_read_for_query(Block* block) {
+    // Truncate CHAR/VARCHAR columns when target size is smaller than file schema.
+    // This is needed for external table queries with truncate_char_or_varchar_columns=true.
+    RETURN_IF_ERROR(_truncate_char_or_varchar_columns(block));
+    return Status::OK();
+}
+
+Status FileScanner::_fill_columns_from_path(size_t rows) {
+    if (_partition_col_descs.empty()) {
+        return Status::OK();
+    }
+    DataTypeSerDe::FormatOptions text_format_options;
+    for (auto& kv : _partition_col_descs) {
+        auto doris_column =
+                _src_block_ptr->get_by_position(_src_block_name_to_idx[kv.first]).column;
+        IColumn* col_ptr = const_cast<IColumn*>(doris_column.get());
+        // Skip if the reader already filled this column (e.g. ORC/Parquet readers
+        // fill partition columns internally via on_fill_partition_columns).
+        if (col_ptr->size() >= rows) {
+            continue;
+        }
+        auto& [value, slot_desc] = kv.second;
+        auto text_serde = slot_desc->get_data_type_ptr()->get_serde();
+        Slice slice(value.data(), value.size());
+        uint64_t num_deserialized = 0;
+        if (_partition_value_is_null.contains(kv.first) && _partition_value_is_null[kv.first]) {
+            col_ptr->insert_many_defaults(rows);
+        } else if (text_serde->deserialize_column_from_fixed_json(
+                           *col_ptr, slice, rows, &num_deserialized, text_format_options) !=
+                   Status::OK()) {
+            return Status::InternalError("Failed to fill partition column: {}={}",
+                                         slot_desc->col_name(), value);
+        } else if (num_deserialized != rows) {
+            return Status::InternalError(
+                    "Failed to fill partition column: {}={}. "
+                    "Number of rows expected: {}, actual: {}",
+                    slot_desc->col_name(), value, rows, num_deserialized);
+        }
+    }
+    return Status::OK();
+}
+
+Status FileScanner::_fill_missing_columns(size_t rows) {
+    // For columns in the table that are not from the file and not partition columns,
+    // fill with default values or NULL.
+    for (const auto& col_desc : _column_descs) {
+        if (col_desc.category != ColumnCategory::REGULAR &&
+            col_desc.category != ColumnCategory::GENERATED) {
+            continue;
+        }
+        if (_is_file_slot.contains(col_desc.slot_desc->id())) {
+            continue;
+        }
+        auto it = _src_block_name_to_idx.find(col_desc.name);
+        if (it == _src_block_name_to_idx.end()) {
+            continue;
+        }
+        auto doris_column = _src_block_ptr->get_by_position(it->second).column;
+        IColumn* col_ptr = const_cast<IColumn*>(doris_column.get());
+        if (col_ptr->size() >= rows) {
+            continue;
+        }
+        size_t need_rows = rows - col_ptr->size();
+        if (col_desc.default_expr != nullptr) {
+            Block default_block;
+            default_block.insert(
+                    ColumnWithTypeAndName(col_desc.slot_desc->get_data_type_ptr()->create_column(),
+                                          col_desc.slot_desc->get_data_type_ptr(), col_desc.name));
+            int result_column_id = 0;
+            RETURN_IF_ERROR(col_desc.default_expr->execute(&default_block, &result_column_id));
+            auto& default_col = default_block.get_by_position(result_column_id).column;
+            for (size_t i = 0; i < need_rows; ++i) {
+                col_ptr->insert_from(*default_col, 0);
+            }
+        } else {
+            col_ptr->insert_many_defaults(need_rows);
+        }
+    }
+    return Status::OK();
+}
+
+Status FileScanner::_process_src_block_after_read_for_load(Block* block) {
+    // Convert the src block columns type in-place.
+    RETURN_IF_ERROR(_cast_to_input_block(block));
+    // Compute row count from file columns (partition columns may be empty at this point).
+    size_t rows = 0;
+    for (size_t i = 0; i < _src_block_ptr->columns(); ++i) {
+        size_t s = _src_block_ptr->get_by_position(i).column->size();
+        if (s > rows) {
+            rows = s;
+        }
+    }
+    // Fill partition columns from path for readers that do not handle them internally
+    // (e.g., CSV, JSON readers in broker/stream load).
+    RETURN_IF_ERROR(_fill_columns_from_path(rows));
+    // Fill missing columns (non-file, non-partition) with default values or NULL.
+    RETURN_IF_ERROR(_fill_missing_columns(rows));
+    // Apply _pre_conjunct_ctxs to filter src block.
+    RETURN_IF_ERROR(_pre_filter_src_block());
+
+    // Convert src block to output block (dest block), then apply filters.
+    RETURN_IF_ERROR(_convert_to_output_block(block));
+    // Truncate CHAR/VARCHAR columns when target size is smaller than file schema.
+    RETURN_IF_ERROR(_truncate_char_or_varchar_columns(block));
+    return Status::OK();
+}
+
 Status FileScanner::_truncate_char_or_varchar_columns(Block* block) {
     // Truncate char columns or varchar columns if size is smaller than file columns
     // or not found in the file column schema.
@@ -927,6 +949,18 @@ Status FileScanner::_create_row_id_column_iterator() {
     return Status::OK();
 }
 
+void FileScanner::_fill_base_init_context(ReaderInitContext* ctx) {
+    ctx->column_descs = &_column_descs;
+    ctx->col_name_to_block_idx = &_src_block_name_to_idx;
+    ctx->state = _state;
+    ctx->tuple_descriptor = _real_tuple_desc;
+    ctx->row_descriptor = _default_val_row_desc.get();
+    ctx->params = _params;
+    ctx->range = &_current_range;
+    ctx->table_info_node = TableSchemaChangeHelper::ConstNode::get_instance();
+    ctx->push_down_agg_type = _get_push_down_agg_type();
+}
+
 Status FileScanner::_get_next_reader() {
     while (true) {
         if (_cur_reader) {
@@ -949,7 +983,7 @@ Status FileScanner::_get_next_reader() {
         const TFileRangeDesc& range = _current_range;
         _current_range_path = range.path;
 
-        if (!_partition_slot_descs.empty()) {
+        if (!_partition_slot_index_map.empty()) {
             // we need get partition columns first for runtime filter partition pruning
             RETURN_IF_ERROR(_generate_partition_columns());
 
@@ -991,11 +1025,13 @@ Status FileScanner::_get_next_reader() {
             }
         }
 
-        // JNI reader can only push down column value range
-        bool push_down_predicates = !_is_load && format_type != TFileFormatType::FORMAT_JNI;
+        bool push_down_predicates = _should_push_down_predicates(format_type);
         bool need_to_get_parsed_schema = false;
         switch (format_type) {
         case TFileFormatType::FORMAT_JNI: {
+            ReaderInitContext jni_ctx;
+            _fill_base_init_context(&jni_ctx);
+
             if (range.__isset.table_format_params &&
                 range.table_format_params.table_format_type == "max_compute") {
                 const auto* mc_desc = static_cast<const MaxComputeTableDescriptor*>(
@@ -1006,7 +1042,7 @@ Status FileScanner::_get_next_reader() {
                 std::unique_ptr<MaxComputeJniReader> mc_reader = MaxComputeJniReader::create_unique(
                         mc_desc, range.table_format_params.max_compute_params, _file_slot_descs,
                         range, _state, _profile);
-                init_status = mc_reader->init_reader();
+                init_status = static_cast<GenericReader*>(mc_reader.get())->init_reader(&jni_ctx);
                 _cur_reader = std::move(mc_reader);
             } else if (range.__isset.table_format_params &&
                        range.table_format_params.table_format_type == "paimon") {
@@ -1022,39 +1058,47 @@ Status FileScanner::_get_next_reader() {
                             cpp_reader->set_predicate(std::move(predicate));
                         }
                     }
-                    init_status = cpp_reader->init_reader();
+                    init_status =
+                            static_cast<GenericReader*>(cpp_reader.get())->init_reader(&jni_ctx);
                     _cur_reader = std::move(cpp_reader);
                 } else {
-                    _cur_reader = PaimonJniReader::create_unique(_file_slot_descs, _state, _profile,
-                                                                 range, _params);
-                    init_status = ((PaimonJniReader*)(_cur_reader.get()))->init_reader();
+                    auto paimon_reader = PaimonJniReader::create_unique(_file_slot_descs, _state,
+                                                                        _profile, range, _params);
+                    init_status =
+                            static_cast<GenericReader*>(paimon_reader.get())->init_reader(&jni_ctx);
+                    _cur_reader = std::move(paimon_reader);
                 }
             } else if (range.__isset.table_format_params &&
                        range.table_format_params.table_format_type == "hudi") {
-                _cur_reader = HudiJniReader::create_unique(*_params,
-                                                           range.table_format_params.hudi_params,
-                                                           _file_slot_descs, _state, _profile);
-                init_status = ((HudiJniReader*)_cur_reader.get())->init_reader();
-
+                auto hudi_reader = HudiJniReader::create_unique(
+                        *_params, range.table_format_params.hudi_params, _file_slot_descs, _state,
+                        _profile);
+                init_status = static_cast<GenericReader*>(hudi_reader.get())->init_reader(&jni_ctx);
+                _cur_reader = std::move(hudi_reader);
             } else if (range.__isset.table_format_params &&
                        range.table_format_params.table_format_type == "trino_connector") {
-                _cur_reader = TrinoConnectorJniReader::create_unique(_file_slot_descs, _state,
-                                                                     _profile, range);
-                init_status = ((TrinoConnectorJniReader*)(_cur_reader.get()))->init_reader();
+                auto trino_reader = TrinoConnectorJniReader::create_unique(_file_slot_descs, _state,
+                                                                           _profile, range);
+                init_status =
+                        static_cast<GenericReader*>(trino_reader.get())->init_reader(&jni_ctx);
+                _cur_reader = std::move(trino_reader);
             } else if (range.__isset.table_format_params &&
                        range.table_format_params.table_format_type == "jdbc") {
                 // Extract jdbc params from table_format_params
                 std::map<std::string, std::string> jdbc_params(
                         range.table_format_params.jdbc_params.begin(),
                         range.table_format_params.jdbc_params.end());
-                _cur_reader = JdbcJniReader::create_unique(_file_slot_descs, _state, _profile,
-                                                           jdbc_params);
-                init_status = ((JdbcJniReader*)(_cur_reader.get()))->init_reader();
+                auto jdbc_reader = JdbcJniReader::create_unique(_file_slot_descs, _state, _profile,
+                                                                jdbc_params);
+                init_status = static_cast<GenericReader*>(jdbc_reader.get())->init_reader(&jni_ctx);
+                _cur_reader = std::move(jdbc_reader);
             } else if (range.__isset.table_format_params &&
                        range.table_format_params.table_format_type == "iceberg") {
-                _cur_reader = IcebergSysTableJniReader::create_unique(_file_slot_descs, _state,
-                                                                      _profile, range, _params);
-                init_status = ((IcebergSysTableJniReader*)(_cur_reader.get()))->init_reader();
+                auto iceberg_sys_reader = IcebergSysTableJniReader::create_unique(
+                        _file_slot_descs, _state, _profile, range, _params);
+                init_status = static_cast<GenericReader*>(iceberg_sys_reader.get())
+                                      ->init_reader(&jni_ctx);
+                _cur_reader = std::move(iceberg_sys_reader);
             }
             // Set col_name_to_block_idx for JNI readers to avoid repeated map creation
             if (_cur_reader) {
@@ -1068,23 +1112,10 @@ Status FileScanner::_get_next_reader() {
             auto file_meta_cache_ptr = _should_enable_file_meta_cache()
                                                ? ExecEnv::GetInstance()->file_meta_cache()
                                                : nullptr;
-            std::unique_ptr<ParquetReader> parquet_reader = ParquetReader::create_unique(
-                    _profile, *_params, range, _state->query_options().batch_size,
-                    &_state->timezone_obj(), _io_ctx.get(), _state, file_meta_cache_ptr,
-                    _state->query_options().enable_parquet_lazy_mat);
-
-            if (_row_id_column_iterator_pair.second != -1) {
-                RETURN_IF_ERROR(_create_row_id_column_iterator());
-                parquet_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
-            }
-
-            // ATTN: the push down agg type may be set back to NONE,
-            // see IcebergTableReader::init_row_filters for example.
-            parquet_reader->set_push_down_agg_type(_get_push_down_agg_type());
             if (push_down_predicates) {
                 RETURN_IF_ERROR(_process_late_arrival_conjuncts());
             }
-            RETURN_IF_ERROR(_init_parquet_reader(std::move(parquet_reader), file_meta_cache_ptr));
+            RETURN_IF_ERROR(_init_parquet_reader(file_meta_cache_ptr));
 
             need_to_get_parsed_schema = true;
             break;
@@ -1093,20 +1124,10 @@ Status FileScanner::_get_next_reader() {
             auto file_meta_cache_ptr = _should_enable_file_meta_cache()
                                                ? ExecEnv::GetInstance()->file_meta_cache()
                                                : nullptr;
-            std::unique_ptr<OrcReader> orc_reader = OrcReader::create_unique(
-                    _profile, _state, *_params, range, _state->query_options().batch_size,
-                    _state->timezone(), _io_ctx.get(), file_meta_cache_ptr,
-                    _state->query_options().enable_orc_lazy_mat);
-            if (_row_id_column_iterator_pair.second != -1) {
-                RETURN_IF_ERROR(_create_row_id_column_iterator());
-                orc_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
-            }
-
-            orc_reader->set_push_down_agg_type(_get_push_down_agg_type());
             if (push_down_predicates) {
                 RETURN_IF_ERROR(_process_late_arrival_conjuncts());
             }
-            RETURN_IF_ERROR(_init_orc_reader(std::move(orc_reader), file_meta_cache_ptr));
+            RETURN_IF_ERROR(_init_orc_reader(file_meta_cache_ptr));
 
             need_to_get_parsed_schema = true;
             break;
@@ -1122,15 +1143,20 @@ Status FileScanner::_get_next_reader() {
         case TFileFormatType::FORMAT_PROTO: {
             auto reader = CsvReader::create_unique(_state, _profile, &_counter, *_params, range,
                                                    _file_slot_descs, _io_ctx.get());
-
-            init_status = reader->init_reader(_is_load);
+            CsvInitContext csv_ctx;
+            _fill_base_init_context(&csv_ctx);
+            csv_ctx.is_load = _is_load;
+            init_status = static_cast<GenericReader*>(reader.get())->init_reader(&csv_ctx);
             _cur_reader = std::move(reader);
             break;
         }
         case TFileFormatType::FORMAT_TEXT: {
             auto reader = TextReader::create_unique(_state, _profile, &_counter, *_params, range,
                                                     _file_slot_descs, _io_ctx.get());
-            init_status = reader->init_reader(_is_load);
+            CsvInitContext text_ctx;
+            _fill_base_init_context(&text_ctx);
+            text_ctx.is_load = _is_load;
+            init_status = static_cast<GenericReader*>(reader.get())->init_reader(&text_ctx);
             _cur_reader = std::move(reader);
             break;
         }
@@ -1138,39 +1164,53 @@ Status FileScanner::_get_next_reader() {
             _cur_reader =
                     NewJsonReader::create_unique(_state, _profile, &_counter, *_params, range,
                                                  _file_slot_descs, &_scanner_eof, _io_ctx.get());
-            init_status = ((NewJsonReader*)(_cur_reader.get()))
-                                  ->init_reader(_col_default_value_ctx, _is_load);
+            JsonInitContext json_ctx;
+            _fill_base_init_context(&json_ctx);
+            json_ctx.col_default_value_ctx = &_col_default_value_ctx;
+            json_ctx.is_load = _is_load;
+            init_status = _cur_reader->init_reader(&json_ctx);
             break;
         }
 
         case TFileFormatType::FORMAT_WAL: {
             _cur_reader = WalReader::create_unique(_state);
-            init_status = ((WalReader*)(_cur_reader.get()))->init_reader(_output_tuple_desc);
+            WalInitContext wal_ctx;
+            _fill_base_init_context(&wal_ctx);
+            wal_ctx.output_tuple_descriptor = _output_tuple_desc;
+            init_status = _cur_reader->init_reader(&wal_ctx);
             break;
         }
         case TFileFormatType::FORMAT_NATIVE: {
             auto reader =
                     NativeReader::create_unique(_profile, *_params, range, _io_ctx.get(), _state);
-            init_status = reader->init_reader();
+            ReaderInitContext native_ctx;
+            _fill_base_init_context(&native_ctx);
+            init_status = static_cast<GenericReader*>(reader.get())->init_reader(&native_ctx);
             _cur_reader = std::move(reader);
             need_to_get_parsed_schema = false;
             break;
         }
         case TFileFormatType::FORMAT_ARROW: {
+            ReaderInitContext arrow_ctx;
+            _fill_base_init_context(&arrow_ctx);
+
             if (range.__isset.table_format_params &&
                 range.table_format_params.table_format_type == "remote_doris") {
-                _cur_reader =
+                auto doris_reader =
                         RemoteDorisReader::create_unique(_file_slot_descs, _state, _profile, range);
-                init_status = ((RemoteDorisReader*)(_cur_reader.get()))->init_reader();
-                if (_cur_reader) {
-                    static_cast<RemoteDorisReader*>(_cur_reader.get())
-                            ->set_col_name_to_block_idx(&_src_block_name_to_idx);
+                init_status =
+                        static_cast<GenericReader*>(doris_reader.get())->init_reader(&arrow_ctx);
+                if (doris_reader) {
+                    doris_reader->set_col_name_to_block_idx(&_src_block_name_to_idx);
                 }
+                _cur_reader = std::move(doris_reader);
             } else {
-                _cur_reader =
+                auto arrow_reader =
                         ArrowStreamReader::create_unique(_state, _profile, &_counter, *_params,
                                                          range, _file_slot_descs, _io_ctx.get());
-                init_status = ((ArrowStreamReader*)(_cur_reader.get()))->init_reader();
+                init_status =
+                        static_cast<GenericReader*>(arrow_reader.get())->init_reader(&arrow_ctx);
+                _cur_reader = std::move(arrow_reader);
             }
             break;
         }
@@ -1204,16 +1244,13 @@ Status FileScanner::_get_next_reader() {
             return Status::InternalError("failed to init reader, err: {}", init_status.to_string());
         }
 
-        _cur_reader->set_push_down_agg_type(_get_push_down_agg_type());
-        if (_get_push_down_agg_type() == TPushAggOp::type::COUNT &&
-            range.__isset.table_format_params &&
-            range.table_format_params.table_level_row_count >= 0) {
-            // This is a table level count push down operation, no need to call
-            // _set_fill_or_truncate_columns.
-            // in _set_fill_or_truncate_columns, we will use [range.start_offset, end offset]
-            // to filter the row group. But if this is count push down, the offset is undefined,
-            // causing incorrect row group filter and may return empty result.
-        } else {
+        // For table-level COUNT pushdown, offsets are undefined so we must skip
+        // _set_fill_or_truncate_columns (it uses [start_offset, end_offset] to
+        // filter row groups, which would produce incorrect empty results).
+        bool is_table_level_count = _get_push_down_agg_type() == TPushAggOp::type::COUNT &&
+                                    range.__isset.table_format_params &&
+                                    range.table_format_params.table_level_row_count >= 0;
+        if (!is_table_level_count) {
             Status status = _set_fill_or_truncate_columns(need_to_get_parsed_schema);
             if (status.is<END_OF_FILE>()) { // all parquet row groups are filtered
                 continue;
@@ -1222,14 +1259,32 @@ Status FileScanner::_get_next_reader() {
                                              status.to_string());
             }
         }
+
+        // Unified COUNT(*) pushdown: replace the real reader with CountReader
+        // decorator if the reader accepts COUNT and can provide a total row count.
+        if (_cur_reader->get_push_down_agg_type() == TPushAggOp::type::COUNT) {
+            int64_t total_rows = -1;
+            if (is_table_level_count) {
+                // FE-provided count (may account for table-format deletions)
+                total_rows = range.table_format_params.table_level_row_count;
+            } else if (_cur_reader->supports_count_pushdown()) {
+                // File metadata count (ORC footer / Parquet row groups)
+                total_rows = _cur_reader->get_total_rows();
+            }
+            if (total_rows >= 0) {
+                auto batch_size = _state->query_options().batch_size;
+                _cur_reader = std::make_unique<CountReader>(total_rows, batch_size,
+                                                            std::move(_cur_reader));
+            }
+        }
         _cur_reader_eof = false;
         break;
     }
     return Status::OK();
 }
 
-Status FileScanner::_init_parquet_reader(std::unique_ptr<ParquetReader>&& parquet_reader,
-                                         FileMetaCache* file_meta_cache_ptr) {
+Status FileScanner::_init_parquet_reader(FileMetaCache* file_meta_cache_ptr,
+                                         std::unique_ptr<ParquetReader> parquet_reader) {
     const TFileRangeDesc& range = _current_range;
     Status init_status = Status::OK();
 
@@ -1237,227 +1292,237 @@ Status FileScanner::_init_parquet_reader(std::unique_ptr<ParquetReader>&& parque
             _local_state
                     ? _local_state->cast<FileScanLocalState>()._slot_id_to_predicates
                     : phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> {};
+
+    // Build unified ParquetInitContext (shared by all Parquet reader variants)
+    ParquetInitContext pctx;
+    _fill_base_init_context(&pctx);
+    pctx.conjuncts = &_push_down_conjuncts;
+    pctx.slot_id_to_predicates = &slot_id_to_predicates;
+    pctx.colname_to_slot_id = _col_name_to_slot_id;
+    pctx.not_single_slot_filter_conjuncts = &_not_single_slot_filter_conjuncts;
+    pctx.slot_id_to_filter_conjuncts = &_slot_id_to_filter_conjuncts;
+
     if (range.__isset.table_format_params &&
         range.table_format_params.table_format_type == "iceberg") {
+        // IcebergParquetReader IS-A ParquetReader (CRTP mixin), no wrapping needed
         std::unique_ptr<IcebergParquetReader> iceberg_reader = IcebergParquetReader::create_unique(
-                std::move(parquet_reader), _profile, _state, *_params, range, _kv_cache,
-                _io_ctx.get(), file_meta_cache_ptr);
-        if (_need_iceberg_rowid_column) {
-            iceberg_reader->set_need_row_id_column(true);
+                _kv_cache, _profile, *_params, range, _state->query_options().batch_size,
+                &_state->timezone_obj(), _io_ctx.get(), _state, file_meta_cache_ptr);
+
+        // Transfer properties
+        if (_row_id_column_iterator_pair.second != -1) {
+            RETURN_IF_ERROR(_create_row_id_column_iterator());
+            iceberg_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
         }
         if (_row_lineage_columns.row_id_column_idx != -1 ||
             _row_lineage_columns.last_updated_sequence_number_column_idx != -1) {
-            std::shared_ptr<RowLineageColumns> row_lineage_columns;
-            row_lineage_columns = std::make_shared<RowLineageColumns>();
+            auto row_lineage_columns = std::make_shared<RowLineageColumns>();
             row_lineage_columns->row_id_column_idx = _row_lineage_columns.row_id_column_idx;
             row_lineage_columns->last_updated_sequence_number_column_idx =
                     _row_lineage_columns.last_updated_sequence_number_column_idx;
+            const auto& iceberg_params = range.table_format_params.iceberg_params;
+            row_lineage_columns->first_row_id =
+                    iceberg_params.__isset.first_row_id ? iceberg_params.first_row_id : -1;
+            row_lineage_columns->last_updated_sequence_number =
+                    iceberg_params.__isset.last_updated_sequence_number
+                            ? iceberg_params.last_updated_sequence_number
+                            : -1;
             iceberg_reader->set_row_lineage_columns(std::move(row_lineage_columns));
         }
         iceberg_reader->set_push_down_agg_type(_get_push_down_agg_type());
 
-        init_status = iceberg_reader->init_reader(
-                _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts,
-                slot_id_to_predicates, _real_tuple_desc, _default_val_row_desc.get(),
-                _col_name_to_slot_id, &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts);
+        init_status = static_cast<GenericReader*>(iceberg_reader.get())->init_reader(&pctx);
         _cur_reader = std::move(iceberg_reader);
     } else if (range.__isset.table_format_params &&
                range.table_format_params.table_format_type == "paimon") {
-        std::unique_ptr<PaimonParquetReader> paimon_reader = PaimonParquetReader::create_unique(
-                std::move(parquet_reader), _profile, _state, *_params, range, _kv_cache,
-                _io_ctx.get(), file_meta_cache_ptr);
-        init_status = paimon_reader->init_reader(
-                _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts,
-                slot_id_to_predicates, _real_tuple_desc, _default_val_row_desc.get(),
-                _col_name_to_slot_id, &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts);
-        RETURN_IF_ERROR(paimon_reader->init_row_filters());
+        // PaimonParquetReader IS-A ParquetReader, no wrapping needed
+        auto paimon_reader = PaimonParquetReader::create_unique(
+                _profile, *_params, range, _state->query_options().batch_size,
+                &_state->timezone_obj(), _kv_cache, _io_ctx.get(), _state, file_meta_cache_ptr);
+        if (_row_id_column_iterator_pair.second != -1) {
+            RETURN_IF_ERROR(_create_row_id_column_iterator());
+            paimon_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
+        }
+        paimon_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        init_status = static_cast<GenericReader*>(paimon_reader.get())->init_reader(&pctx);
         _cur_reader = std::move(paimon_reader);
     } else if (range.__isset.table_format_params &&
                range.table_format_params.table_format_type == "hudi") {
-        std::unique_ptr<HudiParquetReader> hudi_reader = HudiParquetReader::create_unique(
-                std::move(parquet_reader), _profile, _state, *_params, range, _io_ctx.get(),
-                file_meta_cache_ptr);
-        init_status = hudi_reader->init_reader(
-                _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts,
-                slot_id_to_predicates, _real_tuple_desc, _default_val_row_desc.get(),
-                _col_name_to_slot_id, &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts);
+        // HudiParquetReader IS-A ParquetReader, no wrapping needed
+        auto hudi_reader = HudiParquetReader::create_unique(
+                _profile, *_params, range, _state->query_options().batch_size,
+                &_state->timezone_obj(), _io_ctx.get(), _state, file_meta_cache_ptr);
+        if (_row_id_column_iterator_pair.second != -1) {
+            RETURN_IF_ERROR(_create_row_id_column_iterator());
+            hudi_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
+        }
+        hudi_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        init_status = static_cast<GenericReader*>(hudi_reader.get())->init_reader(&pctx);
         _cur_reader = std::move(hudi_reader);
     } else if (range.table_format_params.table_format_type == "hive") {
-        auto hive_reader = HiveParquetReader::create_unique(std::move(parquet_reader), _profile,
-                                                            _state, *_params, range, _io_ctx.get(),
-                                                            &_is_file_slot, file_meta_cache_ptr);
-        init_status = hive_reader->init_reader(
-                _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts,
-                slot_id_to_predicates, _real_tuple_desc, _default_val_row_desc.get(),
-                _col_name_to_slot_id, &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts);
+        auto hive_reader = HiveParquetReader::create_unique(
+                _profile, *_params, range, _state->query_options().batch_size,
+                &_state->timezone_obj(), _io_ctx.get(), _state, &_is_file_slot, file_meta_cache_ptr,
+                _state->query_options().enable_parquet_lazy_mat);
+        if (_row_id_column_iterator_pair.second != -1) {
+            RETURN_IF_ERROR(_create_row_id_column_iterator());
+            hive_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
+        }
+        hive_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        init_status = static_cast<GenericReader*>(hive_reader.get())->init_reader(&pctx);
         _cur_reader = std::move(hive_reader);
     } else if (range.table_format_params.table_format_type == "tvf") {
-        const FieldDescriptor* parquet_meta = nullptr;
-        RETURN_IF_ERROR(parquet_reader->get_file_metadata_schema(&parquet_meta));
-        DCHECK(parquet_meta != nullptr);
-
-        // TVF will first `get_parsed_schema` to obtain file information from BE, and FE will convert
-        // the column names to lowercase (because the query process is case-insensitive),
-        // so the lowercase file column names are used here to match the read columns.
-        std::shared_ptr<TableSchemaChangeHelper::Node> tvf_info_node = nullptr;
-        RETURN_IF_ERROR(TableSchemaChangeHelper::BuildTableInfoUtil::by_parquet_name(
-                _real_tuple_desc, *parquet_meta, tvf_info_node));
-        init_status = parquet_reader->init_reader(
-                _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts,
-                slot_id_to_predicates, _real_tuple_desc, _default_val_row_desc.get(),
-                _col_name_to_slot_id, &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts, tvf_info_node);
+        if (!parquet_reader) {
+            parquet_reader = ParquetReader::create_unique(
+                    _profile, *_params, range, _state->query_options().batch_size,
+                    &_state->timezone_obj(), _io_ctx.get(), _state, file_meta_cache_ptr,
+                    _state->query_options().enable_parquet_lazy_mat);
+        }
+        parquet_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        if (_row_id_column_iterator_pair.second != -1) {
+            RETURN_IF_ERROR(_create_row_id_column_iterator());
+            parquet_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
+        }
+        init_status = static_cast<GenericReader*>(parquet_reader.get())->init_reader(&pctx);
         _cur_reader = std::move(parquet_reader);
     } else if (_is_load) {
-        const FieldDescriptor* parquet_meta = nullptr;
-        RETURN_IF_ERROR(parquet_reader->get_file_metadata_schema(&parquet_meta));
-        DCHECK(parquet_meta != nullptr);
-
-        // Load is case-insensitive, so you to match the columns in the file.
-        std::map<std::string, std::string> file_lower_name_to_native;
-        for (const auto& parquet_field : parquet_meta->get_fields_schema()) {
-            file_lower_name_to_native.emplace(doris::to_lower(parquet_field.name),
-                                              parquet_field.name);
-        }
-        auto load_info_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
-        for (const auto slot : _real_tuple_desc->slots()) {
-            if (file_lower_name_to_native.contains(slot->col_name())) {
-                load_info_node->add_children(slot->col_name(),
-                                             file_lower_name_to_native[slot->col_name()],
-                                             TableSchemaChangeHelper::ConstNode::get_instance());
-                // For Load, `file_scanner` will create block columns using the file type,
-                // there is no schema change when reading inside the struct,
-                // so use `TableSchemaChangeHelper::ConstNode`.
-            } else {
-                load_info_node->add_not_exist_children(slot->col_name());
-            }
+        if (!parquet_reader) {
+            parquet_reader = ParquetReader::create_unique(
+                    _profile, *_params, range, _state->query_options().batch_size,
+                    &_state->timezone_obj(), _io_ctx.get(), _state, file_meta_cache_ptr,
+                    _state->query_options().enable_parquet_lazy_mat);
         }
-
-        init_status = parquet_reader->init_reader(
-                _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts,
-                slot_id_to_predicates, _real_tuple_desc, _default_val_row_desc.get(),
-                _col_name_to_slot_id, &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts, load_info_node);
+        parquet_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        init_status = static_cast<GenericReader*>(parquet_reader.get())->init_reader(&pctx);
         _cur_reader = std::move(parquet_reader);
     }
 
     return init_status;
 }
 
-Status FileScanner::_init_orc_reader(std::unique_ptr<OrcReader>&& orc_reader,
-                                     FileMetaCache* file_meta_cache_ptr) {
+Status FileScanner::_init_orc_reader(FileMetaCache* file_meta_cache_ptr,
+                                     std::unique_ptr<OrcReader> orc_reader) {
     const TFileRangeDesc& range = _current_range;
     Status init_status = Status::OK();
 
+    // Build unified OrcInitContext (shared by all ORC reader variants)
+    OrcInitContext octx;
+    _fill_base_init_context(&octx);
+    octx.conjuncts = &_push_down_conjuncts;
+    octx.not_single_slot_filter_conjuncts = &_not_single_slot_filter_conjuncts;
+    octx.slot_id_to_filter_conjuncts = &_slot_id_to_filter_conjuncts;
+
     if (range.__isset.table_format_params &&
         range.table_format_params.table_format_type == "transactional_hive") {
-        std::unique_ptr<TransactionalHiveReader> tran_orc_reader =
-                TransactionalHiveReader::create_unique(std::move(orc_reader), _profile, _state,
-                                                       *_params, range, _io_ctx.get(),
-                                                       file_meta_cache_ptr);
-        init_status = tran_orc_reader->init_reader(
-                _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, _real_tuple_desc,
-                _default_val_row_desc.get(), &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts);
-        RETURN_IF_ERROR(tran_orc_reader->init_row_filters());
+        // TransactionalHiveReader IS-A OrcReader, no wrapping needed
+        auto tran_orc_reader = TransactionalHiveReader::create_unique(
+                _profile, _state, *_params, range, _state->query_options().batch_size,
+                _state->timezone(), _io_ctx.get(), file_meta_cache_ptr);
+        if (_row_id_column_iterator_pair.second != -1) {
+            RETURN_IF_ERROR(_create_row_id_column_iterator());
+            tran_orc_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
+        }
+        tran_orc_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        init_status = static_cast<GenericReader*>(tran_orc_reader.get())->init_reader(&octx);
+
         _cur_reader = std::move(tran_orc_reader);
     } else if (range.__isset.table_format_params &&
                range.table_format_params.table_format_type == "iceberg") {
+        // IcebergOrcReader IS-A OrcReader (CRTP mixin), no wrapping needed
         std::unique_ptr<IcebergOrcReader> iceberg_reader = IcebergOrcReader::create_unique(
-                std::move(orc_reader), _profile, _state, *_params, range, _kv_cache, _io_ctx.get(),
-                file_meta_cache_ptr);
-        if (_need_iceberg_rowid_column) {
-            iceberg_reader->set_need_row_id_column(true);
+                _kv_cache, _profile, _state, *_params, range, _state->query_options().batch_size,
+                _state->timezone(), _io_ctx.get(), file_meta_cache_ptr);
+
+        // Transfer properties
+        if (_row_id_column_iterator_pair.second != -1) {
+            RETURN_IF_ERROR(_create_row_id_column_iterator());
+            iceberg_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
         }
         if (_row_lineage_columns.row_id_column_idx != -1 ||
             _row_lineage_columns.last_updated_sequence_number_column_idx != -1) {
-            std::shared_ptr<RowLineageColumns> row_lineage_columns;
-            row_lineage_columns = std::make_shared<RowLineageColumns>();
+            auto row_lineage_columns = std::make_shared<RowLineageColumns>();
             row_lineage_columns->row_id_column_idx = _row_lineage_columns.row_id_column_idx;
             row_lineage_columns->last_updated_sequence_number_column_idx =
                     _row_lineage_columns.last_updated_sequence_number_column_idx;
+            const auto& iceberg_params = range.table_format_params.iceberg_params;
+            row_lineage_columns->first_row_id =
+                    iceberg_params.__isset.first_row_id ? iceberg_params.first_row_id : -1;
+            row_lineage_columns->last_updated_sequence_number =
+                    iceberg_params.__isset.last_updated_sequence_number
+                            ? iceberg_params.last_updated_sequence_number
+                            : -1;
             iceberg_reader->set_row_lineage_columns(std::move(row_lineage_columns));
         }
-        init_status = iceberg_reader->init_reader(
-                _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, _real_tuple_desc,
-                _default_val_row_desc.get(), _col_name_to_slot_id,
-                &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts);
+        iceberg_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        init_status = static_cast<GenericReader*>(iceberg_reader.get())->init_reader(&octx);
+
         _cur_reader = std::move(iceberg_reader);
     } else if (range.__isset.table_format_params &&
                range.table_format_params.table_format_type == "paimon") {
-        std::unique_ptr<PaimonOrcReader> paimon_reader = PaimonOrcReader::create_unique(
-                std::move(orc_reader), _profile, _state, *_params, range, _kv_cache, _io_ctx.get(),
-                file_meta_cache_ptr);
-
-        init_status = paimon_reader->init_reader(
-                _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, _real_tuple_desc,
-                _default_val_row_desc.get(), &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts);
-        RETURN_IF_ERROR(paimon_reader->init_row_filters());
+        // PaimonOrcReader IS-A OrcReader, no wrapping needed
+        auto paimon_reader = PaimonOrcReader::create_unique(
+                _profile, _state, *_params, range, _state->query_options().batch_size,
+                _state->timezone(), _kv_cache, _io_ctx.get(), file_meta_cache_ptr);
+        if (_row_id_column_iterator_pair.second != -1) {
+            RETURN_IF_ERROR(_create_row_id_column_iterator());
+            paimon_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
+        }
+        paimon_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        init_status = static_cast<GenericReader*>(paimon_reader.get())->init_reader(&octx);
+
         _cur_reader = std::move(paimon_reader);
     } else if (range.__isset.table_format_params &&
                range.table_format_params.table_format_type == "hudi") {
-        std::unique_ptr<HudiOrcReader> hudi_reader =
-                HudiOrcReader::create_unique(std::move(orc_reader), _profile, _state, *_params,
-                                             range, _io_ctx.get(), file_meta_cache_ptr);
-
-        init_status = hudi_reader->init_reader(
-                _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, _real_tuple_desc,
-                _default_val_row_desc.get(), &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts);
+        // HudiOrcReader IS-A OrcReader, no wrapping needed
+        auto hudi_reader = HudiOrcReader::create_unique(
+                _profile, _state, *_params, range, _state->query_options().batch_size,
+                _state->timezone(), _io_ctx.get(), file_meta_cache_ptr);
+        if (_row_id_column_iterator_pair.second != -1) {
+            RETURN_IF_ERROR(_create_row_id_column_iterator());
+            hudi_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
+        }
+        hudi_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        init_status = static_cast<GenericReader*>(hudi_reader.get())->init_reader(&octx);
+
         _cur_reader = std::move(hudi_reader);
     } else if (range.__isset.table_format_params &&
                range.table_format_params.table_format_type == "hive") {
-        std::unique_ptr<HiveOrcReader> hive_reader = HiveOrcReader::create_unique(
-                std::move(orc_reader), _profile, _state, *_params, range, _io_ctx.get(),
-                &_is_file_slot, file_meta_cache_ptr);
-
-        init_status = hive_reader->init_reader(
-                _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, _real_tuple_desc,
-                _default_val_row_desc.get(), &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts);
+        auto hive_reader = HiveOrcReader::create_unique(
+                _profile, _state, *_params, range, _state->query_options().batch_size,
+                _state->timezone(), _io_ctx.get(), &_is_file_slot, file_meta_cache_ptr,
+                _state->query_options().enable_orc_lazy_mat);
+        if (_row_id_column_iterator_pair.second != -1) {
+            RETURN_IF_ERROR(_create_row_id_column_iterator());
+            hive_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
+        }
+        hive_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        init_status = static_cast<GenericReader*>(hive_reader.get())->init_reader(&octx);
+
         _cur_reader = std::move(hive_reader);
     } else if (range.__isset.table_format_params &&
                range.table_format_params.table_format_type == "tvf") {
-        const orc::Type* orc_type_ptr = nullptr;
-        RETURN_IF_ERROR(orc_reader->get_file_type(&orc_type_ptr));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> tvf_info_node = nullptr;
-        RETURN_IF_ERROR(TableSchemaChangeHelper::BuildTableInfoUtil::by_orc_name(
-                _real_tuple_desc, orc_type_ptr, tvf_info_node));
-        init_status = orc_reader->init_reader(
-                &_file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, false,
-                _real_tuple_desc, _default_val_row_desc.get(), &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts, tvf_info_node);
+        if (!orc_reader) {
+            orc_reader = OrcReader::create_unique(
+                    _profile, _state, *_params, range, _state->query_options().batch_size,
+                    _state->timezone(), _io_ctx.get(), file_meta_cache_ptr,
+                    _state->query_options().enable_orc_lazy_mat);
+        }
+        orc_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        if (_row_id_column_iterator_pair.second != -1) {
+            RETURN_IF_ERROR(_create_row_id_column_iterator());
+            orc_reader->set_row_id_column_iterator(_row_id_column_iterator_pair);
+        }
+        init_status = static_cast<GenericReader*>(orc_reader.get())->init_reader(&octx);
         _cur_reader = std::move(orc_reader);
     } else if (_is_load) {
-        const orc::Type* orc_type_ptr = nullptr;
-        RETURN_IF_ERROR(orc_reader->get_file_type(&orc_type_ptr));
-
-        std::map<std::string, std::string> file_lower_name_to_native;
-        for (uint64_t idx = 0; idx < orc_type_ptr->getSubtypeCount(); idx++) {
-            file_lower_name_to_native.emplace(doris::to_lower(orc_type_ptr->getFieldName(idx)),
-                                              orc_type_ptr->getFieldName(idx));
-        }
-
-        auto load_info_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
-        for (const auto slot : _real_tuple_desc->slots()) {
-            if (file_lower_name_to_native.contains(slot->col_name())) {
-                load_info_node->add_children(slot->col_name(),
-                                             file_lower_name_to_native[slot->col_name()],
-                                             TableSchemaChangeHelper::ConstNode::get_instance());
-            } else {
-                load_info_node->add_not_exist_children(slot->col_name());
-            }
+        if (!orc_reader) {
+            orc_reader = OrcReader::create_unique(
+                    _profile, _state, *_params, range, _state->query_options().batch_size,
+                    _state->timezone(), _io_ctx.get(), file_meta_cache_ptr,
+                    _state->query_options().enable_orc_lazy_mat);
         }
-        init_status = orc_reader->init_reader(
-                &_file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, false,
-                _real_tuple_desc, _default_val_row_desc.get(), &_not_single_slot_filter_conjuncts,
-                &_slot_id_to_filter_conjuncts, load_info_node);
+        orc_reader->set_push_down_agg_type(_get_push_down_agg_type());
+        init_status = static_cast<GenericReader*>(orc_reader.get())->init_reader(&octx);
         _cur_reader = std::move(orc_reader);
     }
 
@@ -1465,15 +1530,11 @@ Status FileScanner::_init_orc_reader(std::unique_ptr<OrcReader>&& orc_reader,
 }
 
 Status FileScanner::_set_fill_or_truncate_columns(bool need_to_get_parsed_schema) {
-    _missing_cols.clear();
     _slot_lower_name_to_col_type.clear();
+
     std::unordered_map<std::string, DataTypePtr> name_to_col_type;
-    RETURN_IF_ERROR(_cur_reader->get_columns(&name_to_col_type, &_missing_cols));
-    if (_need_iceberg_rowid_column && _current_range.__isset.table_format_params &&
-        _current_range.table_format_params.table_format_type == "iceberg") {
-        _missing_cols.erase(BeConsts::ICEBERG_ROWID_COL);
-        _missing_cols.erase(to_lower(BeConsts::ICEBERG_ROWID_COL));
-    }
+    RETURN_IF_ERROR(_cur_reader->get_columns(&name_to_col_type));
+
     for (const auto& [col_name, col_type] : name_to_col_type) {
         auto col_name_lower = to_lower(col_name);
         if (_partition_col_descs.contains(col_name_lower)) {
@@ -1492,33 +1553,6 @@ Status FileScanner::_set_fill_or_truncate_columns(bool need_to_get_parsed_schema
         _slot_lower_name_to_col_type.emplace(col_name_lower, col_type);
     }
 
-    if (!_fill_partition_from_path && config::enable_iceberg_partition_column_fallback) {
-        // check if the cols of _partition_col_descs are in _missing_cols
-        // if so, set _fill_partition_from_path to true and remove the col from _missing_cols
-        for (const auto& [col_name, col_type] : _partition_col_descs) {
-            if (_missing_cols.contains(col_name)) {
-                _fill_partition_from_path = true;
-                _missing_cols.erase(col_name);
-            }
-        }
-    }
-
-    RETURN_IF_ERROR(_generate_missing_columns());
-    if (_fill_partition_from_path) {
-        RETURN_IF_ERROR(_cur_reader->set_fill_columns(_partition_col_descs, _missing_col_descs));
-    } else {
-        // If the partition columns are not from path, we only fill the missing columns.
-        RETURN_IF_ERROR(_cur_reader->set_fill_columns({}, _missing_col_descs));
-    }
-    if (VLOG_NOTICE_IS_ON && !_missing_cols.empty() && _is_load) {
-        fmt::memory_buffer col_buf;
-        for (auto& col : _missing_cols) {
-            fmt::format_to(col_buf, " {}", col);
-        }
-        VLOG_NOTICE << fmt::format("Unknown columns:{} in file {}", fmt::to_string(col_buf),
-                                   _current_range.path);
-    }
-
     RETURN_IF_ERROR(_generate_truncate_columns(need_to_get_parsed_schema));
     return Status::OK();
 }
@@ -1588,19 +1622,21 @@ Status FileScanner::read_lines_from_range(const TFileRangeDesc& range,
                     std::unique_ptr<ParquetReader> parquet_reader = ParquetReader::create_unique(
                             _profile, *_params, range, 1, &_state->timezone_obj(), _io_ctx.get(),
                             _state, file_meta_cache_ptr, false);
-
-                    RETURN_IF_ERROR(parquet_reader->read_by_rows(row_ids));
                     RETURN_IF_ERROR(
-                            _init_parquet_reader(std::move(parquet_reader), file_meta_cache_ptr));
+                            _init_parquet_reader(file_meta_cache_ptr, std::move(parquet_reader)));
+                    // _init_parquet_reader may create a new table-format specific reader
+                    // (e.g., HiveParquetReader) that replaces the original parquet_reader.
+                    // We need to re-apply read_by_rows to the actual _cur_reader.
+                    RETURN_IF_ERROR(_cur_reader->read_by_rows(row_ids));
                     break;
                 }
                 case TFileFormatType::FORMAT_ORC: {
                     std::unique_ptr<OrcReader> orc_reader = OrcReader::create_unique(
                             _profile, _state, *_params, range, 1, _state->timezone(), _io_ctx.get(),
                             file_meta_cache_ptr, false);
-
-                    RETURN_IF_ERROR(orc_reader->read_by_rows(row_ids));
-                    RETURN_IF_ERROR(_init_orc_reader(std::move(orc_reader), file_meta_cache_ptr));
+                    RETURN_IF_ERROR(_init_orc_reader(file_meta_cache_ptr, std::move(orc_reader)));
+                    // Same as above: re-apply read_by_rows to the actual _cur_reader.
+                    RETURN_IF_ERROR(_cur_reader->read_by_rows(row_ids));
                     break;
                 }
                 default: {
@@ -1639,41 +1675,33 @@ Status FileScanner::_generate_partition_columns() {
     _partition_col_descs.clear();
     _partition_value_is_null.clear();
     const TFileRangeDesc& range = _current_range;
-    if (range.__isset.columns_from_path && !_partition_slot_descs.empty()) {
-        for (const auto& slot_desc : _partition_slot_descs) {
-            if (slot_desc) {
-                auto it = _partition_slot_index_map.find(slot_desc->id());
-                if (it == std::end(_partition_slot_index_map)) {
-                    return Status::InternalError("Unknown source slot descriptor, slot_id={}",
-                                                 slot_desc->id());
-                }
-                const std::string& column_from_path = range.columns_from_path[it->second];
-                _partition_col_descs.emplace(slot_desc->col_name(),
-                                             std::make_tuple(column_from_path, slot_desc));
-                if (range.__isset.columns_from_path_is_null) {
-                    _partition_value_is_null.emplace(slot_desc->col_name(),
-                                                     range.columns_from_path_is_null[it->second]);
-                }
-            }
-        }
+    if (!range.__isset.columns_from_path_keys) {
+        return Status::OK();
     }
-    return Status::OK();
-}
 
-Status FileScanner::_generate_missing_columns() {
-    _missing_col_descs.clear();
-    if (!_missing_cols.empty()) {
-        for (auto* slot_desc : _real_tuple_desc->slots()) {
-            if (!_missing_cols.contains(slot_desc->col_name())) {
-                continue;
-            }
+    std::unordered_map<std::string, int> partition_name_to_key_index;
+    int index = 0;
+    for (const auto& key : range.columns_from_path_keys) {
+        partition_name_to_key_index.emplace(key, index++);
+    }
 
-            auto it = _col_default_value_ctx.find(slot_desc->col_name());
-            if (it == _col_default_value_ctx.end()) {
-                return Status::InternalError("failed to find default value expr for slot: {}",
-                                             slot_desc->col_name());
+    // Iterate _column_descs to find PARTITION_KEY columns instead of _partition_slot_descs.
+    for (const auto& col_desc : _column_descs) {
+        if (col_desc.category != ColumnCategory::PARTITION_KEY) {
+            continue;
+        }
+        auto pit = partition_name_to_key_index.find(col_desc.name);
+        if (pit != partition_name_to_key_index.end()) {
+            int values_index = pit->second;
+            if (range.__isset.columns_from_path && values_index < range.columns_from_path.size()) {
+                _partition_col_descs.emplace(
+                        col_desc.name,
+                        std::make_tuple(range.columns_from_path[values_index], col_desc.slot_desc));
+                if (range.__isset.columns_from_path_is_null) {
+                    _partition_value_is_null.emplace(col_desc.name,
+                                                     range.columns_from_path_is_null[values_index]);
+                }
             }
-            _missing_col_descs.emplace(slot_desc->col_name(), it->second);
         }
     }
     return Status::OK();
@@ -1705,7 +1733,6 @@ Status FileScanner::_init_expr_ctxes() {
     }
 
     _num_of_columns_from_file = _params->num_of_columns_from_file;
-
     for (const auto& slot_info : _params->required_slots) {
         auto slot_id = slot_info.slot_id;
         auto it = full_src_slot_map.find(slot_id);
@@ -1713,43 +1740,59 @@ Status FileScanner::_init_expr_ctxes() {
             return Status::InternalError(
                     fmt::format("Unknown source slot descriptor, slot_id={}", slot_id));
         }
+
+        ColumnDescriptor col_desc;
+        col_desc.name = it->second->col_name();
+        col_desc.slot_desc = it->second;
+
+        // Read category from Thrift if available (new FE), otherwise fall back
+        // to slot_info.is_file_slot + partition_name_to_key_index_map for broker/stream load
+        // where the FE does not set TColumnCategory.
+        if (slot_info.__isset.category) {
+            switch (slot_info.category) {
+            case TColumnCategory::REGULAR:
+                col_desc.category = ColumnCategory::REGULAR;
+                break;
+            case TColumnCategory::PARTITION_KEY:
+                col_desc.category = ColumnCategory::PARTITION_KEY;
+                break;
+            case TColumnCategory::SYNTHESIZED:
+                col_desc.category = ColumnCategory::SYNTHESIZED;
+                break;
+            case TColumnCategory::GENERATED:
+                col_desc.category = ColumnCategory::GENERATED;
+                break;
+            }
+        } else if (partition_name_to_key_index_map.contains(it->second->col_name()) &&
+                   !slot_info.is_file_slot) {
+            col_desc.category = ColumnCategory::PARTITION_KEY;
+        }
+
         if (it->second->col_name().starts_with(BeConsts::GLOBAL_ROWID_COL)) {
             _row_id_column_iterator_pair.second = _default_val_row_desc->get_column_id(slot_id);
             continue;
         }
-        if (it->second->col_name() == BeConsts::ICEBERG_ROWID_COL) {
-            _need_iceberg_rowid_column = true;
-            continue;
-        }
 
+        bool is_row_lineage_col = false;
         if (it->second->col_name() == IcebergTableReader::ROW_LINEAGE_ROW_ID) {
             _row_lineage_columns.row_id_column_idx = _default_val_row_desc->get_column_id(slot_id);
+            is_row_lineage_col = true;
         }
 
         if (it->second->col_name() == IcebergTableReader::ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER) {
             _row_lineage_columns.last_updated_sequence_number_column_idx =
                     _default_val_row_desc->get_column_id(slot_id);
+            is_row_lineage_col = true;
         }
-
-        if (slot_info.is_file_slot) {
-            _is_file_slot.emplace(slot_id);
-            _file_slot_descs.emplace_back(it->second);
-            _file_col_names.push_back(it->second->col_name());
+        if (is_row_lineage_col) {
+            col_desc.category = ColumnCategory::SYNTHESIZED;
         }
 
+        // Derive is_file_slot from category
+        bool is_file_slot = (col_desc.category == ColumnCategory::REGULAR ||
+                             col_desc.category == ColumnCategory::GENERATED);
+
         if (partition_name_to_key_index_map.contains(it->second->col_name())) {
-            if (slot_info.is_file_slot) {
-                // If there is slot which is both a partition column and a file column,
-                // we should not fill the partition column from path.
-                _fill_partition_from_path = false;
-            } else if (!_fill_partition_from_path) {
-                // This should not happen
-                return Status::InternalError(
-                        "Partition column {} is not a file column, but there is already a column "
-                        "which is both a partition column and a file column.",
-                        it->second->col_name());
-            }
-            _partition_slot_descs.emplace_back(it->second);
             if (_is_load) {
                 auto iti = full_src_index_map.find(slot_id);
                 _partition_slot_index_map.emplace(slot_id, iti->second - _num_of_columns_from_file);
@@ -1758,20 +1801,66 @@ Status FileScanner::_init_expr_ctxes() {
                 _partition_slot_index_map.emplace(slot_id, kit->second);
             }
         }
+
+        if (is_file_slot) {
+            _is_file_slot.emplace(slot_id);
+            _file_slot_descs.emplace_back(it->second);
+            _file_col_names.push_back(it->second->col_name());
+        }
+
+        _column_descs.push_back(col_desc);
     }
 
     // set column name to default value expr map
-    for (auto* slot_desc : _real_tuple_desc->slots()) {
+    // new inline TFileScanSlotInfo.default_value_expr (preferred)
+    for (const auto& slot_info : _params->required_slots) {
+        auto slot_id = slot_info.slot_id;
+        auto it = full_src_slot_map.find(slot_id);
+        if (it == std::end(full_src_slot_map)) {
+            continue;
+        }
+        const std::string& col_name = it->second->col_name();
+
         VExprContextSPtr ctx;
-        auto it = _params->default_value_of_src_slot.find(slot_desc->id());
-        if (it != std::end(_params->default_value_of_src_slot)) {
-            if (!it->second.nodes.empty()) {
-                RETURN_IF_ERROR(VExpr::create_expr_tree(it->second, ctx));
-                RETURN_IF_ERROR(ctx->prepare(_state, *_default_val_row_desc));
-                RETURN_IF_ERROR(ctx->open(_state));
-            }
+        bool has_default = false;
+
+        // Prefer inline default_value_expr from TFileScanSlotInfo (new FE)
+        if (slot_info.__isset.default_value_expr && !slot_info.default_value_expr.nodes.empty()) {
+            RETURN_IF_ERROR(VExpr::create_expr_tree(slot_info.default_value_expr, ctx));
+            RETURN_IF_ERROR(ctx->prepare(_state, *_default_val_row_desc));
+            RETURN_IF_ERROR(ctx->open(_state));
+            has_default = true;
+        } else if (slot_info.__isset.default_value_expr) {
+            // Empty nodes means null default (same as legacy empty TExpr)
+            has_default = true;
+        }
+
+        // // Fall back to legacy default_value_of_src_slot map (old FE)
+        // if (!has_default) {
+        //     auto legacy_it = _params->default_value_of_src_slot.find(slot_id);
+        //     if (legacy_it != std::end(_params->default_value_of_src_slot)) {
+        //         if (!legacy_it->second.nodes.empty()) {
+        //             RETURN_IF_ERROR(VExpr::create_expr_tree(legacy_it->second, ctx));
+        //             RETURN_IF_ERROR(ctx->prepare(_state, *_default_val_row_desc));
+        //             RETURN_IF_ERROR(ctx->open(_state));
+        //         }
+        //         has_default = true;
+        //     }
+        // }
+
+        if (has_default) {
             // if expr is empty, the default value will be null
-            _col_default_value_ctx.emplace(slot_desc->col_name(), ctx);
+            _col_default_value_ctx.emplace(col_name, ctx);
+        }
+    }
+
+    // Populate default_expr in each ColumnDescriptor from _col_default_value_ctx.
+    // This makes default values available to readers via column_descs, eliminating the
+    // need for the separate _generate_missing_columns roundtrip.
+    for (auto& col_desc : _column_descs) {
+        auto it = _col_default_value_ctx.find(col_desc.name);
+        if (it != _col_default_value_ctx.end()) {
+            col_desc.default_expr = it->second;
         }
     }
 
@@ -1816,10 +1905,34 @@ Status FileScanner::_init_expr_ctxes() {
 }
 
 bool FileScanner::_should_enable_condition_cache() {
-    return _condition_cache_digest != 0 && !_is_load &&
+    DCHECK(_should_enable_condition_cache_handler != nullptr);
+    return _condition_cache_digest != 0 && (this->*_should_enable_condition_cache_handler)() &&
            (!_conjuncts.empty() || !_push_down_conjuncts.empty());
 }
 
+bool FileScanner::_should_enable_condition_cache_for_load() const {
+    return false;
+}
+
+bool FileScanner::_should_enable_condition_cache_for_query() const {
+    return true;
+}
+
+bool FileScanner::_should_push_down_predicates(TFileFormatType::type format_type) const {
+    DCHECK(_should_push_down_predicates_handler != nullptr);
+    return (this->*_should_push_down_predicates_handler)(format_type);
+}
+
+bool FileScanner::_should_push_down_predicates_for_load(TFileFormatType::type format_type) const {
+    static_cast<void>(format_type);
+    return false;
+}
+
+bool FileScanner::_should_push_down_predicates_for_query(TFileFormatType::type format_type) const {
+    // JNI readers handle predicate conversion in their own paths.
+    return format_type != TFileFormatType::FORMAT_JNI;
+}
+
 void FileScanner::_init_reader_condition_cache() {
     _condition_cache = nullptr;
     _condition_cache_ctx = nullptr;
diff --git a/be/src/exec/scan/file_scanner.h b/be/src/exec/scan/file_scanner.h
index 6c1125f0a603a5..022847e165f067 100644
--- a/be/src/exec/scan/file_scanner.h
+++ b/be/src/exec/scan/file_scanner.h
@@ -90,7 +90,9 @@ class FileScanner : public Scanner {
             : Scanner(state, profile),
               _params(params),
               _col_name_to_slot_id(colname_to_slot_id),
-              _real_tuple_desc(tuple_desc) {};
+              _real_tuple_desc(tuple_desc) {
+        _configure_file_scan_handlers();
+    };
 
     Status read_lines_from_range(const TFileRangeDesc& range, const std::list<int64_t>& row_ids,
                                  Block* result_block, const ExternalFileMappingInfo& external_info,
@@ -107,6 +109,9 @@ class FileScanner : public Scanner {
 
     Status _get_next_reader();
 
+    // Build a ReaderInitContext with shared fields from FileScanner members.
+    void _fill_base_init_context(ReaderInitContext* ctx);
+
     // TODO: cast input block columns type to string.
     Status _cast_src_block(Block* block) { return Status::OK(); }
 
@@ -128,10 +133,10 @@ class FileScanner : public Scanner {
     std::vector<SlotDescriptor*> _file_slot_descs;
     // col names from _file_slot_descs
     std::vector<std::string> _file_col_names;
+    // Unified column descriptors for init_reader (includes file, partition, missing, synthesized cols)
+    std::vector<ColumnDescriptor> _column_descs;
 
-    // Partition source slot descriptors
-    std::vector<SlotDescriptor*> _partition_slot_descs;
-    // Partition slot id to index in _partition_slot_descs
+    // Partition slot id to partition key index (for matching columns_from_path)
     std::unordered_map<SlotId, int> _partition_slot_index_map;
     // created from param.expr_of_dest_slot
     // For query, it saves default value expr of all dest columns, or nullptr for NULL.
@@ -152,8 +157,6 @@ class FileScanner : public Scanner {
     // Get from GenericReader, save the existing columns in file to their type.
     std::unordered_map<std::string, DataTypePtr> _slot_lower_name_to_col_type;
     // Get from GenericReader, save columns that required by scan but not exist in file.
-    // These columns will be filled by default value or null.
-    std::unordered_set<std::string> _missing_cols;
 
     // The col lowercase name of source file to type of source file.
     std::map<std::string, DataTypePtr> _source_file_col_name_types;
@@ -192,7 +195,6 @@ class FileScanner : public Scanner {
     std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
             _partition_col_descs;
     std::unordered_map<std::string, bool> _partition_value_is_null;
-    std::unordered_map<std::string, VExprContextSPtr> _missing_col_descs;
 
     // idx of skip_bitmap_col in _input_tuple_desc
     int32_t _skip_bitmap_col_idx {-1};
@@ -232,13 +234,17 @@ class FileScanner : public Scanner {
 
     std::pair<std::shared_ptr<RowIdColumnIteratorV2>, int> _row_id_column_iterator_pair = {nullptr,
                                                                                            -1};
-    bool _need_iceberg_rowid_column = false;
-    int _iceberg_rowid_column_pos = -1;
     // for iceberg row lineage
     RowLineageColumns _row_lineage_columns;
     int64_t _last_bytes_read_from_local = 0;
     int64_t _last_bytes_read_from_remote = 0;
 
+    Status (FileScanner::*_init_src_block_handler)(Block* block) = nullptr;
+    Status (FileScanner::*_process_src_block_after_read_handler)(Block* block) = nullptr;
+    bool (FileScanner::*_should_push_down_predicates_handler)(
+            TFileFormatType::type format_type) const = nullptr;
+    bool (FileScanner::*_should_enable_condition_cache_handler)() const = nullptr;
+
     // Condition cache for external tables
     uint64_t _condition_cache_digest = 0;
     segment_v2::ConditionCache::ExternalCacheKey _condition_cache_key;
@@ -246,18 +252,25 @@ class FileScanner : public Scanner {
     std::shared_ptr<ConditionCacheContext> _condition_cache_ctx;
     int64_t _condition_cache_hit_count = 0;
 
+    void _configure_file_scan_handlers();
+
     Status _init_expr_ctxes();
     Status _init_src_block(Block* block);
-    Status _check_output_block_types();
-    Status _cast_to_input_block(Block* block);
+    Status _init_src_block_for_load(Block* block);
+    Status _init_src_block_for_query(Block* block);
+    Status _process_src_block_after_read(Block* block);
+    Status _process_src_block_after_read_for_load(Block* block);
+    Status _process_src_block_after_read_for_query(Block* block);
     Status _fill_columns_from_path(size_t rows);
     Status _fill_missing_columns(size_t rows);
+    Status _check_output_block_types();
+    Status _cast_to_input_block(Block* block);
     Status _pre_filter_src_block();
     Status _convert_to_output_block(Block* block);
     Status _truncate_char_or_varchar_columns(Block* block);
     void _truncate_char_or_varchar_column(Block* block, int idx, int len);
     Status _generate_partition_columns();
-    Status _generate_missing_columns();
+
     bool _check_partition_prune_expr(const VExprSPtr& expr);
     void _init_runtime_filter_partition_prune_ctxs();
     void _init_runtime_filter_partition_prune_block();
@@ -267,10 +280,10 @@ class FileScanner : public Scanner {
     void _get_slot_ids(VExpr* expr, std::vector<int>* slot_ids);
     Status _generate_truncate_columns(bool need_to_get_parsed_schema);
     Status _set_fill_or_truncate_columns(bool need_to_get_parsed_schema);
-    Status _init_orc_reader(std::unique_ptr<OrcReader>&& orc_reader,
-                            FileMetaCache* file_meta_cache_ptr);
-    Status _init_parquet_reader(std::unique_ptr<ParquetReader>&& parquet_reader,
-                                FileMetaCache* file_meta_cache_ptr);
+    Status _init_orc_reader(FileMetaCache* file_meta_cache_ptr,
+                            std::unique_ptr<OrcReader> orc_reader = nullptr);
+    Status _init_parquet_reader(FileMetaCache* file_meta_cache_ptr,
+                                std::unique_ptr<ParquetReader> parquet_reader = nullptr);
     Status _create_row_id_column_iterator();
 
     TFileFormatType::type _get_current_format_type() {
@@ -291,6 +304,11 @@ class FileScanner : public Scanner {
     }
 
     bool _should_enable_condition_cache();
+    bool _should_enable_condition_cache_for_load() const;
+    bool _should_enable_condition_cache_for_query() const;
+    bool _should_push_down_predicates(TFileFormatType::type format_type) const;
+    bool _should_push_down_predicates_for_load(TFileFormatType::type format_type) const;
+    bool _should_push_down_predicates_for_query(TFileFormatType::type format_type) const;
     void _init_reader_condition_cache();
     void _finalize_reader_condition_cache();
 
diff --git a/be/src/format/arrow/arrow_stream_reader.cpp b/be/src/format/arrow/arrow_stream_reader.cpp
index be6e8acb19fc64..4bbb8cbd39bb4a 100644
--- a/be/src/format/arrow/arrow_stream_reader.cpp
+++ b/be/src/format/arrow/arrow_stream_reader.cpp
@@ -65,7 +65,7 @@ Status ArrowStreamReader::init_reader() {
     return Status::OK();
 }
 
-Status ArrowStreamReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+Status ArrowStreamReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     bool has_next = false;
     RETURN_IF_ERROR(_pip_stream->HasNext(&has_next));
     if (!has_next) {
@@ -126,8 +126,8 @@ Status ArrowStreamReader::get_next_block(Block* block, size_t* read_rows, bool*
     return Status::OK();
 }
 
-Status ArrowStreamReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                                      std::unordered_set<std::string>* missing_cols) {
+Status ArrowStreamReader::_get_columns_impl(
+        std::unordered_map<std::string, DataTypePtr>* name_to_type) {
     for (const auto& slot : _file_slot_descs) {
         name_to_type->emplace(slot->col_name(), slot->type());
     }
diff --git a/be/src/format/arrow/arrow_stream_reader.h b/be/src/format/arrow/arrow_stream_reader.h
index 7076df158d2b82..df20d8fd920a26 100644
--- a/be/src/format/arrow/arrow_stream_reader.h
+++ b/be/src/format/arrow/arrow_stream_reader.h
@@ -55,10 +55,12 @@ class ArrowStreamReader : public GenericReader {
 
     Status init_reader();
 
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
 
-    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) override;
+    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override;
+
+protected:
+    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
 
 private:
     RuntimeState* _state;
diff --git a/be/src/format/column_descriptor.h b/be/src/format/column_descriptor.h
new file mode 100644
index 00000000000000..37126fff39f0fa
--- /dev/null
+++ b/be/src/format/column_descriptor.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+
+#include "exprs/vexpr_fwd.h"
+
+namespace doris {
+class SlotDescriptor;
+
+/// Column categories for table format reading.
+///
+/// Each column requested by the query is classified into one of these categories.
+/// The category determines how the column's value is obtained:
+///   - REGULAR:       Read directly from the data file (Parquet/ORC).
+///                    If the column is absent from a file (schema evolution),
+///                    its default_expr is used to produce a default value.
+///   - PARTITION_KEY:  Filled from partition metadata (e.g. Hive path partitions).
+///   - SYNTHESIZED:    Never in the data file; fully computed at runtime
+///                     (e.g. Doris V2 __DORIS_ICEBERG_ROWID_COL__).
+///   - GENERATED:      May or may not exist in the data file. If present but null,
+///                     the value is backfilled at runtime (e.g. Iceberg V3 _row_id).
+enum class ColumnCategory {
+    REGULAR,
+    PARTITION_KEY,
+    SYNTHESIZED,
+    GENERATED,
+};
+
+/// Describes a column requested by the query, along with its category.
+struct ColumnDescriptor {
+    std::string name;
+    const SlotDescriptor* slot_desc = nullptr;
+    ColumnCategory category = ColumnCategory::REGULAR;
+    /// Default value expression when this column is missing from the data file.
+    /// nullptr means fill with NULL. Built once per table scan in FileScanner.
+    VExprContextSPtr default_expr;
+};
+
+} // namespace doris
diff --git a/be/src/format/count_reader.h b/be/src/format/count_reader.h
new file mode 100644
index 00000000000000..65764f5d028d11
--- /dev/null
+++ b/be/src/format/count_reader.h
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "format/generic_reader.h"
+
+namespace doris {
+#include "common/compile_check_begin.h"
+
+/// A lightweight reader that emits row counts without reading any actual data.
+/// Used as a decorator to replace the real reader when COUNT(*) push down is active.
+///
+/// Instead of duplicating the COUNT short-circuit logic in every format reader
+/// (ORC, Parquet, etc.), FileScanner creates a CountReader after the real reader
+/// is initialized and the total row count is known. The CountReader then serves
+/// all subsequent get_next_block calls by simply resizing columns.
+///
+/// This cleanly separates the "how many rows" concern from the actual data reading,
+/// eliminating duplicated COUNT blocks across format readers.
+class CountReader : public GenericReader {
+public:
+    /// @param total_rows   Total number of rows to emit (post-filter).
+    /// @param batch_size   Maximum rows per batch.
+    /// @param inner_reader The original reader, kept alive for profile collection
+    ///                     and lifecycle management. Ownership is transferred.
+    CountReader(int64_t total_rows, size_t batch_size,
+                std::unique_ptr<GenericReader> inner_reader = nullptr)
+            : _remaining_rows(total_rows),
+              _batch_size(batch_size),
+              _inner_reader(std::move(inner_reader)) {
+        set_push_down_agg_type(TPushAggOp::type::COUNT);
+    }
+
+    ~CountReader() override = default;
+
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override {
+        auto rows = std::min(_remaining_rows, static_cast<int64_t>(_batch_size));
+        _remaining_rows -= rows;
+
+        auto mutate_columns = block->mutate_columns();
+        for (auto& col : mutate_columns) {
+            col->resize(rows);
+        }
+        block->set_columns(std::move(mutate_columns));
+
+        *read_rows = rows;
+        *eof = (_remaining_rows == 0);
+        return Status::OK();
+    }
+
+    /// CountReader counts rows by definition.
+    bool count_read_rows() override { return true; }
+
+    /// Delegate to inner reader if available, otherwise return our total.
+    int64_t get_total_rows() const override {
+        return _inner_reader ? _inner_reader->get_total_rows() : _initial_total_rows();
+    }
+
+    Status close() override {
+        if (_inner_reader) {
+            return _inner_reader->close();
+        }
+        return Status::OK();
+    }
+
+    /// Access the inner reader for profile collection or other lifecycle needs.
+    GenericReader* inner_reader() const { return _inner_reader.get(); }
+
+protected:
+    void _collect_profile_before_close() override {
+        if (_inner_reader) {
+            _inner_reader->collect_profile_before_close();
+        }
+    }
+
+private:
+    int64_t _initial_total_rows() const { return _remaining_rows; }
+
+    int64_t _remaining_rows;
+    size_t _batch_size;
+    std::unique_ptr<GenericReader> _inner_reader;
+};
+
+#include "common/compile_check_end.h"
+} // namespace doris
diff --git a/be/src/format/csv/csv_reader.cpp b/be/src/format/csv/csv_reader.cpp
index 63045f8bfebd7d..10d79126bb67c9 100644
--- a/be/src/format/csv/csv_reader.cpp
+++ b/be/src/format/csv/csv_reader.cpp
@@ -307,8 +307,78 @@ Status CsvReader::init_reader(bool is_load) {
     return Status::OK();
 }
 
+// ---- Unified init_reader(ReaderInitContext*) overrides ----
+
+Status CsvReader::_open_file_reader(ReaderInitContext* base_ctx) {
+    _start_offset = _range.start_offset;
+    if (_start_offset == 0) {
+        if (_params.__isset.file_attributes && _params.file_attributes.__isset.header_type &&
+            !_params.file_attributes.header_type.empty()) {
+            std::string header_type = to_lower(_params.file_attributes.header_type);
+            if (header_type == BeConsts::CSV_WITH_NAMES) {
+                _skip_lines = 1;
+            } else if (header_type == BeConsts::CSV_WITH_NAMES_AND_TYPES) {
+                _skip_lines = 2;
+            }
+        } else if (_params.file_attributes.__isset.skip_lines) {
+            _skip_lines = _params.file_attributes.skip_lines;
+        }
+    } else if (_start_offset != 0) {
+        if ((_file_compress_type != TFileCompressType::PLAIN) ||
+            (_file_compress_type == TFileCompressType::UNKNOWN &&
+             _file_format_type != TFileFormatType::FORMAT_CSV_PLAIN)) {
+            return Status::InternalError<false>("For now we do not support split compressed file");
+        }
+        int64_t pre_read_len = std::min(
+                static_cast<int64_t>(_params.file_attributes.text_params.line_delimiter.size()),
+                _start_offset);
+        _start_offset -= pre_read_len;
+        _size += pre_read_len;
+        _skip_lines = 1;
+    }
+
+    RETURN_IF_ERROR(_init_options());
+    RETURN_IF_ERROR(_create_file_reader(false));
+    return Status::OK();
+}
+
+Status CsvReader::_do_init_reader(ReaderInitContext* base_ctx) {
+    auto* ctx = checked_context_cast<CsvInitContext>(base_ctx);
+    _is_load = ctx->is_load;
+
+    _use_nullable_string_opt.resize(_file_slot_descs.size());
+    for (int i = 0; i < _file_slot_descs.size(); ++i) {
+        auto data_type_ptr = _file_slot_descs[i]->get_data_type_ptr();
+        if (data_type_ptr->is_nullable() && is_string_type(data_type_ptr->get_primitive_type())) {
+            _use_nullable_string_opt[i] = 1;
+        }
+    }
+
+    RETURN_IF_ERROR(_create_decompressor());
+    RETURN_IF_ERROR(_create_line_reader());
+
+    if (!_is_load) {
+        DCHECK(_params.__isset.column_idxs);
+        _col_idxs = _params.column_idxs;
+        int idx = 0;
+        for (const auto& slot_info : _params.required_slots) {
+            if (slot_info.is_file_slot) {
+                _file_slot_idx_map.push_back(idx);
+            }
+            idx++;
+        }
+    } else {
+        int i = 0;
+        for (const auto& desc [[maybe_unused]] : _file_slot_descs) {
+            _col_idxs.push_back(i++);
+        }
+    }
+    _line_reader_eof = false;
+    return Status::OK();
+}
+
 // !FIXME: Here we should use MutableBlock
-Status CsvReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+Status CsvReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     if (_line_reader_eof) {
         *eof = true;
         return Status::OK();
@@ -397,8 +467,7 @@ Status CsvReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
     return Status::OK();
 }
 
-Status CsvReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                              std::unordered_set<std::string>* missing_cols) {
+Status CsvReader::_get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) {
     for (const auto& slot : _file_slot_descs) {
         name_to_type->emplace(slot->col_name(), slot->type());
     }
diff --git a/be/src/format/csv/csv_reader.h b/be/src/format/csv/csv_reader.h
index 4e24be28d15b95..f9bcb06697c10c 100644
--- a/be/src/format/csv/csv_reader.h
+++ b/be/src/format/csv/csv_reader.h
@@ -32,7 +32,7 @@
 #include "common/status.h"
 #include "core/data_type/data_type.h"
 #include "format/file_reader/new_plain_text_line_reader.h"
-#include "format/generic_reader.h"
+#include "format/table/table_format_reader.h"
 #include "io/file_factory.h"
 #include "io/fs/file_reader_writer_fwd.h"
 #include "util/decompressor.h"
@@ -52,6 +52,11 @@ struct IOContext;
 struct ScannerCounter;
 class Block;
 
+/// CSV/Text-specific initialization context.
+struct CsvInitContext final : public ReaderInitContext {
+    bool is_load = false;
+};
+
 class LineFieldSplitterIf {
 public:
     virtual ~LineFieldSplitterIf() = default;
@@ -166,7 +171,7 @@ class PlainCsvTextFieldSplitter : public BaseCsvTextFieldSplitter<PlainCsvTextFi
     std::string _value_sep;
 };
 
-class CsvReader : public GenericReader {
+class CsvReader : public TableFormatReader {
     ENABLE_FACTORY_CREATOR(CsvReader);
 
 public:
@@ -177,9 +182,8 @@ class CsvReader : public GenericReader {
     ~CsvReader() override = default;
 
     Status init_reader(bool is_load);
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
-    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) override;
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override;
 
     Status init_schema_reader() override;
     // get schema of csv file from first one line or first two lines.
@@ -193,6 +197,10 @@ class CsvReader : public GenericReader {
     Status close() override;
 
 protected:
+    // ---- Unified init_reader(ReaderInitContext*) overrides ----
+    Status _open_file_reader(ReaderInitContext* ctx) override;
+    Status _do_init_reader(ReaderInitContext* ctx) override;
+
     // init options for type serde
     virtual Status _init_options();
     virtual Status _create_line_reader();
diff --git a/be/src/format/generic_reader.cpp b/be/src/format/generic_reader.cpp
new file mode 100644
index 00000000000000..11f9c19bcc7da1
--- /dev/null
+++ b/be/src/format/generic_reader.cpp
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format/generic_reader.h"
+
+namespace doris {
+// GenericReader has no out-of-line method implementations.
+// Column-filling logic lives in TableFormatReader (table_format_reader.cpp).
+} // namespace doris
diff --git a/be/src/format/generic_reader.h b/be/src/format/generic_reader.h
index c08b0427847feb..61b5c5a0ab8e42 100644
--- a/be/src/format/generic_reader.h
+++ b/be/src/format/generic_reader.h
@@ -19,10 +19,27 @@
 
 #include <gen_cpp/PlanNodes_types.h>
 
+#include <functional>
+#include <memory>
+#include <set>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <vector>
+
 #include "common/status.h"
+#include "core/column/column.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
 #include "exprs/vexpr_fwd.h"
+#include "format/column_descriptor.h"
+#include "format/table/table_schema_change_helper.h"
 #include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
 #include "storage/predicate/block_column_predicate.h"
+#include "storage/segment/common.h"
 #include "util/profile_collector.h"
 
 namespace doris {
@@ -45,21 +62,87 @@ struct ConditionCacheContext {
     static constexpr int GRANULE_SIZE = 2048;
 };
 
-// This a reader interface for all file readers.
-// A GenericReader is responsible for reading a file and return
-// a set of blocks with specified schema,
+/// Base context for the unified init_reader(ReaderInitContext*) template method.
+/// Contains fields shared by ALL reader types. Format-specific readers define
+/// subclasses (ParquetInitContext, OrcInitContext, etc.) with extra fields.
+/// FileScanner allocates the appropriate subclass and populates the shared fields
+/// before calling init_reader().
+struct ReaderInitContext {
+    virtual ~ReaderInitContext() = default;
+
+    // ---- Owned by FileScanner, shared by all readers ----
+    std::vector<ColumnDescriptor>* column_descs = nullptr;
+    std::unordered_map<std::string, uint32_t>* col_name_to_block_idx = nullptr;
+    RuntimeState* state = nullptr;
+    const TupleDescriptor* tuple_descriptor = nullptr;
+    const RowDescriptor* row_descriptor = nullptr;
+    const TFileScanRangeParams* params = nullptr;
+    const TFileRangeDesc* range = nullptr;
+    TPushAggOp::type push_down_agg_type = TPushAggOp::type::NONE;
+
+    // ---- Output slots (filled by on_before_init_reader) ----
+    std::vector<std::string> column_names;
+    std::shared_ptr<TableSchemaChangeHelper::Node> table_info_node =
+            TableSchemaChangeHelper::ConstNode::get_instance();
+    std::set<uint64_t> column_ids;
+    std::set<uint64_t> filter_column_ids;
+};
+
+/// Safe downcast for ReaderInitContext subclasses.
+/// Uses dynamic_cast + DORIS_CHECK: crashes on type mismatch (per Doris coding standards).
+template <typename To, typename From>
+To* checked_context_cast(From* ptr) {
+    auto* result = dynamic_cast<To*>(ptr);
+    DORIS_CHECK(result != nullptr);
+    return result;
+}
+
+/// Base reader interface for all file readers.
+/// A GenericReader is responsible for reading a file and returning
+/// a set of blocks with specified schema.
+///
+/// Provides hook virtual methods that implement the Template Method pattern:
+///   init_reader:      _open_file_reader → on_before_init_reader → _do_init_reader → on_after_init_reader
+///   get_next_block:   on_before_read_block → _do_get_next_block → on_after_read_block
+///
+/// Column-filling logic (partition/missing/synthesized) lives in TableFormatReader.
 class GenericReader : public ProfileCollector {
 public:
     GenericReader() : _push_down_agg_type(TPushAggOp::type::NONE) {}
     void set_push_down_agg_type(TPushAggOp::type push_down_agg_type) {
-        _push_down_agg_type = push_down_agg_type;
+        if (!_push_down_agg_type_locked) {
+            _push_down_agg_type = push_down_agg_type;
+        }
     }
+    // Lock the current push_down_agg_type so FileScanner cannot override it.
+    // Used by readers that must disable COUNT pushdown (e.g., ACID deletes, Paimon DV).
+    void lock_push_down_agg_type() { _push_down_agg_type_locked = true; }
+    TPushAggOp::type get_push_down_agg_type() const { return _push_down_agg_type; }
 
-    virtual Status get_next_block(Block* block, size_t* read_rows, bool* eof) = 0;
+    /// Template method for reading blocks.
+    /// Calls: on_before_read_block → _do_get_next_block → on_after_read_block
+    Status get_next_block(Block* block, size_t* read_rows, bool* eof) {
+        RETURN_IF_ERROR(on_before_read_block(block));
+        RETURN_IF_ERROR(_do_get_next_block(block, read_rows, eof));
+        RETURN_IF_ERROR(on_after_read_block(block, read_rows));
+        return Status::OK();
+    }
 
     // Type is always nullable to process illegal values.
-    virtual Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                               std::unordered_set<std::string>* missing_cols) {
+    // Results are cached after the first successful call.
+    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type) {
+        if (_get_columns_cached) {
+            *name_to_type = _cached_name_to_type;
+            return Status::OK();
+        }
+        RETURN_IF_ERROR(_get_columns_impl(name_to_type));
+        _cached_name_to_type = *name_to_type;
+        _get_columns_cached = true;
+
+        return Status::OK();
+    }
+
+    virtual Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) {
         return Status::NotSupported("get_columns is not implemented");
     }
 
@@ -75,20 +158,6 @@ class GenericReader : public ProfileCollector {
     }
     ~GenericReader() override = default;
 
-    /// If the underlying FileReader has filled the partition&missing columns,
-    /// The FileScanner does not need to fill
-    virtual bool fill_all_columns() const { return _fill_all_columns; }
-
-    /// Tell the underlying FileReader the partition&missing columns,
-    /// and the FileReader determine to fill columns or not.
-    /// Should set _fill_all_columns = true, if fill the columns.
-    virtual Status set_fill_columns(
-            const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
-                    partition_columns,
-            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) {
-        return Status::OK();
-    }
-
     virtual Status close() { return Status::OK(); }
 
     Status read_by_rows(const std::list<int64_t>& row_ids) {
@@ -102,21 +171,106 @@ class GenericReader : public ProfileCollector {
     /// can skip some pages/rowgroups through indexes.
     virtual bool count_read_rows() { return false; }
 
+    /// Returns true if on_before_init_reader has already set _column_descs.
+    bool has_column_descs() const { return _column_descs != nullptr; }
+
+    /// Unified initialization entry point (NVI pattern).
+    /// Enforces the template method sequence for ALL readers:
+    ///   _open_file_reader → on_before_init_reader → _do_init_reader → on_after_init_reader
+    /// Subclasses implement _open_file_reader and _do_init_reader(ReaderInitContext*).
+    /// FileScanner constructs the appropriate ReaderInitContext subclass and calls this.
+    ///
+    /// NOTE: During migration, readers not yet ported to this API still use their
+    /// format-specific init_reader(...) methods. This method is non-virtual so it
+    /// cannot be accidentally overridden.
+    Status init_reader(ReaderInitContext* ctx) {
+        // Apply push_down_agg_type early so _open_file_reader and _do_init_reader
+        // can use it (e.g., PaimonCppReader skips full init on COUNT pushdown).
+        // on_after_init_reader may reset this (e.g., Iceberg with equality deletes).
+        set_push_down_agg_type(ctx->push_down_agg_type);
+
+        RETURN_IF_ERROR(_open_file_reader(ctx));
+
+        // Standalone readers (delete file readers, push handler) set column_descs=nullptr
+        // and pre-populate column_names directly. Skip hooks for them.
+        if (ctx->column_descs != nullptr) {
+            RETURN_IF_ERROR(on_before_init_reader(ctx));
+        }
+
+        RETURN_IF_ERROR(_do_init_reader(ctx));
+
+        if (ctx->column_descs != nullptr) {
+            RETURN_IF_ERROR(on_after_init_reader(ctx));
+        }
+
+        return Status::OK();
+    }
+
+    /// Hook called before core init. Default just sets _column_descs.
+    /// TableFormatReader overrides with partition/missing column computation.
+    /// ORC/Parquet/Hive/Iceberg further override with format-specific schema matching.
+    virtual Status on_before_init_reader(ReaderInitContext* ctx) {
+        _column_descs = ctx->column_descs;
+        return Status::OK();
+    }
+
 protected:
+    // ---- Init-time hooks (Template Method for init_reader) ----
+
+    /// Opens the file and prepares I/O resources before hooks run. Override in
+    /// subclasses to open files, read metadata, set up decompressors, etc.
+    /// For Parquet/ORC, opens the file and reads footer metadata.
+    /// For CSV/JSON, opens the file, creates decompressors, and sets up line readers.
+    /// Default is no-op (for JNI, Native, Arrow readers).
+    virtual Status _open_file_reader(ReaderInitContext* /*ctx*/) { return Status::OK(); }
+
+    /// Core initialization (format-specific). Subclasses override to perform
+    /// their actual parsing engine setup. The context should be downcast to
+    /// the appropriate subclass using checked_context_cast<T>.
+    /// Default returns NotSupported — readers not yet migrated to the unified
+    /// init_reader(ReaderInitContext*) API still use their old init methods.
+    virtual Status _do_init_reader(ReaderInitContext* /*ctx*/) {
+        return Status::NotSupported(
+                "_do_init_reader(ReaderInitContext*) not yet implemented for this reader");
+    }
+
+    // ---- Existing init-time hooks ----
+
+    /// Called after core init completes. Subclasses override to process
+    /// delete files, deletion vectors, etc.
+    virtual Status on_after_init_reader(ReaderInitContext* /*ctx*/) { return Status::OK(); }
+
+    // ---- Read-time hooks ----
+
+    /// Called before reading a block. Subclasses override to modify block
+    /// structure (e.g. add ACID columns, expand for equality delete).
+    virtual Status on_before_read_block(Block* block) { return Status::OK(); }
+
+    /// Core block reading. Subclasses must override with actual read logic.
+    virtual Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) = 0;
+
+    /// Called after reading a block. Subclasses override to post-process
+    /// (e.g. remove ACID columns, apply equality delete filter).
+    virtual Status on_after_read_block(Block* block, size_t* read_rows) { return Status::OK(); }
+
     virtual Status _set_read_one_line_impl() {
         return Status::NotSupported("read_by_rows is not implemented for this reader.");
     }
 
     const size_t _MIN_BATCH_SIZE = 4064; // 4094 - 32(padding)
 
-    /// Whether the underlying FileReader has filled the partition&missing columns
-    bool _fill_all_columns = false;
     TPushAggOp::type _push_down_agg_type {};
+    bool _push_down_agg_type_locked = false;
 
 public:
     // Pass condition cache context to the reader for HIT/MISS tracking.
     virtual void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) {}
 
+    // Returns true if this reader can produce an accurate total row count from metadata
+    // without reading actual data. Used to determine if CountReader decorator can be applied.
+    // Only ORC and Parquet readers support this (via file footer metadata).
+    virtual bool supports_count_pushdown() const { return false; }
+
     // Returns the total number of rows the reader will produce.
     // Used to pre-allocate condition cache with the correct number of granules.
     virtual int64_t get_total_rows() const { return 0; }
@@ -133,6 +287,21 @@ class GenericReader : public ProfileCollector {
     // Cache to save some common part such as file footer.
     // Maybe null if not used
     FileMetaCache* _meta_cache = nullptr;
+
+    // ---- Column descriptors (set by init_reader, owned by FileScanner) ----
+    const std::vector<ColumnDescriptor>* _column_descs = nullptr;
+
+    // ---- get_columns cache ----
+    bool _get_columns_cached = false;
+    std::unordered_map<std::string, DataTypePtr> _cached_name_to_type;
+};
+
+/// Provides an accessor for the current batch's row positions within the file.
+/// Implemented by RowGroupReader (Parquet) and OrcReader.
+class RowPositionProvider {
+public:
+    virtual ~RowPositionProvider() = default;
+    virtual const std::vector<segment_v2::rowid_t>& current_batch_row_positions() const = 0;
 };
 
 #include "common/compile_check_end.h"
diff --git a/be/src/format/jni/jni_reader.cpp b/be/src/format/jni/jni_reader.cpp
index 22e26d829c2010..e074fce9650d24 100644
--- a/be/src/format/jni/jni_reader.cpp
+++ b/be/src/format/jni/jni_reader.cpp
@@ -110,10 +110,10 @@ Status JniReader::open(RuntimeState* state, RuntimeProfile* profile) {
 }
 
 // =========================================================================
-// JniReader::get_next_block  (merged from JniConnector::get_next_block)
+// JniReader::_do_get_next_block  (merged from JniConnector::get_next_block)
 // =========================================================================
 
-Status JniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+Status JniReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     JNIEnv* env = nullptr;
     RETURN_IF_ERROR(Jni::Env::Get(&env));
     long meta_address = 0;
diff --git a/be/src/format/jni/jni_reader.h b/be/src/format/jni/jni_reader.h
index 87c0c9c0d828e1..9d1cfd4b404f35 100644
--- a/be/src/format/jni/jni_reader.h
+++ b/be/src/format/jni/jni_reader.h
@@ -87,8 +87,7 @@ class JniReader : public GenericReader {
      */
     Status open(RuntimeState* state, RuntimeProfile* profile);
 
-    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) override {
+    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override {
         for (const auto& desc : _file_slot_descs) {
             name_to_type->emplace(desc->col_name(), desc->type());
         }
@@ -98,7 +97,7 @@ class JniReader : public GenericReader {
     /**
      * Read next batch from Java scanner and fill the block.
      */
-    virtual Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
 
     /**
      * Get table schema from Java scanner (used by Avro schema discovery).
diff --git a/be/src/format/json/new_json_reader.cpp b/be/src/format/json/new_json_reader.cpp
index cecfcf3f0dcf54..4c22bec476b743 100644
--- a/be/src/format/json/new_json_reader.cpp
+++ b/be/src/format/json/new_json_reader.cpp
@@ -197,7 +197,53 @@ Status NewJsonReader::init_reader(
     return Status::OK();
 }
 
-Status NewJsonReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+// ---- Unified init_reader(ReaderInitContext*) overrides ----
+
+Status NewJsonReader::_open_file_reader(ReaderInitContext* /*ctx*/) {
+    RETURN_IF_ERROR(_get_range_params());
+    RETURN_IF_ERROR(_open_file_reader(false));
+    return Status::OK();
+}
+
+Status NewJsonReader::_do_init_reader(ReaderInitContext* base_ctx) {
+    auto* ctx = checked_context_cast<JsonInitContext>(base_ctx);
+    _is_load = ctx->is_load;
+
+    RETURN_IF_ERROR(_get_column_default_value(_file_slot_descs, *ctx->col_default_value_ctx));
+    for (auto* slot_desc : _file_slot_descs) {
+        _serdes.emplace_back(slot_desc->get_data_type_ptr()->get_serde());
+    }
+
+    // Create decompressor (needed by line reader below)
+    RETURN_IF_ERROR(Decompressor::create_decompressor(_file_compress_type, &_decompressor));
+
+    if (LIKELY(_read_json_by_line)) {
+        RETURN_IF_ERROR(_open_line_reader());
+    }
+    RETURN_IF_ERROR(_parse_jsonpath_and_json_root());
+
+    if (_parsed_jsonpaths.empty()) {
+        _vhandle_json_callback = &NewJsonReader::_simdjson_handle_simple_json;
+    } else {
+        if (_strip_outer_array) {
+            _vhandle_json_callback = &NewJsonReader::_simdjson_handle_flat_array_complex_json;
+        } else {
+            _vhandle_json_callback = &NewJsonReader::_simdjson_handle_nested_complex_json;
+        }
+    }
+    _ondemand_json_parser = std::make_unique<simdjson::ondemand::parser>();
+    for (int i = 0; i < _file_slot_descs.size(); ++i) {
+        _slot_desc_index[StringRef {_file_slot_descs[i]->col_name()}] = i;
+        if (_file_slot_descs[i]->is_skip_bitmap_col()) {
+            skip_bitmap_col_idx = i;
+        }
+    }
+    _simdjson_ondemand_padding_buffer.resize(_padded_size);
+    _simdjson_ondemand_unscape_padding_buffer.resize(_padded_size);
+    return Status::OK();
+}
+
+Status NewJsonReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     if (_reader_eof) {
         *eof = true;
         return Status::OK();
@@ -228,8 +274,8 @@ Status NewJsonReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
     return Status::OK();
 }
 
-Status NewJsonReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                                  std::unordered_set<std::string>* missing_cols) {
+Status NewJsonReader::_get_columns_impl(
+        std::unordered_map<std::string, DataTypePtr>* name_to_type) {
     for (const auto& slot : _file_slot_descs) {
         name_to_type->emplace(slot->col_name(), slot->type());
     }
diff --git a/be/src/format/json/new_json_reader.h b/be/src/format/json/new_json_reader.h
index 4d803fc1050b19..4c6bb4e3609c78 100644
--- a/be/src/format/json/new_json_reader.h
+++ b/be/src/format/json/new_json_reader.h
@@ -36,8 +36,8 @@
 #include "core/string_ref.h"
 #include "core/types.h"
 #include "exprs/json_functions.h"
-#include "format/generic_reader.h"
 #include "format/line_reader.h"
+#include "format/table/table_format_reader.h"
 #include "io/file_factory.h"
 #include "io/fs/file_reader_writer_fwd.h"
 #include "runtime/runtime_profile.h"
@@ -63,7 +63,14 @@ struct ScannerCounter;
 class Block;
 class IColumn;
 
-class NewJsonReader : public GenericReader {
+/// JSON-specific initialization context.
+/// Extends ReaderInitContext with default value context (unique to JSON reader).
+struct JsonInitContext final : public ReaderInitContext {
+    const std::unordered_map<std::string, VExprContextSPtr>* col_default_value_ctx = nullptr;
+    bool is_load = false;
+};
+
+class NewJsonReader : public TableFormatReader {
     ENABLE_FACTORY_CREATOR(NewJsonReader);
 
 public:
@@ -80,14 +87,17 @@ class NewJsonReader : public GenericReader {
     Status init_reader(
             const std::unordered_map<std::string, VExprContextSPtr>& col_default_value_ctx,
             bool is_load);
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
-    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) override;
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override;
     Status init_schema_reader() override;
     Status get_parsed_schema(std::vector<std::string>* col_names,
                              std::vector<DataTypePtr>* col_types) override;
 
 protected:
+    // ---- Unified init_reader(ReaderInitContext*) overrides ----
+    Status _open_file_reader(ReaderInitContext* ctx) override;
+    Status _do_init_reader(ReaderInitContext* ctx) override;
+
     void _collect_profile_before_close() override;
 
 private:
diff --git a/be/src/format/native/native_reader.cpp b/be/src/format/native/native_reader.cpp
index 8693a3e9a22066..90599b223b4e52 100644
--- a/be/src/format/native/native_reader.cpp
+++ b/be/src/format/native/native_reader.cpp
@@ -146,7 +146,7 @@ Status NativeReader::init_reader() {
     return Status::OK();
 }
 
-Status NativeReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+Status NativeReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     if (_eof) {
         *read_rows = 0;
         *eof = true;
@@ -219,9 +219,7 @@ Status NativeReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
     return Status::OK();
 }
 
-Status NativeReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                                 std::unordered_set<std::string>* missing_cols) {
-    missing_cols->clear();
+Status NativeReader::_get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) {
     RETURN_IF_ERROR(init_reader());
 
     if (!_schema_inited) {
diff --git a/be/src/format/native/native_reader.h b/be/src/format/native/native_reader.h
index 65d70816628eea..1eb8df868eff0b 100644
--- a/be/src/format/native/native_reader.h
+++ b/be/src/format/native/native_reader.h
@@ -25,7 +25,7 @@
 #include <unordered_set>
 
 #include "common/status.h"
-#include "format/generic_reader.h"
+#include "format/table/table_format_reader.h"
 #include "io/fs/file_reader_writer_fwd.h"
 
 namespace doris {
@@ -46,7 +46,7 @@ class Block;
 // it will read a sequence of Blocks encoded in Doris Native binary format.
 //
 // NOTE: current implementation is just a skeleton and will be filled step by step.
-class NativeReader : public GenericReader {
+class NativeReader : public TableFormatReader {
 public:
     ENABLE_FACTORY_CREATOR(NativeReader);
 
@@ -58,10 +58,9 @@ class NativeReader : public GenericReader {
     // Initialize underlying file reader and any format specific state.
     Status init_reader();
 
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
 
-    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) override;
+    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override;
 
     Status init_schema_reader() override;
 
@@ -74,6 +73,7 @@ class NativeReader : public GenericReader {
 
 protected:
     void _collect_profile_before_close() override {}
+    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
 
 private:
     RuntimeProfile* _profile = nullptr;
diff --git a/be/src/format/orc/vorc_reader.cpp b/be/src/format/orc/vorc_reader.cpp
index a1a46d8565226b..c7b1454f35aebb 100644
--- a/be/src/format/orc/vorc_reader.cpp
+++ b/be/src/format/orc/vorc_reader.cpp
@@ -292,6 +292,7 @@ OrcReader::OrcReader(const TFileScanRangeParams& params, const TFileRangeDesc& r
         : _profile(nullptr),
           _scan_params(params),
           _scan_range(range),
+          _batch_size(_MIN_BATCH_SIZE),
           _ctz(ctz),
           _file_system(nullptr),
           _io_ctx(io_ctx),
@@ -309,6 +310,7 @@ OrcReader::OrcReader(const TFileScanRangeParams& params, const TFileRangeDesc& r
         : _profile(nullptr),
           _scan_params(params),
           _scan_range(range),
+          _batch_size(_MIN_BATCH_SIZE),
           _ctz(ctz),
           _file_system(nullptr),
           _io_ctx(io_ctx_holder ? io_ctx_holder.get() : nullptr),
@@ -461,31 +463,37 @@ Status OrcReader::_create_file_reader() {
     return Status::OK();
 }
 
-Status OrcReader::init_reader(
-        const std::vector<std::string>* column_names,
-        std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-        const VExprContextSPtrs& conjuncts, bool is_acid, const TupleDescriptor* tuple_descriptor,
-        const RowDescriptor* row_descriptor,
-        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts,
-        std::shared_ptr<TableSchemaChangeHelper::Node> table_info_node_ptr,
-        const std::set<uint64_t>& column_ids, const std::set<uint64_t>& filter_column_ids) {
-    _table_column_names = column_names;
-    _col_name_to_block_idx = col_name_to_block_idx;
-    _lazy_read_ctx.conjuncts = conjuncts;
-    _is_acid = is_acid;
-    _tuple_descriptor = tuple_descriptor;
-    _row_descriptor = row_descriptor;
-    _table_info_node_ptr = table_info_node_ptr;
-    _column_ids = column_ids;
-    _filter_column_ids = filter_column_ids;
-
-    if (not_single_slot_filter_conjuncts != nullptr && !not_single_slot_filter_conjuncts->empty()) {
+// ---- Unified init_reader(ReaderInitContext*) overrides ----
+
+Status OrcReader::_open_file_reader(ReaderInitContext* /*ctx*/) {
+    if (_state != nullptr) {
+        _orc_tiny_stripe_threshold_bytes = _state->query_options().orc_tiny_stripe_threshold_bytes;
+        _orc_once_max_read_bytes = _state->query_options().orc_once_max_read_bytes;
+        _orc_max_merge_distance_bytes = _state->query_options().orc_max_merge_distance_bytes;
+    }
+    return _create_file_reader();
+}
+
+Status OrcReader::_do_init_reader(ReaderInitContext* base_ctx) {
+    auto* ctx = checked_context_cast<OrcInitContext>(base_ctx);
+    _table_column_names = base_ctx->column_names;
+    _col_name_to_block_idx = base_ctx->col_name_to_block_idx;
+    if (ctx->conjuncts != nullptr) {
+        _lazy_read_ctx.conjuncts = *ctx->conjuncts;
+    }
+    _tuple_descriptor = ctx->tuple_descriptor;
+    _row_descriptor = ctx->row_descriptor;
+    _table_info_node_ptr = base_ctx->table_info_node;
+    _column_ids = base_ctx->column_ids;
+    _filter_column_ids = base_ctx->filter_column_ids;
+
+    if (ctx->not_single_slot_filter_conjuncts != nullptr &&
+        !ctx->not_single_slot_filter_conjuncts->empty()) {
         _not_single_slot_filter_conjuncts.insert(_not_single_slot_filter_conjuncts.end(),
-                                                 not_single_slot_filter_conjuncts->begin(),
-                                                 not_single_slot_filter_conjuncts->end());
+                                                 ctx->not_single_slot_filter_conjuncts->begin(),
+                                                 ctx->not_single_slot_filter_conjuncts->end());
     }
-    _slot_id_to_filter_conjuncts = slot_id_to_filter_conjuncts;
+    _slot_id_to_filter_conjuncts = ctx->slot_id_to_filter_conjuncts;
     _obj_pool = std::make_unique<ObjectPool>();
 
     if (_state != nullptr) {
@@ -494,8 +502,146 @@ Status OrcReader::init_reader(
         _orc_max_merge_distance_bytes = _state->query_options().orc_max_merge_distance_bytes;
     }
 
-    RETURN_IF_ERROR(_create_file_reader());
+    // _create_file_reader() is called by init_reader template method before hooks.
+    // For standalone _do_init_reader callers (tvf, load, etc.), open the file here if not already opened.
+    if (_reader == nullptr) {
+        RETURN_IF_ERROR(_create_file_reader());
+    }
     RETURN_IF_ERROR(_init_read_columns());
+
+    // Compute missing columns and file↔table column mapping.
+    // This runs in _do_init_reader (not on_before_init_reader) because table-format readers
+    // (Iceberg, Paimon, Hive, Hudi) override on_before_init_reader completely.
+    if (has_column_descs()) {
+        _fill_missing_cols.clear();
+        _fill_missing_defaults.clear();
+        for (const auto& col_name : _table_column_names) {
+            if (!_table_info_node_ptr->children_column_exists(col_name)) {
+                _fill_missing_cols.insert(col_name);
+            }
+        }
+        if (_column_descs && !_fill_missing_cols.empty()) {
+            for (const auto& desc : *_column_descs) {
+                if (_fill_missing_cols.contains(desc.name) &&
+                    !_fill_partition_values.contains(desc.name)) {
+                    _fill_missing_defaults[desc.name] = desc.default_expr;
+                }
+            }
+        }
+        for (const auto& table_column_name : _table_column_names) {
+            if (_fill_missing_cols.contains(table_column_name)) {
+                continue;
+            }
+            const auto file_column_name =
+                    _table_info_node_ptr->children_file_column_name(table_column_name);
+            _read_file_cols.emplace_back(file_column_name);
+            _read_table_cols.emplace_back(table_column_name);
+        }
+    }
+
+    // Register row-position-based synthesized column handler.
+    // _row_id_column_iterator_pair, _row_lineage_columns, and _iceberg_rowid_params
+    // are all set before init_reader by FileScanner.
+    // This must be outside has_column_descs() guard because standalone readers
+    // (e.g., orc_read_lines tests) also use row_id columns.
+    if (_row_id_column_iterator_pair.first != nullptr || _iceberg_rowid_params.enabled ||
+        (_row_lineage_columns != nullptr &&
+         (_row_lineage_columns->need_row_ids() ||
+          _row_lineage_columns->has_last_updated_sequence_number_column()))) {
+        register_synthesized_column_handler(
+                BeConsts::ROWID_COL, [this](Block* block, size_t rows) -> Status {
+                    return _fill_row_id_columns(block, _row_reader->getRowNumber());
+                });
+    }
+
+    // Standalone callers (column_descs == nullptr) skip on_before_init_reader,
+    // so _read_file_cols etc. are not populated. Use table_info_node for name mapping
+    // when available (e.g., ACID delete reader), otherwise fall back to 1:1 mapping.
+    if (!has_column_descs() && _read_file_cols.empty()) {
+        for (const auto& col_name : _table_column_names) {
+            if (_table_info_node_ptr && _table_info_node_ptr->children_column_exists(col_name)) {
+                _read_file_cols.emplace_back(
+                        _table_info_node_ptr->children_file_column_name(col_name));
+            } else {
+                _read_file_cols.emplace_back(col_name);
+            }
+            _read_table_cols.emplace_back(col_name);
+        }
+    }
+
+    // ---- Inlined set_fill_columns logic (partition/missing/synthesized classification) ----
+    SCOPED_RAW_TIMER(&_statistics.set_fill_column_time);
+
+    // 1. Collect predicate columns from conjuncts for lazy materialization
+    std::unordered_map<std::string, std::pair<uint32_t, int>> predicate_table_columns;
+    _collect_predicate_columns_from_conjuncts(predicate_table_columns);
+
+    // 2. Classify read/partition/missing/synthesized columns into lazy vs predicate groups
+    _classify_columns_for_lazy_read(predicate_table_columns, _fill_partition_values,
+                                    _fill_missing_defaults);
+
+    // 3. Init search argument for min-max filtering
+    if (_lazy_read_ctx.conjuncts.empty()) {
+        _lazy_read_ctx.can_lazy_read = false;
+    } else if (_enable_filter_by_min_max) {
+        auto res = _init_search_argument(_push_down_exprs);
+        if (_state->query_options().check_orc_init_sargs_success && !res) {
+            std::stringstream ss;
+            for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+                ss << conjunct->root()->debug_string() << "\n";
+            }
+            return Status::InternalError(
+                    "Session variable check_orc_init_sargs_success is set, but "
+                    "_init_search_argument returns false because all exprs can not be pushed "
+                    "down:\n " +
+                    ss.str());
+        }
+    }
+
+    // 4. Create ORC row reader (includes tiny stripe optimization and type map)
+    RETURN_IF_ERROR(_init_orc_row_reader());
+
+    // 5. Build filter conjuncts from not_single_slot and predicate_partition_columns
+    if (!_not_single_slot_filter_conjuncts.empty()) {
+        _filter_conjuncts.insert(_filter_conjuncts.end(), _not_single_slot_filter_conjuncts.begin(),
+                                 _not_single_slot_filter_conjuncts.end());
+        _disable_dict_filter = true;
+    }
+    if (_slot_id_to_filter_conjuncts && !_slot_id_to_filter_conjuncts->empty()) {
+        for (auto& kv : _lazy_read_ctx.predicate_partition_columns) {
+            auto& [value, slot_desc] = kv.second;
+            auto iter = _slot_id_to_filter_conjuncts->find(slot_desc->id());
+            if (iter != _slot_id_to_filter_conjuncts->end()) {
+                for (const auto& conjunct_ctx : iter->second) {
+                    _filter_conjuncts.push_back(conjunct_ctx);
+                }
+            }
+        }
+    }
+
+    return Status::OK();
+}
+
+Status OrcReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    RETURN_IF_ERROR(
+            _extract_partition_values(*ctx->range, ctx->tuple_descriptor, _fill_partition_values));
+    for (auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::REGULAR ||
+            desc.category == ColumnCategory::GENERATED) {
+            ctx->column_names.push_back(desc.name);
+        }
+    }
+
+    // Build table_info_node from ORC file type with case-insensitive recursive matching.
+    // _reader is available here because init_reader calls _create_file_reader() before this hook.
+    // tuple_descriptor may be null in unit tests that only set column_descs.
+    if (ctx->tuple_descriptor != nullptr) {
+        RETURN_IF_ERROR(TableSchemaChangeHelper::BuildTableInfoUtil::by_orc_name(
+                ctx->tuple_descriptor, &_reader->getType(), ctx->table_info_node));
+    }
+
     return Status::OK();
 }
 
@@ -524,6 +670,8 @@ void OrcReader::set_iceberg_rowid_params(const std::string& file_path, int32_t p
     _iceberg_rowid_params.row_id_column_pos = row_id_column_pos;
 }
 
+// set_iceberg_rowid_params removed: now handled by ColumnProcessor
+
 Status OrcReader::_init_read_columns() {
     SCOPED_RAW_TIMER(&_statistics.init_column_time);
     const auto& root_type = _reader->getType();
@@ -568,17 +716,6 @@ Status OrcReader::_init_read_columns() {
         }
     }
 
-    for (size_t i = 0; i < _table_column_names->size(); ++i) {
-        const auto& table_column_name = (*_table_column_names)[i];
-        if (!_table_info_node_ptr->children_column_exists(table_column_name)) {
-            _missing_cols.emplace_back(table_column_name);
-            continue;
-        }
-        const auto file_column_name =
-                _table_info_node_ptr->children_file_column_name(table_column_name);
-        _read_file_cols.emplace_back(file_column_name);
-        _read_table_cols.emplace_back(table_column_name);
-    }
     return Status::OK();
 }
 
@@ -1142,15 +1279,8 @@ bool OrcReader::_init_search_argument(const VExprSPtrs& exprs) {
     return true;
 }
 
-Status OrcReader::set_fill_columns(
-        const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
-                partition_columns,
-        const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) {
-    SCOPED_RAW_TIMER(&_statistics.set_fill_column_time);
-
-    // std::unordered_map<column_name, std::pair<col_id, slot_id>>
-    std::unordered_map<std::string, std::pair<uint32_t, int>> predicate_table_columns;
-    // visit_slot for lazy mat.
+void OrcReader::_collect_predicate_columns_from_conjuncts(
+        std::unordered_map<std::string, std::pair<uint32_t, int>>& predicate_table_columns) {
     std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
         if (expr->is_slot_ref()) {
             VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
@@ -1171,13 +1301,10 @@ Status OrcReader::set_fill_columns(
         auto expr = conjunct->root();
 
         if (expr->is_rf_wrapper()) {
-            // REF: src/runtime_filter/runtime_filter_consumer.cpp
             auto* runtime_filter = static_cast<VRuntimeFilterWrapper*>(expr.get());
-
             auto filter_impl = runtime_filter->get_impl();
             visit_slot(filter_impl.get());
 
-            // only support push down for filter row group : MAX_FILTER, MAX_FILTER, MINMAX_FILTER, IN_FILTER
             if ((runtime_filter->node_type() == TExprNodeType::BINARY_PRED) &&
                 (runtime_filter->op() == TExprOpcode::GE ||
                  runtime_filter->op() == TExprOpcode::LE)) {
@@ -1185,7 +1312,6 @@ Status OrcReader::set_fill_columns(
             } else if (runtime_filter->node_type() == TExprNodeType::IN_PRED &&
                        runtime_filter->op() == TExprOpcode::FILTER_IN) {
                 auto* direct_in_predicate = static_cast<VDirectInPredicate*>(filter_impl.get());
-
                 int max_in_size =
                         _state->query_options().__isset.max_pushdown_conditions_per_column
                                 ? _state->query_options().max_pushdown_conditions_per_column
@@ -1194,7 +1320,6 @@ Status OrcReader::set_fill_columns(
                     direct_in_predicate->get_set_func()->size() > max_in_size) {
                     continue;
                 }
-
                 VExprSPtr new_in_slot = nullptr;
                 if (direct_in_predicate->get_slot_in_expr(new_in_slot)) {
                     expr = new_in_slot;
@@ -1205,13 +1330,10 @@ Status OrcReader::set_fill_columns(
                 continue;
             }
         } else if (VTopNPred* topn_pred = typeid_cast<VTopNPred*>(expr.get())) {
-            // top runtime filter : only le && ge.
             DCHECK(topn_pred->children().size() > 0);
             visit_slot(topn_pred->children()[0].get());
-
             VExprSPtr binary_expr;
             if (topn_pred->get_binary_expr(binary_expr)) {
-                // for min-max filter.
                 expr = binary_expr;
             } else {
                 continue;
@@ -1224,7 +1346,13 @@ Status OrcReader::set_fill_columns(
             _push_down_exprs.emplace_back(expr);
         }
     }
+}
 
+void OrcReader::_classify_columns_for_lazy_read(
+        const std::unordered_map<std::string, std::pair<uint32_t, int>>& predicate_table_columns,
+        const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
+                partition_columns,
+        const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) {
     if (_is_acid) {
         _lazy_read_ctx.predicate_orc_columns.insert(
                 _lazy_read_ctx.predicate_orc_columns.end(),
@@ -1258,7 +1386,6 @@ Status OrcReader::set_fill_columns(
             } else {
                 _lazy_read_ctx.predicate_columns.first.emplace_back(iter->first);
                 _lazy_read_ctx.predicate_columns.second.emplace_back(iter->second.second);
-
                 _lazy_read_ctx.predicate_orc_columns.emplace_back(
                         _table_info_node_ptr->children_file_column_name(iter->first));
                 if (check_iceberg_row_lineage_column_idx(read_table_col) != -1) {
@@ -1283,16 +1410,13 @@ Status OrcReader::set_fill_columns(
         if (iter == predicate_table_columns.end()) {
             _lazy_read_ctx.missing_columns.emplace(kv.first, kv.second);
         } else {
-            //For check missing column :   missing column == xx, missing column is null,missing column is not null.
             if (_slot_id_to_filter_conjuncts->find(iter->second.second) !=
                 _slot_id_to_filter_conjuncts->end()) {
                 for (const auto& ctx :
                      _slot_id_to_filter_conjuncts->find(iter->second.second)->second) {
-                    _filter_conjuncts.emplace_back(ctx); //  todo ??????
+                    _filter_conjuncts.emplace_back(ctx);
                 }
             }
-
-            // predicate_missing_columns is VLiteral.To fill in default values for missing columns.
             _lazy_read_ctx.predicate_missing_columns.emplace(kv.first, kv.second);
             if (check_iceberg_row_lineage_column_idx(kv.first) != -1) {
                 _enable_lazy_mat = false;
@@ -1304,40 +1428,25 @@ Status OrcReader::set_fill_columns(
         !_lazy_read_ctx.lazy_read_columns.empty()) {
         _lazy_read_ctx.can_lazy_read = true;
     }
+}
 
-    if (_lazy_read_ctx.conjuncts.empty()) {
-        _lazy_read_ctx.can_lazy_read = false;
-    } else if (_enable_filter_by_min_max) {
-        auto res = _init_search_argument(_push_down_exprs);
-        if (_state->query_options().check_orc_init_sargs_success && !res) {
-            std::stringstream ss;
-            for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
-                ss << conjunct->root()->debug_string() << "\n";
-            }
-            std::string conjuncts_str = ss.str();
-            return Status::InternalError(
-                    "Session variable check_orc_init_sargs_success is set, but "
-                    "_init_search_argument returns false because all exprs can not be pushed "
-                    "down:\n " +
-                    conjuncts_str);
-        }
-    }
+Status OrcReader::_init_orc_row_reader() {
     try {
         _row_reader_options.range(_range_start_offset, _range_size);
-        _row_reader_options.setTimezoneName(_ctz == "CST" ? "Asia/Shanghai" : _ctz);
+        std::string tz = _ctz.empty() ? "UTC" : (_ctz == "CST" ? "Asia/Shanghai" : _ctz);
+        _row_reader_options.setTimezoneName(tz);
         if (!_column_ids.empty()) {
             std::list<uint64_t> column_ids_list(_column_ids.begin(), _column_ids.end());
             _row_reader_options.includeTypes(column_ids_list);
-        } else { // If column_ids is empty, include all top-level columns to be read.
+        } else {
             _row_reader_options.include(_read_file_cols);
         }
         _row_reader_options.setEnableLazyDecoding(true);
 
-        //orc reader should not use the tiny stripe optimization when reading by row id.
+        // Tiny stripe optimization (skip when reading by row id)
         if (!_read_by_rows) {
             uint64_t number_of_stripes = _reader->getNumberOfStripes();
             auto all_stripes_needed = _reader->getNeedReadStripes(_row_reader_options);
-
             int64_t range_end_offset = _range_start_offset + _range_size;
 
             bool all_tiny_stripes = true;
@@ -1356,7 +1465,6 @@ Status OrcReader::set_fill_columns(
                     all_tiny_stripes = false;
                     break;
                 }
-
                 tiny_stripe_ranges.emplace_back(strip_start_offset, strip_end_offset);
             }
             if (all_tiny_stripes && number_of_stripes > 0) {
@@ -1366,7 +1474,6 @@ Status OrcReader::set_fill_columns(
                                                                      _orc_once_max_read_bytes);
                 auto range_finder = std::make_shared<io::LinearProbeRangeFinder>(
                         std::move(prefetch_merge_ranges));
-
                 auto* orc_input_stream_ptr = static_cast<ORCFileInputStream*>(_reader->getStream());
                 orc_input_stream_ptr->set_all_tiny_stripes();
                 auto& orc_file_reader = orc_input_stream_ptr->get_file_reader();
@@ -1376,6 +1483,7 @@ Status OrcReader::set_fill_columns(
             }
         }
 
+        // Merge predicate partition/missing back if can't lazy read
         if (!_lazy_read_ctx.can_lazy_read) {
             for (auto& kv : _lazy_read_ctx.predicate_partition_columns) {
                 _lazy_read_ctx.partition_columns.emplace(kv.first, kv.second);
@@ -1385,8 +1493,7 @@ Status OrcReader::set_fill_columns(
             }
         }
 
-        _fill_all_columns = true;
-        // create orc row reader
+        // Create ORC row reader
         if (_lazy_read_ctx.can_lazy_read) {
             _row_reader_options.filter(_lazy_read_ctx.predicate_orc_columns);
             _orc_filter = std::make_unique<ORCFilterImpl>(this);
@@ -1397,6 +1504,7 @@ Status OrcReader::set_fill_columns(
         _row_reader = _reader->createRowReader(_row_reader_options, _orc_filter.get(),
                                                _string_dict_filter.get());
 
+        // Build column name → index and type maps
         _batch = _row_reader->createRowBatch(_batch_size);
 
         // Derive the first row in this scan range from ORC RowReader's initial state.
@@ -1457,94 +1565,6 @@ Status OrcReader::set_fill_columns(
         }
     }
 
-    if (!_not_single_slot_filter_conjuncts.empty()) {
-        _filter_conjuncts.insert(_filter_conjuncts.end(), _not_single_slot_filter_conjuncts.begin(),
-                                 _not_single_slot_filter_conjuncts.end());
-        _disable_dict_filter = true;
-    }
-
-    if (_slot_id_to_filter_conjuncts && !_slot_id_to_filter_conjuncts->empty()) {
-        // Add predicate_partition_columns in _slot_id_to_filter_conjuncts(single slot conjuncts)
-        // to _filter_conjuncts, others should be added from not_single_slot_filter_conjuncts.
-        for (auto& kv : _lazy_read_ctx.predicate_partition_columns) {
-            auto& [value, slot_desc] = kv.second;
-            auto iter = _slot_id_to_filter_conjuncts->find(slot_desc->id());
-            if (iter != _slot_id_to_filter_conjuncts->end()) {
-                for (const auto& ctx : iter->second) {
-                    _filter_conjuncts.push_back(ctx);
-                }
-            }
-        }
-    }
-    return Status::OK();
-}
-
-Status OrcReader::_fill_partition_columns(
-        Block* block, uint64_t rows,
-        const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
-                partition_columns) {
-    DataTypeSerDe::FormatOptions _text_formatOptions;
-    for (const auto& kv : partition_columns) {
-        auto col_ptr = block->get_by_position((*_col_name_to_block_idx)[kv.first])
-                               .column->assume_mutable();
-        const auto& [value, slot_desc] = kv.second;
-        auto text_serde = slot_desc->get_data_type_ptr()->get_serde();
-        Slice slice(value.data(), value.size());
-        uint64_t num_deserialized = 0;
-        if (text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows, &num_deserialized,
-                                                           _text_formatOptions) != Status::OK()) {
-            return Status::InternalError("Failed to fill partition column: {}={}",
-                                         slot_desc->col_name(), value);
-        }
-        if (num_deserialized != rows) {
-            return Status::InternalError(
-                    "Failed to fill partition column: {}={} ."
-                    "Number of rows expected to be written : {}, number of rows actually "
-                    "written : "
-                    "{}",
-                    slot_desc->col_name(), value, num_deserialized, rows);
-        }
-    }
-    return Status::OK();
-}
-
-Status OrcReader::_fill_missing_columns(
-        Block* block, uint64_t rows,
-        const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) {
-    for (const auto& kv : missing_columns) {
-        if (!_col_name_to_block_idx->contains(kv.first)) {
-            return Status::InternalError("Failed to find missing column: {}, block: {}", kv.first,
-                                         block->dump_structure());
-        }
-        if (kv.second == nullptr) {
-            // no default column, fill with null
-            auto mutable_column = block->get_by_position((*_col_name_to_block_idx)[kv.first])
-                                          .column->assume_mutable();
-            auto* nullable_column = static_cast<ColumnNullable*>(mutable_column.get());
-            nullable_column->insert_many_defaults(rows);
-        } else {
-            // fill with default value
-            const auto& ctx = kv.second;
-            // PT1 => dest primitive type
-            ColumnPtr result_column_ptr;
-            RETURN_IF_ERROR(ctx->execute(block, result_column_ptr));
-            if (result_column_ptr->use_count() == 1) {
-                // call resize because the first column of _src_block_ptr may not be filled by reader,
-                // so _src_block_ptr->rows() may return wrong result, cause the column created by `ctx->execute()`
-                // has only one row.
-                auto mutable_column = result_column_ptr->assume_mutable();
-                mutable_column->resize(rows);
-                // result_column_ptr maybe a ColumnConst, convert it to a normal column
-                result_column_ptr = result_column_ptr->convert_to_full_column_if_const();
-                auto origin_column_type =
-                        block->get_by_position((*_col_name_to_block_idx)[kv.first]).type;
-                bool is_nullable = origin_column_type->is_nullable();
-                block->replace_by_position(
-                        (*_col_name_to_block_idx)[kv.first],
-                        is_nullable ? make_nullable(result_column_ptr) : result_column_ptr);
-            }
-        }
-    }
     return Status::OK();
 }
 
@@ -1639,6 +1659,8 @@ Status OrcReader::_append_iceberg_rowid_column(Block* block, size_t rows, int64_
     return Status::OK();
 }
 
+// _append_iceberg_rowid_column removed: now handled by ColumnProcessor.fill_synthesized_columns
+
 void OrcReader::_init_system_properties() {
     if (_scan_range.__isset.file_type) {
         // for compatibility
@@ -1819,16 +1841,12 @@ DataTypePtr OrcReader::convert_to_doris_type(const orc::Type* orc_type) {
     }
 }
 
-Status OrcReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                              std::unordered_set<std::string>* missing_cols) {
+Status OrcReader::_get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) {
     const auto& root_type = _reader->getType();
     for (int i = 0; i < root_type.getSubtypeCount(); ++i) {
         name_to_type->emplace(root_type.getFieldName(i),
                               convert_to_doris_type(root_type.getSubtype(i)));
     }
-    for (auto& col : _missing_cols) {
-        missing_cols->insert(col);
-    }
     return Status::OK();
 }
 
@@ -2381,7 +2399,7 @@ std::string OrcReader::get_field_name_lower_case(const orc::Type* orc_type, int
     return name;
 }
 
-Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+Status OrcReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     RETURN_IF_ERROR(_get_next_block_impl(block, read_rows, eof));
     if (*eof) {
         COUNTER_UPDATE(_orc_profile.selected_row_group_count,
@@ -2435,21 +2453,6 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
         *read_rows = 0;
         return Status::OK();
     }
-    if (_push_down_agg_type == TPushAggOp::type::COUNT) {
-        auto rows = std::min(get_remaining_rows(), (int64_t)_batch_size);
-
-        set_remaining_rows(get_remaining_rows() - rows);
-        auto mutate_columns = block->mutate_columns();
-        for (auto& col : mutate_columns) {
-            col->resize(rows);
-        }
-        block->set_columns(std::move(mutate_columns));
-        *read_rows = rows;
-        if (get_remaining_rows() == 0) {
-            *eof = true;
-        }
-        return Status::OK();
-    }
 
     if (!_seek_to_read_one_line()) {
         *eof = true;
@@ -2482,6 +2485,7 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
                 }
                 // After nextBatch(), getRowNumber() returns the start of the batch just read.
                 _last_read_row_number = _row_reader->getRowNumber();
+
                 // Use _batch->numElements (not rr) because ORC's nextBatch has an
                 // internal do-while loop: when the filter callback rejects an entire
                 // batch, the loop retries with the next batch.  The return value (rr)
@@ -2490,6 +2494,7 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
                 // to that iteration's batch size (Reader.cc:1427), giving the correct
                 // next-read position.
                 _current_read_position = _last_read_row_number + _batch->numElements;
+
             } catch (std::exception& e) {
                 std::string _err_msg = e.what();
                 if (_io_ctx && _io_ctx->should_stop && _err_msg == "stop") {
@@ -2545,13 +2550,30 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
 #endif
         }
 
-        RETURN_IF_ERROR(_fill_partition_columns(block, _batch->numElements,
-                                                _lazy_read_ctx.partition_columns));
-        RETURN_IF_ERROR(
-                _fill_missing_columns(block, _batch->numElements, _lazy_read_ctx.missing_columns));
+        {
+            std::vector<std::string> part_cols;
+            for (const auto& kv : _lazy_read_ctx.partition_columns) {
+                part_cols.push_back(kv.first);
+            }
+            RETURN_IF_ERROR(on_fill_partition_columns(block, _batch->numElements, part_cols));
+        }
+        {
+            std::vector<std::string> miss_cols;
+            for (const auto& kv : _lazy_read_ctx.missing_columns) {
+                miss_cols.push_back(kv.first);
+            }
+            RETURN_IF_ERROR(on_fill_missing_columns(block, _batch->numElements, miss_cols));
+        }
 
-        RETURN_IF_ERROR(_fill_row_id_columns(block, start_row));
-        RETURN_IF_ERROR(_append_iceberg_rowid_column(block, block->rows(), start_row));
+        // Build sequential row positions for RowPositionProvider
+        _current_batch_row_positions.resize(block->rows());
+        for (size_t i = 0; i < block->rows(); ++i) {
+            _current_batch_row_positions[i] =
+                    static_cast<rowid_t>(start_row + static_cast<int64_t>(i));
+        }
+        if (has_synthesized_column_handlers()) {
+            RETURN_IF_ERROR(fill_synthesized_columns(block, block->rows()));
+        }
 
         if (block->rows() == 0) {
             RETURN_IF_ERROR(_convert_dict_cols_to_string_cols(block, nullptr));
@@ -2616,6 +2638,7 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
                 }
                 // After nextBatch(), getRowNumber() returns the start of the batch just read.
                 _last_read_row_number = _row_reader->getRowNumber();
+
                 _current_read_position = _last_read_row_number + _batch->numElements;
             } catch (std::exception& e) {
                 std::string _err_msg = e.what();
@@ -2678,13 +2701,30 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
 #endif
         }
 
-        RETURN_IF_ERROR(_fill_partition_columns(block, _batch->numElements,
-                                                _lazy_read_ctx.partition_columns));
-        RETURN_IF_ERROR(
-                _fill_missing_columns(block, _batch->numElements, _lazy_read_ctx.missing_columns));
+        {
+            std::vector<std::string> part_cols;
+            for (const auto& kv : _lazy_read_ctx.partition_columns) {
+                part_cols.push_back(kv.first);
+            }
+            RETURN_IF_ERROR(on_fill_partition_columns(block, _batch->numElements, part_cols));
+        }
+        {
+            std::vector<std::string> miss_cols;
+            for (const auto& kv : _lazy_read_ctx.missing_columns) {
+                miss_cols.push_back(kv.first);
+            }
+            RETURN_IF_ERROR(on_fill_missing_columns(block, _batch->numElements, miss_cols));
+        }
 
-        RETURN_IF_ERROR(_fill_row_id_columns(block, start_row));
-        RETURN_IF_ERROR(_append_iceberg_rowid_column(block, block->rows(), start_row));
+        // Build sequential row positions for RowPositionProvider
+        _current_batch_row_positions.resize(block->rows());
+        for (size_t i = 0; i < block->rows(); ++i) {
+            _current_batch_row_positions[i] =
+                    static_cast<rowid_t>(start_row + static_cast<int64_t>(i));
+        }
+        if (has_synthesized_column_handlers()) {
+            RETURN_IF_ERROR(fill_synthesized_columns(block, block->rows()));
+        }
 
         if (block->rows() == 0) {
             RETURN_IF_ERROR(_convert_dict_cols_to_string_cols(block, nullptr));
@@ -2836,10 +2876,9 @@ void OrcReader::_build_delete_row_filter(const Block* block, size_t rows) {
             auto bucket_id = bucket_id_column.get_int(i);
             auto row_id = row_id_column.get_int(i);
 
-            TransactionalHiveReader::AcidRowID transactional_row_id = {
-                    .original_transaction = original_transaction,
-                    .bucket = bucket_id,
-                    .row_id = row_id};
+            AcidRowID transactional_row_id = {.original_transaction = original_transaction,
+                                              .bucket = bucket_id,
+                                              .row_id = row_id};
             if (_delete_rows->contains(transactional_row_id)) {
                 _pos_delete_filter_data[i] = 0;
             }
@@ -2903,9 +2942,20 @@ Status OrcReader::filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t s
         column_ptr->sanity_check();
 #endif
     }
-    RETURN_IF_ERROR(
-            _fill_partition_columns(block, size, _lazy_read_ctx.predicate_partition_columns));
-    RETURN_IF_ERROR(_fill_missing_columns(block, size, _lazy_read_ctx.predicate_missing_columns));
+    {
+        std::vector<std::string> pred_part_cols;
+        for (const auto& kv : _lazy_read_ctx.predicate_partition_columns) {
+            pred_part_cols.push_back(kv.first);
+        }
+        RETURN_IF_ERROR(on_fill_partition_columns(block, size, pred_part_cols));
+    }
+    {
+        std::vector<std::string> pred_miss_cols;
+        for (const auto& kv : _lazy_read_ctx.predicate_missing_columns) {
+            pred_miss_cols.push_back(kv.first);
+        }
+        RETURN_IF_ERROR(on_fill_missing_columns(block, size, pred_miss_cols));
+    }
     if (_lazy_read_ctx.resize_first_column) {
         // VExprContext.execute has an optimization, the filtering is executed when block->rows() > 0
         // The following process may be tricky and time-consuming, but we have no other way.
diff --git a/be/src/format/orc/vorc_reader.h b/be/src/format/orc/vorc_reader.h
index 4d7a73d19f6f6e..a4ae7f1e9c6fde 100644
--- a/be/src/format/orc/vorc_reader.h
+++ b/be/src/format/orc/vorc_reader.h
@@ -41,9 +41,9 @@
 #include "exprs/vslot_ref.h"
 #include "format/column_type_convert.h"
 #include "format/format_common.h"
-#include "format/generic_reader.h"
 #include "format/table/table_format_reader.h"
-#include "format/table/transactional_hive_reader.h"
+#include "format/table/table_schema_change_helper.h"
+#include "format/table/transactional_hive_common.h"
 #include "io/file_factory.h"
 #include "io/fs/buffered_reader.h"
 #include "io/fs/file_reader.h"
@@ -54,7 +54,6 @@
 #include "orc/Vector.hh"
 #include "orc/sargs/Literal.hh"
 #include "runtime/runtime_profile.h"
-#include "storage/olap_common.h"
 
 namespace doris {
 class RuntimeState;
@@ -85,6 +84,18 @@ namespace doris {
 #include "common/compile_check_begin.h"
 class ORCFileInputStream;
 
+/// ORC-specific initialization context.
+/// Extends ReaderInitContext with conjuncts and filter fields.
+/// Note: ORC does NOT use slot_id_to_predicates (unlike Parquet).
+struct OrcInitContext final : public ReaderInitContext {
+    // Safe default for standalone readers (delete file readers) without conjuncts.
+    static inline const VExprContextSPtrs EMPTY_CONJUNCTS {};
+
+    const VExprContextSPtrs* conjuncts = &EMPTY_CONJUNCTS;
+    const VExprContextSPtrs* not_single_slot_filter_conjuncts = nullptr;
+    const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts = nullptr;
+};
+
 struct LazyReadContext {
     VExprContextSPtrs conjuncts;
     bool can_lazy_read = false;
@@ -117,7 +128,7 @@ struct LazyReadContext {
     size_t filter_phase_rows = 0;
 };
 
-class OrcReader : public GenericReader {
+class OrcReader : public TableFormatReader, public RowPositionProvider {
     ENABLE_FACTORY_CREATOR(OrcReader);
 
 public:
@@ -161,30 +172,21 @@ class OrcReader : public GenericReader {
               FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true);
 
     ~OrcReader() override = default;
-    //If you want to read the file by index instead of column name, set hive_use_column_names to false.
-    Status init_reader(
-            const std::vector<std::string>* column_names,
-            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-            const VExprContextSPtrs& conjuncts, bool is_acid,
-            const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts,
-            std::shared_ptr<TableSchemaChangeHelper::Node> table_info_node_ptr =
-                    TableSchemaChangeHelper::ConstNode::get_instance(),
-            const std::set<uint64_t>& column_ids = {},
-            const std::set<uint64_t>& filter_column_ids = {});
-
-    Status set_fill_columns(
-            const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
-                    partition_columns,
-            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) override;
 
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    // Override to build table_info_node from ORC file type using by_orc_name.
+    // Subclasses (HiveOrcReader, IcebergOrcReader) call GenericReader::on_before_init_reader
+    // directly, so this OrcReader-level override only applies to plain OrcReader (TVF, load).
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
 
+protected:
+    // ---- Unified init_reader(ReaderInitContext*) overrides ----
+    Status _open_file_reader(ReaderInitContext* ctx) override;
+    Status _do_init_reader(ReaderInitContext* ctx) override;
+
+public:
     int64_t size() const;
 
-    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) override;
+    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override;
 
     Status init_schema_reader() override;
 
@@ -195,9 +197,7 @@ class OrcReader : public GenericReader {
         _position_delete_ordered_rowids = delete_rows;
     }
 
-    void set_delete_rows(const TransactionalHiveReader::AcidRowIDSet* delete_rows) {
-        _delete_rows = delete_rows;
-    }
+    void set_delete_rows(const AcidRowIDSet* delete_rows) { _delete_rows = delete_rows; }
 
     Status filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t size, void* arg);
 
@@ -240,6 +240,8 @@ class OrcReader : public GenericReader {
         _condition_cache_ctx = std::move(ctx);
     }
 
+    bool supports_count_pushdown() const override { return true; }
+
     int64_t get_total_rows() const override {
         return _row_reader ? _row_reader->getNumberOfRows() : 0;
     }
@@ -250,10 +252,36 @@ class OrcReader : public GenericReader {
                (_delete_rows != nullptr && !_delete_rows->empty());
     }
 
+    // RowPositionProvider implementation
+    const std::vector<rowid_t>& current_batch_row_positions() const override {
+        return _current_batch_row_positions;
+    }
+
 protected:
     void _collect_profile_before_close() override;
     void _filter_rows_by_condition_cache(size_t* read_rows, bool* eof);
 
+    // Core block reading implementation
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+
+    // ORC fills partition/missing columns per-batch internally,
+    // so suppress TableFormatReader's default on_after_read_block fill.
+    Status on_after_read_block(Block* /*block*/, size_t* /*read_rows*/) override {
+        return Status::OK();
+    }
+
+    // Protected accessors so CRTP mixin subclasses can reach private members
+    io::IOContext* get_io_ctx() const { return _io_ctx; }
+    std::unordered_map<std::string, uint32_t>*& col_name_to_block_idx_ref() {
+        return _col_name_to_block_idx;
+    }
+    RuntimeProfile* get_profile() const { return _profile; }
+    RuntimeState* get_state() const { return _state; }
+    const TFileScanRangeParams& get_scan_params() const { return _scan_params; }
+    const TFileRangeDesc& get_scan_range() const { return _scan_range; }
+    const TupleDescriptor* get_tuple_descriptor() const { return _tuple_descriptor; }
+    const RowDescriptor* get_row_descriptor() const { return _row_descriptor; }
+
 private:
     struct IcebergRowIdParams {
         bool enabled = false;
@@ -330,6 +358,20 @@ class OrcReader : public GenericReader {
 
     static bool _check_acid_schema(const orc::Type& type);
 
+    // ---- set_fill_columns sub-functions ----
+    // Collect predicate columns from conjuncts for lazy materialization.
+    void _collect_predicate_columns_from_conjuncts(
+            std::unordered_map<std::string, std::pair<uint32_t, int>>& predicate_table_columns);
+    // Classify read/partition/missing columns into lazy vs predicate groups.
+    void _classify_columns_for_lazy_read(
+            const std::unordered_map<std::string, std::pair<uint32_t, int>>&
+                    predicate_table_columns,
+            const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
+                    partition_columns,
+            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns);
+    // Create ORC row reader with proper options, tiny stripe optimization, and type map.
+    Status _init_orc_row_reader();
+
     // functions for building search argument until _init_search_argument
     // Get predicate type from slot reference
     std::pair<bool, orc::PredicateDataType> _get_orc_predicate_type(const VSlotRef* slot_ref);
@@ -360,13 +402,6 @@ class OrcReader : public GenericReader {
 
     void _build_delete_row_filter(const Block* block, size_t rows);
     Status _get_next_block_impl(Block* block, size_t* read_rows, bool* eof);
-    Status _fill_partition_columns(
-            Block* block, uint64_t rows,
-            const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
-                    partition_columns);
-    Status _fill_missing_columns(
-            Block* block, uint64_t rows,
-            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns);
     void _init_system_properties();
     void _init_file_description();
 
@@ -679,6 +714,13 @@ class OrcReader : public GenericReader {
 
     Status _set_read_one_line_impl() override {
         _batch_size = 1;
+        // If the ORC row reader already exists, the batch was created earlier
+        // (during _do_init_reader) with the original _batch_size (capped to
+        // _MIN_BATCH_SIZE = 4064).  We must recreate it with the new size of 1
+        // so that nextBatch() returns at most 1 row per call.
+        if (_row_reader) {
+            _batch = _row_reader->createRowBatch(_batch_size);
+        }
         return Status::OK();
     }
 
@@ -693,13 +735,18 @@ class OrcReader : public GenericReader {
     io::FileDescription _file_description;
     size_t _batch_size;
     int64_t _range_start_offset;
+
+protected:
+    size_t get_batch_size() const { return _batch_size; }
+
+private:
     int64_t _range_size;
     std::string _ctz;
 
     cctz::time_zone _time_zone;
 
     // The columns of the table to be read (contain columns that do not exist)
-    const std::vector<std::string>* _table_column_names;
+    std::vector<std::string> _table_column_names;
 
     // The columns of the file to be read  (file column name)
     std::list<std::string> _read_file_cols;
@@ -707,9 +754,6 @@ class OrcReader : public GenericReader {
     // The columns of the table to be read (table column name)
     std::list<std::string> _read_table_cols;
 
-    // _read_table_cols + _missing_cols = _table_column_names
-    std::list<std::string> _missing_cols;
-
     // file column name to std::vector<orc::ColumnVectorBatch*> idx.
     std::unordered_map<std::string, int> _colname_to_idx;
 
@@ -745,20 +789,25 @@ class OrcReader : public GenericReader {
 
     io::IOContext* _io_ctx = nullptr;
     std::shared_ptr<io::IOContext> _io_ctx_holder;
+    const TupleDescriptor* _tuple_descriptor = nullptr;
+    const RowDescriptor* _row_descriptor = nullptr;
     bool _enable_lazy_mat = true;
     bool _enable_filter_by_min_max = true;
 
     std::vector<DecimalScaleParams> _decimal_scale_params;
     size_t _decimal_scale_params_index;
 
+protected:
     bool _is_acid = false;
-    std::unique_ptr<IColumn::Filter> _filter;
+    // Protected so Iceberg subclasses can register synthesized columns
+    // in on_before_init_reader.
     LazyReadContext _lazy_read_ctx;
-    const TransactionalHiveReader::AcidRowIDSet* _delete_rows = nullptr;
+
+private:
+    std::unique_ptr<IColumn::Filter> _filter;
+    const AcidRowIDSet* _delete_rows = nullptr;
     std::unique_ptr<IColumn::Filter> _delete_rows_filter_ptr;
 
-    const TupleDescriptor* _tuple_descriptor = nullptr;
-    const RowDescriptor* _row_descriptor = nullptr;
     VExprContextSPtrs _not_single_slot_filter_conjuncts;
     const std::unordered_map<int, VExprContextSPtrs>* _slot_id_to_filter_conjuncts = nullptr;
     VExprContextSPtrs _dict_filter_conjuncts;
@@ -790,6 +839,8 @@ class OrcReader : public GenericReader {
     IcebergRowIdParams _iceberg_rowid_params;
     std::shared_ptr<RowLineageColumns> _row_lineage_columns;
 
+    std::vector<rowid_t> _current_batch_row_positions;
+
     // Through this node, you can find the file column based on the table column.
     std::shared_ptr<TableSchemaChangeHelper::Node> _table_info_node_ptr =
             TableSchemaChangeHelper::ConstNode::get_instance();
diff --git a/be/src/format/parquet/schema_desc.cpp b/be/src/format/parquet/schema_desc.cpp
index f12b7bc4102fb7..6501e0b7a91a10 100644
--- a/be/src/format/parquet/schema_desc.cpp
+++ b/be/src/format/parquet/schema_desc.cpp
@@ -30,7 +30,8 @@
 #include "core/data_type/data_type_map.h"
 #include "core/data_type/data_type_struct.h"
 #include "core/data_type/define_primitive_type.h"
-#include "format/table/table_format_reader.h"
+#include "format/generic_reader.h"
+#include "format/table/table_schema_change_helper.h"
 #include "util/slice.h"
 #include "util/string_util.h"
 
diff --git a/be/src/format/parquet/vparquet_column_reader.h b/be/src/format/parquet/vparquet_column_reader.h
index 02a94731bab380..3fa25667f18516 100644
--- a/be/src/format/parquet/vparquet_column_reader.h
+++ b/be/src/format/parquet/vparquet_column_reader.h
@@ -29,12 +29,15 @@
 
 #include "common/status.h"
 #include "core/data_type/data_type.h"
+#include "format/generic_reader.h"
 #include "format/parquet/parquet_column_convert.h"
 #include "format/parquet/parquet_common.h"
 #include "format/parquet/vparquet_column_chunk_reader.h"
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 #include "io/fs/buffered_reader.h"
 #include "io/fs/file_reader_writer_fwd.h"
+#include "parquet_column_convert.h"
+#include "vparquet_column_chunk_reader.h"
 
 namespace cctz {
 class time_zone;
diff --git a/be/src/format/parquet/vparquet_group_reader.cpp b/be/src/format/parquet/vparquet_group_reader.cpp
index 2cb13bfd6003f8..0efeea70c10eb8 100644
--- a/be/src/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/format/parquet/vparquet_group_reader.cpp
@@ -29,7 +29,6 @@
 #include <ostream>
 
 #include "common/config.h"
-#include "common/consts.h"
 #include "common/logging.h"
 #include "common/object_pool.h"
 #include "common/status.h"
@@ -39,13 +38,10 @@
 #include "core/column/column_const.h"
 #include "core/column/column_nullable.h"
 #include "core/column/column_string.h"
-#include "core/column/column_struct.h"
 #include "core/column/column_vector.h"
 #include "core/custom_allocator.h"
 #include "core/data_type/data_type.h"
-#include "core/data_type/data_type_number.h"
 #include "core/data_type/data_type_string.h"
-#include "core/data_type/data_type_struct.h"
 #include "core/data_type/define_primitive_type.h"
 #include "core/pod_array.h"
 #include "core/types.h"
@@ -79,63 +75,6 @@ struct IOContext;
 namespace doris {
 #include "common/compile_check_begin.h"
 
-namespace {
-Status build_iceberg_rowid_column(const DataTypePtr& type, const std::string& file_path,
-                                  const std::vector<rowid_t>& row_ids, int32_t partition_spec_id,
-                                  const std::string& partition_data_json,
-                                  MutableColumnPtr* column_out) {
-    if (type == nullptr || column_out == nullptr) {
-        return Status::InvalidArgument("Invalid iceberg rowid column type or output column");
-    }
-
-    MutableColumnPtr column = type->create_column();
-    ColumnNullable* nullable_col = check_and_get_column<ColumnNullable>(column.get());
-    ColumnStruct* struct_col = nullptr;
-    if (nullable_col != nullptr) {
-        struct_col =
-                check_and_get_column<ColumnStruct>(nullable_col->get_nested_column_ptr().get());
-    } else {
-        struct_col = check_and_get_column<ColumnStruct>(column.get());
-    }
-
-    if (struct_col == nullptr || struct_col->tuple_size() < 4) {
-        return Status::InternalError("Invalid iceberg rowid column structure");
-    }
-
-    size_t num_rows = row_ids.size();
-    auto& file_path_col = struct_col->get_column(0);
-    auto& row_pos_col = struct_col->get_column(1);
-    auto& spec_id_col = struct_col->get_column(2);
-    auto& partition_data_col = struct_col->get_column(3);
-
-    file_path_col.reserve(num_rows);
-    row_pos_col.reserve(num_rows);
-    spec_id_col.reserve(num_rows);
-    partition_data_col.reserve(num_rows);
-
-    for (size_t i = 0; i < num_rows; ++i) {
-        file_path_col.insert_data(file_path.data(), file_path.size());
-    }
-    for (size_t i = 0; i < num_rows; ++i) {
-        int64_t row_pos = static_cast<int64_t>(row_ids[i]);
-        row_pos_col.insert_data(reinterpret_cast<const char*>(&row_pos), sizeof(row_pos));
-    }
-    for (size_t i = 0; i < num_rows; ++i) {
-        int32_t spec_id = partition_spec_id;
-        spec_id_col.insert_data(reinterpret_cast<const char*>(&spec_id), sizeof(spec_id));
-    }
-    for (size_t i = 0; i < num_rows; ++i) {
-        partition_data_col.insert_data(partition_data_json.data(), partition_data_json.size());
-    }
-
-    if (nullable_col != nullptr) {
-        nullable_col->get_null_map_data().resize_fill(num_rows, 0);
-    }
-
-    *column_out = std::move(column);
-    return Status::OK();
-}
-} // namespace
 const std::vector<int64_t> RowGroupReader::NO_DELETE = {};
 static constexpr uint32_t MAX_DICT_CODE_PREDICATE_TO_REWRITE = std::numeric_limits<uint32_t>::max();
 
@@ -386,12 +325,15 @@ Status RowGroupReader::next_batch(Block* block, size_t batch_size, size_t* read_
         bool modify_row_ids = false;
         RETURN_IF_ERROR(_read_empty_batch(batch_size, read_rows, batch_eof, &modify_row_ids));
 
-        RETURN_IF_ERROR(
-                _fill_partition_columns(block, *read_rows, _lazy_read_ctx.partition_columns));
-        RETURN_IF_ERROR(_fill_missing_columns(block, *read_rows, _lazy_read_ctx.missing_columns));
-
-        RETURN_IF_ERROR(_fill_row_id_columns(block, *read_rows, modify_row_ids));
-        RETURN_IF_ERROR(_append_iceberg_rowid_column(block, *read_rows, modify_row_ids));
+        DCHECK(_table_format_reader);
+        RETURN_IF_ERROR(_table_format_reader->on_fill_partition_columns(
+                block, *read_rows, _lazy_read_ctx.partition_col_names));
+        RETURN_IF_ERROR(_table_format_reader->on_fill_missing_columns(
+                block, *read_rows, _lazy_read_ctx.missing_col_names));
+        if (_table_format_reader->has_synthesized_column_handlers()) {
+            RETURN_IF_ERROR(_get_current_batch_row_id(*read_rows));
+        }
+        RETURN_IF_ERROR(_table_format_reader->fill_synthesized_columns(block, *read_rows));
 
         Status st = VExprContext::filter_block(_lazy_read_ctx.conjuncts, block, block->columns());
         *read_rows = block->rows();
@@ -405,11 +347,16 @@ Status RowGroupReader::next_batch(Block* block, size_t batch_size, size_t* read_
         int64_t batch_base_row = _total_read_rows;
         RETURN_IF_ERROR((_read_column_data(block, _lazy_read_ctx.all_read_columns, batch_size,
                                            read_rows, batch_eof, filter_map)));
-        RETURN_IF_ERROR(
-                _fill_partition_columns(block, *read_rows, _lazy_read_ctx.partition_columns));
-        RETURN_IF_ERROR(_fill_missing_columns(block, *read_rows, _lazy_read_ctx.missing_columns));
-        RETURN_IF_ERROR(_fill_row_id_columns(block, *read_rows, false));
-        RETURN_IF_ERROR(_append_iceberg_rowid_column(block, *read_rows, false));
+        DCHECK(_table_format_reader);
+        RETURN_IF_ERROR(_table_format_reader->on_fill_partition_columns(
+                block, *read_rows, _lazy_read_ctx.partition_col_names));
+        RETURN_IF_ERROR(_table_format_reader->on_fill_missing_columns(
+                block, *read_rows, _lazy_read_ctx.missing_col_names));
+
+        if (_table_format_reader->has_synthesized_column_handlers()) {
+            RETURN_IF_ERROR(_get_current_batch_row_id(*read_rows));
+        }
+        RETURN_IF_ERROR(_table_format_reader->fill_synthesized_columns(block, *read_rows));
 
 #ifndef NDEBUG
         for (auto col : *block) {
@@ -683,12 +630,15 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re
         }
         pre_raw_read_rows += pre_read_rows;
 
-        RETURN_IF_ERROR(_fill_partition_columns(block, pre_read_rows,
-                                                _lazy_read_ctx.predicate_partition_columns));
-        RETURN_IF_ERROR(_fill_missing_columns(block, pre_read_rows,
-                                              _lazy_read_ctx.predicate_missing_columns));
-        RETURN_IF_ERROR(_fill_row_id_columns(block, pre_read_rows, false));
-        RETURN_IF_ERROR(_append_iceberg_rowid_column(block, pre_read_rows, false));
+        DCHECK(_table_format_reader);
+        RETURN_IF_ERROR(_table_format_reader->on_fill_partition_columns(
+                block, pre_read_rows, _lazy_read_ctx.predicate_partition_col_names));
+        RETURN_IF_ERROR(_table_format_reader->on_fill_missing_columns(
+                block, pre_read_rows, _lazy_read_ctx.predicate_missing_col_names));
+        if (_table_format_reader->has_synthesized_column_handlers()) {
+            RETURN_IF_ERROR(_get_current_batch_row_id(pre_read_rows));
+        }
+        RETURN_IF_ERROR(_table_format_reader->fill_synthesized_columns(block, pre_read_rows));
 
         RETURN_IF_ERROR(_build_pos_delete_filter(pre_read_rows));
 
@@ -706,7 +656,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re
 
         bool can_filter_all = false;
         bool resize_first_column = _lazy_read_ctx.resize_first_column;
-        if (resize_first_column && _iceberg_rowid_params.enabled) {
+        if (resize_first_column && _table_format_reader->has_synthesized_column_handlers()) {
             int row_id_idx = block->get_position_by_name(doris::BeConsts::ICEBERG_ROWID_COL);
             if (row_id_idx == 0) {
                 resize_first_column = false;
@@ -716,7 +666,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re
             SCOPED_RAW_TIMER(&_predicate_filter_time);
 
             // generate filter vector
-            if (resize_first_column) {
+            if (_lazy_read_ctx.resize_first_column) {
                 // VExprContext.execute has an optimization, the filtering is executed when block->rows() > 0
                 // The following process may be tricky and time-consuming, but we have no other way.
                 block->get_by_position(0).column->assume_mutable()->resize(pre_read_rows);
@@ -742,7 +692,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re
                 _mark_condition_cache_granules(result_filter.data(), pre_read_rows, batch_base_row);
             }
 
-            if (resize_first_column) {
+            if (_lazy_read_ctx.resize_first_column) {
                 // We have to clean the first column to insert right data.
                 block->get_by_position(0).column->assume_mutable()->clear();
             }
@@ -775,7 +725,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re
                             .column->assume_mutable()
                             ->clear();
                 }
-                if (_iceberg_rowid_params.enabled) {
+                if (_table_format_reader->has_synthesized_column_handlers()) {
                     int row_id_idx =
                             block->get_position_by_name(doris::BeConsts::ICEBERG_ROWID_COL);
                     if (row_id_idx >= 0) {
@@ -843,7 +793,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re
         SCOPED_RAW_TIMER(&_predicate_filter_time);
         if (filter_map.has_filter()) {
             std::vector<uint32_t> predicate_columns = _lazy_read_ctx.all_predicate_col_ids;
-            if (_iceberg_rowid_params.enabled) {
+            if (_table_format_reader->has_synthesized_column_handlers()) {
                 int row_id_idx = block->get_position_by_name(doris::BeConsts::ICEBERG_ROWID_COL);
                 if (row_id_idx >= 0 &&
                     std::find(predicate_columns.begin(), predicate_columns.end(),
@@ -877,8 +827,11 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re
     *read_rows = column_size;
 
     *batch_eof = pre_eof;
-    RETURN_IF_ERROR(_fill_partition_columns(block, column_size, _lazy_read_ctx.partition_columns));
-    RETURN_IF_ERROR(_fill_missing_columns(block, column_size, _lazy_read_ctx.missing_columns));
+    DCHECK(_table_format_reader);
+    RETURN_IF_ERROR(_table_format_reader->on_fill_partition_columns(
+            block, column_size, _lazy_read_ctx.partition_col_names));
+    RETURN_IF_ERROR(_table_format_reader->on_fill_missing_columns(
+            block, column_size, _lazy_read_ctx.missing_col_names));
 #ifndef NDEBUG
     for (auto col : *block) {
         col.column->sanity_check();
@@ -920,77 +873,6 @@ Status RowGroupReader::_rebuild_filter_map(FilterMap& filter_map,
     return Status::OK();
 }
 
-Status RowGroupReader::_fill_partition_columns(
-        Block* block, size_t rows,
-        const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
-                partition_columns) {
-    DataTypeSerDe::FormatOptions _text_formatOptions;
-    for (const auto& kv : partition_columns) {
-        auto doris_column = block->get_by_position((*_col_name_to_block_idx)[kv.first]).column;
-        // obtained from block*, it is a mutable object.
-        auto* col_ptr = const_cast<IColumn*>(doris_column.get());
-        const auto& [value, slot_desc] = kv.second;
-        auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
-        Slice slice(value.data(), value.size());
-        uint64_t num_deserialized = 0;
-        // Be careful when reading empty rows from parquet row groups.
-        if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows,
-                                                            &num_deserialized,
-                                                            _text_formatOptions) != Status::OK()) {
-            return Status::InternalError("Failed to fill partition column: {}={}",
-                                         slot_desc->col_name(), value);
-        }
-        if (num_deserialized != rows) {
-            return Status::InternalError(
-                    "Failed to fill partition column: {}={} ."
-                    "Number of rows expected to be written : {}, number of rows actually written : "
-                    "{}",
-                    slot_desc->col_name(), value, num_deserialized, rows);
-        }
-    }
-    return Status::OK();
-}
-
-Status RowGroupReader::_fill_missing_columns(
-        Block* block, size_t rows,
-        const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) {
-    for (const auto& kv : missing_columns) {
-        if (!_col_name_to_block_idx->contains(kv.first)) {
-            return Status::InternalError("Missing column: {} not found in block {}", kv.first,
-                                         block->dump_structure());
-        }
-        if (kv.second == nullptr) {
-            // no default column, fill with null
-            auto mutable_column = block->get_by_position((*_col_name_to_block_idx)[kv.first])
-                                          .column->assume_mutable();
-            auto* nullable_column = assert_cast<ColumnNullable*>(mutable_column.get());
-            nullable_column->insert_many_defaults(rows);
-        } else {
-            // fill with default value
-            const auto& ctx = kv.second;
-            ColumnPtr result_column_ptr;
-            // PT1 => dest primitive type
-            RETURN_IF_ERROR(ctx->execute(block, result_column_ptr));
-            if (result_column_ptr->use_count() == 1) {
-                // call resize because the first column of _src_block_ptr may not be filled by reader,
-                // so _src_block_ptr->rows() may return wrong result, cause the column created by `ctx->execute()`
-                // has only one row.
-                auto mutable_column = result_column_ptr->assume_mutable();
-                mutable_column->resize(rows);
-                // result_column_ptr maybe a ColumnConst, convert it to a normal column
-                result_column_ptr = result_column_ptr->convert_to_full_column_if_const();
-                auto origin_column_type =
-                        block->get_by_position((*_col_name_to_block_idx)[kv.first]).type;
-                bool is_nullable = origin_column_type->is_nullable();
-                block->replace_by_position(
-                        (*_col_name_to_block_idx)[kv.first],
-                        is_nullable ? make_nullable(result_column_ptr) : result_column_ptr);
-            }
-        }
-    }
-    return Status::OK();
-}
-
 Status RowGroupReader::_read_empty_batch(size_t batch_size, size_t* read_rows, bool* batch_eof,
                                          bool* modify_row_ids) {
     *modify_row_ids = false;
@@ -1017,8 +899,8 @@ Status RowGroupReader::_read_empty_batch(size_t batch_size, size_t* read_rows, b
         _position_delete_ctx.current_row_id = end_row_id;
         *batch_eof = _position_delete_ctx.current_row_id == _position_delete_ctx.last_row_id;
 
-        if (_row_id_column_iterator_pair.first != nullptr || _iceberg_rowid_params.enabled ||
-            (_row_lineage_columns != nullptr && _row_lineage_columns->need_row_ids())) {
+        if (_row_id_column_iterator_pair.first != nullptr ||
+            _table_format_reader->has_synthesized_column_handlers()) {
             *modify_row_ids = true;
             _current_batch_row_ids.clear();
             _current_batch_row_ids.resize(*read_rows);
@@ -1042,7 +924,7 @@ Status RowGroupReader::_read_empty_batch(size_t batch_size, size_t* read_rows, b
             _remaining_rows = 0;
             *batch_eof = true;
         }
-        if (_iceberg_rowid_params.enabled) {
+        if (_table_format_reader->has_synthesized_column_handlers()) {
             *modify_row_ids = true;
             RETURN_IF_ERROR(_get_current_batch_row_id(*read_rows));
         }
@@ -1079,15 +961,9 @@ Status RowGroupReader::_get_current_batch_row_id(size_t read_rows) {
     return Status::OK();
 }
 
-Status RowGroupReader::_fill_row_id_columns(Block* block, size_t read_rows,
-                                            bool is_current_row_ids) {
-    const bool need_row_ids =
-            _row_id_column_iterator_pair.first != nullptr ||
-            (_row_lineage_columns != nullptr && _row_lineage_columns->need_row_ids());
-    if (need_row_ids && !is_current_row_ids) {
-        RETURN_IF_ERROR(_get_current_batch_row_id(read_rows));
-    }
+Status RowGroupReader::fill_topn_row_id(Block* block, size_t read_rows) {
     if (_row_id_column_iterator_pair.first != nullptr) {
+        // _get_current_batch_row_id must be called before fill_synthesized_columns
         auto col = block->get_by_position(_row_id_column_iterator_pair.second)
                            .column->assume_mutable();
         RETURN_IF_ERROR(_row_id_column_iterator_pair.first->read_by_rowids(
@@ -1132,56 +1008,6 @@ Status RowGroupReader::_fill_row_id_columns(Block* block, size_t read_rows,
     return Status::OK();
 }
 
-Status RowGroupReader::_append_iceberg_rowid_column(Block* block, size_t read_rows,
-                                                    bool is_current_row_ids) {
-    if (!_iceberg_rowid_params.enabled) {
-        return Status::OK();
-    }
-    if (!is_current_row_ids) {
-        RETURN_IF_ERROR(_get_current_batch_row_id(read_rows));
-    }
-
-    int row_id_idx = block->get_position_by_name(doris::BeConsts::ICEBERG_ROWID_COL);
-    if (row_id_idx >= 0) {
-        auto& col_with_type = block->get_by_position(static_cast<size_t>(row_id_idx));
-        MutableColumnPtr row_id_column;
-        RETURN_IF_ERROR(build_iceberg_rowid_column(
-                col_with_type.type, _iceberg_rowid_params.file_path, _current_batch_row_ids,
-                _iceberg_rowid_params.partition_spec_id, _iceberg_rowid_params.partition_data_json,
-                &row_id_column));
-        col_with_type.column = std::move(row_id_column);
-    } else {
-        DataTypes field_types;
-        field_types.push_back(std::make_shared<DataTypeString>());
-        field_types.push_back(std::make_shared<DataTypeInt64>());
-        field_types.push_back(std::make_shared<DataTypeInt32>());
-        field_types.push_back(std::make_shared<DataTypeString>());
-
-        std::vector<std::string> field_names = {"file_path", "row_position", "partition_spec_id",
-                                                "partition_data"};
-
-        auto row_id_type = std::make_shared<DataTypeStruct>(field_types, field_names);
-        MutableColumnPtr row_id_column;
-        RETURN_IF_ERROR(build_iceberg_rowid_column(
-                row_id_type, _iceberg_rowid_params.file_path, _current_batch_row_ids,
-                _iceberg_rowid_params.partition_spec_id, _iceberg_rowid_params.partition_data_json,
-                &row_id_column));
-        int insert_pos = _iceberg_rowid_params.row_id_column_pos;
-        if (insert_pos < 0 || insert_pos > static_cast<int>(block->columns())) {
-            insert_pos = static_cast<int>(block->columns());
-        }
-        block->insert(static_cast<size_t>(insert_pos),
-                      ColumnWithTypeAndName(std::move(row_id_column), row_id_type,
-                                            doris::BeConsts::ICEBERG_ROWID_COL));
-    }
-
-    if (_col_name_to_block_idx != nullptr) {
-        *_col_name_to_block_idx = block->get_name_to_pos_map();
-    }
-
-    return Status::OK();
-}
-
 Status RowGroupReader::_build_pos_delete_filter(size_t read_rows) {
     if (!_position_delete_ctx.has_filter) {
         _pos_delete_filter_ptr.reset(nullptr);
diff --git a/be/src/format/parquet/vparquet_group_reader.h b/be/src/format/parquet/vparquet_group_reader.h
index fa1e1127f959a6..dfb593f1ae7054 100644
--- a/be/src/format/parquet/vparquet_group_reader.h
+++ b/be/src/format/parquet/vparquet_group_reader.h
@@ -27,14 +27,17 @@
 #include <vector>
 
 #include "common/status.h"
+#include "core/block/block.h"
 #include "core/column/column.h"
 #include "exprs/vexpr_fwd.h"
 #include "format/parquet/parquet_common.h"
 #include "format/parquet/vparquet_column_reader.h"
 #include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 #include "io/fs/file_reader_writer_fwd.h"
 #include "storage/id_manager.h"
-#include "storage/utils.h"
+#include "storage/segment/common.h"
+#include "vparquet_column_reader.h"
 
 namespace cctz {
 class time_zone;
@@ -67,15 +70,8 @@ namespace doris {
 #include "common/compile_check_begin.h"
 // TODO: we need to determine it by test.
 
-class RowGroupReader : public ProfileCollector {
+class RowGroupReader : public ProfileCollector, public RowPositionProvider {
 public:
-    struct IcebergRowIdParams {
-        bool enabled = false;
-        std::string file_path;
-        int32_t partition_spec_id = 0;
-        std::string partition_data_json;
-        int row_id_column_pos = -1;
-    };
     std::shared_ptr<TableSchemaChangeHelper::Node> _table_info_node_ptr;
     static const std::vector<int64_t> NO_DELETE;
 
@@ -92,11 +88,6 @@ class RowGroupReader : public ProfileCollector {
         // all conjuncts: in sql, join runtime filter, topn runtime filter.
         VExprContextSPtrs conjuncts;
 
-        // ParquetReader::set_fill_columns(xxx, xxx) will set these two members
-        std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-                fill_partition_columns;
-        std::unordered_map<std::string, VExprContextSPtr> fill_missing_columns;
-
         phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>
                 slot_id_to_predicates;
         bool can_lazy_read = false;
@@ -122,6 +113,14 @@ class RowGroupReader : public ProfileCollector {
         std::unordered_map<std::string, VExprContextSPtr> missing_columns;
         // should turn off filtering by page index, lazy read and dict filter if having complex type
         bool has_complex_type = false;
+
+        // ColumnProcessor path: column name lists for each category.
+        // Predicate phase: columns involved in predicate filtering.
+        std::vector<std::string> predicate_partition_col_names;
+        std::vector<std::string> predicate_missing_col_names;
+        // Remaining phase: columns filled after lazy reads.
+        std::vector<std::string> partition_col_names;
+        std::vector<std::string> missing_col_names;
     };
 
     /**
@@ -188,6 +187,8 @@ class RowGroupReader : public ProfileCollector {
 
     ParquetColumnReader::ColumnStatistics merged_column_statistics();
     void set_remaining_rows(int64_t rows) { _remaining_rows = rows; }
+    Status fill_topn_row_id(Block* block, size_t read_rows);
+
     int64_t get_remaining_rows() { return _remaining_rows; }
 
     // Filters read_ranges by removing row chunks whose condition cache granules are all-false.
@@ -202,8 +203,11 @@ class RowGroupReader : public ProfileCollector {
         _row_id_column_iterator_pair = iterator_pair;
     }
 
-    void set_iceberg_rowid_params(const IcebergRowIdParams& params) {
-        _iceberg_rowid_params = params;
+    void set_table_format_reader(TableFormatReader* reader) { _table_format_reader = reader; }
+
+    // RowPositionProvider interface
+    const std::vector<rowid_t>& current_batch_row_positions() const override {
+        return _current_batch_row_ids;
     }
 
     void set_row_lineage_columns(std::shared_ptr<RowLineageColumns> row_lineage_columns) {
@@ -242,13 +246,7 @@ class RowGroupReader : public ProfileCollector {
     Status _rebuild_filter_map(FilterMap& filter_map,
                                DorisUniqueBufferPtr<uint8_t>& filter_map_data,
                                size_t pre_read_rows) const;
-    Status _fill_partition_columns(
-            Block* block, size_t rows,
-            const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
-                    partition_columns);
-    Status _fill_missing_columns(
-            Block* block, size_t rows,
-            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns);
+
     Status _build_pos_delete_filter(size_t read_rows);
     Status _filter_block(Block* block, int column_to_keep,
                          const std::vector<uint32_t>& columns_to_filter);
@@ -265,8 +263,6 @@ class RowGroupReader : public ProfileCollector {
                                         int64_t batch_seq_start);
 
     Status _get_current_batch_row_id(size_t read_rows);
-    Status _fill_row_id_columns(Block* block, size_t read_rows, bool is_current_row_ids);
-    Status _append_iceberg_rowid_column(Block* block, size_t read_rows, bool is_current_row_ids);
 
     io::FileReaderSPtr _file_reader;
     std::unordered_map<std::string, std::unique_ptr<ParquetColumnReader>>
@@ -313,7 +309,7 @@ class RowGroupReader : public ProfileCollector {
     std::vector<rowid_t> _current_batch_row_ids;
 
     std::unordered_map<std::string, uint32_t>* _col_name_to_block_idx = nullptr;
-    IcebergRowIdParams _iceberg_rowid_params;
+    TableFormatReader* _table_format_reader = nullptr;
 };
 #include "common/compile_check_end.h"
 
diff --git a/be/src/format/parquet/vparquet_reader.cpp b/be/src/format/parquet/vparquet_reader.cpp
index 6de215608f72c8..b9838d7d6a051b 100644
--- a/be/src/format/parquet/vparquet_reader.cpp
+++ b/be/src/format/parquet/vparquet_reader.cpp
@@ -393,30 +393,53 @@ void ParquetReader::_init_file_description() {
     }
 }
 
-Status ParquetReader::init_reader(
-        const std::vector<std::string>& all_column_names,
-        std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-        const VExprContextSPtrs& conjuncts,
-        phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>&
-                slot_id_to_predicates,
-        const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
-        const std::unordered_map<std::string, int>* colname_to_slot_id,
-        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts,
-        std::shared_ptr<TableSchemaChangeHelper::Node> table_info_node_ptr, bool filter_groups,
-        const std::set<uint64_t>& column_ids, const std::set<uint64_t>& filter_column_ids) {
-    _col_name_to_block_idx = col_name_to_block_idx;
-    _tuple_descriptor = tuple_descriptor;
-    _row_descriptor = row_descriptor;
-    _colname_to_slot_id = colname_to_slot_id;
-    _not_single_slot_filter_conjuncts = not_single_slot_filter_conjuncts;
-    _slot_id_to_filter_conjuncts = slot_id_to_filter_conjuncts;
-    _table_info_node_ptr = table_info_node_ptr;
-    _filter_groups = filter_groups;
-    _column_ids = column_ids;
-    _filter_column_ids = filter_column_ids;
+Status ParquetReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    RETURN_IF_ERROR(
+            _extract_partition_values(*ctx->range, ctx->tuple_descriptor, _fill_partition_values));
+    for (auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::REGULAR ||
+            desc.category == ColumnCategory::GENERATED) {
+            ctx->column_names.push_back(desc.name);
+        }
+    }
 
-    RETURN_IF_ERROR(_open_file());
+    // Build table_info_node from Parquet file metadata with case-insensitive recursive matching.
+    // File is already opened by init_reader before this hook, so metadata is available.
+    // tuple_descriptor may be null in unit tests that only set column_descs.
+    if (ctx->tuple_descriptor != nullptr) {
+        const FieldDescriptor* field_desc = nullptr;
+        RETURN_IF_ERROR(get_file_metadata_schema(&field_desc));
+        RETURN_IF_ERROR(TableSchemaChangeHelper::BuildTableInfoUtil::by_parquet_name(
+                ctx->tuple_descriptor, *field_desc, ctx->table_info_node));
+    }
+
+    return Status::OK();
+}
+
+Status ParquetReader::_open_file_reader(ReaderInitContext* /*ctx*/) {
+    return _open_file();
+}
+
+Status ParquetReader::_do_init_reader(ReaderInitContext* base_ctx) {
+    auto* ctx = checked_context_cast<ParquetInitContext>(base_ctx);
+    _col_name_to_block_idx = base_ctx->col_name_to_block_idx;
+    _tuple_descriptor = ctx->tuple_descriptor;
+    _row_descriptor = ctx->row_descriptor;
+    _colname_to_slot_id = ctx->colname_to_slot_id;
+    _not_single_slot_filter_conjuncts = ctx->not_single_slot_filter_conjuncts;
+    _slot_id_to_filter_conjuncts = ctx->slot_id_to_filter_conjuncts;
+    _filter_groups = ctx->filter_groups;
+    _table_info_node_ptr = base_ctx->table_info_node;
+    _column_ids = base_ctx->column_ids;
+    _filter_column_ids = base_ctx->filter_column_ids;
+
+    // _open_file() is called by init_reader template method before hooks.
+    // For standalone _do_init_reader callers (tvf, load, etc.), open the file here if not already opened.
+    if (_file_metadata == nullptr) {
+        RETURN_IF_ERROR(_open_file());
+    }
     _t_metadata = &(_file_metadata->to_thrift());
     if (_file_metadata == nullptr) {
         return Status::InternalError("failed to init parquet reader, please open reader first");
@@ -429,30 +452,127 @@ Status ParquetReader::init_reader(
     }
     _current_row_group_index = RowGroupReader::RowGroupIndex {-1, 0, 0};
 
-    _table_column_names = &all_column_names;
-    auto schema_desc = _file_metadata->schema();
-
-    std::map<std::string, std::string> required_file_columns; //file column -> table column
-    for (auto table_column_name : all_column_names) {
-        if (_table_info_node_ptr->children_column_exists(table_column_name)) {
-            required_file_columns.emplace(
-                    _table_info_node_ptr->children_file_column_name(table_column_name),
-                    table_column_name);
-        } else {
-            _missing_cols.emplace_back(table_column_name);
+    // Compute missing columns and file↔table column mapping.
+    // This runs in _do_init_reader (not on_before_init_reader) because table-format readers
+    // (Iceberg, Paimon, Hive, Hudi) override on_before_init_reader completely.
+    if (has_column_descs()) {
+        _fill_missing_cols.clear();
+        _fill_missing_defaults.clear();
+        for (const auto& col_name : base_ctx->column_names) {
+            if (!_table_info_node_ptr->children_column_exists(col_name)) {
+                _fill_missing_cols.insert(col_name);
+            }
         }
+        if (_column_descs && !_fill_missing_cols.empty()) {
+            for (const auto& desc : *_column_descs) {
+                if (_fill_missing_cols.contains(desc.name) &&
+                    !_fill_partition_values.contains(desc.name)) {
+                    _fill_missing_defaults[desc.name] = desc.default_expr;
+                }
+            }
+        }
+        // Resolve file-column ↔ table-column mapping in file-schema order.
+        // Iterating schema_desc preserves the physical column order for efficient reads.
+        auto schema_desc = _file_metadata->schema();
+        std::map<std::string, std::string> required_file_columns;
+        for (const auto& table_column_name : base_ctx->column_names) {
+            if (_fill_missing_cols.contains(table_column_name)) {
+                continue;
+            }
+            auto file_col = _table_info_node_ptr->children_file_column_name(table_column_name);
+            required_file_columns.emplace(file_col, table_column_name);
+        }
+        for (int i = 0; i < schema_desc.size(); ++i) {
+            const auto& name = schema_desc.get_column(i)->name;
+            if (required_file_columns.contains(name)) {
+                _read_file_columns.emplace_back(name);
+                _read_table_columns.emplace_back(required_file_columns[name]);
+                _read_table_columns_set.insert(required_file_columns[name]);
+            }
+        }
+        // Register row-position-based synthesized column handler.
     }
-    for (int i = 0; i < schema_desc.size(); ++i) {
-        const auto& name = schema_desc.get_column(i)->name;
-        if (required_file_columns.contains(name)) {
-            _read_file_columns.emplace_back(name);
-            _read_table_columns.emplace_back(required_file_columns[name]);
-            _read_table_columns_set.insert(required_file_columns[name]);
+
+    // Register row-position-based synthesized column handler.
+    // _row_id_column_iterator_pair and _row_lineage_columns are set before init_reader
+    // by FileScanner. This must be outside has_column_descs() guard because standalone
+    // readers also need synthesized column handlers.
+    if (_row_id_column_iterator_pair.first != nullptr ||
+        (_row_lineage_columns != nullptr &&
+         (_row_lineage_columns->need_row_ids() ||
+          _row_lineage_columns->has_last_updated_sequence_number_column()))) {
+        register_synthesized_column_handler(
+                BeConsts::ROWID_COL, [this](Block* block, size_t rows) -> Status {
+                    if (_current_group_reader) {
+                        return _current_group_reader->fill_topn_row_id(block, rows);
+                    }
+                    return Status::OK();
+                });
+    }
+
+    // Standalone callers (column_descs == nullptr) skip on_before_init_reader,
+    // so _read_file_columns etc. are not populated. Use table_info_node for name mapping
+    // when available, otherwise fall back to 1:1 mapping using file schema.
+    // Must iterate in file schema order (not user column order) so that
+    // _generate_random_access_ranges sees monotonically increasing chunk offsets.
+    if (!has_column_descs() && _read_file_columns.empty()) {
+        auto schema_desc = _file_metadata->schema();
+        // Build map: file_col_name -> table_col_name for requested columns.
+        std::unordered_map<std::string, std::string> required_file_columns;
+        for (const auto& col_name : base_ctx->column_names) {
+            std::string file_col_name = col_name;
+            if (_table_info_node_ptr && _table_info_node_ptr->children_column_exists(col_name)) {
+                file_col_name = _table_info_node_ptr->children_file_column_name(col_name);
+            }
+            required_file_columns[file_col_name] = col_name;
+        }
+        // Iterate file schema to preserve physical column order.
+        for (int i = 0; i < schema_desc.size(); ++i) {
+            const auto& name = schema_desc.get_column(i)->name;
+            if (required_file_columns.contains(name)) {
+                _read_file_columns.emplace_back(name);
+                _read_table_columns.emplace_back(required_file_columns[name]);
+                _read_table_columns_set.insert(required_file_columns[name]);
+            }
         }
     }
+
     // build column predicates for column lazy read
-    _lazy_read_ctx.conjuncts = conjuncts;
-    _lazy_read_ctx.slot_id_to_predicates = slot_id_to_predicates;
+    if (ctx->conjuncts != nullptr) {
+        _lazy_read_ctx.conjuncts = *ctx->conjuncts;
+    }
+    if (ctx->slot_id_to_predicates != nullptr) {
+        _lazy_read_ctx.slot_id_to_predicates = *ctx->slot_id_to_predicates;
+    }
+
+    // ---- Inlined set_fill_columns logic (partition/missing/synthesized classification) ----
+
+    // 1. Collect predicate columns from conjuncts for lazy materialization
+    std::unordered_map<std::string, std::pair<uint32_t, int>> predicate_columns;
+    _collect_predicate_columns_from_conjuncts(predicate_columns);
+
+    // 2. Classify read/partition/missing/synthesized columns into lazy vs predicate groups
+    _classify_columns_for_lazy_read(predicate_columns, _fill_partition_values,
+                                    _fill_missing_defaults);
+
+    // 3. Populate col_names vectors for ColumnProcessor path
+    for (auto& kv : _lazy_read_ctx.predicate_partition_columns) {
+        _lazy_read_ctx.predicate_partition_col_names.emplace_back(kv.first);
+    }
+    for (auto& kv : _lazy_read_ctx.predicate_missing_columns) {
+        _lazy_read_ctx.predicate_missing_col_names.emplace_back(kv.first);
+    }
+    for (auto& kv : _lazy_read_ctx.partition_columns) {
+        _lazy_read_ctx.partition_col_names.emplace_back(kv.first);
+    }
+    for (auto& kv : _lazy_read_ctx.missing_columns) {
+        _lazy_read_ctx.missing_col_names.emplace_back(kv.first);
+    }
+
+    if (_filter_groups && (_total_groups == 0 || _t_metadata->num_rows == 0 || _range_size < 0)) {
+        return Status::EndOfFile("No row group to read");
+    }
+
     return Status::OK();
 }
 
@@ -478,18 +598,8 @@ bool ParquetReader::_type_matches(const int cid) const {
            !is_complex_type(table_col_type->get_primitive_type());
 }
 
-Status ParquetReader::set_fill_columns(
-        const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
-                partition_columns,
-        const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) {
-    _lazy_read_ctx.fill_partition_columns = partition_columns;
-    _lazy_read_ctx.fill_missing_columns = missing_columns;
-
-    // std::unordered_map<column_name, std::pair<col_id, slot_id>>
-    std::unordered_map<std::string, std::pair<uint32_t, int>> predicate_columns;
-
-    // TODO(gabriel): we should try to clear too much structs which are used to represent conjuncts and predicates.
-    // visit_slot for lazy mat.
+void ParquetReader::_collect_predicate_columns_from_conjuncts(
+        std::unordered_map<std::string, std::pair<uint32_t, int>>& predicate_columns) {
     std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
         if (expr->is_slot_ref()) {
             VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
@@ -505,19 +615,18 @@ Status ParquetReader::set_fill_columns(
             visit_slot(child.get());
         }
     };
+
     for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
         auto expr = conjunct->root();
-
         if (expr->is_rf_wrapper()) {
-            // REF: src/runtime_filter/runtime_filter_consumer.cpp
             VRuntimeFilterWrapper* runtime_filter = assert_cast<VRuntimeFilterWrapper*>(expr.get());
-
             auto filter_impl = runtime_filter->get_impl();
             visit_slot(filter_impl.get());
         } else {
             visit_slot(expr.get());
         }
     }
+
     if (!_lazy_read_ctx.slot_id_to_predicates.empty()) {
         auto and_pred = AndBlockColumnPredicate::create_unique();
         for (const auto& entry : _lazy_read_ctx.slot_id_to_predicates) {
@@ -533,7 +642,13 @@ Status ParquetReader::set_fill_columns(
             _push_down_predicates.push_back(std::move(and_pred));
         }
     }
+}
 
+void ParquetReader::_classify_columns_for_lazy_read(
+        const std::unordered_map<std::string, std::pair<uint32_t, int>>& predicate_columns,
+        const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
+                partition_columns,
+        const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) {
     const FieldDescriptor& schema = _file_metadata->schema();
 
     auto check_iceberg_row_lineage_column_idx = [&](const auto& col_name) -> int {
@@ -585,7 +700,7 @@ Status ParquetReader::set_fill_columns(
         _lazy_read_ctx.all_predicate_col_ids.emplace_back(_row_id_column_iterator_pair.second);
     }
 
-    for (auto& kv : _lazy_read_ctx.fill_partition_columns) {
+    for (auto& kv : partition_columns) {
         auto iter = predicate_columns.find(kv.first);
         if (iter == predicate_columns.end()) {
             _lazy_read_ctx.partition_columns.emplace(kv.first, kv.second);
@@ -595,7 +710,7 @@ Status ParquetReader::set_fill_columns(
         }
     }
 
-    for (auto& kv : _lazy_read_ctx.fill_missing_columns) {
+    for (auto& kv : missing_columns) {
         auto iter = predicate_columns.find(kv.first);
         if (iter != predicate_columns.end()) {
             //For check missing column :   missing column == xx, missing column is null,missing column is not null.
@@ -605,7 +720,6 @@ Status ParquetReader::set_fill_columns(
                     _lazy_read_ctx.missing_columns_conjuncts.emplace_back(ctx);
                 }
             }
-
             _lazy_read_ctx.predicate_missing_columns.emplace(kv.first, kv.second);
             _lazy_read_ctx.all_predicate_col_ids.emplace_back(iter->second.first);
         } else if (auto row_lineage_idx = check_iceberg_row_lineage_column_idx(kv.first);
@@ -630,12 +744,6 @@ Status ParquetReader::set_fill_columns(
             _lazy_read_ctx.missing_columns.emplace(kv.first, kv.second);
         }
     }
-
-    if (_filter_groups && (_total_groups == 0 || _t_metadata->num_rows == 0 || _range_size < 0)) {
-        return Status::EndOfFile("No row group to read");
-    }
-    _fill_all_columns = true;
-    return Status::OK();
 }
 
 // init file reader and file metadata for parsing schema
@@ -657,22 +765,8 @@ Status ParquetReader::get_parsed_schema(std::vector<std::string>* col_names,
     return Status::OK();
 }
 
-void ParquetReader::set_iceberg_rowid_params(const std::string& file_path,
-                                             int32_t partition_spec_id,
-                                             const std::string& partition_data_json,
-                                             int row_id_column_pos) {
-    _iceberg_rowid_params.enabled = true;
-    _iceberg_rowid_params.file_path = file_path;
-    _iceberg_rowid_params.partition_spec_id = partition_spec_id;
-    _iceberg_rowid_params.partition_data_json = partition_data_json;
-    _iceberg_rowid_params.row_id_column_pos = row_id_column_pos;
-    if (_current_group_reader != nullptr) {
-        _current_group_reader->set_iceberg_rowid_params(_iceberg_rowid_params);
-    }
-}
-
-Status ParquetReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                                  std::unordered_set<std::string>* missing_cols) {
+Status ParquetReader::_get_columns_impl(
+        std::unordered_map<std::string, DataTypePtr>* name_to_type) {
     const auto& schema_desc = _file_metadata->schema();
     std::unordered_set<std::string> column_names;
     schema_desc.get_column_names(&column_names);
@@ -680,13 +774,10 @@ Status ParquetReader::get_columns(std::unordered_map<std::string, DataTypePtr>*
         auto field = schema_desc.get_column(name);
         name_to_type->emplace(name, field->data_type);
     }
-    for (auto& col : _missing_cols) {
-        missing_cols->insert(col);
-    }
     return Status::OK();
 }
 
-Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+Status ParquetReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     if (_current_group_reader == nullptr || _row_group_eof) {
         Status st = _next_row_group_reader();
         if (!st.ok() && !st.is<ErrorCode::END_OF_FILE>()) {
@@ -700,24 +791,6 @@ Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
             return Status::OK();
         }
     }
-    if (_push_down_agg_type == TPushAggOp::type::COUNT) {
-        auto rows = std::min(_current_group_reader->get_remaining_rows(), (int64_t)_batch_size);
-
-        _current_group_reader->set_remaining_rows(_current_group_reader->get_remaining_rows() -
-                                                  rows);
-        auto mutate_columns = block->mutate_columns();
-        for (auto& col : mutate_columns) {
-            col->resize(rows);
-        }
-        block->set_columns(std::move(mutate_columns));
-
-        *read_rows = rows;
-        if (_current_group_reader->get_remaining_rows() == 0) {
-            _current_group_reader.reset(nullptr);
-        }
-
-        return Status::OK();
-    }
 
     SCOPED_RAW_TIMER(&_reader_statistics.column_read_time);
     Status batch_st =
@@ -759,7 +832,13 @@ Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
 
 RowGroupReader::PositionDeleteContext ParquetReader::_get_position_delete_ctx(
         const tparquet::RowGroup& row_group, const RowGroupReader::RowGroupIndex& row_group_index) {
+    LOG(INFO) << "[PosDeleteDebug] _get_position_delete_ctx: _delete_rows="
+              << (_delete_rows ? "set(" + std::to_string(_delete_rows->size()) + ")" : "null")
+              << " row_group.num_rows=" << row_group.num_rows
+              << " first_row=" << row_group_index.first_row;
     if (_delete_rows == nullptr) {
+        LOG(INFO) << "[PosDeleteDebug] _get_position_delete_ctx: NO delete rows, returning "
+                     "no-filter ctx";
         return RowGroupReader::PositionDeleteContext(row_group.num_rows, row_group_index.first_row);
     }
     const int64_t* delete_rows = &(*_delete_rows)[0];
@@ -871,9 +950,6 @@ Status ParquetReader::_next_row_group_reader() {
                     : group_file_reader,
             _read_table_columns, _current_row_group_index.row_group_id, row_group, _ctz, _io_ctx,
             position_delete_ctx, _lazy_read_ctx, _state, _column_ids, _filter_column_ids));
-    if (_iceberg_rowid_params.enabled) {
-        _current_group_reader->set_iceberg_rowid_params(_iceberg_rowid_params);
-    }
     _row_group_eof = false;
 
     _current_group_reader->set_current_row_group_idx(_current_row_group_index);
@@ -883,6 +959,7 @@ Status ParquetReader::_next_row_group_reader() {
     if (_condition_cache_ctx) {
         _current_group_reader->set_condition_cache_context(_condition_cache_ctx);
     }
+    _current_group_reader->set_table_format_reader(this);
 
     _current_group_reader->_table_info_node_ptr = _table_info_node_ptr;
     return _current_group_reader->init(_file_metadata->schema(), candidate_row_ranges, _col_offsets,
diff --git a/be/src/format/parquet/vparquet_reader.h b/be/src/format/parquet/vparquet_reader.h
index 5172c8efdb9df3..b347608a5bc397 100644
--- a/be/src/format/parquet/vparquet_reader.h
+++ b/be/src/format/parquet/vparquet_reader.h
@@ -30,12 +30,12 @@
 #include <vector>
 
 #include "common/status.h"
-#include "format/generic_reader.h"
 #include "format/parquet/parquet_common.h"
 #include "format/parquet/parquet_predicate.h"
 #include "format/parquet/vparquet_column_reader.h"
 #include "format/parquet/vparquet_group_reader.h"
 #include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 #include "io/file_factory.h"
 #include "io/fs/file_meta_cache.h"
 #include "io/fs/file_reader.h"
@@ -69,7 +69,26 @@ struct RowLineageColumns;
 
 namespace doris {
 #include "common/compile_check_begin.h"
-class ParquetReader : public GenericReader {
+
+/// Parquet-specific initialization context.
+/// Extends ReaderInitContext with predicate pushdown fields.
+struct ParquetInitContext final : public ReaderInitContext {
+    // Safe defaults for standalone readers (delete file readers, push handler)
+    // that don't have conjuncts/predicates. Dereferenced by _do_init_reader.
+    static inline const VExprContextSPtrs EMPTY_CONJUNCTS {};
+    static inline phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>
+            EMPTY_SLOT_PREDICATES {};
+
+    const VExprContextSPtrs* conjuncts = &EMPTY_CONJUNCTS;
+    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>*
+            slot_id_to_predicates = &EMPTY_SLOT_PREDICATES;
+    const std::unordered_map<std::string, int>* colname_to_slot_id = nullptr;
+    const VExprContextSPtrs* not_single_slot_filter_conjuncts = nullptr;
+    const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts = nullptr;
+    bool filter_groups = true;
+};
+
+class ParquetReader : public TableFormatReader {
     ENABLE_FACTORY_CREATOR(ParquetReader);
 
 public:
@@ -123,23 +142,17 @@ class ParquetReader : public GenericReader {
     void set_file_reader(io::FileReaderSPtr file_reader);
 #endif
 
-    Status init_reader(
-            const std::vector<std::string>& all_column_names,
-            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-            const VExprContextSPtrs& conjuncts,
-            phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>&
-                    slot_id_to_predicates,
-            const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
-            const std::unordered_map<std::string, int>* colname_to_slot_id,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts,
-            std::shared_ptr<TableSchemaChangeHelper::Node> table_info_node_ptr =
-                    TableSchemaChangeHelper::ConstNode::get_instance(),
-            bool filter_groups = true, const std::set<uint64_t>& column_ids = {},
-            const std::set<uint64_t>& filter_column_ids = {});
-
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    // Override to build table_info_node from Parquet file metadata using by_parquet_name.
+    // Subclasses (HiveParquetReader, etc.) call GenericReader::on_before_init_reader directly,
+    // so this override only applies to plain ParquetReader (TVF, load).
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
+
+protected:
+    // ---- Unified init_reader(ReaderInitContext*) overrides ----
+    Status _open_file_reader(ReaderInitContext* ctx) override;
+    Status _do_init_reader(ReaderInitContext* ctx) override;
 
+public:
     Status close() override;
 
     // set the delete rows in current parquet file
@@ -147,8 +160,7 @@ class ParquetReader : public GenericReader {
 
     int64_t size() const { return _file_reader->size(); }
 
-    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) override;
+    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override;
 
     Status init_schema_reader() override;
 
@@ -159,12 +171,6 @@ class ParquetReader : public GenericReader {
 
     const tparquet::FileMetaData* get_meta_data() const { return _t_metadata; }
 
-    // Partition columns will not be materialized in parquet files. So we should fill it with missing columns.
-    Status set_fill_columns(
-            const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
-                    partition_columns,
-            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) override;
-
     Status get_file_metadata_schema(const FieldDescriptor** ptr);
 
     void set_row_id_column_iterator(
@@ -172,8 +178,11 @@ class ParquetReader : public GenericReader {
         _row_id_column_iterator_pair = iterator_pair;
     }
 
-    void set_iceberg_rowid_params(const std::string& file_path, int32_t partition_spec_id,
-                                  const std::string& partition_data_json, int row_id_column_pos);
+    /// Access current batch row positions (delegates to RowGroupReader).
+    /// Used by IcebergReaderMixin to build $row_id column.
+    const std::vector<segment_v2::rowid_t>& current_batch_row_positions() const {
+        return _current_group_reader->current_batch_row_positions();
+    }
 
     void set_row_lineage_columns(std::shared_ptr<RowLineageColumns> row_lineage_columns) {
         _row_lineage_columns = std::move(row_lineage_columns);
@@ -183,15 +192,43 @@ class ParquetReader : public GenericReader {
 
     void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) override;
 
+    bool supports_count_pushdown() const override { return true; }
+
     int64_t get_total_rows() const override;
 
     bool has_delete_operations() const override {
         return _delete_rows != nullptr && !_delete_rows->empty();
     }
 
+    /// Disable row-group range filtering (needed when reading delete files
+    /// whose TFileRangeDesc has size=-1).
+    void set_filter_groups(bool v) { _filter_groups = v; }
+
 protected:
     void _collect_profile_before_close() override;
 
+    // Core block reading implementation
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+
+    // Parquet fills partition/missing columns per-batch internally via RowGroupReader,
+    // so suppress TableFormatReader's default on_after_read_block fill.
+    Status on_after_read_block(Block* /*block*/, size_t* /*read_rows*/) override {
+        return Status::OK();
+    }
+
+    // Protected accessors so CRTP mixin subclasses can reach private members
+    io::IOContext* get_io_ctx() const { return _io_ctx; }
+    std::unordered_map<std::string, uint32_t>*& col_name_to_block_idx_ref() {
+        return _col_name_to_block_idx;
+    }
+    RuntimeProfile* get_profile() const { return _profile; }
+    RuntimeState* get_state() const { return _state; }
+    const TFileScanRangeParams& get_scan_params() const { return _scan_params; }
+    const TFileRangeDesc& get_scan_range() const { return _scan_range; }
+    const TupleDescriptor* get_tuple_descriptor() const { return _tuple_descriptor; }
+    const RowDescriptor* get_row_descriptor() const { return _row_descriptor; }
+    const FileMetaData* get_file_metadata() const { return _file_metadata; }
+
 private:
     struct ParquetProfile {
         RuntimeProfile::Counter* filtered_row_groups = nullptr;
@@ -239,6 +276,15 @@ class ParquetReader : public GenericReader {
         RuntimeProfile::Counter* bloom_filter_read_time = nullptr;
     };
 
+    // ---- set_fill_columns sub-functions ----
+    void _collect_predicate_columns_from_conjuncts(
+            std::unordered_map<std::string, std::pair<uint32_t, int>>& predicate_columns);
+    void _classify_columns_for_lazy_read(
+            const std::unordered_map<std::string, std::pair<uint32_t, int>>& predicate_columns,
+            const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
+                    partition_columns,
+            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns);
+
     Status _open_file();
     void _init_profile();
     void _close_internal();
@@ -290,9 +336,6 @@ class ParquetReader : public GenericReader {
     bool _exists_in_file(const std::string& expr_name) const;
     bool _type_matches(const int cid) const;
 
-    RuntimeProfile* _profile = nullptr;
-    const TFileScanRangeParams& _scan_params;
-    const TFileRangeDesc& _scan_range;
     io::FileSystemProperties _system_properties;
     io::FileDescription _file_description;
 
@@ -305,7 +348,6 @@ class ParquetReader : public GenericReader {
     // after _file_reader. Otherwise, there may be heap-use-after-free bug.
     ObjLRUCache::CacheHandle _meta_cache_handle;
     std::unique_ptr<FileMetaData> _file_metadata_ptr;
-    const FileMetaData* _file_metadata = nullptr;
     const tparquet::FileMetaData* _t_metadata = nullptr;
 
     // _tracing_file_reader wraps _file_reader.
@@ -336,10 +378,10 @@ class ParquetReader : public GenericReader {
     const std::vector<int64_t>* _delete_rows = nullptr;
     int64_t _delete_rows_index = 0;
 
-    // Used for column lazy read.
-    RowGroupReader::LazyReadContext _lazy_read_ctx;
-
     // parquet file reader object
+    RuntimeProfile* _profile = nullptr;
+    const TFileScanRangeParams& _scan_params;
+    const TFileRangeDesc& _scan_range;
     size_t _batch_size;
     int64_t _range_start_offset;
     int64_t _range_size;
@@ -347,10 +389,6 @@ class ParquetReader : public GenericReader {
 
     std::unordered_map<int, tparquet::OffsetIndex> _col_offsets;
 
-    std::vector<std::string> _missing_cols;
-    // _table_column_names = _missing_cols + _read_table_columns
-    const std::vector<std::string>* _table_column_names = nullptr;
-
     ReaderStatistics _reader_statistics;
     ParquetColumnReader::ColumnStatistics _column_statistics;
     ParquetProfile _parquet_profile;
@@ -358,11 +396,14 @@ class ParquetReader : public GenericReader {
     io::IOContext* _io_ctx = nullptr;
     std::shared_ptr<io::IOContext> _io_ctx_holder;
     RuntimeState* _state = nullptr;
+    const TupleDescriptor* _tuple_descriptor = nullptr;
+    const RowDescriptor* _row_descriptor = nullptr;
+    const FileMetaData* _file_metadata = nullptr;
+    // Pointer to external column name to block index mapping (from FileScanner)
+    std::unordered_map<std::string, uint32_t>* _col_name_to_block_idx = nullptr;
     bool _enable_lazy_mat = true;
     bool _enable_filter_by_min_max = true;
     bool _enable_filter_by_bloom_filter = true;
-    const TupleDescriptor* _tuple_descriptor = nullptr;
-    const RowDescriptor* _row_descriptor = nullptr;
     const std::unordered_map<std::string, int>* _colname_to_slot_id = nullptr;
     const VExprContextSPtrs* _not_single_slot_filter_conjuncts = nullptr;
     const std::unordered_map<int, VExprContextSPtrs>* _slot_id_to_filter_conjuncts = nullptr;
@@ -373,14 +414,16 @@ class ParquetReader : public GenericReader {
     std::shared_ptr<RowLineageColumns> _row_lineage_columns;
 
 protected:
+    // Used for column lazy read. Protected so Iceberg/Paimon subclasses can
+    // register synthesized columns in on_before_init_reader.
+    RowGroupReader::LazyReadContext _lazy_read_ctx;
     bool _filter_groups = true;
-    RowGroupReader::IcebergRowIdParams _iceberg_rowid_params;
+    size_t get_batch_size() const { return _batch_size; }
 
+private:
     std::set<uint64_t> _column_ids;
     std::set<uint64_t> _filter_column_ids;
 
-    std::unordered_map<std::string, uint32_t>* _col_name_to_block_idx = nullptr;
-
     std::vector<std::unique_ptr<MutilColumnBlockPredicate>> _push_down_predicates;
     Arena _arena;
 };
diff --git a/be/src/format/table/equality_delete.cpp b/be/src/format/table/equality_delete.cpp
index d1e3954836a81b..ee799a14a5b4a7 100644
--- a/be/src/format/table/equality_delete.cpp
+++ b/be/src/format/table/equality_delete.cpp
@@ -52,8 +52,14 @@ Status SimpleEqualityDelete::filter_data_block(
     DCHECK(_delete_col_ids.size() == 1);
     auto column_field_id = _delete_col_ids[0];
 
-    auto column_and_type = data_block->get_by_position(
-            col_name_to_block_idx->at(id_to_block_column_name.at(column_field_id)));
+    const auto& block_col_name = id_to_block_column_name.at(column_field_id);
+    auto block_idx = col_name_to_block_idx->at(block_col_name);
+    LOG(INFO) << "[EqDeleteDebug] SimpleEqualityDelete::filter_data_block: field_id="
+              << column_field_id << ", block_col_name=" << block_col_name
+              << ", block_idx=" << block_idx << ", delete_block_rows=" << _delete_block->rows()
+              << ", data_block_rows=" << data_block->rows();
+
+    auto column_and_type = data_block->get_by_position(block_idx);
 
     size_t rows = data_block->rows();
     //     _filter: 1 => in _hybrid_set; 0 => not in _hybrid_set
diff --git a/be/src/format/table/hive/hive_orc_nested_column_utils.cpp b/be/src/format/table/hive/hive_orc_nested_column_utils.cpp
index 8ff065490e8cf7..0e014c95e5dcf6 100644
--- a/be/src/format/table/hive/hive_orc_nested_column_utils.cpp
+++ b/be/src/format/table/hive/hive_orc_nested_column_utils.cpp
@@ -25,7 +25,8 @@
 #include <vector>
 
 #include "common/logging.h"
-#include "format/table/table_format_reader.h"
+#include "format/generic_reader.h"
+#include "format/table/table_schema_change_helper.h"
 #include "orc/Type.hh"
 
 namespace doris {
diff --git a/be/src/format/table/hive/hive_orc_nested_column_utils.h b/be/src/format/table/hive/hive_orc_nested_column_utils.h
index 6cc28e001cf1b5..a410f8d29ea198 100644
--- a/be/src/format/table/hive/hive_orc_nested_column_utils.h
+++ b/be/src/format/table/hive/hive_orc_nested_column_utils.h
@@ -23,7 +23,7 @@
 #include <unordered_map>
 #include <vector>
 
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 
 namespace orc {
 class Type;
diff --git a/be/src/format/table/hive/hive_parquet_nested_column_utils.cpp b/be/src/format/table/hive/hive_parquet_nested_column_utils.cpp
index 5096a328b1de04..d990ff0b86f685 100644
--- a/be/src/format/table/hive/hive_parquet_nested_column_utils.cpp
+++ b/be/src/format/table/hive/hive_parquet_nested_column_utils.cpp
@@ -25,7 +25,7 @@
 #include <vector>
 
 #include "format/parquet/schema_desc.h"
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
diff --git a/be/src/format/table/hive/hive_parquet_nested_column_utils.h b/be/src/format/table/hive/hive_parquet_nested_column_utils.h
index 5e4b528800c823..1e953ef5ea34ae 100644
--- a/be/src/format/table/hive/hive_parquet_nested_column_utils.h
+++ b/be/src/format/table/hive/hive_parquet_nested_column_utils.h
@@ -23,7 +23,7 @@
 #include <unordered_map>
 #include <vector>
 
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
diff --git a/be/src/format/table/hive_reader.cpp b/be/src/format/table/hive_reader.cpp
index 205becbc7b62d0..fc6dfbc025c235 100644
--- a/be/src/format/table/hive_reader.cpp
+++ b/be/src/format/table/hive_reader.cpp
@@ -28,73 +28,68 @@
 namespace doris {
 #include "common/compile_check_begin.h"
 
-Status HiveReader::get_next_block_inner(Block* block, size_t* read_rows, bool* eof) {
-    RETURN_IF_ERROR(_file_format_reader->get_next_block(block, read_rows, eof));
-    return Status::OK();
-};
-
-Status HiveOrcReader::init_reader(
-        const std::vector<std::string>& read_table_col_names,
-        std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-        const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
-        const RowDescriptor* row_descriptor,
-        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
-    auto* orc_reader = static_cast<OrcReader*>(_file_format_reader.get());
+Status HiveOrcReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    RETURN_IF_ERROR(
+            _extract_partition_values(*ctx->range, ctx->tuple_descriptor, _fill_partition_values));
+    for (auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::REGULAR ||
+            desc.category == ColumnCategory::GENERATED) {
+            ctx->column_names.push_back(desc.name);
+        }
+    }
 
+    // Get file type (available because _create_file_reader() runs before this hook)
     const orc::Type* orc_type_ptr = nullptr;
-    RETURN_IF_ERROR(orc_reader->get_file_type(&orc_type_ptr));
+    RETURN_IF_ERROR(get_file_type(&orc_type_ptr));
     bool is_hive_col_name = OrcReader::is_hive1_col_name(orc_type_ptr);
 
-    if (_state->query_options().hive_orc_use_column_names && !is_hive_col_name) {
-        // Directly use the table column name to match the file column name, but pay attention to the case issue.
-        RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_name(tuple_descriptor, orc_type_ptr,
-                                                        table_info_node_ptr, _is_file_slot));
+    // Build table_info_node based on config
+    if (get_state()->query_options().hive_orc_use_column_names && !is_hive_col_name) {
+        RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_name(ctx->tuple_descriptor, orc_type_ptr,
+                                                        ctx->table_info_node, _is_file_slot));
     } else {
-        // hive1 / use index
-        std::map<std::string, const SlotDescriptor*> slot_map; // table_name to slot
-        for (const auto& slot : tuple_descriptor->slots()) {
+        ctx->table_info_node = std::make_shared<StructNode>();
+        std::map<std::string, const SlotDescriptor*> slot_map;
+        for (const auto& slot : ctx->tuple_descriptor->slots()) {
             slot_map.emplace(slot->col_name_lower_case(), slot);
         }
 
-        // For top-level columns, use indexes to match, and for sub-columns, still use name to match columns.
-        for (size_t idx = 0; idx < _params.column_idxs.size(); idx++) {
-            auto table_column_name = read_table_col_names[idx];
-            auto file_index = _params.column_idxs[idx];
+        for (size_t idx = 0; idx < get_scan_params().column_idxs.size(); idx++) {
+            auto table_column_name = ctx->column_names[idx];
+            auto file_index = get_scan_params().column_idxs[idx];
 
             if (file_index >= orc_type_ptr->getSubtypeCount()) {
-                table_info_node_ptr->add_not_exist_children(table_column_name);
+                ctx->table_info_node->add_not_exist_children(table_column_name);
             } else {
                 auto field_node = std::make_shared<Node>();
-                // For sub-columns, still use name to match columns.
                 RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_name(
                         slot_map[table_column_name]->type(), orc_type_ptr->getSubtype(file_index),
                         field_node));
-                table_info_node_ptr->add_children(
+                ctx->table_info_node->add_children(
                         table_column_name, orc_type_ptr->getFieldName(file_index), field_node);
             }
             slot_map.erase(table_column_name);
         }
         for (const auto& [partition_col_name, _] : slot_map) {
-            table_info_node_ptr->add_not_exist_children(partition_col_name);
+            ctx->table_info_node->add_not_exist_children(partition_col_name);
         }
     }
 
+    // Compute column_ids
     auto column_id_result = ColumnIdResult();
-    if (_state->query_options().hive_orc_use_column_names && !is_hive_col_name) {
-        column_id_result = _create_column_ids(orc_type_ptr, tuple_descriptor);
+    if (get_state()->query_options().hive_orc_use_column_names && !is_hive_col_name) {
+        column_id_result = _create_column_ids(orc_type_ptr, ctx->tuple_descriptor);
     } else {
         column_id_result =
-                _create_column_ids_by_top_level_col_index(orc_type_ptr, tuple_descriptor);
+                _create_column_ids_by_top_level_col_index(orc_type_ptr, ctx->tuple_descriptor);
     }
+    ctx->column_ids = std::move(column_id_result.column_ids);
+    ctx->filter_column_ids = std::move(column_id_result.filter_column_ids);
 
-    const auto& column_ids = column_id_result.column_ids;
-    const auto& filter_column_ids = column_id_result.filter_column_ids;
-
-    return orc_reader->init_reader(&read_table_col_names, col_name_to_block_idx, conjuncts, false,
-                                   tuple_descriptor, row_descriptor,
-                                   not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts,
-                                   table_info_node_ptr, column_ids, filter_column_ids);
+    // _is_acid is false by default, no need to set explicitly
+    return Status::OK();
 }
 
 ColumnIdResult HiveOrcReader::_create_column_ids(const orc::Type* orc_type,
@@ -210,86 +205,69 @@ ColumnIdResult HiveOrcReader::_create_column_ids_by_top_level_col_index(
     return ColumnIdResult(std::move(column_ids), std::move(filter_column_ids));
 }
 
-Status HiveParquetReader::init_reader(
-        const std::vector<std::string>& read_table_col_names,
-        std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-        const VExprContextSPtrs& conjuncts,
-        phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>&
-                slot_id_to_predicates,
-        const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
-        const std::unordered_map<std::string, int>* colname_to_slot_id,
-        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
-    auto* parquet_reader = static_cast<ParquetReader*>(_file_format_reader.get());
+Status HiveParquetReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    RETURN_IF_ERROR(
+            _extract_partition_values(*ctx->range, ctx->tuple_descriptor, _fill_partition_values));
+    for (auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::REGULAR ||
+            desc.category == ColumnCategory::GENERATED) {
+            ctx->column_names.push_back(desc.name);
+        }
+    }
+
+    // Get file metadata schema (available because _open_file() runs before this hook)
     const FieldDescriptor* field_desc = nullptr;
-    RETURN_IF_ERROR(parquet_reader->get_file_metadata_schema(&field_desc));
+    RETURN_IF_ERROR(get_file_metadata_schema(&field_desc));
     DCHECK(field_desc != nullptr);
 
-    if (_state->query_options().hive_parquet_use_column_names) {
-        // Directly use the table column name to match the file column name, but pay attention to the case issue.
-        RETURN_IF_ERROR(BuildTableInfoUtil::by_parquet_name(tuple_descriptor, *field_desc,
-                                                            table_info_node_ptr, _is_file_slot));
-    } else {                                                   // use idx
-        std::map<std::string, const SlotDescriptor*> slot_map; //table_name to slot
-        for (const auto& slot : tuple_descriptor->slots()) {
+    // Build table_info_node based on config
+    if (get_state()->query_options().hive_parquet_use_column_names) {
+        RETURN_IF_ERROR(BuildTableInfoUtil::by_parquet_name(ctx->tuple_descriptor, *field_desc,
+                                                            ctx->table_info_node, _is_file_slot));
+    } else {
+        ctx->table_info_node = std::make_shared<StructNode>();
+        std::map<std::string, const SlotDescriptor*> slot_map;
+        for (const auto& slot : ctx->tuple_descriptor->slots()) {
             slot_map.emplace(slot->col_name_lower_case(), slot);
         }
 
-        // For top-level columns, use indexes to match, and for sub-columns, still use name to match columns.
         auto parquet_fields_schema = field_desc->get_fields_schema();
-        for (size_t idx = 0; idx < _params.column_idxs.size(); idx++) {
-            auto table_column_name = read_table_col_names[idx];
-            auto file_index = _params.column_idxs[idx];
+        for (size_t idx = 0; idx < get_scan_params().column_idxs.size(); idx++) {
+            auto table_column_name = ctx->column_names[idx];
+            auto file_index = get_scan_params().column_idxs[idx];
 
             if (file_index >= parquet_fields_schema.size()) {
-                // Non-partitioning columns, which may be columns added later.
-                table_info_node_ptr->add_not_exist_children(table_column_name);
+                ctx->table_info_node->add_not_exist_children(table_column_name);
             } else {
-                // Non-partitioning columns, columns that exist in both the table and the file.
                 auto field_node = std::make_shared<Node>();
-                // for sub-columns, still use name to match columns.
                 RETURN_IF_ERROR(BuildTableInfoUtil::by_parquet_name(
                         slot_map[table_column_name]->type(), parquet_fields_schema[file_index],
                         field_node));
-                table_info_node_ptr->add_children(
+                ctx->table_info_node->add_children(
                         table_column_name, parquet_fields_schema[file_index].name, field_node);
             }
-
             slot_map.erase(table_column_name);
         }
-        /*
-         * `_params.column_idxs` only have `isIsFileSlot()`, so we need add `partition slot`.
-         * eg:
-         * Table : A, B, C, D     (D: partition column)
-         * Parquet file : A, B
-         * Column C is obtained by add column.
-         *
-         * sql : select * from table;
-         * slot : A, B, C, D
-         * _params.column_idxs: 0, 1, 2 (There is no 3, because column D is the partition column)
-         *
-         */
         for (const auto& [partition_col_name, _] : slot_map) {
-            table_info_node_ptr->add_not_exist_children(partition_col_name);
+            ctx->table_info_node->add_not_exist_children(partition_col_name);
         }
     }
 
+    // Compute column_ids for lazy materialization
     auto column_id_result = ColumnIdResult();
-    if (_state->query_options().hive_parquet_use_column_names) {
-        column_id_result = _create_column_ids(field_desc, tuple_descriptor);
+    if (get_state()->query_options().hive_parquet_use_column_names) {
+        column_id_result = _create_column_ids(field_desc, ctx->tuple_descriptor);
     } else {
-        column_id_result = _create_column_ids_by_top_level_col_index(field_desc, tuple_descriptor);
+        column_id_result =
+                _create_column_ids_by_top_level_col_index(field_desc, ctx->tuple_descriptor);
     }
+    ctx->column_ids = std::move(column_id_result.column_ids);
+    ctx->filter_column_ids = std::move(column_id_result.filter_column_ids);
 
-    const auto& column_ids = column_id_result.column_ids;
-    const auto& filter_column_ids = column_id_result.filter_column_ids;
-
-    RETURN_IF_ERROR(init_row_filters());
-
-    return parquet_reader->init_reader(
-            read_table_col_names, col_name_to_block_idx, conjuncts, slot_id_to_predicates,
-            tuple_descriptor, row_descriptor, colname_to_slot_id, not_single_slot_filter_conjuncts,
-            slot_id_to_filter_conjuncts, table_info_node_ptr, true, column_ids, filter_column_ids);
+    _filter_groups = true;
+    return Status::OK();
 }
 
 ColumnIdResult HiveParquetReader::_create_column_ids(const FieldDescriptor* field_desc,
diff --git a/be/src/format/table/hive_reader.h b/be/src/format/table/hive_reader.h
index c741b434f166dc..32c4101993dfe3 100644
--- a/be/src/format/table/hive_reader.h
+++ b/be/src/format/table/hive_reader.h
@@ -21,50 +21,25 @@
 
 #include "format/orc/vorc_reader.h"
 #include "format/parquet/vparquet_reader.h"
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 namespace doris {
 #include "common/compile_check_begin.h"
 
-// By holding a parquet/orc reader, used to read the parquet/orc table of hive.
-class HiveReader : public TableFormatReader, public TableSchemaChangeHelper {
-public:
-    HiveReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-               RuntimeState* state, const TFileScanRangeParams& params, const TFileRangeDesc& range,
-               io::IOContext* io_ctx, const std::set<TSlotId>* is_file_slot,
-               FileMetaCache* meta_cache)
-            : TableFormatReader(std::move(file_format_reader), state, profile, params, range,
-                                io_ctx, meta_cache),
-              _is_file_slot(is_file_slot) {};
-
-    ~HiveReader() override = default;
-
-    Status get_next_block_inner(Block* block, size_t* read_rows, bool* eof) final;
-
-    Status init_row_filters() final { return Status::OK(); };
-
-protected:
-    // https://github.com/apache/doris/pull/23369
-    const std::set<TSlotId>* _is_file_slot = nullptr;
-};
-
-class HiveOrcReader final : public HiveReader {
+class HiveOrcReader final : public OrcReader, public TableSchemaChangeHelper {
 public:
     ENABLE_FACTORY_CREATOR(HiveOrcReader);
-    HiveOrcReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-                  RuntimeState* state, const TFileScanRangeParams& params,
-                  const TFileRangeDesc& range, io::IOContext* io_ctx,
-                  const std::set<TSlotId>* is_file_slot, FileMetaCache* meta_cache)
-            : HiveReader(std::move(file_format_reader), profile, state, params, range, io_ctx,
-                         is_file_slot, meta_cache) {};
+    HiveOrcReader(RuntimeProfile* profile, RuntimeState* state, const TFileScanRangeParams& params,
+                  const TFileRangeDesc& range, size_t batch_size, const std::string& ctz,
+                  io::IOContext* io_ctx, const std::set<TSlotId>* is_file_slot,
+                  FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true)
+            : OrcReader(profile, state, params, range, batch_size, ctz, io_ctx, meta_cache,
+                        enable_lazy_mat),
+              _is_file_slot(is_file_slot) {}
+
     ~HiveOrcReader() final = default;
 
-    Status init_reader(
-            const std::vector<std::string>& read_table_col_names,
-            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
-            const RowDescriptor* row_descriptor,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
+protected:
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
 
 private:
     static ColumnIdResult _create_column_ids(const orc::Type* orc_type,
@@ -72,29 +47,26 @@ class HiveOrcReader final : public HiveReader {
 
     static ColumnIdResult _create_column_ids_by_top_level_col_index(
             const orc::Type* orc_type, const TupleDescriptor* tuple_descriptor);
+
+    const std::set<TSlotId>* _is_file_slot = nullptr;
 };
 
-class HiveParquetReader final : public HiveReader {
+class HiveParquetReader final : public ParquetReader, public TableSchemaChangeHelper {
 public:
     ENABLE_FACTORY_CREATOR(HiveParquetReader);
-    HiveParquetReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-                      RuntimeState* state, const TFileScanRangeParams& params,
-                      const TFileRangeDesc& range, io::IOContext* io_ctx,
-                      const std::set<TSlotId>* is_file_slot, FileMetaCache* meta_cache)
-            : HiveReader(std::move(file_format_reader), profile, state, params, range, io_ctx,
-                         is_file_slot, meta_cache) {};
+    HiveParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params,
+                      const TFileRangeDesc& range, size_t batch_size, const cctz::time_zone* ctz,
+                      io::IOContext* io_ctx, RuntimeState* state,
+                      const std::set<TSlotId>* is_file_slot, FileMetaCache* meta_cache = nullptr,
+                      bool enable_lazy_mat = true)
+            : ParquetReader(profile, params, range, batch_size, ctz, io_ctx, state, meta_cache,
+                            enable_lazy_mat),
+              _is_file_slot(is_file_slot) {}
+
     ~HiveParquetReader() final = default;
 
-    Status init_reader(
-            const std::vector<std::string>& read_table_col_names,
-            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-            const VExprContextSPtrs& conjuncts,
-            phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>&
-                    slot_id_to_predicates,
-            const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
-            const std::unordered_map<std::string, int>* colname_to_slot_id,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
+protected:
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
 
 private:
     static ColumnIdResult _create_column_ids(const FieldDescriptor* field_desc,
@@ -102,6 +74,8 @@ class HiveParquetReader final : public HiveReader {
 
     static ColumnIdResult _create_column_ids_by_top_level_col_index(
             const FieldDescriptor* field_desc, const TupleDescriptor* tuple_descriptor);
+
+    const std::set<TSlotId>* _is_file_slot = nullptr;
 };
 #include "common/compile_check_end.h"
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/format/table/hudi_jni_reader.h b/be/src/format/table/hudi_jni_reader.h
index 47bc6bc8de2df0..514cfe68171068 100644
--- a/be/src/format/table/hudi_jni_reader.h
+++ b/be/src/format/table/hudi_jni_reader.h
@@ -50,6 +50,9 @@ class HudiJniReader : public JniReader {
     ~HudiJniReader() override = default;
 
     Status init_reader();
+
+protected:
+    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
 };
 #include "common/compile_check_end.h"
 } // namespace doris
diff --git a/be/src/format/table/hudi_reader.cpp b/be/src/format/table/hudi_reader.cpp
index 2e296d158adef1..631b32d368b1c3 100644
--- a/be/src/format/table/hudi_reader.cpp
+++ b/be/src/format/table/hudi_reader.cpp
@@ -20,39 +20,61 @@
 #include <vector>
 
 #include "common/status.h"
-#include "runtime/runtime_state.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
 
-Status HudiReader::get_next_block_inner(Block* block, size_t* read_rows, bool* eof) {
-    RETURN_IF_ERROR(_file_format_reader->get_next_block(block, read_rows, eof));
-    return Status::OK();
-};
-
-Status HudiParquetReader::init_reader(
-        const std::vector<std::string>& read_table_col_names,
-        std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-        const VExprContextSPtrs& conjuncts,
-        phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>&
-                slot_id_to_predicates,
-        const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
-        const std::unordered_map<std::string, int>* colname_to_slot_id,
-        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
-    auto* parquet_reader = static_cast<ParquetReader*>(_file_format_reader.get());
+// ============================================================================
+// HudiParquetReader: on_before_init_reader
+// ============================================================================
+Status HudiParquetReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    // Get parquet file metadata schema (file already opened by init_reader)
     const FieldDescriptor* field_desc = nullptr;
-    RETURN_IF_ERROR(parquet_reader->get_file_metadata_schema(&field_desc));
+    RETURN_IF_ERROR(get_file_metadata_schema(&field_desc));
     DCHECK(field_desc != nullptr);
 
-    auto parquet_fields_schema = field_desc->get_fields_schema();
+    // Build table_info_node using field_id matching (shared with Paimon/Iceberg)
     RETURN_IF_ERROR(gen_table_info_node_by_field_id(
-            _params, _range.table_format_params.hudi_params.schema_id, tuple_descriptor,
-            *field_desc));
-    return parquet_reader->init_reader(read_table_col_names, col_name_to_block_idx, conjuncts,
-                                       slot_id_to_predicates, tuple_descriptor, row_descriptor,
-                                       colname_to_slot_id, not_single_slot_filter_conjuncts,
-                                       slot_id_to_filter_conjuncts, table_info_node_ptr);
+            get_scan_params(), get_scan_range().table_format_params.hudi_params.schema_id,
+            get_tuple_descriptor(), *field_desc));
+    ctx->table_info_node = table_info_node_ptr;
+
+    // Extract column names from descriptors
+    for (const auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::REGULAR ||
+            desc.category == ColumnCategory::GENERATED) {
+            ctx->column_names.push_back(desc.name);
+        }
+    }
+    return Status::OK();
+}
+
+// ============================================================================
+// HudiOrcReader: on_before_init_reader
+// ============================================================================
+Status HudiOrcReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    // Get ORC file type (file already opened by init_reader)
+    const orc::Type* orc_type_ptr = nullptr;
+    RETURN_IF_ERROR(get_file_type(&orc_type_ptr));
+
+    // Build table_info_node using field_id matching
+    RETURN_IF_ERROR(gen_table_info_node_by_field_id(
+            get_scan_params(), get_scan_range().table_format_params.hudi_params.schema_id,
+            get_tuple_descriptor(), orc_type_ptr));
+    ctx->table_info_node = table_info_node_ptr;
+
+    // Extract column names from descriptors
+    for (const auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::REGULAR ||
+            desc.category == ColumnCategory::GENERATED) {
+            ctx->column_names.push_back(desc.name);
+        }
+    }
+    return Status::OK();
 }
 
 #include "common/compile_check_end.h"
diff --git a/be/src/format/table/hudi_reader.h b/be/src/format/table/hudi_reader.h
index 319c6c5af05f60..c3628ac6044a01 100644
--- a/be/src/format/table/hudi_reader.h
+++ b/be/src/format/table/hudi_reader.h
@@ -20,76 +20,42 @@
 
 #include "format/orc/vorc_reader.h"
 #include "format/parquet/vparquet_reader.h"
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 namespace doris {
 #include "common/compile_check_begin.h"
-class HudiReader : public TableFormatReader, public TableSchemaChangeHelper {
-public:
-    HudiReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-               RuntimeState* state, const TFileScanRangeParams& params, const TFileRangeDesc& range,
-               io::IOContext* io_ctx, FileMetaCache* meta_cache)
-            : TableFormatReader(std::move(file_format_reader), state, profile, params, range,
-                                io_ctx, meta_cache) {};
-
-    ~HudiReader() override = default;
-
-    Status get_next_block_inner(Block* block, size_t* read_rows, bool* eof) final;
 
-    Status init_row_filters() final { return Status::OK(); };
-};
-
-class HudiParquetReader final : public HudiReader {
+// HudiParquetReader: directly inherits ParquetReader (no composition wrapping).
+// Schema mapping is done in on_before_init_reader hook via field_id matching.
+class HudiParquetReader final : public ParquetReader, public TableSchemaChangeHelper {
 public:
     ENABLE_FACTORY_CREATOR(HudiParquetReader);
-    HudiParquetReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-                      RuntimeState* state, const TFileScanRangeParams& params,
-                      const TFileRangeDesc& range, io::IOContext* io_ctx, FileMetaCache* meta_cache)
-            : HudiReader(std::move(file_format_reader), profile, state, params, range, io_ctx,
-                         meta_cache) {};
+    HudiParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params,
+                      const TFileRangeDesc& range, size_t batch_size, const cctz::time_zone* ctz,
+                      io::IOContext* io_ctx, RuntimeState* state,
+                      FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true)
+            : ParquetReader(profile, params, range, batch_size, ctz, io_ctx, state, meta_cache,
+                            enable_lazy_mat) {}
     ~HudiParquetReader() final = default;
 
-    Status init_reader(
-            const std::vector<std::string>& read_table_col_names,
-            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-            const VExprContextSPtrs& conjuncts,
-            phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>&
-                    slot_id_to_predicates,
-            const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
-            const std::unordered_map<std::string, int>* colname_to_slot_id,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
+protected:
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
 };
 
-class HudiOrcReader final : public HudiReader {
+// HudiOrcReader: directly inherits OrcReader (no composition wrapping).
+class HudiOrcReader final : public OrcReader, public TableSchemaChangeHelper {
 public:
     ENABLE_FACTORY_CREATOR(HudiOrcReader);
-    HudiOrcReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-                  RuntimeState* state, const TFileScanRangeParams& params,
-                  const TFileRangeDesc& range, io::IOContext* io_ctx, FileMetaCache* meta_cache)
-            : HudiReader(std::move(file_format_reader), profile, state, params, range, io_ctx,
-                         meta_cache) {};
+    HudiOrcReader(RuntimeProfile* profile, RuntimeState* state, const TFileScanRangeParams& params,
+                  const TFileRangeDesc& range, size_t batch_size, const std::string& ctz,
+                  io::IOContext* io_ctx, FileMetaCache* meta_cache = nullptr,
+                  bool enable_lazy_mat = true)
+            : OrcReader(profile, state, params, range, batch_size, ctz, io_ctx, meta_cache,
+                        enable_lazy_mat) {}
     ~HudiOrcReader() final = default;
 
-    Status init_reader(
-            const std::vector<std::string>& read_table_col_names,
-            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
-            const RowDescriptor* row_descriptor,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
-        auto* orc_reader = static_cast<OrcReader*>(_file_format_reader.get());
-        const orc::Type* orc_type_ptr = nullptr;
-        RETURN_IF_ERROR(orc_reader->get_file_type(&orc_type_ptr));
-        RETURN_IF_ERROR(gen_table_info_node_by_field_id(
-                _params, _range.table_format_params.hudi_params.schema_id, tuple_descriptor,
-                orc_type_ptr));
-
-        return orc_reader->init_reader(&read_table_col_names, col_name_to_block_idx, conjuncts,
-                                       false, tuple_descriptor, row_descriptor,
-                                       not_single_slot_filter_conjuncts,
-                                       slot_id_to_filter_conjuncts, table_info_node_ptr);
-    }
+protected:
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
 };
 
 #include "common/compile_check_end.h"
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/format/table/iceberg/iceberg_orc_nested_column_utils.cpp b/be/src/format/table/iceberg/iceberg_orc_nested_column_utils.cpp
index da9ad8168106a2..c2f0593b3f59b1 100644
--- a/be/src/format/table/iceberg/iceberg_orc_nested_column_utils.cpp
+++ b/be/src/format/table/iceberg/iceberg_orc_nested_column_utils.cpp
@@ -24,7 +24,8 @@
 #include <unordered_map>
 #include <vector>
 
-#include "format/table/table_format_reader.h"
+#include "format/generic_reader.h"
+#include "format/table/table_schema_change_helper.h"
 #include "orc/Type.hh"
 
 namespace doris {
diff --git a/be/src/format/table/iceberg/iceberg_orc_nested_column_utils.h b/be/src/format/table/iceberg/iceberg_orc_nested_column_utils.h
index 142dee706b89f6..cc5761854736fc 100644
--- a/be/src/format/table/iceberg/iceberg_orc_nested_column_utils.h
+++ b/be/src/format/table/iceberg/iceberg_orc_nested_column_utils.h
@@ -21,7 +21,7 @@
 #include <string>
 #include <vector>
 
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 
 namespace orc {
 class Type;
diff --git a/be/src/format/table/iceberg/iceberg_parquet_nested_column_utils.cpp b/be/src/format/table/iceberg/iceberg_parquet_nested_column_utils.cpp
index e84cc3700d0f32..a9ad8f27d0c6bc 100644
--- a/be/src/format/table/iceberg/iceberg_parquet_nested_column_utils.cpp
+++ b/be/src/format/table/iceberg/iceberg_parquet_nested_column_utils.cpp
@@ -26,7 +26,7 @@
 #include <vector>
 
 #include "format/parquet/schema_desc.h"
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
diff --git a/be/src/format/table/iceberg/iceberg_parquet_nested_column_utils.h b/be/src/format/table/iceberg/iceberg_parquet_nested_column_utils.h
index 5d16d1053c898e..fd47ed37c69fe8 100644
--- a/be/src/format/table/iceberg/iceberg_parquet_nested_column_utils.h
+++ b/be/src/format/table/iceberg/iceberg_parquet_nested_column_utils.h
@@ -23,7 +23,7 @@
 #include <unordered_map>
 #include <vector>
 
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
diff --git a/be/src/format/table/iceberg_delete_file_reader_helper.cpp b/be/src/format/table/iceberg_delete_file_reader_helper.cpp
index 55525a0635cc29..2e7045c81ad551 100644
--- a/be/src/format/table/iceberg_delete_file_reader_helper.cpp
+++ b/be/src/format/table/iceberg_delete_file_reader_helper.cpp
@@ -34,7 +34,6 @@
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/data_type_string.h"
 #include "exec/common/endian.h"
-#include "exprs/vexpr_context.h"
 #include "format/orc/vorc_reader.h"
 #include "format/parquet/vparquet_column_chunk_reader.h"
 #include "format/parquet/vparquet_reader.h"
@@ -42,7 +41,6 @@
 #include "format/table/iceberg_reader.h"
 #include "format/table/table_format_reader.h"
 #include "io/hdfs_builder.h"
-#include "runtime/descriptors.h"
 #include "runtime/runtime_state.h"
 #include "storage/predicate/column_predicate.h"
 
@@ -121,16 +119,11 @@ Status init_parquet_delete_reader(ParquetReader* reader, bool* dictionary_coded)
         return Status::InvalidArgument("invalid parquet delete reader arguments");
     }
 
-    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> slot_id_to_predicates;
-    RETURN_IF_ERROR(reader->init_reader(DELETE_COL_NAMES, &DELETE_COL_NAME_TO_BLOCK_IDX, {},
-                                        slot_id_to_predicates, nullptr, nullptr, nullptr, nullptr,
-                                        nullptr, TableSchemaChangeHelper::ConstNode::get_instance(),
-                                        false));
-
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    RETURN_IF_ERROR(reader->set_fill_columns(partition_columns, missing_columns));
+    ParquetInitContext ctx;
+    ctx.column_names = DELETE_COL_NAMES;
+    ctx.col_name_to_block_idx = &DELETE_COL_NAME_TO_BLOCK_IDX;
+    ctx.filter_groups = false;
+    RETURN_IF_ERROR(reader->init_reader(&ctx));
 
     const tparquet::FileMetaData* meta_data = reader->get_meta_data();
     *dictionary_coded = true;
@@ -150,14 +143,10 @@ Status init_orc_delete_reader(OrcReader* reader) {
         return Status::InvalidArgument("orc delete reader is null");
     }
 
-    RETURN_IF_ERROR(reader->init_reader(&DELETE_COL_NAMES, &DELETE_COL_NAME_TO_BLOCK_IDX, {}, false,
-                                        nullptr, nullptr, nullptr, nullptr,
-                                        TableSchemaChangeHelper::ConstNode::get_instance()));
-
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    RETURN_IF_ERROR(reader->set_fill_columns(partition_columns, missing_columns));
+    OrcInitContext ctx;
+    ctx.column_names = DELETE_COL_NAMES;
+    ctx.col_name_to_block_idx = &DELETE_COL_NAME_TO_BLOCK_IDX;
+    RETURN_IF_ERROR(reader->init_reader(&ctx));
     return Status::OK();
 }
 
diff --git a/be/src/format/table/iceberg_reader.cpp b/be/src/format/table/iceberg_reader.cpp
index 574324f4d55a02..b9afea2fb2abd7 100644
--- a/be/src/format/table/iceberg_reader.cpp
+++ b/be/src/format/table/iceberg_reader.cpp
@@ -29,15 +29,20 @@
 #include <cstring>
 #include <functional>
 #include <memory>
-#include <set>
 
 #include "common/compiler_util.h" // IWYU pragma: keep
+#include "common/consts.h"
 #include "common/status.h"
 #include "core/assert_cast.h"
 #include "core/block/block.h"
 #include "core/block/column_with_type_and_name.h"
 #include "core/column/column.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
 #include "core/data_type/data_type_factory.hpp"
+#include "core/data_type/define_primitive_type.h"
+#include "core/data_type/primitive_type.h"
+#include "core/string_ref.h"
 #include "exprs/aggregate/aggregate_function.h"
 #include "format/format_common.h"
 #include "format/generic_reader.h"
@@ -47,9 +52,8 @@
 #include "format/table/deletion_vector_reader.h"
 #include "format/table/iceberg/iceberg_orc_nested_column_utils.h"
 #include "format/table/iceberg/iceberg_parquet_nested_column_utils.h"
-#include "format/table/iceberg_delete_file_reader_helper.h"
 #include "format/table/nested_column_access_helper.h"
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 #include "runtime/runtime_state.h"
 #include "util/coding.h"
 
@@ -69,42 +73,6 @@ class VExprContext;
 } // namespace doris
 
 namespace doris {
-namespace {
-
-class GroupedDeleteRowsVisitor final : public IcebergPositionDeleteVisitor {
-public:
-    using DeleteRows = std::vector<int64_t>;
-    using DeleteFile = phmap::parallel_flat_hash_map<
-            std::string, std::unique_ptr<DeleteRows>, std::hash<std::string>, std::equal_to<>,
-            std::allocator<std::pair<const std::string, std::unique_ptr<DeleteRows>>>, 8,
-            std::mutex>;
-
-    explicit GroupedDeleteRowsVisitor(DeleteFile* position_delete)
-            : _position_delete(position_delete) {}
-
-    Status visit(const std::string& file_path, int64_t pos) override {
-        if (_position_delete == nullptr) {
-            return Status::InvalidArgument("position delete map is null");
-        }
-
-        auto iter = _position_delete->find(file_path);
-        DeleteRows* delete_rows = nullptr;
-        if (iter == _position_delete->end()) {
-            delete_rows = new DeleteRows;
-            (*_position_delete)[file_path] = std::unique_ptr<DeleteRows>(delete_rows);
-        } else {
-            delete_rows = iter->second.get();
-        }
-        delete_rows->push_back(pos);
-        return Status::OK();
-    }
-
-private:
-    DeleteFile* _position_delete;
-};
-
-} // namespace
-
 const std::string IcebergOrcReader::ICEBERG_ORC_ATTRIBUTE = "iceberg.id";
 
 bool IcebergTableReader::_is_fully_dictionary_encoded(
@@ -157,461 +125,185 @@ bool IcebergTableReader::_is_fully_dictionary_encoded(
     return true;
 }
 
-IcebergTableReader::IcebergTableReader(std::unique_ptr<GenericReader> file_format_reader,
-                                       RuntimeProfile* profile, RuntimeState* state,
-                                       const TFileScanRangeParams& params,
-                                       const TFileRangeDesc& range, ShardedKVCache* kv_cache,
-                                       io::IOContext* io_ctx, FileMetaCache* meta_cache)
-        : TableFormatReader(std::move(file_format_reader), state, profile, params, range, io_ctx,
-                            meta_cache),
-          _kv_cache(kv_cache) {
-    static const char* iceberg_profile = "IcebergProfile";
-    ADD_TIMER(_profile, iceberg_profile);
-    _iceberg_profile.num_delete_files =
-            ADD_CHILD_COUNTER(_profile, "NumDeleteFiles", TUnit::UNIT, iceberg_profile);
-    _iceberg_profile.num_delete_rows =
-            ADD_CHILD_COUNTER(_profile, "NumDeleteRows", TUnit::UNIT, iceberg_profile);
-    _iceberg_profile.delete_files_read_time =
-            ADD_CHILD_TIMER(_profile, "DeleteFileReadTime", iceberg_profile);
-    _iceberg_profile.delete_rows_sort_time =
-            ADD_CHILD_TIMER(_profile, "DeleteRowsSortTime", iceberg_profile);
-    _iceberg_profile.parse_delete_file_time =
-            ADD_CHILD_TIMER(_profile, "ParseDeleteFileTime", iceberg_profile);
-}
-
-Status IcebergTableReader::get_next_block_inner(Block* block, size_t* read_rows, bool* eof) {
-    RETURN_IF_ERROR(_expand_block_if_need(block));
-
-    RETURN_IF_ERROR(_file_format_reader->get_next_block(block, read_rows, eof));
+// ============================================================================
+// IcebergParquetReader: on_before_init_reader (Parquet-specific schema matching)
+// ============================================================================
+Status IcebergParquetReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    _file_format = Fileformat::PARQUET;
 
-    if (_equality_delete_impls.size() > 0) {
-        std::unique_ptr<IColumn::Filter> filter =
-                std::make_unique<IColumn::Filter>(block->rows(), 1);
-        for (auto& equality_delete_impl : _equality_delete_impls) {
-            RETURN_IF_ERROR(equality_delete_impl->filter_data_block(
-                    block, _col_name_to_block_idx, _id_to_block_column_name, *filter));
+    // Get file metadata schema first (available because _open_file() already ran)
+    const FieldDescriptor* field_desc = nullptr;
+    RETURN_IF_ERROR(this->get_file_metadata_schema(&field_desc));
+    DCHECK(field_desc != nullptr);
+
+    // Build table_info_node by field_id or name matching.
+    // This must happen BEFORE column classification so we can use children_column_exists
+    // to check if a column exists in the file (by field ID, not name).
+    if (!get_scan_params().__isset.history_schema_info ||
+        get_scan_params().history_schema_info.empty()) [[unlikely]] {
+        RETURN_IF_ERROR(BuildTableInfoUtil::by_parquet_name(ctx->tuple_descriptor, *field_desc,
+                                                            ctx->table_info_node));
+    } else {
+        bool exist_field_id = true;
+        RETURN_IF_ERROR(BuildTableInfoUtil::by_parquet_field_id(
+                get_scan_params().history_schema_info.front().root_field, *field_desc,
+                ctx->table_info_node, exist_field_id));
+        if (!exist_field_id) {
+            RETURN_IF_ERROR(BuildTableInfoUtil::by_parquet_name(ctx->tuple_descriptor, *field_desc,
+                                                                ctx->table_info_node));
+        }
+    }
+
+    std::unordered_set<std::string> partition_col_names;
+    if (ctx->range->__isset.columns_from_path_keys) {
+        partition_col_names.insert(ctx->range->columns_from_path_keys.begin(),
+                                   ctx->range->columns_from_path_keys.end());
+    }
+
+    // Single pass: classify columns, detect $row_id, handle partition fallback.
+    bool has_partition_from_path = false;
+    for (auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::SYNTHESIZED &&
+            desc.name == BeConsts::ICEBERG_ROWID_COL) {
+            _need_row_id_column = true;
+            this->register_synthesized_column_handler(BeConsts::ICEBERG_ROWID_COL,
+                                                      [this](Block* block, size_t rows) -> Status {
+                                                          return _fill_iceberg_row_id(block, rows);
+                                                      });
+            continue;
+        }
+        if (desc.category == ColumnCategory::REGULAR) {
+            // Partition fallback: if column is a partition key and NOT in the file
+            // (checked via field ID matching in table_info_node), read from path instead.
+            if (partition_col_names.contains(desc.name) &&
+                !ctx->table_info_node->children_column_exists(desc.name)) {
+                if (config::enable_iceberg_partition_column_fallback) {
+                    desc.category = ColumnCategory::PARTITION_KEY;
+                    has_partition_from_path = true;
+                    continue;
+                }
+            }
+            ctx->column_names.push_back(desc.name);
+        } else if (desc.category == ColumnCategory::GENERATED) {
+            ctx->column_names.push_back(desc.name);
         }
-        Block::filter_block_internal(block, *filter, block->columns());
     }
 
-    *read_rows = block->rows();
-    return _shrink_block_if_need(block);
-}
-
-Status IcebergTableReader::init_row_filters() {
-    // We get the count value by doris's be, so we don't need to read the delete file
-    if (_push_down_agg_type == TPushAggOp::type::COUNT && _table_level_row_count > 0) {
-        return Status::OK();
+    // Set up partition value extraction if any partition columns need filling from path
+    if (has_partition_from_path) {
+        RETURN_IF_ERROR(_extract_partition_values(*ctx->range, ctx->tuple_descriptor,
+                                                  _fill_partition_values));
     }
 
-    const auto& table_desc = _range.table_format_params.iceberg_params;
-    const auto& version = table_desc.format_version;
-    if (version < MIN_SUPPORT_DELETE_FILES_VERSION) {
-        return Status::OK();
-    }
+    _all_required_col_names = ctx->column_names;
 
-    auto* parquet_reader = dynamic_cast<ParquetReader*>(_file_format_reader.get());
-    auto* orc_reader = dynamic_cast<OrcReader*>(_file_format_reader.get());
-
-    // Initialize file information for $row_id generation
-    // Extract from table_desc which contains current file's metadata
-    if (_need_row_id_column) {
-        std::string file_path = table_desc.original_file_path;
-        int32_t partition_spec_id = 0;
-        std::string partition_data_json;
-        if (table_desc.__isset.partition_spec_id) {
-            partition_spec_id = table_desc.partition_spec_id;
-        }
-        if (table_desc.__isset.partition_data_json) {
-            partition_data_json = table_desc.partition_data_json;
-        }
+    // Create column IDs from field descriptor
+    auto column_id_result = _create_column_ids(field_desc, ctx->tuple_descriptor);
+    ctx->column_ids = std::move(column_id_result.column_ids);
+    ctx->filter_column_ids = std::move(column_id_result.filter_column_ids);
 
-        if (parquet_reader != nullptr) {
-            parquet_reader->set_iceberg_rowid_params(file_path, partition_spec_id,
-                                                     partition_data_json, _row_id_column_position);
-        } else if (orc_reader != nullptr) {
-            orc_reader->set_iceberg_rowid_params(file_path, partition_spec_id, partition_data_json,
-                                                 _row_id_column_position);
-        }
-        LOG(INFO) << "Initialized $row_id generation for file: " << file_path
-                  << ", partition_spec_id: " << partition_spec_id;
+    // Build field_id -> block_column_name mapping for equality delete filtering.
+    // This was previously done in init_reader() column matching (pre-CRTP refactoring).
+    for (const auto* slot : ctx->tuple_descriptor->slots()) {
+        _id_to_block_column_name.emplace(slot->col_unique_id(), slot->col_name());
     }
 
-    std::vector<TIcebergDeleteFileDesc> position_delete_files;
-    std::vector<TIcebergDeleteFileDesc> equality_delete_files;
-    std::vector<TIcebergDeleteFileDesc> deletion_vector_files;
-    for (const TIcebergDeleteFileDesc& desc : table_desc.delete_files) {
-        if (desc.content == POSITION_DELETE) {
-            position_delete_files.emplace_back(desc);
-        } else if (desc.content == EQUALITY_DELETE) {
-            equality_delete_files.emplace_back(desc);
-        } else if (desc.content == DELETION_VECTOR) {
-            deletion_vector_files.emplace_back(desc);
-        }
-    }
+    // Process delete files (must happen before _do_init_reader so expand col IDs are included)
+    RETURN_IF_ERROR(_init_row_filters());
 
-    if (!equality_delete_files.empty()) {
-        RETURN_IF_ERROR(_process_equality_delete(equality_delete_files));
-        _file_format_reader->set_push_down_agg_type(TPushAggOp::NONE);
+    // Add expand column IDs for equality delete and remap expand column names
+    // to match master's behavior:
+    // - Use field_id to find the actual file column name in Parquet schema
+    // - Prefix with __equality_delete_column__ to avoid name conflicts
+    // - Correctly map table_col_name → file_col_name in table_info_node
+    const static std::string EQ_DELETE_PRE = "__equality_delete_column__";
+    std::unordered_map<int, std::string> field_id_to_file_col_name;
+    for (int i = 0; i < field_desc->size(); ++i) {
+        auto field_schema = field_desc->get_column(i);
+        if (field_schema) {
+            field_id_to_file_col_name[field_schema->field_id] = field_schema->name;
+        }
     }
 
-    if (!deletion_vector_files.empty()) {
-        if (deletion_vector_files.size() != 1) [[unlikely]] {
-            /*
-             * Deletion vectors are a binary representation of deletes for a single data file that is more efficient
-             * at execution time than position delete files. Unlike equality or position delete files, there can be
-             * at most one deletion vector for a given data file in a snapshot.
-             */
-            return Status::DataQualityError("This iceberg data file has multiple DVs.");
+    // Rebuild _expand_col_names with proper file-column-based names
+    std::vector<std::string> new_expand_col_names;
+    for (size_t i = 0; i < _expand_col_names.size(); ++i) {
+        const auto& old_name = _expand_col_names[i];
+        // Find the field_id for this expand column
+        int field_id = -1;
+        for (auto& [fid, name] : _id_to_block_column_name) {
+            if (name == old_name) {
+                field_id = fid;
+                break;
+            }
         }
-        RETURN_IF_ERROR(
-                read_deletion_vector(table_desc.original_file_path, deletion_vector_files[0]));
-
-        _file_format_reader->set_push_down_agg_type(TPushAggOp::NONE);
-        // Readers can safely ignore position delete files if there is a DV for a data file.
-    } else if (!position_delete_files.empty()) {
-        RETURN_IF_ERROR(
-                _position_delete_base(table_desc.original_file_path, position_delete_files));
-        _file_format_reader->set_push_down_agg_type(TPushAggOp::NONE);
-    }
 
-    COUNTER_UPDATE(_iceberg_profile.num_delete_files, table_desc.delete_files.size());
-    return Status::OK();
-}
+        std::string file_col_name = old_name;
+        auto it = field_id_to_file_col_name.find(field_id);
+        if (it != field_id_to_file_col_name.end()) {
+            file_col_name = it->second;
+        }
 
-void IcebergTableReader::_generate_equality_delete_block(
-        Block* block, const std::vector<std::string>& equality_delete_col_names,
-        const std::vector<DataTypePtr>& equality_delete_col_types) {
-    for (int i = 0; i < equality_delete_col_names.size(); ++i) {
-        DataTypePtr data_type = make_nullable(equality_delete_col_types[i]);
-        MutableColumnPtr data_column = data_type->create_column();
-        block->insert(ColumnWithTypeAndName(std::move(data_column), data_type,
-                                            equality_delete_col_names[i]));
-    }
-}
+        std::string table_col_name = EQ_DELETE_PRE + file_col_name;
 
-Status IcebergTableReader::_expand_block_if_need(Block* block) {
-    std::set<std::string> names;
-    auto block_names = block->get_names();
-    names.insert(block_names.begin(), block_names.end());
-    for (auto& col : _expand_columns) {
-        col.column->assume_mutable()->clear();
-        if (names.contains(col.name)) {
-            return Status::InternalError("Wrong expand column '{}'", col.name);
+        // Update _id_to_block_column_name
+        if (field_id >= 0) {
+            _id_to_block_column_name[field_id] = table_col_name;
         }
-        names.insert(col.name);
-        (*_col_name_to_block_idx)[col.name] = static_cast<uint32_t>(block->columns());
-        block->insert(col);
-    }
-    return Status::OK();
-}
 
-Status IcebergTableReader::_shrink_block_if_need(Block* block) {
-    std::set<size_t> positions_to_erase;
-    for (const std::string& expand_col : _expand_col_names) {
-        if (!_col_name_to_block_idx->contains(expand_col)) {
-            return Status::InternalError("Wrong erase column '{}', block: {}", expand_col,
-                                         block->dump_names());
+        // Update _expand_columns name
+        if (i < _expand_columns.size()) {
+            _expand_columns[i].name = table_col_name;
         }
-        positions_to_erase.emplace((*_col_name_to_block_idx)[expand_col]);
-    }
-    block->erase(positions_to_erase);
-    for (const std::string& expand_col : _expand_col_names) {
-        _col_name_to_block_idx->erase(expand_col);
-    }
-    return Status::OK();
-}
 
-Status IcebergTableReader::_position_delete_base(
-        const std::string data_file_path, const std::vector<TIcebergDeleteFileDesc>& delete_files) {
-    std::vector<DeleteRows*> delete_rows_array;
-    int64_t num_delete_rows = 0;
-    for (const auto& delete_file : delete_files) {
-        SCOPED_TIMER(_iceberg_profile.delete_files_read_time);
-        Status create_status = Status::OK();
-        auto* delete_file_cache = _kv_cache->get<DeleteFile>(
-                _delet_file_cache_key(delete_file.path), [&]() -> DeleteFile* {
-                    auto* position_delete = new DeleteFile;
-                    create_status = _read_position_delete_file(delete_file, position_delete);
-
-                    if (!create_status) {
-                        return nullptr;
-                    }
-
-                    return position_delete;
-                });
-        if (create_status.is<ErrorCode::END_OF_FILE>()) {
-            continue;
-        } else if (!create_status.ok()) {
-            return create_status;
-        }
+        new_expand_col_names.push_back(table_col_name);
 
-        DeleteFile& delete_file_map = *((DeleteFile*)delete_file_cache);
-        auto get_value = [&](const auto& v) {
-            DeleteRows* row_ids = v.second.get();
-            if (!row_ids->empty()) {
-                delete_rows_array.emplace_back(row_ids);
-                num_delete_rows += row_ids->size();
+        // Add column IDs
+        if (it != field_id_to_file_col_name.end()) {
+            for (int j = 0; j < field_desc->size(); ++j) {
+                auto field_schema = field_desc->get_column(j);
+                if (field_schema && field_schema->field_id == field_id) {
+                    ctx->column_ids.insert(field_schema->get_column_id());
+                    break;
+                }
             }
-        };
-        delete_file_map.if_contains(data_file_path, get_value);
-    }
-    // Use a KV cache to store the delete rows corresponding to a data file path.
-    // The Parquet/ORC reader holds a reference (pointer) to this cached entry.
-    // This allows delete rows to be reused when a single data file is split into
-    // multiple splits, avoiding excessive memory usage when delete rows are large.
-    if (num_delete_rows > 0) {
-        SCOPED_TIMER(_iceberg_profile.delete_rows_sort_time);
-        _iceberg_delete_rows =
-                _kv_cache->get<DeleteRows>(data_file_path,
-                                           [&]() -> DeleteRows* {
-                                               auto* data_file_position_delete = new DeleteRows;
-                                               _sort_delete_rows(delete_rows_array, num_delete_rows,
-                                                                 *data_file_position_delete);
-
-                                               return data_file_position_delete;
-                                           }
-
-                );
-        set_delete_rows();
-        COUNTER_UPDATE(_iceberg_profile.num_delete_rows, num_delete_rows);
-    }
-    return Status::OK();
-}
-
-Status IcebergTableReader::_read_position_delete_file(const TIcebergDeleteFileDesc& delete_file,
-                                                      DeleteFile* position_delete) {
-    GroupedDeleteRowsVisitor visitor(position_delete);
-    IcebergDeleteFileReaderOptions options;
-    options.state = _state;
-    options.profile = _profile;
-    options.scan_params = &_params;
-    options.io_ctx = _io_ctx;
-    options.meta_cache = _meta_cache;
-    options.fs_name = &_range.fs_name;
-    options.batch_size = READ_DELETE_FILE_BATCH_SIZE;
-    return read_iceberg_position_delete_file(delete_file, options, &visitor);
-}
+        }
 
-/**
- * https://iceberg.apache.org/spec/#position-delete-files
- * The rows in the delete file must be sorted by file_path then position to optimize filtering rows while scanning.
- * Sorting by file_path allows filter pushdown by file in columnar storage formats.
- * Sorting by position allows filtering rows while scanning, to avoid keeping deletes in memory.
- */
-void IcebergTableReader::_sort_delete_rows(
-        const std::vector<std::vector<int64_t>*>& delete_rows_array, int64_t num_delete_rows,
-        std::vector<int64_t>& result) {
-    if (delete_rows_array.empty()) {
-        return;
-    }
-    if (delete_rows_array.size() == 1) {
-        result.resize(num_delete_rows);
-        memcpy(result.data(), delete_rows_array.front()->data(), sizeof(int64_t) * num_delete_rows);
-        return;
-    }
-    if (delete_rows_array.size() == 2) {
-        result.resize(num_delete_rows);
-        std::merge(delete_rows_array.front()->begin(), delete_rows_array.front()->end(),
-                   delete_rows_array.back()->begin(), delete_rows_array.back()->end(),
-                   result.begin());
-        return;
+        // Register in table_info_node: table_col_name → file_col_name
+        ctx->column_names.push_back(table_col_name);
+        ctx->table_info_node->add_children(table_col_name, file_col_name,
+                                           TableSchemaChangeHelper::ConstNode::get_instance());
     }
+    _expand_col_names = std::move(new_expand_col_names);
 
-    using vec_pair = std::pair<std::vector<int64_t>::iterator, std::vector<int64_t>::iterator>;
-    result.resize(num_delete_rows);
-    auto row_id_iter = result.begin();
-    auto iter_end = result.end();
-    std::vector<vec_pair> rows_array;
-    for (auto* rows : delete_rows_array) {
-        if (!rows->empty()) {
-            rows_array.emplace_back(rows->begin(), rows->end());
-        }
+    // Debug logging
+    for (const auto& name : _expand_col_names) {
+        LOG(INFO) << "[EqDeleteDebug] final expand col: " << name;
     }
-    size_t array_size = rows_array.size();
-    while (row_id_iter != iter_end) {
-        int64_t min_index = 0;
-        int64_t min = *rows_array[0].first;
-        for (size_t i = 0; i < array_size; ++i) {
-            if (*rows_array[i].first < min) {
-                min_index = i;
-                min = *rows_array[i].first;
-            }
-        }
-        *row_id_iter++ = min;
-        rows_array[min_index].first++;
-        if (UNLIKELY(rows_array[min_index].first == rows_array[min_index].second)) {
-            rows_array.erase(rows_array.begin() + min_index);
-            array_size--;
-        }
-    }
-}
-
-Status IcebergParquetReader::init_reader(
-        const std::vector<std::string>& file_col_names,
-        std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-        const VExprContextSPtrs& conjuncts,
-        phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>&
-                slot_id_to_predicates,
-        const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
-        const std::unordered_map<std::string, int>* colname_to_slot_id,
-        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
-    _file_format = Fileformat::PARQUET;
-    _col_name_to_block_idx = col_name_to_block_idx;
-    auto* parquet_reader = static_cast<ParquetReader*>(_file_format_reader.get());
-    RETURN_IF_ERROR(parquet_reader->get_file_metadata_schema(&_data_file_field_desc));
-    DCHECK(_data_file_field_desc != nullptr);
-    if (_row_lineage_columns != nullptr) {
-        const auto& table_desc = _range.table_format_params.iceberg_params;
-        _row_lineage_columns->first_row_id =
-                table_desc.__isset.first_row_id ? table_desc.first_row_id : -1;
-        _row_lineage_columns->last_updated_sequence_number =
-                table_desc.__isset.last_updated_sequence_number
-                        ? table_desc.last_updated_sequence_number
-                        : -1;
-        parquet_reader->set_row_lineage_columns(_row_lineage_columns);
+    for (auto& [fid, name] : _id_to_block_column_name) {
+        LOG(INFO) << "[EqDeleteDebug] final _id_to_block_column_name[" << fid << "] = " << name;
     }
 
-    auto column_id_result = _create_column_ids(_data_file_field_desc, tuple_descriptor);
-    auto& column_ids = column_id_result.column_ids;
-    const auto& filter_column_ids = column_id_result.filter_column_ids;
-
-    RETURN_IF_ERROR(init_row_filters());
-    _all_required_col_names = file_col_names;
-
-    if (!_params.__isset.history_schema_info || _params.history_schema_info.empty()) [[unlikely]] {
-        RETURN_IF_ERROR(BuildTableInfoUtil::by_parquet_name(
-                tuple_descriptor, *_data_file_field_desc, table_info_node_ptr));
-    } else {
-        std::set<std::string> read_col_name_set(file_col_names.begin(), file_col_names.end());
+    // Enable group filtering for Iceberg
+    _filter_groups = true;
 
-        bool exist_field_id = true;
-        for (int idx = 0; idx < _data_file_field_desc->size(); idx++) {
-            if (_data_file_field_desc->get_column(idx)->field_id == -1) {
-                // the data file may be from hive table migrated to iceberg, field id is missing
-                exist_field_id = false;
-                break;
-            }
-        }
-        const auto& table_schema = _params.history_schema_info.front().root_field;
-
-        table_info_node_ptr = std::make_shared<TableSchemaChangeHelper::StructNode>();
-        if (exist_field_id) {
-            // id -> table column name. columns that need read data file.
-            std::unordered_map<int, std::shared_ptr<schema::external::TField>> id_to_table_field;
-            for (const auto& table_field : table_schema.fields) {
-                auto field = table_field.field_ptr;
-                DCHECK(field->__isset.name);
-                if (!read_col_name_set.contains(field->name)) {
-                    continue;
-                }
-                id_to_table_field.emplace(field->id, field);
-            }
-
-            for (int idx = 0; idx < _data_file_field_desc->size(); idx++) {
-                const auto& data_file_field = _data_file_field_desc->get_column(idx);
-                auto data_file_column_id = _data_file_field_desc->get_column(idx)->field_id;
-
-                if (id_to_table_field.contains(data_file_column_id)) {
-                    const auto& table_field = id_to_table_field[data_file_column_id];
-
-                    std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-                    RETURN_IF_ERROR(BuildTableInfoUtil::by_parquet_field_id(
-                            *table_field, *data_file_field, exist_field_id, field_node));
-                    table_info_node_ptr->add_children(table_field->name, data_file_field->name,
-                                                      field_node);
-
-                    _id_to_block_column_name.emplace(data_file_column_id, table_field->name);
-                    id_to_table_field.erase(data_file_column_id);
-                } else if (_equality_delete_col_ids.contains(data_file_column_id)) {
-                    // Columns that need to be read for equality delete.
-                    const static std::string EQ_DELETE_PRE = "__equality_delete_column__";
-
-                    // Construct table column names that avoid duplication with current table schema.
-                    // As the columns currently being read may have been deleted in the latest
-                    // table structure or have undergone a series of schema changes...
-                    std::string table_column_name = EQ_DELETE_PRE + data_file_field->name;
-                    table_info_node_ptr->add_children(
-                            table_column_name, data_file_field->name,
-                            std::make_shared<TableSchemaChangeHelper::ConstNode>());
-
-                    _id_to_block_column_name.emplace(data_file_column_id, table_column_name);
-                    _expand_col_names.emplace_back(table_column_name);
-                    auto expand_data_type = make_nullable(data_file_field->data_type);
-                    _expand_columns.emplace_back(
-                            ColumnWithTypeAndName {expand_data_type->create_column(),
-                                                   expand_data_type, table_column_name});
-
-                    _all_required_col_names.emplace_back(table_column_name);
-                    column_ids.insert(data_file_field->get_column_id());
-                }
-            }
-            for (const auto& [id, table_field] : id_to_table_field) {
-                table_info_node_ptr->add_not_exist_children(table_field->name);
-            }
-        } else {
-            if (!_equality_delete_col_ids.empty()) [[unlikely]] {
-                return Status::InternalError(
-                        "Can not read missing field id data file when have equality delete");
-            }
-            std::map<std::string, size_t> file_column_idx_map;
-            for (size_t idx = 0; idx < _data_file_field_desc->size(); idx++) {
-                file_column_idx_map.emplace(_data_file_field_desc->get_column(idx)->name, idx);
-            }
-
-            for (const auto& table_field : table_schema.fields) {
-                DCHECK(table_field.__isset.field_ptr);
-                DCHECK(table_field.field_ptr->__isset.name);
-                const auto& table_column_name = table_field.field_ptr->name;
-                if (!read_col_name_set.contains(table_column_name)) {
-                    continue;
-                }
-                if (!table_field.field_ptr->__isset.name_mapping ||
-                    table_field.field_ptr->name_mapping.size() == 0) {
-                    return Status::DataQualityError(
-                            "name_mapping must be set when read missing field id data file.");
-                }
-                bool have_mapping = false;
-                for (const auto& mapped_name : table_field.field_ptr->name_mapping) {
-                    if (file_column_idx_map.contains(mapped_name)) {
-                        std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-                        const auto& file_field = _data_file_field_desc->get_column(
-                                file_column_idx_map.at(mapped_name));
-                        RETURN_IF_ERROR(BuildTableInfoUtil::by_parquet_field_id(
-                                *table_field.field_ptr, *file_field, exist_field_id, field_node));
-                        table_info_node_ptr->add_children(table_column_name, file_field->name,
-                                                          field_node);
-                        have_mapping = true;
-                        break;
-                    }
-                }
-                if (!have_mapping) {
-                    table_info_node_ptr->add_not_exist_children(table_column_name);
-                }
-            }
-        }
-    }
-
-    return parquet_reader->init_reader(
-            _all_required_col_names, _col_name_to_block_idx, conjuncts, slot_id_to_predicates,
-            tuple_descriptor, row_descriptor, colname_to_slot_id, not_single_slot_filter_conjuncts,
-            slot_id_to_filter_conjuncts, table_info_node_ptr, true, column_ids, filter_column_ids);
+    return Status::OK();
 }
 
+// ============================================================================
+// IcebergParquetReader: _create_column_ids
+// ============================================================================
 ColumnIdResult IcebergParquetReader::_create_column_ids(const FieldDescriptor* field_desc,
                                                         const TupleDescriptor* tuple_descriptor) {
-    // First, assign column IDs to the field descriptor
     auto* mutable_field_desc = const_cast<FieldDescriptor*>(field_desc);
     mutable_field_desc->assign_ids();
 
-    // map top-level table column iceberg_id -> FieldSchema*
     std::unordered_map<int, const FieldSchema*> iceberg_id_to_field_schema_map;
-
     for (int i = 0; i < field_desc->size(); ++i) {
         auto field_schema = field_desc->get_column(i);
         if (!field_schema) continue;
-
         int iceberg_id = field_schema->field_id;
         iceberg_id_to_field_schema_map[iceberg_id] = field_schema;
     }
@@ -619,7 +311,6 @@ ColumnIdResult IcebergParquetReader::_create_column_ids(const FieldDescriptor* f
     std::set<uint64_t> column_ids;
     std::set<uint64_t> filter_column_ids;
 
-    // helper to process access paths for a given top-level parquet field
     auto process_access_paths = [](const FieldSchema* parquet_field,
                                    const std::vector<TColumnAccessPath>& access_paths,
                                    std::set<uint64_t>& out_ids) {
@@ -633,23 +324,19 @@ ColumnIdResult IcebergParquetReader::_create_column_ids(const FieldDescriptor* f
     for (const auto* slot : tuple_descriptor->slots()) {
         auto it = iceberg_id_to_field_schema_map.find(slot->col_unique_id());
         if (it == iceberg_id_to_field_schema_map.end()) {
-            // Column not found in file (e.g., partition column, added column)
             continue;
         }
         auto field_schema = it->second;
 
-        // primitive (non-nested) types: direct mapping by name
         if ((slot->col_type() != TYPE_STRUCT && slot->col_type() != TYPE_ARRAY &&
              slot->col_type() != TYPE_MAP)) {
             column_ids.insert(field_schema->column_id);
-
             if (slot->is_predicate()) {
                 filter_column_ids.insert(field_schema->column_id);
             }
             continue;
         }
 
-        // complex types:
         const auto& all_access_paths = slot->all_access_paths();
         process_access_paths(field_schema, all_access_paths, column_ids);
 
@@ -661,173 +348,209 @@ ColumnIdResult IcebergParquetReader::_create_column_ids(const FieldDescriptor* f
     return ColumnIdResult(std::move(column_ids), std::move(filter_column_ids));
 }
 
-Status IcebergOrcReader::init_reader(
-        const std::vector<std::string>& file_col_names,
-        std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-        const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
-        const RowDescriptor* row_descriptor,
-        const std::unordered_map<std::string, int>* colname_to_slot_id,
-        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
-    _file_format = Fileformat::ORC;
-    _col_name_to_block_idx = col_name_to_block_idx;
-    auto* orc_reader = static_cast<OrcReader*>(_file_format_reader.get());
-    RETURN_IF_ERROR(orc_reader->get_file_type(&_data_file_type_desc));
-    std::vector<std::string> data_file_col_names;
-    std::vector<DataTypePtr> data_file_col_types;
-    RETURN_IF_ERROR(orc_reader->get_parsed_schema(&data_file_col_names, &data_file_col_types));
-    if (_row_lineage_columns != nullptr) {
-        const auto& table_desc = _range.table_format_params.iceberg_params;
-        _row_lineage_columns->first_row_id =
-                table_desc.__isset.first_row_id ? table_desc.first_row_id : -1;
-        _row_lineage_columns->last_updated_sequence_number =
-                table_desc.__isset.last_updated_sequence_number
-                        ? table_desc.last_updated_sequence_number
-                        : -1;
-        orc_reader->set_row_lineage_columns(_row_lineage_columns);
+// ============================================================================
+// IcebergParquetReader: _read_position_delete_file
+// ============================================================================
+Status IcebergParquetReader::_read_position_delete_file(const TFileRangeDesc* delete_range,
+                                                        DeleteFile* position_delete) {
+    ParquetReader parquet_delete_reader(get_profile(), get_scan_params(), *delete_range,
+                                        READ_DELETE_FILE_BATCH_SIZE, &get_state()->timezone_obj(),
+                                        get_io_ctx(), get_state(), _meta_cache);
+    // The delete file range has size=-1 (read whole file). We must disable
+    // row group filtering before init; otherwise _do_init_reader returns EndOfFile
+    // when _filter_groups && _range_size < 0.
+    ParquetInitContext delete_ctx;
+    delete_ctx.filter_groups = false;
+    delete_ctx.column_names = delete_file_col_names;
+    delete_ctx.col_name_to_block_idx =
+            const_cast<std::unordered_map<std::string, uint32_t>*>(&DELETE_COL_NAME_TO_BLOCK_IDX);
+    RETURN_IF_ERROR(parquet_delete_reader.init_reader(&delete_ctx));
+
+    const tparquet::FileMetaData* meta_data = parquet_delete_reader.get_meta_data();
+    bool dictionary_coded = true;
+    for (const auto& row_group : meta_data->row_groups) {
+        const auto& column_chunk = row_group.columns[ICEBERG_FILE_PATH_INDEX];
+        if (!(column_chunk.__isset.meta_data && has_dict_page(column_chunk.meta_data))) {
+            dictionary_coded = false;
+            break;
+        }
     }
+    DataTypePtr data_type_file_path {new DataTypeString};
+    DataTypePtr data_type_pos {new DataTypeInt64};
+    bool eof = false;
+    while (!eof) {
+        Block block = {dictionary_coded
+                               ? ColumnWithTypeAndName {ColumnDictI32::create(
+                                                                FieldType::OLAP_FIELD_TYPE_VARCHAR),
+                                                        data_type_file_path, ICEBERG_FILE_PATH}
+                               : ColumnWithTypeAndName {data_type_file_path, ICEBERG_FILE_PATH},
 
-    auto column_id_result = _create_column_ids(_data_file_type_desc, tuple_descriptor);
-    auto& column_ids = column_id_result.column_ids;
-    const auto& filter_column_ids = column_id_result.filter_column_ids;
+                       {data_type_pos, ICEBERG_ROW_POS}};
+        size_t read_rows = 0;
+        RETURN_IF_ERROR(parquet_delete_reader.get_next_block(&block, &read_rows, &eof));
 
-    RETURN_IF_ERROR(init_row_filters());
+        if (read_rows <= 0) {
+            break;
+        }
+        _gen_position_delete_file_range(block, position_delete, read_rows, dictionary_coded);
+    }
+    return Status::OK();
+};
 
-    _all_required_col_names = file_col_names;
-    if (!_params.__isset.history_schema_info || _params.history_schema_info.empty()) [[unlikely]] {
-        RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_name(tuple_descriptor, _data_file_type_desc,
-                                                        table_info_node_ptr));
-    } else {
-        std::set<std::string> read_col_name_set(file_col_names.begin(), file_col_names.end());
+// ============================================================================
+// IcebergOrcReader: on_before_init_reader (ORC-specific schema matching)
+// ============================================================================
+Status IcebergOrcReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    _file_format = Fileformat::ORC;
 
+    // Get ORC file type first (available because _create_file_reader() already ran)
+    const orc::Type* orc_type_ptr = nullptr;
+    RETURN_IF_ERROR(this->get_file_type(&orc_type_ptr));
+
+    // Build table_info_node by field_id or name matching.
+    // This must happen BEFORE column classification so we can use children_column_exists
+    // to check if a column exists in the file (by field ID, not name).
+    if (!get_scan_params().__isset.history_schema_info ||
+        get_scan_params().history_schema_info.empty()) [[unlikely]] {
+        RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_name(ctx->tuple_descriptor, orc_type_ptr,
+                                                        ctx->table_info_node));
+    } else {
         bool exist_field_id = true;
-        for (size_t idx = 0; idx < _data_file_type_desc->getSubtypeCount(); idx++) {
-            if (!_data_file_type_desc->getSubtype(idx)->hasAttributeKey(ICEBERG_ORC_ATTRIBUTE)) {
-                exist_field_id = false;
-                break;
-            }
+        RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_field_id(
+                get_scan_params().history_schema_info.front().root_field, orc_type_ptr,
+                ICEBERG_ORC_ATTRIBUTE, ctx->table_info_node, exist_field_id));
+        if (!exist_field_id) {
+            RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_name(ctx->tuple_descriptor, orc_type_ptr,
+                                                            ctx->table_info_node));
+        }
+    }
+
+    std::unordered_set<std::string> partition_col_names;
+    if (ctx->range->__isset.columns_from_path_keys) {
+        partition_col_names.insert(ctx->range->columns_from_path_keys.begin(),
+                                   ctx->range->columns_from_path_keys.end());
+    }
+
+    // Single pass: classify columns, detect $row_id, handle partition fallback.
+    bool has_partition_from_path = false;
+    for (auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::SYNTHESIZED &&
+            desc.name == BeConsts::ICEBERG_ROWID_COL) {
+            _need_row_id_column = true;
+            this->register_synthesized_column_handler(BeConsts::ICEBERG_ROWID_COL,
+                                                      [this](Block* block, size_t rows) -> Status {
+                                                          return _fill_iceberg_row_id(block, rows);
+                                                      });
+            continue;
         }
-
-        const auto& table_schema = _params.history_schema_info.front().root_field;
-        table_info_node_ptr = std::make_shared<TableSchemaChangeHelper::StructNode>();
-        if (exist_field_id) {
-            // id -> table column name. columns that need read data file.
-            std::unordered_map<int, std::shared_ptr<schema::external::TField>> id_to_table_field;
-            for (const auto& table_field : table_schema.fields) {
-                auto field = table_field.field_ptr;
-                DCHECK(field->__isset.name);
-                if (!read_col_name_set.contains(field->name)) {
+        if (desc.category == ColumnCategory::REGULAR) {
+            // Partition fallback: if column is a partition key and NOT in the file
+            // (checked via field ID matching in table_info_node), read from path instead.
+            if (partition_col_names.contains(desc.name) &&
+                !ctx->table_info_node->children_column_exists(desc.name)) {
+                if (config::enable_iceberg_partition_column_fallback) {
+                    desc.category = ColumnCategory::PARTITION_KEY;
+                    has_partition_from_path = true;
                     continue;
                 }
-
-                id_to_table_field.emplace(field->id, field);
             }
+            ctx->column_names.push_back(desc.name);
+        } else if (desc.category == ColumnCategory::GENERATED) {
+            ctx->column_names.push_back(desc.name);
+        }
+    }
 
-            for (int idx = 0; idx < _data_file_type_desc->getSubtypeCount(); idx++) {
-                const auto& data_file_field = _data_file_type_desc->getSubtype(idx);
-                auto data_file_column_id =
-                        std::stoi(data_file_field->getAttributeValue(ICEBERG_ORC_ATTRIBUTE));
-                auto const& file_column_name = _data_file_type_desc->getFieldName(idx);
-
-                if (id_to_table_field.contains(data_file_column_id)) {
-                    const auto& table_field = id_to_table_field[data_file_column_id];
-
-                    std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-                    RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_field_id(
-                            *table_field, data_file_field, ICEBERG_ORC_ATTRIBUTE, exist_field_id,
-                            field_node));
-                    table_info_node_ptr->add_children(table_field->name, file_column_name,
-                                                      field_node);
-
-                    _id_to_block_column_name.emplace(data_file_column_id, table_field->name);
-                    id_to_table_field.erase(data_file_column_id);
-                } else if (_equality_delete_col_ids.contains(data_file_column_id)) {
-                    // Columns that need to be read for equality delete.
-                    const static std::string EQ_DELETE_PRE = "__equality_delete_column__";
-
-                    // Construct table column names that avoid duplication with current table schema.
-                    // As the columns currently being read may have been deleted in the latest
-                    // table structure or have undergone a series of schema changes...
-                    std::string table_column_name = EQ_DELETE_PRE + file_column_name;
-                    table_info_node_ptr->add_children(
-                            table_column_name, file_column_name,
-                            std::make_shared<TableSchemaChangeHelper::ConstNode>());
-
-                    _id_to_block_column_name.emplace(data_file_column_id, table_column_name);
-                    _expand_col_names.emplace_back(table_column_name);
-
-                    auto expand_data_type = make_nullable(data_file_col_types[idx]);
-                    _expand_columns.emplace_back(
-                            ColumnWithTypeAndName {expand_data_type->create_column(),
-                                                   expand_data_type, table_column_name});
-
-                    _all_required_col_names.emplace_back(table_column_name);
-                    column_ids.insert(data_file_field->getColumnId());
-                }
-            }
-            for (const auto& [id, table_field] : id_to_table_field) {
-                table_info_node_ptr->add_not_exist_children(table_field->name);
-            }
-        } else {
-            if (!_equality_delete_col_ids.empty()) [[unlikely]] {
-                return Status::InternalError(
-                        "Can not read missing field id data file when have equality delete");
-            }
-            std::map<std::string, size_t> file_column_idx_map;
-            for (int idx = 0; idx < _data_file_type_desc->getSubtypeCount(); idx++) {
-                auto const& file_column_name = _data_file_type_desc->getFieldName(idx);
-                file_column_idx_map.emplace(file_column_name, idx);
+    if (has_partition_from_path) {
+        RETURN_IF_ERROR(_extract_partition_values(*ctx->range, ctx->tuple_descriptor,
+                                                  _fill_partition_values));
+    }
+
+    _all_required_col_names = ctx->column_names;
+
+    // Create column IDs from ORC type
+    auto column_id_result = _create_column_ids(orc_type_ptr, ctx->tuple_descriptor);
+    ctx->column_ids = std::move(column_id_result.column_ids);
+    ctx->filter_column_ids = std::move(column_id_result.filter_column_ids);
+
+    // Build field_id -> block_column_name mapping for equality delete filtering.
+    for (const auto* slot : ctx->tuple_descriptor->slots()) {
+        _id_to_block_column_name.emplace(slot->col_unique_id(), slot->col_name());
+    }
+
+    // Process delete files (must happen before _do_init_reader so expand col IDs are included)
+    RETURN_IF_ERROR(_init_row_filters());
+
+    // Add expand column IDs for equality delete and remap expand column names
+    // (matching master's behavior with __equality_delete_column__ prefix)
+    const static std::string EQ_DELETE_PRE = "__equality_delete_column__";
+    std::unordered_map<int, std::string> field_id_to_file_col_name;
+    for (uint64_t i = 0; i < orc_type_ptr->getSubtypeCount(); ++i) {
+        std::string col_name = orc_type_ptr->getFieldName(i);
+        const orc::Type* sub_type = orc_type_ptr->getSubtype(i);
+        if (sub_type->hasAttributeKey(ICEBERG_ORC_ATTRIBUTE)) {
+            int fid = std::stoi(sub_type->getAttributeValue(ICEBERG_ORC_ATTRIBUTE));
+            field_id_to_file_col_name[fid] = col_name;
+        }
+    }
+
+    std::vector<std::string> new_expand_col_names;
+    for (size_t i = 0; i < _expand_col_names.size(); ++i) {
+        const auto& old_name = _expand_col_names[i];
+        int field_id = -1;
+        for (auto& [fid, name] : _id_to_block_column_name) {
+            if (name == old_name) {
+                field_id = fid;
+                break;
             }
+        }
 
-            for (const auto& table_field : table_schema.fields) {
-                DCHECK(table_field.__isset.field_ptr);
-                DCHECK(table_field.field_ptr->__isset.name);
-                const auto& table_column_name = table_field.field_ptr->name;
-                if (!read_col_name_set.contains(table_column_name)) {
-                    continue;
-                }
-                if (!table_field.field_ptr->__isset.name_mapping ||
-                    table_field.field_ptr->name_mapping.size() == 0) {
-                    return Status::DataQualityError(
-                            "name_mapping must be set when read missing field id data file.");
-                }
-                auto have_mapping = false;
-                for (const auto& mapped_name : table_field.field_ptr->name_mapping) {
-                    if (file_column_idx_map.contains(mapped_name)) {
-                        auto file_column_idx = file_column_idx_map.at(mapped_name);
-                        std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-                        const auto& file_field = _data_file_type_desc->getSubtype(file_column_idx);
-                        RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_field_id(
-                                *table_field.field_ptr, file_field, ICEBERG_ORC_ATTRIBUTE,
-                                exist_field_id, field_node));
-                        table_info_node_ptr->add_children(
-                                table_column_name,
-                                _data_file_type_desc->getFieldName(file_column_idx), field_node);
-                        have_mapping = true;
-                        break;
-                    }
-                }
-                if (!have_mapping) {
-                    table_info_node_ptr->add_not_exist_children(table_column_name);
+        std::string file_col_name = old_name;
+        auto it = field_id_to_file_col_name.find(field_id);
+        if (it != field_id_to_file_col_name.end()) {
+            file_col_name = it->second;
+        }
+
+        std::string table_col_name = EQ_DELETE_PRE + file_col_name;
+
+        if (field_id >= 0) {
+            _id_to_block_column_name[field_id] = table_col_name;
+        }
+        if (i < _expand_columns.size()) {
+            _expand_columns[i].name = table_col_name;
+        }
+        new_expand_col_names.push_back(table_col_name);
+
+        // Add column IDs
+        if (it != field_id_to_file_col_name.end()) {
+            for (uint64_t j = 0; j < orc_type_ptr->getSubtypeCount(); ++j) {
+                const orc::Type* sub_type = orc_type_ptr->getSubtype(j);
+                if (orc_type_ptr->getFieldName(j) == file_col_name) {
+                    ctx->column_ids.insert(sub_type->getColumnId());
+                    break;
                 }
             }
         }
+
+        ctx->column_names.push_back(table_col_name);
+        ctx->table_info_node->add_children(table_col_name, file_col_name,
+                                           TableSchemaChangeHelper::ConstNode::get_instance());
     }
+    _expand_col_names = std::move(new_expand_col_names);
 
-    return orc_reader->init_reader(&_all_required_col_names, _col_name_to_block_idx, conjuncts,
-                                   false, tuple_descriptor, row_descriptor,
-                                   not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts,
-                                   table_info_node_ptr, column_ids, filter_column_ids);
+    return Status::OK();
 }
 
+// ============================================================================
+// IcebergOrcReader: _create_column_ids
+// ============================================================================
 ColumnIdResult IcebergOrcReader::_create_column_ids(const orc::Type* orc_type,
                                                     const TupleDescriptor* tuple_descriptor) {
-    // map top-level table column iceberg_id -> orc::Type*
     std::unordered_map<int, const orc::Type*> iceberg_id_to_orc_type_map;
     for (uint64_t i = 0; i < orc_type->getSubtypeCount(); ++i) {
         auto orc_sub_type = orc_type->getSubtype(i);
         if (!orc_sub_type) continue;
-
         if (!orc_sub_type->hasAttributeKey(ICEBERG_ORC_ATTRIBUTE)) {
             continue;
         }
@@ -838,7 +561,6 @@ ColumnIdResult IcebergOrcReader::_create_column_ids(const orc::Type* orc_type,
     std::set<uint64_t> column_ids;
     std::set<uint64_t> filter_column_ids;
 
-    // helper to process access paths for a given top-level orc field
     auto process_access_paths = [](const orc::Type* orc_field,
                                    const std::vector<TColumnAccessPath>& access_paths,
                                    std::set<uint64_t>& out_ids) {
@@ -852,12 +574,10 @@ ColumnIdResult IcebergOrcReader::_create_column_ids(const orc::Type* orc_type,
     for (const auto* slot : tuple_descriptor->slots()) {
         auto it = iceberg_id_to_orc_type_map.find(slot->col_unique_id());
         if (it == iceberg_id_to_orc_type_map.end()) {
-            // Column not found in file
             continue;
         }
         const orc::Type* orc_field = it->second;
 
-        // primitive (non-nested) types
         if ((slot->col_type() != TYPE_STRUCT && slot->col_type() != TYPE_ARRAY &&
              slot->col_type() != TYPE_MAP)) {
             column_ids.insert(orc_field->getColumnId());
@@ -867,7 +587,6 @@ ColumnIdResult IcebergOrcReader::_create_column_ids(const orc::Type* orc_type,
             continue;
         }
 
-        // complex types
         const auto& all_access_paths = slot->all_access_paths();
         process_access_paths(orc_field, all_access_paths, column_ids);
 
@@ -880,378 +599,33 @@ ColumnIdResult IcebergOrcReader::_create_column_ids(const orc::Type* orc_type,
     return ColumnIdResult(std::move(column_ids), std::move(filter_column_ids));
 }
 
-// Directly read the deletion vector using the `content_offset` and
-// `content_size_in_bytes` provided by FE in `delete_file_desc`.
-// These two fields indicate the location of a blob in storage.
-// Since the current format is `deletion-vector-v1`, which does not
-// compress any blobs, we can temporarily skip parsing the Puffin footer.
-Status IcebergTableReader::read_deletion_vector(const std::string& data_file_path,
-                                                const TIcebergDeleteFileDesc& delete_file_desc) {
-    Status create_status = Status::OK();
-    SCOPED_TIMER(_iceberg_profile.delete_files_read_time);
-    _iceberg_delete_rows = _kv_cache->get<DeleteRows>(data_file_path, [&]() -> DeleteRows* {
-        auto* delete_rows = new DeleteRows;
-
-        TFileRangeDesc delete_range;
-        // must use __set() method to make sure __isset is true
-        delete_range.__set_fs_name(_range.fs_name);
-        delete_range.path = delete_file_desc.path;
-        delete_range.start_offset = delete_file_desc.content_offset;
-        delete_range.size = delete_file_desc.content_size_in_bytes;
-        delete_range.file_size = -1;
-
-        // We may consider caching the DeletionVectorReader when reading Puffin files,
-        // where the underlying reader is an `InMemoryFileReader` and a single data file is
-        // split into multiple splits. However, we need to ensure that the underlying
-        // reader supports multi-threaded access.
-        DeletionVectorReader dv_reader(_state, _profile, _params, delete_range, _io_ctx);
-        create_status = dv_reader.open();
-        if (!create_status.ok()) [[unlikely]] {
-            return nullptr;
-        }
-
-        size_t buffer_size = delete_range.size;
-        std::vector<char> buf(buffer_size);
-        if (buffer_size < 12) [[unlikely]] {
-            // Minimum size: 4 bytes length + 4 bytes magic + 4 bytes CRC32
-            create_status = Status::DataQualityError("Deletion vector file size too small: {}",
-                                                     buffer_size);
-            return nullptr;
-        }
-
-        create_status = dv_reader.read_at(delete_range.start_offset, {buf.data(), buffer_size});
-        if (!create_status) [[unlikely]] {
-            return nullptr;
-        }
-        // The serialized blob contains:
-        //
-        // Combined length of the vector and magic bytes stored as 4 bytes, big-endian
-        // A 4-byte magic sequence, D1 D3 39 64
-        // The vector, serialized as described below
-        // A CRC-32 checksum of the magic bytes and serialized vector as 4 bytes, big-endian
-
-        auto total_length = BigEndian::Load32(buf.data());
-        if (total_length + 8 != buffer_size) [[unlikely]] {
-            create_status = Status::DataQualityError(
-                    "Deletion vector length mismatch, expected: {}, actual: {}", total_length + 8,
-                    buffer_size);
-            return nullptr;
-        }
-
-        constexpr static char MAGIC_NUMBER[] = {'\xD1', '\xD3', '\x39', '\x64'};
-        if (memcmp(buf.data() + sizeof(total_length), MAGIC_NUMBER, 4)) [[unlikely]] {
-            create_status = Status::DataQualityError("Deletion vector magic number mismatch");
-            return nullptr;
-        }
-
-        roaring::Roaring64Map bitmap;
-        SCOPED_TIMER(_iceberg_profile.parse_delete_file_time);
-        try {
-            bitmap = roaring::Roaring64Map::readSafe(buf.data() + 8, buffer_size - 12);
-        } catch (const std::runtime_error& e) {
-            create_status = Status::DataQualityError("Decode roaring bitmap failed, {}", e.what());
-            return nullptr;
-        }
-        // skip CRC-32 checksum
-
-        delete_rows->reserve(bitmap.cardinality());
-        for (auto it = bitmap.begin(); it != bitmap.end(); it++) {
-            delete_rows->push_back(*it);
-        }
-        COUNTER_UPDATE(_iceberg_profile.num_delete_rows, delete_rows->size());
-        return delete_rows;
-    });
-
-    RETURN_IF_ERROR(create_status);
-    if (!_iceberg_delete_rows->empty()) [[likely]] {
-        set_delete_rows();
-    }
-    return Status::OK();
-}
-
-// Similar to the code structure of IcebergOrcReader::_process_equality_delete,
-// but considering the significant differences in how parquet/orc obtains
-// attributes/column IDs, it is not easy to combine them.
-Status IcebergParquetReader::_process_equality_delete(
-        const std::vector<TIcebergDeleteFileDesc>& delete_files) {
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-
-    std::map<int, const FieldSchema*> data_file_id_to_field_schema;
-    for (int idx = 0; idx < _data_file_field_desc->size(); ++idx) {
-        auto field_schema = _data_file_field_desc->get_column(idx);
-        if (_data_file_field_desc->get_column(idx)->field_id == -1) {
-            return Status::DataQualityError("Iceberg equality delete data file missing field id.");
-        }
-        data_file_id_to_field_schema[_data_file_field_desc->get_column(idx)->field_id] =
-                field_schema;
-    }
-
-    for (const auto& delete_file : delete_files) {
-        TFileRangeDesc delete_desc;
-        // must use __set() method to make sure __isset is true
-        delete_desc.__set_fs_name(_range.fs_name);
-        delete_desc.path = delete_file.path;
-        delete_desc.start_offset = 0;
-        delete_desc.size = -1;
-        delete_desc.file_size = -1;
-
-        if (!delete_file.__isset.field_ids) [[unlikely]] {
-            return Status::InternalError(
-                    "missing delete field ids when reading equality delete file");
-        }
-        auto& read_column_field_ids = delete_file.field_ids;
-        std::set<int> read_column_field_ids_set;
-        for (const auto& field_id : read_column_field_ids) {
-            read_column_field_ids_set.insert(field_id);
-            _equality_delete_col_ids.insert(field_id);
-        }
-
-        auto delete_reader = ParquetReader::create_unique(
-                _profile, _params, delete_desc, READ_DELETE_FILE_BATCH_SIZE,
-                &_state->timezone_obj(), _io_ctx, _state, _meta_cache);
-        RETURN_IF_ERROR(delete_reader->init_schema_reader());
-
-        // the column that to read equality delete file.
-        // (delete file may be have extra columns that don't need to read)
-        std::vector<std::string> delete_col_names;
-        std::vector<DataTypePtr> delete_col_types;
-        std::vector<int> delete_col_ids;
-        std::unordered_map<std::string, uint32_t> delete_col_name_to_block_idx;
-
-        const FieldDescriptor* delete_field_desc = nullptr;
-        RETURN_IF_ERROR(delete_reader->get_file_metadata_schema(&delete_field_desc));
-        DCHECK(delete_field_desc != nullptr);
-
-        auto eq_file_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
-        for (const auto& delete_file_field : delete_field_desc->get_fields_schema()) {
-            if (delete_file_field.field_id == -1) [[unlikely]] { // missing delete_file_field id
-                // equality delete file must have delete_file_field id to match column.
-                return Status::DataQualityError(
-                        "missing delete_file_field id when reading equality delete file");
-            } else if (read_column_field_ids_set.contains(delete_file_field.field_id)) {
-                // the column that need to read.
-                if (delete_file_field.children.size() > 0) [[unlikely]] { // complex column
-                    return Status::InternalError(
-                            "can not support read complex column in equality delete file");
-                } else if (!data_file_id_to_field_schema.contains(delete_file_field.field_id))
-                        [[unlikely]] {
-                    return Status::DataQualityError(
-                            "can not find delete field id in data file schema when reading "
-                            "equality delete file");
-                }
-                auto data_file_field = data_file_id_to_field_schema[delete_file_field.field_id];
-                if (data_file_field->data_type->get_primitive_type() !=
-                    delete_file_field.data_type->get_primitive_type()) [[unlikely]] {
-                    return Status::NotSupported(
-                            "Not Support type change in equality delete, field: {}, delete "
-                            "file type: {}, data file type: {}",
-                            delete_file_field.field_id, delete_file_field.data_type->get_name(),
-                            data_file_field->data_type->get_name());
-                }
-
-                std::string filed_lower_name = to_lower(delete_file_field.name);
-                eq_file_node->add_children(filed_lower_name, delete_file_field.name,
-                                           std::make_shared<TableSchemaChangeHelper::ScalarNode>());
-
-                delete_col_ids.emplace_back(delete_file_field.field_id);
-                delete_col_names.emplace_back(filed_lower_name);
-                delete_col_types.emplace_back(make_nullable(delete_file_field.data_type));
-
-                read_column_field_ids_set.erase(delete_file_field.field_id);
-            } else {
-                // delete file may be have extra columns that don't need to read
-            }
-        }
-        if (!read_column_field_ids_set.empty()) [[unlikely]] {
-            return Status::DataQualityError("some field ids not found in equality delete file.");
-        }
-
-        for (uint32_t idx = 0; idx < delete_col_names.size(); ++idx) {
-            delete_col_name_to_block_idx[delete_col_names[idx]] = idx;
-        }
-        phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
-        RETURN_IF_ERROR(delete_reader->init_reader(delete_col_names, &delete_col_name_to_block_idx,
-                                                   {}, tmp, nullptr, nullptr, nullptr, nullptr,
-                                                   nullptr, eq_file_node, false));
-        RETURN_IF_ERROR(delete_reader->set_fill_columns(partition_columns, missing_columns));
-
-        if (!_equality_delete_block_map.contains(delete_col_ids)) {
-            _equality_delete_block_map.emplace(delete_col_ids, _equality_delete_blocks.size());
-            Block block;
-            _generate_equality_delete_block(&block, delete_col_names, delete_col_types);
-            _equality_delete_blocks.emplace_back(block);
-        }
-        Block& eq_file_block = _equality_delete_blocks[_equality_delete_block_map[delete_col_ids]];
-        bool eof = false;
-        while (!eof) {
-            Block tmp_block;
-            _generate_equality_delete_block(&tmp_block, delete_col_names, delete_col_types);
-            size_t read_rows = 0;
-            RETURN_IF_ERROR(delete_reader->get_next_block(&tmp_block, &read_rows, &eof));
-            if (read_rows > 0) {
-                MutableBlock mutable_block(&eq_file_block);
-                RETURN_IF_ERROR(mutable_block.merge(tmp_block));
-            }
-        }
-    }
-
-    for (const auto& [delete_col_ids, block_idx] : _equality_delete_block_map) {
-        auto& eq_file_block = _equality_delete_blocks[block_idx];
-        auto equality_delete_impl =
-                EqualityDeleteBase::get_delete_impl(&eq_file_block, delete_col_ids);
-        RETURN_IF_ERROR(equality_delete_impl->init(_profile));
-        _equality_delete_impls.emplace_back(std::move(equality_delete_impl));
+// ============================================================================
+// IcebergOrcReader: _read_position_delete_file
+// ============================================================================
+Status IcebergOrcReader::_read_position_delete_file(const TFileRangeDesc* delete_range,
+                                                    DeleteFile* position_delete) {
+    OrcReader orc_delete_reader(get_profile(), get_state(), get_scan_params(), *delete_range,
+                                READ_DELETE_FILE_BATCH_SIZE, get_state()->timezone(), get_io_ctx(),
+                                _meta_cache);
+    OrcInitContext delete_ctx;
+    delete_ctx.column_names = delete_file_col_names;
+    delete_ctx.col_name_to_block_idx =
+            const_cast<std::unordered_map<std::string, uint32_t>*>(&DELETE_COL_NAME_TO_BLOCK_IDX);
+    RETURN_IF_ERROR(orc_delete_reader.init_reader(&delete_ctx));
+
+    bool eof = false;
+    DataTypePtr data_type_file_path {new DataTypeString};
+    DataTypePtr data_type_pos {new DataTypeInt64};
+    while (!eof) {
+        Block block = {{data_type_file_path, ICEBERG_FILE_PATH}, {data_type_pos, ICEBERG_ROW_POS}};
+
+        size_t read_rows = 0;
+        RETURN_IF_ERROR(orc_delete_reader.get_next_block(&block, &read_rows, &eof));
+
+        _gen_position_delete_file_range(block, position_delete, read_rows, false);
     }
     return Status::OK();
 }
 
-Status IcebergOrcReader::_process_equality_delete(
-        const std::vector<TIcebergDeleteFileDesc>& delete_files) {
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-
-    std::map<int, int> data_file_id_to_field_idx;
-    for (int idx = 0; idx < _data_file_type_desc->getSubtypeCount(); ++idx) {
-        if (!_data_file_type_desc->getSubtype(idx)->hasAttributeKey(ICEBERG_ORC_ATTRIBUTE)) {
-            return Status::DataQualityError("Iceberg equality delete data file missing field id.");
-        }
-        auto field_id = std::stoi(
-                _data_file_type_desc->getSubtype(idx)->getAttributeValue(ICEBERG_ORC_ATTRIBUTE));
-        data_file_id_to_field_idx[field_id] = idx;
-    }
-
-    for (const auto& delete_file : delete_files) {
-        TFileRangeDesc delete_desc;
-        // must use __set() method to make sure __isset is true
-        delete_desc.__set_fs_name(_range.fs_name);
-        delete_desc.path = delete_file.path;
-        delete_desc.start_offset = 0;
-        delete_desc.size = -1;
-        delete_desc.file_size = -1;
-
-        if (!delete_file.__isset.field_ids) [[unlikely]] {
-            return Status::InternalError(
-                    "missing delete field ids when reading equality delete file");
-        }
-        auto& read_column_field_ids = delete_file.field_ids;
-        std::set<int> read_column_field_ids_set;
-        for (const auto& field_id : read_column_field_ids) {
-            read_column_field_ids_set.insert(field_id);
-            _equality_delete_col_ids.insert(field_id);
-        }
-
-        auto delete_reader = OrcReader::create_unique(_profile, _state, _params, delete_desc,
-                                                      READ_DELETE_FILE_BATCH_SIZE,
-                                                      _state->timezone(), _io_ctx, _meta_cache);
-        RETURN_IF_ERROR(delete_reader->init_schema_reader());
-        // delete file schema
-        std::vector<std::string> delete_file_col_names;
-        std::vector<DataTypePtr> delete_file_col_types;
-        RETURN_IF_ERROR(
-                delete_reader->get_parsed_schema(&delete_file_col_names, &delete_file_col_types));
-
-        // the column that to read equality delete file.
-        // (delete file maybe have extra columns that don't need to read)
-        std::vector<std::string> delete_col_names;
-        std::vector<DataTypePtr> delete_col_types;
-        std::vector<int> delete_col_ids;
-        std::unordered_map<std::string, uint32_t> delete_col_name_to_block_idx;
-
-        const orc::Type* delete_field_desc = nullptr;
-        RETURN_IF_ERROR(delete_reader->get_file_type(&delete_field_desc));
-        DCHECK(delete_field_desc != nullptr);
-
-        auto eq_file_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
-
-        for (size_t idx = 0; idx < delete_field_desc->getSubtypeCount(); idx++) {
-            auto delete_file_field = delete_field_desc->getSubtype(idx);
-
-            if (!delete_file_field->hasAttributeKey(ICEBERG_ORC_ATTRIBUTE))
-                    [[unlikely]] { // missing delete_file_field id
-                // equality delete file must have delete_file_field id to match column.
-                return Status::DataQualityError(
-                        "missing delete_file_field id when reading equality delete file");
-            } else {
-                auto delete_field_id =
-                        std::stoi(delete_file_field->getAttributeValue(ICEBERG_ORC_ATTRIBUTE));
-                if (read_column_field_ids_set.contains(delete_field_id)) {
-                    // the column that need to read.
-                    if (is_complex_type(delete_file_col_types[idx]->get_primitive_type()))
-                            [[unlikely]] {
-                        return Status::InternalError(
-                                "can not support read complex column in equality delete file.");
-                    } else if (!data_file_id_to_field_idx.contains(delete_field_id)) [[unlikely]] {
-                        return Status::DataQualityError(
-                                "can not find delete field id in data file schema when reading "
-                                "equality delete file");
-                    }
-
-                    auto data_file_field = _data_file_type_desc->getSubtype(
-                            data_file_id_to_field_idx[delete_field_id]);
-
-                    if (delete_file_field->getKind() != data_file_field->getKind()) [[unlikely]] {
-                        return Status::NotSupported(
-                                "Not Support type change in equality delete, field: {}, delete "
-                                "file type: {}, data file type: {}",
-                                delete_field_id, delete_file_field->getKind(),
-                                data_file_field->getKind());
-                    }
-                    std::string filed_lower_name = to_lower(delete_field_desc->getFieldName(idx));
-                    eq_file_node->add_children(
-                            filed_lower_name, delete_field_desc->getFieldName(idx),
-                            std::make_shared<TableSchemaChangeHelper::ScalarNode>());
-
-                    delete_col_ids.emplace_back(delete_field_id);
-                    delete_col_names.emplace_back(filed_lower_name);
-                    delete_col_types.emplace_back(make_nullable(delete_file_col_types[idx]));
-                    read_column_field_ids_set.erase(delete_field_id);
-                }
-            }
-        }
-        if (!read_column_field_ids_set.empty()) [[unlikely]] {
-            return Status::DataQualityError("some field ids not found in equality delete file.");
-        }
-
-        for (uint32_t idx = 0; idx < delete_col_names.size(); ++idx) {
-            delete_col_name_to_block_idx[delete_col_names[idx]] = idx;
-        }
-
-        RETURN_IF_ERROR(delete_reader->init_reader(&delete_col_names, &delete_col_name_to_block_idx,
-                                                   {}, false, nullptr, nullptr, nullptr, nullptr,
-                                                   eq_file_node));
-        RETURN_IF_ERROR(delete_reader->set_fill_columns(partition_columns, missing_columns));
-
-        if (!_equality_delete_block_map.contains(delete_col_ids)) {
-            _equality_delete_block_map.emplace(delete_col_ids, _equality_delete_blocks.size());
-            Block block;
-            _generate_equality_delete_block(&block, delete_col_names, delete_col_types);
-            _equality_delete_blocks.emplace_back(block);
-        }
-        Block& eq_file_block = _equality_delete_blocks[_equality_delete_block_map[delete_col_ids]];
-        bool eof = false;
-        while (!eof) {
-            Block tmp_block;
-            _generate_equality_delete_block(&tmp_block, delete_col_names, delete_col_types);
-            size_t read_rows = 0;
-            RETURN_IF_ERROR(delete_reader->get_next_block(&tmp_block, &read_rows, &eof));
-            if (read_rows > 0) {
-                MutableBlock mutable_block(&eq_file_block);
-                RETURN_IF_ERROR(mutable_block.merge(tmp_block));
-            }
-        }
-    }
-
-    for (const auto& [delete_col_ids, block_idx] : _equality_delete_block_map) {
-        auto& eq_file_block = _equality_delete_blocks[block_idx];
-        auto equality_delete_impl =
-                EqualityDeleteBase::get_delete_impl(&eq_file_block, delete_col_ids);
-        RETURN_IF_ERROR(equality_delete_impl->init(_profile));
-        _equality_delete_impls.emplace_back(std::move(equality_delete_impl));
-    }
-    return Status::OK();
-}
 #include "common/compile_check_end.h"
 } // namespace doris
diff --git a/be/src/format/table/iceberg_reader.h b/be/src/format/table/iceberg_reader.h
index e8f33c9ee29006..d21c661f207ad7 100644
--- a/be/src/format/table/iceberg_reader.h
+++ b/be/src/format/table/iceberg_reader.h
@@ -26,15 +26,18 @@
 #include <vector>
 
 #include "common/status.h"
-#include "exprs/vslot_ref.h"
+#include "core/column/column_dictionary.h"
+#include "core/data_type/define_primitive_type.h"
+#include "core/data_type/primitive_type.h"
+#include "core/types.h"
 #include "format/orc/vorc_reader.h"
 #include "format/parquet/vparquet_reader.h"
-#include "format/table/equality_delete.h"
-#include "format/table/table_format_reader.h"
-#include "storage/olap_scan_common.h"
+#include "format/table/iceberg_reader_mixin.h"
+#include "storage/olap_common.h"
 
 namespace tparquet {
 class KeyValue;
+class ColumnMetaData;
 } // namespace tparquet
 
 namespace doris {
@@ -70,191 +73,92 @@ struct RowLineageColumns {
     }
 };
 
-class IcebergTableReader : public TableFormatReader, public TableSchemaChangeHelper {
-public:
+struct IcebergTableReader {
     static constexpr const char* ROW_LINEAGE_ROW_ID = "_row_id";
     static constexpr const char* ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER =
             "_last_updated_sequence_number";
 
-    IcebergTableReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-                       RuntimeState* state, const TFileScanRangeParams& params,
-                       const TFileRangeDesc& range, ShardedKVCache* kv_cache, io::IOContext* io_ctx,
-                       FileMetaCache* meta_cache);
-    ~IcebergTableReader() override = default;
-
-    void set_need_row_id_column(bool need) { _need_row_id_column = need; }
-    bool need_row_id_column() const { return _need_row_id_column; }
-    void set_row_id_column_position(int position) { _row_id_column_position = position; }
-
-    Status init_row_filters() final;
-
-    Status get_next_block_inner(Block* block, size_t* read_rows, bool* eof) final;
-
-    enum { DATA, POSITION_DELETE, EQUALITY_DELETE, DELETION_VECTOR };
-    enum Fileformat { NONE, PARQUET, ORC, AVRO };
-
-    virtual void set_delete_rows() = 0;
-
-    bool has_delete_operations() const override {
-        return _equality_delete_impls.size() > 0 || TableFormatReader::has_delete_operations();
-    }
-
-    Status read_deletion_vector(const std::string& data_file_path,
-                                const TIcebergDeleteFileDesc& delete_file_desc);
-
-    void set_row_lineage_columns(std::shared_ptr<RowLineageColumns> row_lineage_columns) {
-        _row_lineage_columns = std::move(row_lineage_columns);
-    }
-
     static bool _is_fully_dictionary_encoded(const tparquet::ColumnMetaData& column_metadata);
-
-protected:
-    struct IcebergProfile {
-        RuntimeProfile::Counter* num_delete_files;
-        RuntimeProfile::Counter* num_delete_rows;
-        RuntimeProfile::Counter* delete_files_read_time;
-        RuntimeProfile::Counter* delete_rows_sort_time;
-        RuntimeProfile::Counter* parse_delete_file_time;
-    };
-    using DeleteRows = std::vector<int64_t>;
-    using DeleteFile = phmap::parallel_flat_hash_map<
-            std::string, std::unique_ptr<DeleteRows>, std::hash<std::string>, std::equal_to<>,
-            std::allocator<std::pair<const std::string, std::unique_ptr<DeleteRows>>>, 8,
-            std::mutex>;
-
-    // $row_id metadata column generation state
-    bool _need_row_id_column = false;
-    int _row_id_column_position = -1;
-    /**
-     * https://iceberg.apache.org/spec/#position-delete-files
-     * The rows in the delete file must be sorted by file_path then position to optimize filtering rows while scanning.
-     * Sorting by file_path allows filter pushdown by file in columnar storage formats.
-     * Sorting by position allows filtering rows while scanning, to avoid keeping deletes in memory.
-     */
-    static void _sort_delete_rows(const std::vector<std::vector<int64_t>*>& delete_rows_array,
-                                  int64_t num_delete_rows, std::vector<int64_t>& result);
-
-    static std::string _delet_file_cache_key(const std::string& path) { return "delete_" + path; }
-
-    Status _position_delete_base(const std::string data_file_path,
-                                 const std::vector<TIcebergDeleteFileDesc>& delete_files);
-    virtual Status _process_equality_delete(
-            const std::vector<TIcebergDeleteFileDesc>& delete_files) = 0;
-    void _generate_equality_delete_block(Block* block,
-                                         const std::vector<std::string>& equality_delete_col_names,
-                                         const std::vector<DataTypePtr>& equality_delete_col_types);
-    // Equality delete should read the primary columns. Add the missing columns
-    Status _expand_block_if_need(Block* block);
-    // Remove the added delete columns
-    Status _shrink_block_if_need(Block* block);
-
-    // owned by scan node
-    ShardedKVCache* _kv_cache;
-    IcebergProfile _iceberg_profile;
-    // _iceberg_delete_rows from kv_cache
-    const std::vector<int64_t>* _iceberg_delete_rows = nullptr;
-
-    // Pointer to external column name to block index mapping (from FileScanner)
-    // Used to dynamically add expand columns for equality delete
-    std::unordered_map<std::string, uint32_t>* _col_name_to_block_idx = nullptr;
-
-    Fileformat _file_format = Fileformat::NONE;
-
-    const int64_t MIN_SUPPORT_DELETE_FILES_VERSION = 2;
-    const int READ_DELETE_FILE_BATCH_SIZE = 102400;
-
-    // Read a position delete file from the full Iceberg delete descriptor.
-    Status _read_position_delete_file(const TIcebergDeleteFileDesc&, DeleteFile*);
-
-    // read table colummn + extra equality delete columns
-    std::vector<std::string> _all_required_col_names;
-
-    // extra equality delete name and type
-    std::vector<std::string> _expand_col_names;
-    std::vector<ColumnWithTypeAndName> _expand_columns;
-
-    // all ids that need read for eq delete (from all qe delte file.)
-    std::set<int> _equality_delete_col_ids;
-    // eq delete column ids -> location of _equality_delete_blocks / _equality_delete_impls
-    std::map<std::vector<int>, int> _equality_delete_block_map;
-    // EqualityDeleteBase stores raw pointers to these blocks, so do not modify this vector after
-    // creating entries in _equality_delete_impls.
-    std::vector<Block> _equality_delete_blocks;
-    std::vector<std::unique_ptr<EqualityDeleteBase>> _equality_delete_impls;
-
-    // id -> block column name.
-    std::unordered_map<int, std::string> _id_to_block_column_name;
-
-    std::shared_ptr<RowLineageColumns> _row_lineage_columns;
 };
 
-class IcebergParquetReader final : public IcebergTableReader {
+// IcebergParquetReader: inherits ParquetReader via IcebergReaderMixin CRTP
+class IcebergParquetReader final : public IcebergReaderMixin<ParquetReader> {
 public:
     ENABLE_FACTORY_CREATOR(IcebergParquetReader);
 
-    IcebergParquetReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-                         RuntimeState* state, const TFileScanRangeParams& params,
-                         const TFileRangeDesc& range, ShardedKVCache* kv_cache,
-                         io::IOContext* io_ctx, FileMetaCache* meta_cache)
-            : IcebergTableReader(std::move(file_format_reader), profile, state, params, range,
-                                 kv_cache, io_ctx, meta_cache) {}
-    Status init_reader(
-            const std::vector<std::string>& file_col_names,
-            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-            const VExprContextSPtrs& conjuncts,
-            phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>&
-                    slot_id_to_predicates,
-            const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
-            const std::unordered_map<std::string, int>* colname_to_slot_id,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
+    IcebergParquetReader(ShardedKVCache* kv_cache, RuntimeProfile* profile,
+                         const TFileScanRangeParams& params, const TFileRangeDesc& range,
+                         size_t batch_size, const cctz::time_zone* ctz, io::IOContext* io_ctx,
+                         RuntimeState* state, FileMetaCache* meta_cache)
+            : IcebergReaderMixin<ParquetReader>(kv_cache, profile, params, range, batch_size, ctz,
+                                                io_ctx, state, meta_cache) {}
 
     void set_delete_rows() final {
-        auto* parquet_reader = (ParquetReader*)(_file_format_reader.get());
-        parquet_reader->set_delete_rows(_iceberg_delete_rows);
+        LOG(INFO) << "[PosDeleteDebug] IcebergParquetReader::set_delete_rows: _iceberg_delete_rows="
+                  << (_iceberg_delete_rows
+                              ? "set(" + std::to_string(_iceberg_delete_rows->size()) + ")"
+                              : "null");
+        // Call ParquetReader's set_delete_rows(const vector<int64_t>*)
+        ParquetReader::set_delete_rows(_iceberg_delete_rows);
+    }
+
+protected:
+    // Parquet-specific schema matching via on_before_init_reader hook
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
+
+    std::unique_ptr<GenericReader> _create_equality_reader(
+            const TFileRangeDesc& delete_desc) final {
+        return ParquetReader::create_unique(this->get_profile(), this->get_scan_params(),
+                                            delete_desc, READ_DELETE_FILE_BATCH_SIZE,
+                                            &this->get_state()->timezone_obj(), this->get_io_ctx(),
+                                            this->get_state(), this->_meta_cache);
     }
 
-private:
     static ColumnIdResult _create_column_ids(const FieldDescriptor* field_desc,
                                              const TupleDescriptor* tuple_descriptor);
-    Status _process_equality_delete(const std::vector<TIcebergDeleteFileDesc>& delete_files) final;
 
-    const FieldDescriptor* _data_file_field_desc = nullptr;
+private:
+    Status _read_position_delete_file(const TFileRangeDesc* delete_range,
+                                      DeleteFile* position_delete) final;
 };
-class IcebergOrcReader final : public IcebergTableReader {
+
+// IcebergOrcReader: inherits OrcReader via IcebergReaderMixin CRTP
+class IcebergOrcReader final : public IcebergReaderMixin<OrcReader> {
 public:
     ENABLE_FACTORY_CREATOR(IcebergOrcReader);
 
-    IcebergOrcReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-                     RuntimeState* state, const TFileScanRangeParams& params,
-                     const TFileRangeDesc& range, ShardedKVCache* kv_cache, io::IOContext* io_ctx,
+    IcebergOrcReader(ShardedKVCache* kv_cache, RuntimeProfile* profile, RuntimeState* state,
+                     const TFileScanRangeParams& params, const TFileRangeDesc& range,
+                     size_t batch_size, const std::string& ctz, io::IOContext* io_ctx,
                      FileMetaCache* meta_cache)
-            : IcebergTableReader(std::move(file_format_reader), profile, state, params, range,
-                                 kv_cache, io_ctx, meta_cache) {}
+            : IcebergReaderMixin<OrcReader>(kv_cache, profile, state, params, range, batch_size,
+                                            ctz, io_ctx, meta_cache) {}
 
     void set_delete_rows() final {
-        auto* orc_reader = (OrcReader*)_file_format_reader.get();
-        orc_reader->set_position_delete_rowids(_iceberg_delete_rows);
+        // Call OrcReader's set_position_delete_rowids
+        this->set_position_delete_rowids(_iceberg_delete_rows);
     }
 
-    Status init_reader(
-            const std::vector<std::string>& file_col_names,
-            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
-            const RowDescriptor* row_descriptor,
-            const std::unordered_map<std::string, int>* colname_to_slot_id,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
-
-private:
-    Status _process_equality_delete(const std::vector<TIcebergDeleteFileDesc>& delete_files) final;
+protected:
+    // ORC-specific schema matching via on_before_init_reader hook
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
+
+    std::unique_ptr<GenericReader> _create_equality_reader(
+            const TFileRangeDesc& delete_desc) override {
+        return OrcReader::create_unique(this->get_profile(), this->get_state(),
+                                        this->get_scan_params(), delete_desc,
+                                        READ_DELETE_FILE_BATCH_SIZE, this->get_state()->timezone(),
+                                        this->get_io_ctx(), this->_meta_cache);
+    }
 
     static ColumnIdResult _create_column_ids(const orc::Type* orc_type,
                                              const TupleDescriptor* tuple_descriptor);
 
-private:
     static const std::string ICEBERG_ORC_ATTRIBUTE;
-    const orc::Type* _data_file_type_desc = nullptr;
+
+private:
+    Status _read_position_delete_file(const TFileRangeDesc* delete_range,
+                                      DeleteFile* position_delete) final;
 };
 
 #include "common/compile_check_end.h"
diff --git a/be/src/format/table/iceberg_reader_mixin.h b/be/src/format/table/iceberg_reader_mixin.h
new file mode 100644
index 00000000000000..f598757ab20f01
--- /dev/null
+++ b/be/src/format/table/iceberg_reader_mixin.h
@@ -0,0 +1,899 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "common/consts.h"
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/column/column_dictionary.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "format/generic_reader.h"
+#include "format/table/deletion_vector_reader.h"
+#include "format/table/equality_delete.h"
+#include "format/table/table_schema_change_helper.h"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+#include "storage/olap_common.h"
+
+namespace doris {
+class TIcebergDeleteFileDesc;
+} // namespace doris
+
+namespace doris {
+#include "common/compile_check_begin.h"
+
+class ShardedKVCache;
+
+// CRTP mixin for Iceberg reader functionality.
+// BaseReader should be ParquetReader or OrcReader.
+// Inherits BaseReader + TableSchemaChangeHelper, providing shared Iceberg logic
+// (delete files, deletion vectors, equality delete, $row_id synthesis).
+//
+// Inheritance chain:
+//   IcebergParquetReader -> IcebergReaderMixin<ParquetReader> -> ParquetReader -> GenericReader
+//   IcebergOrcReader     -> IcebergReaderMixin<OrcReader>     -> OrcReader     -> GenericReader
+template <typename BaseReader>
+class IcebergReaderMixin : public BaseReader, public TableSchemaChangeHelper {
+public:
+    struct PositionDeleteRange {
+        std::vector<std::string> data_file_path;
+        std::vector<std::pair<int, int>> range;
+    };
+
+    // Forward BaseReader constructor arguments + Iceberg-specific kv_cache
+    template <typename... Args>
+    IcebergReaderMixin(ShardedKVCache* kv_cache, Args&&... args)
+            : BaseReader(std::forward<Args>(args)...), _kv_cache(kv_cache) {
+        // Initialize table-level row count from scan range (replicates master's
+        // GenericReader constructor logic).
+        const auto& range = this->get_scan_range();
+        if (range.table_format_params.__isset.table_level_row_count) {
+            _table_level_row_count = range.table_format_params.table_level_row_count;
+        }
+
+        static const char* iceberg_profile = "IcebergProfile";
+        ADD_TIMER(this->get_profile(), iceberg_profile);
+        _iceberg_profile.num_delete_files = ADD_CHILD_COUNTER(this->get_profile(), "NumDeleteFiles",
+                                                              TUnit::UNIT, iceberg_profile);
+        _iceberg_profile.num_delete_rows = ADD_CHILD_COUNTER(this->get_profile(), "NumDeleteRows",
+                                                             TUnit::UNIT, iceberg_profile);
+        _iceberg_profile.delete_files_read_time =
+                ADD_CHILD_TIMER(this->get_profile(), "DeleteFileReadTime", iceberg_profile);
+        _iceberg_profile.delete_rows_sort_time =
+                ADD_CHILD_TIMER(this->get_profile(), "DeleteRowsSortTime", iceberg_profile);
+        _iceberg_profile.parse_delete_file_time =
+                ADD_CHILD_TIMER(this->get_profile(), "ParseDeleteFileTime", iceberg_profile);
+    }
+
+    ~IcebergReaderMixin() override = default;
+
+    void set_current_file_info(const std::string& file_path, int32_t partition_spec_id,
+                               const std::string& partition_data_json) {
+        _current_file_path = file_path;
+        _partition_spec_id = partition_spec_id;
+        _partition_data_json = partition_data_json;
+    }
+
+    enum { DATA, POSITION_DELETE, EQUALITY_DELETE, DELETION_VECTOR };
+    enum Fileformat { NONE, PARQUET, ORC, AVRO };
+
+    virtual void set_delete_rows() = 0;
+
+    // Replicates master's GenericReader::get_next_block().
+    // Handles table-level count pushdown by returning FE-provided _table_level_row_count
+    // directly (without reading any files). For all other cases, resets push_down_agg_type
+    // to NONE and delegates to the base reader — because in master, ParquetReader/OrcReader
+    // never had _push_down_agg_type == COUNT (it was absorbed by the GenericReader layer).
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override {
+        if (this->_push_down_agg_type == TPushAggOp::type::COUNT && _table_level_row_count >= 0) {
+            auto rows = std::min(_table_level_row_count, (int64_t)this->get_batch_size());
+            _table_level_row_count -= rows;
+            auto mutate_columns = block->mutate_columns();
+            for (auto& col : mutate_columns) {
+                col->resize(rows);
+            }
+            block->set_columns(std::move(mutate_columns));
+            *read_rows = rows;
+            if (_table_level_row_count == 0) {
+                *eof = true;
+            }
+            return Status::OK();
+        }
+        this->set_push_down_agg_type(TPushAggOp::NONE);
+        return BaseReader::_do_get_next_block(block, read_rows, eof);
+    }
+
+protected:
+    // ---- Hook implementations ----
+
+    // Called before reading a block: expand block for equality delete columns + detect row_id
+    Status on_before_read_block(Block* block) override {
+        RETURN_IF_ERROR(_expand_block_if_need(block));
+        return Status::OK();
+    }
+
+    /// Fill Iceberg $row_id synthesized column. Registered as handler during init.
+    Status _fill_iceberg_row_id(Block* block, size_t rows) {
+        int row_id_pos = block->get_position_by_name(BeConsts::ICEBERG_ROWID_COL);
+        if (row_id_pos < 0) {
+            return Status::OK();
+        }
+
+        // Lazy-init file info: only set when $row_id is actually needed.
+        const auto& table_desc = this->get_scan_range().table_format_params.iceberg_params;
+        std::string file_path = table_desc.original_file_path;
+        int32_t partition_spec_id =
+                table_desc.__isset.partition_spec_id ? table_desc.partition_spec_id : 0;
+        std::string partition_data_json;
+        if (table_desc.__isset.partition_data_json) {
+            partition_data_json = table_desc.partition_data_json;
+        }
+        set_current_file_info(file_path, partition_spec_id, partition_data_json);
+
+        const auto& row_ids = this->current_batch_row_positions();
+        auto& col_with_type = block->get_by_position(static_cast<size_t>(row_id_pos));
+        MutableColumnPtr row_id_column;
+        RETURN_IF_ERROR(_build_iceberg_rowid_column(col_with_type.type, _current_file_path, row_ids,
+                                                    _partition_spec_id, _partition_data_json,
+                                                    &row_id_column));
+        col_with_type.column = std::move(row_id_column);
+        return Status::OK();
+    }
+
+    // Called after reading a block: apply equality delete filter + shrink block
+    Status on_after_read_block(Block* block, size_t* read_rows) override {
+        if (!_equality_delete_impls.empty()) {
+            LOG(INFO) << "[EqDeleteDebug] on_after_read_block: block has " << block->rows()
+                      << " rows, columns: " << block->dump_names();
+            for (auto& [fid, name] : _id_to_block_column_name) {
+                LOG(INFO) << "[EqDeleteDebug] _id_to_block_column_name[" << fid << "] = " << name;
+            }
+            if (this->col_name_to_block_idx_ref()) {
+                for (auto& [name, idx] : *this->col_name_to_block_idx_ref()) {
+                    LOG(INFO) << "[EqDeleteDebug] col_name_to_block_idx[" << name << "] = " << idx;
+                }
+            }
+            std::unique_ptr<IColumn::Filter> filter =
+                    std::make_unique<IColumn::Filter>(block->rows(), 1);
+            for (auto& equality_delete_impl : _equality_delete_impls) {
+                RETURN_IF_ERROR(equality_delete_impl->filter_data_block(
+                        block, this->col_name_to_block_idx_ref(), _id_to_block_column_name,
+                        *filter));
+            }
+            size_t kept = 0;
+            for (size_t i = 0; i < filter->size(); i++) {
+                if ((*filter)[i]) kept++;
+            }
+            LOG(INFO) << "[EqDeleteDebug] after filter: kept " << kept << " of " << block->rows();
+            Block::filter_block_internal(block, *filter, block->columns());
+            *read_rows = block->rows();
+        }
+        return _shrink_block_if_need(block);
+    }
+
+    // ---- Shared Iceberg methods ----
+
+    Status _init_row_filters();
+    Status _position_delete_base(const std::string data_file_path,
+                                 const std::vector<TIcebergDeleteFileDesc>& delete_files);
+    Status _equality_delete_base(const std::vector<TIcebergDeleteFileDesc>& delete_files);
+    Status read_deletion_vector(const std::string& data_file_path,
+                                const TIcebergDeleteFileDesc& delete_file_desc);
+
+    Status _expand_block_if_need(Block* block);
+    Status _shrink_block_if_need(Block* block);
+
+    // Type aliases — must be defined before member function declarations that use them.
+    using DeleteRows = std::vector<int64_t>;
+    using DeleteFile = phmap::parallel_flat_hash_map<
+            std::string, std::unique_ptr<DeleteRows>, std::hash<std::string>, std::equal_to<>,
+            std::allocator<std::pair<const std::string, std::unique_ptr<DeleteRows>>>, 8,
+            std::mutex>;
+
+    PositionDeleteRange _get_range(const ColumnDictI32& file_path_column);
+    PositionDeleteRange _get_range(const ColumnString& file_path_column);
+    static void _sort_delete_rows(const std::vector<std::vector<int64_t>*>& delete_rows_array,
+                                  int64_t num_delete_rows, std::vector<int64_t>& result);
+    void _gen_position_delete_file_range(Block& block, DeleteFile* position_delete,
+                                         size_t read_rows, bool file_path_column_dictionary_coded);
+    void _generate_equality_delete_block(Block* block,
+                                         const std::vector<std::string>& equality_delete_col_names,
+                                         const std::vector<DataTypePtr>& equality_delete_col_types);
+
+    // Pure virtual: format-specific delete file reading
+    virtual Status _read_position_delete_file(const TFileRangeDesc*, DeleteFile*) = 0;
+    virtual std::unique_ptr<GenericReader> _create_equality_reader(
+            const TFileRangeDesc& delete_desc) = 0;
+
+    static std::string _delet_file_cache_key(const std::string& path) { return "delete_" + path; }
+
+    /// Build the Iceberg V2 row-id struct column.
+    static Status _build_iceberg_rowid_column(const DataTypePtr& type, const std::string& file_path,
+                                              const std::vector<rowid_t>& row_ids,
+                                              int32_t partition_spec_id,
+                                              const std::string& partition_data_json,
+                                              MutableColumnPtr* column_out) {
+        if (type == nullptr || column_out == nullptr) {
+            return Status::InvalidArgument("Invalid iceberg rowid column type or output column");
+        }
+        MutableColumnPtr column = type->create_column();
+        ColumnNullable* nullable_col = check_and_get_column<ColumnNullable>(column.get());
+        ColumnStruct* struct_col = nullptr;
+        if (nullable_col != nullptr) {
+            struct_col =
+                    check_and_get_column<ColumnStruct>(nullable_col->get_nested_column_ptr().get());
+        } else {
+            struct_col = check_and_get_column<ColumnStruct>(column.get());
+        }
+        if (struct_col == nullptr || struct_col->tuple_size() < 4) {
+            return Status::InternalError("Invalid iceberg rowid column structure");
+        }
+        size_t num_rows = row_ids.size();
+        auto& file_path_col = struct_col->get_column(0);
+        auto& row_pos_col = struct_col->get_column(1);
+        auto& spec_id_col = struct_col->get_column(2);
+        auto& partition_data_col = struct_col->get_column(3);
+        file_path_col.reserve(num_rows);
+        row_pos_col.reserve(num_rows);
+        spec_id_col.reserve(num_rows);
+        partition_data_col.reserve(num_rows);
+        for (size_t i = 0; i < num_rows; ++i) {
+            file_path_col.insert_data(file_path.data(), file_path.size());
+        }
+        for (size_t i = 0; i < num_rows; ++i) {
+            int64_t row_pos = static_cast<int64_t>(row_ids[i]);
+            row_pos_col.insert_data(reinterpret_cast<const char*>(&row_pos), sizeof(row_pos));
+        }
+        for (size_t i = 0; i < num_rows; ++i) {
+            int32_t spec_id = partition_spec_id;
+            spec_id_col.insert_data(reinterpret_cast<const char*>(&spec_id), sizeof(spec_id));
+        }
+        for (size_t i = 0; i < num_rows; ++i) {
+            partition_data_col.insert_data(partition_data_json.data(), partition_data_json.size());
+        }
+        if (nullable_col != nullptr) {
+            nullable_col->get_null_map_data().resize_fill(num_rows, 0);
+        }
+        *column_out = std::move(column);
+        return Status::OK();
+    }
+
+    struct IcebergProfile {
+        RuntimeProfile::Counter* num_delete_files;
+        RuntimeProfile::Counter* num_delete_rows;
+        RuntimeProfile::Counter* delete_files_read_time;
+        RuntimeProfile::Counter* delete_rows_sort_time;
+        RuntimeProfile::Counter* parse_delete_file_time;
+    };
+
+    bool _need_row_id_column = false;
+    std::string _current_file_path;
+    int32_t _partition_spec_id = 0;
+    std::string _partition_data_json;
+
+    ShardedKVCache* _kv_cache;
+    int64_t _table_level_row_count = -1;
+    IcebergProfile _iceberg_profile;
+    const std::vector<int64_t>* _iceberg_delete_rows = nullptr;
+    std::vector<std::string> _expand_col_names;
+    std::vector<ColumnWithTypeAndName> _expand_columns;
+    std::vector<std::string> _all_required_col_names;
+    Fileformat _file_format = Fileformat::NONE;
+
+    const int64_t MIN_SUPPORT_DELETE_FILES_VERSION = 2;
+    const std::string ICEBERG_FILE_PATH = "file_path";
+    const std::string ICEBERG_ROW_POS = "pos";
+    const std::vector<std::string> delete_file_col_names {ICEBERG_FILE_PATH, ICEBERG_ROW_POS};
+    const std::unordered_map<std::string, uint32_t> DELETE_COL_NAME_TO_BLOCK_IDX = {
+            {ICEBERG_FILE_PATH, 0}, {ICEBERG_ROW_POS, 1}};
+    const int ICEBERG_FILE_PATH_INDEX = 0;
+    const int ICEBERG_FILE_POS_INDEX = 1;
+    const int READ_DELETE_FILE_BATCH_SIZE = 102400;
+
+    // all ids that need read for eq delete (from all eq delete files)
+    std::set<int> _equality_delete_col_ids;
+    // eq delete column ids -> location of _equality_delete_blocks / _equality_delete_impls
+    std::map<std::vector<int>, int> _equality_delete_block_map;
+    // EqualityDeleteBase stores raw pointers to these blocks, so do not modify this vector after
+    // creating entries in _equality_delete_impls.
+    std::vector<Block> _equality_delete_blocks;
+    std::vector<std::unique_ptr<EqualityDeleteBase>> _equality_delete_impls;
+
+    // id -> block column name
+    std::unordered_map<int, std::string> _id_to_block_column_name;
+
+    // File column names used during init
+    std::vector<std::string> _file_col_names;
+};
+
+// ============================================================================
+// Template method implementations (must be in header for templates)
+// ============================================================================
+
+template <typename BaseReader>
+Status IcebergReaderMixin<BaseReader>::_init_row_filters() {
+    // COUNT(*) short-circuit
+    if (this->_push_down_agg_type == TPushAggOp::type::COUNT &&
+        this->get_scan_range().table_format_params.__isset.table_level_row_count &&
+        this->get_scan_range().table_format_params.table_level_row_count > 0) {
+        return Status::OK();
+    }
+
+    const auto& table_desc = this->get_scan_range().table_format_params.iceberg_params;
+    const auto& version = table_desc.format_version;
+    if (version < MIN_SUPPORT_DELETE_FILES_VERSION) {
+        return Status::OK();
+    }
+
+    std::vector<TIcebergDeleteFileDesc> position_delete_files;
+    std::vector<TIcebergDeleteFileDesc> equality_delete_files;
+    std::vector<TIcebergDeleteFileDesc> deletion_vector_files;
+    for (const TIcebergDeleteFileDesc& desc : table_desc.delete_files) {
+        if (desc.content == POSITION_DELETE) {
+            position_delete_files.emplace_back(desc);
+        } else if (desc.content == EQUALITY_DELETE) {
+            equality_delete_files.emplace_back(desc);
+        } else if (desc.content == DELETION_VECTOR) {
+            deletion_vector_files.emplace_back(desc);
+        }
+    }
+    LOG(INFO) << "[IcebergDebug] _init_row_filters: total_delete_files="
+              << table_desc.delete_files.size() << ", position=" << position_delete_files.size()
+              << ", equality=" << equality_delete_files.size()
+              << ", dv=" << deletion_vector_files.size();
+
+    if (!equality_delete_files.empty()) {
+        RETURN_IF_ERROR(_equality_delete_base(equality_delete_files));
+        this->set_push_down_agg_type(TPushAggOp::NONE);
+    }
+
+    if (!deletion_vector_files.empty()) {
+        if (deletion_vector_files.size() != 1) [[unlikely]] {
+            /*
+             * Deletion vectors are a binary representation of deletes for a single data file that is more efficient
+             * at execution time than position delete files. Unlike equality or position delete files, there can be
+             * at most one deletion vector for a given data file in a snapshot.
+             */
+            return Status::DataQualityError("This iceberg data file has multiple DVs.");
+        }
+        RETURN_IF_ERROR(
+                read_deletion_vector(table_desc.original_file_path, deletion_vector_files[0]));
+        this->set_push_down_agg_type(TPushAggOp::NONE);
+    } else if (!position_delete_files.empty()) {
+        RETURN_IF_ERROR(
+                _position_delete_base(table_desc.original_file_path, position_delete_files));
+        this->set_push_down_agg_type(TPushAggOp::NONE);
+    }
+
+    COUNTER_UPDATE(_iceberg_profile.num_delete_files, table_desc.delete_files.size());
+    return Status::OK();
+}
+
+template <typename BaseReader>
+Status IcebergReaderMixin<BaseReader>::_equality_delete_base(
+        const std::vector<TIcebergDeleteFileDesc>& delete_files) {
+    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
+            partition_columns;
+    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
+
+    for (const auto& delete_file : delete_files) {
+        TFileRangeDesc delete_desc;
+        delete_desc.__set_fs_name(this->get_scan_range().fs_name);
+        delete_desc.path = delete_file.path;
+        delete_desc.start_offset = 0;
+        delete_desc.size = -1;
+        delete_desc.file_size = -1;
+
+        if (!delete_file.__isset.field_ids) [[unlikely]] {
+            return Status::InternalError(
+                    "missing delete field ids when reading equality delete file");
+        }
+        auto& read_column_field_ids = delete_file.field_ids;
+        std::set<int> read_column_field_ids_set;
+        for (const auto& field_id : read_column_field_ids) {
+            read_column_field_ids_set.insert(field_id);
+            _equality_delete_col_ids.insert(field_id);
+        }
+
+        std::unique_ptr<GenericReader> delete_reader = _create_equality_reader(delete_desc);
+        RETURN_IF_ERROR(delete_reader->init_schema_reader());
+
+        std::vector<std::string> equality_delete_col_names;
+        std::vector<DataTypePtr> equality_delete_col_types;
+
+        // Build delete col names/types/ids by matching field_ids from delete file schema.
+        // Master iterates delete file's FieldDescriptor and uses field_id to match,
+        // NOT idx-based pairing (get_parsed_schema order != field_ids order).
+        std::vector<std::string> delete_col_names;
+        std::vector<DataTypePtr> delete_col_types;
+        std::vector<int> delete_col_ids;
+        std::unordered_map<std::string, uint32_t> delete_col_name_to_block_idx;
+
+        if (auto* parquet_reader = typeid_cast<ParquetReader*>(delete_reader.get())) {
+            LOG(INFO) << "[EqDeleteDebug] step1: parquet delete reader cast OK";
+            const FieldDescriptor* delete_field_desc = nullptr;
+            auto st1 = parquet_reader->get_file_metadata_schema(&delete_field_desc);
+            if (!st1.ok()) {
+                LOG(WARNING) << "[EqDeleteDebug] get_file_metadata_schema FAILED: " << st1;
+                return st1;
+            }
+            LOG(INFO) << "[EqDeleteDebug] step2: get_file_metadata_schema OK, fields="
+                      << delete_field_desc->get_fields_schema().size();
+            DCHECK(delete_field_desc != nullptr);
+
+            for (const auto& delete_file_field : delete_field_desc->get_fields_schema()) {
+                LOG(INFO) << "[EqDeleteDebug] step3: field name=" << delete_file_field.name
+                          << ", field_id=" << delete_file_field.field_id << ", in_set="
+                          << read_column_field_ids_set.contains(delete_file_field.field_id);
+                if (delete_file_field.field_id == -1) [[unlikely]] {
+                    return Status::DataQualityError(
+                            "missing field id when reading equality delete file");
+                }
+                if (!read_column_field_ids_set.contains(delete_file_field.field_id)) {
+                    continue;
+                }
+                if (delete_file_field.children.size() > 0) [[unlikely]] {
+                    return Status::InternalError(
+                            "can not support read complex column in equality delete file");
+                }
+
+                delete_col_ids.emplace_back(delete_file_field.field_id);
+                delete_col_names.emplace_back(delete_file_field.name);
+                delete_col_types.emplace_back(make_nullable(delete_file_field.data_type));
+
+                int field_id = delete_file_field.field_id;
+                if (!_id_to_block_column_name.contains(field_id)) {
+                    _id_to_block_column_name.emplace(field_id, delete_file_field.name);
+                    _expand_col_names.emplace_back(delete_file_field.name);
+                    _expand_columns.emplace_back(
+                            make_nullable(delete_file_field.data_type)->create_column(),
+                            make_nullable(delete_file_field.data_type), delete_file_field.name);
+                }
+            }
+            LOG(INFO) << "[EqDeleteDebug] step4: after loop, delete_col_names.size="
+                      << delete_col_names.size();
+            for (uint32_t idx = 0; idx < delete_col_names.size(); ++idx) {
+                delete_col_name_to_block_idx[delete_col_names[idx]] = idx;
+            }
+            // Delete files have TFileRangeDesc.size=-1, which would cause
+            // set_fill_columns to return EndOfFile("No row group to read")
+            // when _filter_groups is true. Master passes filter_groups=false.
+            ParquetInitContext eq_delete_ctx;
+            eq_delete_ctx.filter_groups = false;
+            eq_delete_ctx.column_names = delete_col_names;
+            eq_delete_ctx.col_name_to_block_idx = &delete_col_name_to_block_idx;
+            auto st2 = parquet_reader->init_reader(&eq_delete_ctx);
+            if (!st2.ok()) {
+                LOG(WARNING) << "[EqDeleteDebug] _do_init_reader for delete reader FAILED: " << st2;
+                return st2;
+            }
+            LOG(INFO) << "[EqDeleteDebug] step5: _do_init_reader OK";
+        } else if (auto* orc_reader = typeid_cast<OrcReader*>(delete_reader.get())) {
+            // For ORC: use get_parsed_schema with field_ids from delete_file
+            // ORC field_ids come from the Thrift descriptor, not from ORC metadata
+            RETURN_IF_ERROR(delete_reader->get_parsed_schema(&equality_delete_col_names,
+                                                             &equality_delete_col_types));
+            for (uint32_t idx = 0; idx < equality_delete_col_names.size(); ++idx) {
+                if (idx < read_column_field_ids.size()) {
+                    int field_id = read_column_field_ids[idx];
+                    if (!read_column_field_ids_set.contains(field_id)) continue;
+                    delete_col_ids.emplace_back(field_id);
+                    delete_col_names.emplace_back(equality_delete_col_names[idx]);
+                    delete_col_types.emplace_back(make_nullable(equality_delete_col_types[idx]));
+                    if (!_id_to_block_column_name.contains(field_id)) {
+                        _id_to_block_column_name.emplace(field_id, equality_delete_col_names[idx]);
+                        _expand_col_names.emplace_back(equality_delete_col_names[idx]);
+                        _expand_columns.emplace_back(
+                                make_nullable(equality_delete_col_types[idx])->create_column(),
+                                make_nullable(equality_delete_col_types[idx]),
+                                equality_delete_col_names[idx]);
+                    }
+                }
+            }
+            for (uint32_t idx = 0; idx < delete_col_names.size(); ++idx) {
+                delete_col_name_to_block_idx[delete_col_names[idx]] = idx;
+            }
+            OrcInitContext eq_delete_ctx;
+            eq_delete_ctx.column_names = delete_col_names;
+            eq_delete_ctx.col_name_to_block_idx = &delete_col_name_to_block_idx;
+            RETURN_IF_ERROR(orc_reader->init_reader(&eq_delete_ctx));
+        } else {
+            return Status::InternalError("Unsupported format of delete file");
+        }
+
+        LOG(INFO) << "[EqDeleteDebug] after init, delete_col_ids.size=" << delete_col_ids.size()
+                  << ", delete_col_names.size=" << delete_col_names.size();
+        for (size_t i = 0; i < delete_col_names.size(); i++) {
+            LOG(INFO) << "[EqDeleteDebug] delete_col[" << i << "]: name=" << delete_col_names[i]
+                      << (i < delete_col_ids.size()
+                                  ? ", field_id=" + std::to_string(delete_col_ids[i])
+                                  : "");
+        }
+
+        if (!_equality_delete_block_map.contains(delete_col_ids)) {
+            _equality_delete_block_map.emplace(delete_col_ids, _equality_delete_blocks.size());
+            Block block;
+            _generate_equality_delete_block(&block, delete_col_names, delete_col_types);
+            _equality_delete_blocks.emplace_back(block);
+        }
+        Block& eq_file_block = _equality_delete_blocks[_equality_delete_block_map[delete_col_ids]];
+
+        bool eof = false;
+        while (!eof) {
+            Block tmp_block;
+            _generate_equality_delete_block(&tmp_block, delete_col_names, delete_col_types);
+            size_t read_rows = 0;
+            auto st = delete_reader->get_next_block(&tmp_block, &read_rows, &eof);
+            if (!st.ok()) {
+                LOG(WARNING) << "[EqDeleteDebug] delete_reader->get_next_block failed: " << st;
+                return st;
+            }
+            LOG(INFO) << "[EqDeleteDebug] read delete file: rows=" << read_rows << ", eof=" << eof;
+            if (read_rows > 0) {
+                MutableBlock mutable_block(&eq_file_block);
+                RETURN_IF_ERROR(mutable_block.merge(tmp_block));
+            }
+        }
+    }
+
+    for (const auto& [delete_col_ids, block_idx] : _equality_delete_block_map) {
+        auto& eq_file_block = _equality_delete_blocks[block_idx];
+        auto equality_delete_impl =
+                EqualityDeleteBase::get_delete_impl(&eq_file_block, delete_col_ids);
+        RETURN_IF_ERROR(equality_delete_impl->init(this->get_profile()));
+        _equality_delete_impls.emplace_back(std::move(equality_delete_impl));
+    }
+    LOG(INFO) << "[EqDeleteDebug] _equality_delete_base done: impls="
+              << _equality_delete_impls.size() << ", expand_cols=" << _expand_col_names.size();
+    return Status::OK();
+}
+
+template <typename BaseReader>
+void IcebergReaderMixin<BaseReader>::_generate_equality_delete_block(
+        Block* block, const std::vector<std::string>& equality_delete_col_names,
+        const std::vector<DataTypePtr>& equality_delete_col_types) {
+    for (int i = 0; i < equality_delete_col_names.size(); ++i) {
+        DataTypePtr data_type = make_nullable(equality_delete_col_types[i]);
+        MutableColumnPtr data_column = data_type->create_column();
+        block->insert(ColumnWithTypeAndName(std::move(data_column), data_type,
+                                            equality_delete_col_names[i]));
+    }
+}
+
+template <typename BaseReader>
+Status IcebergReaderMixin<BaseReader>::_expand_block_if_need(Block* block) {
+    std::set<std::string> names;
+    auto block_names = block->get_names();
+    names.insert(block_names.begin(), block_names.end());
+    for (auto& col : _expand_columns) {
+        col.column->assume_mutable()->clear();
+        if (names.contains(col.name)) {
+            return Status::InternalError("Wrong expand column '{}'", col.name);
+        }
+        names.insert(col.name);
+        (*this->col_name_to_block_idx_ref())[col.name] = static_cast<uint32_t>(block->columns());
+        block->insert(col);
+    }
+    return Status::OK();
+}
+
+template <typename BaseReader>
+Status IcebergReaderMixin<BaseReader>::_shrink_block_if_need(Block* block) {
+    std::set<size_t> positions_to_erase;
+    for (const std::string& expand_col : _expand_col_names) {
+        if (!this->col_name_to_block_idx_ref()->contains(expand_col)) {
+            return Status::InternalError("Wrong erase column '{}', block: {}", expand_col,
+                                         block->dump_names());
+        }
+        positions_to_erase.emplace((*this->col_name_to_block_idx_ref())[expand_col]);
+    }
+    block->erase(positions_to_erase);
+    for (const std::string& expand_col : _expand_col_names) {
+        this->col_name_to_block_idx_ref()->erase(expand_col);
+    }
+    return Status::OK();
+}
+
+template <typename BaseReader>
+Status IcebergReaderMixin<BaseReader>::_position_delete_base(
+        const std::string data_file_path, const std::vector<TIcebergDeleteFileDesc>& delete_files) {
+    std::vector<DeleteRows*> delete_rows_array;
+    int64_t num_delete_rows = 0;
+    for (const auto& delete_file : delete_files) {
+        SCOPED_TIMER(_iceberg_profile.delete_files_read_time);
+        Status create_status = Status::OK();
+        auto* delete_file_cache = _kv_cache->template get<DeleteFile>(
+                _delet_file_cache_key(delete_file.path), [&]() -> DeleteFile* {
+                    auto* position_delete = new DeleteFile;
+                    TFileRangeDesc delete_file_range;
+                    delete_file_range.__set_fs_name(this->get_scan_range().fs_name);
+                    delete_file_range.path = delete_file.path;
+                    delete_file_range.start_offset = 0;
+                    delete_file_range.size = -1;
+                    delete_file_range.file_size = -1;
+                    create_status = _read_position_delete_file(&delete_file_range, position_delete);
+                    if (!create_status) {
+                        return nullptr;
+                    }
+                    return position_delete;
+                });
+        if (create_status.is<ErrorCode::END_OF_FILE>()) {
+            LOG(INFO) << "[IcebergDebug] _position_delete_base: END_OF_FILE for "
+                      << delete_file.path << ", skipping";
+            continue;
+        } else if (!create_status.ok()) {
+            LOG(INFO) << "[IcebergDebug] _position_delete_base: ERROR reading " << delete_file.path
+                      << ": " << create_status.to_string();
+            return create_status;
+        }
+        LOG(INFO) << "[IcebergDebug] _position_delete_base: cache returned, "
+                  << "delete_file_cache=" << (void*)delete_file_cache
+                  << ", delete_file=" << delete_file.path;
+
+        DeleteFile& delete_file_map = *((DeleteFile*)delete_file_cache);
+        auto get_value = [&](const auto& v) {
+            DeleteRows* row_ids = v.second.get();
+            if (!row_ids->empty()) {
+                delete_rows_array.emplace_back(row_ids);
+                num_delete_rows += row_ids->size();
+            }
+        };
+        delete_file_map.if_contains(data_file_path, get_value);
+        LOG(INFO) << "[IcebergDebug] _position_delete_base: data_file_path=" << data_file_path
+                  << ", delete_file=" << delete_file.path
+                  << ", num_delete_rows_so_far=" << num_delete_rows
+                  << ", delete_file_map_size=" << delete_file_map.size();
+        // Log all keys in the delete file map for debugging
+        delete_file_map.for_each([&](const auto& kv) {
+            LOG(INFO) << "[IcebergDebug] _position_delete_base: map_key=" << kv.first
+                      << ", rows=" << kv.second->size();
+        });
+    }
+    if (num_delete_rows > 0) {
+        SCOPED_TIMER(_iceberg_profile.delete_rows_sort_time);
+        _iceberg_delete_rows =
+                _kv_cache->template get<DeleteRows>(data_file_path, [&]() -> DeleteRows* {
+                    auto* data_file_position_delete = new DeleteRows;
+                    _sort_delete_rows(delete_rows_array, num_delete_rows,
+                                      *data_file_position_delete);
+                    return data_file_position_delete;
+                });
+        set_delete_rows();
+        COUNTER_UPDATE(_iceberg_profile.num_delete_rows, num_delete_rows);
+    } else {
+        LOG(INFO) << "[IcebergDebug] _position_delete_base: NO delete rows matched for "
+                  << data_file_path;
+    }
+    return Status::OK();
+}
+
+template <typename BaseReader>
+typename IcebergReaderMixin<BaseReader>::PositionDeleteRange
+IcebergReaderMixin<BaseReader>::_get_range(const ColumnDictI32& file_path_column) {
+    PositionDeleteRange range;
+    size_t read_rows = file_path_column.get_data().size();
+    const int* code_path = file_path_column.get_data().data();
+    const int* code_path_start = code_path;
+    const int* code_path_end = code_path + read_rows;
+    while (code_path < code_path_end) {
+        int code = code_path[0];
+        const int* code_end = std::upper_bound(code_path, code_path_end, code);
+        range.data_file_path.emplace_back(file_path_column.get_value(code).to_string());
+        range.range.emplace_back(code_path - code_path_start, code_end - code_path_start);
+        code_path = code_end;
+    }
+    return range;
+}
+
+template <typename BaseReader>
+typename IcebergReaderMixin<BaseReader>::PositionDeleteRange
+IcebergReaderMixin<BaseReader>::_get_range(const ColumnString& file_path_column) {
+    PositionDeleteRange range;
+    size_t read_rows = file_path_column.size();
+    size_t index = 0;
+    while (index < read_rows) {
+        StringRef data_path = file_path_column.get_data_at(index);
+        size_t left = index - 1;
+        size_t right = read_rows;
+        while (left + 1 != right) {
+            size_t mid = left + (right - left) / 2;
+            if (file_path_column.get_data_at(mid) > data_path) {
+                right = mid;
+            } else {
+                left = mid;
+            }
+        }
+        range.data_file_path.emplace_back(data_path.to_string());
+        range.range.emplace_back(index, left + 1);
+        index = left + 1;
+    }
+    return range;
+}
+
+template <typename BaseReader>
+void IcebergReaderMixin<BaseReader>::_sort_delete_rows(
+        const std::vector<std::vector<int64_t>*>& delete_rows_array, int64_t num_delete_rows,
+        std::vector<int64_t>& result) {
+    if (delete_rows_array.empty()) {
+        return;
+    }
+    if (delete_rows_array.size() == 1) {
+        result.resize(num_delete_rows);
+        memcpy(result.data(), delete_rows_array.front()->data(), sizeof(int64_t) * num_delete_rows);
+        return;
+    }
+    if (delete_rows_array.size() == 2) {
+        result.resize(num_delete_rows);
+        std::merge(delete_rows_array.front()->begin(), delete_rows_array.front()->end(),
+                   delete_rows_array.back()->begin(), delete_rows_array.back()->end(),
+                   result.begin());
+        return;
+    }
+
+    using vec_pair = std::pair<std::vector<int64_t>::iterator, std::vector<int64_t>::iterator>;
+    result.resize(num_delete_rows);
+    auto row_id_iter = result.begin();
+    auto iter_end = result.end();
+    std::vector<vec_pair> rows_array;
+    for (auto* rows : delete_rows_array) {
+        if (!rows->empty()) {
+            rows_array.emplace_back(rows->begin(), rows->end());
+        }
+    }
+    size_t array_size = rows_array.size();
+    while (row_id_iter != iter_end) {
+        int64_t min_index = 0;
+        int64_t min = *rows_array[0].first;
+        for (size_t i = 0; i < array_size; ++i) {
+            if (*rows_array[i].first < min) {
+                min_index = i;
+                min = *rows_array[i].first;
+            }
+        }
+        *row_id_iter++ = min;
+        rows_array[min_index].first++;
+        if (UNLIKELY(rows_array[min_index].first == rows_array[min_index].second)) {
+            rows_array.erase(rows_array.begin() + min_index);
+            array_size--;
+        }
+    }
+}
+
+template <typename BaseReader>
+void IcebergReaderMixin<BaseReader>::_gen_position_delete_file_range(
+        Block& block, DeleteFile* position_delete, size_t read_rows,
+        bool file_path_column_dictionary_coded) {
+    SCOPED_TIMER(_iceberg_profile.parse_delete_file_time);
+    auto name_to_pos_map = block.get_name_to_pos_map();
+    ColumnPtr path_column = block.get_by_position(name_to_pos_map[ICEBERG_FILE_PATH]).column;
+    DCHECK_EQ(path_column->size(), read_rows);
+    ColumnPtr pos_column = block.get_by_position(name_to_pos_map[ICEBERG_ROW_POS]).column;
+    using ColumnType = typename PrimitiveTypeTraits<TYPE_BIGINT>::ColumnType;
+    const int64_t* src_data = assert_cast<const ColumnType&>(*pos_column).get_data().data();
+    PositionDeleteRange range;
+    if (file_path_column_dictionary_coded) {
+        range = _get_range(assert_cast<const ColumnDictI32&>(*path_column));
+    } else {
+        range = _get_range(assert_cast<const ColumnString&>(*path_column));
+    }
+    for (int i = 0; i < range.range.size(); ++i) {
+        std::string key = range.data_file_path[i];
+        auto iter = position_delete->find(key);
+        DeleteRows* delete_rows;
+        if (iter == position_delete->end()) {
+            delete_rows = new DeleteRows;
+            std::unique_ptr<DeleteRows> delete_rows_ptr(delete_rows);
+            (*position_delete)[key] = std::move(delete_rows_ptr);
+        } else {
+            delete_rows = iter->second.get();
+        }
+        const int64_t* cpy_start = src_data + range.range[i].first;
+        const int64_t cpy_count = range.range[i].second - range.range[i].first;
+        int64_t origin_size = delete_rows->size();
+        delete_rows->resize(origin_size + cpy_count);
+        int64_t* dest_position = &(*delete_rows)[origin_size];
+        memcpy(dest_position, cpy_start, cpy_count * sizeof(int64_t));
+    }
+}
+
+template <typename BaseReader>
+Status IcebergReaderMixin<BaseReader>::read_deletion_vector(
+        const std::string& data_file_path, const TIcebergDeleteFileDesc& delete_file_desc) {
+    Status create_status = Status::OK();
+    SCOPED_TIMER(_iceberg_profile.delete_files_read_time);
+    _iceberg_delete_rows = _kv_cache->template get<
+            DeleteRows>(data_file_path, [&]() -> DeleteRows* {
+        auto* delete_rows = new DeleteRows;
+
+        TFileRangeDesc delete_range;
+        delete_range.__set_fs_name(this->get_scan_range().fs_name);
+        delete_range.path = delete_file_desc.path;
+        delete_range.start_offset = delete_file_desc.content_offset;
+        delete_range.size = delete_file_desc.content_size_in_bytes;
+        delete_range.file_size = -1;
+
+        DeletionVectorReader dv_reader(this->get_state(), this->get_profile(),
+                                       this->get_scan_params(), delete_range, this->get_io_ctx());
+        create_status = dv_reader.open();
+        if (!create_status.ok()) [[unlikely]] {
+            return nullptr;
+        }
+
+        size_t buffer_size = delete_range.size;
+        std::vector<char> buf(buffer_size);
+        if (buffer_size < 12) [[unlikely]] {
+            create_status = Status::DataQualityError("Deletion vector file size too small: {}",
+                                                     buffer_size);
+            return nullptr;
+        }
+
+        create_status = dv_reader.read_at(delete_range.start_offset, {buf.data(), buffer_size});
+        if (!create_status) [[unlikely]] {
+            return nullptr;
+        }
+
+        auto total_length = BigEndian::Load32(buf.data());
+        if (total_length + 8 != buffer_size) [[unlikely]] {
+            create_status = Status::DataQualityError(
+                    "Deletion vector length mismatch, expected: {}, actual: {}", total_length + 8,
+                    buffer_size);
+            return nullptr;
+        }
+
+        constexpr static char MAGIC_NUMBER[] = {'\xD1', '\xD3', '\x39', '\x64'};
+        if (memcmp(buf.data() + sizeof(total_length), MAGIC_NUMBER, 4)) [[unlikely]] {
+            create_status = Status::DataQualityError("Deletion vector magic number mismatch");
+            return nullptr;
+        }
+
+        roaring::Roaring64Map bitmap;
+        SCOPED_TIMER(_iceberg_profile.parse_delete_file_time);
+        try {
+            bitmap = roaring::Roaring64Map::readSafe(buf.data() + 8, buffer_size - 12);
+        } catch (const std::runtime_error& e) {
+            create_status = Status::DataQualityError("Decode roaring bitmap failed, {}", e.what());
+            return nullptr;
+        }
+
+        delete_rows->reserve(bitmap.cardinality());
+        for (auto it = bitmap.begin(); it != bitmap.end(); it++) {
+            delete_rows->push_back(*it);
+        }
+        COUNTER_UPDATE(_iceberg_profile.num_delete_rows, delete_rows->size());
+        return delete_rows;
+    });
+
+    RETURN_IF_ERROR(create_status);
+    if (!_iceberg_delete_rows->empty()) [[likely]] {
+        set_delete_rows();
+    }
+    return Status::OK();
+}
+
+#include "common/compile_check_end.h"
+} // namespace doris
diff --git a/be/src/format/table/iceberg_sys_table_jni_reader.h b/be/src/format/table/iceberg_sys_table_jni_reader.h
index f5bc69f6776772..c9232fb468ae3c 100644
--- a/be/src/format/table/iceberg_sys_table_jni_reader.h
+++ b/be/src/format/table/iceberg_sys_table_jni_reader.h
@@ -53,6 +53,9 @@ class IcebergSysTableJniReader : public JniReader {
 
     Status init_reader();
 
+protected:
+    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
+
 private:
     Status _init_status;
 };
diff --git a/be/src/format/table/jdbc_jni_reader.cpp b/be/src/format/table/jdbc_jni_reader.cpp
index 89071563c653ca..101f3b8bef131b 100644
--- a/be/src/format/table/jdbc_jni_reader.cpp
+++ b/be/src/format/table/jdbc_jni_reader.cpp
@@ -105,7 +105,7 @@ bool JdbcJniReader::_is_special_type(PrimitiveType type) {
            type == PrimitiveType::TYPE_QUANTILE_STATE || type == PrimitiveType::TYPE_JSONB;
 }
 
-Status JdbcJniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+Status JdbcJniReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     // Identify columns with special types (bitmap, HLL, quantile_state, JSONB)
     // and temporarily replace them with string columns for JNI data transfer.
     // This follows the same pattern as the old vjdbc_connector.cpp _get_reader_params.
@@ -144,7 +144,7 @@ Status JdbcJniReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
     }
 
     // Call parent to do the actual JNI read with string columns
-    RETURN_IF_ERROR(JniReader::get_next_block(block, read_rows, eof));
+    RETURN_IF_ERROR(JniReader::_do_get_next_block(block, read_rows, eof));
 
     // Cast string columns back to their target types
     if (*read_rows > 0 && !special_columns.empty()) {
diff --git a/be/src/format/table/jdbc_jni_reader.h b/be/src/format/table/jdbc_jni_reader.h
index c0ef978682c66e..f12f2ba234eede 100644
--- a/be/src/format/table/jdbc_jni_reader.h
+++ b/be/src/format/table/jdbc_jni_reader.h
@@ -77,7 +77,10 @@ class JdbcJniReader : public JniReader {
      * Before reading, replaces block columns of special types with string columns.
      * After reading, casts the string data back to the target types.
      */
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+
+protected:
+    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
 
 private:
     std::map<std::string, std::string> _jdbc_params;
diff --git a/be/src/format/table/max_compute_jni_reader.h b/be/src/format/table/max_compute_jni_reader.h
index 71a1e74f4f85a2..5b11b6eed8310f 100644
--- a/be/src/format/table/max_compute_jni_reader.h
+++ b/be/src/format/table/max_compute_jni_reader.h
@@ -55,6 +55,9 @@ class MaxComputeJniReader : public JniReader {
     ~MaxComputeJniReader() override = default;
 
     Status init_reader();
+
+protected:
+    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
 };
 
 #include "common/compile_check_end.h"
diff --git a/be/src/format/table/nested_column_access_helper.h b/be/src/format/table/nested_column_access_helper.h
index 5b3d03b358b394..b184eabea3d72f 100644
--- a/be/src/format/table/nested_column_access_helper.h
+++ b/be/src/format/table/nested_column_access_helper.h
@@ -21,7 +21,7 @@
 #include <string>
 #include <vector>
 
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
diff --git a/be/src/format/table/paimon_cpp_reader.cpp b/be/src/format/table/paimon_cpp_reader.cpp
index e4b182c41edfc7..4e45c72b96a239 100644
--- a/be/src/format/table/paimon_cpp_reader.cpp
+++ b/be/src/format/table/paimon_cpp_reader.cpp
@@ -70,7 +70,7 @@ Status PaimonCppReader::init_reader() {
     return _init_paimon_reader();
 }
 
-Status PaimonCppReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+Status PaimonCppReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     if (_push_down_agg_type == TPushAggOp::type::COUNT && _remaining_table_level_row_count >= 0) {
         auto rows = std::min(_remaining_table_level_row_count,
                              (int64_t)_state->query_options().batch_size);
@@ -144,8 +144,8 @@ Status PaimonCppReader::get_next_block(Block* block, size_t* read_rows, bool* eo
     return Status::OK();
 }
 
-Status PaimonCppReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                                    std::unordered_set<std::string>* missing_cols) {
+Status PaimonCppReader::_get_columns_impl(
+        std::unordered_map<std::string, DataTypePtr>* name_to_type) {
     for (const auto& slot : _file_slot_descs) {
         name_to_type->emplace(slot->col_name(), slot->type());
     }
diff --git a/be/src/format/table/paimon_cpp_reader.h b/be/src/format/table/paimon_cpp_reader.h
index d567b1b24bb71a..309e21ae55a570 100644
--- a/be/src/format/table/paimon_cpp_reader.h
+++ b/be/src/format/table/paimon_cpp_reader.h
@@ -59,14 +59,16 @@ class PaimonCppReader : public GenericReader {
     ~PaimonCppReader() override;
 
     Status init_reader();
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
-    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) override;
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override;
     Status close() override;
     void set_predicate(std::shared_ptr<paimon::Predicate> predicate) {
         _predicate = std::move(predicate);
     }
 
+protected:
+    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
+
 private:
     Status _init_paimon_reader();
     Status _decode_split(std::shared_ptr<paimon::Split>* split);
diff --git a/be/src/format/table/paimon_jni_reader.cpp b/be/src/format/table/paimon_jni_reader.cpp
index 12e6171b3a5305..0a21ee10ef07d5 100644
--- a/be/src/format/table/paimon_jni_reader.cpp
+++ b/be/src/format/table/paimon_jni_reader.cpp
@@ -101,7 +101,7 @@ PaimonJniReader::PaimonJniReader(const std::vector<SlotDescriptor*>& file_slot_d
     }
 }
 
-Status PaimonJniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+Status PaimonJniReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     if (_push_down_agg_type == TPushAggOp::type::COUNT && _remaining_table_level_row_count >= 0) {
         auto rows = std::min(_remaining_table_level_row_count,
                              (int64_t)_state->query_options().batch_size);
@@ -118,7 +118,7 @@ Status PaimonJniReader::get_next_block(Block* block, size_t* read_rows, bool* eo
 
         return Status::OK();
     }
-    return JniReader::get_next_block(block, read_rows, eof);
+    return JniReader::_do_get_next_block(block, read_rows, eof);
 }
 
 Status PaimonJniReader::init_reader() {
diff --git a/be/src/format/table/paimon_jni_reader.h b/be/src/format/table/paimon_jni_reader.h
index 548f1c6485a2f7..77cd4fdc518a49 100644
--- a/be/src/format/table/paimon_jni_reader.h
+++ b/be/src/format/table/paimon_jni_reader.h
@@ -54,10 +54,13 @@ class PaimonJniReader : public JniReader {
 
     ~PaimonJniReader() override = default;
 
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
 
     Status init_reader();
 
+protected:
+    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
+
 private:
     int64_t _remaining_table_level_row_count;
 };
diff --git a/be/src/format/table/paimon_reader.cpp b/be/src/format/table/paimon_reader.cpp
index 0667ad8efff7f4..fdc29b5a27d185 100644
--- a/be/src/format/table/paimon_reader.cpp
+++ b/be/src/format/table/paimon_reader.cpp
@@ -25,34 +25,55 @@
 
 namespace doris {
 #include "common/compile_check_begin.h"
-PaimonReader::PaimonReader(std::unique_ptr<GenericReader> file_format_reader,
-                           RuntimeProfile* profile, RuntimeState* state,
-                           const TFileScanRangeParams& params, const TFileRangeDesc& range,
-                           ShardedKVCache* kv_cache, io::IOContext* io_ctx,
-                           FileMetaCache* meta_cache)
-        : TableFormatReader(std::move(file_format_reader), state, profile, params, range, io_ctx,
-                            meta_cache),
-          _kv_cache(kv_cache) {
+
+// ============================================================================
+// PaimonOrcReader
+// ============================================================================
+void PaimonOrcReader::_init_paimon_profile() {
     static const char* paimon_profile = "PaimonProfile";
-    ADD_TIMER(_profile, paimon_profile);
+    ADD_TIMER(get_profile(), paimon_profile);
     _paimon_profile.num_delete_rows =
-            ADD_CHILD_COUNTER(_profile, "NumDeleteRows", TUnit::UNIT, paimon_profile);
+            ADD_CHILD_COUNTER(get_profile(), "NumDeleteRows", TUnit::UNIT, paimon_profile);
     _paimon_profile.delete_files_read_time =
-            ADD_CHILD_TIMER(_profile, "DeleteFileReadTime", paimon_profile);
+            ADD_CHILD_TIMER(get_profile(), "DeleteFileReadTime", paimon_profile);
     _paimon_profile.parse_deletion_vector_time =
-            ADD_CHILD_TIMER(_profile, "ParseDeletionVectorTime", paimon_profile);
+            ADD_CHILD_TIMER(get_profile(), "ParseDeletionVectorTime", paimon_profile);
+}
+
+Status PaimonOrcReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    const orc::Type* orc_type_ptr = nullptr;
+    RETURN_IF_ERROR(get_file_type(&orc_type_ptr));
+
+    RETURN_IF_ERROR(gen_table_info_node_by_field_id(
+            get_scan_params(), get_scan_range().table_format_params.paimon_params.schema_id,
+            get_tuple_descriptor(), orc_type_ptr));
+    ctx->table_info_node = table_info_node_ptr;
+
+    for (const auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::REGULAR ||
+            desc.category == ColumnCategory::GENERATED) {
+            ctx->column_names.push_back(desc.name);
+        }
+    }
+    return Status::OK();
+}
+
+Status PaimonOrcReader::on_after_init_reader(ReaderInitContext* /*ctx*/) {
+    return _init_deletion_vector();
 }
 
-Status PaimonReader::init_row_filters() {
-    const auto& table_desc = _range.table_format_params.paimon_params;
+Status PaimonOrcReader::_init_deletion_vector() {
+    const auto& table_desc = get_scan_range().table_format_params.paimon_params;
     if (!table_desc.__isset.deletion_file) {
         return Status::OK();
     }
 
-    // set push down agg type to NONE because we can not do count push down opt
-    // if there are delete files.
-    if (!_range.table_format_params.paimon_params.__isset.row_count) {
-        _file_format_reader->set_push_down_agg_type(TPushAggOp::NONE);
+    // Cannot do count push down if there are delete files
+    if (!get_scan_range().table_format_params.paimon_params.__isset.row_count) {
+        set_push_down_agg_type(TPushAggOp::NONE);
+        lock_push_down_agg_type();
     }
     const auto& deletion_file = table_desc.deletion_file;
 
@@ -70,33 +91,29 @@ Status PaimonReader::init_row_filters() {
         auto* delete_rows = new DeleteRows;
 
         TFileRangeDesc delete_range;
-        // must use __set() method to make sure __isset is true
-        delete_range.__set_fs_name(_range.fs_name);
+        delete_range.__set_fs_name(get_scan_range().fs_name);
         delete_range.path = deletion_file.path;
         delete_range.start_offset = deletion_file.offset;
         delete_range.size = deletion_file.length + 4;
         delete_range.file_size = -1;
 
-        DeletionVectorReader dv_reader(_state, _profile, _params, delete_range, _io_ctx);
+        DeletionVectorReader dv_reader(get_state(), get_profile(), get_scan_params(), delete_range,
+                                       get_io_ctx());
         create_status = dv_reader.open();
         if (!create_status.ok()) [[unlikely]] {
             return nullptr;
         }
 
-        // the reason of adding 4: https://github.com/apache/paimon/issues/3313
         size_t bytes_read = deletion_file.length + 4;
-        // TODO: better way to alloc memeory
         std::vector<char> buffer(bytes_read);
         create_status = dv_reader.read_at(deletion_file.offset, {buffer.data(), bytes_read});
         if (!create_status.ok()) [[unlikely]] {
             return nullptr;
         }
 
-        // parse deletion vector
         const char* buf = buffer.data();
         uint32_t actual_length;
         std::memcpy(reinterpret_cast<char*>(&actual_length), buf, 4);
-        // change byte order to big endian
         std::reverse(reinterpret_cast<char*>(&actual_length),
                      reinterpret_cast<char*>(&actual_length) + 4);
         buf += 4;
@@ -109,7 +126,6 @@ Status PaimonReader::init_row_filters() {
         }
         uint32_t magic_number;
         std::memcpy(reinterpret_cast<char*>(&magic_number), buf, 4);
-        // change byte order to big endian
         std::reverse(reinterpret_cast<char*>(&magic_number),
                      reinterpret_cast<char*>(&magic_number) + 4);
         buf += 4;
@@ -139,14 +155,144 @@ Status PaimonReader::init_row_filters() {
     });
     RETURN_IF_ERROR(create_status);
     if (!_delete_rows->empty()) [[likely]] {
-        set_delete_rows();
+        set_position_delete_rowids(_delete_rows);
     }
     return Status::OK();
 }
 
-Status PaimonReader::get_next_block_inner(Block* block, size_t* read_rows, bool* eof) {
-    RETURN_IF_ERROR(_file_format_reader->get_next_block(block, read_rows, eof));
+// ============================================================================
+// PaimonParquetReader
+// ============================================================================
+void PaimonParquetReader::_init_paimon_profile() {
+    static const char* paimon_profile = "PaimonProfile";
+    ADD_TIMER(get_profile(), paimon_profile);
+    _paimon_profile.num_delete_rows =
+            ADD_CHILD_COUNTER(get_profile(), "NumDeleteRows", TUnit::UNIT, paimon_profile);
+    _paimon_profile.delete_files_read_time =
+            ADD_CHILD_TIMER(get_profile(), "DeleteFileReadTime", paimon_profile);
+    _paimon_profile.parse_deletion_vector_time =
+            ADD_CHILD_TIMER(get_profile(), "ParseDeletionVectorTime", paimon_profile);
+}
+
+Status PaimonParquetReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    const FieldDescriptor* field_desc = nullptr;
+    RETURN_IF_ERROR(get_file_metadata_schema(&field_desc));
+    DCHECK(field_desc != nullptr);
+
+    RETURN_IF_ERROR(gen_table_info_node_by_field_id(
+            get_scan_params(), get_scan_range().table_format_params.paimon_params.schema_id,
+            get_tuple_descriptor(), *field_desc));
+    ctx->table_info_node = table_info_node_ptr;
+
+    for (const auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::REGULAR ||
+            desc.category == ColumnCategory::GENERATED) {
+            ctx->column_names.push_back(desc.name);
+        }
+    }
     return Status::OK();
 }
+
+Status PaimonParquetReader::on_after_init_reader(ReaderInitContext* /*ctx*/) {
+    return _init_deletion_vector();
+}
+
+Status PaimonParquetReader::_init_deletion_vector() {
+    const auto& table_desc = get_scan_range().table_format_params.paimon_params;
+    if (!table_desc.__isset.deletion_file) {
+        return Status::OK();
+    }
+
+    if (!get_scan_range().table_format_params.paimon_params.__isset.row_count) {
+        set_push_down_agg_type(TPushAggOp::NONE);
+        lock_push_down_agg_type();
+    }
+    const auto& deletion_file = table_desc.deletion_file;
+
+    Status create_status = Status::OK();
+
+    std::string key;
+    key.resize(deletion_file.path.size() + sizeof(deletion_file.offset));
+    memcpy(key.data(), deletion_file.path.data(), deletion_file.path.size());
+    memcpy(key.data() + deletion_file.path.size(), &deletion_file.offset,
+           sizeof(deletion_file.offset));
+
+    SCOPED_TIMER(_paimon_profile.delete_files_read_time);
+    using DeleteRows = std::vector<int64_t>;
+    _delete_rows = _kv_cache->get<DeleteRows>(key, [&]() -> DeleteRows* {
+        auto* delete_rows = new DeleteRows;
+
+        TFileRangeDesc delete_range;
+        delete_range.__set_fs_name(get_scan_range().fs_name);
+        delete_range.path = deletion_file.path;
+        delete_range.start_offset = deletion_file.offset;
+        delete_range.size = deletion_file.length + 4;
+        delete_range.file_size = -1;
+
+        DeletionVectorReader dv_reader(get_state(), get_profile(), get_scan_params(), delete_range,
+                                       get_io_ctx());
+        create_status = dv_reader.open();
+        if (!create_status.ok()) [[unlikely]] {
+            return nullptr;
+        }
+
+        size_t bytes_read = deletion_file.length + 4;
+        std::vector<char> buffer(bytes_read);
+        create_status = dv_reader.read_at(deletion_file.offset, {buffer.data(), bytes_read});
+        if (!create_status.ok()) [[unlikely]] {
+            return nullptr;
+        }
+
+        const char* buf = buffer.data();
+        uint32_t actual_length;
+        std::memcpy(reinterpret_cast<char*>(&actual_length), buf, 4);
+        std::reverse(reinterpret_cast<char*>(&actual_length),
+                     reinterpret_cast<char*>(&actual_length) + 4);
+        buf += 4;
+        if (actual_length != bytes_read - 4) [[unlikely]] {
+            create_status = Status::RuntimeError(
+                    "DeletionVector deserialize error: length not match, "
+                    "actual length: {}, expect length: {}",
+                    actual_length, bytes_read - 4);
+            return nullptr;
+        }
+        uint32_t magic_number;
+        std::memcpy(reinterpret_cast<char*>(&magic_number), buf, 4);
+        std::reverse(reinterpret_cast<char*>(&magic_number),
+                     reinterpret_cast<char*>(&magic_number) + 4);
+        buf += 4;
+        const static uint32_t MAGIC_NUMBER = 1581511376;
+        if (magic_number != MAGIC_NUMBER) [[unlikely]] {
+            create_status = Status::RuntimeError(
+                    "DeletionVector deserialize error: invalid magic number {}", magic_number);
+            return nullptr;
+        }
+
+        roaring::Roaring roaring_bitmap;
+        SCOPED_TIMER(_paimon_profile.parse_deletion_vector_time);
+        try {
+            roaring_bitmap = roaring::Roaring::readSafe(buf, bytes_read - 4);
+        } catch (const std::runtime_error& e) {
+            create_status = Status::RuntimeError(
+                    "DeletionVector deserialize error: failed to deserialize roaring bitmap, {}",
+                    e.what());
+            return nullptr;
+        }
+        delete_rows->reserve(roaring_bitmap.cardinality());
+        for (auto it = roaring_bitmap.begin(); it != roaring_bitmap.end(); it++) {
+            delete_rows->push_back(*it);
+        }
+        COUNTER_UPDATE(_paimon_profile.num_delete_rows, delete_rows->size());
+        return delete_rows;
+    });
+    RETURN_IF_ERROR(create_status);
+    if (!_delete_rows->empty()) [[likely]] {
+        ParquetReader::set_delete_rows(_delete_rows);
+    }
+    return Status::OK();
+}
+
 #include "common/compile_check_end.h"
 } // namespace doris
diff --git a/be/src/format/table/paimon_reader.h b/be/src/format/table/paimon_reader.h
index de16c63cdd9a75..b0ab2da9e28f68 100644
--- a/be/src/format/table/paimon_reader.h
+++ b/be/src/format/table/paimon_reader.h
@@ -22,116 +22,83 @@
 
 #include "format/orc/vorc_reader.h"
 #include "format/parquet/vparquet_reader.h"
-#include "format/table/table_format_reader.h"
+#include "format/table/table_schema_change_helper.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
-class PaimonReader : public TableFormatReader, public TableSchemaChangeHelper {
+class ShardedKVCache;
+
+// PaimonOrcReader: directly inherits OrcReader (no composition wrapping).
+// Schema mapping in on_before_init_reader, deletion vector reading in on_after_init_reader.
+class PaimonOrcReader final : public OrcReader, public TableSchemaChangeHelper {
 public:
-    PaimonReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-                 RuntimeState* state, const TFileScanRangeParams& params,
-                 const TFileRangeDesc& range, ShardedKVCache* kv_cache, io::IOContext* io_ctx,
-                 FileMetaCache* meta_cache);
+    ENABLE_FACTORY_CREATOR(PaimonOrcReader);
+    PaimonOrcReader(RuntimeProfile* profile, RuntimeState* state,
+                    const TFileScanRangeParams& params, const TFileRangeDesc& range,
+                    size_t batch_size, const std::string& ctz, ShardedKVCache* kv_cache,
+                    io::IOContext* io_ctx, FileMetaCache* meta_cache = nullptr,
+                    bool enable_lazy_mat = true)
+            : OrcReader(profile, state, params, range, batch_size, ctz, io_ctx, meta_cache,
+                        enable_lazy_mat),
+              _kv_cache(kv_cache) {
+        _init_paimon_profile();
+    }
+    ~PaimonOrcReader() final = default;
 
-    ~PaimonReader() override = default;
+protected:
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
 
-    Status init_row_filters() final;
+    Status on_after_init_reader(ReaderInitContext* /*ctx*/) override;
 
-    Status get_next_block_inner(Block* block, size_t* read_rows, bool* eof) final;
+private:
+    void _init_paimon_profile();
+    Status _init_deletion_vector();
 
-protected:
     struct PaimonProfile {
-        RuntimeProfile::Counter* num_delete_rows;
-        RuntimeProfile::Counter* delete_files_read_time;
-        RuntimeProfile::Counter* parse_deletion_vector_time;
+        RuntimeProfile::Counter* num_delete_rows = nullptr;
+        RuntimeProfile::Counter* delete_files_read_time = nullptr;
+        RuntimeProfile::Counter* parse_deletion_vector_time = nullptr;
     };
-    // _delete_rows from kv_cache.
+
     const std::vector<int64_t>* _delete_rows = nullptr;
-    // owned by scan node
     ShardedKVCache* _kv_cache;
     PaimonProfile _paimon_profile;
-
-    virtual void set_delete_rows() = 0;
 };
 
-class PaimonOrcReader final : public PaimonReader {
+// PaimonParquetReader: directly inherits ParquetReader (no composition wrapping).
+class PaimonParquetReader final : public ParquetReader, public TableSchemaChangeHelper {
 public:
-    ENABLE_FACTORY_CREATOR(PaimonOrcReader);
-    PaimonOrcReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-                    RuntimeState* state, const TFileScanRangeParams& params,
-                    const TFileRangeDesc& range, ShardedKVCache* kv_cache, io::IOContext* io_ctx,
-                    FileMetaCache* meta_cache)
-            : PaimonReader(std::move(file_format_reader), profile, state, params, range, kv_cache,
-                           io_ctx, meta_cache) {};
-    ~PaimonOrcReader() final = default;
-
-    void set_delete_rows() final {
-        (reinterpret_cast<OrcReader*>(_file_format_reader.get()))
-                ->set_position_delete_rowids(_delete_rows);
+    ENABLE_FACTORY_CREATOR(PaimonParquetReader);
+    PaimonParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params,
+                        const TFileRangeDesc& range, size_t batch_size, const cctz::time_zone* ctz,
+                        ShardedKVCache* kv_cache, io::IOContext* io_ctx, RuntimeState* state,
+                        FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true)
+            : ParquetReader(profile, params, range, batch_size, ctz, io_ctx, state, meta_cache,
+                            enable_lazy_mat),
+              _kv_cache(kv_cache) {
+        _init_paimon_profile();
     }
+    ~PaimonParquetReader() final = default;
 
-    Status init_reader(
-            const std::vector<std::string>& read_table_col_names,
-            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
-            const RowDescriptor* row_descriptor,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
-        auto* orc_reader = static_cast<OrcReader*>(_file_format_reader.get());
-        const orc::Type* orc_type_ptr = nullptr;
-        RETURN_IF_ERROR(orc_reader->get_file_type(&orc_type_ptr));
-        RETURN_IF_ERROR(gen_table_info_node_by_field_id(
-                _params, _range.table_format_params.paimon_params.schema_id, tuple_descriptor,
-                orc_type_ptr));
-
-        return orc_reader->init_reader(&read_table_col_names, col_name_to_block_idx, conjuncts,
-                                       false, tuple_descriptor, row_descriptor,
-                                       not_single_slot_filter_conjuncts,
-                                       slot_id_to_filter_conjuncts, table_info_node_ptr);
-    }
-};
+protected:
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
 
-class PaimonParquetReader final : public PaimonReader {
-public:
-    ENABLE_FACTORY_CREATOR(PaimonParquetReader);
-    PaimonParquetReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
-                        RuntimeState* state, const TFileScanRangeParams& params,
-                        const TFileRangeDesc& range, ShardedKVCache* kv_cache,
-                        io::IOContext* io_ctx, FileMetaCache* meta_cache)
-            : PaimonReader(std::move(file_format_reader), profile, state, params, range, kv_cache,
-                           io_ctx, meta_cache) {};
-    ~PaimonParquetReader() final = default;
+    Status on_after_init_reader(ReaderInitContext* /*ctx*/) override;
 
-    void set_delete_rows() final {
-        (reinterpret_cast<ParquetReader*>(_file_format_reader.get()))
-                ->set_delete_rows(_delete_rows);
-    }
+private:
+    void _init_paimon_profile();
+    Status _init_deletion_vector();
 
-    Status init_reader(
-            const std::vector<std::string>& read_table_col_names,
-            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-            const VExprContextSPtrs& conjuncts,
-            phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>&
-                    slot_id_to_predicates,
-            const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
-            const std::unordered_map<std::string, int>* colname_to_slot_id,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
-        auto* parquet_reader = static_cast<ParquetReader*>(_file_format_reader.get());
-
-        const FieldDescriptor* field_desc = nullptr;
-        RETURN_IF_ERROR(parquet_reader->get_file_metadata_schema(&field_desc));
-        DCHECK(field_desc != nullptr);
-
-        RETURN_IF_ERROR(gen_table_info_node_by_field_id(
-                _params, _range.table_format_params.paimon_params.schema_id, tuple_descriptor,
-                *field_desc));
-
-        return parquet_reader->init_reader(read_table_col_names, col_name_to_block_idx, conjuncts,
-                                           slot_id_to_predicates, tuple_descriptor, row_descriptor,
-                                           colname_to_slot_id, not_single_slot_filter_conjuncts,
-                                           slot_id_to_filter_conjuncts, table_info_node_ptr);
-    }
+    struct PaimonProfile {
+        RuntimeProfile::Counter* num_delete_rows = nullptr;
+        RuntimeProfile::Counter* delete_files_read_time = nullptr;
+        RuntimeProfile::Counter* parse_deletion_vector_time = nullptr;
+    };
+
+    const std::vector<int64_t>* _delete_rows = nullptr;
+    ShardedKVCache* _kv_cache;
+    PaimonProfile _paimon_profile;
 };
+
 #include "common/compile_check_end.h"
 } // namespace doris
diff --git a/be/src/format/table/parquet_metadata_reader.cpp b/be/src/format/table/parquet_metadata_reader.cpp
index 6a032e67f76ba5..7df80f673cb602 100644
--- a/be/src/format/table/parquet_metadata_reader.cpp
+++ b/be/src/format/table/parquet_metadata_reader.cpp
@@ -798,7 +798,7 @@ Status ParquetMetadataReader::_init_from_scan_range(const TMetaScanRange& scan_r
     return Status::OK();
 }
 
-Status ParquetMetadataReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+Status ParquetMetadataReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     if (_eof) {
         *eof = true;
         *read_rows = 0;
diff --git a/be/src/format/table/parquet_metadata_reader.h b/be/src/format/table/parquet_metadata_reader.h
index 33eef93037e65e..d1b338d17f7085 100644
--- a/be/src/format/table/parquet_metadata_reader.h
+++ b/be/src/format/table/parquet_metadata_reader.h
@@ -54,9 +54,12 @@ class ParquetMetadataReader : public GenericReader {
     ~ParquetMetadataReader() override;
 
     Status init_reader();
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
     Status close() override;
 
+protected:
+    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
+
 private:
     Status _init_from_scan_range(const TMetaScanRange& scan_range);
     Status _build_rows(std::vector<MutableColumnPtr>& columns);
diff --git a/be/src/format/table/remote_doris_reader.cpp b/be/src/format/table/remote_doris_reader.cpp
index fde4dc49896db5..f97ed62c4fd37c 100644
--- a/be/src/format/table/remote_doris_reader.cpp
+++ b/be/src/format/table/remote_doris_reader.cpp
@@ -59,7 +59,7 @@ Status RemoteDorisReader::init_reader() {
     return Status::OK();
 }
 
-Status RemoteDorisReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+Status RemoteDorisReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     arrow::flight::FlightStreamChunk chunk;
     RETURN_DORIS_STATUS_IF_ERROR(_stream->Next().Value(&chunk));
 
@@ -95,11 +95,12 @@ Status RemoteDorisReader::get_next_block(Block* block, size_t* read_rows, bool*
     }
 
     *read_rows += num_rows;
+
     return Status::OK();
 }
 
-Status RemoteDorisReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                                      std::unordered_set<std::string>* missing_cols) {
+Status RemoteDorisReader::_get_columns_impl(
+        std::unordered_map<std::string, DataTypePtr>* name_to_type) {
     for (const auto& slot : _file_slot_descs) {
         name_to_type->emplace(slot->col_name(), slot->type());
     }
diff --git a/be/src/format/table/remote_doris_reader.h b/be/src/format/table/remote_doris_reader.h
index d8ea431fda288f..d4d6beaac345ce 100644
--- a/be/src/format/table/remote_doris_reader.h
+++ b/be/src/format/table/remote_doris_reader.h
@@ -27,6 +27,7 @@
 
 #include "common/status.h"
 #include "format/jni/jni_reader.h"
+#include "format/table/table_format_reader.h"
 #include "storage/olap_scan_common.h"
 
 namespace doris {
@@ -38,7 +39,7 @@ class Block;
 
 namespace doris {
 #include "common/compile_check_begin.h"
-class RemoteDorisReader : public GenericReader {
+class RemoteDorisReader : public TableFormatReader {
     ENABLE_FACTORY_CREATOR(RemoteDorisReader);
 
 public:
@@ -49,10 +50,9 @@ class RemoteDorisReader : public GenericReader {
 
     Status init_reader();
 
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
 
-    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) override;
+    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override;
 
     Status close() override;
 
@@ -64,6 +64,9 @@ class RemoteDorisReader : public GenericReader {
         _col_name_to_block_idx = col_name_to_block_idx;
     }
 
+protected:
+    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
+
 private:
     arrow::Status init_stream();
     const TFileRangeDesc& _range;
diff --git a/be/src/format/table/table_format_reader.cpp b/be/src/format/table/table_format_reader.cpp
index 09144f04ebd625..fe7afaa3514bb8 100644
--- a/be/src/format/table/table_format_reader.cpp
+++ b/be/src/format/table/table_format_reader.cpp
@@ -17,658 +17,95 @@
 
 #include "format/table/table_format_reader.h"
 
-#include <gen_cpp/ExternalTableSchema_types.h>
+#include <gen_cpp/PlanNodes_types.h>
+#include <gen_cpp/Types_types.h>
 
-#include <algorithm>
-#include <string>
-
-#include "common/status.h"
-#include "core/block/block.h"
-#include "core/data_type/data_type_array.h"
-#include "core/data_type/data_type_map.h"
-#include "core/data_type/data_type_struct.h"
-#include "format/generic_reader.h"
+#include "runtime/descriptors.h"
 #include "util/string_util.h"
 
 namespace doris {
-#include "common/compile_check_begin.h"
-const Status TableSchemaChangeHelper::BuildTableInfoUtil::SCHEMA_ERROR = Status::NotSupported(
-        "In the parquet/orc reader, it is not possible to read scenarios where the complex column "
-        "types"
-        "of the table and the file are inconsistent.");
-
-Status TableSchemaChangeHelper::BuildTableInfoUtil::by_parquet_name(
-        const TupleDescriptor* table_tuple_descriptor, const FieldDescriptor& parquet_field_desc,
-        std::shared_ptr<TableSchemaChangeHelper::Node>& node,
-        const std::set<TSlotId>* is_file_slot) {
-    auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
-    auto parquet_fields_schema = parquet_field_desc.get_fields_schema();
-    std::map<std::string, size_t> file_column_name_idx_map;
-    for (size_t idx = 0; idx < parquet_fields_schema.size(); idx++) {
-        file_column_name_idx_map.emplace(to_lower(parquet_fields_schema[idx].name), idx);
-    }
-
-    for (const auto& slot : table_tuple_descriptor->slots()) {
-        const auto& table_column_name = slot->col_name();
-        // https://github.com/apache/doris/pull/23369/files
-        if ((is_file_slot == nullptr || is_file_slot->contains(slot->id())) &&
-            file_column_name_idx_map.contains(table_column_name)) {
-            auto file_column_idx = file_column_name_idx_map[table_column_name];
-            std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-            RETURN_IF_ERROR(by_parquet_name(slot->type(), parquet_fields_schema[file_column_idx],
-                                            field_node));
-
-            struct_node->add_children(table_column_name,
-                                      parquet_fields_schema[file_column_idx].name, field_node);
-        } else {
-            struct_node->add_not_exist_children(table_column_name);
-        }
-    }
-
-    node = struct_node;
-    return Status::OK();
-};
-
-Status TableSchemaChangeHelper::BuildTableInfoUtil::by_parquet_name(
-        const DataTypePtr& table_data_type, const FieldSchema& file_field,
-        std::shared_ptr<TableSchemaChangeHelper::Node>& node) {
-    switch (table_data_type->get_primitive_type()) {
-    case TYPE_MAP: {
-        if (file_field.data_type->get_primitive_type() != TYPE_MAP) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        MOCK_REMOVE(DCHECK(file_field.children.size() == 2));
-        std::shared_ptr<TableSchemaChangeHelper::Node> key_node = nullptr;
-
-        {
-            const auto& key_type = assert_cast<const DataTypePtr&>(
-                    assert_cast<const DataTypeMap*>(remove_nullable(table_data_type).get())
-                            ->get_key_type());
-
-            RETURN_IF_ERROR(by_parquet_name(key_type, file_field.children[0], key_node));
-        }
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> value_node = nullptr;
-        {
-            const auto& value_type = assert_cast<const DataTypePtr&>(
-                    assert_cast<const DataTypeMap*>(remove_nullable(table_data_type).get())
-                            ->get_value_type());
-
-            RETURN_IF_ERROR(by_parquet_name(value_type, file_field.children[1], value_node));
-        }
-        node = std::make_shared<TableSchemaChangeHelper::MapNode>(key_node, value_node);
-        break;
-    }
-    case TYPE_ARRAY: {
-        if (file_field.data_type->get_primitive_type() != TYPE_ARRAY) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        MOCK_REMOVE(DCHECK(file_field.children.size() == 1));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> element_node = nullptr;
-        const auto& element_type = assert_cast<const DataTypePtr&>(
-                assert_cast<const DataTypeArray*>(remove_nullable(table_data_type).get())
-                        ->get_nested_type());
-
-        RETURN_IF_ERROR(by_parquet_name(element_type, file_field.children[0], element_node));
-
-        node = std::make_shared<TableSchemaChangeHelper::ArrayNode>(element_node);
-        break;
-    }
-    case TYPE_STRUCT: {
-        if (file_field.data_type->get_primitive_type() != TYPE_STRUCT) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-
-        auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
-
-        const auto struct_data_type =
-                assert_cast<const DataTypeStruct*>(remove_nullable(table_data_type).get());
-
-        std::map<std::string, size_t> parquet_field_names;
-        for (size_t idx = 0; idx < file_field.children.size(); idx++) {
-            parquet_field_names.emplace(to_lower(file_field.children[idx].name), idx);
-        }
-        for (size_t idx = 0; idx < struct_data_type->get_elements().size(); idx++) {
-            const auto& doris_field_name = struct_data_type->get_element_name(idx);
 
-            if (parquet_field_names.contains(doris_field_name)) {
-                auto parquet_field_idx = parquet_field_names[doris_field_name];
-                std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-
-                RETURN_IF_ERROR(by_parquet_name(struct_data_type->get_element(idx),
-                                                file_field.children[parquet_field_idx],
-                                                field_node));
-                struct_node->add_children(doris_field_name,
-                                          file_field.children[parquet_field_idx].name, field_node);
-            } else {
-                struct_node->add_not_exist_children(doris_field_name);
+/* static */
+Status TableFormatReader::_extract_partition_values(
+        const TFileRangeDesc& range, const TupleDescriptor* tuple_descriptor,
+        std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
+                partition_values) {
+    partition_values.clear();
+    if (range.__isset.columns_from_path_keys && tuple_descriptor != nullptr) {
+        std::unordered_map<std::string, const SlotDescriptor*> name_to_slot;
+        for (auto* slot : tuple_descriptor->slots()) {
+            name_to_slot[slot->col_name()] = slot;
+        }
+        for (size_t i = 0; i < range.columns_from_path_keys.size(); i++) {
+            const auto& key = range.columns_from_path_keys[i];
+            const auto& value = range.columns_from_path[i];
+            auto slot_it = name_to_slot.find(key);
+            if (slot_it != name_to_slot.end()) {
+                partition_values.emplace(key, std::make_tuple(value, slot_it->second));
             }
         }
-        node = struct_node;
-        break;
-    }
-    default: {
-        node = std::make_shared<TableSchemaChangeHelper::ScalarNode>();
-        break;
-    }
     }
-
     return Status::OK();
 }
 
-Status TableSchemaChangeHelper::BuildTableInfoUtil::by_orc_name(
-        const TupleDescriptor* table_tuple_descriptor, const orc::Type* orc_type_ptr,
-        std::shared_ptr<TableSchemaChangeHelper::Node>& node,
-        const std::set<TSlotId>* is_file_slot) {
-    auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
+Status TableFormatReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    RETURN_IF_ERROR(
+            _extract_partition_values(*ctx->range, ctx->tuple_descriptor, _fill_partition_values));
 
-    std::map<std::string, uint64_t> file_column_name_idx_map;
-    for (uint64_t idx = 0; idx < orc_type_ptr->getSubtypeCount(); idx++) {
-        // to_lower for match table column name.
-        file_column_name_idx_map.emplace(to_lower(orc_type_ptr->getFieldName(idx)), idx);
-    }
-
-    for (const auto& slot : table_tuple_descriptor->slots()) {
-        const auto& table_column_name = slot->col_name();
-        if ((is_file_slot == nullptr || is_file_slot->contains(slot->id())) &&
-            file_column_name_idx_map.contains(table_column_name)) {
-            auto file_column_idx = file_column_name_idx_map[table_column_name];
-            std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-            RETURN_IF_ERROR(by_orc_name(slot->type(), orc_type_ptr->getSubtype(file_column_idx),
-                                        field_node));
-            struct_node->add_children(table_column_name,
-                                      orc_type_ptr->getFieldName(file_column_idx), field_node);
-        } else {
-            struct_node->add_not_exist_children(table_column_name);
+    for (auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::REGULAR ||
+            desc.category == ColumnCategory::GENERATED) {
+            ctx->column_names.push_back(desc.name);
         }
     }
-    node = struct_node;
-    return Status::OK();
-}
 
-Status TableSchemaChangeHelper::BuildTableInfoUtil::by_orc_name(
-        const DataTypePtr& table_data_type, const orc::Type* orc_root,
-        std::shared_ptr<TableSchemaChangeHelper::Node>& node) {
-    switch (table_data_type->get_primitive_type()) {
-    case TYPE_MAP: {
-        if (orc_root->getKind() != orc::TypeKind::MAP) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        MOCK_REMOVE(DCHECK(orc_root->getSubtypeCount() == 2));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> key_node = nullptr;
-        const auto& key_type = assert_cast<const DataTypePtr&>(
-                assert_cast<const DataTypeMap*>(remove_nullable(table_data_type).get())
-                        ->get_key_type());
-        RETURN_IF_ERROR(by_orc_name(key_type, orc_root->getSubtype(0), key_node));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> value_node = nullptr;
-        const auto& value_type = assert_cast<const DataTypePtr&>(
-                assert_cast<const DataTypeMap*>(remove_nullable(table_data_type).get())
-                        ->get_value_type());
-        RETURN_IF_ERROR(by_orc_name(value_type, orc_root->getSubtype(1), value_node));
-        node = std::make_shared<TableSchemaChangeHelper::MapNode>(key_node, value_node);
-
-        break;
-    }
-    case TYPE_ARRAY: {
-        if (orc_root->getKind() != orc::TypeKind::LIST) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        MOCK_REMOVE(DCHECK(orc_root->getSubtypeCount() == 1));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> element_node = nullptr;
-        const auto& element_type = assert_cast<const DataTypePtr&>(
-                assert_cast<const DataTypeArray*>(remove_nullable(table_data_type).get())
-                        ->get_nested_type());
+    // Build default table_info_node from file column names (case-insensitive matching).
+    // Subclasses (OrcReader, ParquetReader, Hive, Iceberg, etc.) override on_before_init_reader
+    // and build their own table_info_node AFTER calling _extract_partition_values.
+    // For simple readers (CSV, JSON, etc.) that do NOT override, we build it here.
+    std::unordered_map<std::string, DataTypePtr> file_columns;
+    RETURN_IF_ERROR(get_columns(&file_columns));
 
-        RETURN_IF_ERROR(by_orc_name(element_type, orc_root->getSubtype(0), element_node));
-        node = std::make_shared<TableSchemaChangeHelper::ArrayNode>(element_node);
-        break;
+    // lowercase file name → original file name
+    std::unordered_map<std::string, std::string> lower_to_native;
+    for (const auto& [name, _] : file_columns) {
+        lower_to_native[doris::to_lower(name)] = name;
     }
-    case TYPE_STRUCT: {
-        if (orc_root->getKind() != orc::TypeKind::STRUCT) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
-
-        const auto struct_data_type =
-                assert_cast<const DataTypeStruct*>(remove_nullable(table_data_type).get());
-        std::map<std::string, uint64_t> orc_field_names;
-        for (uint64_t idx = 0; idx < orc_root->getSubtypeCount(); idx++) {
-            orc_field_names.emplace(to_lower(orc_root->getFieldName(idx)), idx);
-        }
 
-        for (size_t idx = 0; idx < struct_data_type->get_elements().size(); idx++) {
-            const auto& doris_field_name = struct_data_type->get_element_name(idx);
-
-            if (orc_field_names.contains(doris_field_name)) {
-                std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-
-                auto orc_field_idx = orc_field_names[doris_field_name];
-                RETURN_IF_ERROR(by_orc_name(struct_data_type->get_element(idx),
-                                            orc_root->getSubtype(orc_field_idx), field_node));
-                struct_node->add_children(doris_field_name, orc_root->getFieldName(orc_field_idx),
-                                          field_node);
-            } else {
-                struct_node->add_not_exist_children(doris_field_name);
+    // Auto-compute missing columns for simple readers (CSV/JSON/Arrow/etc.).
+    // Parquet/ORC readers compute their own _fill_missing_defaults in _do_init_reader.
+    if (_column_descs) {
+        for (const auto& desc : *_column_descs) {
+            if (desc.category != ColumnCategory::REGULAR &&
+                desc.category != ColumnCategory::GENERATED) {
+                continue;
             }
-        }
-        node = struct_node;
-        break;
-    }
-    default: {
-        node = std::make_shared<TableSchemaChangeHelper::ScalarNode>();
-        break;
-    }
-    }
-    return Status::OK();
-}
-
-Status TableSchemaChangeHelper::BuildTableInfoUtil::by_table_field_id(
-        const schema::external::TField table_schema, const schema::external::TField file_schema,
-        std::shared_ptr<TableSchemaChangeHelper::Node>& node) {
-    switch (table_schema.type.type) {
-    case TPrimitiveType::MAP: {
-        if (file_schema.type.type != TPrimitiveType::MAP) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.map_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.key_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.value_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.key_field.field_ptr != nullptr));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.value_field.field_ptr != nullptr));
-
-        MOCK_REMOVE(DCHECK(file_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(file_schema.nestedField.__isset.map_field));
-        MOCK_REMOVE(DCHECK(file_schema.nestedField.map_field.__isset.key_field));
-        MOCK_REMOVE(DCHECK(file_schema.nestedField.map_field.__isset.value_field));
-        MOCK_REMOVE(DCHECK(file_schema.nestedField.map_field.key_field.field_ptr != nullptr));
-        MOCK_REMOVE(DCHECK(file_schema.nestedField.map_field.value_field.field_ptr != nullptr));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> key_node = nullptr;
-        RETURN_IF_ERROR(by_table_field_id(*table_schema.nestedField.map_field.key_field.field_ptr,
-                                          *file_schema.nestedField.map_field.key_field.field_ptr,
-                                          key_node));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> value_node = nullptr;
-        RETURN_IF_ERROR(by_table_field_id(*table_schema.nestedField.map_field.value_field.field_ptr,
-                                          *file_schema.nestedField.map_field.value_field.field_ptr,
-                                          value_node));
-
-        node = std::make_shared<TableSchemaChangeHelper::MapNode>(key_node, value_node);
-        break;
-    }
-    case TPrimitiveType::ARRAY: {
-        if (file_schema.type.type != TPrimitiveType::ARRAY) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-
-        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.array_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.__isset.item_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.item_field.field_ptr != nullptr));
-
-        MOCK_REMOVE(DCHECK(file_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(file_schema.nestedField.__isset.array_field));
-        MOCK_REMOVE(DCHECK(file_schema.nestedField.array_field.__isset.item_field));
-        MOCK_REMOVE(DCHECK(file_schema.nestedField.array_field.item_field.field_ptr != nullptr));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> item_node = nullptr;
-        RETURN_IF_ERROR(by_table_field_id(
-                *table_schema.nestedField.array_field.item_field.field_ptr,
-                *file_schema.nestedField.array_field.item_field.field_ptr, item_node));
-
-        node = std::make_shared<TableSchemaChangeHelper::ArrayNode>(item_node);
-        break;
-    }
-    case TPrimitiveType::STRUCT: {
-        if (file_schema.type.type != TPrimitiveType::STRUCT) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.struct_field));
-
-        MOCK_REMOVE(DCHECK(file_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(file_schema.nestedField.__isset.struct_field));
-
-        RETURN_IF_ERROR(by_table_field_id(table_schema.nestedField.struct_field,
-                                          file_schema.nestedField.struct_field, node));
-        break;
-    }
-    default: {
-        node = std::make_shared<TableSchemaChangeHelper::ScalarNode>();
-        break;
-    }
-    }
-
-    return Status::OK();
-}
-
-Status TableSchemaChangeHelper::BuildTableInfoUtil::by_table_field_id(
-        const schema::external::TStructField& table_schema,
-        const schema::external::TStructField& file_schema,
-        std::shared_ptr<TableSchemaChangeHelper::Node>& node) {
-    std::map<int32_t, size_t> file_field_id_to_idx;
-    for (size_t idx = 0; idx < file_schema.fields.size(); ++idx) {
-        file_field_id_to_idx.emplace(file_schema.fields[idx].field_ptr->id, idx);
-    }
-    auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
-
-    for (const auto& table_field : table_schema.fields) {
-        const auto& table_column_name = table_field.field_ptr->name;
-
-        if (file_field_id_to_idx.contains(table_field.field_ptr->id)) {
-            const auto& file_field =
-                    file_schema.fields.at(file_field_id_to_idx[table_field.field_ptr->id]);
-
-            std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-            RETURN_IF_ERROR(
-                    by_table_field_id(*table_field.field_ptr, *file_field.field_ptr, field_node));
-
-            struct_node->add_children(table_column_name, file_field.field_ptr->name, field_node);
-        } else {
-            struct_node->add_not_exist_children(table_column_name);
-        }
-    }
-    node = std::move(struct_node);
-    return Status::OK();
-}
-
-Status TableSchemaChangeHelper::BuildTableInfoUtil::by_parquet_field_id(
-        const schema::external::TField& table_schema, const FieldSchema& parquet_field,
-        const bool exist_field_id, std::shared_ptr<TableSchemaChangeHelper::Node>& node) {
-    switch (table_schema.type.type) {
-    case TPrimitiveType::MAP: {
-        if (parquet_field.data_type->get_primitive_type() != TYPE_MAP) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.map_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.key_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.value_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.key_field.field_ptr != nullptr));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.value_field.field_ptr != nullptr));
-
-        MOCK_REMOVE(DCHECK(parquet_field.children.size() == 2));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> key_node = nullptr;
-        std::shared_ptr<TableSchemaChangeHelper::Node> value_node = nullptr;
-
-        RETURN_IF_ERROR(by_parquet_field_id(*table_schema.nestedField.map_field.key_field.field_ptr,
-                                            parquet_field.children[0], exist_field_id, key_node));
-
-        RETURN_IF_ERROR(
-                by_parquet_field_id(*table_schema.nestedField.map_field.value_field.field_ptr,
-                                    parquet_field.children[1], exist_field_id, value_node));
-
-        node = std::make_shared<TableSchemaChangeHelper::MapNode>(key_node, value_node);
-        break;
-    }
-    case TPrimitiveType::ARRAY: {
-        if (parquet_field.data_type->get_primitive_type() != TYPE_ARRAY) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.array_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.__isset.item_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.item_field.field_ptr != nullptr));
-
-        MOCK_REMOVE(DCHECK(parquet_field.children.size() == 1));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> element_node = nullptr;
-        RETURN_IF_ERROR(
-                by_parquet_field_id(*table_schema.nestedField.array_field.item_field.field_ptr,
-                                    parquet_field.children[0], exist_field_id, element_node));
-
-        node = std::make_shared<TableSchemaChangeHelper::ArrayNode>(element_node);
-        break;
-    }
-    case TPrimitiveType::STRUCT: {
-        if (parquet_field.data_type->get_primitive_type() != TYPE_STRUCT) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.struct_field));
-
-        auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
-
-        if (exist_field_id) {
-            std::map<int32_t, size_t> file_column_id_idx_map;
-            for (size_t idx = 0; idx < parquet_field.children.size(); idx++) {
-                DCHECK_NE(parquet_field.children[idx].field_id, -1);
-                file_column_id_idx_map.emplace(parquet_field.children[idx].field_id, idx);
+            // Skip columns already handled as partition columns to avoid double-fill.
+            if (_fill_partition_values.contains(desc.name)) {
+                continue;
             }
-
-            for (const auto& table_field : table_schema.nestedField.struct_field.fields) {
-                const auto& table_column_name = table_field.field_ptr->name;
-                if (file_column_id_idx_map.contains(table_field.field_ptr->id)) {
-                    const auto& file_field = parquet_field.children.at(
-                            file_column_id_idx_map[table_field.field_ptr->id]);
-                    std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-                    RETURN_IF_ERROR(by_parquet_field_id(*table_field.field_ptr, file_field,
-                                                        exist_field_id, field_node));
-                    struct_node->add_children(table_column_name, file_field.name, field_node);
-                } else {
-                    struct_node->add_not_exist_children(table_column_name);
-                }
-            }
-        } else {
-            std::map<std::string, size_t> file_column_idx_map;
-            for (size_t idx = 0; idx < parquet_field.children.size(); idx++) {
-                file_column_idx_map.emplace(parquet_field.children[idx].name, idx);
+            if (!lower_to_native.contains(doris::to_lower(desc.name))) {
+                _fill_missing_defaults[desc.name] = desc.default_expr;
+                _fill_missing_cols.insert(desc.name);
             }
-
-            for (const auto& table_field : table_schema.nestedField.struct_field.fields) {
-                const auto& table_column_name = table_field.field_ptr->name;
-                if (!table_field.field_ptr->__isset.name_mapping ||
-                    table_field.field_ptr->name_mapping.size() == 0) {
-                    return Status::DataQualityError(
-                            "name_mapping must be set when read missing field id data file.");
-                }
-
-                auto have_mapping = false;
-                for (const auto& mapped_name : table_field.field_ptr->name_mapping) {
-                    if (file_column_idx_map.contains(mapped_name)) {
-                        std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-                        const auto& file_field =
-                                parquet_field.children.at(file_column_idx_map.at(mapped_name));
-                        RETURN_IF_ERROR(by_parquet_field_id(*table_field.field_ptr, file_field,
-                                                            exist_field_id, field_node));
-                        struct_node->add_children(table_column_name, file_field.name, field_node);
-                        have_mapping = true;
-                        break;
-                    }
-                }
-                if (!have_mapping) {
-                    struct_node->add_not_exist_children(table_column_name);
-                }
-            }
-        }
-        node = struct_node;
-        break;
-    }
-    default: {
-        node = std::make_shared<ScalarNode>();
-        break;
-    }
-    }
-    return Status::OK();
-}
-
-Status TableSchemaChangeHelper::BuildTableInfoUtil::by_orc_field_id(
-        const schema::external::TField& table_schema, const orc::Type* orc_root,
-        const std::string& field_id_attribute_key, const bool exist_field_id,
-        std::shared_ptr<TableSchemaChangeHelper::Node>& node) {
-    switch (table_schema.type.type) {
-    case TPrimitiveType::MAP: {
-        if (orc_root->getKind() != orc::TypeKind::MAP) [[unlikely]] {
-            return SCHEMA_ERROR;
         }
-        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.map_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.key_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.value_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.key_field.field_ptr != nullptr));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.value_field.field_ptr != nullptr));
-
-        MOCK_REMOVE(DCHECK(orc_root->getSubtypeCount() == 2));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> key_node = nullptr;
-        std::shared_ptr<TableSchemaChangeHelper::Node> value_node = nullptr;
-
-        RETURN_IF_ERROR(by_orc_field_id(*table_schema.nestedField.map_field.key_field.field_ptr,
-                                        orc_root->getSubtype(0), field_id_attribute_key,
-                                        exist_field_id, key_node));
-
-        RETURN_IF_ERROR(by_orc_field_id(*table_schema.nestedField.map_field.value_field.field_ptr,
-                                        orc_root->getSubtype(1), field_id_attribute_key,
-                                        exist_field_id, value_node));
-
-        node = std::make_shared<TableSchemaChangeHelper::MapNode>(key_node, value_node);
-        break;
     }
-    case TPrimitiveType::ARRAY: {
-        if (orc_root->getKind() != orc::TypeKind::LIST) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.array_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.__isset.item_field));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.item_field.field_ptr != nullptr));
-
-        MOCK_REMOVE(DCHECK(orc_root->getSubtypeCount() == 1));
-
-        std::shared_ptr<TableSchemaChangeHelper::Node> element_node = nullptr;
-        RETURN_IF_ERROR(by_orc_field_id(*table_schema.nestedField.array_field.item_field.field_ptr,
-                                        orc_root->getSubtype(0), field_id_attribute_key,
-                                        exist_field_id, element_node));
-
-        node = std::make_shared<TableSchemaChangeHelper::ArrayNode>(element_node);
-        break;
-    }
-    case TPrimitiveType::STRUCT: {
-        if (orc_root->getKind() != orc::TypeKind::STRUCT) [[unlikely]] {
-            return SCHEMA_ERROR;
-        }
-        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
-        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.struct_field));
-
-        auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
-        if (exist_field_id) {
-            std::map<int32_t, size_t> file_column_id_idx_map;
-            for (size_t idx = 0; idx < orc_root->getSubtypeCount(); idx++) {
-                auto field_id = std::stoi(
-                        orc_root->getSubtype(idx)->getAttributeValue(field_id_attribute_key));
-                file_column_id_idx_map.emplace(field_id, idx);
-            }
 
-            for (const auto& table_field : table_schema.nestedField.struct_field.fields) {
-                const auto& table_column_name = table_field.field_ptr->name;
-                if (file_column_id_idx_map.contains(table_field.field_ptr->id)) {
-                    auto file_field_idx = file_column_id_idx_map[table_field.field_ptr->id];
-                    const auto& file_field = orc_root->getSubtype(file_field_idx);
-                    std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-                    RETURN_IF_ERROR(by_orc_field_id(*table_field.field_ptr, file_field,
-                                                    field_id_attribute_key, exist_field_id,
-                                                    field_node));
-                    struct_node->add_children(table_column_name,
-                                              orc_root->getFieldName(file_field_idx), field_node);
-                } else {
-                    struct_node->add_not_exist_children(table_column_name);
-                }
-            }
+    auto info_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
+    for (const auto* slot : ctx->tuple_descriptor->slots()) {
+        auto it = lower_to_native.find(slot->col_name_lower_case());
+        if (it != lower_to_native.end()) {
+            info_node->add_children(slot->col_name(), it->second,
+                                    TableSchemaChangeHelper::ConstNode::get_instance());
         } else {
-            std::map<std::string, size_t> file_column_idx_map;
-
-            for (size_t idx = 0; idx < orc_root->getSubtypeCount(); idx++) {
-                file_column_idx_map.emplace(orc_root->getFieldName(idx), idx);
-            }
-
-            for (const auto& table_field : table_schema.nestedField.struct_field.fields) {
-                const auto& table_column_name = table_field.field_ptr->name;
-                if (!table_field.field_ptr->__isset.name_mapping ||
-                    table_field.field_ptr->name_mapping.size() == 0) {
-                    return Status::DataQualityError(
-                            "name_mapping must be set when read missing field id data file.");
-                }
-                auto have_mapping = false;
-                for (const auto& mapped_name : table_field.field_ptr->name_mapping) {
-                    if (file_column_idx_map.contains(mapped_name)) {
-                        std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
-                        auto file_field_idx = file_column_idx_map.at(mapped_name);
-                        const auto& file_field = orc_root->getSubtype(file_field_idx);
-
-                        RETURN_IF_ERROR(by_orc_field_id(*table_field.field_ptr, file_field,
-                                                        field_id_attribute_key, exist_field_id,
-                                                        field_node));
-                        struct_node->add_children(table_column_name,
-                                                  orc_root->getFieldName(file_field_idx),
-                                                  field_node);
-                        have_mapping = true;
-                        break;
-                    }
-                }
-                if (!have_mapping) {
-                    struct_node->add_not_exist_children(table_column_name);
-                }
-            }
+            info_node->add_not_exist_children(slot->col_name());
         }
-
-        node = struct_node;
-        break;
-    }
-    default: {
-        node = std::make_shared<ScalarNode>();
-        break;
-    }
     }
+    ctx->table_info_node = info_node;
 
     return Status::OK();
 }
 
-std::string TableSchemaChangeHelper::debug(const std::shared_ptr<Node>& root, size_t level) {
-    std::string ans;
-
-    auto indent = [](size_t level) { return std::string(level * 2, ' '); };
-
-    std::string prefix = indent(level);
-
-    if (std::dynamic_pointer_cast<ScalarNode>(root)) {
-        ans += prefix + "ScalarNode\n";
-    } else if (auto struct_node = std::dynamic_pointer_cast<StructNode>(root)) {
-        ans += prefix + "StructNode\n";
-        for (const auto& [table_col_name, value] : struct_node->get_children()) {
-            ans += indent(level + 1) + table_col_name;
-            if (value.exists) {
-                ans += " (file: " + value.column_name + ")";
-            } else {
-                ans += " (not exists)";
-            }
-            ans += "\n";
-            if (value.node) {
-                ans += debug(value.node, level + 2);
-            }
-        }
-    } else if (auto array_node = std::dynamic_pointer_cast<ArrayNode>(root)) {
-        ans += prefix + "ArrayNode\n";
-        ans += indent(level + 1) + "Element:\n";
-        ans += debug(array_node->get_element_node(), level + 2);
-    } else if (auto map_node = std::dynamic_pointer_cast<MapNode>(root)) {
-        ans += prefix + "MapNode\n";
-        ans += indent(level + 1) + "Key:\n";
-        ans += debug(map_node->get_key_node(), level + 2);
-        ans += indent(level + 1) + "Value:\n";
-        ans += debug(map_node->get_value_node(), level + 2);
-    } else if (std::dynamic_pointer_cast<ConstNode>(root)) {
-        ans += prefix + "ConstNode\n";
-    } else {
-        ans += prefix + "UnknownNodeType\n";
-    }
-
-    return ans;
-}
-#include "common/compile_check_end.h"
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/format/table/table_format_reader.h b/be/src/format/table/table_format_reader.h
index 55c93e773bed80..d889420944203a 100644
--- a/be/src/format/table/table_format_reader.h
+++ b/be/src/format/table/table_format_reader.h
@@ -17,406 +17,188 @@
 
 #pragma once
 
-#include <algorithm>
-#include <cstddef>
+#include <functional>
 #include <string>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
 
 #include "common/status.h"
-#include "core/block/block.h"
-#include "core/data_type/data_type_array.h"
-#include "core/data_type/data_type_map.h"
-#include "core/data_type/data_type_struct.h"
+#include "core/column/column.h"
+#include "core/column/column_nullable.h"
+#include "exprs/vexpr_fwd.h"
 #include "format/generic_reader.h"
-#include "format/parquet/schema_desc.h"
-#include "runtime/runtime_profile.h"
-#include "runtime/runtime_state.h"
-#include "storage/olap_scan_common.h"
-#include "util/string_util.h"
 
 namespace doris {
 class TFileRangeDesc;
-class Block;
+class TupleDescriptor;
+class SlotDescriptor;
 } // namespace doris
 
 namespace doris {
 #include "common/compile_check_begin.h"
+
+/// Intermediate base class for "table readers" used by FileScanner.
+///
+/// Owns all column-filling state and logic:
+///   - partition column values (from path metadata)
+///   - missing column defaults (columns not in file)
+///   - synthesized column handlers (e.g. Iceberg $row_id)
+///
+/// Provides default on_after_read_block that auto-fills these columns.
+/// Parquet/ORC override to no-op (they fill per-batch internally).
+///
+/// Also provides the default on_before_init_reader for simple readers
+/// (CSV, JSON, etc.) that auto-computes partition/missing columns.
+/// ORC/Parquet override on_before_init_reader with format-specific schema matching.
 class TableFormatReader : public GenericReader {
 public:
-    TableFormatReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeState* state,
-                      RuntimeProfile* profile, const TFileScanRangeParams& params,
-                      const TFileRangeDesc& range, io::IOContext* io_ctx, FileMetaCache* meta_cache)
-            : _file_format_reader(std::move(file_format_reader)),
-              _state(state),
-              _profile(profile),
-              _params(params),
-              _range(range),
-              _io_ctx(io_ctx) {
-        _meta_cache = meta_cache;
-        if (range.table_format_params.__isset.table_level_row_count) {
-            _table_level_row_count = range.table_format_params.table_level_row_count;
-        } else {
-            _table_level_row_count = -1;
+    /// Get missing columns computed by on_before_init_reader / get_columns().
+    const std::unordered_set<std::string>& missing_cols() const { return _fill_missing_cols; }
+
+    // ---- Fill-column hooks (called by RowGroupReader and ORC per-batch reading) ----
+
+    /// Fill partition columns from metadata values.
+    virtual Status on_fill_partition_columns(Block* block, size_t rows,
+                                             const std::vector<std::string>& cols) {
+        DataTypeSerDe::FormatOptions text_format_options;
+        for (const auto& col_name : cols) {
+            auto it = _fill_partition_values.find(col_name);
+            if (it == _fill_partition_values.end()) {
+                continue;
+            }
+            auto col_ptr = block->get_by_position((*_fill_col_name_to_block_idx)[col_name])
+                                   .column->assume_mutable();
+            const auto& [value, slot_desc] = it->second;
+            auto text_serde = slot_desc->get_data_type_ptr()->get_serde();
+            Slice slice(value.data(), value.size());
+            uint64_t num_deserialized = 0;
+            if (text_serde->deserialize_column_from_fixed_json(
+                        *col_ptr, slice, rows, &num_deserialized, text_format_options) !=
+                Status::OK()) {
+                return Status::InternalError("Failed to fill partition column: {}={}",
+                                             slot_desc->col_name(), value);
+            }
+            if (num_deserialized != rows) {
+                return Status::InternalError(
+                        "Failed to fill partition column: {}={}. "
+                        "Expected rows: {}, actual: {}",
+                        slot_desc->col_name(), value, num_deserialized, rows);
+            }
         }
+        return Status::OK();
     }
-    ~TableFormatReader() override = default;
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) final {
-        if (_push_down_agg_type == TPushAggOp::type::COUNT && _table_level_row_count >= 0) {
-            auto rows =
-                    std::min(_table_level_row_count, (int64_t)_state->query_options().batch_size);
-            _table_level_row_count -= rows;
-            auto mutate_columns = block->mutate_columns();
-            for (auto& col : mutate_columns) {
-                col->resize(rows);
+
+    /// Fill missing columns with default values or null.
+    virtual Status on_fill_missing_columns(Block* block, size_t rows,
+                                           const std::vector<std::string>& cols) {
+        for (const auto& col_name : cols) {
+            if (!_fill_col_name_to_block_idx->contains(col_name)) {
+                return Status::InternalError("Missing column: {} not found in block {}", col_name,
+                                             block->dump_structure());
             }
-            block->set_columns(std::move(mutate_columns));
-            *read_rows = rows;
-            if (_table_level_row_count == 0) {
-                *eof = true;
+            auto it = _fill_missing_defaults.find(col_name);
+            VExprContextSPtr ctx = (it != _fill_missing_defaults.end()) ? it->second : nullptr;
+
+            if (ctx == nullptr) {
+                auto mutable_column =
+                        block->get_by_position((*_fill_col_name_to_block_idx)[col_name])
+                                .column->assume_mutable();
+                auto* nullable_column = static_cast<ColumnNullable*>(mutable_column.get());
+                nullable_column->insert_many_defaults(rows);
+            } else {
+                ColumnPtr result_column_ptr;
+                RETURN_IF_ERROR(ctx->execute(block, result_column_ptr));
+                if (result_column_ptr->use_count() == 1) {
+                    auto mutable_column = result_column_ptr->assume_mutable();
+                    mutable_column->resize(rows);
+                    result_column_ptr = result_column_ptr->convert_to_full_column_if_const();
+                    auto origin_column_type =
+                            block->get_by_position((*_fill_col_name_to_block_idx)[col_name]).type;
+                    bool is_nullable = origin_column_type->is_nullable();
+                    block->replace_by_position(
+                            (*_fill_col_name_to_block_idx)[col_name],
+                            is_nullable ? make_nullable(result_column_ptr) : result_column_ptr);
+                }
             }
-
-            return Status::OK();
         }
-        return get_next_block_inner(block, read_rows, eof);
+        return Status::OK();
     }
 
-    virtual Status get_next_block_inner(Block* block, size_t* read_rows, bool* eof) = 0;
-
-    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) final {
-        return _file_format_reader->get_columns(name_to_type, missing_cols);
-    }
+    // ---- Synthesized column handler registry ----
 
-    Status get_parsed_schema(std::vector<std::string>* col_names,
-                             std::vector<DataTypePtr>* col_types) override {
-        return _file_format_reader->get_parsed_schema(col_names, col_types);
-    }
+    using SynthesizedColumnHandler = std::function<Status(Block* block, size_t rows)>;
 
-    Status set_fill_columns(
-            const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
-                    partition_columns,
-            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) final {
-        return _file_format_reader->set_fill_columns(partition_columns, missing_columns);
+    void register_synthesized_column_handler(const std::string& col_name,
+                                             SynthesizedColumnHandler handler) {
+        _synthesized_col_handlers.emplace_back(col_name, std::move(handler));
     }
 
-    bool fill_all_columns() const override { return _file_format_reader->fill_all_columns(); }
-
-    virtual Status init_row_filters() = 0;
-
-    bool count_read_rows() override { return _file_format_reader->count_read_rows(); }
-
-    void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) override {
-        _file_format_reader->set_condition_cache_context(std::move(ctx));
+    Status fill_synthesized_columns(Block* block, size_t rows) {
+        for (auto& [name, handler] : _synthesized_col_handlers) {
+            RETURN_IF_ERROR(handler(block, rows));
+        }
+        return Status::OK();
     }
 
-    bool has_delete_operations() const override {
-        return _file_format_reader->has_delete_operations();
+    /// Unified fill for partition + missing + synthesized columns.
+    /// Called automatically by on_after_read_block for simple readers.
+    /// Parquet/ORC call individual on_fill_* methods per-batch internally.
+    Status fill_remaining_columns(Block* block, size_t rows) {
+        std::vector<std::string> part_col_names;
+        for (auto& kv : _fill_partition_values) {
+            part_col_names.push_back(kv.first);
+        }
+        RETURN_IF_ERROR(on_fill_partition_columns(block, rows, part_col_names));
+        std::vector<std::string> miss_col_names;
+        for (auto& kv : _fill_missing_defaults) {
+            miss_col_names.push_back(kv.first);
+        }
+        RETURN_IF_ERROR(on_fill_missing_columns(block, rows, miss_col_names));
+        RETURN_IF_ERROR(fill_synthesized_columns(block, rows));
+        return Status::OK();
     }
 
-    int64_t get_total_rows() const override { return _file_format_reader->get_total_rows(); }
+    bool has_synthesized_column_handlers() const { return !_synthesized_col_handlers.empty(); }
 
-protected:
-    std::string _table_format;                          // hudi, iceberg, paimon
-    std::unique_ptr<GenericReader> _file_format_reader; // parquet, orc
-    RuntimeState* _state = nullptr;                     // for query options
-    RuntimeProfile* _profile = nullptr;
-    const TFileScanRangeParams& _params;
-    const TFileRangeDesc& _range;
-    io::IOContext* _io_ctx = nullptr;
-    int64_t _table_level_row_count = -1; // for optimization of count(*) push down
-    void _collect_profile_before_close() override {
-        if (_file_format_reader != nullptr) {
-            _file_format_reader->collect_profile_before_close();
-        }
+    /// Fill generated columns. Default is no-op.
+    virtual Status on_fill_generated_columns(Block* block, size_t rows,
+                                             const std::vector<std::string>& cols) {
+        return Status::OK();
     }
-};
-
-class TableSchemaChangeHelper {
-public:
-    ~TableSchemaChangeHelper() = default;
-
-    class Node {
-    public:
-        virtual ~Node() = default;
-        virtual std::shared_ptr<Node> get_children_node(std::string table_column_name) const {
-            throw std::logic_error("get_children_node should not be called on base TableInfoNode");
-        };
 
-        virtual std::shared_ptr<Node> get_children_node_by_file_column_name(
-                std::string file_column_name) const {
-            throw std::logic_error(
-                    "get_children_node_by_file_column_name should not be called on base "
-                    "TableInfoNode");
-        };
-
-        virtual std::string children_file_column_name(std::string table_column_name) const {
-            throw std::logic_error(
-                    "children_file_column_name should not be called on base TableInfoNode");
-        }
-
-        virtual bool children_column_exists(std::string table_column_name) const {
-            throw std::logic_error(
-                    "children_column_exists should not be called on base TableInfoNode");
-        }
-
-        virtual std::shared_ptr<Node> get_element_node() const {
-            throw std::logic_error("get_element_node should not be called on base TableInfoNode");
-        }
-
-        virtual std::shared_ptr<Node> get_key_node() const {
-            throw std::logic_error("get_key_node should not be called on base TableInfoNode");
-        }
-        virtual std::shared_ptr<Node> get_value_node() const {
-            throw std::logic_error("get_value_node should not be called on base TableInfoNode");
-        }
-
-        virtual void add_not_exist_children(std::string table_column_name) {
-            throw std::logic_error(
-                    "add_not_exist_children should not be called on base TableInfoNode");
-        };
-
-        virtual void add_children(std::string table_column_name, std::string file_column_name,
-                                  std::shared_ptr<Node> children_node) {
-            throw std::logic_error("add_children should not be called on base TableInfoNode");
-        }
-    };
-
-    class ScalarNode : public Node {};
-
-    class StructNode : public Node {
-        struct StructChild {
-            const std::shared_ptr<Node> node;
-            const std::string column_name;
-            const bool exists;
-        };
-
-        // table column name -> { node, file_column_name, exists_in_file}
-        std::map<std::string, StructChild> children;
-
-    public:
-        std::shared_ptr<Node> get_children_node(std::string table_column_name) const override {
-            DCHECK(children.contains(table_column_name));
-            DCHECK(children_column_exists(table_column_name));
-            return children.at(table_column_name).node;
-        }
-
-        std::shared_ptr<Node> get_children_node_by_file_column_name(
-                std::string file_column_name) const override {
-            // Search for the child by file column name
-            for (const auto& [table_name, child] : children) {
-                if (child.exists && child.column_name == file_column_name) {
-                    return child.node;
-                }
-            }
-            // Not found - throw or return nullptr
-            throw std::runtime_error("File column name '" + file_column_name +
-                                     "' not found in struct children");
-        }
-
-        std::string children_file_column_name(std::string table_column_name) const override {
-            DCHECK(children.contains(table_column_name));
-            DCHECK(children_column_exists(table_column_name));
-            return children.at(table_column_name).column_name;
-        }
-
-        bool children_column_exists(std::string table_column_name) const override {
-            DCHECK(children.contains(table_column_name));
-            return children.at(table_column_name).exists;
-        }
-
-        void add_not_exist_children(std::string table_column_name) override {
-            children.emplace(table_column_name, StructChild {nullptr, "", false});
-        }
-
-        void add_children(std::string table_column_name, std::string file_column_name,
-                          std::shared_ptr<Node> children_node) override {
-            children.emplace(table_column_name,
-                             StructChild {children_node, file_column_name, true});
-        }
-
-        const std::map<std::string, StructChild>& get_children() const { return children; }
-    };
-
-    class ArrayNode : public Node {
-        std::shared_ptr<Node> _element_node;
-
-    public:
-        ArrayNode(const std::shared_ptr<Node>& element_node) : _element_node(element_node) {}
-
-        std::shared_ptr<Node> get_element_node() const override { return _element_node; }
-    };
-
-    class MapNode : public Node {
-        std::shared_ptr<Node> _key_node;
-        std::shared_ptr<Node> _value_node;
-
-    public:
-        MapNode(const std::shared_ptr<Node>& key_node, const std::shared_ptr<Node>& value_node)
-                : _key_node(key_node), _value_node(value_node) {}
-
-        std::shared_ptr<Node> get_key_node() const override { return _key_node; }
-
-        std::shared_ptr<Node> get_value_node() const override { return _value_node; }
-    };
-
-    class ConstNode : public Node {
-        // If you can be sure that there has been no schema change between the table and the file,
-        // you can use constNode (of course, you need to pay attention to case sensitivity).
-    public:
-        std::shared_ptr<Node> get_children_node(std::string table_column_name) const override {
-            return get_instance();
-        };
-
-        std::shared_ptr<Node> get_children_node_by_file_column_name(
-                std::string file_column_name) const override {
-            return get_instance();
-        };
-
-        std::string children_file_column_name(std::string table_column_name) const override {
-            return table_column_name;
-        }
-
-        bool children_column_exists(std::string table_column_name) const override { return true; }
-
-        std::shared_ptr<Node> get_element_node() const override { return get_instance(); }
-
-        std::shared_ptr<Node> get_key_node() const override { return get_instance(); }
-
-        std::shared_ptr<Node> get_value_node() const override { return get_instance(); }
-
-        static const std::shared_ptr<ConstNode>& get_instance() {
-            static const std::shared_ptr<ConstNode> instance = std::make_shared<ConstNode>();
-            return instance;
-        }
-    };
-
-    static std::string debug(const std::shared_ptr<Node>& root, size_t level = 0);
-
-protected:
-    // Whenever external components invoke the Parquet/ORC reader (e.g., init_reader, get_next_block, set_fill_columns),
-    // the parameters passed in are based on `table column names`.
-    // The table_info_node_ptr assists the Parquet/ORC reader in mapping these to the actual
-    // `file columns name` to be read and enables min/max filtering.
-    std::shared_ptr<Node> table_info_node_ptr = std::make_shared<StructNode>();
+    /// Default on_before_init_reader for simple readers (CSV, JSON, etc.).
+    /// Auto-computes partition values, missing columns, and table_info_node.
+    /// ORC/Parquet/Hive/Iceberg override with format-specific schema matching.
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
 
 protected:
-    Status gen_table_info_node_by_field_id(const TFileScanRangeParams& params,
-                                           int64_t split_schema_id,
-                                           const TupleDescriptor* tuple_descriptor,
-                                           const FieldDescriptor& parquet_field_desc) {
-        if (!params.__isset.history_schema_info) [[unlikely]] {
-            RETURN_IF_ERROR(BuildTableInfoUtil::by_parquet_name(
-                    tuple_descriptor, parquet_field_desc, table_info_node_ptr));
-            return Status::OK();
+    /// Default on_after_read_block: auto-fill partition/missing/synthesized columns.
+    /// Parquet/ORC override to no-op (they fill per-batch internally).
+    Status on_after_read_block(Block* block, size_t* read_rows) override {
+        if (*read_rows > 0 && _push_down_agg_type != TPushAggOp::type::COUNT) {
+            RETURN_IF_ERROR(fill_remaining_columns(block, *read_rows));
         }
-        return gen_table_info_node_by_field_id(params, split_schema_id);
-    }
-
-    Status gen_table_info_node_by_field_id(const TFileScanRangeParams& params,
-                                           int64_t split_schema_id,
-                                           const TupleDescriptor* tuple_descriptor,
-                                           const orc::Type* orc_type_ptr) {
-        if (!params.__isset.history_schema_info) [[unlikely]] {
-            RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_name(tuple_descriptor, orc_type_ptr,
-                                                            table_info_node_ptr));
-            return Status::OK();
-        }
-        return gen_table_info_node_by_field_id(params, split_schema_id);
-    }
-
-private:
-    // The filed id of both the table and the file come from the pass from fe. (params.history_schema_info)
-    Status gen_table_info_node_by_field_id(const TFileScanRangeParams& params,
-                                           int64_t split_schema_id) {
-        if (params.current_schema_id == split_schema_id) {
-            table_info_node_ptr = ConstNode::get_instance();
-            return Status::OK();
-        }
-
-        int32_t table_schema_idx = -1;
-        int32_t file_schema_idx = -1;
-        //todo : Perhaps this process can be optimized by pre-generating a map
-        for (int32_t idx = 0; idx < params.history_schema_info.size(); idx++) {
-            if (params.history_schema_info[idx].schema_id == params.current_schema_id) {
-                table_schema_idx = idx;
-            } else if (params.history_schema_info[idx].schema_id == split_schema_id) {
-                file_schema_idx = idx;
-            }
-        }
-
-        if (table_schema_idx == -1 || file_schema_idx == -1) [[unlikely]] {
-            return Status::InternalError(
-                    "miss table/file schema info, table_schema_idx:{}  file_schema_idx:{}",
-                    table_schema_idx, file_schema_idx);
-        }
-        RETURN_IF_ERROR(BuildTableInfoUtil::by_table_field_id(
-                params.history_schema_info.at(table_schema_idx).root_field,
-                params.history_schema_info.at(file_schema_idx).root_field, table_info_node_ptr));
         return Status::OK();
     }
 
-public:
-    /* Schema change Util. Used to generate `std::shared_ptr<TableSchemaChangeHelper::Node> node`.
-        Passed node to parquet/orc reader to find file columns based on table columns,
-    */
-    struct BuildTableInfoUtil {
-        static const Status SCHEMA_ERROR;
-
-        // todo : Maybe I can use templates to implement this functionality.
-
-        // for hive parquet : The table column names passed from fe are lowercase, so use lowercase file column names to match table column names.
-        static Status by_parquet_name(const TupleDescriptor* table_tuple_descriptor,
-                                      const FieldDescriptor& parquet_field_desc,
-                                      std::shared_ptr<TableSchemaChangeHelper::Node>& node,
-                                      const std::set<TSlotId>* is_file_slot = nullptr);
-
-        // for hive parquet
-        static Status by_parquet_name(const DataTypePtr& table_data_type,
-                                      const FieldSchema& file_field,
-                                      std::shared_ptr<TableSchemaChangeHelper::Node>& node);
-
-        // for hive orc: The table column names passed from fe are lowercase, so use lowercase file column names to match table column names.
-        static Status by_orc_name(const TupleDescriptor* table_tuple_descriptor,
-                                  const orc::Type* orc_type_ptr,
-                                  std::shared_ptr<TableSchemaChangeHelper::Node>& node,
-                                  const std::set<TSlotId>* is_file_slot = nullptr);
-        // for hive orc
-        static Status by_orc_name(const DataTypePtr& table_data_type, const orc::Type* orc_root,
-                                  std::shared_ptr<TableSchemaChangeHelper::Node>& node);
-
-        // for paimon hudi: Use the field id in the `table schema` and `history table schema` to match columns.
-        static Status by_table_field_id(const schema::external::TField table_schema,
-                                        const schema::external::TField file_schema,
-                                        std::shared_ptr<TableSchemaChangeHelper::Node>& node);
-
-        // for paimon hudi
-        static Status by_table_field_id(const schema::external::TStructField& table_schema,
-                                        const schema::external::TStructField& file_schema,
-                                        std::shared_ptr<TableSchemaChangeHelper::Node>& node);
-
-        // for iceberg parquet
-        static Status by_parquet_field_id(const schema::external::TField& table_schema,
-                                          const FieldSchema& parquet_field,
-                                          const bool exist_field_id,
-                                          std::shared_ptr<TableSchemaChangeHelper::Node>& node);
-
-        // for iceberg orc
-        static Status by_orc_field_id(const schema::external::TField& table_schema,
-                                      const orc::Type* orc_root,
-                                      const std::string& field_id_attribute_key,
-                                      const bool exist_field_id,
-                                      std::shared_ptr<TableSchemaChangeHelper::Node>& node);
-    };
-};
-
-struct ColumnIdResult {
-    std::set<uint64_t> column_ids;
-    std::set<uint64_t> filter_column_ids;
-
-    ColumnIdResult() = default; // Add default constructor
-
-    ColumnIdResult(std::set<uint64_t> column_ids_, std::set<uint64_t> filter_column_ids_)
-            : column_ids(std::move(column_ids_)),
-              filter_column_ids(std::move(filter_column_ids_)) {}
+    /// Extracts partition key→value pairs from the file range.
+    /// Static utility called by on_before_init_reader implementations.
+    static Status _extract_partition_values(
+            const TFileRangeDesc& range, const TupleDescriptor* tuple_descriptor,
+            std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
+                    partition_values);
+
+    // ---- Fill column data (set by on_before_init_reader / _do_init_reader) ----
+    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
+            _fill_partition_values;
+    std::unordered_map<std::string, VExprContextSPtr> _fill_missing_defaults;
+    std::unordered_map<std::string, uint32_t>* _fill_col_name_to_block_idx = nullptr;
+    std::unordered_set<std::string> _fill_missing_cols;
+
+    // ---- Synthesized column handlers ----
+    std::vector<std::pair<std::string, SynthesizedColumnHandler>> _synthesized_col_handlers;
 };
 
 #include "common/compile_check_end.h"
diff --git a/be/src/format/table/table_schema_change_helper.cpp b/be/src/format/table/table_schema_change_helper.cpp
new file mode 100644
index 00000000000000..48492b58dad967
--- /dev/null
+++ b/be/src/format/table/table_schema_change_helper.cpp
@@ -0,0 +1,657 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "table_schema_change_helper.h"
+
+#include <gen_cpp/ExternalTableSchema_types.h>
+
+#include <algorithm>
+#include <string>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_struct.h"
+#include "format/generic_reader.h"
+#include "util/string_util.h"
+
+namespace doris {
+#include "common/compile_check_begin.h"
+const Status TableSchemaChangeHelper::BuildTableInfoUtil::SCHEMA_ERROR = Status::NotSupported(
+        "In the parquet/orc reader, it is not possible to read scenarios where the complex column "
+        "types"
+        "of the table and the file are inconsistent.");
+
+Status TableSchemaChangeHelper::BuildTableInfoUtil::by_parquet_name(
+        const TupleDescriptor* table_tuple_descriptor, const FieldDescriptor& parquet_field_desc,
+        std::shared_ptr<TableSchemaChangeHelper::Node>& node,
+        const std::set<TSlotId>* is_file_slot) {
+    auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
+    auto parquet_fields_schema = parquet_field_desc.get_fields_schema();
+    std::map<std::string, size_t> file_column_name_idx_map;
+    for (size_t idx = 0; idx < parquet_fields_schema.size(); idx++) {
+        file_column_name_idx_map.emplace(to_lower(parquet_fields_schema[idx].name), idx);
+    }
+
+    for (const auto& slot : table_tuple_descriptor->slots()) {
+        const auto& table_column_name = slot->col_name();
+        // https://github.com/apache/doris/pull/23369/files
+        if ((is_file_slot == nullptr || is_file_slot->contains(slot->id())) &&
+            file_column_name_idx_map.contains(table_column_name)) {
+            auto file_column_idx = file_column_name_idx_map[table_column_name];
+            std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
+            RETURN_IF_ERROR(by_parquet_name(slot->type(), parquet_fields_schema[file_column_idx],
+                                            field_node));
+
+            struct_node->add_children(table_column_name,
+                                      parquet_fields_schema[file_column_idx].name, field_node);
+        } else {
+            struct_node->add_not_exist_children(table_column_name);
+        }
+    }
+
+    node = struct_node;
+    return Status::OK();
+};
+
+Status TableSchemaChangeHelper::BuildTableInfoUtil::by_parquet_name(
+        const DataTypePtr& table_data_type, const FieldSchema& file_field,
+        std::shared_ptr<TableSchemaChangeHelper::Node>& node) {
+    switch (table_data_type->get_primitive_type()) {
+    case TYPE_MAP: {
+        if (file_field.data_type->get_primitive_type() != TYPE_MAP) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(file_field.children.size() == 2));
+        std::shared_ptr<TableSchemaChangeHelper::Node> key_node = nullptr;
+
+        {
+            const auto& key_type = assert_cast<const DataTypePtr&>(
+                    assert_cast<const DataTypeMap*>(remove_nullable(table_data_type).get())
+                            ->get_key_type());
+
+            RETURN_IF_ERROR(by_parquet_name(key_type, file_field.children[0], key_node));
+        }
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> value_node = nullptr;
+        {
+            const auto& value_type = assert_cast<const DataTypePtr&>(
+                    assert_cast<const DataTypeMap*>(remove_nullable(table_data_type).get())
+                            ->get_value_type());
+
+            RETURN_IF_ERROR(by_parquet_name(value_type, file_field.children[1], value_node));
+        }
+        node = std::make_shared<TableSchemaChangeHelper::MapNode>(key_node, value_node);
+        break;
+    }
+    case TYPE_ARRAY: {
+        if (file_field.data_type->get_primitive_type() != TYPE_ARRAY) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(file_field.children.size() == 1));
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> element_node = nullptr;
+        const auto& element_type = assert_cast<const DataTypePtr&>(
+                assert_cast<const DataTypeArray*>(remove_nullable(table_data_type).get())
+                        ->get_nested_type());
+
+        RETURN_IF_ERROR(by_parquet_name(element_type, file_field.children[0], element_node));
+
+        node = std::make_shared<TableSchemaChangeHelper::ArrayNode>(element_node);
+        break;
+    }
+    case TYPE_STRUCT: {
+        if (file_field.data_type->get_primitive_type() != TYPE_STRUCT) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+
+        auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
+
+        const auto struct_data_type =
+                assert_cast<const DataTypeStruct*>(remove_nullable(table_data_type).get());
+
+        std::map<std::string, size_t> parquet_field_names;
+        for (size_t idx = 0; idx < file_field.children.size(); idx++) {
+            parquet_field_names.emplace(to_lower(file_field.children[idx].name), idx);
+        }
+        for (size_t idx = 0; idx < struct_data_type->get_elements().size(); idx++) {
+            const auto& doris_field_name = struct_data_type->get_element_name(idx);
+
+            if (parquet_field_names.contains(doris_field_name)) {
+                auto parquet_field_idx = parquet_field_names[doris_field_name];
+                std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
+
+                RETURN_IF_ERROR(by_parquet_name(struct_data_type->get_element(idx),
+                                                file_field.children[parquet_field_idx],
+                                                field_node));
+                struct_node->add_children(doris_field_name,
+                                          file_field.children[parquet_field_idx].name, field_node);
+            } else {
+                struct_node->add_not_exist_children(doris_field_name);
+            }
+        }
+        node = struct_node;
+        break;
+    }
+    default: {
+        node = std::make_shared<TableSchemaChangeHelper::ScalarNode>();
+        break;
+    }
+    }
+
+    return Status::OK();
+}
+
+Status TableSchemaChangeHelper::BuildTableInfoUtil::by_orc_name(
+        const TupleDescriptor* table_tuple_descriptor, const orc::Type* orc_type_ptr,
+        std::shared_ptr<TableSchemaChangeHelper::Node>& node,
+        const std::set<TSlotId>* is_file_slot) {
+    auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
+
+    std::map<std::string, uint64_t> file_column_name_idx_map;
+    for (uint64_t idx = 0; idx < orc_type_ptr->getSubtypeCount(); idx++) {
+        // to_lower for match table column name.
+        file_column_name_idx_map.emplace(to_lower(orc_type_ptr->getFieldName(idx)), idx);
+    }
+
+    for (const auto& slot : table_tuple_descriptor->slots()) {
+        const auto& table_column_name = slot->col_name();
+        if ((is_file_slot == nullptr || is_file_slot->contains(slot->id())) &&
+            file_column_name_idx_map.contains(table_column_name)) {
+            auto file_column_idx = file_column_name_idx_map[table_column_name];
+            std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
+            RETURN_IF_ERROR(by_orc_name(slot->type(), orc_type_ptr->getSubtype(file_column_idx),
+                                        field_node));
+            struct_node->add_children(table_column_name,
+                                      orc_type_ptr->getFieldName(file_column_idx), field_node);
+        } else {
+            struct_node->add_not_exist_children(table_column_name);
+        }
+    }
+    node = struct_node;
+    return Status::OK();
+}
+
+Status TableSchemaChangeHelper::BuildTableInfoUtil::by_orc_name(
+        const DataTypePtr& table_data_type, const orc::Type* orc_root,
+        std::shared_ptr<TableSchemaChangeHelper::Node>& node) {
+    switch (table_data_type->get_primitive_type()) {
+    case TYPE_MAP: {
+        if (orc_root->getKind() != orc::TypeKind::MAP) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(orc_root->getSubtypeCount() == 2));
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> key_node = nullptr;
+        const auto& key_type = assert_cast<const DataTypePtr&>(
+                assert_cast<const DataTypeMap*>(remove_nullable(table_data_type).get())
+                        ->get_key_type());
+        RETURN_IF_ERROR(by_orc_name(key_type, orc_root->getSubtype(0), key_node));
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> value_node = nullptr;
+        const auto& value_type = assert_cast<const DataTypePtr&>(
+                assert_cast<const DataTypeMap*>(remove_nullable(table_data_type).get())
+                        ->get_value_type());
+        RETURN_IF_ERROR(by_orc_name(value_type, orc_root->getSubtype(1), value_node));
+        node = std::make_shared<TableSchemaChangeHelper::MapNode>(key_node, value_node);
+
+        break;
+    }
+    case TYPE_ARRAY: {
+        if (orc_root->getKind() != orc::TypeKind::LIST) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(orc_root->getSubtypeCount() == 1));
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> element_node = nullptr;
+        const auto& element_type = assert_cast<const DataTypePtr&>(
+                assert_cast<const DataTypeArray*>(remove_nullable(table_data_type).get())
+                        ->get_nested_type());
+
+        RETURN_IF_ERROR(by_orc_name(element_type, orc_root->getSubtype(0), element_node));
+        node = std::make_shared<TableSchemaChangeHelper::ArrayNode>(element_node);
+        break;
+    }
+    case TYPE_STRUCT: {
+        if (orc_root->getKind() != orc::TypeKind::STRUCT) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
+
+        const auto struct_data_type =
+                assert_cast<const DataTypeStruct*>(remove_nullable(table_data_type).get());
+        std::map<std::string, uint64_t> orc_field_names;
+        for (uint64_t idx = 0; idx < orc_root->getSubtypeCount(); idx++) {
+            orc_field_names.emplace(to_lower(orc_root->getFieldName(idx)), idx);
+        }
+
+        for (size_t idx = 0; idx < struct_data_type->get_elements().size(); idx++) {
+            const auto& doris_field_name = struct_data_type->get_element_name(idx);
+
+            if (orc_field_names.contains(doris_field_name)) {
+                std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
+
+                auto orc_field_idx = orc_field_names[doris_field_name];
+                RETURN_IF_ERROR(by_orc_name(struct_data_type->get_element(idx),
+                                            orc_root->getSubtype(orc_field_idx), field_node));
+                struct_node->add_children(doris_field_name, orc_root->getFieldName(orc_field_idx),
+                                          field_node);
+            } else {
+                struct_node->add_not_exist_children(doris_field_name);
+            }
+        }
+        node = struct_node;
+        break;
+    }
+    default: {
+        node = std::make_shared<TableSchemaChangeHelper::ScalarNode>();
+        break;
+    }
+    }
+    return Status::OK();
+}
+
+Status TableSchemaChangeHelper::BuildTableInfoUtil::by_table_field_id(
+        const schema::external::TField table_schema, const schema::external::TField file_schema,
+        std::shared_ptr<TableSchemaChangeHelper::Node>& node) {
+    switch (table_schema.type.type) {
+    case TPrimitiveType::MAP: {
+        if (file_schema.type.type != TPrimitiveType::MAP) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.map_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.key_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.value_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.key_field.field_ptr != nullptr));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.value_field.field_ptr != nullptr));
+
+        MOCK_REMOVE(DCHECK(file_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(file_schema.nestedField.__isset.map_field));
+        MOCK_REMOVE(DCHECK(file_schema.nestedField.map_field.__isset.key_field));
+        MOCK_REMOVE(DCHECK(file_schema.nestedField.map_field.__isset.value_field));
+        MOCK_REMOVE(DCHECK(file_schema.nestedField.map_field.key_field.field_ptr != nullptr));
+        MOCK_REMOVE(DCHECK(file_schema.nestedField.map_field.value_field.field_ptr != nullptr));
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> key_node = nullptr;
+        RETURN_IF_ERROR(by_table_field_id(*table_schema.nestedField.map_field.key_field.field_ptr,
+                                          *file_schema.nestedField.map_field.key_field.field_ptr,
+                                          key_node));
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> value_node = nullptr;
+        RETURN_IF_ERROR(by_table_field_id(*table_schema.nestedField.map_field.value_field.field_ptr,
+                                          *file_schema.nestedField.map_field.value_field.field_ptr,
+                                          value_node));
+
+        node = std::make_shared<TableSchemaChangeHelper::MapNode>(key_node, value_node);
+        break;
+    }
+    case TPrimitiveType::ARRAY: {
+        if (file_schema.type.type != TPrimitiveType::ARRAY) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+
+        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.array_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.__isset.item_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.item_field.field_ptr != nullptr));
+
+        MOCK_REMOVE(DCHECK(file_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(file_schema.nestedField.__isset.array_field));
+        MOCK_REMOVE(DCHECK(file_schema.nestedField.array_field.__isset.item_field));
+        MOCK_REMOVE(DCHECK(file_schema.nestedField.array_field.item_field.field_ptr != nullptr));
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> item_node = nullptr;
+        RETURN_IF_ERROR(by_table_field_id(
+                *table_schema.nestedField.array_field.item_field.field_ptr,
+                *file_schema.nestedField.array_field.item_field.field_ptr, item_node));
+
+        node = std::make_shared<TableSchemaChangeHelper::ArrayNode>(item_node);
+        break;
+    }
+    case TPrimitiveType::STRUCT: {
+        if (file_schema.type.type != TPrimitiveType::STRUCT) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.struct_field));
+
+        MOCK_REMOVE(DCHECK(file_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(file_schema.nestedField.__isset.struct_field));
+
+        RETURN_IF_ERROR(by_table_field_id(table_schema.nestedField.struct_field,
+                                          file_schema.nestedField.struct_field, node));
+        break;
+    }
+    default: {
+        node = std::make_shared<TableSchemaChangeHelper::ScalarNode>();
+        break;
+    }
+    }
+
+    return Status::OK();
+}
+
+Status TableSchemaChangeHelper::BuildTableInfoUtil::by_table_field_id(
+        const schema::external::TStructField& table_schema,
+        const schema::external::TStructField& file_schema,
+        std::shared_ptr<TableSchemaChangeHelper::Node>& node) {
+    std::map<int32_t, size_t> file_field_id_to_idx;
+    for (size_t idx = 0; idx < file_schema.fields.size(); ++idx) {
+        file_field_id_to_idx.emplace(file_schema.fields[idx].field_ptr->id, idx);
+    }
+    auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
+
+    for (const auto& table_field : table_schema.fields) {
+        const auto& table_column_name = table_field.field_ptr->name;
+
+        if (file_field_id_to_idx.contains(table_field.field_ptr->id)) {
+            const auto& file_field =
+                    file_schema.fields.at(file_field_id_to_idx[table_field.field_ptr->id]);
+
+            std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
+            RETURN_IF_ERROR(
+                    by_table_field_id(*table_field.field_ptr, *file_field.field_ptr, field_node));
+
+            struct_node->add_children(table_column_name, file_field.field_ptr->name, field_node);
+        } else {
+            struct_node->add_not_exist_children(table_column_name);
+        }
+    }
+    node = std::move(struct_node);
+    return Status::OK();
+}
+
+Status TableSchemaChangeHelper::BuildTableInfoUtil::by_parquet_field_id(
+        const schema::external::TStructField& table_schema,
+        const FieldDescriptor& parquet_field_desc,
+        std::shared_ptr<TableSchemaChangeHelper::Node>& node, bool& exist_field_id) {
+    auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
+    auto parquet_fields_schema = parquet_field_desc.get_fields_schema();
+    std::map<int32_t, size_t> file_column_id_idx_map;
+    for (size_t idx = 0; idx < parquet_fields_schema.size(); idx++) {
+        if (parquet_fields_schema[idx].field_id == -1) {
+            exist_field_id = false;
+            return Status::OK();
+        } else {
+            file_column_id_idx_map.emplace(parquet_fields_schema[idx].field_id, idx);
+        }
+    }
+
+    for (const auto& table_field : table_schema.fields) {
+        const auto& table_column_name = table_field.field_ptr->name;
+
+        if (file_column_id_idx_map.contains(table_field.field_ptr->id)) {
+            auto file_column_idx = file_column_id_idx_map[table_field.field_ptr->id];
+            std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
+            RETURN_IF_ERROR(by_parquet_field_id(*table_field.field_ptr,
+                                                parquet_fields_schema[file_column_idx], field_node,
+                                                exist_field_id));
+            struct_node->add_children(table_column_name,
+                                      parquet_fields_schema[file_column_idx].name, field_node);
+        } else {
+            struct_node->add_not_exist_children(table_column_name);
+        }
+    }
+
+    node = struct_node;
+    return Status::OK();
+}
+
+Status TableSchemaChangeHelper::BuildTableInfoUtil::by_parquet_field_id(
+        const schema::external::TField& table_schema, const FieldSchema& parquet_field,
+        std::shared_ptr<TableSchemaChangeHelper::Node>& node, bool& exist_field_id) {
+    switch (table_schema.type.type) {
+    case TPrimitiveType::MAP: {
+        if (parquet_field.data_type->get_primitive_type() != TYPE_MAP) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.map_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.key_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.value_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.key_field.field_ptr != nullptr));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.value_field.field_ptr != nullptr));
+
+        MOCK_REMOVE(DCHECK(parquet_field.children.size() == 2));
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> key_node = nullptr;
+        std::shared_ptr<TableSchemaChangeHelper::Node> value_node = nullptr;
+
+        RETURN_IF_ERROR(by_parquet_field_id(*table_schema.nestedField.map_field.key_field.field_ptr,
+                                            parquet_field.children[0], key_node, exist_field_id));
+
+        RETURN_IF_ERROR(
+                by_parquet_field_id(*table_schema.nestedField.map_field.value_field.field_ptr,
+                                    parquet_field.children[1], value_node, exist_field_id));
+
+        node = std::make_shared<TableSchemaChangeHelper::MapNode>(key_node, value_node);
+        break;
+    }
+    case TPrimitiveType::ARRAY: {
+        if (parquet_field.data_type->get_primitive_type() != TYPE_ARRAY) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.array_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.__isset.item_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.item_field.field_ptr != nullptr));
+
+        MOCK_REMOVE(DCHECK(parquet_field.children.size() == 1));
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> element_node = nullptr;
+        RETURN_IF_ERROR(
+                by_parquet_field_id(*table_schema.nestedField.array_field.item_field.field_ptr,
+                                    parquet_field.children[0], element_node, exist_field_id));
+
+        node = std::make_shared<TableSchemaChangeHelper::ArrayNode>(element_node);
+        break;
+    }
+    case TPrimitiveType::STRUCT: {
+        if (parquet_field.data_type->get_primitive_type() != TYPE_STRUCT) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.struct_field));
+
+        auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
+
+        std::map<int32_t, size_t> file_column_id_idx_map;
+        for (size_t idx = 0; idx < parquet_field.children.size(); idx++) {
+            if (parquet_field.children[idx].field_id == -1) {
+                exist_field_id = false;
+                return Status::OK();
+            } else {
+                file_column_id_idx_map.emplace(parquet_field.children[idx].field_id, idx);
+            }
+        }
+
+        for (const auto& table_field : table_schema.nestedField.struct_field.fields) {
+            const auto& table_column_name = table_field.field_ptr->name;
+            if (file_column_id_idx_map.contains(table_field.field_ptr->id)) {
+                const auto& file_field = parquet_field.children.at(
+                        file_column_id_idx_map[table_field.field_ptr->id]);
+                std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
+                RETURN_IF_ERROR(by_parquet_field_id(*table_field.field_ptr, file_field, field_node,
+                                                    exist_field_id));
+                struct_node->add_children(table_column_name, file_field.name, field_node);
+            } else {
+                struct_node->add_not_exist_children(table_column_name);
+            }
+        }
+        node = struct_node;
+        break;
+    }
+    default: {
+        node = std::make_shared<ScalarNode>();
+        break;
+    }
+    }
+    return Status::OK();
+}
+
+Status TableSchemaChangeHelper::BuildTableInfoUtil::by_orc_field_id(
+        const schema::external::TStructField& table_schema, const orc::Type* orc_root,
+        const std::string& field_id_attribute_key,
+        std::shared_ptr<TableSchemaChangeHelper::Node>& node, bool& exist_field_id) {
+    auto struct_node = std::make_shared<TableSchemaChangeHelper::StructNode>();
+
+    std::map<int32_t, size_t> file_column_id_idx_map;
+    for (size_t idx = 0; idx < orc_root->getSubtypeCount(); idx++) {
+        if (orc_root->getSubtype(idx)->hasAttributeKey(field_id_attribute_key)) {
+            auto field_id =
+                    std::stoi(orc_root->getSubtype(idx)->getAttributeValue(field_id_attribute_key));
+            file_column_id_idx_map.emplace(field_id, idx);
+        } else {
+            exist_field_id = false;
+            return Status::OK();
+        }
+    }
+
+    for (const auto& table_field : table_schema.fields) {
+        const auto& table_column_name = table_field.field_ptr->name;
+        if (file_column_id_idx_map.contains(table_field.field_ptr->id)) {
+            auto file_field_idx = file_column_id_idx_map[table_field.field_ptr->id];
+            const auto& file_field = orc_root->getSubtype(file_field_idx);
+            std::shared_ptr<TableSchemaChangeHelper::Node> field_node = nullptr;
+            RETURN_IF_ERROR(by_orc_field_id(*table_field.field_ptr, file_field,
+                                            field_id_attribute_key, field_node, exist_field_id));
+            struct_node->add_children(table_column_name, orc_root->getFieldName(file_field_idx),
+                                      field_node);
+        } else {
+            struct_node->add_not_exist_children(table_column_name);
+        }
+    }
+    node = struct_node;
+    return Status::OK();
+}
+
+Status TableSchemaChangeHelper::BuildTableInfoUtil::by_orc_field_id(
+        const schema::external::TField& table_schema, const orc::Type* orc_root,
+        const std::string& field_id_attribute_key,
+        std::shared_ptr<TableSchemaChangeHelper::Node>& node, bool& exist_field_id) {
+    switch (table_schema.type.type) {
+    case TPrimitiveType::MAP: {
+        if (orc_root->getKind() != orc::TypeKind::MAP) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.map_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.key_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.__isset.value_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.key_field.field_ptr != nullptr));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.map_field.value_field.field_ptr != nullptr));
+
+        MOCK_REMOVE(DCHECK(orc_root->getSubtypeCount() == 2));
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> key_node = nullptr;
+        std::shared_ptr<TableSchemaChangeHelper::Node> value_node = nullptr;
+
+        RETURN_IF_ERROR(by_orc_field_id(*table_schema.nestedField.map_field.key_field.field_ptr,
+                                        orc_root->getSubtype(0), field_id_attribute_key, key_node,
+                                        exist_field_id));
+
+        RETURN_IF_ERROR(by_orc_field_id(*table_schema.nestedField.map_field.value_field.field_ptr,
+                                        orc_root->getSubtype(1), field_id_attribute_key, value_node,
+                                        exist_field_id));
+
+        node = std::make_shared<TableSchemaChangeHelper::MapNode>(key_node, value_node);
+        break;
+    }
+    case TPrimitiveType::ARRAY: {
+        if (orc_root->getKind() != orc::TypeKind::LIST) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.array_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.__isset.item_field));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.array_field.item_field.field_ptr != nullptr));
+
+        MOCK_REMOVE(DCHECK(orc_root->getSubtypeCount() == 1));
+
+        std::shared_ptr<TableSchemaChangeHelper::Node> element_node = nullptr;
+        RETURN_IF_ERROR(by_orc_field_id(*table_schema.nestedField.array_field.item_field.field_ptr,
+                                        orc_root->getSubtype(0), field_id_attribute_key,
+                                        element_node, exist_field_id));
+
+        node = std::make_shared<TableSchemaChangeHelper::ArrayNode>(element_node);
+        break;
+    }
+    case TPrimitiveType::STRUCT: {
+        if (orc_root->getKind() != orc::TypeKind::STRUCT) [[unlikely]] {
+            return SCHEMA_ERROR;
+        }
+        MOCK_REMOVE(DCHECK(table_schema.__isset.nestedField));
+        MOCK_REMOVE(DCHECK(table_schema.nestedField.__isset.struct_field));
+        RETURN_IF_ERROR(by_orc_field_id(table_schema.nestedField.struct_field, orc_root,
+                                        field_id_attribute_key, node, exist_field_id));
+
+        break;
+    }
+    default: {
+        node = std::make_shared<ScalarNode>();
+        break;
+    }
+    }
+
+    return Status::OK();
+}
+
+std::string TableSchemaChangeHelper::debug(const std::shared_ptr<Node>& root, size_t level) {
+    std::string ans;
+
+    auto indent = [](size_t level) { return std::string(level * 2, ' '); };
+
+    std::string prefix = indent(level);
+
+    if (std::dynamic_pointer_cast<ScalarNode>(root)) {
+        ans += prefix + "ScalarNode\n";
+    } else if (auto struct_node = std::dynamic_pointer_cast<StructNode>(root)) {
+        ans += prefix + "StructNode\n";
+        for (const auto& [table_col_name, value] : struct_node->get_children()) {
+            ans += indent(level + 1) + table_col_name;
+            if (value.exists) {
+                ans += " (file: " + value.column_name + ")";
+            } else {
+                ans += " (not exists)";
+            }
+            ans += "\n";
+            if (value.node) {
+                ans += debug(value.node, level + 2);
+            }
+        }
+    } else if (auto array_node = std::dynamic_pointer_cast<ArrayNode>(root)) {
+        ans += prefix + "ArrayNode\n";
+        ans += indent(level + 1) + "Element:\n";
+        ans += debug(array_node->get_element_node(), level + 2);
+    } else if (auto map_node = std::dynamic_pointer_cast<MapNode>(root)) {
+        ans += prefix + "MapNode\n";
+        ans += indent(level + 1) + "Key:\n";
+        ans += debug(map_node->get_key_node(), level + 2);
+        ans += indent(level + 1) + "Value:\n";
+        ans += debug(map_node->get_value_node(), level + 2);
+    } else if (std::dynamic_pointer_cast<ConstNode>(root)) {
+        ans += prefix + "ConstNode\n";
+    } else {
+        ans += prefix + "UnknownNodeType\n";
+    }
+
+    return ans;
+}
+#include "common/compile_check_end.h"
+} // namespace doris
diff --git a/be/src/format/table/table_schema_change_helper.h b/be/src/format/table/table_schema_change_helper.h
new file mode 100644
index 00000000000000..4e3425b676b0d4
--- /dev/null
+++ b/be/src/format/table/table_schema_change_helper.h
@@ -0,0 +1,350 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <string>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_struct.h"
+#include "format/parquet/schema_desc.h"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+#include "storage/olap_common.h"
+#include "util/string_util.h"
+
+namespace doris {
+class TFileRangeDesc;
+class Block;
+} // namespace doris
+
+namespace doris {
+#include "common/compile_check_begin.h"
+
+class TableSchemaChangeHelper {
+public:
+    ~TableSchemaChangeHelper() = default;
+
+    class Node {
+    public:
+        virtual ~Node() = default;
+        virtual std::shared_ptr<Node> get_children_node(std::string table_column_name) const {
+            throw std::logic_error("get_children_node should not be called on base TableInfoNode");
+        };
+
+        virtual std::shared_ptr<Node> get_children_node_by_file_column_name(
+                std::string file_column_name) const {
+            throw std::logic_error(
+                    "get_children_node_by_file_column_name should not be called on base "
+                    "TableInfoNode");
+        };
+
+        virtual std::string children_file_column_name(std::string table_column_name) const {
+            throw std::logic_error(
+                    "children_file_column_name should not be called on base TableInfoNode");
+        }
+
+        virtual bool children_column_exists(std::string table_column_name) const {
+            throw std::logic_error(
+                    "children_column_exists should not be called on base TableInfoNode");
+        }
+
+        virtual std::shared_ptr<Node> get_element_node() const {
+            throw std::logic_error("get_element_node should not be called on base TableInfoNode");
+        }
+
+        virtual std::shared_ptr<Node> get_key_node() const {
+            throw std::logic_error("get_key_node should not be called on base TableInfoNode");
+        }
+        virtual std::shared_ptr<Node> get_value_node() const {
+            throw std::logic_error("get_value_node should not be called on base TableInfoNode");
+        }
+
+        virtual void add_not_exist_children(std::string table_column_name) {
+            throw std::logic_error(
+                    "add_not_exist_children should not be called on base TableInfoNode");
+        };
+
+        virtual void add_children(std::string table_column_name, std::string file_column_name,
+                                  std::shared_ptr<Node> children_node) {
+            throw std::logic_error("add_children should not be called on base TableInfoNode");
+        }
+    };
+
+    class ConstNode : public Node {
+        // If you can be sure that there has been no schema change between the table and the file,
+        // you can use constNode (of course, you need to pay attention to case sensitivity).
+    public:
+        std::shared_ptr<Node> get_children_node(std::string table_column_name) const override {
+            return get_instance();
+        };
+
+        std::shared_ptr<Node> get_children_node_by_file_column_name(
+                std::string file_column_name) const override {
+            return get_instance();
+        };
+
+        std::string children_file_column_name(std::string table_column_name) const override {
+            return table_column_name;
+        }
+
+        bool children_column_exists(std::string table_column_name) const override { return true; }
+
+        std::shared_ptr<Node> get_element_node() const override { return get_instance(); }
+
+        std::shared_ptr<Node> get_key_node() const override { return get_instance(); }
+
+        std::shared_ptr<Node> get_value_node() const override { return get_instance(); }
+
+        static const std::shared_ptr<ConstNode>& get_instance() {
+            static const std::shared_ptr<ConstNode> instance = std::make_shared<ConstNode>();
+            return instance;
+        }
+    };
+
+    // ScalarNode inherits from ConstNode so that unexpected calls to
+    // get_element_node / get_key_node / get_value_node (e.g. on schema
+    // mismatch where the file has a complex type but the table has a
+    // scalar) are handled safely instead of crashing.
+    class ScalarNode : public ConstNode {};
+
+    class StructNode : public Node {
+        struct StructChild {
+            const std::shared_ptr<Node> node;
+            const std::string column_name;
+            const bool exists;
+        };
+
+        // table column name -> { node, file_column_name, exists_in_file}
+        std::map<std::string, StructChild> children;
+
+    public:
+        std::shared_ptr<Node> get_children_node(std::string table_column_name) const override {
+            DCHECK(children.contains(table_column_name));
+            DCHECK(children_column_exists(table_column_name));
+            return children.at(table_column_name).node;
+        }
+
+        std::shared_ptr<Node> get_children_node_by_file_column_name(
+                std::string file_column_name) const override {
+            // Search for the child by file column name
+            for (const auto& [table_name, child] : children) {
+                if (child.exists && child.column_name == file_column_name) {
+                    return child.node;
+                }
+            }
+            // Not found - throw or return nullptr
+            throw std::runtime_error("File column name '" + file_column_name +
+                                     "' not found in struct children");
+        }
+
+        std::string children_file_column_name(std::string table_column_name) const override {
+            DCHECK(children.contains(table_column_name));
+            DCHECK(children_column_exists(table_column_name));
+            return children.at(table_column_name).column_name;
+        }
+
+        bool children_column_exists(std::string table_column_name) const override {
+            DCHECK(children.contains(table_column_name));
+            return children.at(table_column_name).exists;
+        }
+
+        void add_not_exist_children(std::string table_column_name) override {
+            children.emplace(table_column_name, StructChild {nullptr, "", false});
+        }
+
+        void add_children(std::string table_column_name, std::string file_column_name,
+                          std::shared_ptr<Node> children_node) override {
+            children.emplace(table_column_name,
+                             StructChild {children_node, file_column_name, true});
+        }
+
+        const std::map<std::string, StructChild>& get_children() const { return children; }
+    };
+
+    class ArrayNode : public Node {
+        std::shared_ptr<Node> _element_node;
+
+    public:
+        ArrayNode(const std::shared_ptr<Node>& element_node) : _element_node(element_node) {}
+
+        std::shared_ptr<Node> get_element_node() const override { return _element_node; }
+    };
+
+    class MapNode : public Node {
+        std::shared_ptr<Node> _key_node;
+        std::shared_ptr<Node> _value_node;
+
+    public:
+        MapNode(const std::shared_ptr<Node>& key_node, const std::shared_ptr<Node>& value_node)
+                : _key_node(key_node), _value_node(value_node) {}
+
+        std::shared_ptr<Node> get_key_node() const override { return _key_node; }
+
+        std::shared_ptr<Node> get_value_node() const override { return _value_node; }
+    };
+
+    static std::string debug(const std::shared_ptr<Node>& root, size_t level = 0);
+
+protected:
+    // Whenever external components invoke the Parquet/ORC reader (e.g., init_reader, get_next_block, set_fill_columns),
+    // the parameters passed in are based on `table column names`.
+    // The table_info_node_ptr assists the Parquet/ORC reader in mapping these to the actual
+    // `file columns name` to be read and enables min/max filtering.
+    std::shared_ptr<Node> table_info_node_ptr = std::make_shared<StructNode>();
+
+protected:
+    Status gen_table_info_node_by_field_id(const TFileScanRangeParams& params,
+                                           int64_t split_schema_id,
+                                           const TupleDescriptor* tuple_descriptor,
+                                           const FieldDescriptor& parquet_field_desc) {
+        if (!params.__isset.history_schema_info) [[unlikely]] {
+            RETURN_IF_ERROR(BuildTableInfoUtil::by_parquet_name(
+                    tuple_descriptor, parquet_field_desc, table_info_node_ptr));
+            return Status::OK();
+        }
+        return gen_table_info_node_by_field_id(params, split_schema_id);
+    }
+
+    Status gen_table_info_node_by_field_id(const TFileScanRangeParams& params,
+                                           int64_t split_schema_id,
+                                           const TupleDescriptor* tuple_descriptor,
+                                           const orc::Type* orc_type_ptr) {
+        if (!params.__isset.history_schema_info) [[unlikely]] {
+            RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_name(tuple_descriptor, orc_type_ptr,
+                                                            table_info_node_ptr));
+            return Status::OK();
+        }
+        return gen_table_info_node_by_field_id(params, split_schema_id);
+    }
+
+private:
+    // The filed id of both the table and the file come from the pass from fe. (params.history_schema_info)
+    Status gen_table_info_node_by_field_id(const TFileScanRangeParams& params,
+                                           int64_t split_schema_id) {
+        if (params.current_schema_id == split_schema_id) {
+            table_info_node_ptr = ConstNode::get_instance();
+            return Status::OK();
+        }
+
+        int32_t table_schema_idx = -1;
+        int32_t file_schema_idx = -1;
+        //todo : Perhaps this process can be optimized by pre-generating a map
+        for (int32_t idx = 0; idx < params.history_schema_info.size(); idx++) {
+            if (params.history_schema_info[idx].schema_id == params.current_schema_id) {
+                table_schema_idx = idx;
+            } else if (params.history_schema_info[idx].schema_id == split_schema_id) {
+                file_schema_idx = idx;
+            }
+        }
+
+        if (table_schema_idx == -1 || file_schema_idx == -1) [[unlikely]] {
+            return Status::InternalError(
+                    "miss table/file schema info, table_schema_idx:{}  file_schema_idx:{}",
+                    table_schema_idx, file_schema_idx);
+        }
+        RETURN_IF_ERROR(BuildTableInfoUtil::by_table_field_id(
+                params.history_schema_info.at(table_schema_idx).root_field,
+                params.history_schema_info.at(file_schema_idx).root_field, table_info_node_ptr));
+        return Status::OK();
+    }
+
+public:
+    /* Schema change Util. Used to generate `std::shared_ptr<TableSchemaChangeHelper::Node> node`.
+        Passed node to parquet/orc reader to find file columns based on table columns,
+    */
+    struct BuildTableInfoUtil {
+        static const Status SCHEMA_ERROR;
+
+        // todo : Maybe I can use templates to implement this functionality.
+
+        // for hive parquet : The table column names passed from fe are lowercase, so use lowercase file column names to match table column names.
+        static Status by_parquet_name(const TupleDescriptor* table_tuple_descriptor,
+                                      const FieldDescriptor& parquet_field_desc,
+                                      std::shared_ptr<TableSchemaChangeHelper::Node>& node,
+                                      const std::set<TSlotId>* is_file_slot = nullptr);
+
+        // for hive parquet
+        static Status by_parquet_name(const DataTypePtr& table_data_type,
+                                      const FieldSchema& file_field,
+                                      std::shared_ptr<TableSchemaChangeHelper::Node>& node);
+
+        // for hive orc: The table column names passed from fe are lowercase, so use lowercase file column names to match table column names.
+        static Status by_orc_name(const TupleDescriptor* table_tuple_descriptor,
+                                  const orc::Type* orc_type_ptr,
+                                  std::shared_ptr<TableSchemaChangeHelper::Node>& node,
+                                  const std::set<TSlotId>* is_file_slot = nullptr);
+        // for hive orc
+        static Status by_orc_name(const DataTypePtr& table_data_type, const orc::Type* orc_root,
+                                  std::shared_ptr<TableSchemaChangeHelper::Node>& node);
+
+        // for paimon hudi: Use the field id in the `table schema` and `history table schema` to match columns.
+        static Status by_table_field_id(const schema::external::TField table_schema,
+                                        const schema::external::TField file_schema,
+                                        std::shared_ptr<TableSchemaChangeHelper::Node>& node);
+
+        // for paimon hudi
+        static Status by_table_field_id(const schema::external::TStructField& table_schema,
+                                        const schema::external::TStructField& file_schema,
+                                        std::shared_ptr<TableSchemaChangeHelper::Node>& node);
+
+        //for iceberg parquet: Use the field id in the `table schema` and the parquet file to match columns.
+        static Status by_parquet_field_id(const schema::external::TStructField& table_schema,
+                                          const FieldDescriptor& parquet_field_desc,
+                                          std::shared_ptr<TableSchemaChangeHelper::Node>& node,
+                                          bool& exist_field_id);
+
+        // for iceberg parquet
+        static Status by_parquet_field_id(const schema::external::TField& table_schema,
+                                          const FieldSchema& parquet_field,
+                                          std::shared_ptr<TableSchemaChangeHelper::Node>& node,
+                                          bool& exist_field_id);
+
+        // for iceberg orc : Use the field id in the `table schema` and the orc file to match columns.
+        static Status by_orc_field_id(const schema::external::TStructField& table_schema,
+                                      const orc::Type* orc_root,
+                                      const std::string& field_id_attribute_key,
+                                      std::shared_ptr<TableSchemaChangeHelper::Node>& node,
+                                      bool& exist_field_id);
+
+        // for iceberg orc
+        static Status by_orc_field_id(const schema::external::TField& table_schema,
+                                      const orc::Type* orc_root,
+                                      const std::string& field_id_attribute_key,
+                                      std::shared_ptr<TableSchemaChangeHelper::Node>& node,
+                                      bool& exist_field_id);
+    };
+};
+
+struct ColumnIdResult {
+    std::set<uint64_t> column_ids;
+    std::set<uint64_t> filter_column_ids;
+
+    ColumnIdResult() = default; // Add default constructor
+
+    ColumnIdResult(std::set<uint64_t> column_ids_, std::set<uint64_t> filter_column_ids_)
+            : column_ids(std::move(column_ids_)),
+              filter_column_ids(std::move(filter_column_ids_)) {}
+};
+
+#include "common/compile_check_end.h"
+} // namespace doris
diff --git a/be/src/format/table/transactional_hive_common.h b/be/src/format/table/transactional_hive_common.h
index 4ec08c3254e3bb..f2cba0c660764c 100644
--- a/be/src/format/table/transactional_hive_common.h
+++ b/be/src/format/table/transactional_hive_common.h
@@ -17,11 +17,14 @@
 
 #pragma once
 
+#include <cstddef>
+#include <cstdint>
 #include <string>
 #include <unordered_map>
 #include <vector>
 
 #include "core/data_type/define_primitive_type.h"
+#include "exec/common/hash_table/phmap_fwd_decl.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
@@ -55,5 +58,48 @@ struct TransactionalHive {
 
     static const std::unordered_map<std::string, uint32_t> DELETE_COL_NAME_TO_BLOCK_IDX;
 };
+
+// ACID row identifier for transactional Hive tables, used for delete row matching.
+// Placed here (not in TransactionalHiveReader) to avoid circular dependency with OrcReader.
+struct AcidRowID {
+    int64_t original_transaction;
+    int64_t bucket;
+    int64_t row_id;
+
+    struct Hash {
+        size_t operator()(const AcidRowID& transactional_row_id) const {
+            size_t hash_value = 0;
+            hash_value ^= std::hash<int64_t> {}(transactional_row_id.original_transaction) +
+                          0x9e3779b9 + (hash_value << 6) + (hash_value >> 2);
+            hash_value ^= std::hash<int64_t> {}(transactional_row_id.bucket) + 0x9e3779b9 +
+                          (hash_value << 6) + (hash_value >> 2);
+            hash_value ^= std::hash<int64_t> {}(transactional_row_id.row_id) + 0x9e3779b9 +
+                          (hash_value << 6) + (hash_value >> 2);
+            return hash_value;
+        }
+    };
+
+    struct Eq {
+        bool operator()(const AcidRowID& lhs, const AcidRowID& rhs) const {
+            return lhs.original_transaction == rhs.original_transaction &&
+                   lhs.bucket == rhs.bucket && lhs.row_id == rhs.row_id;
+        }
+    };
+};
+
+using AcidRowIDSet = flat_hash_set<AcidRowID, AcidRowID::Hash, AcidRowID::Eq>;
+
+inline bool operator<(const AcidRowID& lhs, const AcidRowID& rhs) {
+    if (lhs.original_transaction != rhs.original_transaction) {
+        return lhs.original_transaction < rhs.original_transaction;
+    } else if (lhs.bucket != rhs.bucket) {
+        return lhs.bucket < rhs.bucket;
+    } else if (lhs.row_id != rhs.row_id) {
+        return lhs.row_id < rhs.row_id;
+    } else {
+        return false;
+    }
+}
+
 #include "common/compile_check_end.h"
 } // namespace doris
diff --git a/be/src/format/table/transactional_hive_reader.cpp b/be/src/format/table/transactional_hive_reader.cpp
index de6227977734df..a9cca89baca6b6 100644
--- a/be/src/format/table/transactional_hive_reader.cpp
+++ b/be/src/format/table/transactional_hive_reader.cpp
@@ -21,8 +21,8 @@
 
 #include "core/data_type/data_type_factory.hpp"
 #include "format/orc/vorc_reader.h"
-#include "format/table/table_format_reader.h"
-#include "format/table/transactional_hive_common.h"
+#include "format/table/table_schema_change_helper.h"
+#include "transactional_hive_common.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
@@ -35,49 +35,57 @@ class VExprContext;
 
 namespace doris {
 
-TransactionalHiveReader::TransactionalHiveReader(std::unique_ptr<GenericReader> file_format_reader,
-                                                 RuntimeProfile* profile, RuntimeState* state,
+TransactionalHiveReader::TransactionalHiveReader(RuntimeProfile* profile, RuntimeState* state,
                                                  const TFileScanRangeParams& params,
-                                                 const TFileRangeDesc& range, io::IOContext* io_ctx,
+                                                 const TFileRangeDesc& range, size_t batch_size,
+                                                 const std::string& ctz, io::IOContext* io_ctx,
                                                  FileMetaCache* meta_cache)
-        : TableFormatReader(std::move(file_format_reader), state, profile, params, range, io_ctx,
-                            meta_cache) {
+        : OrcReader(profile, state, params, range, batch_size, ctz, io_ctx, meta_cache, false) {
     static const char* transactional_hive_profile = "TransactionalHiveProfile";
-    ADD_TIMER(_profile, transactional_hive_profile);
-    _transactional_orc_profile.num_delete_files =
-            ADD_CHILD_COUNTER(_profile, "NumDeleteFiles", TUnit::UNIT, transactional_hive_profile);
-    _transactional_orc_profile.num_delete_rows =
-            ADD_CHILD_COUNTER(_profile, "NumDeleteRows", TUnit::UNIT, transactional_hive_profile);
+    ADD_TIMER(get_profile(), transactional_hive_profile);
+    _transactional_orc_profile.num_delete_files = ADD_CHILD_COUNTER(
+            get_profile(), "NumDeleteFiles", TUnit::UNIT, transactional_hive_profile);
+    _transactional_orc_profile.num_delete_rows = ADD_CHILD_COUNTER(
+            get_profile(), "NumDeleteRows", TUnit::UNIT, transactional_hive_profile);
     _transactional_orc_profile.delete_files_read_time =
-            ADD_CHILD_TIMER(_profile, "DeleteFileReadTime", transactional_hive_profile);
+            ADD_CHILD_TIMER(get_profile(), "DeleteFileReadTime", transactional_hive_profile);
 }
 
-Status TransactionalHiveReader::init_reader(
-        const std::vector<std::string>& column_names,
-        std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-        const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
-        const RowDescriptor* row_descriptor,
-        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
-    _col_name_to_block_idx = col_name_to_block_idx;
-    auto* orc_reader = static_cast<OrcReader*>(_file_format_reader.get());
-    _col_names.insert(_col_names.end(), column_names.begin(), column_names.end());
+// ============================================================================
+// on_before_init_reader: ACID schema mapping
+// ============================================================================
+Status TransactionalHiveReader::on_before_init_reader(ReaderInitContext* ctx) {
+    _column_descs = ctx->column_descs;
+    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
+    RETURN_IF_ERROR(
+            _extract_partition_values(*ctx->range, ctx->tuple_descriptor, _fill_partition_values));
+    for (auto& desc : *ctx->column_descs) {
+        if (desc.category == ColumnCategory::REGULAR ||
+            desc.category == ColumnCategory::GENERATED) {
+            _col_names.push_back(desc.name);
+        }
+    }
+
+    _is_acid = true;
+    // Add ACID column names (originalTransaction, bucket, rowId, etc.)
     _col_names.insert(_col_names.end(), TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.begin(),
                       TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.end());
+    ctx->column_names = _col_names;
 
-    // https://issues.apache.org/jira/browse/HIVE-15190
+    // Get ORC file type
     const orc::Type* orc_type_ptr = nullptr;
-    RETURN_IF_ERROR(orc_reader->get_file_type(&orc_type_ptr));
+    RETURN_IF_ERROR(get_file_type(&orc_type_ptr));
     const auto& orc_type = *orc_type_ptr;
 
+    // Add ACID metadata columns to table_info_node
     for (auto idx = 0; idx < TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.size(); idx++) {
         table_info_node_ptr->add_children(TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE[idx],
                                           TransactionalHive::READ_ROW_COLUMN_NAMES[idx],
                                           std::make_shared<ScalarNode>());
     }
 
+    // https://issues.apache.org/jira/browse/HIVE-15190
     auto row_orc_type = orc_type.getSubtype(TransactionalHive::ROW_OFFSET);
-    // struct<operation:int,originalTransaction:bigint,bucket:int,rowId:bigint,currentTransaction:bigint,row:struct<id:int,name:string>>
     std::vector<std::string> row_names;
     std::map<std::string, uint64_t> row_names_map;
     for (uint64_t idx = 0; idx < row_orc_type->getSubtypeCount(); idx++) {
@@ -86,8 +94,8 @@ Status TransactionalHiveReader::init_reader(
         row_names_map.emplace(file_column_name, idx);
     }
 
-    // use name for match.
-    for (const auto& slot : tuple_descriptor->slots()) {
+    // Match table columns to file columns by name
+    for (const auto& slot : ctx->tuple_descriptor->slots()) {
         const auto& slot_name = slot->col_name();
 
         if (std::count(TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.begin(),
@@ -103,58 +111,36 @@ Status TransactionalHiveReader::init_reader(
                     "{}.{}", TransactionalHive::ACID_COLUMN_NAMES[TransactionalHive::ROW_OFFSET],
                     slot_name);
             table_info_node_ptr->add_children(slot_name, file_column_name, child_node);
-
         } else {
             table_info_node_ptr->add_not_exist_children(slot_name);
         }
     }
-
-    Status status = orc_reader->init_reader(
-            &_col_names, col_name_to_block_idx, conjuncts, true, tuple_descriptor, row_descriptor,
-            not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts, table_info_node_ptr);
-    return status;
-}
-
-Status TransactionalHiveReader::get_next_block_inner(Block* block, size_t* read_rows, bool* eof) {
-    for (const auto& i : TransactionalHive::READ_PARAMS) {
-        DataTypePtr data_type = get_data_type_with_default_argument(
-                DataTypeFactory::instance().create_data_type(i.type, false));
-        MutableColumnPtr data_column = data_type->create_column();
-        (*_col_name_to_block_idx)[i.column_lower_case] = static_cast<uint32_t>(block->columns());
-        block->insert(
-                ColumnWithTypeAndName(std::move(data_column), data_type, i.column_lower_case));
-    }
-    auto res = _file_format_reader->get_next_block(block, read_rows, eof);
-    Block::erase_useless_column(block, block->columns() - TransactionalHive::READ_PARAMS.size());
-    for (const auto& i : TransactionalHive::READ_PARAMS) {
-        _col_name_to_block_idx->erase(i.column_lower_case);
-    }
-    return res;
+    ctx->table_info_node = table_info_node_ptr;
+    return Status::OK();
 }
 
-Status TransactionalHiveReader::init_row_filters() {
-    std::string data_file_path = _range.path;
-    // the path in _range is remove the namenode prefix,
+// ============================================================================
+// on_after_init_reader: read delete delta files
+// ============================================================================
+Status TransactionalHiveReader::on_after_init_reader(ReaderInitContext* /*ctx*/) {
+    std::string data_file_path = get_scan_range().path;
+    // the path in _range has the namenode prefix removed,
     // and the file_path in delete file is full path, so we should add it back.
-    if (_params.__isset.hdfs_params && _params.hdfs_params.__isset.fs_name) {
-        std::string fs_name = _params.hdfs_params.fs_name;
+    if (get_scan_params().__isset.hdfs_params && get_scan_params().hdfs_params.__isset.fs_name) {
+        std::string fs_name = get_scan_params().hdfs_params.fs_name;
         if (!starts_with(data_file_path, fs_name)) {
             data_file_path = fs_name + data_file_path;
         }
     }
 
-    auto* orc_reader = (OrcReader*)(_file_format_reader.get());
     std::vector<std::string> delete_file_col_names;
     int64_t num_delete_rows = 0;
     int64_t num_delete_files = 0;
     std::filesystem::path file_path(data_file_path);
 
-    //See https://github.com/apache/hive/commit/ffee30e6267e85f00a22767262192abb9681cfb7#diff-5fe26c36b4e029dcd344fc5d484e7347R165
     // bucket_xxx_attemptId => bucket_xxx
-    // bucket_xxx           => bucket_xxx
     auto remove_bucket_attemptId = [](const std::string& str) {
         re2::RE2 pattern("^bucket_\\d+_\\d+$");
-
         if (re2::RE2::FullMatch(str, pattern)) {
             size_t pos = str.rfind('_');
             if (pos != std::string::npos) {
@@ -166,10 +152,9 @@ Status TransactionalHiveReader::init_row_filters() {
 
     SCOPED_TIMER(_transactional_orc_profile.delete_files_read_time);
     for (const auto& delete_delta :
-         _range.table_format_params.transactional_hive_params.delete_deltas) {
+         get_scan_range().table_format_params.transactional_hive_params.delete_deltas) {
         const std::string file_name = file_path.filename().string();
 
-        //need opt.
         std::vector<std::string> delete_delta_file_names;
         for (const auto& x : delete_delta.file_names) {
             delete_delta_file_names.emplace_back(remove_bucket_attemptId(x));
@@ -184,15 +169,15 @@ Status TransactionalHiveReader::init_row_filters() {
                             delete_delta.file_names[iter - delete_delta_file_names.begin()]);
 
         TFileRangeDesc delete_range;
-        // must use __set() method to make sure __isset is true
-        delete_range.__set_fs_name(_range.fs_name);
+        delete_range.__set_fs_name(get_scan_range().fs_name);
         delete_range.path = delete_file;
         delete_range.start_offset = 0;
         delete_range.size = -1;
         delete_range.file_size = -1;
 
-        OrcReader delete_reader(_profile, _state, _params, delete_range, _MIN_BATCH_SIZE,
-                                _state->timezone(), _io_ctx, _meta_cache, false);
+        OrcReader delete_reader(get_profile(), get_state(), get_scan_params(), delete_range,
+                                256 /*batch_size*/, get_state()->timezone(), get_io_ctx(),
+                                _meta_cache, false);
 
         auto acid_info_node = std::make_shared<StructNode>();
         for (auto idx = 0; idx < TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE.size();
@@ -204,16 +189,14 @@ Status TransactionalHiveReader::init_row_filters() {
                                          std::make_shared<ScalarNode>());
         }
 
-        RETURN_IF_ERROR(delete_reader.init_reader(
-                &TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE,
-                const_cast<std::unordered_map<std::string, uint32_t>*>(
-                        &TransactionalHive::DELETE_COL_NAME_TO_BLOCK_IDX),
-                {}, false, nullptr, nullptr, nullptr, nullptr, acid_info_node));
-
-        std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-                partition_columns;
-        std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-        RETURN_IF_ERROR(delete_reader.set_fill_columns(partition_columns, missing_columns));
+        OrcInitContext delete_ctx;
+        delete_ctx.column_names.assign(
+                TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE.begin(),
+                TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE.end());
+        delete_ctx.col_name_to_block_idx = const_cast<std::unordered_map<std::string, uint32_t>*>(
+                &TransactionalHive::DELETE_COL_NAME_TO_BLOCK_IDX);
+        delete_ctx.table_info_node = acid_info_node;
+        RETURN_IF_ERROR(delete_reader.init_reader(&delete_ctx));
 
         bool eof = false;
         while (!eof) {
@@ -247,7 +230,7 @@ Status TransactionalHiveReader::init_row_filters() {
                     Int64 bucket_id = bucket_id_column.get_int(i);
                     Int64 row_id = row_id_column.get_int(i);
                     AcidRowID delete_row_id = {original_transaction, bucket_id, row_id};
-                    _delete_rows.insert(delete_row_id);
+                    _acid_delete_rows.insert(delete_row_id);
                     ++num_delete_rows;
                 }
             }
@@ -255,12 +238,41 @@ Status TransactionalHiveReader::init_row_filters() {
         ++num_delete_files;
     }
     if (num_delete_rows > 0) {
-        orc_reader->set_push_down_agg_type(TPushAggOp::NONE);
-        orc_reader->set_delete_rows(&_delete_rows);
+        set_push_down_agg_type(TPushAggOp::NONE);
+        lock_push_down_agg_type();
+        set_delete_rows(&_acid_delete_rows);
         COUNTER_UPDATE(_transactional_orc_profile.num_delete_files, num_delete_files);
         COUNTER_UPDATE(_transactional_orc_profile.num_delete_rows, num_delete_rows);
     }
     return Status::OK();
 }
+
+// ============================================================================
+// on_before_read_block: expand ACID columns into block
+// ============================================================================
+Status TransactionalHiveReader::on_before_read_block(Block* block) {
+    for (const auto& i : TransactionalHive::READ_PARAMS) {
+        DataTypePtr data_type = get_data_type_with_default_argument(
+                DataTypeFactory::instance().create_data_type(i.type, false));
+        MutableColumnPtr data_column = data_type->create_column();
+        (*col_name_to_block_idx_ref())[i.column_lower_case] =
+                static_cast<uint32_t>(block->columns());
+        block->insert(
+                ColumnWithTypeAndName(std::move(data_column), data_type, i.column_lower_case));
+    }
+    return Status::OK();
+}
+
+// ============================================================================
+// on_after_read_block: shrink ACID columns from block
+// ============================================================================
+Status TransactionalHiveReader::on_after_read_block(Block* block, size_t* /*read_rows*/) {
+    Block::erase_useless_column(block, block->columns() - TransactionalHive::READ_PARAMS.size());
+    for (const auto& i : TransactionalHive::READ_PARAMS) {
+        col_name_to_block_idx_ref()->erase(i.column_lower_case);
+    }
+    return Status::OK();
+}
+
 #include "common/compile_check_end.h"
 } // namespace doris
diff --git a/be/src/format/table/transactional_hive_reader.h b/be/src/format/table/transactional_hive_reader.h
index 9c4603d4ce819c..02fd294621402e 100644
--- a/be/src/format/table/transactional_hive_reader.h
+++ b/be/src/format/table/transactional_hive_reader.h
@@ -21,19 +21,17 @@
 #include <cstdint>
 #include <string>
 #include <unordered_map>
-#include <unordered_set>
 #include <vector>
 
 #include "common/factory_creator.h"
 #include "common/status.h"
-#include "exec/common/hash_table/phmap_fwd_decl.h"
-#include "format/table/table_format_reader.h"
-#include "storage/olap_scan_common.h"
+#include "format/orc/vorc_reader.h"
+#include "format/table/table_schema_change_helper.h"
+#include "format/table/transactional_hive_common.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
 class RuntimeState;
-class SlotDescriptor;
 class TFileRangeDesc;
 class TFileScanRangeParams;
 
@@ -42,63 +40,34 @@ struct IOContext;
 } // namespace io
 
 class Block;
-class GenericReader;
 class ShardedKVCache;
 class VExprContext;
 
-class TransactionalHiveReader : public TableFormatReader, public TableSchemaChangeHelper {
+// TransactionalHiveReader: directly inherits OrcReader (no composition wrapping).
+// ACID column expansion/shrinking done via on_before_read_block/on_after_read_block hooks.
+// Delete delta reading done via on_after_init_reader hook.
+class TransactionalHiveReader final : public OrcReader, public TableSchemaChangeHelper {
     ENABLE_FACTORY_CREATOR(TransactionalHiveReader);
 
 public:
-    struct AcidRowID {
-        int64_t original_transaction;
-        int64_t bucket;
-        int64_t row_id;
-
-        struct Hash {
-            size_t operator()(const AcidRowID& transactional_row_id) const {
-                size_t hash_value = 0;
-                hash_value ^= std::hash<int64_t> {}(transactional_row_id.original_transaction) +
-                              0x9e3779b9 + (hash_value << 6) + (hash_value >> 2);
-                hash_value ^= std::hash<int64_t> {}(transactional_row_id.bucket) + 0x9e3779b9 +
-                              (hash_value << 6) + (hash_value >> 2);
-                hash_value ^= std::hash<int64_t> {}(transactional_row_id.row_id) + 0x9e3779b9 +
-                              (hash_value << 6) + (hash_value >> 2);
-                return hash_value;
-            }
-        };
-
-        struct Eq {
-            bool operator()(const AcidRowID& lhs, const AcidRowID& rhs) const {
-                return lhs.original_transaction == rhs.original_transaction &&
-                       lhs.bucket == rhs.bucket && lhs.row_id == rhs.row_id;
-            }
-        };
-    };
-
-    using AcidRowIDSet = flat_hash_set<AcidRowID, AcidRowID::Hash, AcidRowID::Eq>;
-
-    TransactionalHiveReader(std::unique_ptr<GenericReader> file_format_reader,
-                            RuntimeProfile* profile, RuntimeState* state,
+    TransactionalHiveReader(RuntimeProfile* profile, RuntimeState* state,
                             const TFileScanRangeParams& params, const TFileRangeDesc& range,
-                            io::IOContext* io_ctx, FileMetaCache* meta_cache);
-    ~TransactionalHiveReader() override = default;
+                            size_t batch_size, const std::string& ctz, io::IOContext* io_ctx,
+                            FileMetaCache* meta_cache = nullptr);
+    ~TransactionalHiveReader() final = default;
 
-    Status init_row_filters() final;
+protected:
+    // Hook: ACID schema mapping (add transactional columns, map row.* fields)
+    Status on_before_init_reader(ReaderInitContext* ctx) override;
 
-    Status get_next_block_inner(Block* block, size_t* read_rows, bool* eof) final;
+    // Hook: read delete delta files
+    Status on_after_init_reader(ReaderInitContext* /*ctx*/) override;
 
-    bool has_delete_operations() const override {
-        return !_delete_rows.empty() || TableFormatReader::has_delete_operations();
-    }
+    // Hook: expand ACID columns into block before reading
+    Status on_before_read_block(Block* block) override;
 
-    Status init_reader(
-            const std::vector<std::string>& column_names,
-            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
-            const RowDescriptor* row_descriptor,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
+    // Hook: shrink ACID columns from block after reading
+    Status on_after_read_block(Block* block, size_t* read_rows) override;
 
 private:
     struct TransactionalHiveProfile {
@@ -108,25 +77,9 @@ class TransactionalHiveReader : public TableFormatReader, public TableSchemaChan
     };
 
     TransactionalHiveProfile _transactional_orc_profile;
-    AcidRowIDSet _delete_rows;
-    std::unique_ptr<IColumn::Filter> _delete_rows_filter_ptr;
+    AcidRowIDSet _acid_delete_rows;
     std::vector<std::string> _col_names;
-    // Column name to block index map, passed from FileScanner
-    std::unordered_map<std::string, uint32_t>* _col_name_to_block_idx = nullptr;
 };
 
-inline bool operator<(const TransactionalHiveReader::AcidRowID& lhs,
-                      const TransactionalHiveReader::AcidRowID& rhs) {
-    if (lhs.original_transaction != rhs.original_transaction) {
-        return lhs.original_transaction < rhs.original_transaction;
-    } else if (lhs.bucket != rhs.bucket) {
-        return lhs.bucket < rhs.bucket;
-    } else if (lhs.row_id != rhs.row_id) {
-        return lhs.row_id < rhs.row_id;
-    } else {
-        return false;
-    }
-}
-
 #include "common/compile_check_end.h"
 } // namespace doris
diff --git a/be/src/format/table/trino_connector_jni_reader.h b/be/src/format/table/trino_connector_jni_reader.h
index d571c5cd5866ed..5a2482ee9e29da 100644
--- a/be/src/format/table/trino_connector_jni_reader.h
+++ b/be/src/format/table/trino_connector_jni_reader.h
@@ -49,6 +49,9 @@ class TrinoConnectorJniReader : public JniReader {
 
     Status init_reader();
 
+protected:
+    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
+
 private:
     Status _set_spi_plugins_dir();
 };
diff --git a/be/src/load/delta_writer/push_handler.cpp b/be/src/load/delta_writer/push_handler.cpp
index 8996082adfcf38..cf32886f7b90ba 100644
--- a/be/src/load/delta_writer/push_handler.cpp
+++ b/be/src/load/delta_writer/push_handler.cpp
@@ -638,21 +638,24 @@ Status PushBrokerReader::_get_next_reader() {
                 _runtime_profile, _file_params, range, _runtime_state->query_options().batch_size,
                 &_runtime_state->timezone_obj(), _io_ctx.get(), _runtime_state.get());
 
-        init_status = parquet_reader->init_reader(
-                _all_col_names, &_col_name_to_block_idx, _push_down_exprs, _slot_id_to_predicates,
-                _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id,
-                &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts,
-                TableSchemaChangeHelper::ConstNode::get_instance(), false);
+        ParquetInitContext ctx;
+        ctx.column_names = _all_col_names;
+        ctx.col_name_to_block_idx = &_col_name_to_block_idx;
+        ctx.conjuncts = &_push_down_exprs;
+        ctx.slot_id_to_predicates = &_slot_id_to_predicates;
+        ctx.tuple_descriptor = _real_tuple_desc;
+        ctx.row_descriptor = _default_val_row_desc.get();
+        ctx.colname_to_slot_id = _col_name_to_slot_id;
+        ctx.not_single_slot_filter_conjuncts = &_not_single_slot_filter_conjuncts;
+        ctx.slot_id_to_filter_conjuncts = &_slot_id_to_filter_conjuncts;
+
+        init_status = parquet_reader->init_reader(&ctx);
         _cur_reader = std::move(parquet_reader);
         if (!init_status.ok()) {
             return Status::InternalError("failed to init reader for file {}, err: {}", range.path,
                                          init_status.to_string());
         }
-        std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-                partition_columns;
-        std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-        RETURN_IF_ERROR(_cur_reader->get_columns(&_name_to_col_type, &_missing_cols));
-        RETURN_IF_ERROR(_cur_reader->set_fill_columns(partition_columns, missing_columns));
+        RETURN_IF_ERROR(_cur_reader->get_columns(&_name_to_col_type));
         break;
     }
     default:
diff --git a/be/src/load/group_commit/wal/wal_reader.cpp b/be/src/load/group_commit/wal/wal_reader.cpp
index 610b27f9f8b545..1f2a45d262f8f0 100644
--- a/be/src/load/group_commit/wal/wal_reader.cpp
+++ b/be/src/load/group_commit/wal/wal_reader.cpp
@@ -40,7 +40,22 @@ Status WalReader::init_reader(const TupleDescriptor* tuple_descriptor) {
     return Status::OK();
 }
 
-Status WalReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+// ---- Unified init_reader(ReaderInitContext*) overrides ----
+
+Status WalReader::_open_file_reader(ReaderInitContext* /*ctx*/) {
+    RETURN_IF_ERROR(_state->exec_env()->wal_mgr()->get_wal_path(_wal_id, _wal_path));
+    _wal_reader = std::make_shared<doris::WalFileReader>(_wal_path);
+    RETURN_IF_ERROR(_wal_reader->init());
+    return Status::OK();
+}
+
+Status WalReader::_do_init_reader(ReaderInitContext* base_ctx) {
+    auto* ctx = checked_context_cast<WalInitContext>(base_ctx);
+    _tuple_descriptor = ctx->output_tuple_descriptor;
+    return Status::OK();
+}
+
+Status WalReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) {
     //read src block
     PBlock pblock;
     auto st = _wal_reader->read_block(pblock);
@@ -97,11 +112,11 @@ Status WalReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
     block->swap(dst_block);
     *read_rows = block->rows();
     VLOG_DEBUG << "read block rows:" << *read_rows;
+
     return Status::OK();
 }
 
-Status WalReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                              std::unordered_set<std::string>* missing_cols) {
+Status WalReader::_get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) {
     std::string col_ids;
     RETURN_IF_ERROR(_wal_reader->read_header(_version, col_ids));
     std::vector<std::string> column_id_vector =
diff --git a/be/src/load/group_commit/wal/wal_reader.h b/be/src/load/group_commit/wal/wal_reader.h
index 23579daff45343..db87397704d681 100644
--- a/be/src/load/group_commit/wal/wal_reader.h
+++ b/be/src/load/group_commit/wal/wal_reader.h
@@ -16,23 +16,29 @@
 // under the License.
 
 #pragma once
-#include "format/generic_reader.h"
+#include "format/table/table_format_reader.h"
 #include "load/group_commit/wal/wal_file_reader.h"
 #include "runtime/descriptors.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
 struct ScannerCounter;
-class WalReader : public GenericReader {
+
+/// WAL-specific initialization context.
+/// Extends ReaderInitContext with output tuple descriptor (unique to WAL reader).
+struct WalInitContext final : public ReaderInitContext {
+    const TupleDescriptor* output_tuple_descriptor = nullptr;
+};
+
+class WalReader : public TableFormatReader {
     ENABLE_FACTORY_CREATOR(WalReader);
 
 public:
     WalReader(RuntimeState* state);
     ~WalReader() override = default;
     Status init_reader(const TupleDescriptor* tuple_descriptor);
-    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
-    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) override;
+    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override;
 
     Status close() override {
         if (_wal_reader) {
@@ -41,6 +47,11 @@ class WalReader : public GenericReader {
         return Status::OK();
     }
 
+protected:
+    // ---- Unified init_reader(ReaderInitContext*) overrides ----
+    Status _open_file_reader(ReaderInitContext* ctx) override;
+    Status _do_init_reader(ReaderInitContext* ctx) override;
+
 private:
     RuntimeState* _state = nullptr;
     int64_t _wal_id;
diff --git a/be/test/format/condition_cache_test.cpp b/be/test/format/condition_cache_test.cpp
index 50c5e8c55031df..ae89a67210b625 100644
--- a/be/test/format/condition_cache_test.cpp
+++ b/be/test/format/condition_cache_test.cpp
@@ -25,8 +25,9 @@
 
 #include "common/status.h"
 #include "format/generic_reader.h"
+#include "format/orc/vorc_reader.h"
 #include "format/parquet/vparquet_reader.h"
-#include "format/table/iceberg_reader.h"
+#include "format/table/transactional_hive_common.h"
 
 namespace doris::vectorized {
 
@@ -365,42 +366,13 @@ TEST_F(CachePreAllocTest, ExtraElementDoesNotCauseIncorrectFiltering) {
 // ============================================================
 
 // GenericReader whose has_delete_operations() result is configurable,
-// used as the inner file-format reader for table-format readers.
+// used to test condition cache skip logic for various delete scenarios.
 class MockFileFormatReader : public GenericReader {
 public:
     bool mock_has_deletes = false;
-    Status get_next_block(Block*, size_t*, bool*) override { return Status::OK(); }
+    Status _do_get_next_block(Block*, size_t*, bool*) override { return Status::OK(); }
     bool has_delete_operations() const override { return mock_has_deletes; }
 };
-
-// Concrete IcebergTableReader (pure-virtual stubs filled in).
-// Exposes the protected _equality_delete_impls for testing.
-class TestableIcebergReader : public IcebergTableReader {
-public:
-    using IcebergTableReader::IcebergTableReader;
-    void set_delete_rows() override {}
-    Status _process_equality_delete(
-            const std::vector<TIcebergDeleteFileDesc>& delete_files) override {
-        return Status::OK();
-    }
-    void test_set_equality_delete(std::unique_ptr<EqualityDeleteBase> impl) {
-        _equality_delete_impls.push_back(std::move(impl));
-    }
-};
-
-// Minimal EqualityDeleteBase (only needs to be non-null for the check).
-class MockEqualityDelete : public EqualityDeleteBase {
-public:
-    MockEqualityDelete() : EqualityDeleteBase(nullptr, {}) {}
-    Status _build_set() override { return Status::OK(); }
-    Status filter_data_block(Block* data_block,
-                             const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
-                             const std::unordered_map<int, std::string>& id_to_block_column_name,
-                             IColumn::Filter& filter) override {
-        return Status::OK();
-    }
-};
-
 // ============================================================
 // These tests reproduce the logic from
 // FileScanner::_init_reader_condition_cache() (file_scanner.cpp)
@@ -539,7 +511,7 @@ TEST_F(ConditionCacheDeleteOpsTest, OrcWithAcidDeletes_CacheSkipped) {
     TFileScanRangeParams params;
     TFileRangeDesc range;
     auto reader = OrcReader::create_unique(params, range, "", nullptr);
-    TransactionalHiveReader::AcidRowIDSet acid_deletes;
+    AcidRowIDSet acid_deletes;
     acid_deletes.insert({1, 0, 5});
     reader->set_delete_rows(&acid_deletes);
 
@@ -552,62 +524,37 @@ TEST_F(ConditionCacheDeleteOpsTest, OrcWithAcidDeletes_CacheSkipped) {
     EXPECT_EQ(cache, nullptr);
 }
 
-// -- IcebergTableReader: with equality deletes -> cache skipped --
-TEST_F(ConditionCacheDeleteOpsTest, IcebergWithEqualityDeletes_CacheSkipped) {
-    TFileScanRangeParams params;
-    TFileRangeDesc range;
-    auto inner = std::make_unique<MockFileFormatReader>();
-    inner->mock_has_deletes = false;
-    RuntimeProfile profile("test");
-    TestableIcebergReader reader(std::move(inner), &profile, nullptr, params, range, nullptr,
-                                 nullptr, nullptr);
-    reader.test_set_equality_delete(std::make_unique<MockEqualityDelete>());
+// -- MockReader: with deletes (simulating Iceberg/Hive with inner deletes) -> cache skipped --
+// In the new architecture, Iceberg readers inherit ParquetReader/OrcReader directly (CRTP),
+// so has_delete_operations() is resolved through the base reader. We use MockFileFormatReader
+// to test the generic condition cache skip logic.
+TEST_F(ConditionCacheDeleteOpsTest, ReaderWithDeletes_CacheSkipped) {
+    auto reader = std::make_unique<MockFileFormatReader>();
+    reader->mock_has_deletes = true;
 
     bool hit = false;
     std::shared_ptr<std::vector<bool>> cache;
     std::shared_ptr<ConditionCacheContext> ctx;
-    simulate_init_condition_cache(&reader, 42, "/data/iceberg.parquet", hit, cache, ctx);
+    simulate_init_condition_cache(reader.get(), 42, "/data/iceberg.parquet", hit, cache, ctx);
 
     EXPECT_EQ(ctx, nullptr);
     EXPECT_EQ(cache, nullptr);
 }
 
-// -- IcebergTableReader: with position deletes in inner reader -> cache skipped --
-TEST_F(ConditionCacheDeleteOpsTest, IcebergWithPositionDeletes_CacheSkipped) {
-    TFileScanRangeParams params;
-    TFileRangeDesc range;
-    auto inner = std::make_unique<MockFileFormatReader>();
-    inner->mock_has_deletes = true; // inner reader has position deletes
-    RuntimeProfile profile("test");
-    TestableIcebergReader reader(std::move(inner), &profile, nullptr, params, range, nullptr,
-                                 nullptr, nullptr);
+// -- MockReader: no deletes -> cache populated --
+TEST_F(ConditionCacheDeleteOpsTest, ReaderWithoutDeletes_CachePopulated) {
+    auto reader = std::make_unique<MockFileFormatReader>();
+    reader->mock_has_deletes = false;
 
     bool hit = false;
     std::shared_ptr<std::vector<bool>> cache;
     std::shared_ptr<ConditionCacheContext> ctx;
-    simulate_init_condition_cache(&reader, 42, "/data/iceberg.parquet", hit, cache, ctx);
+    simulate_init_condition_cache(reader.get(), 42, "/data/iceberg.parquet", hit, cache, ctx);
 
-    EXPECT_EQ(ctx, nullptr);
-    EXPECT_EQ(cache, nullptr);
-}
-
-// -- TransactionalHiveReader: inner reader has deletes -> cache skipped --
-TEST_F(ConditionCacheDeleteOpsTest, TransactionalHiveInnerDeletes_CacheSkipped) {
-    TFileScanRangeParams params;
-    TFileRangeDesc range;
-    auto inner = std::make_unique<MockFileFormatReader>();
-    inner->mock_has_deletes = true;
-    RuntimeProfile profile("test");
-    auto reader = TransactionalHiveReader::create_unique(std::move(inner), &profile, nullptr,
-                                                         params, range, nullptr, nullptr);
-
-    bool hit = false;
-    std::shared_ptr<std::vector<bool>> cache;
-    std::shared_ptr<ConditionCacheContext> ctx;
-    simulate_init_condition_cache(reader.get(), 42, "/data/hive_acid.orc", hit, cache, ctx);
-
-    EXPECT_EQ(ctx, nullptr);
-    EXPECT_EQ(cache, nullptr);
+    EXPECT_FALSE(hit);
+    EXPECT_NE(ctx, nullptr);
+    EXPECT_NE(cache, nullptr);
+    EXPECT_FALSE(ctx->is_hit);
 }
 
 // -- Pre-populated cache entry is NOT returned when deletes exist --
diff --git a/be/test/format/native/native_reader_writer_test.cpp b/be/test/format/native/native_reader_writer_test.cpp
index 0b5bcf1bfa4bca..5d1d7dc207cef7 100644
--- a/be/test/format/native/native_reader_writer_test.cpp
+++ b/be/test/format/native/native_reader_writer_test.cpp
@@ -757,10 +757,9 @@ TEST_F(NativeReaderWriterTest, get_columns_and_parsed_schema) {
     NativeReader reader_impl(nullptr, scan_params, scan_range, nullptr, &state);
 
     std::unordered_map<std::string, DataTypePtr> name_to_type;
-    std::unordered_set<std::string> missing_cols;
-    st = reader_impl.get_columns(&name_to_type, &missing_cols);
+    st = reader_impl.get_columns(&name_to_type);
     ASSERT_TRUE(st.ok()) << st;
-    ASSERT_TRUE(missing_cols.empty());
+    ASSERT_TRUE(reader_impl.missing_cols().empty());
 
     // All columns from src_block should appear in name_to_type.
     for (size_t i = 0; i < src_block.columns(); ++i) {
diff --git a/be/test/format/orc/orc_read_lines.cpp b/be/test/format/orc/orc_read_lines.cpp
index 3e8803d5681458..7381be75719942 100644
--- a/be/test/format/orc/orc_read_lines.cpp
+++ b/be/test/format/orc/orc_read_lines.cpp
@@ -135,16 +135,20 @@ static void read_orc_line(int64_t line, std::string block_dump,
                            tuple_desc->slots().size());
     reader->set_row_id_column_iterator(iterator_pair);
 
-    auto status = reader->init_reader(&column_names, &col_name_to_block_idx, {}, false, tuple_desc,
-                                      &row_desc, nullptr, nullptr);
+    // Construct OrcInitContext for standalone reader (no column_descs).
+    OrcInitContext orc_ctx;
+    orc_ctx.column_names = column_names;
+    orc_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    orc_ctx.tuple_descriptor = tuple_desc;
+    orc_ctx.row_descriptor = &row_desc;
+    orc_ctx.params = &params;
+    orc_ctx.range = &range;
+    auto status = reader->init_reader(&orc_ctx);
 
     EXPECT_TRUE(status.ok());
 
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    auto st = reader->set_fill_columns(partition_columns, missing_columns);
-    EXPECT_TRUE(st.ok()) << st;
+    // set_fill_columns logic is now inlined in _do_init_reader,
+    // so no separate call is needed.
     BlockUPtr block = Block::create_unique();
     for (const auto& slot_desc : tuple_desc->slots()) {
         auto data_type = slot_desc->type();
@@ -159,7 +163,7 @@ static void read_orc_line(int64_t line, std::string block_dump,
 
     bool eof = false;
     size_t read_row = 0;
-    st = reader->get_next_block(block.get(), &read_row, &eof);
+    Status st = reader->get_next_block(block.get(), &read_row, &eof);
     EXPECT_TRUE(st.ok()) << st;
     auto row_id_string_column = static_cast<const ColumnString&>(
             *block->get_by_position(block->get_position_by_name("row_id")).column.get());
diff --git a/be/test/format/orc/orc_reader_init_column_test.cpp b/be/test/format/orc/orc_reader_init_column_test.cpp
index 4005edcf8fb7e6..e4d40c40935718 100644
--- a/be/test/format/orc/orc_reader_init_column_test.cpp
+++ b/be/test/format/orc/orc_reader_init_column_test.cpp
@@ -58,13 +58,13 @@ TEST_F(OrcReaderInitColumnTest, InitReadColumn) {
         std::vector<std::string> tmp;
         tmp.emplace_back("col1");
 
-        reader->_table_column_names = &tmp;
+        reader->_table_column_names = tmp;
         Status st = reader->_init_read_columns();
         std::cout << "st =" << st << "\n";
-        std::list<std::string> ans;
-        ans.emplace_back("col1");
-        ASSERT_EQ(ans, reader->_read_file_cols);
-        ASSERT_EQ(ans, reader->_read_table_cols);
+        // _init_read_columns builds _type_map; _read_file_cols is populated later
+        // in _do_init_reader's standalone path when _table_column_names is set.
+        ASSERT_TRUE(reader->_type_map.contains("col1"));
+        ASSERT_FALSE(reader->_type_map.contains("nonexistent"));
     }
 }
 
diff --git a/be/test/format/orc/orc_reader_test.cpp b/be/test/format/orc/orc_reader_test.cpp
index 6d44eeb1a36d72..3adbb000048c27 100644
--- a/be/test/format/orc/orc_reader_test.cpp
+++ b/be/test/format/orc/orc_reader_test.cpp
@@ -65,7 +65,7 @@ class OrcReaderTest : public testing::Test {
                                    "o_orderstatus")
                 << std::make_tuple(DataTypeFactory::instance().create_data_type(TYPE_DOUBLE, false),
                                    "o_totalprice")
-                << std::make_tuple(DataTypeFactory::instance().create_data_type(TYPE_DATE, false),
+                << std::make_tuple(DataTypeFactory::instance().create_data_type(TYPE_DATEV2, false),
                                    "o_orderdate")
                 << std::make_tuple(DataTypeFactory::instance().create_data_type(TYPE_STRING, false),
                                    "o_orderpriority")
@@ -83,10 +83,16 @@ class OrcReaderTest : public testing::Test {
         range.path = "./be/test/exec/test_data/orc_scanner/orders.orc";
         range.start_offset = 0;
         range.size = 1293;
-        auto reader = OrcReader::create_unique(params, range, "", nullptr, &cache, true);
-        auto status = reader->init_reader(&column_names, &col_name_to_block_idx, {}, false,
-                                          tuple_desc, &row_desc, nullptr, nullptr);
-        EXPECT_TRUE(status.ok());
+        auto reader = OrcReader::create_unique(params, range, "UTC", nullptr, &cache, true);
+        OrcInitContext orc_ctx;
+        orc_ctx.column_names = column_names;
+        orc_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+        orc_ctx.tuple_descriptor = tuple_desc;
+        orc_ctx.row_descriptor = &row_desc;
+        orc_ctx.params = &params;
+        orc_ctx.range = &range;
+        auto status = reader->init_reader(&orc_ctx);
+        EXPECT_TRUE(status.ok()) << "init_reader failed: " << status.to_string();
 
         // deserialize expr
         auto exprx = apache::thrift::from_json_string<TExpr>(expr);
@@ -155,7 +161,7 @@ TEST_F(OrcReaderTest, test_build_search_argument) {
             "<= 1200000), leaf-3 = (o_orderkey = 1100000), expr = (and (or leaf-0 (not leaf-1)) "
             "(or leaf-0 leaf-2) (or leaf-0 (not leaf-3)))",
             "leaf-0 = (o_orderkey in [1000000, 2000000, 3000000]), leaf-1 = (o_orderdate < "
-            "17121205), leaf-2 = (o_orderdate <= 17121205), expr = (and (or leaf-0 (not leaf-1)) "
+            "8766), leaf-2 = (o_orderdate <= 9130), expr = (and (or leaf-0 (not leaf-1)) "
             "(or leaf-0 leaf-2))",
             "leaf-0 = (o_orderkey < 2), leaf-1 = (o_orderpriority = 1-URGENT), expr = (or leaf-0 "
             "leaf-1)",
diff --git a/be/test/format/parquet/parquet_expr_test.cpp b/be/test/format/parquet/parquet_expr_test.cpp
index 73441901db7743..4db4bc03feebd6 100644
--- a/be/test/format/parquet/parquet_expr_test.cpp
+++ b/be/test/format/parquet/parquet_expr_test.cpp
@@ -279,10 +279,14 @@ class ParquetExprTest : public testing::Test {
                                                 &ctz, nullptr, nullptr);
         p_reader->set_file_reader(local_file_reader);
         colname_to_slot_id.emplace("int64_col", 2);
-        phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
-        static_cast<void>(p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp,
-                                                tuple_desc, nullptr, &colname_to_slot_id, nullptr,
-                                                nullptr));
+        ParquetInitContext pq_ctx;
+        pq_ctx.column_names = column_names;
+        pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+        pq_ctx.tuple_descriptor = tuple_desc;
+        pq_ctx.colname_to_slot_id = &colname_to_slot_id;
+        pq_ctx.params = &scan_params;
+        pq_ctx.range = &scan_range;
+        static_cast<void>(p_reader->init_reader(&pq_ctx));
 
         size_t meta_size;
         static_cast<void>(parse_thrift_footer(p_reader->_file_reader, &doris_file_metadata,
@@ -326,15 +330,16 @@ class ParquetExprTest : public testing::Test {
         auto local_reader = ParquetReader::create_unique(
                 nullptr, scan_params, scan_range, scan_range.size, &local_ctz, nullptr, nullptr);
         local_reader->set_file_reader(local_file_reader);
-        phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
-        static_cast<void>(local_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp,
-                                                    tuple_desc, nullptr, nullptr, nullptr,
-                                                    nullptr));
-
-        std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-                partition_columns;
-        std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-        static_cast<void>(local_reader->set_fill_columns(partition_columns, missing_columns));
+        ParquetInitContext pq_ctx2;
+        pq_ctx2.column_names = column_names;
+        pq_ctx2.col_name_to_block_idx = &col_name_to_block_idx;
+        pq_ctx2.tuple_descriptor = tuple_desc;
+        pq_ctx2.params = &scan_params;
+        pq_ctx2.range = &scan_range;
+        static_cast<void>(local_reader->init_reader(&pq_ctx2));
+
+        // set_fill_columns logic is now inlined in _do_init_reader,
+        // so no separate call is needed.
 
         bool eof = false;
         std::string dump;
diff --git a/be/test/format/parquet/parquet_read_lines.cpp b/be/test/format/parquet/parquet_read_lines.cpp
index 443f5226775201..abcb62024e463a 100644
--- a/be/test/format/parquet/parquet_read_lines.cpp
+++ b/be/test/format/parquet/parquet_read_lines.cpp
@@ -151,13 +151,14 @@ static void read_parquet_lines(std::vector<std::string> numeric_types,
     runtime_state.set_desc_tbl(desc_tbl);
 
     std::unordered_map<std::string, ColumnValueRangeType> colname_to_value_range;
-    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
-    static_cast<void>(p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, nullptr,
-                                            nullptr, nullptr, nullptr, nullptr));
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    static_cast<void>(p_reader->set_fill_columns(partition_columns, missing_columns));
+    ParquetInitContext pq_ctx;
+    pq_ctx.column_names = column_names;
+    pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    pq_ctx.params = &scan_params;
+    pq_ctx.range = &scan_range;
+    static_cast<void>(p_reader->init_reader(&pq_ctx));
+    // set_fill_columns logic is now inlined in _do_init_reader,
+    // so no separate call is needed.
     BlockUPtr block = Block::create_unique();
     for (const auto& slot_desc : tuple_desc->slots()) {
         auto data_type = make_nullable(slot_desc->type());
diff --git a/be/test/format/parquet/parquet_reader_test.cpp b/be/test/format/parquet/parquet_reader_test.cpp
index 1d6e4632842f89..e738d9c4de77ad 100644
--- a/be/test/format/parquet/parquet_reader_test.cpp
+++ b/be/test/format/parquet/parquet_reader_test.cpp
@@ -38,6 +38,7 @@
 #include "core/data_type/data_type.h"
 #include "core/data_type/data_type_factory.hpp"
 #include "core/string_view.h"
+#include "format/column_descriptor.h"
 #include "format/parquet/vparquet_reader.h"
 #include "gtest/gtest_pred_impl.h"
 #include "io/fs/file_meta_cache.h"
@@ -51,6 +52,13 @@
 
 namespace doris {
 class VExprContext;
+static std::vector<ColumnDescriptor> to_column_descs(const std::vector<std::string>& names) {
+    std::vector<ColumnDescriptor> descs;
+    for (const auto& name : names) {
+        descs.push_back({name, nullptr, ColumnCategory::REGULAR, nullptr});
+    }
+    return descs;
+}
 static VExprContextSPtrs create_predicates(DescriptorTbl* desc_tbl, RuntimeState* runtime_state);
 template <bool filter_all>
 static VExprContextSPtrs create_partition_predicates(DescriptorTbl* desc_tbl,
@@ -109,27 +117,34 @@ class ParquetReaderTest : public testing::Test {
                                                 nullptr, &runtime_state, &cache, enable_lazy);
         p_reader->set_file_reader(reader);
         runtime_state.set_desc_tbl(desc_tbl);
-        phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
         auto conjuncts = create_predicates(desc_tbl, &runtime_state);
         std::unordered_map<int, VExprContextSPtrs> slot_id_to_expr_ctxs;
         slot_id_to_expr_ctxs[0].emplace_back(conjuncts[0]);
         slot_id_to_expr_ctxs[1].emplace_back(conjuncts[1]);
 
+        auto column_descs = to_column_descs(column_names);
         if constexpr (filter_all) {
-            st = p_reader->init_reader(column_names, &col_name_to_block_idx, conjuncts, tmp,
-                                       tuple_desc, nullptr, nullptr, nullptr,
-                                       &slot_id_to_expr_ctxs);
+            ParquetInitContext pq_ctx;
+            pq_ctx.column_descs = &column_descs;
+            pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+            pq_ctx.conjuncts = &conjuncts;
+            pq_ctx.tuple_descriptor = tuple_desc;
+            pq_ctx.slot_id_to_filter_conjuncts = &slot_id_to_expr_ctxs;
+            pq_ctx.params = &scan_params;
+            pq_ctx.range = &scan_range;
+            st = p_reader->init_reader(&pq_ctx);
         } else {
-            st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, nullptr,
-                                       nullptr, nullptr, nullptr, nullptr);
+            ParquetInitContext pq_ctx;
+            pq_ctx.column_descs = &column_descs;
+            pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+            pq_ctx.params = &scan_params;
+            pq_ctx.range = &scan_range;
+            st = p_reader->init_reader(&pq_ctx);
         }
 
         EXPECT_TRUE(st.ok()) << st;
-        std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-                partition_columns;
-        std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-        st = p_reader->set_fill_columns(partition_columns, missing_columns);
-        EXPECT_TRUE(st.ok()) << st;
+        // set_fill_columns logic is now inlined in _do_init_reader,
+        // so no separate call is needed.
         bool eof = false;
         size_t total_rows = 0;
         bool all_null = true;
@@ -195,6 +210,8 @@ class ParquetReaderTest : public testing::Test {
         TFileRangeDesc scan_range;
         scan_range.start_offset = 0;
         scan_range.size = 1000;
+        scan_range.__set_columns_from_path_keys({"part_col"});
+        scan_range.__set_columns_from_path({"1"});
         auto q_options = TQueryOptions();
         q_options.__set_enable_adjust_conjunct_order_by_cost(true);
         RuntimeState runtime_state = RuntimeState(q_options, TQueryGlobals());
@@ -204,22 +221,25 @@ class ParquetReaderTest : public testing::Test {
         p_reader->set_file_reader(reader);
         runtime_state.set_desc_tbl(desc_tbl);
 
-        phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
         auto conjuncts = create_partition_predicates<filter_all>(desc_tbl, &runtime_state);
         std::unordered_map<int, VExprContextSPtrs> slot_id_to_expr_ctxs;
         slot_id_to_expr_ctxs[1].emplace_back(conjuncts[0]);
         slot_id_to_expr_ctxs[2].emplace_back(conjuncts[1]);
 
-        st = p_reader->init_reader(column_names, &col_name_to_block_idx, conjuncts, tmp, tuple_desc,
-                                   nullptr, nullptr, nullptr, &slot_id_to_expr_ctxs);
+        auto column_descs = to_column_descs(column_names);
+        ParquetInitContext pq_ctx;
+        pq_ctx.column_descs = &column_descs;
+        pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+        pq_ctx.conjuncts = &conjuncts;
+        pq_ctx.tuple_descriptor = tuple_desc;
+        pq_ctx.slot_id_to_filter_conjuncts = &slot_id_to_expr_ctxs;
+        pq_ctx.params = &scan_params;
+        pq_ctx.range = &scan_range;
+        st = p_reader->init_reader(&pq_ctx);
         EXPECT_TRUE(st.ok()) << st;
 
-        std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-                partition_columns;
-        partition_columns.emplace("part_col", std::make_tuple("1", tuple_desc->slots()[2]));
-        std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-        st = p_reader->set_fill_columns(partition_columns, missing_columns);
-        EXPECT_TRUE(st.ok()) << st;
+        // Partition/missing column logic is now handled by on_before_init_reader
+        // via _extract_partition_values from scan_range.
 
         bool eof = false;
         size_t total_rows = 0;
@@ -348,13 +368,14 @@ TEST_F(ParquetReaderTest, normal) {
     RuntimeState runtime_state((TQueryOptions()), TQueryGlobals());
     runtime_state.set_desc_tbl(desc_tbl);
 
-    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
-    static_cast<void>(p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, nullptr,
-                                            nullptr, nullptr, nullptr, nullptr));
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    static_cast<void>(p_reader->set_fill_columns(partition_columns, missing_columns));
+    ParquetInitContext pq_ctx;
+    pq_ctx.column_names = column_names;
+    pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    pq_ctx.params = &scan_params;
+    pq_ctx.range = &scan_range;
+    static_cast<void>(p_reader->init_reader(&pq_ctx));
+    // set_fill_columns logic is now inlined in _do_init_reader,
+    // so no separate call is needed.
     BlockUPtr block = Block::create_unique();
     for (const auto& slot_desc : tuple_desc->slots()) {
         auto data_type = make_nullable(slot_desc->type());
@@ -413,15 +434,15 @@ TEST_F(ParquetReaderTest, uuid_varbinary) {
     RuntimeState runtime_state = RuntimeState(TQueryOptions(), TQueryGlobals());
     runtime_state.set_desc_tbl(desc_tbl);
 
-    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
-    st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, nullptr, nullptr,
-                               nullptr, nullptr, nullptr);
-    EXPECT_TRUE(st.ok()) << st;
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    st = p_reader->set_fill_columns(partition_columns, missing_columns);
+    ParquetInitContext pq_ctx;
+    pq_ctx.column_names = column_names;
+    pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    pq_ctx.params = &scan_params;
+    pq_ctx.range = &scan_range;
+    st = p_reader->init_reader(&pq_ctx);
     EXPECT_TRUE(st.ok()) << st;
+    // set_fill_columns logic is now inlined in _do_init_reader,
+    // so no separate call is needed.
     BlockUPtr block = Block::create_unique();
     for (const auto& slot_desc : tuple_desc->slots()) {
         auto data_type = make_nullable(slot_desc->type());
@@ -487,15 +508,15 @@ TEST_F(ParquetReaderTest, varbinary_varbinary) {
     RuntimeState runtime_state = RuntimeState(TQueryOptions(), TQueryGlobals());
     runtime_state.set_desc_tbl(desc_tbl);
 
-    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
-    st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, nullptr, nullptr,
-                               nullptr, nullptr, nullptr);
-    EXPECT_TRUE(st.ok()) << st;
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    st = p_reader->set_fill_columns(partition_columns, missing_columns);
+    ParquetInitContext pq_ctx;
+    pq_ctx.column_names = column_names;
+    pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    pq_ctx.params = &scan_params;
+    pq_ctx.range = &scan_range;
+    st = p_reader->init_reader(&pq_ctx);
     EXPECT_TRUE(st.ok()) << st;
+    // set_fill_columns logic is now inlined in _do_init_reader,
+    // so no separate call is needed.
     BlockUPtr block = Block::create_unique();
     for (const auto& slot_desc : tuple_desc->slots()) {
         auto data_type = make_nullable(slot_desc->type());
@@ -563,15 +584,15 @@ TEST_F(ParquetReaderTest, varbinary_string) {
     RuntimeState runtime_state = RuntimeState(TQueryOptions(), TQueryGlobals());
     runtime_state.set_desc_tbl(desc_tbl);
 
-    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
-    st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, nullptr, nullptr,
-                               nullptr, nullptr, nullptr);
-    EXPECT_TRUE(st.ok()) << st;
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    st = p_reader->set_fill_columns(partition_columns, missing_columns);
+    ParquetInitContext pq_ctx;
+    pq_ctx.column_names = column_names;
+    pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    pq_ctx.params = &scan_params;
+    pq_ctx.range = &scan_range;
+    st = p_reader->init_reader(&pq_ctx);
     EXPECT_TRUE(st.ok()) << st;
+    // set_fill_columns logic is now inlined in _do_init_reader,
+    // so no separate call is needed.
     BlockUPtr block = Block::create_unique();
     for (const auto& slot_desc : tuple_desc->slots()) {
         auto data_type = make_nullable(slot_desc->type());
@@ -639,15 +660,15 @@ TEST_F(ParquetReaderTest, varbinary_string2) {
     RuntimeState runtime_state = RuntimeState(TQueryOptions(), TQueryGlobals());
     runtime_state.set_desc_tbl(desc_tbl);
 
-    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
-    st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, nullptr, nullptr,
-                               nullptr, nullptr, nullptr);
-    EXPECT_TRUE(st.ok()) << st;
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    st = p_reader->set_fill_columns(partition_columns, missing_columns);
+    ParquetInitContext pq_ctx;
+    pq_ctx.column_names = column_names;
+    pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    pq_ctx.params = &scan_params;
+    pq_ctx.range = &scan_range;
+    st = p_reader->init_reader(&pq_ctx);
     EXPECT_TRUE(st.ok()) << st;
+    // set_fill_columns logic is now inlined in _do_init_reader,
+    // so no separate call is needed.
     BlockUPtr block = Block::create_unique();
     for (const auto& slot_desc : tuple_desc->slots()) {
         auto data_type = make_nullable(slot_desc->type());
@@ -961,6 +982,8 @@ TEST_F(ParquetReaderTest, only_partition_column) {
     TFileRangeDesc scan_range;
     scan_range.start_offset = 0;
     scan_range.size = 1000;
+    scan_range.__set_columns_from_path_keys({"part_col"});
+    scan_range.__set_columns_from_path({"1"});
     auto q_options = TQueryOptions();
     q_options.__set_enable_adjust_conjunct_order_by_cost(true);
     RuntimeState runtime_state = RuntimeState(q_options, TQueryGlobals());
@@ -969,21 +992,24 @@ TEST_F(ParquetReaderTest, only_partition_column) {
     p_reader->set_file_reader(reader);
     runtime_state.set_desc_tbl(desc_tbl);
 
-    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
     auto conjuncts = create_only_partition_predicates(desc_tbl, &runtime_state);
     std::unordered_map<int, VExprContextSPtrs> slot_id_to_expr_ctxs;
     slot_id_to_expr_ctxs[0].emplace_back(conjuncts[0]);
 
-    st = p_reader->init_reader(column_names, &col_name_to_block_idx, conjuncts, tmp, tuple_desc,
-                               nullptr, nullptr, nullptr, &slot_id_to_expr_ctxs);
+    auto column_descs = to_column_descs(column_names);
+    ParquetInitContext pq_ctx;
+    pq_ctx.column_descs = &column_descs;
+    pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    pq_ctx.conjuncts = &conjuncts;
+    pq_ctx.tuple_descriptor = tuple_desc;
+    pq_ctx.slot_id_to_filter_conjuncts = &slot_id_to_expr_ctxs;
+    pq_ctx.params = &scan_params;
+    pq_ctx.range = &scan_range;
+    st = p_reader->init_reader(&pq_ctx);
     EXPECT_TRUE(st.ok()) << st;
 
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    partition_columns.emplace("part_col", std::make_tuple("1", tuple_desc->slots()[0]));
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    st = p_reader->set_fill_columns(partition_columns, missing_columns);
-    EXPECT_TRUE(st.ok()) << st;
+    // Partition/missing column logic is now handled by on_before_init_reader
+    // via _extract_partition_values from scan_range.
 
     bool eof = false;
     size_t total_rows = 0;
diff --git a/be/test/format/table/hive/hive_reader_create_column_ids_test.cpp b/be/test/format/table/hive/hive_reader_create_column_ids_test.cpp
index 845594d608faee..7a884359027d73 100644
--- a/be/test/format/table/hive/hive_reader_create_column_ids_test.cpp
+++ b/be/test/format/table/hive/hive_reader_create_column_ids_test.cpp
@@ -660,33 +660,28 @@ class HiveReaderCreateColumnIdsTest : public ::testing::Test {
 
         cctz::time_zone ctz;
         TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, ctz);
-        auto generic_reader =
-                ParquetReader::create_unique(&profile, scan_params, scan_range, 1024, &ctz, nullptr,
-                                             &runtime_state, cache.get());
-        if (!generic_reader) {
+
+        auto hive_reader =
+                std::make_unique<HiveParquetReader>(&profile, scan_params, scan_range, 1024, &ctz,
+                                                    nullptr, &runtime_state, nullptr, cache.get());
+        if (!hive_reader) {
             return {nullptr, nullptr};
         }
 
-        auto parquet_reader = static_cast<ParquetReader*>(generic_reader.get());
-        parquet_reader->set_file_reader(file_reader);
+        hive_reader->set_file_reader(file_reader);
 
         const FieldDescriptor* field_desc = nullptr;
-        st = parquet_reader->get_file_metadata_schema(&field_desc);
+        st = hive_reader->get_file_metadata_schema(&field_desc);
         if (!st.ok() || !field_desc) {
             return {nullptr, nullptr};
         }
 
-        auto hive_reader = std::make_unique<HiveParquetReader>(
-                std::move(generic_reader), &profile, &runtime_state, scan_params, scan_range,
-                nullptr, nullptr, cache.get());
-
         return {std::move(hive_reader), field_desc};
     }
 
     // Helper function: Create and setup OrcReader
     std::tuple<std::unique_ptr<HiveOrcReader>, const orc::Type*> create_orc_reader(
             const std::string& test_file) {
-        // Open the Hive Orc test file
         auto local_fs = io::global_local_filesystem();
         io::FileReaderSPtr file_reader;
         auto st = local_fs->open_file(test_file, &file_reader);
@@ -694,45 +689,31 @@ class HiveReaderCreateColumnIdsTest : public ::testing::Test {
             return {nullptr, nullptr};
         }
 
-        // Setup runtime state
         RuntimeState runtime_state = RuntimeState(TQueryOptions(), TQueryGlobals());
-
-        // Setup scan parameters
         TFileScanRangeParams scan_params;
         scan_params.format_type = TFileFormatType::FORMAT_ORC;
-
         TFileRangeDesc scan_range;
         scan_range.start_offset = 0;
-        scan_range.size = file_reader->size(); // Read entire file
+        scan_range.size = file_reader->size();
         scan_range.path = test_file;
-
-        // Create mock profile
         RuntimeProfile profile("test_profile");
 
-        // Create OrcReader as the underlying file format reader
         cctz::time_zone ctz;
         TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, ctz);
 
-        auto generic_reader =
-                OrcReader::create_unique(&profile, &runtime_state, scan_params, scan_range, 1024,
-                                         "CST", nullptr, cache.get());
-        if (!generic_reader) {
+        auto hive_reader =
+                std::make_unique<HiveOrcReader>(&profile, &runtime_state, scan_params, scan_range,
+                                                1024, "CST", nullptr, nullptr, cache.get());
+        if (!hive_reader) {
             return {nullptr, nullptr};
         }
 
-        auto orc_reader = static_cast<OrcReader*>(generic_reader.get());
-        // Get FieldDescriptor from Orc file
         const orc::Type* orc_type_ptr = nullptr;
-        st = orc_reader->get_file_type(&orc_type_ptr);
+        st = hive_reader->get_file_type(&orc_type_ptr);
         if (!st.ok() || !orc_type_ptr) {
             return {nullptr, nullptr};
         }
 
-        // Create HiveOrcReader
-        auto hive_reader = std::make_unique<HiveOrcReader>(std::move(generic_reader), &profile,
-                                                           &runtime_state, scan_params, scan_range,
-                                                           nullptr, nullptr, cache.get());
-
         return {std::move(hive_reader), orc_type_ptr};
     }
 
diff --git a/be/test/format/table/hive/hive_reader_test.cpp b/be/test/format/table/hive/hive_reader_test.cpp
index 6b3711ff9794d9..7746cbffa60bac 100644
--- a/be/test/format/table/hive/hive_reader_test.cpp
+++ b/be/test/format/table/hive/hive_reader_test.cpp
@@ -44,6 +44,7 @@
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/data_type_string.h"
 #include "core/data_type/data_type_struct.h"
+#include "format/orc/vorc_reader.h"
 #include "format/parquet/vparquet_reader.h"
 #include "io/fs/file_meta_cache.h"
 #include "io/fs/file_reader_writer_fwd.h"
@@ -525,22 +526,15 @@ TEST_F(HiveReaderTest, read_hive_parquet_file) {
     // Create mock profile
     RuntimeProfile profile("test_profile");
 
-    // Create ParquetReader as the underlying file format reader
+    // Create HiveParquetReader (directly inherits ParquetReader)
     cctz::time_zone ctz;
     TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, ctz);
+    auto hive_reader =
+            std::make_unique<HiveParquetReader>(&profile, scan_params, scan_range, 1024, &ctz,
+                                                nullptr, &runtime_state, nullptr, cache.get());
 
-    auto generic_reader = ParquetReader::create_unique(&profile, scan_params, scan_range, 1024,
-                                                       &ctz, nullptr, &runtime_state, cache.get());
-    ASSERT_NE(generic_reader, nullptr);
-
-    // Set file reader for the generic reader
-    auto parquet_reader = static_cast<ParquetReader*>(generic_reader.get());
-    parquet_reader->set_file_reader(file_reader);
-
-    // Create HiveParquetReader
-    auto hive_reader = std::make_unique<HiveParquetReader>(std::move(generic_reader), &profile,
-                                                           &runtime_state, scan_params, scan_range,
-                                                           nullptr, nullptr, cache.get());
+    // Set file reader for the hive reader (inherited from ParquetReader)
+    hive_reader->set_file_reader(file_reader);
 
     // Create complex struct types using helper function
     DataTypePtr coordinates_struct_type, address_struct_type, phone_struct_type;
@@ -564,24 +558,21 @@ TEST_F(HiveReaderTest, read_hive_parquet_file) {
             create_tuple_descriptor(&desc_tbl, obj_pool, t_desc_table, t_table_desc,
                                     table_column_names, table_column_positions, table_column_types);
 
-    VExprContextSPtrs conjuncts; // Empty conjuncts for this test
-    std::vector<std::string> table_col_names = {"name", "profile"};
     std::unordered_map<std::string, uint32_t> col_name_to_block_idx = {{"name", 0}, {"profile", 1}};
-    const RowDescriptor* row_descriptor = nullptr;
-    const std::unordered_map<std::string, int>* colname_to_slot_id = nullptr;
-    const VExprContextSPtrs* not_single_slot_filter_conjuncts = nullptr;
-    const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts = nullptr;
-
-    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
-    st = hive_reader->init_reader(table_col_names, &col_name_to_block_idx, conjuncts, tmp,
-                                  tuple_descriptor, row_descriptor, colname_to_slot_id,
-                                  not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts);
+
+    // Use the template method init_reader (inherited from ParquetReader)
+    // on_before_init_columns hook in HiveParquetReader will do schema matching
+    ParquetInitContext pq_ctx;
+    pq_ctx.column_names = table_column_names;
+    pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    pq_ctx.tuple_descriptor = tuple_descriptor;
+    pq_ctx.params = &scan_params;
+    pq_ctx.range = &scan_range;
+    st = hive_reader->init_reader(&pq_ctx);
     ASSERT_TRUE(st.ok()) << st;
 
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    ASSERT_TRUE(hive_reader->set_fill_columns(partition_columns, missing_columns).ok());
+    // set_fill_columns logic is now inlined in _do_init_reader,
+    // so no separate call is needed.
 
     // Create block for reading nested structure (not flattened)
     Block block;
@@ -667,18 +658,10 @@ TEST_F(HiveReaderTest, read_hive_rrc_file) {
     // Create mock profile
     RuntimeProfile profile("test_profile");
 
-    // Create OrcReader as the underlying file format reader
-    cctz::time_zone ctz;
-    TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, ctz);
-
-    auto generic_reader = OrcReader::create_unique(&profile, &runtime_state, scan_params,
-                                                   scan_range, 1024, "CST", nullptr, cache.get());
-    ASSERT_NE(generic_reader, nullptr);
-
-    // Create HiveOrcReader
+    // Create HiveOrcReader (directly inherits OrcReader)
     auto hive_reader =
-            std::make_unique<HiveOrcReader>(std::move(generic_reader), &profile, &runtime_state,
-                                            scan_params, scan_range, nullptr, nullptr, cache.get());
+            std::make_unique<HiveOrcReader>(&profile, &runtime_state, scan_params, scan_range, 1024,
+                                            "CST", nullptr, nullptr, cache.get());
 
     // Create complex struct types using helper function
     DataTypePtr coordinates_struct_type, address_struct_type, phone_struct_type;
@@ -702,22 +685,19 @@ TEST_F(HiveReaderTest, read_hive_rrc_file) {
             create_tuple_descriptor(&desc_tbl, obj_pool, t_desc_table, t_table_desc,
                                     table_column_names, table_column_positions, table_column_types);
 
-    VExprContextSPtrs conjuncts; // Empty conjuncts for this test
-    std::vector<std::string> table_col_names = {"name", "profile"};
     std::unordered_map<std::string, uint32_t> col_name_to_block_idx = {{"name", 0}, {"profile", 1}};
-    const RowDescriptor* row_descriptor = nullptr;
-    const VExprContextSPtrs* not_single_slot_filter_conjuncts = nullptr;
-    const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts = nullptr;
 
-    st = hive_reader->init_reader(table_col_names, &col_name_to_block_idx, conjuncts,
-                                  tuple_descriptor, row_descriptor,
-                                  not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts);
+    OrcInitContext orc_ctx;
+    orc_ctx.column_names = table_column_names;
+    orc_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    orc_ctx.tuple_descriptor = tuple_descriptor;
+    orc_ctx.params = &scan_params;
+    orc_ctx.range = &scan_range;
+    st = hive_reader->init_reader(&orc_ctx);
     ASSERT_TRUE(st.ok()) << st;
 
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    ASSERT_TRUE(hive_reader->set_fill_columns(partition_columns, missing_columns).ok());
+    // set_fill_columns logic is now inlined in _do_init_reader,
+    // so no separate call is needed.
 
     // Create block for reading nested structure (not flattened)
     Block block;
diff --git a/be/test/format/table/iceberg/iceberg_reader_create_column_ids_test.cpp b/be/test/format/table/iceberg/iceberg_reader_create_column_ids_test.cpp
index d2de833c494fcd..e32153d1ef7f74 100644
--- a/be/test/format/table/iceberg/iceberg_reader_create_column_ids_test.cpp
+++ b/be/test/format/table/iceberg/iceberg_reader_create_column_ids_test.cpp
@@ -686,32 +686,26 @@ class IcebergReaderCreateColumnIdsTest : public ::testing::Test {
         // Create mock profile
         RuntimeProfile profile("test_profile");
 
-        // Create ParquetReader as the underlying file format reader
+        // Create IcebergParquetReader (IS-A ParquetReader via CRTP mixin)
         cctz::time_zone ctz;
         TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, ctz);
 
-        auto generic_reader =
-                ParquetReader::create_unique(&profile, scan_params, scan_range, 1024, &ctz, nullptr,
-                                             &runtime_state, cache.get());
-        if (!generic_reader) {
+        auto iceberg_reader = std::make_unique<IcebergParquetReader>(
+                nullptr /* kv_cache */, &profile, scan_params, scan_range, 1024, &ctz,
+                nullptr /* io_ctx */, &runtime_state, cache.get());
+        if (!iceberg_reader) {
             return {nullptr, nullptr};
         }
 
-        // Set file reader for the generic reader
-        auto parquet_reader = static_cast<ParquetReader*>(generic_reader.get());
-        parquet_reader->set_file_reader(file_reader);
+        // Set file reader directly on the iceberg reader (it IS the ParquetReader)
+        iceberg_reader->set_file_reader(file_reader);
 
         const FieldDescriptor* field_desc = nullptr;
-        st = parquet_reader->get_file_metadata_schema(&field_desc);
+        st = iceberg_reader->get_file_metadata_schema(&field_desc);
         if (!st.ok() || !field_desc) {
             return {nullptr, nullptr};
         }
 
-        // Create IcebergParquetReader
-        auto iceberg_reader = std::make_unique<IcebergParquetReader>(
-                std::move(generic_reader), &profile, &runtime_state, scan_params, scan_range,
-                nullptr, nullptr, cache.get());
-
         return {std::move(iceberg_reader), field_desc};
     }
 
@@ -741,30 +735,21 @@ class IcebergReaderCreateColumnIdsTest : public ::testing::Test {
         // Create mock profile
         RuntimeProfile profile("test_profile");
 
-        // Create OrcReader as the underlying file format reader
-        cctz::time_zone ctz;
-        TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, ctz);
-
-        auto generic_reader =
-                OrcReader::create_unique(&profile, &runtime_state, scan_params, scan_range, 1024,
-                                         "CST", nullptr, cache.get());
-        if (!generic_reader) {
+        // Create IcebergOrcReader (IS-A OrcReader via CRTP mixin)
+        auto iceberg_reader = std::make_unique<IcebergOrcReader>(
+                nullptr /* kv_cache */, &profile, &runtime_state, scan_params, scan_range, 1024,
+                "CST", nullptr /* io_ctx */, cache.get());
+        if (!iceberg_reader) {
             return {nullptr, nullptr};
         }
 
-        auto orc_reader = static_cast<OrcReader*>(generic_reader.get());
-        // Get FieldDescriptor from Orc file
+        // Get ORC type from the iceberg reader (it IS the OrcReader)
         const orc::Type* orc_type_ptr = nullptr;
-        st = orc_reader->get_file_type(&orc_type_ptr);
+        st = iceberg_reader->get_file_type(&orc_type_ptr);
         if (!st.ok() || !orc_type_ptr) {
             return {nullptr, nullptr};
         }
 
-        // Create IcebergOrcReader
-        auto iceberg_reader = std::make_unique<IcebergOrcReader>(
-                std::move(generic_reader), &profile, &runtime_state, scan_params, scan_range,
-                nullptr, nullptr, cache.get());
-
         return {std::move(iceberg_reader), orc_type_ptr};
     }
 
diff --git a/be/test/format/table/iceberg/iceberg_reader_test.cpp b/be/test/format/table/iceberg/iceberg_reader_test.cpp
index ba387b8c0bb8a4..fb35b78c6174b0 100644
--- a/be/test/format/table/iceberg/iceberg_reader_test.cpp
+++ b/be/test/format/table/iceberg/iceberg_reader_test.cpp
@@ -45,6 +45,8 @@
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/data_type_string.h"
 #include "core/data_type/data_type_struct.h"
+#include "format/column_descriptor.h"
+#include "format/orc/vorc_reader.h"
 #include "format/parquet/vparquet_column_chunk_reader.h"
 #include "format/parquet/vparquet_reader.h"
 #include "io/fs/file_meta_cache.h"
@@ -107,23 +109,18 @@ class IcebergReaderTest : public ::testing::Test {
 
         parquet_reader->set_file_reader(*file_reader);
 
-        phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> predicates;
-        st = parquet_reader->init_reader(delete_file_column_names,
-                                         &delete_file_col_name_to_block_idx, {}, predicates,
-                                         nullptr, nullptr, nullptr, nullptr, nullptr);
+        ParquetInitContext pq_ctx;
+        pq_ctx.column_names = delete_file_column_names;
+        pq_ctx.col_name_to_block_idx = &delete_file_col_name_to_block_idx;
+        pq_ctx.params = scan_params;
+        pq_ctx.range = scan_range;
+        st = parquet_reader->init_reader(&pq_ctx);
         EXPECT_TRUE(st.ok()) << st;
         if (!st.ok()) {
             return nullptr;
         }
 
-        std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-                partition_columns;
-        std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-        st = parquet_reader->set_fill_columns(partition_columns, missing_columns);
-        EXPECT_TRUE(st.ok()) << st;
-        if (!st.ok()) {
-            return nullptr;
-        }
+        // Partition/missing column logic is now inlined in _do_init_reader.
 
         *file_meta_data = parquet_reader->get_meta_data();
         return parquet_reader;
@@ -705,22 +702,17 @@ TEST_F(IcebergReaderTest, read_iceberg_parquet_file) {
     // Create mock profile
     RuntimeProfile profile("test_profile");
 
-    // Create ParquetReader as the underlying file format reader
+    // Create IcebergParquetReader (IS-A ParquetReader via CRTP mixin)
     cctz::time_zone ctz;
     TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, ctz);
 
-    auto generic_reader = ParquetReader::create_unique(&profile, scan_params, scan_range, 1024,
-                                                       &ctz, nullptr, &runtime_state, cache.get());
-    ASSERT_NE(generic_reader, nullptr);
-
-    // Set file reader for the generic reader
-    auto parquet_reader = static_cast<ParquetReader*>(generic_reader.get());
-    parquet_reader->set_file_reader(file_reader);
-
-    // Create IcebergParquetReader
     auto iceberg_reader = std::make_unique<IcebergParquetReader>(
-            std::move(generic_reader), &profile, &runtime_state, scan_params, scan_range, nullptr,
-            nullptr, cache.get());
+            nullptr /* kv_cache */, &profile, scan_params, scan_range, 1024, &ctz,
+            nullptr /* io_ctx */, &runtime_state, cache.get());
+    ASSERT_NE(iceberg_reader, nullptr);
+
+    // Set file reader for the iceberg reader (it IS the ParquetReader)
+    iceberg_reader->set_file_reader(file_reader);
 
     // Create complex struct types using helper function
     DataTypePtr coordinates_struct_type, address_struct_type, phone_struct_type;
@@ -738,27 +730,29 @@ TEST_F(IcebergReaderTest, read_iceberg_parquet_file) {
     const TupleDescriptor* tuple_descriptor =
             create_tuple_descriptor(&desc_tbl, obj_pool, t_desc_table, t_table_desc);
 
-    VExprContextSPtrs conjuncts; // Empty conjuncts for this test
     std::vector<std::string> table_col_names = {"name", "profile"};
     std::unordered_map<std::string, uint32_t> col_name_to_block_idx = {
             {"name", 0},
             {"profile", 1},
     };
-    const RowDescriptor* row_descriptor = nullptr;
-    const std::unordered_map<std::string, int>* colname_to_slot_id = nullptr;
-    const VExprContextSPtrs* not_single_slot_filter_conjuncts = nullptr;
-    const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts = nullptr;
-
-    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> tmp;
-    st = iceberg_reader->init_reader(table_col_names, &col_name_to_block_idx, conjuncts, tmp,
-                                     tuple_descriptor, row_descriptor, colname_to_slot_id,
-                                     not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts);
+
+    std::vector<ColumnDescriptor> column_descs;
+    for (const auto& name : table_col_names) {
+        ColumnDescriptor desc;
+        desc.name = name;
+        column_descs.push_back(desc);
+    }
+    ParquetInitContext pq_ctx;
+    pq_ctx.column_descs = &column_descs;
+    pq_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    pq_ctx.tuple_descriptor = tuple_descriptor;
+    pq_ctx.params = &scan_params;
+    pq_ctx.range = &scan_range;
+    st = iceberg_reader->init_reader(&pq_ctx);
     ASSERT_TRUE(st.ok()) << st;
 
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    ASSERT_TRUE(iceberg_reader->set_fill_columns(partition_columns, missing_columns).ok());
+    // set_fill_columns logic is now inlined in _do_init_reader,
+    // so no separate call is needed.
 
     // Create block for reading nested structure (not flattened)
     Block block;
@@ -845,18 +839,11 @@ TEST_F(IcebergReaderTest, read_iceberg_orc_file) {
     // Create mock profile
     RuntimeProfile profile("test_profile");
 
-    // Create OrcReader as the underlying file format reader
-    cctz::time_zone ctz;
-    TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, ctz);
-
-    auto generic_reader = OrcReader::create_unique(&profile, &runtime_state, scan_params,
-                                                   scan_range, 1024, "CST", nullptr, cache.get());
-    ASSERT_NE(generic_reader, nullptr);
-
-    // Create IcebergOrcReader
+    // Create IcebergOrcReader (IS-A OrcReader via CRTP mixin)
     auto iceberg_reader = std::make_unique<IcebergOrcReader>(
-            std::move(generic_reader), &profile, &runtime_state, scan_params, scan_range, nullptr,
-            nullptr, cache.get());
+            nullptr /* kv_cache */, &profile, &runtime_state, scan_params, scan_range, 1024, "CST",
+            nullptr /* io_ctx */, cache.get());
+    ASSERT_NE(iceberg_reader, nullptr);
 
     // Create complex struct types using helper function
     DataTypePtr coordinates_struct_type, address_struct_type, phone_struct_type;
@@ -874,26 +861,31 @@ TEST_F(IcebergReaderTest, read_iceberg_orc_file) {
     const TupleDescriptor* tuple_descriptor =
             create_tuple_descriptor(&desc_tbl, obj_pool, t_desc_table, t_table_desc);
 
-    VExprContextSPtrs conjuncts; // Empty conjuncts for this test
     std::vector<std::string> table_col_names = {"name", "profile"};
     const RowDescriptor* row_descriptor = nullptr;
-    const std::unordered_map<std::string, int>* colname_to_slot_id = nullptr;
     std::unordered_map<std::string, uint32_t> col_name_to_block_idx = {
             {"name", 0},
             {"profile", 1},
     };
-    const VExprContextSPtrs* not_single_slot_filter_conjuncts = nullptr;
-    const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts = nullptr;
 
-    st = iceberg_reader->init_reader(table_col_names, &col_name_to_block_idx, conjuncts,
-                                     tuple_descriptor, row_descriptor, colname_to_slot_id,
-                                     not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts);
+    std::vector<ColumnDescriptor> column_descs;
+    for (const auto& name : table_col_names) {
+        ColumnDescriptor desc;
+        desc.name = name;
+        column_descs.push_back(desc);
+    }
+    OrcInitContext orc_ctx;
+    orc_ctx.column_descs = &column_descs;
+    orc_ctx.col_name_to_block_idx = &col_name_to_block_idx;
+    orc_ctx.tuple_descriptor = tuple_descriptor;
+    orc_ctx.row_descriptor = row_descriptor;
+    orc_ctx.params = &scan_params;
+    orc_ctx.range = &scan_range;
+    st = iceberg_reader->init_reader(&orc_ctx);
     ASSERT_TRUE(st.ok()) << st;
 
-    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-            partition_columns;
-    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-    ASSERT_TRUE(iceberg_reader->set_fill_columns(partition_columns, missing_columns).ok());
+    // set_fill_columns logic is now inlined in _do_init_reader,
+    // so no separate call is needed.
 
     // Create block for reading nested structure (not flattened)
     Block block;
diff --git a/be/test/format/table/table_schema_change_helper_test.cpp b/be/test/format/table/table_schema_change_helper_test.cpp
index ba1d96e4d6d3c9..de653c1486a5f0 100644
--- a/be/test/format/table/table_schema_change_helper_test.cpp
+++ b/be/test/format/table/table_schema_change_helper_test.cpp
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "format/table/table_schema_change_helper.h"
+
 #include <gtest/gtest.h>
 
 #include <string>
@@ -24,7 +26,6 @@
 #include "core/column/column_string.h"
 #include "core/data_type/data_type_factory.hpp"
 #include "format/table/iceberg_reader.h"
-#include "format/table/table_format_reader.h"
 #include "testutil/desc_tbl_builder.h"
 
 namespace doris {
@@ -337,7 +338,7 @@ TEST(MockTableSchemaChangeHelper, IcebergParquetSchemaChange) {
     bool exist_field_id = true;
     std::shared_ptr<TableSchemaChangeHelper::Node> ans_node = nullptr;
     ASSERT_TRUE(TableSchemaChangeHelper::BuildTableInfoUtil::by_parquet_field_id(
-                        test_field, parquet_field, exist_field_id, ans_node)
+                        test_field, parquet_field, ans_node, exist_field_id)
                         .ok());
     ASSERT_TRUE(exist_field_id);
     std::cout << TableSchemaChangeHelper::debug(ans_node) << "\n";
@@ -420,7 +421,7 @@ TEST(MockTableSchemaChangeHelper, IcebergOrcSchemaChange) {
     bool exist_field_id = true;
     std::shared_ptr<TableSchemaChangeHelper::Node> ans_node = nullptr;
     ASSERT_TRUE(TableSchemaChangeHelper::BuildTableInfoUtil::by_orc_field_id(
-                        test_field, orc_type.get(), attribute, exist_field_id, ans_node)
+                        test_field, orc_type.get(), attribute, ans_node, exist_field_id)
                         .ok());
     ASSERT_TRUE(exist_field_id);
 
@@ -806,7 +807,7 @@ TEST(MockTableSchemaChangeHelper, OrcFieldIdNestedStructMap) {
     bool exist_field_id = true;
     std::shared_ptr<TableSchemaChangeHelper::Node> ans_node = nullptr;
     ASSERT_TRUE(TableSchemaChangeHelper::BuildTableInfoUtil::by_orc_field_id(
-                        test_field, orc_type.get(), attribute, exist_field_id, ans_node)
+                        test_field, orc_type.get(), attribute, ans_node, exist_field_id)
                         .ok());
 
     ASSERT_TRUE(exist_field_id);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
index 9f3bbe2376ab77..c6a07c9cdcd5cb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
@@ -45,6 +45,7 @@
 import org.apache.doris.spi.Split;
 import org.apache.doris.system.Backend;
 import org.apache.doris.tablefunction.ExternalFileTableValuedFunction;
+import org.apache.doris.thrift.TColumnCategory;
 import org.apache.doris.thrift.TExternalScanRange;
 import org.apache.doris.thrift.TFileAttributes;
 import org.apache.doris.thrift.TFileCompressType;
@@ -169,11 +170,9 @@ protected void initSchemaParams() throws UserException {
         for (SlotDescriptor slot : desc.getSlots()) {
             TFileScanSlotInfo slotInfo = new TFileScanSlotInfo();
             slotInfo.setSlotId(slot.getId().asInt());
-            boolean isFileSlot = !partitionKeys.contains(slot.getColumn().getName());
-            if (isIcebergRowIdColumn(slot)) {
-                isFileSlot = false;
-            }
-            slotInfo.setIsFileSlot(isFileSlot);
+            TColumnCategory category = classifyColumn(slot, partitionKeys);
+            slotInfo.setCategory(category);
+            slotInfo.setIsFileSlot(category == TColumnCategory.REGULAR || category == TColumnCategory.GENERATED);
             params.addToRequiredSlots(slotInfo);
         }
         setDefaultValueExprs(getTargetTable(), destSlotDescByName, null, params, false);
@@ -190,19 +189,27 @@ private void updateRequiredSlots() throws UserException {
         for (SlotDescriptor slot : desc.getSlots()) {
             TFileScanSlotInfo slotInfo = new TFileScanSlotInfo();
             slotInfo.setSlotId(slot.getId().asInt());
-            boolean isFileSlot = !getPathPartitionKeys().contains(slot.getColumn().getName());
-            if (isIcebergRowIdColumn(slot)) {
-                isFileSlot = false;
-            }
-            slotInfo.setIsFileSlot(isFileSlot);
+            TColumnCategory category = classifyColumn(slot, getPathPartitionKeys());
+            slotInfo.setCategory(category);
+            slotInfo.setIsFileSlot(category == TColumnCategory.REGULAR || category == TColumnCategory.GENERATED);
             params.addToRequiredSlots(slotInfo);
         }
         // Update required slots and column_idxs in scanRangeLocations.
         setColumnPositionMapping();
     }
 
-    private boolean isIcebergRowIdColumn(SlotDescriptor slot) {
-        return Column.ICEBERG_ROWID_COL.equalsIgnoreCase(slot.getColumn().getName());
+    /**
+     * Classify a column's category for the BE reader.
+     * Subclasses override this for format-specific classification.
+     */
+    protected TColumnCategory classifyColumn(SlotDescriptor slot, List<String> partitionKeys) {
+        if (Column.ICEBERG_ROWID_COL.equalsIgnoreCase(slot.getColumn().getName())) {
+            return TColumnCategory.SYNTHESIZED;
+        }
+        if (partitionKeys.contains(slot.getColumn().getName())) {
+            return TColumnCategory.PARTITION_KEY;
+        }
+        return TColumnCategory.REGULAR;
     }
 
     public void setTableSample(TableSample tSample) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
index c8aa0baab0ec88..e00463f16d3052 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
@@ -42,6 +42,7 @@
 import org.apache.doris.thrift.TFileRangeDesc;
 import org.apache.doris.thrift.TFileScanNode;
 import org.apache.doris.thrift.TFileScanRangeParams;
+import org.apache.doris.thrift.TFileScanSlotInfo;
 import org.apache.doris.thrift.TPlanNode;
 import org.apache.doris.thrift.TPlanNodeType;
 import org.apache.doris.thrift.TPushAggOp;
@@ -252,6 +253,15 @@ protected void setDefaultValueExprs(TableIf tbl,
             nameToSlotDesc.put(slot.getColumn().getName(), slot);
         }
 
+        // Build slot_id -> index map for required_slots to set default_value_expr inline.
+        Map<Integer, Integer> slotIdToRequiredIdx = Maps.newHashMap();
+        if (params.getRequiredSlots() != null) {
+            for (int i = 0; i < params.getRequiredSlots().size(); i++) {
+                TFileScanSlotInfo slotInfo = params.getRequiredSlots().get(i);
+                slotIdToRequiredIdx.put(slotInfo.getSlotId(), i);
+            }
+        }
+
         for (Column column : desc.getTable().getFullSchema()) {
             Expr expr;
             Expression expression;
@@ -293,19 +303,28 @@ protected void setDefaultValueExprs(TableIf tbl,
             // default value.
             // and if z is not nullable, the load will fail.
             if (slotDesc != null) {
+                TExpr defaultExpr;
                 if (expression != null) {
                     expression = TypeCoercionUtils.castIfNotSameType(expression,
                             DataType.fromCatalogType(slotDesc.getType()));
                     expr = ExpressionTranslator.translate(expression,
                             new PlanTranslatorContext(CascadesContext.initTempContext()));
-                    params.putToDefaultValueOfSrcSlot(slotDesc.getId().asInt(), ExprToThriftVisitor.treeToThrift(expr));
+                    defaultExpr = ExprToThriftVisitor.treeToThrift(expr);
                 } else {
-                    params.putToDefaultValueOfSrcSlot(slotDesc.getId().asInt(), tExpr);
+                    defaultExpr = tExpr;
+                }
+                // Populate legacy map (for backward compatibility with old BE)
+                params.putToDefaultValueOfSrcSlot(slotDesc.getId().asInt(), defaultExpr);
+                // Also embed default expr directly in the TFileScanSlotInfo
+                Integer idx = slotIdToRequiredIdx.get(slotDesc.getId().asInt());
+                if (idx != null) {
+                    params.getRequiredSlots().get(idx).setDefaultValueExpr(defaultExpr);
                 }
             }
         }
     }
 
+
     protected void addFileCacheAdmissionLog(String userIdentity, Boolean admitted, String reason, double durationMs) {
         String admissionStatus = admitted ? "ADMITTED" : "DENIED";
         String admissionLog = String.format("file cache request %s: user_identity:%s, reason:%s, cost:%.6f ms",
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java
index 74fbab4590d216..f5a2f9397f3b39 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java
@@ -187,14 +187,26 @@ public TFileScanRangeParams toFileScanRangeParams(TUniqueId loadId, NereidsFileG
                 params.putToExprOfDestSlot(entry.getKey().asInt(), ExprToThriftVisitor.treeToThrift(entry.getValue()));
             }
 
+            // Build slot_id -> index map for required_slots to set default_value_expr inline.
+            Map<Integer, Integer> slotIdToRequiredIdx = Maps.newHashMap();
+            for (int i = 0; i < params.getRequiredSlots().size(); i++) {
+                slotIdToRequiredIdx.put(params.getRequiredSlots().get(i).getSlotId(), i);
+            }
+
             for (Map.Entry<SlotId, Expr> entry : srcSlotIdToDefaultValueMap.entrySet()) {
+                TExpr defaultExpr;
                 if (entry.getValue() != null) {
-                    params.putToDefaultValueOfSrcSlot(entry.getKey().asInt(),
-                            ExprToThriftVisitor.treeToThrift(entry.getValue()));
+                    defaultExpr = ExprToThriftVisitor.treeToThrift(entry.getValue());
                 } else {
-                    TExpr tExpr = new TExpr();
-                    tExpr.setNodes(Lists.newArrayList());
-                    params.putToDefaultValueOfSrcSlot(entry.getKey().asInt(), tExpr);
+                    defaultExpr = new TExpr();
+                    defaultExpr.setNodes(Lists.newArrayList());
+                }
+                // Populate legacy map (for backward compatibility with old BE)
+                params.putToDefaultValueOfSrcSlot(entry.getKey().asInt(), defaultExpr);
+                // Also embed default expr directly in the TFileScanSlotInfo
+                Integer idx = slotIdToRequiredIdx.get(entry.getKey().asInt());
+                if (idx != null) {
+                    params.getRequiredSlots().get(idx).setDefaultValueExpr(defaultExpr);
                 }
             }
 
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index 60c5e01ecf8fdd..ae7427e3327c55 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -253,9 +253,21 @@ struct TFileTextScanRangeParams {
     8: optional bool empty_field_as_null
 }
 
+enum TColumnCategory {
+    REGULAR = 0,
+    PARTITION_KEY = 1,
+    SYNTHESIZED = 2,
+    GENERATED = 3,
+}
+
 struct TFileScanSlotInfo {
     1: optional Types.TSlotId slot_id;
     2: optional bool is_file_slot;
+    3: optional TColumnCategory category;
+    // Default value expression for this column when it is missing from the data file.
+    // Populated by FE from Column.getDefaultValue() or NULL literal.
+    // This replaces the separate default_value_of_src_slot map in TFileScanRangeParams.
+    4: optional Exprs.TExpr default_value_expr;
 }
 
 // descirbe how to read file
diff --git a/regression-test/suites/external_table_p0/iceberg/action/test_iceberg_v3_row_lineage_rewrite_data_files.groovy b/regression-test/suites/external_table_p0/iceberg/action/test_iceberg_v3_row_lineage_rewrite_data_files.groovy
deleted file mode 100644
index 438276c6946950..00000000000000
--- a/regression-test/suites/external_table_p0/iceberg/action/test_iceberg_v3_row_lineage_rewrite_data_files.groovy
+++ /dev/null
@@ -1,244 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_iceberg_v3_row_lineage_rewrite_data_files", "p0,external,iceberg,external_docker,external_docker_iceberg") {
-    String enabled = context.config.otherConfigs.get("enableIcebergTest")
-    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
-        logger.info("Iceberg test is disabled")
-        return
-    }
-
-    String catalogName = "test_iceberg_v3_row_lineage_rewrite_data_files"
-    String dbName = "test_row_lineage_rewrite_db"
-    String restPort = context.config.otherConfigs.get("iceberg_rest_uri_port")
-    String minioPort = context.config.otherConfigs.get("iceberg_minio_port")
-    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
-    String endpoint = "http://${externalEnvIp}:${minioPort}"
-
-    def formats = ["parquet", "orc"]
-
-    def schemaContainsField = { schemaRows, fieldName ->
-        String target = fieldName.toLowerCase()
-        return schemaRows.any { row -> row.toString().toLowerCase().contains(target) }
-    }
-
-    def fileSchemaRows = { filePath, format ->
-        return sql("""
-            desc function s3(
-                "uri" = "${filePath}",
-                "format" = "${format}",
-                "s3.access_key" = "admin",
-                "s3.secret_key" = "password",
-                "s3.endpoint" = "${endpoint}",
-                "s3.region" = "us-east-1"
-            )
-        """)
-    }
-
-    def assertCurrentFilesContainRowLineageColumns = { tableName, format ->
-        def files = sql("""select file_path, lower(file_format) from ${tableName}\$files order by file_path""")
-        log.info("Checking rewritten files for physical row lineage columns in ${tableName}: ${files}")
-        assertTrue(files.size() > 0, "Current files should exist for ${tableName}")
-        files.each { row ->
-            assertEquals(format, row[1].toString())
-            assertTrue(row[0].toString().endsWith(format == "parquet" ? ".parquet" : ".orc"),
-                    "Current data file should match ${format} for ${tableName}, file=${row[0]}")
-            def schemaRows = fileSchemaRows(row[0].toString(), format)
-            log.info("Rewritten ${format} schema for ${tableName}, file=${row[0]} -> ${schemaRows}")
-            assertTrue(schemaContainsField(schemaRows, "_row_id"),
-                    "Rewritten file should physically contain _row_id for ${tableName}, schema=${schemaRows}")
-            assertTrue(schemaContainsField(schemaRows, "_last_updated_sequence_number"),
-                    "Rewritten file should physically contain _last_updated_sequence_number for ${tableName}, schema=${schemaRows}")
-        }
-    }
-
-    def assertCurrentFilesDoNotContainRowLineageColumns = { tableName, format ->
-        def files = sql("""select file_path, lower(file_format) from ${tableName}\$files order by file_path""")
-        log.info("Checking regular INSERT files for absence of physical row lineage columns in ${tableName}: ${files}")
-        assertTrue(files.size() > 0, "Current files should exist for ${tableName}")
-        files.each { row ->
-            assertEquals(format, row[1].toString())
-            assertTrue(row[0].toString().endsWith(format == "parquet" ? ".parquet" : ".orc"),
-                    "Current data file should match ${format} for ${tableName}, file=${row[0]}")
-            def schemaRows = fileSchemaRows(row[0].toString(), format)
-            log.info("Regular INSERT ${format} schema for ${tableName}, file=${row[0]} -> ${schemaRows}")
-            assertTrue(!schemaContainsField(schemaRows, "_row_id"),
-                    "Normal INSERT file should not contain _row_id for ${tableName}, schema=${schemaRows}")
-            assertTrue(!schemaContainsField(schemaRows, "_last_updated_sequence_number"),
-                    "Normal INSERT file should not contain _last_updated_sequence_number for ${tableName}, schema=${schemaRows}")
-        }
-    }
-
-    def lineageMap = { tableName ->
-        def rows = sql("""
-            select id, _row_id, _last_updated_sequence_number
-            from ${tableName}
-            order by id
-        """)
-        Map<Integer, List<String>> result = [:]
-        rows.each { row ->
-            result[row[0].toString().toInteger()] = [row[1].toString(), row[2].toString()]
-        }
-        log.info("Built lineage map for ${tableName}: ${result}")
-        return result
-    }
-
-    def assertLineageMapEquals = { expected, actual, tableName ->
-        log.info("Comparing lineage maps for ${tableName}: expected=${expected}, actual=${actual}")
-        assertEquals(expected.size(), actual.size())
-        expected.each { key, value ->
-            assertTrue(actual.containsKey(key), "Missing id=${key} after rewrite for ${tableName}")
-            assertEquals(value[0], actual[key][0])
-            assertEquals(value[1], actual[key][1])
-        }
-    }
-
-    def runRewriteAndAssert = { tableName, format, expectedCount ->
-        def filesBefore = sql("""select file_path from ${tableName}\$files order by file_path""")
-        def snapshotsBefore = sql("""select snapshot_id from ${tableName}\$snapshots order by committed_at""")
-        log.info("Checking rewrite preconditions for ${tableName}: filesBefore=${filesBefore}, snapshotsBefore=${snapshotsBefore}")
-        assertTrue(filesBefore.size() >= 2,
-                "Rewrite test requires at least 2 input files for ${tableName}, but got ${filesBefore.size()}")
-
-        def visibleBefore = sql("""select * from ${tableName} order by id""")
-        def rowLineageBefore = lineageMap(tableName)
-        log.info("Visible rows before rewrite for ${tableName}: ${visibleBefore}")
-
-        assertCurrentFilesDoNotContainRowLineageColumns(tableName, format)
-
-        def rewriteResult = sql("""
-            alter table ${catalogName}.${dbName}.${tableName}
-            execute rewrite_data_files(
-                "target-file-size-bytes" = "10485760",
-                "min-input-files" = "1"
-            )
-        """)
-        log.info("rewrite_data_files result for ${tableName}: ${rewriteResult}")
-        assertTrue(rewriteResult.size() > 0, "rewrite_data_files should return summary rows for ${tableName}")
-        int rewrittenFiles = rewriteResult[0][0] as int
-        assertTrue(rewrittenFiles > 0, "rewrite_data_files should rewrite at least one file for ${tableName}")
-
-        def visibleAfter = sql("""select * from ${tableName} order by id""")
-        log.info("Visible rows after rewrite for ${tableName}: ${visibleAfter}")
-        assertEquals(visibleBefore, visibleAfter)
-
-        def rowLineageAfter = lineageMap(tableName)
-        assertLineageMapEquals(rowLineageBefore, rowLineageAfter, tableName)
-
-        def countAfter = sql("""select count(*) from ${tableName}""")
-        log.info("Checking row count after rewrite for ${tableName}: ${countAfter}")
-        assertEquals(expectedCount, countAfter[0][0].toString().toInteger())
-
-        def snapshotsAfter = sql("""select snapshot_id from ${tableName}\$snapshots order by committed_at""")
-        log.info("Snapshots after rewrite for ${tableName}: ${snapshotsAfter}")
-        assertTrue(snapshotsAfter.size() > snapshotsBefore.size(),
-                "rewrite_data_files should create a new snapshot for ${tableName}")
-
-        assertCurrentFilesContainRowLineageColumns(tableName, format)
-
-        def sampleRowId = rowLineageAfter.entrySet().iterator().next().value[0]
-        def sampleQuery = sql("""select count(*) from ${tableName} where _row_id = ${sampleRowId}""")
-        log.info("Checking sample _row_id predicate after rewrite for ${tableName}: sampleRowId=${sampleRowId}, result=${sampleQuery}")
-        assertEquals(1, sampleQuery[0][0].toString().toInteger())
-    }
-
-    sql """drop catalog if exists ${catalogName}"""
-    sql """
-        create catalog if not exists ${catalogName} properties (
-            "type" = "iceberg",
-            "iceberg.catalog.type" = "rest",
-            "uri" = "http://${externalEnvIp}:${restPort}",
-            "s3.access_key" = "admin",
-            "s3.secret_key" = "password",
-            "s3.endpoint" = "${endpoint}",
-            "s3.region" = "us-east-1"
-        )
-    """
-
-    sql """switch ${catalogName}"""
-    sql """create database if not exists ${dbName}"""
-    sql """use ${dbName}"""
-    sql """set enable_fallback_to_original_planner = false"""
-    sql """set show_hidden_columns = false"""
-
-    try {
-        formats.each { format ->
-            String rewriteTable = "test_row_lineage_rewrite_unpartitioned_${format}"
-            String rewritePartitionTable = "test_row_lineage_rewrite_partitioned_${format}"
-            log.info("Run rewrite_data_files row lineage test with format ${format}")
-
-            try {
-                sql """drop table if exists ${rewriteTable}"""
-                sql """
-                    create table ${rewriteTable} (
-                        id int,
-                        name string,
-                        score int
-                    ) engine=iceberg
-                    properties (
-                        "format-version" = "3",
-                        "write.format.default" = "${format}"
-                    )
-                """
-
-                sql """insert into ${rewriteTable} values (1, 'A', 10), (2, 'B', 20)"""
-                sql """insert into ${rewriteTable} values (3, 'C', 30), (4, 'D', 40)"""
-                sql """insert into ${rewriteTable} values (5, 'E', 50), (6, 'F', 60)"""
-                log.info("Inserted three batches into ${rewriteTable} to prepare rewrite_data_files input files")
-
-                // Assert baseline:
-                // 1. Data files from regular INSERT do not physically contain the two row lineage columns.
-                // 2. After rewrite_data_files, every current data file should contain both row lineage columns.
-                // 3. Visible query results stay unchanged before and after rewrite.
-                // 4. _row_id and _last_updated_sequence_number stay stable for every row across rewrite.
-                runRewriteAndAssert(rewriteTable, format, 6)
-
-                sql """drop table if exists ${rewritePartitionTable}"""
-                sql """
-                    create table ${rewritePartitionTable} (
-                        id int,
-                        name string,
-                        score int,
-                        dt date
-                    ) engine=iceberg
-                    partition by list (day(dt)) ()
-                    properties (
-                        "format-version" = "3",
-                        "write.format.default" = "${format}"
-                    )
-                """
-
-                sql """insert into ${rewritePartitionTable} values (11, 'P1', 10, '2024-01-01'), (12, 'P2', 20, '2024-01-01')"""
-                sql """insert into ${rewritePartitionTable} values (13, 'P3', 30, '2024-01-01'), (14, 'P4', 40, '2024-02-01')"""
-                sql """insert into ${rewritePartitionTable} values (15, 'P5', 50, '2024-02-01'), (16, 'P6', 60, '2024-01-01')"""
-                log.info("Inserted three partitioned batches into ${rewritePartitionTable} to prepare rewrite_data_files input files")
-
-                // Assert baseline:
-                // 1. Partitioned tables also write row lineage columns physically only during rewrite.
-                // 2. Business data and row lineage values stay stable before and after rewrite.
-                // 3. _row_id predicate queries remain available after rewrite.
-                runRewriteAndAssert(rewritePartitionTable, format, 6)
-            } finally {
-                sql """drop table if exists ${rewritePartitionTable}"""
-                sql """drop table if exists ${rewriteTable}"""
-            }
-        }
-    } finally {
-        sql """drop database if exists ${dbName} force"""
-        sql """drop catalog if exists ${catalogName}"""
-    }
-}
diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_v2_to_v3_doris_spark_compare.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_v2_to_v3_doris_spark_compare.groovy
deleted file mode 100644
index df6d1bbea20087..00000000000000
--- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_v2_to_v3_doris_spark_compare.groovy
+++ /dev/null
@@ -1,223 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_iceberg_v2_to_v3_doris_spark_compare", "p0,external,iceberg,external_docker,external_docker_iceberg") {
-    def enabled = context.config.otherConfigs.get("enableIcebergTest")
-    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
-        logger.info("Iceberg test is disabled")
-        return
-    }
-
-    def catalogName = "test_iceberg_v2_to_v3_doris_spark_compare"
-    def dbName = "test_v2_to_v3_doris_spark_compare_db"
-    def restPort = context.config.otherConfigs.get("iceberg_rest_uri_port")
-    def minioPort = context.config.otherConfigs.get("iceberg_minio_port")
-    def externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
-
-    def formats = ["parquet", "orc"]
-
-    def tableNameForFormat = { baseName, format ->
-        return format == "parquet" ? baseName : "${baseName}_orc"
-    }
-
-    sql """drop catalog if exists ${catalogName}"""
-    sql """
-        create catalog if not exists ${catalogName} properties (
-            "type" = "iceberg",
-            "iceberg.catalog.type" = "rest",
-            "uri" = "http://${externalEnvIp}:${restPort}",
-            "s3.access_key" = "admin",
-            "s3.secret_key" = "password",
-            "s3.endpoint" = "http://${externalEnvIp}:${minioPort}",
-            "s3.region" = "us-east-1"
-        )
-    """
-
-    sql """switch ${catalogName}"""
-    sql """use ${dbName}"""
-    sql """set enable_fallback_to_original_planner = false"""
-
-    try {
-        def assertV2RowsAreNullAfterUpgrade = { tableName ->
-            def rows = sql """
-                select id, _row_id, _last_updated_sequence_number
-                from ${tableName}
-                order by id
-            """
-            assertEquals(2, rows.size())
-            rows.each { row ->
-                assertTrue(row[1] == null,
-                        "_row_id should be null for v2 rows after upgrade in ${tableName}, row=${row}")
-                assertTrue(row[2] == null,
-                        "_last_updated_sequence_number should be null for v2 rows after upgrade in ${tableName}, row=${row}")
-            }
-        }
-
-        def assertV23RowsNotNullAfterUpd = { tableName ->
-            def rows = sql """
-                select id, _row_id, _last_updated_sequence_number
-                from ${tableName}
-                order by id
-            """
-            rows.each { row ->
-                assertTrue(row[1] != null,
-                    "_row_id should be non-null after Doris operator for ${tableName}")                        
-                assertTrue(row[2] != null,
-                    "_last_updated_sequence_number should be non-null after Doris operator for ${tableName}")
-
-            }
-        }
-
-        def upgradeV3DorisOperationInsert = { tableName ->
-            assertV2RowsAreNullAfterUpgrade(tableName)
-
-            sql """
-                insert into ${tableName} values
-                (4, 'post_v3_i', 400, date '2024-01-04')
-            """
-
-            def rows = sql """
-                select id, tag, score, _row_id, _last_updated_sequence_number
-                from ${tableName}
-                order by id
-            """
-            assertEquals(3, rows.size())
-            assertEquals(4, rows[2][0].toString().toInteger())
-            assertEquals("post_v3_i", rows[2][1])
-            assertV23RowsNotNullAfterUpd(tableName)
-        }
-
-        def upgradeV3DorisOperationDelete = { tableName ->
-            assertV2RowsAreNullAfterUpgrade(tableName)
-
-            sql """
-                delete from ${tableName}
-                where id = 3
-            """
-
-            def rows = sql """
-                select id, tag, score
-                from ${tableName}
-                order by id
-            """
-            assertEquals(1, rows.size())
-            assertEquals(1, rows[0][0].toString().toInteger())
-            assertV23RowsNotNullAfterUpd(tableName)
-
-        }
-
-        def upgradeV3DorisOperationUpdate = { tableName ->
-            assertV2RowsAreNullAfterUpgrade(tableName)
-
-            sql """
-                update ${tableName}
-                set tag = 'post_v3_u', score = score + 20
-                where id = 1
-            """
-
-            def rows = sql """
-                select id, tag, score
-                from ${tableName}
-                order by id
-            """
-            assertEquals(2, rows.size())
-            assertEquals(1, rows[0][0].toString().toInteger())
-            assertEquals("post_v3_u", rows[0][1])
-            assertV23RowsNotNullAfterUpd(tableName)
-        }
-
-        def upgradeV3DorisOperationRewrite = { tableName ->
-            assertV2RowsAreNullAfterUpgrade(tableName)
-
-            def rewriteResult = sql("""
-                alter table ${catalogName}.${dbName}.${tableName}
-                execute rewrite_data_files(
-                    "target-file-size-bytes" = "10485760",
-                    "min-input-files" = "1"
-                )
-            """)
-            assertTrue(rewriteResult.size() > 0,
-                    "rewrite_data_files should return summary rows for ${tableName}")
-
-            def rowCount = sql """
-                select count(*)
-                from ${tableName}
-            """
-            assertEquals(2, rowCount[0][0].toString().toInteger())
-            assertV23RowsNotNullAfterUpd(tableName)
-        }
-
-        formats.each { format ->
-            def rowLineageNullTable = tableNameForFormat("v2v3_row_lineage_null_after_upgrade", format)
-            def sparkReferenceTable = tableNameForFormat("v2v3_spark_ops_reference", format)
-            def dorisTargetTable = tableNameForFormat("v2v3_doris_ops_target", format)
-            log.info("Run v2-to-v3 Doris/Spark compare test with format ${format}")
-
-            def scenario1Rows = sql """
-                select id, _row_id, _last_updated_sequence_number
-                from ${rowLineageNullTable}
-                order by id
-            """
-            assertEquals(3, scenario1Rows.size())
-            scenario1Rows.each { row ->
-                assertTrue(row[1] == null,
-                        "_row_id should be null for rows written before v3 upgrade, row=${row}")
-                assertTrue(row[2] == null,
-                        "_last_updated_sequence_number should be null for rows written before v3 upgrade, row=${row}")
-            }
-
-            sql """
-                update ${dorisTargetTable}
-                set tag = 'post_v3_u', score = score + 20
-                where id = 2
-            """
-
-            sql """
-                insert into ${dorisTargetTable} values
-                (4, 'post_v3_i', 400, date '2024-02-04')
-            """
-
-            def dorisRewriteResult = sql("""
-                alter table ${catalogName}.${dbName}.${dorisTargetTable}
-                execute rewrite_data_files(
-                    "target-file-size-bytes" = "10485760",
-                    "min-input-files" = "1"
-                )
-            """)
-            assertTrue(dorisRewriteResult.size() > 0,
-                    "Doris rewrite_data_files should return summary rows")
-
-            check_sqls_result_equal """
-                select *
-                from ${dorisTargetTable}
-                order by id
-            """, """
-                select *
-                from ${sparkReferenceTable}
-                order by id
-            """
-
-            upgradeV3DorisOperationInsert(tableNameForFormat("v2v3_doris_upd_case1", format))
-            upgradeV3DorisOperationDelete(tableNameForFormat("v2v3_doris_upd_case2", format))
-            upgradeV3DorisOperationUpdate(tableNameForFormat("v2v3_doris_upd_case3", format))
-            upgradeV3DorisOperationRewrite(tableNameForFormat("v2v3_doris_upd_case4", format))
-        }
-
-    } finally {
-        sql """drop catalog if exists ${catalogName}"""
-    }
-}
diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_v3_row_lineage_query_insert.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_v3_row_lineage_query_insert.groovy
deleted file mode 100644
index 7276fadba76b2c..00000000000000
--- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_v3_row_lineage_query_insert.groovy
+++ /dev/null
@@ -1,304 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_iceberg_v3_row_lineage_query_insert", "p0,external,iceberg,external_docker,external_docker_iceberg") {
-    String enabled = context.config.otherConfigs.get("enableIcebergTest")
-    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
-        logger.info("Iceberg test is disabled")
-        return
-    }
-
-    String catalogName = "test_iceberg_v3_row_lineage_query_insert"
-    String dbName = "test_row_lineage_query_insert_db"
-    String restPort = context.config.otherConfigs.get("iceberg_rest_uri_port")
-    String minioPort = context.config.otherConfigs.get("iceberg_minio_port")
-    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
-    String endpoint = "http://${externalEnvIp}:${minioPort}"
-
-    def formats = ["parquet", "orc"]
-
-    def collectDescColumns = { rows ->
-        return rows.collect { row -> row[0].toString().toLowerCase() }
-    }
-
-    def schemaContainsField = { schemaRows, fieldName ->
-        String target = fieldName.toLowerCase()
-        return schemaRows.any { row -> row.toString().toLowerCase().contains(target) }
-    }
-
-    def fileSchemaRows = { filePath, format ->
-        return sql("""
-            desc function s3(
-                "uri" = "${filePath}",
-                "format" = "${format}",
-                "s3.access_key" = "admin",
-                "s3.secret_key" = "password",
-                "s3.endpoint" = "${endpoint}",
-                "s3.region" = "us-east-1"
-            )
-        """)
-    }
-
-    def assertCurrentFilesDoNotContainRowLineageColumns = { tableName, format, messagePrefix ->
-        def files = sql("""select file_path, lower(file_format) from ${tableName}\$files order by file_path""")
-        log.info("${messagePrefix}: checking ${files.size()} current data files for ${tableName}: ${files}")
-        assertTrue(files.size() > 0, "Current data files should exist for ${tableName}")
-        files.each { row ->
-            assertEquals(format, row[1].toString())
-            assertTrue(row[0].toString().endsWith(format == "parquet" ? ".parquet" : ".orc"),
-                    "${messagePrefix} should write ${format} files for ${tableName}, file=${row[0]}")
-            def schemaRows = fileSchemaRows(row[0].toString(), format)
-            log.info("${messagePrefix}: ${format} schema for ${tableName}, file=${row[0]} -> ${schemaRows}")
-            assertTrue(!schemaContainsField(schemaRows, "_row_id"),
-                    "${messagePrefix} should not physically write _row_id, schema=${schemaRows}")
-            assertTrue(!schemaContainsField(schemaRows, "_last_updated_sequence_number"),
-                    "${messagePrefix} should not physically write _last_updated_sequence_number, schema=${schemaRows}")
-        }
-    }
-
-    def assertRowLineageHiddenColumns = { tableName, visibleColumnCount ->
-        sql("""set show_hidden_columns = false""")
-        def descDefault = sql("""desc ${tableName}""")
-        def defaultColumns = collectDescColumns(descDefault)
-        log.info("Checking hidden-column default visibility for ${tableName}: desc=${descDefault}")
-        assertTrue(!defaultColumns.contains("_row_id"),
-                "DESC default should hide _row_id for ${tableName}, got ${defaultColumns}")
-        assertTrue(!defaultColumns.contains("_last_updated_sequence_number"),
-                "DESC default should hide _last_updated_sequence_number for ${tableName}, got ${defaultColumns}")
-
-        def selectVisible = sql("""select * from ${tableName} order by id""")
-        log.info("Checking visible SELECT * layout for ${tableName}: rowCount=${selectVisible.size()}, firstRow=${selectVisible ? selectVisible[0] : 'EMPTY'}")
-        assertTrue(selectVisible.size() > 0, "SELECT * should return rows for ${tableName}")
-        assertEquals(visibleColumnCount, selectVisible[0].size())
-
-        sql("""set show_hidden_columns = true""")
-        def descHidden = sql("""desc ${tableName}""")
-        def hiddenColumns = collectDescColumns(descHidden)
-        log.info("Checking hidden-column enabled visibility for ${tableName}: desc=${descHidden}")
-        assertTrue(hiddenColumns.contains("_row_id"),
-                "DESC with show_hidden_columns=true should expose _row_id for ${tableName}, got ${hiddenColumns}")
-        assertTrue(hiddenColumns.contains("_last_updated_sequence_number"),
-                "DESC with show_hidden_columns=true should expose _last_updated_sequence_number for ${tableName}, got ${hiddenColumns}")
-
-        def selectHidden = sql("""select * from ${tableName} order by id""")
-        log.info("Checking hidden SELECT * layout for ${tableName}: rowCount=${selectHidden.size()}, firstRow=${selectHidden ? selectHidden[0] : 'EMPTY'}")
-        assertTrue(selectHidden.size() > 0, "SELECT * with hidden columns should return rows for ${tableName}")
-        assertEquals(visibleColumnCount + 2 + 1, selectHidden[0].size()) // _row_id + _last_updated_sequence_number + __DORIS_ICEBERG_ROWID_COL__
-
-        sql("""set show_hidden_columns = false""")
-    }
-
-    def assertExplicitRowLineageReadable = { tableName, expectedIds ->
-        def rowLineageRows = sql("""
-            select id, _row_id, _last_updated_sequence_number
-            from ${tableName}
-            order by id
-        """)
-        log.info("Checking explicit row lineage projection for ${tableName}: rows=${rowLineageRows}")
-        assertEquals(expectedIds.size(), rowLineageRows.size())
-        for (int i = 0; i < expectedIds.size(); i++) {
-            assertEquals(expectedIds[i], rowLineageRows[i][0].toString().toInteger())
-            assertTrue(rowLineageRows[i][1] != null,
-                    "_row_id should be non-null for ${tableName}, row=${rowLineageRows[i]}")
-            assertTrue(rowLineageRows[i][2] != null,
-                    "_last_updated_sequence_number should be non-null for ${tableName}, row=${rowLineageRows[i]}")
-        }
-
-        long firstRowId = rowLineageRows[0][1].toString().toLong()
-        long secondRowId = rowLineageRows[1][1].toString().toLong()
-        assertTrue(firstRowId < secondRowId,
-                "Row lineage ids should increase with row position for ${tableName}, rows=${rowLineageRows}")
-
-        def byRowId = sql("""select id from ${tableName} where _row_id = ${firstRowId} order by id""")
-        log.info("Checking single _row_id predicate for ${tableName}: rowId=${firstRowId}, result=${byRowId}")
-        assertEquals(1, byRowId.size())
-        assertEquals(expectedIds[0], byRowId[0][0].toString().toInteger())
-
-        def combinedPredicate = sql("""
-            select id
-            from ${tableName}
-            where id >= ${expectedIds[1]} and _row_id in (${rowLineageRows[1][1]}, ${rowLineageRows[2][1]})
-            order by id
-        """)
-        log.info("Checking combined business + _row_id predicate for ${tableName}: result=${combinedPredicate}")
-        assertEquals(2, combinedPredicate.size())
-        assertEquals(expectedIds[1], combinedPredicate[0][0].toString().toInteger())
-        assertEquals(expectedIds[2], combinedPredicate[1][0].toString().toInteger())
-    }
-
-    sql """drop catalog if exists ${catalogName}"""
-    sql """
-        create catalog if not exists ${catalogName} properties (
-            "type" = "iceberg",
-            "iceberg.catalog.type" = "rest",
-            "uri" = "http://${externalEnvIp}:${restPort}",
-            "s3.access_key" = "admin",
-            "s3.secret_key" = "password",
-            "s3.endpoint" = "${endpoint}",
-            "s3.region" = "us-east-1"
-        )
-    """
-
-    sql """switch ${catalogName}"""
-    sql """create database if not exists ${dbName}"""
-    sql """use ${dbName}"""
-    sql """set enable_fallback_to_original_planner = false"""
-    sql """set show_hidden_columns = false"""
-
-    try {
-        formats.each { format ->
-            String unpartitionedTable = "test_row_lineage_query_insert_unpartitioned_${format}"
-            String partitionedTable = "test_row_lineage_query_insert_partitioned_${format}"
-            log.info("Run row lineage query/insert test with format ${format}")
-
-            try {
-                sql """drop table if exists ${unpartitionedTable}"""
-                sql """
-                    create table ${unpartitionedTable} (
-                        id int,
-                        name string,
-                        age int
-                    ) engine=iceberg
-                    properties (
-                        "format-version" = "3",
-                        "write.format.default" = "${format}"
-                    )
-                """
-
-                sql """
-                    insert into ${unpartitionedTable} values(1, 'Alice', 25);
-                """
-                sql """ insert into ${unpartitionedTable} values(2, 'Bob', 30) """
-                sql """ insert into ${unpartitionedTable} values(3, 'Charlie', 35) """
-
-                log.info("Inserted initial rows into ${unpartitionedTable}")
-
-                // Assert baseline:
-                // 1. DESC and SELECT * hide row lineage columns by default.
-                // 2. show_hidden_columns=true exposes both hidden columns in DESC and SELECT *.
-                // 3. Explicit SELECT on row lineage columns returns non-null values.
-                assertRowLineageHiddenColumns(unpartitionedTable, 3)
-                assertExplicitRowLineageReadable(unpartitionedTable, [1, 2, 3])
-
-                test {
-                    sql """insert into ${unpartitionedTable}(_row_id, id, name, age) values (1, 9, 'BadRow', 99)"""
-                    exception "Cannot specify row lineage column '_row_id' in INSERT statement"
-                }
-
-                test {
-                    sql """
-                        insert into ${unpartitionedTable}(_last_updated_sequence_number, id, name, age)
-                        values (1, 10, 'BadSeq', 100)
-                    """
-                    exception "Cannot specify row lineage column '_last_updated_sequence_number' in INSERT statement"
-                }
-
-                sql """insert into ${unpartitionedTable}(id, name, age) values (4, 'Doris', 40)"""
-                def unpartitionedCount = sql """select count(*) from ${unpartitionedTable}"""
-                log.info("Checking row count after regular INSERT for ${unpartitionedTable}: result=${unpartitionedCount}")
-                assertEquals(4, unpartitionedCount[0][0].toString().toInteger())
-
-                assertCurrentFilesDoNotContainRowLineageColumns(
-                        unpartitionedTable,
-                        format,
-                        "Unpartitioned normal INSERT")
-
-                sql """drop table if exists ${partitionedTable}"""
-                sql """
-                    create table ${partitionedTable} (
-                        id int,
-                        name string,
-                        age int,
-                        dt date
-                    ) engine=iceberg
-                    partition by list (day(dt)) ()
-                    properties (
-                        "format-version" = "3",
-                        "write.format.default" = "${format}"
-                    )
-                """
-
-                sql """ insert into ${partitionedTable} values(11, 'Penny', 21, '2024-01-01')"""
-                sql """ insert into ${partitionedTable} values(12, 'Quinn', 22, '2024-01-02')"""
-                sql """ insert into ${partitionedTable} values(13, 'Rita', 23, '2024-01-03')"""        
-                
-                log.info("Inserted initial rows into ${partitionedTable}")
-
-                // Assert baseline:
-                // 1. Partitioned tables follow the same row lineage semantics as unpartitioned tables.
-                // 2. Explicit SELECT on _row_id remains readable under partition predicates.
-                // 3. Regular INSERT still rejects hidden columns and does not write them physically.
-                assertRowLineageHiddenColumns(partitionedTable, 4)
-
-                def partitionLineageRows = sql """
-                    select id, _row_id, _last_updated_sequence_number
-                    from ${partitionedTable}
-                    where dt >= '2024-01-01'
-                    order by id
-                """
-                log.info("Checking partitioned row lineage projection for ${partitionedTable}: rows=${partitionLineageRows}")
-                assertEquals(3, partitionLineageRows.size())
-                partitionLineageRows.each { row ->
-                    assertTrue(row[1] != null, "_row_id should be non-null for partitioned table row=${row}")
-                    assertTrue(row[2] != null, "_last_updated_sequence_number should be non-null for partitioned table row=${row}")
-                }
-
-                def exactPartitionPredicate = sql """
-                    select id
-                    from ${partitionedTable}
-                    where dt = '2024-01-02' and _row_id = ${partitionLineageRows[1][1]}
-                """
-                log.info("Checking exact partition + _row_id predicate for ${partitionedTable}: result=${exactPartitionPredicate}")
-                assertEquals(1, exactPartitionPredicate.size())
-                assertEquals(12, exactPartitionPredicate[0][0].toString().toInteger())
-
-                test {
-                    sql """
-                        insert into ${partitionedTable}(_row_id, id, name, age, dt)
-                        values (1, 14, 'BadPartitionRow', 24, '2024-01-04')
-                    """
-                    exception "Cannot specify row lineage column '_row_id' in INSERT statement"
-                }
-
-                test {
-                    sql """
-                        insert into ${partitionedTable}(_last_updated_sequence_number, id, name, age, dt)
-                        values (1, 15, 'BadPartitionSeq', 25, '2024-01-05')
-                    """
-                    exception "Cannot specify row lineage column '_last_updated_sequence_number' in INSERT statement"
-                }
-
-                sql """insert into ${partitionedTable}(id, name, age, dt) values (14, 'Sara', 24, '2024-01-04')"""
-                def partitionedCount = sql """select count(*) from ${partitionedTable}"""
-                log.info("Checking row count after regular INSERT for ${partitionedTable}: result=${partitionedCount}")
-                assertEquals(4, partitionedCount[0][0].toString().toInteger())
-
-                assertCurrentFilesDoNotContainRowLineageColumns(
-                        partitionedTable,
-                        format,
-                        "Partitioned normal INSERT")
-            } finally {
-                sql """drop table if exists ${partitionedTable}"""
-                sql """drop table if exists ${unpartitionedTable}"""
-            }
-        }
-    } finally {
-        sql """set show_hidden_columns = false"""
-        sql """drop database if exists ${dbName} force"""
-        sql """drop catalog if exists ${catalogName}"""
-    }
-}
diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_v3_row_lineage_update_delete_merge.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_v3_row_lineage_update_delete_merge.groovy
deleted file mode 100644
index 4bce7387f864bb..00000000000000
--- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_v3_row_lineage_update_delete_merge.groovy
+++ /dev/null
@@ -1,292 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_iceberg_v3_row_lineage_update_delete_merge", "p0,external,iceberg,external_docker,external_docker_iceberg") {
-    String enabled = context.config.otherConfigs.get("enableIcebergTest")
-    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
-        logger.info("Iceberg test is disabled")
-        return
-    }
-
-    String catalogName = "test_iceberg_v3_row_lineage_update_delete_merge"
-    String dbName = "test_row_lineage_update_delete_merge_db"
-    String restPort = context.config.otherConfigs.get("iceberg_rest_uri_port")
-    String minioPort = context.config.otherConfigs.get("iceberg_minio_port")
-    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
-    String endpoint = "http://${externalEnvIp}:${minioPort}"
-
-    def formats = ["parquet", "orc"]
-
-    def schemaContainsField = { schemaRows, fieldName ->
-        String target = fieldName.toLowerCase()
-        return schemaRows.any { row -> row.toString().toLowerCase().contains(target) }
-    }
-
-    def fileSchemaRows = { filePath, format ->
-        return sql("""
-            desc function s3(
-                "uri" = "${filePath}",
-                "format" = "${format}",
-                "s3.access_key" = "admin",
-                "s3.secret_key" = "password",
-                "s3.endpoint" = "${endpoint}",
-                "s3.region" = "us-east-1"
-            )
-        """)
-    }
-
-    def assertDeleteFilesArePuffin = { tableName ->
-        def deleteFiles = sql("""
-            select file_path, lower(file_format)
-            from ${tableName}\$delete_files
-            order by file_path
-        """)
-        log.info("Checking delete files for ${tableName}: ${deleteFiles}")
-        assertTrue(deleteFiles.size() > 0, "V3 table ${tableName} should produce delete files")
-        deleteFiles.each { row ->
-            assertTrue(row[0].toString().endsWith(".puffin"),
-                    "V3 delete file should be Puffin: ${row}")
-            assertEquals("puffin", row[1].toString())
-        }
-    }
-
-    def assertAtLeastOneCurrentDataFileHasRowLineageColumns = { tableName, format ->
-        def currentFiles = sql("""select file_path, lower(file_format) from ${tableName}\$data_files order by file_path""")
-        log.info("Checking current data files for physical row lineage columns in ${tableName}: ${currentFiles}")
-        assertTrue(currentFiles.size() > 0, "Current data files should exist for ${tableName}")
-
-        boolean found = false
-        currentFiles.each { row ->
-            assertEquals(format, row[1].toString())
-            assertTrue(row[0].toString().endsWith(format == "parquet" ? ".parquet" : ".orc"),
-                    "Current data file should match ${format} for ${tableName}, file=${row[0]}")
-            def schemaRows = fileSchemaRows(row[0].toString(), format)
-            log.info("${format} schema for ${tableName}, file=${row[0]} -> ${schemaRows}")
-            if (schemaContainsField(schemaRows, "_row_id")
-                    && schemaContainsField(schemaRows, "_last_updated_sequence_number")) {
-                found = true
-            }
-        }
-        assertTrue(found, "At least one current data file should physically contain row lineage columns for ${tableName}")
-    }
-
-    def assertExplicitRowLineageNonNull = { tableName, expectedRowCount ->
-        def rows = sql("""
-            select id, _row_id, _last_updated_sequence_number
-            from ${tableName}
-            order by id
-        """)
-        log.info("Checking explicit row lineage projection for ${tableName}: rows=${rows}")
-        assertEquals(expectedRowCount, rows.size())
-        rows.each { row ->
-            assertTrue(row[1] != null, "_row_id should be non-null for ${tableName}, row=${row}")
-            assertTrue(row[2] != null, "_last_updated_sequence_number should be non-null for ${tableName}, row=${row}")
-        }
-    }
-
-    def lineageMap = { tableName ->
-        def rows = sql("""
-            select id, _row_id, _last_updated_sequence_number
-            from ${tableName}
-            order by id
-        """)
-        Map<Integer, List<String>> result = [:]
-        rows.each { row ->
-            result[row[0].toString().toInteger()] = [row[1].toString(), row[2].toString()]
-        }
-        log.info("Built lineage map for ${tableName}: ${result}")
-        return result
-    }
-
-    sql """drop catalog if exists ${catalogName}"""
-    sql """
-        create catalog if not exists ${catalogName} properties (
-            "type" = "iceberg",
-            "iceberg.catalog.type" = "rest",
-            "uri" = "http://${externalEnvIp}:${restPort}",
-            "s3.access_key" = "admin",
-            "s3.secret_key" = "password",
-            "s3.endpoint" = "${endpoint}",
-            "s3.region" = "us-east-1"
-        )
-    """
-
-    sql """switch ${catalogName}"""
-    sql """create database if not exists ${dbName}"""
-    sql """use ${dbName}"""
-    sql """set enable_fallback_to_original_planner = false"""
-    sql """set show_hidden_columns = false"""
-
-    try {
-        formats.each { format ->
-            String updateDeleteTable = "test_row_lineage_v3_update_delete_${format}"
-            String mergeTable = "test_row_lineage_v3_merge_${format}"
-            log.info("Run row lineage update/delete/merge test with format ${format}")
-
-            try {
-                sql """drop table if exists ${updateDeleteTable}"""
-                sql """
-                    create table ${updateDeleteTable} (
-                        id int,
-                        name string,
-                        age int
-                    ) engine=iceberg
-                    properties (
-                        "format-version" = "3",
-                        "write.format.default" = "${format}"
-                    )
-                """
-
-                sql """insert into ${updateDeleteTable} values (1, 'Alice', 25) """ 
-                sql """insert into ${updateDeleteTable} values (2, 'Bob', 30) """ 
-                sql """insert into ${updateDeleteTable} values (3, 'Charlie', 35)""" 
-
-                def updateDeleteLineageBefore = lineageMap(updateDeleteTable)
-                log.info("Lineage before UPDATE/DELETE on ${updateDeleteTable}: ${updateDeleteLineageBefore}")
-                sql """update ${updateDeleteTable} set name = 'Alice_u', age = 26 where id = 1"""
-                sql """delete from ${updateDeleteTable} where id = 2"""
-
-                // Assert baseline:
-                // 1. UPDATE keeps rows readable and applies the new values.
-                // 2. DELETE removes the target row.
-                // 3. V3 delete files use Puffin deletion vectors instead of delete_pos parquet/orc files.
-                // 4. Explicit row lineage reads remain non-null after DML.
-                def updateDeleteRows = sql """select * from ${updateDeleteTable} order by id"""
-                log.info("Checking table rows after UPDATE/DELETE on ${updateDeleteTable}: ${updateDeleteRows}")
-                assertEquals(2, updateDeleteRows.size())
-                assertEquals(1, updateDeleteRows[0][0].toString().toInteger())
-                assertEquals("Alice_u", updateDeleteRows[0][1])
-                assertEquals(26, updateDeleteRows[0][2].toString().toInteger())
-                assertEquals(3, updateDeleteRows[1][0].toString().toInteger())
-                assertEquals("Charlie", updateDeleteRows[1][1])
-                assertEquals(35, updateDeleteRows[1][2].toString().toInteger())
-
-                assertExplicitRowLineageNonNull(updateDeleteTable, 2)
-                def updateDeleteLineageAfter = lineageMap(updateDeleteTable)
-                log.info("Lineage after UPDATE/DELETE on ${updateDeleteTable}: ${updateDeleteLineageAfter}")
-                assertEquals(updateDeleteLineageBefore[1][0], updateDeleteLineageAfter[1][0])
-                assertTrue(updateDeleteLineageBefore[1][1] != updateDeleteLineageAfter[1][1],
-                        "UPDATE should change _last_updated_sequence_number for id=1")
-                assertTrue(updateDeleteLineageAfter[1][1].toLong() > updateDeleteLineageBefore[1][1].toLong(),
-                        "UPDATE should advance _last_updated_sequence_number for id=1")
-                assertEquals(updateDeleteLineageBefore[3][0], updateDeleteLineageAfter[3][0])
-                assertEquals(updateDeleteLineageBefore[3][1], updateDeleteLineageAfter[3][1])
-                assertTrue(!updateDeleteLineageAfter.containsKey(2), "Deleted row id=2 should not remain after DELETE")
-                assertDeleteFilesArePuffin(updateDeleteTable)
-                assertAtLeastOneCurrentDataFileHasRowLineageColumns(updateDeleteTable, format)
-
-                def minRowIdAfterUpdate = sql """
-                    select min(_row_id)
-                    from ${updateDeleteTable}
-                """
-                def rowIdFilterResult = sql """
-                    select count(*)
-                    from ${updateDeleteTable}
-                    where _row_id = ${minRowIdAfterUpdate[0][0]}
-                """
-                log.info("Checking _row_id filter after UPDATE/DELETE on ${updateDeleteTable}: minRowId=${minRowIdAfterUpdate}, result=${rowIdFilterResult}")
-                assertEquals(1, rowIdFilterResult[0][0].toString().toInteger())
-
-                sql """drop table if exists ${mergeTable}"""
-                sql """
-                    create table ${mergeTable} (
-                        id int,
-                        name string,
-                        age int,
-                        dt date
-                    ) engine=iceberg
-                    partition by list (day(dt)) ()
-                    properties (
-                        "format-version" = "3",
-                        "write.format.default" = "${format}"
-                    )
-                """
-
-                sql """ insert into ${mergeTable} values (1, 'Penny', 21, '2024-01-01') """
-                sql """ insert into ${mergeTable} values (2, 'Quinn', 22, '2024-01-02') """
-                sql """ insert into ${mergeTable} values (3, 'Rita', 23, '2024-01-03') """
-
-                def mergeLineageBefore = lineageMap(mergeTable)
-                log.info("Lineage before MERGE on ${mergeTable}: ${mergeLineageBefore}")
-                sql """
-                    merge into ${mergeTable} t
-                    using (
-                        select 1 as id, 'Penny_u' as name, 31 as age, date '2024-01-01' as dt, 'U' as flag
-                        union all
-                        select 2, 'Quinn', 22, date '2024-01-02', 'D'
-                        union all
-                        select 4, 'Sara', 24, date '2024-01-04', 'I'
-                    ) s
-                    on t.id = s.id
-                    when matched and s.flag = 'D' then delete
-                    when matched then update set
-                        name = s.name,
-                        age = s.age
-                    when not matched then insert (id, name, age, dt)
-                    values (s.id, s.name, s.age, s.dt)
-                """
-
-                // Assert baseline:
-                // 1. MERGE applies DELETE, UPDATE, and INSERT actions in one statement.
-                // 2. The partitioned MERGE still writes Puffin deletion vectors.
-                // 3. At least one current data file written by MERGE contains physical row lineage columns.
-                def mergeRows = sql """select * from ${mergeTable} order by id"""
-                log.info("Checking table rows after MERGE on ${mergeTable}: ${mergeRows}")
-                assertEquals(3, mergeRows.size())
-                assertEquals(1, mergeRows[0][0].toString().toInteger())
-                assertEquals("Penny_u", mergeRows[0][1])
-                assertEquals(31, mergeRows[0][2].toString().toInteger())
-                assertEquals(3, mergeRows[1][0].toString().toInteger())
-                assertEquals("Rita", mergeRows[1][1])
-                assertEquals(23, mergeRows[1][2].toString().toInteger())
-                assertEquals(4, mergeRows[2][0].toString().toInteger())
-                assertEquals("Sara", mergeRows[2][1])
-                assertEquals(24, mergeRows[2][2].toString().toInteger())
-
-                assertExplicitRowLineageNonNull(mergeTable, 3)
-                def mergeLineageAfter = lineageMap(mergeTable)
-                log.info("Lineage after MERGE on ${mergeTable}: ${mergeLineageAfter}")
-                assertEquals(mergeLineageBefore[1][0], mergeLineageAfter[1][0])
-                assertTrue(mergeLineageBefore[1][1] != mergeLineageAfter[1][1],
-                        "MERGE UPDATE should change _last_updated_sequence_number for id=1")
-                assertTrue(mergeLineageAfter[1][1].toLong() > mergeLineageBefore[1][1].toLong(),
-                        "MERGE UPDATE should advance _last_updated_sequence_number for id=1")
-                assertEquals(mergeLineageBefore[3][0], mergeLineageAfter[3][0])
-                assertEquals(mergeLineageBefore[3][1], mergeLineageAfter[3][1])
-                assertTrue(!mergeLineageAfter.containsKey(2), "MERGE DELETE should remove id=2")
-                assertDeleteFilesArePuffin(mergeTable)
-                assertAtLeastOneCurrentDataFileHasRowLineageColumns(mergeTable, format)
-
-                def insertedRowLineage = sql """
-                    select _row_id, _last_updated_sequence_number
-                    from ${mergeTable}
-                    where id = 4
-                """
-                log.info("Checking inserted MERGE row lineage for ${mergeTable}: ${insertedRowLineage}")
-                assertEquals(1, insertedRowLineage.size())
-                assertTrue(insertedRowLineage[0][0] != null, "Inserted MERGE row should get generated _row_id")
-                assertTrue(insertedRowLineage[0][1] != null, "Inserted MERGE row should get generated _last_updated_sequence_number")
-            } finally {
-                sql """drop table if exists ${mergeTable}"""
-                sql """drop table if exists ${updateDeleteTable}"""
-            }
-        }
-    } finally {
-        sql """drop database if exists ${dbName} force"""
-        sql """drop catalog if exists ${catalogName}"""
-    }
-}