diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index 597fe92b4e3a15..9ad09b317f1190 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -1280,6 +1280,13 @@ Status CloudTablet::calc_delete_bitmap_for_compaction( calc_compaction_output_rowset_delete_bitmap( input_rowsets, rowid_conversion, 0, version.second + 1, missed_rows.get(), location_map.get(), tablet_meta()->delete_bitmap(), output_rowset_delete_bitmap.get()); + // In cluster-key MOW compaction, rows are sorted by cluster key, so duplicate unique keys + // may be non-adjacent in merge order. Scan the output primary key index to delete older + // duplicate rows inside the output rowset. + if (!tablet_schema()->cluster_key_uids().empty()) { + RETURN_IF_ERROR(calc_compaction_output_rowset_internal_delete_bitmap( + input_rowsets, output_rowset, rowid_conversion, output_rowset_delete_bitmap.get())); + } if (missed_rows) { missed_rows_size = missed_rows->size(); if (!allow_delete_in_cumu_compaction) { diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index c84f91b89ab9cf..9746877d86ef85 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -431,6 +431,8 @@ DEFINE_mInt32(pk_index_page_cache_stale_sweep_time_sec, "600"); DEFINE_mBool(enable_low_cardinality_optimize, "true"); DEFINE_Bool(enable_low_cardinality_cache_code, "true"); +DEFINE_mBool(enable_adaptive_batch_size, "true"); + // be policy // whether check compaction checksum DEFINE_mBool(enable_compaction_checksum, "false"); diff --git a/be/src/common/config.h b/be/src/common/config.h index be4d280df1ac66..c9e7acaefb4705 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -498,6 +498,11 @@ DECLARE_mInt32(pk_index_page_cache_stale_sweep_time_sec); DECLARE_mBool(enable_low_cardinality_optimize); DECLARE_Bool(enable_low_cardinality_cache_code); +// Adaptive batch size: dynamically adjust SegmentIterator chunk row count using EWMA +// so that each output block stays close to preferred_block_size_bytes. +// When false, the fixed batch_size row behaviour is preserved. +DECLARE_mBool(enable_adaptive_batch_size); + // be policy // whether check compaction checksum DECLARE_mBool(enable_compaction_checksum); diff --git a/be/src/core/block/block.h b/be/src/core/block/block.h index 43d55164750d3c..affc89392603d6 100644 --- a/be/src/core/block/block.h +++ b/be/src/core/block/block.h @@ -186,12 +186,17 @@ class Block { Status check_type_and_column() const; - /// Approximate number of bytes in memory - for profiling and limits. + /// Approximate number of bytes used by column data in memory. + /// This reflects the actual data footprint (e.g. string contents, numeric arrays) + /// and is the metric used by adaptive batch size byte budgets. size_t bytes() const; + /// Returns per-column byte sizes as a comma-separated string (for debugging). std::string columns_bytes() const; - /// Approximate number of allocated bytes in memory - for profiling and limits. + /// Approximate number of allocated (reserved) bytes in memory. + /// This may be larger than bytes() due to pre-allocated capacity in vectors/arenas. + /// Used for memory tracking and profiling. MOCK_FUNCTION size_t allocated_bytes() const; /** Get a list of column names separated by commas. */ @@ -355,6 +360,17 @@ class Block { void clear_column_mem_not_keep(const std::vector& column_keep_flags, bool need_keep_first); + // Helper: sum byte_size() of all mutable columns. 
+ // Unlike Block::bytes() which operates on immutable ColumnPtr, + // this works on MutableColumns during block construction (e.g. in BlockReader). + static inline size_t columns_byte_size(const MutableColumns& cols) { + size_t total = 0; + for (const auto& col : cols) { + total += col->byte_size(); + } + return total; + } + private: void erase_impl(size_t position); }; diff --git a/be/src/exec/operator/mock_scan_operator.h b/be/src/exec/operator/mock_scan_operator.h index 1022a5c44fb694..8800dc97860028 100644 --- a/be/src/exec/operator/mock_scan_operator.h +++ b/be/src/exec/operator/mock_scan_operator.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#include + #include "exec/operator/scan_operator.h" #ifdef BE_TEST @@ -80,6 +82,29 @@ class MockScanOperatorX final : public ScanOperatorX { public: friend class OlapScanLocalState; MockScanOperatorX() = default; + + void set_output_block(Block block) { + _output_blocks.clear(); + _output_blocks.push_back(std::move(block)); + } + + Status get_block(RuntimeState* state, Block* block, bool* eos) override { + if (_output_blocks.empty()) { + *eos = true; + return Status::OK(); + } + + *eos = false; + block->swap(_output_blocks.front()); + _output_blocks.pop_front(); + if (_output_blocks.empty()) { + *eos = true; + } + return Status::OK(); + } + +private: + std::list _output_blocks; }; } // namespace doris -#endif \ No newline at end of file +#endif diff --git a/be/src/exec/operator/olap_scan_operator.cpp b/be/src/exec/operator/olap_scan_operator.cpp index 669694df2814f4..12aabf2d457916 100644 --- a/be/src/exec/operator/olap_scan_operator.cpp +++ b/be/src/exec/operator/olap_scan_operator.cpp @@ -394,6 +394,11 @@ Status OlapScanLocalState::_init_profile() { ADD_COUNTER(_segment_profile, "ConditionCacheSegmentHit", TUnit::UNIT); _condition_cache_filtered_rows_counter = ADD_COUNTER(_segment_profile, "ConditionCacheFilteredRows", TUnit::UNIT); + _adaptive_batch_predict_min_rows_counter = + ADD_COUNTER(_segment_profile, "AdaptiveBatchPredictMinRows", TUnit::UNIT); + _adaptive_batch_predict_max_rows_counter = + ADD_COUNTER(_segment_profile, "AdaptiveBatchPredictMaxRows", TUnit::UNIT); + return Status::OK(); } diff --git a/be/src/exec/operator/olap_scan_operator.h b/be/src/exec/operator/olap_scan_operator.h index 3a27db78885b33..5bf32f7b8708f6 100644 --- a/be/src/exec/operator/olap_scan_operator.h +++ b/be/src/exec/operator/olap_scan_operator.h @@ -315,6 +315,9 @@ class OlapScanLocalState final : public ScanLocalState { // Variant subtree: times selecting doc snapshot all iterator (merge doc snapshot into root) RuntimeProfile::Counter* _variant_doc_value_column_iter_count = nullptr; + RuntimeProfile::Counter* _adaptive_batch_predict_min_rows_counter = nullptr; + RuntimeProfile::Counter* _adaptive_batch_predict_max_rows_counter = nullptr; + std::vector _tablets; std::vector _read_sources; diff --git a/be/src/exec/operator/operator.cpp b/be/src/exec/operator/operator.cpp index 27d8acf859aa80..3b330550faf02f 100644 --- a/be/src/exec/operator/operator.cpp +++ b/be/src/exec/operator/operator.cpp @@ -385,10 +385,7 @@ Status OperatorXBase::get_block_after_projects(RuntimeState* state, Block* block auto* local_state = state->get_local_state(operator_id()); Defer defer([&]() { if (status.ok()) { - if (auto rows = block->rows()) { - COUNTER_UPDATE(local_state->_rows_returned_counter, rows); - COUNTER_UPDATE(local_state->_blocks_returned_counter, 1); - } + local_state->update_output_block_counters(*block); } }); 
if (_output_row_descriptor) { @@ -505,7 +502,11 @@ PipelineXSinkLocalStateBase::PipelineXSinkLocalStateBase(DataSinkOperatorXBase* : _parent(parent), _state(state) {} PipelineXLocalStateBase::PipelineXLocalStateBase(RuntimeState* state, OperatorXBase* parent) - : _num_rows_returned(0), _rows_returned_counter(nullptr), _parent(parent), _state(state) {} + : _num_rows_returned(0), + _rows_returned_counter(nullptr), + _parent(parent), + _state(state), + _budget(state->batch_size(), state->preferred_block_size_bytes()) {} template Status PipelineXLocalState::init(RuntimeState* state, LocalStateInfo& info) { @@ -559,6 +560,12 @@ Status PipelineXLocalState::init(RuntimeState* state, LocalState _open_timer = ADD_TIMER_WITH_LEVEL(_common_profile, "OpenTime", 2); _close_timer = ADD_TIMER_WITH_LEVEL(_common_profile, "CloseTime", 2); _exec_timer = ADD_TIMER_WITH_LEVEL(_common_profile, "ExecTime", 1); + _output_block_bytes_counter = + ADD_COUNTER_WITH_LEVEL(_common_profile, "OutputBlockBytes", TUnit::BYTES, 1); + _max_output_block_bytes_counter = + ADD_COUNTER_WITH_LEVEL(_common_profile, "MaxOutputBlockBytes", TUnit::BYTES, 1); + _min_output_block_bytes_counter = + ADD_COUNTER_WITH_LEVEL(_common_profile, "MinOutputBlockBytes", TUnit::BYTES, 1); _memory_used_counter = _common_profile->AddHighWaterMarkCounter("MemoryUsage", TUnit::BYTES, "", 1); _common_profile->add_info_string("IsColocate", diff --git a/be/src/exec/operator/operator.h b/be/src/exec/operator/operator.h index 2f403d275fd758..25ae1477f8abff 100644 --- a/be/src/exec/operator/operator.h +++ b/be/src/exec/operator/operator.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,7 @@ #include "runtime/runtime_profile.h" #include "runtime/runtime_state.h" #include "runtime/thread_context.h" +#include "util/block_budget.h" namespace doris { #include "common/compile_check_begin.h" @@ -245,11 +247,28 @@ class PipelineXLocalStateBase { RuntimeProfile::Counter* memory_used_counter() { return _memory_used_counter; } OperatorXBase* parent() { return _parent; } RuntimeState* state() { return _state; } + [[nodiscard]] const BlockBudget& block_budget() const { return _budget; } VExprContextSPtrs& conjuncts() { return _conjuncts; } VExprContextSPtrs& projections() { return _projections; } [[nodiscard]] int64_t num_rows_returned() const { return _num_rows_returned; } void add_num_rows_returned(int64_t delta) { _num_rows_returned += delta; } void set_num_rows_returned(int64_t value) { _num_rows_returned = value; } + void update_output_block_counters(const Block& block) { + if (auto rows = block.rows()) { + COUNTER_UPDATE(_rows_returned_counter, rows); + COUNTER_UPDATE(_blocks_returned_counter, 1); + auto block_bytes = static_cast(block.bytes()); + COUNTER_UPDATE(_output_block_bytes_counter, block_bytes); + if (block_bytes > _max_output_block_bytes) { + _max_output_block_bytes = block_bytes; + COUNTER_SET(_max_output_block_bytes_counter, block_bytes); + } + if (block_bytes < _min_output_block_bytes) { + _min_output_block_bytes = block_bytes; + COUNTER_SET(_min_output_block_bytes_counter, block_bytes); + } + } + } [[nodiscard]] virtual std::string debug_string(int indentation_level = 0) const = 0; [[nodiscard]] virtual bool is_blockable() const; @@ -305,6 +324,11 @@ class PipelineXLocalStateBase { RuntimeProfile::Counter* _rows_returned_counter = nullptr; RuntimeProfile::Counter* _blocks_returned_counter = nullptr; + RuntimeProfile::Counter* _output_block_bytes_counter = nullptr; + RuntimeProfile::Counter* 
_max_output_block_bytes_counter = nullptr; + RuntimeProfile::Counter* _min_output_block_bytes_counter = nullptr; + int64_t _max_output_block_bytes = 0; + int64_t _min_output_block_bytes = std::numeric_limits::max(); RuntimeProfile::Counter* _wait_for_dependency_timer = nullptr; // Account for current memory and peak memory used by this node RuntimeProfile::HighWaterMarkCounter* _memory_used_counter = nullptr; @@ -316,6 +340,8 @@ class PipelineXLocalStateBase { OperatorXBase* _parent = nullptr; RuntimeState* _state = nullptr; + // Execution-scoped row/byte budget derived from the session batch settings. + const BlockBudget _budget; VExprContextSPtrs _conjuncts; VExprContextSPtrs _projections; std::shared_ptr _score_runtime; diff --git a/be/src/exec/operator/scan_operator.h b/be/src/exec/operator/scan_operator.h index 635e3c8d593582..d6e2407a8d2fba 100644 --- a/be/src/exec/operator/scan_operator.h +++ b/be/src/exec/operator/scan_operator.h @@ -344,11 +344,7 @@ class ScanOperatorX : public OperatorX { Status get_block_after_projects(RuntimeState* state, Block* block, bool* eos) override { Status status = get_block(state, block, eos); if (status.ok()) { - if (auto rows = block->rows()) { - auto* local_state = state->get_local_state(operator_id()); - COUNTER_UPDATE(local_state->_rows_returned_counter, rows); - COUNTER_UPDATE(local_state->_blocks_returned_counter, 1); - } + state->get_local_state(operator_id())->update_output_block_counters(*block); } return status; } diff --git a/be/src/exec/operator/schema_scan_operator.cpp b/be/src/exec/operator/schema_scan_operator.cpp index be60af084df586..27385c3eca079e 100644 --- a/be/src/exec/operator/schema_scan_operator.cpp +++ b/be/src/exec/operator/schema_scan_operator.cpp @@ -32,6 +32,12 @@ class RuntimeState; namespace doris { +SchemaScanLocalState::SchemaScanLocalState(RuntimeState* state, OperatorXBase* parent) + : PipelineXLocalState<>(state, parent), + _data_dependency(std::make_shared(parent->operator_id(), parent->node_id(), + parent->get_name() + "_DEPENDENCY", true)) { +} + Status SchemaScanLocalState::init(RuntimeState* state, LocalStateInfo& info) { RETURN_IF_ERROR(PipelineXLocalState<>::init(state, info)); @@ -243,7 +249,7 @@ Status SchemaScanOperatorX::get_block(RuntimeState* state, Block* block, bool* e break; } - if (src_block.rows() >= state->batch_size()) { + if (local_state.block_budget().exceeded(src_block.rows(), src_block.bytes())) { break; } } diff --git a/be/src/exec/operator/schema_scan_operator.h b/be/src/exec/operator/schema_scan_operator.h index c158ff0b087890..1d8cf22c4a0be0 100644 --- a/be/src/exec/operator/schema_scan_operator.h +++ b/be/src/exec/operator/schema_scan_operator.h @@ -37,11 +37,7 @@ class SchemaScanLocalState final : public PipelineXLocalState<> { public: ENABLE_FACTORY_CREATOR(SchemaScanLocalState); - SchemaScanLocalState(RuntimeState* state, OperatorXBase* parent) - : PipelineXLocalState<>(state, parent) { - _data_dependency = std::make_shared(parent->operator_id(), parent->node_id(), - parent->get_name() + "_DEPENDENCY", true); - } + SchemaScanLocalState(RuntimeState* state, OperatorXBase* parent); ~SchemaScanLocalState() override = default; Status init(RuntimeState* state, LocalStateInfo& info) override; @@ -93,4 +89,4 @@ class SchemaScanOperatorX final : public OperatorX { }; #include "common/compile_check_end.h" -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/exec/scan/file_scanner.cpp b/be/src/exec/scan/file_scanner.cpp index a48ebbed98e926..d97f5a9ae95441 
100644 --- a/be/src/exec/scan/file_scanner.cpp +++ b/be/src/exec/scan/file_scanner.cpp @@ -162,6 +162,21 @@ Status FileScanner::init(RuntimeState* state, const VExprContextSPtrs& conjuncts _runtime_filter_partition_pruned_range_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), "RuntimeFilterPartitionPrunedRangeNum", TUnit::UNIT, 1); + // Keep the current file's adaptive state while also preserving the peak value across all + // files handled by this scanner instance. + _adaptive_batch_predicted_rows_counter = + _local_state->scanner_profile()->AddHighWaterMarkCounter( + "AdaptiveBatchPredictedRows", TUnit::UNIT, RuntimeProfile::ROOT_COUNTER, 1); + _adaptive_batch_actual_bytes_before_truncate_counter = + _local_state->scanner_profile()->AddHighWaterMarkCounter( + "AdaptiveBatchActualBytesBeforeTruncate", TUnit::BYTES, + RuntimeProfile::ROOT_COUNTER, 1); + _adaptive_batch_actual_bytes_after_truncate_counter = + _local_state->scanner_profile()->AddHighWaterMarkCounter( + "AdaptiveBatchActualBytesAfterTruncate", TUnit::BYTES, + RuntimeProfile::ROOT_COUNTER, 1); + _adaptive_batch_probe_count_counter = ADD_COUNTER_WITH_LEVEL( + _local_state->scanner_profile(), "AdaptiveBatchProbeCount", TUnit::UNIT, 1); _file_cache_statistics.reset(new io::FileCacheStatistics()); _file_reader_stats.reset(new io::FileReaderStats()); @@ -202,6 +217,98 @@ Status FileScanner::init(RuntimeState* state, const VExprContextSPtrs& conjuncts return Status::OK(); } +bool FileScanner::_should_enable_adaptive_batch_size(TFileFormatType::type format_type) const { + // Only enable for readers that support set_batch_size(). + // Table-format wrappers are covered because they delegate to native readers. + if (!config::enable_adaptive_batch_size) { + return false; + } + switch (format_type) { + case TFileFormatType::FORMAT_PARQUET: + case TFileFormatType::FORMAT_ORC: + case TFileFormatType::FORMAT_CSV_PLAIN: + case TFileFormatType::FORMAT_CSV_GZ: + case TFileFormatType::FORMAT_CSV_BZ2: + case TFileFormatType::FORMAT_CSV_LZ4FRAME: + case TFileFormatType::FORMAT_CSV_LZ4BLOCK: + case TFileFormatType::FORMAT_CSV_LZOP: + case TFileFormatType::FORMAT_CSV_DEFLATE: + case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK: + case TFileFormatType::FORMAT_PROTO: + case TFileFormatType::FORMAT_TEXT: + case TFileFormatType::FORMAT_JSON: + case TFileFormatType::FORMAT_JNI: + return true; + default: + return false; + } +} + +bool FileScanner::_should_run_adaptive_batch_size() const { + // Skip adaptive batch sizing for pushed-down COUNT(*): the reader is wrapped by CountReader + // and only emits a single aggregated row count instead of materializing real columns, so + // there is no per-row byte cost to learn from and no benefit in tuning the batch size. + return _block_size_predictor != nullptr && _get_push_down_agg_type() != TPushAggOp::type::COUNT; +} + +void FileScanner::_reset_adaptive_batch_size_state() { + _block_size_predictor.reset(); + COUNTER_SET(_adaptive_batch_predicted_rows_counter, int64_t(0)); + COUNTER_SET(_adaptive_batch_actual_bytes_before_truncate_counter, int64_t(0)); + COUNTER_SET(_adaptive_batch_actual_bytes_after_truncate_counter, int64_t(0)); +} + +void FileScanner::_init_adaptive_batch_size_state(TFileFormatType::type format_type) { + _reset_adaptive_batch_size_state(); + if (!_should_enable_adaptive_batch_size(format_type)) { + return; + } + + // External file readers do not provide reliable memory-size metadata hints. Use a small probe + // batch so the predictor can learn from real FileScanner output quickly. 
+ _block_size_predictor = std::make_unique( + _state->preferred_block_size_bytes(), 0.0, ADAPTIVE_BATCH_INITIAL_PROBE_ROWS, + _state->batch_size()); +} + +size_t FileScanner::_predict_reader_batch_rows() { + DCHECK(_block_size_predictor != nullptr); + size_t predicted_rows = _block_size_predictor->predict_next_rows(); + COUNTER_SET(_adaptive_batch_predicted_rows_counter, static_cast(predicted_rows)); + return predicted_rows; +} + +void FileScanner::_update_adaptive_batch_size_before_truncate(const Block& block) { + if (!_should_run_adaptive_batch_size()) { + return; + } + + // Learn from the logical bytes before CHAR/VARCHAR truncation. The truncated block can be + // much smaller than the data the reader and FileScanner have already materialized. + COUNTER_SET(_adaptive_batch_actual_bytes_before_truncate_counter, + static_cast(block.bytes())); + if (block.rows() == 0) { + return; + } + + // Count a probe only when we actually obtain the first non-empty sample that seeds history. + if (!_block_size_predictor->has_history()) { + COUNTER_UPDATE(_adaptive_batch_probe_count_counter, 1); + } + _block_size_predictor->update(block); +} + +void FileScanner::_update_adaptive_batch_size_after_truncate(const Block& block) { + if (!_should_run_adaptive_batch_size()) { + return; + } + + // Keep the post-truncate size only for observability. It should not affect the next batch + // because truncation happens after the upstream memory cost has already been paid. + COUNTER_SET(_adaptive_batch_actual_bytes_after_truncate_counter, + static_cast(block.bytes())); +} + // check if the expr is a partition pruning expr bool FileScanner::_check_partition_prune_expr(const VExprSPtr& expr) { if (expr->is_slot_ref()) { @@ -460,12 +567,17 @@ Status FileScanner::_get_block_wrapped(RuntimeState* state, Block* block, bool* // For query job, simply set _src_block_ptr to block. 
size_t read_rows = 0; RETURN_IF_ERROR(_init_src_block(block)); + if (_need_iceberg_rowid_column && _current_range.__isset.table_format_params && _current_range.table_format_params.table_format_type == "iceberg") { if (auto* iceberg_reader = dynamic_cast(_cur_reader.get())) { iceberg_reader->set_row_id_column_position(_iceberg_rowid_column_pos); } } + + if (_should_run_adaptive_batch_size()) { + _cur_reader->set_batch_size(_predict_reader_batch_rows()); + } { SCOPED_TIMER(_get_block_timer); @@ -937,6 +1049,7 @@ Status FileScanner::_get_next_reader() { _state->update_num_finished_scan_range(1); } _cur_reader.reset(nullptr); + _reset_adaptive_batch_size_state(); _src_block_init = false; bool has_next = _first_scan_range; if (!_first_scan_range) { @@ -1113,24 +1226,25 @@ Status FileScanner::_get_next_reader() { case TFileFormatType::FORMAT_CSV_DEFLATE: case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK: case TFileFormatType::FORMAT_PROTO: { - auto reader = CsvReader::create_unique(_state, _profile, &_counter, *_params, range, - _file_slot_descs, _io_ctx.get()); - + auto reader = + CsvReader::create_unique(_state, _profile, &_counter, *_params, range, + _file_slot_descs, _state->batch_size(), _io_ctx.get()); init_status = reader->init_reader(_is_load); _cur_reader = std::move(reader); break; } case TFileFormatType::FORMAT_TEXT: { auto reader = TextReader::create_unique(_state, _profile, &_counter, *_params, range, - _file_slot_descs, _io_ctx.get()); + _file_slot_descs, _state->batch_size(), + _io_ctx.get()); init_status = reader->init_reader(_is_load); _cur_reader = std::move(reader); break; } case TFileFormatType::FORMAT_JSON: { - _cur_reader = - NewJsonReader::create_unique(_state, _profile, &_counter, *_params, range, - _file_slot_descs, &_scanner_eof, _io_ctx.get()); + _cur_reader = NewJsonReader::create_unique(_state, _profile, &_counter, *_params, range, + _file_slot_descs, &_scanner_eof, + _state->batch_size(), _io_ctx.get()); init_status = ((NewJsonReader*)(_cur_reader.get())) ->init_reader(_col_default_value_ctx, _is_load); break; @@ -1226,6 +1340,7 @@ Status FileScanner::_get_next_reader() { } } _cur_reader_eof = false; + _init_adaptive_batch_size_state(format_type); break; } return Status::OK(); diff --git a/be/src/exec/scan/file_scanner.h b/be/src/exec/scan/file_scanner.h index 08b808ef2af1fc..8a32d154695910 100644 --- a/be/src/exec/scan/file_scanner.h +++ b/be/src/exec/scan/file_scanner.h @@ -40,6 +40,7 @@ #include "runtime/descriptors.h" #include "runtime/runtime_profile.h" #include "storage/olap_scan_common.h" +#include "storage/segment/adaptive_block_size_predictor.h" namespace doris { class RuntimeState; @@ -59,6 +60,7 @@ class FileScanner : public Scanner { public: static constexpr const char* NAME = "FileScanner"; + static constexpr size_t ADAPTIVE_BATCH_INITIAL_PROBE_ROWS = 32; // sub profile name (for parquet/orc) static const std::string FileReadBytesProfile; @@ -212,6 +214,10 @@ class FileScanner : public Scanner { RuntimeProfile::Counter* _file_read_calls_counter = nullptr; RuntimeProfile::Counter* _file_read_time_counter = nullptr; RuntimeProfile::Counter* _runtime_filter_partition_pruned_range_counter = nullptr; + RuntimeProfile::Counter* _adaptive_batch_predicted_rows_counter = nullptr; + RuntimeProfile::Counter* _adaptive_batch_actual_bytes_before_truncate_counter = nullptr; + RuntimeProfile::Counter* _adaptive_batch_actual_bytes_after_truncate_counter = nullptr; + RuntimeProfile::Counter* _adaptive_batch_probe_count_counter = nullptr; const std::unordered_map* 
_col_name_to_slot_id = nullptr; // single slot filter conjuncts @@ -237,7 +243,8 @@ class FileScanner : public Scanner { int64_t _last_bytes_read_from_local = 0; int64_t _last_bytes_read_from_remote = 0; -private: + std::unique_ptr _block_size_predictor; + Status _init_expr_ctxes(); Status _init_src_block(Block* block); Status _check_output_block_types(); @@ -282,11 +289,19 @@ class FileScanner : public Scanner { _counter.num_rows_filtered = 0; } - TPushAggOp::type _get_push_down_agg_type() { + TPushAggOp::type _get_push_down_agg_type() const { return _local_state == nullptr ? TPushAggOp::type::NONE : _local_state->get_push_down_agg_type(); } + void _reset_adaptive_batch_size_state(); + void _init_adaptive_batch_size_state(TFileFormatType::type format_type); + bool _should_enable_adaptive_batch_size(TFileFormatType::type format_type) const; + bool _should_run_adaptive_batch_size() const; + size_t _predict_reader_batch_rows(); + void _update_adaptive_batch_size_before_truncate(const Block& block); + void _update_adaptive_batch_size_after_truncate(const Block& block); + // enable the file meta cache only when // 1. max_external_file_meta_cache_num is > 0 // 2. the file number is less than 1/3 of cache's capacibility diff --git a/be/src/exec/scan/olap_scanner.cpp b/be/src/exec/scan/olap_scanner.cpp index 0bcf74c8e47e93..2fab478562f59e 100644 --- a/be/src/exec/scan/olap_scanner.cpp +++ b/be/src/exec/scan/olap_scanner.cpp @@ -163,6 +163,9 @@ Status OlapScanner::prepare() { // value (e.g. select a from t where a .. and b ... limit 1), // it will be very slow when reading data in segment iterator _tablet_reader->set_batch_size(_state->batch_size()); + // Adaptive batch size: pass byte-budget settings to the storage reader. + // The reader still uses batch_size() as the row ceiling. 
+ _tablet_reader->set_preferred_block_size_bytes(_state->preferred_block_size_bytes()); { TOlapScanNode& olap_scan_node = local_state->olap_scan_node(); @@ -775,6 +778,13 @@ void OlapScanner::_collect_profile_before_close() { COUNTER_UPDATE(local_state->_variant_doc_value_column_iter_count, stats.variant_doc_value_column_iter_count); + if (stats.adaptive_batch_size_predict_max_rows > 0) { + local_state->_adaptive_batch_predict_min_rows_counter->set( + stats.adaptive_batch_size_predict_min_rows); + local_state->_adaptive_batch_predict_max_rows_counter->set( + stats.adaptive_batch_size_predict_max_rows); + } + InvertedIndexProfileReporter inverted_index_profile; inverted_index_profile.update(local_state->_index_filter_profile.get(), &stats.inverted_index_stats); diff --git a/be/src/exec/scan/scanner.cpp b/be/src/exec/scan/scanner.cpp index 0b5df2cc054264..4fc0d44561673e 100644 --- a/be/src/exec/scan/scanner.cpp +++ b/be/src/exec/scan/scanner.cpp @@ -87,13 +87,16 @@ Status Scanner::get_block_after_projects(RuntimeState* state, Block* block, bool } else { _origin_block.clear_column_data(row_descriptor.num_materialized_slots()); const auto min_batch_size = std::max(state->batch_size() / 2, 1); - while (_padding_block.rows() < min_batch_size && !*eos) { + const auto block_max_bytes = state->preferred_block_size_bytes(); + while (_padding_block.rows() < min_batch_size && + _padding_block.bytes() < block_max_bytes && !*eos) { RETURN_IF_ERROR(get_block(state, &_origin_block, eos)); if (_origin_block.rows() >= min_batch_size) { break; } - if (_origin_block.rows() + _padding_block.rows() <= state->batch_size()) { + if (_origin_block.rows() + _padding_block.rows() <= state->batch_size() && + _origin_block.bytes() + _padding_block.bytes() <= block_max_bytes) { RETURN_IF_ERROR(_merge_padding_block()); _origin_block.clear_column_data(row_descriptor.num_materialized_slots()); } else { diff --git a/be/src/format/csv/csv_reader.cpp b/be/src/format/csv/csv_reader.cpp index 569d932c26e9ec..90d8dc27787741 100644 --- a/be/src/format/csv/csv_reader.cpp +++ b/be/src/format/csv/csv_reader.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -171,8 +172,8 @@ void PlainCsvTextFieldSplitter::do_split(const Slice& line, std::vector* CsvReader::CsvReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounter* counter, const TFileScanRangeParams& params, const TFileRangeDesc& range, - const std::vector& file_slot_descs, io::IOContext* io_ctx, - std::shared_ptr io_ctx_holder) + const std::vector& file_slot_descs, size_t batch_size, + io::IOContext* io_ctx, std::shared_ptr io_ctx_holder) : _profile(profile), _params(params), _file_reader(nullptr), @@ -185,7 +186,8 @@ CsvReader::CsvReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounte _line_reader_eof(false), _skip_lines(0), _io_ctx(io_ctx), - _io_ctx_holder(std::move(io_ctx_holder)) { + _io_ctx_holder(std::move(io_ctx_holder)), + _batch_size(std::max(batch_size, 1UL)) { if (_io_ctx == nullptr && _io_ctx_holder) { _io_ctx = _io_ctx_holder.get(); } @@ -307,13 +309,22 @@ Status CsvReader::init_reader(bool is_load) { return Status::OK(); } +void CsvReader::set_batch_size(size_t batch_size) { + // 0 means "not set" / "use default" for the row-based readers; we must + // never let _batch_size be 0 because _do_get_next_block uses it as the + // upper bound of a `while (rows < _batch_size)` loop and a 0 would make + // the reader return empty blocks and incorrectly signal EOF. 
+ _batch_size = std::max(batch_size, 1UL); +} + Status CsvReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { if (_line_reader_eof) { *eof = true; return Status::OK(); } - const int batch_size = std::max(_state->batch_size(), (int)_MIN_BATCH_SIZE); + const size_t batch_size = _batch_size; + const auto max_block_bytes = _state->preferred_block_size_bytes(); size_t rows = 0; bool success = false; @@ -355,7 +366,8 @@ Status CsvReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { block->set_columns(std::move(mutate_columns)); } else { auto columns = block->mutate_columns(); - while (rows < batch_size && !_line_reader_eof) { + + while (rows < batch_size && !_line_reader_eof && (block->bytes() < max_block_bytes)) { const uint8_t* ptr = nullptr; size_t size = 0; RETURN_IF_ERROR(_line_reader->read_line(&ptr, &size, &_line_reader_eof, _io_ctx)); diff --git a/be/src/format/csv/csv_reader.h b/be/src/format/csv/csv_reader.h index 4e24be28d15b95..5120cb83ff4e21 100644 --- a/be/src/format/csv/csv_reader.h +++ b/be/src/format/csv/csv_reader.h @@ -172,15 +172,18 @@ class CsvReader : public GenericReader { public: CsvReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounter* counter, const TFileScanRangeParams& params, const TFileRangeDesc& range, - const std::vector& file_slot_descs, io::IOContext* io_ctx, - std::shared_ptr io_ctx_holder = nullptr); + const std::vector& file_slot_descs, size_t batch_size, + io::IOContext* io_ctx, std::shared_ptr io_ctx_holder = nullptr); ~CsvReader() override = default; Status init_reader(bool is_load); + Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; Status get_columns(std::unordered_map* name_to_type, std::unordered_set* missing_cols) override; + void set_batch_size(size_t batch_size) override; + Status init_schema_reader() override; // get schema of csv file from first one line or first two lines. // if file format is FORMAT_CSV_DEFLATE and if @@ -279,6 +282,8 @@ class CsvReader : public GenericReader { io::IOContext* _io_ctx = nullptr; std::shared_ptr _io_ctx_holder; + // Adaptive batch size set by FileScanner. 0 means not set (use _state->batch_size()). + size_t _batch_size; // Stored to adjust column_sep_positions when BOM is removed in enclose mode std::shared_ptr _enclose_reader_ctx; // save source text which have been splitted. diff --git a/be/src/format/generic_reader.h b/be/src/format/generic_reader.h index d68c9aa6bb9f33..e81358ed36dd40 100644 --- a/be/src/format/generic_reader.h +++ b/be/src/format/generic_reader.h @@ -46,6 +46,10 @@ class GenericReader : public ProfileCollector { virtual Status get_next_block(Block* block, size_t* read_rows, bool* eof) = 0; + // Override this in readers that can adjust batch size between consecutive reads. + virtual void set_batch_size(size_t batch_size) {} + virtual size_t get_batch_size() const { return 0; } + // Type is always nullable to process illegal values. virtual Status get_columns(std::unordered_map* name_to_type, std::unordered_set* missing_cols) { @@ -100,6 +104,7 @@ class GenericReader : public ProfileCollector { /// Whether the underlying FileReader has filled the partition&missing columns bool _fill_all_columns = false; + TPushAggOp::type _push_down_agg_type {}; // For TopN queries, rows will be read according to row ids produced by TopN result. 
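Note on the predictor used by the FileScanner hunks above: the real AdaptiveBlockSizePredictor is declared in be/src/storage/segment/adaptive_block_size_predictor.h and its implementation is not part of this diff. The following is only a minimal sketch of the EWMA scheme described in the config.h comment, consistent with the call sites shown in file_scanner.cpp (predict_next_rows(), update(), has_history()). The member names, the smoothing-factor handling, and the meaning of the constructor's second argument are assumptions, and update() here takes raw (bytes, rows) instead of a Block to stay self-contained.

    // Illustrative sketch only -- not the actual class shipped in
    // be/src/storage/segment/adaptive_block_size_predictor.h.
    #include <algorithm>
    #include <cstddef>

    class AdaptiveBlockSizePredictorSketch {
    public:
        AdaptiveBlockSizePredictorSketch(size_t preferred_bytes, double alpha, size_t probe_rows,
                                         size_t max_rows)
                : _preferred_bytes(preferred_bytes),
                  // 0.0 is passed at the FileScanner call site; assume that means
                  // "use the default smoothing factor".
                  _alpha(alpha > 0.0 ? alpha : 0.2),
                  _probe_rows(std::max<size_t>(probe_rows, 1)),
                  _max_rows(std::max<size_t>(max_rows, 1)) {}

        bool has_history() const { return _avg_row_bytes > 0.0; }

        // Before the first non-empty sample, return a small probe batch
        // (ADAPTIVE_BATCH_INITIAL_PROBE_ROWS = 32 in the patch); afterwards derive
        // the row count from the byte budget and the smoothed per-row size,
        // capped by the session batch_size.
        size_t predict_next_rows() const {
            if (!has_history()) {
                return std::min(_probe_rows, _max_rows);
            }
            auto rows = static_cast<size_t>(_preferred_bytes / _avg_row_bytes);
            return std::clamp<size_t>(rows, 1, _max_rows);
        }

        // Feed back the observed block: an EWMA of bytes-per-row keeps one unusually
        // wide or narrow batch from swinging the next prediction too hard.
        void update(size_t block_bytes, size_t block_rows) {
            if (block_rows == 0) {
                return;
            }
            double sample = static_cast<double>(block_bytes) / static_cast<double>(block_rows);
            _avg_row_bytes = has_history() ? _alpha * sample + (1.0 - _alpha) * _avg_row_bytes
                                           : sample;
        }

    private:
        size_t _preferred_bytes; // target bytes per output block
        double _alpha;           // EWMA smoothing factor (assumed default 0.2)
        size_t _probe_rows;      // initial probe batch before any history exists
        size_t _max_rows;        // hard row ceiling (session batch_size)
        double _avg_row_bytes = 0.0;
    };

Under this reading of the patch, _predict_reader_batch_rows() seeds _cur_reader->set_batch_size(...) before each read, and _update_adaptive_batch_size_before_truncate() feeds the pre-truncation block bytes back in, so successive reader batches converge toward preferred_block_size_bytes while never exceeding batch_size rows.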
diff --git a/be/src/format/json/new_json_reader.cpp b/be/src/format/json/new_json_reader.cpp index cecfcf3f0dcf54..4060744c9a85c1 100644 --- a/be/src/format/json/new_json_reader.cpp +++ b/be/src/format/json/new_json_reader.cpp @@ -79,7 +79,8 @@ using namespace ErrorCode; NewJsonReader::NewJsonReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounter* counter, const TFileScanRangeParams& params, const TFileRangeDesc& range, const std::vector& file_slot_descs, bool* scanner_eof, - io::IOContext* io_ctx, std::shared_ptr io_ctx_holder) + size_t batch_size, io::IOContext* io_ctx, + std::shared_ptr io_ctx_holder) : _vhandle_json_callback(nullptr), _state(state), _profile(profile), @@ -100,7 +101,8 @@ NewJsonReader::NewJsonReader(RuntimeState* state, RuntimeProfile* profile, Scann _scanner_eof(scanner_eof), _current_offset(0), _io_ctx(io_ctx), - _io_ctx_holder(std::move(io_ctx_holder)) { + _io_ctx_holder(std::move(io_ctx_holder)), + _batch_size(std::max(batch_size, 1UL)) { if (_io_ctx == nullptr && _io_ctx_holder) { _io_ctx = _io_ctx_holder.get(); } @@ -117,7 +119,7 @@ NewJsonReader::NewJsonReader(RuntimeState* state, RuntimeProfile* profile, Scann NewJsonReader::NewJsonReader(RuntimeProfile* profile, const TFileScanRangeParams& params, const TFileRangeDesc& range, - const std::vector& file_slot_descs, + const std::vector& file_slot_descs, size_t batch_size, io::IOContext* io_ctx, std::shared_ptr io_ctx_holder) : _vhandle_json_callback(nullptr), _state(nullptr), @@ -135,7 +137,8 @@ NewJsonReader::NewJsonReader(RuntimeProfile* profile, const TFileScanRangeParams _parse_allocator(_parse_buffer, sizeof(_parse_buffer)), _origin_json_doc(&_value_allocator, sizeof(_parse_buffer), &_parse_allocator), _io_ctx(io_ctx), - _io_ctx_holder(std::move(io_ctx_holder)) { + _io_ctx_holder(std::move(io_ctx_holder)), + _batch_size(std::max(batch_size, 1UL)) { if (_io_ctx == nullptr && _io_ctx_holder) { _io_ctx = _io_ctx_holder.get(); } @@ -203,9 +206,10 @@ Status NewJsonReader::get_next_block(Block* block, size_t* read_rows, bool* eof) return Status::OK(); } - const int batch_size = std::max(_state->batch_size(), (int)_MIN_BATCH_SIZE); + const auto batch_size = _batch_size; + const auto max_block_bytes = _state->preferred_block_size_bytes(); - while (block->rows() < batch_size && !_reader_eof) { + while (block->rows() < batch_size && !_reader_eof && (block->bytes() < max_block_bytes)) { if (UNLIKELY(_read_json_by_line && _skip_first_line)) { size_t size = 0; const uint8_t* line_ptr = nullptr; @@ -251,6 +255,15 @@ Status NewJsonReader::init_schema_reader() { return Status::OK(); } +void NewJsonReader::set_batch_size(size_t batch_size) { + // 0 means "not set" / "use default" for the row-based readers; we must + // never let _batch_size be 0 because _do_get_next_block uses it as the + // upper bound of a `while (block->rows() < batch_size)` loop and a 0 + // would make the reader return without setting eof, causing the scanner + // to spin on empty blocks. 
+ _batch_size = std::max(batch_size, 1UL); +} + Status NewJsonReader::get_parsed_schema(std::vector* col_names, std::vector* col_types) { bool eof = false; diff --git a/be/src/format/json/new_json_reader.h b/be/src/format/json/new_json_reader.h index 4d803fc1050b19..58876384f0e4e2 100644 --- a/be/src/format/json/new_json_reader.h +++ b/be/src/format/json/new_json_reader.h @@ -70,19 +70,29 @@ class NewJsonReader : public GenericReader { NewJsonReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounter* counter, const TFileScanRangeParams& params, const TFileRangeDesc& range, const std::vector& file_slot_descs, bool* scanner_eof, - io::IOContext* io_ctx, std::shared_ptr io_ctx_holder = nullptr); + size_t batch_size, io::IOContext* io_ctx, + std::shared_ptr io_ctx_holder = nullptr); NewJsonReader(RuntimeProfile* profile, const TFileScanRangeParams& params, const TFileRangeDesc& range, const std::vector& file_slot_descs, - io::IOContext* io_ctx, std::shared_ptr io_ctx_holder = nullptr); + size_t batch_size, io::IOContext* io_ctx, + std::shared_ptr io_ctx_holder = nullptr); ~NewJsonReader() override = default; Status init_reader( const std::unordered_map& col_default_value_ctx, bool is_load); + Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; Status get_columns(std::unordered_map* name_to_type, std::unordered_set* missing_cols) override; + + // Row-based readers control throughput via row count, not byte budget. + // The FileScanner's AdaptiveBlockSizePredictor converts the byte budget + // into a predicted row count and calls set_batch_size() with it. + void set_batch_size(size_t batch_size) override; + size_t get_batch_size() const override { return _batch_size; } + Status init_schema_reader() override; Status get_parsed_schema(std::vector* col_names, std::vector* col_types) override; @@ -296,6 +306,8 @@ class NewJsonReader : public GenericReader { DataTypeSerDeSPtrs _serdes; DataTypeSerDe::FormatOptions _serde_options; + // Adaptive batch size set by FileScanner. + size_t _batch_size; }; #include "common/compile_check_end.h" diff --git a/be/src/format/orc/vorc_reader.cpp b/be/src/format/orc/vorc_reader.cpp index bf5a67fb0d28a1..67fc7c91613cc4 100644 --- a/be/src/format/orc/vorc_reader.cpp +++ b/be/src/format/orc/vorc_reader.cpp @@ -243,7 +243,7 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* state, _state(state), _scan_params(params), _scan_range(range), - _batch_size(std::max(batch_size, _MIN_BATCH_SIZE)), + _batch_size(std::max(batch_size, 1UL)), _range_start_offset(range.start_offset), _range_size(range.size), _ctz(ctz), @@ -268,7 +268,7 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* state, _state(state), _scan_params(params), _scan_range(range), - _batch_size(std::max(batch_size, _MIN_BATCH_SIZE)), + _batch_size(std::max(batch_size, 1UL)), _range_start_offset(range.start_offset), _range_size(range.size), _ctz(ctz), @@ -285,12 +285,27 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* state, _init_file_description(); } +void OrcReader::set_batch_size(size_t batch_size) { + DCHECK_GT(batch_size, 0); + if (_batch_size == batch_size) { + return; + } + + _batch_size = batch_size; + if (_row_reader != nullptr) { + // ORC stores the batch capacity inside the row batch object returned by createRowBatch(). + // Rebuild it when the requested batch size changes so the next call uses the new limit. 
+ _batch = _row_reader->createRowBatch(_batch_size); + } +} + OrcReader::OrcReader(const TFileScanRangeParams& params, const TFileRangeDesc& range, - const std::string& ctz, io::IOContext* io_ctx, FileMetaCache* meta_cache, - bool enable_lazy_mat) + size_t batch_size, const std::string& ctz, io::IOContext* io_ctx, + FileMetaCache* meta_cache, bool enable_lazy_mat) : _profile(nullptr), _scan_params(params), _scan_range(range), + _batch_size(std::max(batch_size, 1UL)), _ctz(ctz), _file_system(nullptr), _io_ctx(io_ctx), @@ -303,11 +318,13 @@ OrcReader::OrcReader(const TFileScanRangeParams& params, const TFileRangeDesc& r } OrcReader::OrcReader(const TFileScanRangeParams& params, const TFileRangeDesc& range, - const std::string& ctz, std::shared_ptr io_ctx_holder, - FileMetaCache* meta_cache, bool enable_lazy_mat) + size_t batch_size, const std::string& ctz, + std::shared_ptr io_ctx_holder, FileMetaCache* meta_cache, + bool enable_lazy_mat) : _profile(nullptr), _scan_params(params), _scan_range(range), + _batch_size(std::max(batch_size, 1UL)), _ctz(ctz), _file_system(nullptr), _io_ctx(io_ctx_holder ? io_ctx_holder.get() : nullptr), diff --git a/be/src/format/orc/vorc_reader.h b/be/src/format/orc/vorc_reader.h index 2697a108200f58..cfbd7abb8cb039 100644 --- a/be/src/format/orc/vorc_reader.h +++ b/be/src/format/orc/vorc_reader.h @@ -152,11 +152,11 @@ class OrcReader : public GenericReader { std::shared_ptr io_ctx_holder, FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true); - OrcReader(const TFileScanRangeParams& params, const TFileRangeDesc& range, + OrcReader(const TFileScanRangeParams& params, const TFileRangeDesc& range, size_t batch_size, const std::string& ctz, io::IOContext* io_ctx, FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true); - OrcReader(const TFileScanRangeParams& params, const TFileRangeDesc& range, + OrcReader(const TFileScanRangeParams& params, const TFileRangeDesc& range, size_t batch_size, const std::string& ctz, std::shared_ptr io_ctx_holder, FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true); @@ -181,6 +181,8 @@ class OrcReader : public GenericReader { Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; + void set_batch_size(size_t batch_size) override; + int64_t size() const; Status get_columns(std::unordered_map* name_to_type, @@ -236,6 +238,8 @@ class OrcReader : public GenericReader { bool count_read_rows() override { return true; } + size_t get_batch_size() const override { return _batch_size; } + protected: void _collect_profile_before_close() override; @@ -676,6 +680,7 @@ class OrcReader : public GenericReader { io::FileDescription _file_description; size_t _batch_size; int64_t _range_start_offset; + int64_t _range_size; std::string _ctz; diff --git a/be/src/format/parquet/vparquet_reader.cpp b/be/src/format/parquet/vparquet_reader.cpp index e1170ff08619d4..22e5ebd62b8a1b 100644 --- a/be/src/format/parquet/vparquet_reader.cpp +++ b/be/src/format/parquet/vparquet_reader.cpp @@ -87,7 +87,7 @@ ParquetReader::ParquetReader(RuntimeProfile* profile, const TFileScanRangeParams : _profile(profile), _scan_params(params), _scan_range(range), - _batch_size(std::max(batch_size, _MIN_BATCH_SIZE)), + _batch_size(std::max(batch_size, 1UL)), _range_start_offset(range.start_offset), _range_size(range.size), _ctz(ctz), @@ -106,6 +106,13 @@ ParquetReader::ParquetReader(RuntimeProfile* profile, const TFileScanRangeParams _init_file_description(); } +void ParquetReader::set_batch_size(size_t batch_size) { + if 
(_batch_size == batch_size) { + return; + } + _batch_size = batch_size; +} + ParquetReader::ParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params, const TFileRangeDesc& range, size_t batch_size, cctz::time_zone* ctz, std::shared_ptr io_ctx_holder, RuntimeState* state, @@ -113,7 +120,7 @@ ParquetReader::ParquetReader(RuntimeProfile* profile, const TFileScanRangeParams : _profile(profile), _scan_params(params), _scan_range(range), - _batch_size(std::max(batch_size, _MIN_BATCH_SIZE)), + _batch_size(std::max(batch_size, 1UL)), _range_start_offset(range.start_offset), _range_size(range.size), _ctz(ctz), diff --git a/be/src/format/parquet/vparquet_reader.h b/be/src/format/parquet/vparquet_reader.h index 402fdd11138f77..e2dbd0f963f661 100644 --- a/be/src/format/parquet/vparquet_reader.h +++ b/be/src/format/parquet/vparquet_reader.h @@ -140,6 +140,8 @@ class ParquetReader : public GenericReader { Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; + void set_batch_size(size_t batch_size) override; + Status close() override; // set the delete rows in current parquet file @@ -356,6 +358,7 @@ class ParquetReader : public GenericReader { const VExprContextSPtrs* _not_single_slot_filter_conjuncts = nullptr; const std::unordered_map* _slot_id_to_filter_conjuncts = nullptr; std::unordered_map _ignored_stats; + size_t get_batch_size() const override { return _batch_size; } std::pair, int> _row_id_column_iterator_pair = {nullptr, -1}; @@ -363,6 +366,7 @@ class ParquetReader : public GenericReader { protected: bool _filter_groups = true; + RowGroupReader::IcebergRowIdParams _iceberg_rowid_params; std::set _column_ids; diff --git a/be/src/format/text/text_reader.cpp b/be/src/format/text/text_reader.cpp index 2f98ad517cd9d6..e52da7f3249036 100644 --- a/be/src/format/text/text_reader.cpp +++ b/be/src/format/text/text_reader.cpp @@ -113,8 +113,9 @@ void HiveTextFieldSplitter::_split_field_multi_char(const Slice& line, TextReader::TextReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounter* counter, const TFileScanRangeParams& params, const TFileRangeDesc& range, - const std::vector& file_slot_descs, io::IOContext* io_ctx) - : CsvReader(state, profile, counter, params, range, file_slot_descs, io_ctx) {} + const std::vector& file_slot_descs, size_t batch_size, + io::IOContext* io_ctx) + : CsvReader(state, profile, counter, params, range, file_slot_descs, batch_size, io_ctx) {} Status TextReader::_init_options() { // get column_separator and line_delimiter diff --git a/be/src/format/text/text_reader.h b/be/src/format/text/text_reader.h index b7251d5f5f8575..22073c130a8486 100644 --- a/be/src/format/text/text_reader.h +++ b/be/src/format/text/text_reader.h @@ -56,7 +56,8 @@ class TextReader : public CsvReader { public: TextReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounter* counter, const TFileScanRangeParams& params, const TFileRangeDesc& range, - const std::vector& file_slot_descs, io::IOContext* io_ctx); + const std::vector& file_slot_descs, size_t batch_size, + io::IOContext* io_ctx); ~TextReader() override = default; diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index a6b98367cf92ce..b8a85d097f99cb 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -139,7 +139,35 @@ class RuntimeState { const DescriptorTbl& desc_tbl() const { return *_desc_tbl; } void set_desc_tbl(const DescriptorTbl* desc_tbl) { _desc_tbl = desc_tbl; } - MOCK_FUNCTION int batch_size() const { return 
_query_options.batch_size; } + + // Row-count limit for output blocks. Clamp to [1, 65535]. + // Adaptive byte budgeting still uses this as the hard row ceiling. + MOCK_FUNCTION int batch_size() const { + static constexpr int kMax = 65535; + auto v = _query_options.batch_size; + return std::min(std::max(1, v), kMax); + } + + // Target byte budget per output block (default 8MB when adaptive is enabled). + // The public FE/session contract is [1MB, 512MB]; this accessor still clamps any direct + // thrift or mixed-version out-of-range value into that range. Returns `kMax` when adaptive + // is disabled by BE config so the value is always a legal byte budget; callers that need + // to know whether adaptive batch size is active should test + // `config::enable_adaptive_batch_size` explicitly. + MOCK_FUNCTION size_t preferred_block_size_bytes() const { + static constexpr int64_t kDefault = 8388608L; // 8MB + static constexpr int64_t kMax = 536870912L; // 512MB + static constexpr int64_t kMin = 1048576L; // 1MB + if (!config::enable_adaptive_batch_size) [[unlikely]] { + return kMax; + } + if (_query_options.__isset.preferred_block_size_bytes) [[likely]] { + return std::max( + kMin, std::min(_query_options.preferred_block_size_bytes, kMax)); + } + return kDefault; + } + int query_parallel_instance_num() const { return _query_options.parallel_instance; } int max_errors() const { return _query_options.max_errors; } int execution_timeout() const { diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index 148a2002f1fb0b..d5937fec14304e 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -828,6 +828,7 @@ void PInternalService::fetch_table_schema(google::protobuf::RpcController* contr auto file_reader_stats = std::make_shared(); io_ctx->file_cache_stats = file_cache_statis.get(); io_ctx->file_reader_stats = file_reader_stats.get(); + constexpr size_t fetch_schema_batch_size = 4064; // file_slots is no use, but the lifetime should be longer than reader std::vector file_slots; switch (params.format_type) { @@ -840,12 +841,13 @@ void PInternalService::fetch_table_schema(google::protobuf::RpcController* contr case TFileFormatType::FORMAT_CSV_LZOP: case TFileFormatType::FORMAT_CSV_DEFLATE: { reader = CsvReader::create_unique(nullptr, profile.get(), nullptr, params, range, - file_slots, io_ctx.get(), io_ctx); + file_slots, fetch_schema_batch_size, io_ctx.get(), + io_ctx); break; } case TFileFormatType::FORMAT_TEXT: { reader = TextReader::create_unique(nullptr, profile.get(), nullptr, params, range, - file_slots, io_ctx.get()); + file_slots, fetch_schema_batch_size, io_ctx.get()); break; } case TFileFormatType::FORMAT_PARQUET: { @@ -853,7 +855,7 @@ void PInternalService::fetch_table_schema(google::protobuf::RpcController* contr break; } case TFileFormatType::FORMAT_ORC: { - reader = OrcReader::create_unique(params, range, "", io_ctx); + reader = OrcReader::create_unique(params, range, fetch_schema_batch_size, "", io_ctx); break; } case TFileFormatType::FORMAT_NATIVE: { @@ -863,7 +865,7 @@ void PInternalService::fetch_table_schema(google::protobuf::RpcController* contr } case TFileFormatType::FORMAT_JSON: { reader = NewJsonReader::create_unique(profile.get(), params, range, file_slots, - io_ctx.get(), io_ctx); + fetch_schema_batch_size, io_ctx.get(), io_ctx); break; } case TFileFormatType::FORMAT_AVRO: { diff --git a/be/src/storage/compaction/compaction.cpp b/be/src/storage/compaction/compaction.cpp index 
22434bc083a250..6e553bfb901dfe 100644 --- a/be/src/storage/compaction/compaction.cpp +++ b/be/src/storage/compaction/compaction.cpp @@ -1294,6 +1294,7 @@ Status CompactionMixin::modify_rowsets() { _tablet->enable_unique_key_merge_on_write()) { Version version = tablet()->max_version(); DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id()); + DeleteBitmap output_rowset_internal_delete_bitmap(_tablet->tablet_id()); std::unique_ptr missed_rows; if ((config::enable_missing_rows_correctness_check || config::enable_mow_compaction_correctness_check_core || @@ -1313,12 +1314,20 @@ Status CompactionMixin::modify_rowsets() { // New loads are not blocked, so some keys of input rowsets might // be deleted during the time. We need to deal with delete bitmap // of incremental data later. - // TODO(LiaoXin): check if there are duplicate keys std::size_t missed_rows_size = 0; tablet()->calc_compaction_output_rowset_delete_bitmap( _input_rowsets, *_rowid_conversion, 0, version.second + 1, missed_rows.get(), location_map.get(), _tablet->tablet_meta()->delete_bitmap(), &output_rowset_delete_bitmap); + // In cluster-key MOW compaction, rows are sorted by cluster key, so duplicate unique keys + // may be non-adjacent in merge order. Scan the output primary key index to delete older + // duplicate rows inside the output rowset. + if (!tablet()->tablet_schema()->cluster_key_uids().empty()) { + RETURN_IF_ERROR(tablet()->calc_compaction_output_rowset_internal_delete_bitmap( + _input_rowsets, _output_rowset, *_rowid_conversion, + &output_rowset_internal_delete_bitmap)); + output_rowset_delete_bitmap.merge(output_rowset_internal_delete_bitmap); + } if (missed_rows) { missed_rows_size = missed_rows->size(); std::size_t merged_missed_rows_size = _stats.merged_rows; @@ -1418,6 +1427,7 @@ Status CompactionMixin::modify_rowsets() { tablet()->calc_compaction_output_rowset_delete_bitmap( _input_rowsets, *_rowid_conversion, 0, UINT64_MAX, missed_rows.get(), location_map.get(), *it.delete_bitmap.get(), &txn_output_delete_bitmap); + txn_output_delete_bitmap.merge(output_rowset_internal_delete_bitmap); if (config::enable_merge_on_write_correctness_check) { RowsetIdUnorderedSet rowsetids; rowsetids.insert(_output_rowset->rowset_id()); diff --git a/be/src/storage/iterator/block_reader.cpp b/be/src/storage/iterator/block_reader.cpp index f007a1f1e9435a..161cc8cc4b551a 100644 --- a/be/src/storage/iterator/block_reader.cpp +++ b/be/src/storage/iterator/block_reader.cpp @@ -30,6 +30,7 @@ // IWYU pragma: no_include #include "cloud/config.h" #include "common/compiler_util.h" // IWYU pragma: keep +#include "common/config.h" #include "common/status.h" #include "core/block/column_with_type_and_name.h" #include "core/column/column_nullable.h" @@ -55,6 +56,8 @@ namespace doris { #include "common/compile_check_begin.h" using namespace ErrorCode; +static constexpr int32_t BLOCK_SIZE_CHECK_INTERVAL_ROWS = 64; + BlockReader::~BlockReader() { for (int i = 0; i < _agg_functions.size(); ++i) { _agg_functions[i]->destroy(_agg_places[i]); @@ -166,7 +169,7 @@ Status BlockReader::_init_agg_state(const ReaderParams& read_params) { } _stored_data_columns = - _next_row.block->create_same_struct_block(_reader_context.batch_size)->mutate_columns(); + _next_row.block->create_same_struct_block(batch_max_rows())->mutate_columns(); _stored_has_null_tag.resize(_stored_data_columns.size()); _stored_has_variable_length_tag.resize(_stored_data_columns.size()); @@ -310,9 +313,16 @@ Status BlockReader::_agg_key_next_block(Block* block, bool* eof) { } if 
(!_next_row.is_same) { - if (target_block_row == _reader_context.batch_size) { + if (target_block_row == batch_max_rows()) { + break; + } + // Byte-budget check at group boundary: _next_row is the first row of the new group + // and is still pending (not yet inserted), so stopping here is safe. + if (target_block_row % BLOCK_SIZE_CHECK_INTERVAL_ROWS == 0 && + _reached_byte_budget(target_columns)) { break; } + _agg_data_counters.push_back(_last_agg_data_counter); _last_agg_data_counter = 0; @@ -344,7 +354,7 @@ Status BlockReader::_unique_key_next_block(Block* block, bool* eof) { auto target_block_row = 0; auto target_columns = block->mutate_columns(); if (UNLIKELY(_reader_context.record_rowids)) { - _block_row_locations.resize(_reader_context.batch_size); + _block_row_locations.resize(batch_max_rows()); } do { @@ -372,7 +382,15 @@ Status BlockReader::_unique_key_next_block(Block* block, bool* eof) { LOG(WARNING) << "next failed: " << res; return res; } - } while (target_block_row < _reader_context.batch_size); + // Byte-budget check: _next_row is already saved so stopping here is safe. + if (target_block_row % BLOCK_SIZE_CHECK_INTERVAL_ROWS == 0 && + _reached_byte_budget(target_columns)) { + if (UNLIKELY(_reader_context.record_rowids)) { + _block_row_locations.resize(target_block_row); + } + break; + } + } while (target_block_row < batch_max_rows()); if (_delete_sign_available) { int delete_sign_idx = _reader_context.tablet_schema->field_index(DELETE_SIGN); @@ -420,6 +438,11 @@ Status BlockReader::_unique_key_next_block(Block* block, bool* eof) { return Status::OK(); } +bool BlockReader::_reached_byte_budget(const MutableColumns& columns) const { + return config::enable_adaptive_batch_size && _reader_context.preferred_block_size_bytes > 0 && + Block::columns_byte_size(columns) >= _reader_context.preferred_block_size_bytes; +} + Status BlockReader::_insert_data_normal(MutableColumns& columns) { auto block = _next_row.block.get(); @@ -436,9 +459,11 @@ void BlockReader::_append_agg_data(MutableColumns& columns) { _stored_row_ref.push_back(_next_row); _last_agg_data_counter++; - // execute aggregate when have `batch_size` column or some ref invalid soon + // execute aggregate when accumulated `batch_max_rows()` rows or some ref invalid soon + // `_stored_data_columns` is sized to `batch_max_rows()`, + // this flush keeps the number of rows in `_stored_row_ref` within `batch_max_rows()`. bool is_last = (_next_row.block->rows() == _next_row.row_pos + 1); - if (is_last || _stored_row_ref.size() == _reader_context.batch_size) { + if (is_last || _stored_row_ref.size() == batch_max_rows()) { _update_agg_data(columns); } } diff --git a/be/src/storage/iterator/block_reader.h b/be/src/storage/iterator/block_reader.h index 88b59971713733..270adf536650d1 100644 --- a/be/src/storage/iterator/block_reader.h +++ b/be/src/storage/iterator/block_reader.h @@ -23,6 +23,7 @@ #include #include +#include "common/config.h" #include "common/status.h" #include "core/block/block.h" #include "core/column/column.h" @@ -53,6 +54,11 @@ class BlockReader final : public TabletReader { return _vcollect_iter.update_profile(profile); } + // Returns the configured preferred output block byte budget; 0 when adaptive is disabled. + size_t preferred_block_size_bytes() const override { + return config::enable_adaptive_batch_size ? _reader_context.preferred_block_size_bytes : 0; + } + private: // Directly read row from rowset and pass to upper caller. No need to do aggregation. 
// This is usually used for DUPLICATE KEY tables @@ -74,6 +80,10 @@ class BlockReader final : public TabletReader { Status _insert_data_normal(MutableColumns& columns); + // Check if the accumulated output columns have reached the preferred byte budget, + // used to limit the output block size for adaptive batch sizing. + bool _reached_byte_budget(const MutableColumns& columns) const; + void _append_agg_data(MutableColumns& columns); void _update_agg_data(MutableColumns& columns); diff --git a/be/src/storage/iterator/vcollect_iterator.cpp b/be/src/storage/iterator/vcollect_iterator.cpp index 04db8011b0661b..cbf37605f4119f 100644 --- a/be/src/storage/iterator/vcollect_iterator.cpp +++ b/be/src/storage/iterator/vcollect_iterator.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include "common/cast_set.h" #include "common/compiler_util.h" // IWYU pragma: keep +#include "common/config.h" #include "common/status.h" #include "core/block/column_with_type_and_name.h" #include "core/column/column.h" @@ -446,7 +448,6 @@ Status VCollectIterator::_topn_next(Block* block) { << " sorted_row_pos.size()=" << sorted_row_pos.size() << " mutable_block.rows()=" << mutable_block.rows(); *block = mutable_block.to_block(); - _topn_eof = true; return block->rows() > 0 ? Status::OK() : Status::Error(""); } @@ -793,6 +794,46 @@ Status VCollectIterator::Level1Iterator::_normal_next(IteratorRowRef* ref) { } } +// Estimate whether the output block has collected enough data to meet the byte budget. +bool estimate_collected_enough(size_t present_bytes, size_t present_rows, int rows_to_merge, + size_t preferred_block_size_bytes) { + DCHECK_GE(rows_to_merge, 0); + + if (preferred_block_size_bytes == 0 || present_rows == 0) { + return false; + } + + if (present_bytes >= preferred_block_size_bytes) { + return true; + } + + // Predict total bytes after flushing the pending rows_to_merge. + const size_t total_rows = static_cast(rows_to_merge) + present_rows; + // Guard against overflow: if multiplication would wrap, the budget is surely exceeded. + if (present_bytes > std::numeric_limits::max() / total_rows) { + return true; + } + return present_bytes * total_rows / present_rows >= preferred_block_size_bytes; +} + +bool VCollectIterator::Level1Iterator::collected_enough_rows(const MutableColumns& columns, + int rows_to_merge) const { + if (!config::enable_adaptive_batch_size) { + return false; + } + + const auto preferred_block_size_bytes = _reader->preferred_block_size_bytes(); + if (preferred_block_size_bytes == 0) { + return false; + } + + const auto present_bytes = Block::columns_byte_size(columns); + const auto present_rows = columns.empty() ? 
0 : columns[0]->size(); + + return estimate_collected_enough(present_bytes, present_rows, rows_to_merge, + preferred_block_size_bytes); +} + Status VCollectIterator::Level1Iterator::_merge_next(Block* block) { SCOPED_RAW_TIMER(&_reader->_stats.collect_iterator_merge_next_timer); int target_block_row = 0; @@ -806,7 +847,7 @@ Status VCollectIterator::Level1Iterator::_merge_next(Block* block) { block->insert(cur_row.block->get_by_position(i).clone_empty()); } - auto batch_size = _reader->batch_size(); + auto batch_size = _reader->batch_max_rows(); if (UNLIKELY(_reader->_reader_context.record_rowids)) { _block_row_locations.resize(batch_size); } @@ -870,6 +911,24 @@ Status VCollectIterator::Level1Iterator::_merge_next(Block* block) { continuous_row_in_block = 0; pre_row_ref = cur_row; } + + // Byte-budget check: _merge_next() has already advanced _ref to the next unread row, + // so it is safe to stop here without duplicating any data. + if (collected_enough_rows(target_columns, continuous_row_in_block)) { + if (continuous_row_in_block > 0) { + const auto& src_block = pre_row_ref.block; + for (size_t i = 0; i < column_count; ++i) { + target_columns[i]->insert_range_from(*(src_block->get_by_position(i).column), + pre_row_ref.row_pos, + continuous_row_in_block); + } + } + if (UNLIKELY(_reader->_reader_context.record_rowids)) { + _block_row_locations.resize(target_block_row); + } + block->set_columns(std::move(target_columns)); + return Status::OK(); + } } while (true); return Status::OK(); diff --git a/be/src/storage/iterator/vcollect_iterator.h b/be/src/storage/iterator/vcollect_iterator.h index 4201546c04882b..710d6f7903e5f1 100644 --- a/be/src/storage/iterator/vcollect_iterator.h +++ b/be/src/storage/iterator/vcollect_iterator.h @@ -47,6 +47,12 @@ namespace doris { class TabletSchema; class RuntimeProfile; +// Pure-computation helper: estimate whether collected data meets the byte budget +// after flushing rows_to_merge additional rows. Extracted from Level1Iterator so +// it can be unit-tested independently. +bool estimate_collected_enough(size_t present_bytes, size_t present_rows, int rows_to_merge, + size_t preferred_block_size_bytes); + class VCollectIterator { public: // Hold reader point to get reader params @@ -303,6 +309,8 @@ class VCollectIterator { void init_level0_iterators_for_union(); + bool collected_enough_rows(const MutableColumns& columns, int rows_to_merge) const; + private: Status _merge_next(IteratorRowRef* ref); @@ -348,6 +356,9 @@ class VCollectIterator { // for topn next size_t _topn_limit = 0; bool _topn_eof = false; + // For chunked topN output when result exceeds byte budget. + Block _topn_result_block; + size_t _topn_result_offset = 0; std::vector _rs_splits; // Hold reader point to access read params, such as fetch conditions. diff --git a/be/src/storage/iterators.h b/be/src/storage/iterators.h index a55f87e0cea561..1c9b551874360c 100644 --- a/be/src/storage/iterators.h +++ b/be/src/storage/iterators.h @@ -111,6 +111,8 @@ class StorageReadOptions { OlapReaderStatistics* stats = nullptr; bool use_page_cache = false; uint32_t block_row_max = 4096 - 32; // see https://github.com/apache/doris/pull/11816 + // Effective adaptive batch size byte budget. 
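+    // 8388608UL is 8 MiB. A value of 0 disables the byte budget so only the fixed block_row_max
+    // row cap applies; with the 8 MiB default and rows of roughly 2 KiB, batches settle near
+    // 4096 rows.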
+ size_t preferred_block_size_bytes = 8388608UL; TabletSchemaSPtr tablet_schema = nullptr; bool enable_unique_key_merge_on_write = false; diff --git a/be/src/storage/olap_common.h b/be/src/storage/olap_common.h index e09146d0cde0b3..9185ec262699bd 100644 --- a/be/src/storage/olap_common.h +++ b/be/src/storage/olap_common.h @@ -444,6 +444,9 @@ struct OlapReaderStatistics { int64_t segment_create_column_readers_timer_ns = 0; int64_t segment_load_index_timer_ns = 0; + int64_t adaptive_batch_size_predict_min_rows = INT64_MAX; + int64_t adaptive_batch_size_predict_max_rows = 0; + int64_t variant_scan_sparse_column_timer_ns = 0; int64_t variant_scan_sparse_column_bytes = 0; int64_t variant_fill_path_from_sparse_column_timer_ns = 0; diff --git a/be/src/storage/rowset/beta_rowset_reader.cpp b/be/src/storage/rowset/beta_rowset_reader.cpp index e4a2b45b21fdaf..94b76272a1019e 100644 --- a/be/src/storage/rowset/beta_rowset_reader.cpp +++ b/be/src/storage/rowset/beta_rowset_reader.cpp @@ -97,6 +97,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context // convert RowsetReaderContext to StorageReadOptions _read_options.block_row_max = read_context->batch_size; + _read_options.preferred_block_size_bytes = read_context->preferred_block_size_bytes; _read_options.stats = _stats; _read_options.push_down_agg_type_opt = _read_context->push_down_agg_type_opt; _read_options.remaining_conjunct_roots = _read_context->remaining_conjunct_roots; diff --git a/be/src/storage/rowset/beta_rowset_writer.cpp b/be/src/storage/rowset/beta_rowset_writer.cpp index f41211feb7a4d7..f728e56e0ba811 100644 --- a/be/src/storage/rowset/beta_rowset_writer.cpp +++ b/be/src/storage/rowset/beta_rowset_writer.cpp @@ -360,72 +360,74 @@ Status BaseBetaRowsetWriter::_generate_delete_bitmap(int32_t segment_id) { // Submit the entire delete bitmap calculation process to thread pool for async execution // This avoids blocking memtable flush thread while waiting for file upload to complete // The process includes: file_writer->close(), _build_tmp, load_segments, and calc_delete_bitmap - return _calc_delete_bitmap_token->submit_func( - [this, segment_id, specified_rowsets = std::move(specified_rowsets)]() -> Status { - Status st = Status::OK(); - // Step 1: Close file_writer (must be done before load_segments) - auto* file_writer = _seg_files.get(segment_id); - if (file_writer && file_writer->state() != io::FileWriter::State::CLOSED) { - MonotonicStopWatch close_timer; - close_timer.start(); - st = file_writer->close(); - close_timer.stop(); - - auto close_time_ms = close_timer.elapsed_time_milliseconds(); - if (close_time_ms > 1000) { - LOG(INFO) << "file_writer->close() took " << close_time_ms - << "ms for segment_id=" << segment_id - << ", tablet_id=" << _context.tablet_id - << ", rowset_id=" << _context.rowset_id; - } - if (!st.ok()) { - return st; - } - } + return _calc_delete_bitmap_token->submit_func([this, segment_id, + specified_rowsets = std::move( + specified_rowsets)]() -> Status { + Status st = Status::OK(); + // Step 1: Close file_writer (must be done before load_segments) + auto* file_writer = _seg_files.get(segment_id); + if (file_writer && file_writer->state() != io::FileWriter::State::CLOSED) { + MonotonicStopWatch close_timer; + close_timer.start(); + st = file_writer->close(); + close_timer.stop(); + + auto close_time_ms = close_timer.elapsed_time_milliseconds(); + if (close_time_ms > 1000) { + LOG(INFO) << "file_writer->close() took " << close_time_ms + << "ms for segment_id=" << segment_id + << 
", tablet_id=" << _context.tablet_id + << ", rowset_id=" << _context.rowset_id; + } + if (!st.ok()) { + return st; + } + } - OlapStopWatch watch; - // Step 2: Build tmp rowset (needs file_writer to be closed) - RowsetSharedPtr rowset_ptr; - st = _build_tmp(rowset_ptr); - if (!st.ok()) { - return st; - } + OlapStopWatch watch; + // Step 2: Build tmp rowset (needs file_writer to be closed) + RowsetSharedPtr rowset_ptr; + st = _build_tmp(rowset_ptr); + if (!st.ok()) { + return st; + } - // Step 3: Load segments (needs file_writer to be closed and rowset to be built) - auto* beta_rowset = reinterpret_cast(rowset_ptr.get()); - std::vector segments; - st = beta_rowset->load_segments(segment_id, segment_id + 1, &segments); - if (!st.ok()) { - return st; - } + DBUG_EXECUTE_IF("BaseBetaRowsetWriter::_generate_delete_bitmap.block_before_load_segments", + DBUG_RUN_CALLBACK(segment_id)); - // Step 4: Calculate delete bitmap - st = BaseTablet::calc_delete_bitmap( - _context.tablet, rowset_ptr, segments, specified_rowsets, - _context.mow_context->delete_bitmap, _context.mow_context->max_version, - nullptr, nullptr, nullptr); - if (!st.ok()) { - return st; - } + // Step 3: Load segments (needs file_writer to be closed and rowset to be built) + auto* beta_rowset = reinterpret_cast(rowset_ptr.get()); + std::vector segments; + st = beta_rowset->load_segments(segment_id, segment_id + 1, &segments); + if (!st.ok()) { + return st; + } - size_t total_rows = - std::accumulate(segments.begin(), segments.end(), 0, - [](size_t sum, const segment_v2::SegmentSharedPtr& s) { - return sum += s->num_rows(); - }); - LOG(INFO) << "[Memtable Flush] construct delete bitmap tablet: " - << _context.tablet->tablet_id() - << ", rowset_ids: " << _context.mow_context->rowset_ids->size() - << ", cur max_version: " << _context.mow_context->max_version - << ", transaction_id: " << _context.mow_context->txn_id - << ", delete_bitmap_count: " - << _context.mow_context->delete_bitmap->get_delete_bitmap_count() - << ", delete_bitmap_cardinality: " - << _context.mow_context->delete_bitmap->cardinality() - << ", cost: " << watch.get_elapse_time_us() - << "(us), total rows: " << total_rows; - return Status::OK(); - }); + // Step 4: Calculate delete bitmap + st = BaseTablet::calc_delete_bitmap(_context.tablet, rowset_ptr, segments, + specified_rowsets, _context.mow_context->delete_bitmap, + _context.mow_context->max_version, nullptr, nullptr, + nullptr); + if (!st.ok()) { + return st; + } + + size_t total_rows = std::accumulate(segments.begin(), segments.end(), 0, + [](size_t sum, const segment_v2::SegmentSharedPtr& s) { + return sum += s->num_rows(); + }); + LOG(INFO) << "[Memtable Flush] construct delete bitmap tablet: " + << _context.tablet->tablet_id() + << ", rowset_ids: " << _context.mow_context->rowset_ids->size() + << ", cur max_version: " << _context.mow_context->max_version + << ", transaction_id: " << _context.mow_context->txn_id + << ", delete_bitmap_count: " + << _context.mow_context->delete_bitmap->get_delete_bitmap_count() + << ", delete_bitmap_cardinality: " + << _context.mow_context->delete_bitmap->cardinality() + << ", cost: " << watch.get_elapse_time_us() << "(us), total rows: " << total_rows; + return Status::OK(); + }); } Status BetaRowsetWriter::init(const RowsetWriterContext& rowset_writer_context) { @@ -717,7 +719,12 @@ Status BetaRowsetWriter::_segcompaction_if_necessary() { } else { status = _check_segment_number_limit(_num_segcompacted); } + if (status.ok() && (_num_segment - _segcompacted_point) >= 
config::segcompaction_batch_size) { + if (_calc_delete_bitmap_token != nullptr) { + status = _calc_delete_bitmap_token->wait(); + } + SegCompactionCandidatesSharedPtr segments; status = _find_longest_consecutive_small_segment(segments); if (LIKELY(status.ok()) && (!segments->empty())) { diff --git a/be/src/storage/rowset/rowset_reader_context.h b/be/src/storage/rowset/rowset_reader_context.h index e44733367c8441..c54a39f0a5557d 100644 --- a/be/src/storage/rowset/rowset_reader_context.h +++ b/be/src/storage/rowset/rowset_reader_context.h @@ -73,6 +73,9 @@ struct RowsetReaderContext { bool use_page_cache = false; int sequence_id_idx = -1; int batch_size = 1024; + // Effective adaptive batch size byte budget. 0 means disabled internally. + size_t preferred_block_size_bytes = 8388608UL; + bool is_unique = false; //record row num merged in generic iterator uint64_t* merged_rows = nullptr; diff --git a/be/src/storage/segment/adaptive_block_size_predictor.cpp b/be/src/storage/segment/adaptive_block_size_predictor.cpp new file mode 100644 index 00000000000000..d8cc700f579853 --- /dev/null +++ b/be/src/storage/segment/adaptive_block_size_predictor.cpp @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "storage/segment/adaptive_block_size_predictor.h" + +#include +#include + +#include "core/block/block.h" + +namespace doris { +AdaptiveBlockSizePredictor::AdaptiveBlockSizePredictor(size_t preferred_block_size_bytes, + double metadata_hint_bytes_per_row, + size_t probe_rows, size_t block_size_rows) + : _block_size_bytes(preferred_block_size_bytes), + _block_size_rows(block_size_rows), + _initial_probe_rows(probe_rows), + _metadata_hint_bytes_per_row(metadata_hint_bytes_per_row) {} + +void AdaptiveBlockSizePredictor::update(const Block& block) { + size_t rows = block.rows(); + if (rows == 0) { + return; + } + double cur = static_cast(block.bytes()) / static_cast(rows); + + if (!_has_history) { + _bytes_per_row = cur; + _has_history = true; + } else { + _bytes_per_row = kAlpha * _bytes_per_row + kBeta * cur; + } +} + +size_t AdaptiveBlockSizePredictor::predict_next_rows() { + if (_block_size_bytes == 0) { + return _block_size_rows; + } + + auto clamp_predicted_rows = [&](size_t predicted_rows) { + size_t clamped_rows = std::min(predicted_rows, _block_size_rows); + if (!_has_history) { + clamped_rows = std::min(clamped_rows, _initial_probe_rows); + } + return std::max(size_t(1), clamped_rows); + }; + + double estimated_bytes_per_row = 0.0; + + if (!_has_history) { + if (_metadata_hint_bytes_per_row > 0.0) { + estimated_bytes_per_row = _metadata_hint_bytes_per_row; + } else { + return clamp_predicted_rows(_block_size_rows); + } + } else { + estimated_bytes_per_row = _bytes_per_row; + } + + if (estimated_bytes_per_row <= 0.0) { + return clamp_predicted_rows(_block_size_rows); + } + + auto predicted = + static_cast(static_cast(_block_size_bytes) / estimated_bytes_per_row); + + return clamp_predicted_rows(predicted); +} + +} // namespace doris diff --git a/be/src/storage/segment/adaptive_block_size_predictor.h b/be/src/storage/segment/adaptive_block_size_predictor.h new file mode 100644 index 00000000000000..e03f18c2a536d2 --- /dev/null +++ b/be/src/storage/segment/adaptive_block_size_predictor.h @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include + +#include "storage/olap_common.h" + +namespace doris { + +class Block; + +// Predicts the number of rows to read in the next batch so that the resulting Block stays close +// to |preferred_block_size_bytes|. +// +// The predictor maintains an EWMA estimate of bytes-per-row for the whole block. After each +// successful batch the caller must invoke update(); before each batch the caller invokes +// predict_next_rows() to obtain the recommended row count. +// +// Not thread-safe; must be used by a single thread per instance. 
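//
// A minimal usage sketch of the update()/predict_next_rows() protocol described above
// (illustrative only; `read_rows` and `has_more_data` are hypothetical stand-ins for the
// caller's own read loop):
//
//   AdaptiveBlockSizePredictor predictor(8 * 1024 * 1024 /* preferred bytes per block */,
//                                        /*metadata_hint_bytes_per_row=*/0.0);
//   Block block;
//   while (has_more_data()) {
//       size_t rows = predictor.predict_next_rows(); // first call is capped by probe_rows
//       RETURN_IF_ERROR(read_rows(rows, &block));    // hypothetical read helper
//       if (block.rows() > 0) {
//           predictor.update(block);                 // folds bytes()/rows() into the EWMA
//       }
//   }
//
// With kAlpha = 0.9 and kBeta = 0.1, a batch measured at 100 bytes/row moves a prior estimate
// of 200 bytes/row to 0.9 * 200 + 0.1 * 100 = 190 bytes/row, so the estimate adapts gradually
// instead of swinging on a single outlier batch.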
+class AdaptiveBlockSizePredictor { +public: + static constexpr size_t kDefaultProbeRows = 4096; + static constexpr size_t kDefaultBlockSizeRows = 65535; + + // Per-column metadata for computing segment-level hints. + struct ColumnMetadata { + ColumnId column_id; + uint64_t raw_bytes; // total raw data bytes for this column in the segment + }; + + // |preferred_block_size_bytes|: target total bytes of each output block chunk. + // |metadata_hint_bytes_per_row|: pre-computed conservative estimate from metadata (e.g. + // segment footer or file statistics). 0.0 means no hint available. + // |probe_rows|: first-batch row cap before any real history is available. + // |block_size_rows|: hard maximum rows of each output block chunk. + AdaptiveBlockSizePredictor(size_t preferred_block_size_bytes, + double metadata_hint_bytes_per_row, + size_t probe_rows = kDefaultProbeRows, + size_t block_size_rows = kDefaultBlockSizeRows); + + // Update EWMA estimates from a completed batch. Must be called only when block.rows() > 0 + // and the batch returned Status::OK(). + void update(const Block& block); + + // Predict how many rows the next batch should read. + // Never exceeds |block_size_rows|; never returns less than 1. + // Uses pre-computed metadata hint for first-call estimate when no history exists. + // Does NOT modify internal state (_has_history is only flipped by update()). + size_t predict_next_rows(); + + bool has_history() const { return _has_history; } + +private: + // EWMA weight for historical estimate (0.9) and current sample (0.1). + static constexpr double kAlpha = 0.9; + static constexpr double kBeta = 0.1; + + const size_t _block_size_bytes; + const size_t _block_size_rows; + const size_t _initial_probe_rows; + + // EWMA estimate of total bytes per row across all output columns. + double _bytes_per_row = 0.0; + + // Whether at least one update() has been called (i.e. we have real measured history). + bool _has_history = false; + + // Cached conservative metadata estimate computed on the first predict_next_rows() call. + // Reused on subsequent first-round predictions (before _has_history is set) to avoid + // re-traversing the segment footer on every call. + double _metadata_hint_bytes_per_row = 0.0; + +#ifdef BE_TEST +public: + double bytes_per_row_for_test() const { return _bytes_per_row; } + bool has_history_for_test() const { return _has_history; } + size_t probe_rows_for_test() const { return _initial_probe_rows; } + size_t block_size_rows_for_test() const { return _block_size_rows; } + static constexpr size_t default_probe_rows_for_test() { return kDefaultProbeRows; } + static constexpr size_t default_block_size_rows_for_test() { return kDefaultBlockSizeRows; } + void set_metadata_hint_for_test(double v) { _metadata_hint_bytes_per_row = v; } + void set_has_history_for_test(bool h, double bpr) { + _has_history = h; + _bytes_per_row = bpr; + } +#endif +}; + +} // namespace doris diff --git a/be/src/storage/segment/segment.cpp b/be/src/storage/segment/segment.cpp index ec0b706bb54205..7563299a856826 100644 --- a/be/src/storage/segment/segment.cpp +++ b/be/src/storage/segment/segment.cpp @@ -29,6 +29,7 @@ #include #include "cloud/config.h" +#include "common/config.h" #include "common/exception.h" #include "common/logging.h" #include "common/status.h" @@ -617,6 +618,22 @@ Status Segment::_create_column_meta(const SegmentFooterPB& footer) { // Initialize column meta accessor which internally maintains uid -> column_ordinal mapping. 
_column_meta_accessor = std::make_unique(); RETURN_IF_ERROR(_column_meta_accessor->init(footer, _file_reader)); + + if (config::enable_adaptive_batch_size) { + // Cache raw_data_bytes per column uid for adaptive batch size prediction. + // This runs under call_once, so no thread-safety concerns. + auto st = _column_meta_accessor->traverse_metas(footer, [this](const ColumnMetaPB& meta) { + if (meta.has_unique_id() && meta.unique_id() != -1 && meta.has_raw_data_bytes()) { + _column_uid_to_raw_bytes[meta.unique_id()] = meta.raw_data_bytes(); + } + }); + + if (!st.ok()) { + LOG(WARNING) << "Failed to traverse column metas to cache raw_data_bytes, error: " + << st.to_string(); + } + } + _column_reader_cache = std::make_unique( _column_meta_accessor.get(), _tablet_schema, _file_reader, _num_rows, [this](std::shared_ptr& footer_pb, OlapReaderStatistics* stats) { diff --git a/be/src/storage/segment/segment.h b/be/src/storage/segment/segment.h index eb23c74943713f..3eb3a018e42a2e 100644 --- a/be/src/storage/segment/segment.h +++ b/be/src/storage/segment/segment.h @@ -30,6 +30,7 @@ #include #include "agent/be_exec_version_manager.h" +#include "common/be_mock_util.h" #include "common/status.h" // Status #include "core/column/column.h" #include "core/data_type/data_type.h" @@ -116,7 +117,7 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd RowsetId rowset_id() const { return _rowset_id; } - uint32_t num_rows() const { return _num_rows; } + MOCK_FUNCTION uint32_t num_rows() const { return _num_rows; } // if variant_sparse_column_cache is nullptr, means the sparse column cache is not used Status new_column_iterator(const TabletColumn& tablet_column, @@ -201,7 +202,7 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd } } - const TabletSchemaSPtr& tablet_schema() { return _tablet_schema; } + const TabletSchemaSPtr& tablet_schema() const { return _tablet_schema; } // get the column reader by tablet column, return NOT_FOUND if not found reader in this segment Status get_column_reader(const TabletColumn& col, std::shared_ptr* column_reader, @@ -213,6 +214,13 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd Status traverse_column_meta_pbs(const std::function& visitor); + // Returns the cached raw_data_bytes for the given column unique id, or 0 if not found. + // Data is populated during _create_column_meta (under call_once), so thread-safe after init. + uint64_t column_raw_data_bytes(int32_t column_uid) const { + auto it = _column_uid_to_raw_bytes.find(column_uid); + return it != _column_uid_to_raw_bytes.end() ? it->second : 0; + } + static StoragePageCache::CacheKey get_segment_footer_cache_key( const io::FileReaderSPtr& file_reader); @@ -287,6 +295,9 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd std::weak_ptr _footer_pb; + // Cached raw_data_bytes per column unique id, populated once in _create_column_meta(). 
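+    // Consumed by SegmentIterator::_make_block_size_predictor() to derive the per-row metadata
+    // hint (raw bytes of the projected columns divided by the segment row count).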
+ std::unordered_map _column_uid_to_raw_bytes; + // used to hold short key index page in memory PageHandle _sk_index_handle; // short key index decoder diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index f7d0107db88322..e98535fcdb5eb7 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -347,6 +347,40 @@ Status SegmentIterator::init(const StorageReadOptions& opts) { return status; } +std::unique_ptr SegmentIterator::_make_block_size_predictor() const { + if (!config::enable_adaptive_batch_size || _opts.preferred_block_size_bytes == 0) { + return nullptr; + } + + // Collect per-column raw byte metadata from the segment footer for the columns + // this iterator will actually output (defined by _schema, which is built from + // _opts.return_columns). + std::vector col_metadata; + uint32_t seg_rows = _segment->num_rows(); + uint64_t total_raw_bytes = 0; + double metadata_hint_bytes_per_row = 0.0; + if (seg_rows > 0) { + const auto& ts = _segment->tablet_schema(); + if (ts) { + for (ColumnId cid : _schema->column_ids()) { + if (static_cast(cid) < ts->num_columns()) { + int32_t uid = ts->column(cid).unique_id(); + uint64_t raw_bytes = _segment->column_raw_data_bytes(uid); + if (uid >= 0 && raw_bytes > 0) { + total_raw_bytes += raw_bytes; + } + } + } + metadata_hint_bytes_per_row = + static_cast(total_raw_bytes) / static_cast(seg_rows); + } + } + + return std::make_unique( + _opts.preferred_block_size_bytes, metadata_hint_bytes_per_row, + AdaptiveBlockSizePredictor::kDefaultProbeRows, _opts.block_row_max); +} + Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { // get file handle from file descriptor of segment if (_inited) { @@ -369,6 +403,10 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { // Read options will not change, so that just resize here _block_rowids.resize(_opts.block_row_max); + // Adaptive batch size: snapshot the initial row limit and create predictor if enabled. + _initial_block_row_max = _opts.block_row_max; + _block_size_predictor = _make_block_size_predictor(); + _remaining_conjunct_roots = opts.remaining_conjunct_roots; if (_schema->rowid_col_idx() > 0) { @@ -492,10 +530,14 @@ Status SegmentIterator::_lazy_init(Block* block) { _range_iter.reset(new BitmapRangeIterator(_row_bitmap)); } - // If the row bitmap size is smaller than block_row_max, there's no need to reserve that many column rows. - auto nrows_reserve_limit = std::min(_row_bitmap.cardinality(), uint64_t(_opts.block_row_max)); + // Reserve columns for _initial_block_row_max (the original max before any adaptive + // prediction) because the predictor may increase block_row_max on subsequent batches + // up to this ceiling. Using the current (possibly reduced) _opts.block_row_max would + // cause heap-buffer-overflow if a later prediction is larger. + auto nrows_reserve_limit = + std::min(_row_bitmap.cardinality(), uint64_t(_initial_block_row_max)); if (_lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval) { - _block_rowids.resize(_opts.block_row_max); + _block_rowids.resize(_initial_block_row_max); } _current_return_columns.resize(_schema->columns().size()); @@ -2510,6 +2552,28 @@ Status SegmentIterator::next_batch(Block* block) { _init_virtual_columns(block); auto status = [&]() { RETURN_IF_CATCH_EXCEPTION({ + // Adaptive batch size: predict how many rows this batch should read. 
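+            // The predicted row count is clamped to _initial_block_row_max so it never exceeds
+            // the buffers reserved for that ceiling in _lazy_init().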
+ if (_block_size_predictor) { + auto predicted = static_cast(_block_size_predictor->predict_next_rows()); + _opts.block_row_max = std::min(predicted, _initial_block_row_max); + _opts.stats->adaptive_batch_size_predict_min_rows = + std::min(_opts.stats->adaptive_batch_size_predict_min_rows, + static_cast(predicted)); + _opts.stats->adaptive_batch_size_predict_max_rows = + std::max(_opts.stats->adaptive_batch_size_predict_max_rows, + static_cast(predicted)); + } else { + // No predictor — record the fixed batch size using min/max so we don't + // clobber values already accumulated by other segment iterators that + // share the same OlapReaderStatistics. + _opts.stats->adaptive_batch_size_predict_min_rows = + std::min(_opts.stats->adaptive_batch_size_predict_min_rows, + static_cast(_opts.block_row_max)); + _opts.stats->adaptive_batch_size_predict_max_rows = + std::max(_opts.stats->adaptive_batch_size_predict_max_rows, + static_cast(_opts.block_row_max)); + } + auto res = _next_batch_internal(block); if (res.is()) { @@ -2555,6 +2619,13 @@ Status SegmentIterator::next_batch(Block* block) { RETURN_IF_ERROR(block->check_type_and_column()); + // Adaptive batch size: update EWMA estimate from the completed batch. + // block->bytes() is accurate here: predicates have been applied and non-predicate + // columns have been filled for surviving rows by _next_batch_internal. + if (_block_size_predictor && block->rows() > 0) { + _block_size_predictor->update(*block); + } + return Status::OK(); }); }(); diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index 142d252af138ee..1d804eb106b5c3 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -53,6 +53,7 @@ #include "storage/predicate/column_predicate.h" #include "storage/row_cursor.h" #include "storage/schema.h" +#include "storage/segment/adaptive_block_size_predictor.h" #include "storage/segment/common.h" #include "storage/segment/segment.h" #include "util/slice.h" @@ -405,6 +406,15 @@ class SegmentIterator : public RowwiseIterator { bool _inited; StorageReadOptions _opts; + // Adaptive batch size predictor; null when the feature is disabled. + std::unique_ptr _block_size_predictor; + // Build the AdaptiveBlockSizePredictor for this segment based on segment footer + // metadata for the projected output columns. Returns nullptr if the feature is + // disabled or the byte budget is non-positive. + std::unique_ptr _make_block_size_predictor() const; + // Snapshot of _opts.block_row_max at init time; used as the hard upper bound so that + // dynamic adjustments never exceed the capacity of pre-allocated buffers. 
+ uint32_t _initial_block_row_max = 0; // make a copy of `_opts.column_predicates` in order to make local changes std::vector> _col_predicates; VExprContextSPtrs _common_expr_ctxs_push_down; diff --git a/be/src/storage/tablet/base_tablet.cpp b/be/src/storage/tablet/base_tablet.cpp index 87079069f553c3..a87e3a75656f87 100644 --- a/be/src/storage/tablet/base_tablet.cpp +++ b/be/src/storage/tablet/base_tablet.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -73,6 +74,103 @@ bvar::LatencyRecorder g_tablet_update_delete_bitmap_latency("doris_pk", "update_ static bvar::Adder g_total_tablet_num("doris_total_tablet_num"); +struct CompactionOutputRowSource { + Version version; + RowLocation src; + bool valid = false; +}; + +struct CompactionOutputPkEntry { + std::string unique_key; + std::string encoded_seq_value; + RowLocation dst; + CompactionOutputRowSource source; +}; + +struct CompactionOutputPkScanner { + uint32_t segment_id = 0; + int64_t remaining = 0; + uint32_t next_ordinal = 0; + std::unique_ptr iter; + DataTypePtr index_type; + CompactionOutputPkEntry current; +}; + +bool is_newer_compaction_output_row(const CompactionOutputPkEntry& lhs, + const CompactionOutputPkEntry& rhs) { + if (lhs.encoded_seq_value != rhs.encoded_seq_value) { + return lhs.encoded_seq_value > rhs.encoded_seq_value; + } + if (lhs.source.version.second != rhs.source.version.second) { + return lhs.source.version.second > rhs.source.version.second; + } + if (lhs.source.version.first != rhs.source.version.first) { + return lhs.source.version.first > rhs.source.version.first; + } + if (lhs.source.src.segment_id != rhs.source.src.segment_id) { + return lhs.source.src.segment_id > rhs.source.src.segment_id; + } + return lhs.source.src.row_id > rhs.source.src.row_id; +} + +Status parse_compaction_output_pk_entry( + const Slice& encoded_key, const RowsetId& output_rowset_id, uint32_t output_segment_id, + size_t seq_col_length, + const std::vector>& output_row_sources, + CompactionOutputPkEntry* entry) { + size_t rowid_length = PrimaryKeyIndexReader::ROW_ID_LENGTH; + if (UNLIKELY(encoded_key.get_size() < seq_col_length + rowid_length)) { + return Status::InternalError("invalid cluster-key MOW primary key size: {}", + encoded_key.get_size()); + } + auto unique_key_length = encoded_key.get_size() - seq_col_length - rowid_length; + entry->unique_key.assign(encoded_key.get_data(), unique_key_length); + entry->encoded_seq_value.assign(encoded_key.get_data() + unique_key_length, seq_col_length); + + Slice rowid_slice(encoded_key.get_data() + unique_key_length + seq_col_length + 1, + rowid_length - 1); + const auto* type_info = get_scalar_type_info(); + const auto* rowid_coder = get_key_coder(type_info->type()); + uint32_t row_id = 0; + RETURN_IF_ERROR(rowid_coder->decode_ascending(&rowid_slice, rowid_length, + reinterpret_cast(&row_id))); + + entry->dst = RowLocation(output_rowset_id, output_segment_id, row_id); + if (UNLIKELY(output_segment_id >= output_row_sources.size() || + row_id >= output_row_sources[output_segment_id].size())) { + return Status::InternalError( + "invalid rowid in cluster-key MOW primary key, segment_id={}, row_id={}", + output_segment_id, row_id); + } + entry->source = output_row_sources[output_segment_id][row_id]; + if (UNLIKELY(!entry->source.valid)) { + return Status::InternalError( + "missing rowid conversion source for output rowset={}, segment_id={}, row_id={}", + output_rowset_id.to_string(), output_segment_id, row_id); + } + return Status::OK(); +} + +Status 
load_next_compaction_output_pk_entry( + const RowsetId& output_rowset_id, size_t seq_col_length, + const std::vector>& output_row_sources, + CompactionOutputPkScanner* scanner) { + if (scanner->remaining <= 0) { + return Status::OK(); + } + + auto index_column = scanner->index_type->create_column(); + size_t num_read = 1; + RETURN_IF_ERROR(scanner->iter->seek_to_ordinal(scanner->next_ordinal++)); + RETURN_IF_ERROR(scanner->iter->next_batch(&num_read, index_column)); + DCHECK_EQ(1, num_read); + --scanner->remaining; + + Slice encoded_key(index_column->get_data_at(0).data, index_column->get_data_at(0).size); + return parse_compaction_output_pk_entry(encoded_key, output_rowset_id, scanner->segment_id, + seq_col_length, output_row_sources, &scanner->current); +} + Status _get_segment_column_iterator(const BetaRowsetSharedPtr& rowset, uint32_t segid, const TabletColumn& target_column, SegmentCacheHandle* segment_cache_handle, @@ -1645,6 +1743,124 @@ void BaseTablet::calc_compaction_output_rowset_delete_bitmap( } } +Status BaseTablet::calc_compaction_output_rowset_internal_delete_bitmap( + const std::vector& input_rowsets, RowsetSharedPtr output_rowset, + const RowIdConversion& rowid_conversion, DeleteBitmap* output_rowset_delete_bitmap) { + DCHECK(!tablet_schema()->cluster_key_uids().empty()); + DCHECK(output_rowset != nullptr); + + std::vector output_segments; + RETURN_IF_ERROR( + std::dynamic_pointer_cast(output_rowset)->load_segments(&output_segments)); + + std::vector> output_row_sources(output_segments.size()); + for (size_t segment_id = 0; segment_id < output_segments.size(); ++segment_id) { + output_row_sources[segment_id].resize(output_segments[segment_id]->num_rows()); + } + + std::map input_rowset_versions; + for (const auto& rowset : input_rowsets) { + input_rowset_versions.emplace(rowset->rowset_id(), rowset->version()); + } + + const auto& rowid_conversion_map = rowid_conversion.get_rowid_conversion_map(); + for (uint32_t source_segment_index = 0; source_segment_index < rowid_conversion_map.size(); + ++source_segment_index) { + auto source_segment = rowid_conversion.get_segment_by_id(source_segment_index); + auto version_iter = input_rowset_versions.find(source_segment.first); + if (UNLIKELY(version_iter == input_rowset_versions.end())) { + return Status::InternalError("missing input rowset version for rowset_id={}", + source_segment.first.to_string()); + } + const auto& source_rowid_map = rowid_conversion_map[source_segment_index]; + for (uint32_t source_rowid = 0; source_rowid < source_rowid_map.size(); ++source_rowid) { + const auto& [dst_segment_id, dst_rowid] = source_rowid_map[source_rowid]; + if (dst_segment_id == UINT32_MAX && dst_rowid == UINT32_MAX) { + continue; + } + if (UNLIKELY(dst_segment_id >= output_row_sources.size() || + dst_rowid >= output_row_sources[dst_segment_id].size())) { + return Status::InternalError( + "invalid rowid conversion destination, rowset_id={}, segment_id={}, " + "row_id={}", + output_rowset->rowset_id().to_string(), dst_segment_id, dst_rowid); + } + output_row_sources[dst_segment_id][dst_rowid] = { + .version = version_iter->second, + .src = RowLocation(source_segment.first, source_segment.second, source_rowid), + .valid = true}; + } + } + + size_t seq_col_length = 0; + if (tablet_schema()->has_sequence_col()) { + seq_col_length = tablet_schema()->column(tablet_schema()->sequence_col_idx()).length() + 1; + } + + struct ScannerComparator { + bool operator()(const CompactionOutputPkScanner* lhs, + const CompactionOutputPkScanner* rhs) const { + 
return lhs->current.unique_key > rhs->current.unique_key; + } + }; + std::priority_queue, + ScannerComparator> + scanners_heap; + std::vector> scanners; + scanners.reserve(output_segments.size()); + + for (uint32_t segment_id = 0; segment_id < output_segments.size(); ++segment_id) { + auto& segment = output_segments[segment_id]; + RETURN_IF_ERROR(segment->load_pk_index_and_bf(nullptr)); + const auto* pk_index = segment->get_primary_key_index(); + DCHECK(pk_index != nullptr); + if (pk_index->num_rows() == 0) { + continue; + } + + auto scanner = std::make_unique(); + scanner->segment_id = segment_id; + scanner->remaining = pk_index->num_rows(); + scanner->index_type = + DataTypeFactory::instance().create_data_type(pk_index->type_info()->type(), 1, 0); + RETURN_IF_ERROR(pk_index->new_iterator(&scanner->iter, nullptr)); + RETURN_IF_ERROR(load_next_compaction_output_pk_entry( + output_rowset->rowset_id(), seq_col_length, output_row_sources, scanner.get())); + scanners_heap.push(scanner.get()); + scanners.push_back(std::move(scanner)); + } + + bool has_current_key = false; + CompactionOutputPkEntry current_visible_entry; + const auto delete_version = output_rowset->version().second; + while (!scanners_heap.empty()) { + auto* scanner = scanners_heap.top(); + scanners_heap.pop(); + auto entry = scanner->current; + + if (!has_current_key || current_visible_entry.unique_key != entry.unique_key) { + current_visible_entry = std::move(entry); + has_current_key = true; + } else if (is_newer_compaction_output_row(entry, current_visible_entry)) { + output_rowset_delete_bitmap->add({current_visible_entry.dst.rowset_id, + current_visible_entry.dst.segment_id, delete_version}, + current_visible_entry.dst.row_id); + current_visible_entry = std::move(entry); + } else { + output_rowset_delete_bitmap->add( + {entry.dst.rowset_id, entry.dst.segment_id, delete_version}, entry.dst.row_id); + } + + if (scanner->remaining > 0) { + RETURN_IF_ERROR(load_next_compaction_output_pk_entry( + output_rowset->rowset_id(), seq_col_length, output_row_sources, scanner)); + scanners_heap.push(scanner); + } + } + + return Status::OK(); +} + Status BaseTablet::check_rowid_conversion( RowsetSharedPtr dst_rowset, const std::map>>& diff --git a/be/src/storage/tablet/base_tablet.h b/be/src/storage/tablet/base_tablet.h index b98a89eb734f2c..7e75e320aca930 100644 --- a/be/src/storage/tablet/base_tablet.h +++ b/be/src/storage/tablet/base_tablet.h @@ -261,6 +261,10 @@ class BaseTablet : public std::enable_shared_from_this { std::map>>* location_map, const DeleteBitmap& input_delete_bitmap, DeleteBitmap* output_rowset_delete_bitmap); + Status calc_compaction_output_rowset_internal_delete_bitmap( + const std::vector& input_rowsets, RowsetSharedPtr output_rowset, + const RowIdConversion& rowid_conversion, DeleteBitmap* output_rowset_delete_bitmap); + Status check_rowid_conversion( RowsetSharedPtr dst_rowset, const std::map>>& diff --git a/be/src/storage/tablet/tablet_reader.h b/be/src/storage/tablet/tablet_reader.h index 6f6683bfaa217a..43da5879874585 100644 --- a/be/src/storage/tablet/tablet_reader.h +++ b/be/src/storage/tablet/tablet_reader.h @@ -133,7 +133,7 @@ class TabletReader { bool direct_mode = false; bool aggregation = false; // for compaction, schema_change, check_sum: we don't use page cache - // for query and config::disable_storage_page_cache is false, we use page cache + // for query, when the BE config disable_storage_page_cache is false, we use page cache bool use_page_cache = false; Version version = Version(-1, 0); @@ 
-246,6 +246,17 @@ class TabletReader { int batch_size() const { return _reader_context.batch_size; } + size_t batch_max_rows() const { return _reader_context.batch_size; } + + void set_preferred_block_size_bytes(size_t bytes) { + _reader_context.preferred_block_size_bytes = bytes; + } + + // Returns the preferred output block byte budget. Subclasses that support adaptive batch size + // should override this; the base returns 0 (disabled) so VCollectIterator degrades safely + // when called through a TabletReader* that has not been configured. + virtual size_t preferred_block_size_bytes() const { return 0; } + const OlapReaderStatistics& stats() const { return _stats; } OlapReaderStatistics* mutable_stats() { return &_stats; } diff --git a/be/src/util/block_budget.h b/be/src/util/block_budget.h new file mode 100644 index 00000000000000..391a213107d5ca --- /dev/null +++ b/be/src/util/block_budget.h @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +namespace doris { + +// Lightweight value type that captures the dual row+byte budget for block +// output sizing. Every operator that needs to respect the adaptive batch +// size feature can construct a BlockBudget from RuntimeState's batch_size() +// and preferred_block_size_bytes() and use the helper methods instead of +// reimplementing the same row/byte logic inline. +// +// Typical usage: +// BlockBudget budget(state->batch_size(), state->preferred_block_size_bytes()); +// size_t eff = budget.effective_max_rows(estimated_row_bytes); +// while (budget.within_budget(block.rows(), block.bytes())) { ... } +// +struct BlockBudget { + size_t max_rows; + size_t max_bytes; // byte budget from preferred_block_size_bytes(), 0 means disabled + + BlockBudget(size_t max_rows_, size_t max_bytes_) : max_rows(max_rows_), max_bytes(max_bytes_) {} + + // Pre-compute effective row limit from an estimated average row byte size. + // When max_bytes == 0 or estimated_row_bytes == 0, returns max_rows. + // Always returns at least 1. + size_t effective_max_rows(size_t estimated_row_bytes) const { + if (max_bytes > 0 && estimated_row_bytes > 0) { + size_t bytes_limit = max_bytes / estimated_row_bytes; + return std::max(size_t(1), std::min(max_rows, bytes_limit)); + } + return max_rows; + } + + // Check if a block with the given rows/bytes is still within budget. + // Use this in loop *continuation* conditions (while/for). + bool within_budget(size_t rows, size_t bytes) const { + return rows < max_rows && (max_bytes == 0 || bytes < max_bytes); + } + + // Check if a block with the given rows/bytes has exceeded the budget. + // Use this in loop *break* conditions. 
+ bool exceeded(size_t rows, size_t bytes) const { + return rows >= max_rows || (max_bytes > 0 && bytes >= max_bytes); + } + + // Compute how many more rows can be added to a block that currently + // has current_rows rows and current_bytes bytes, respecting both the + // row cap and the byte budget. + // The 3-arg overload accepts an explicit estimated_row_bytes (useful when + // the estimate comes from a different source, e.g. a child block). + // The 2-arg overload derives the estimate from current_bytes / current_rows. + // Returns 0 when the block is already at or over budget. + size_t remaining_rows(size_t current_rows, size_t current_bytes, + size_t estimated_row_bytes) const { + size_t row_capacity = (current_rows < max_rows) ? (max_rows - current_rows) : 0; + if (max_bytes > 0 && estimated_row_bytes > 0) { + if (current_bytes >= max_bytes) { + return 0; + } + size_t byte_capacity = (max_bytes - current_bytes) / estimated_row_bytes; + row_capacity = std::min(row_capacity, byte_capacity); + } + return row_capacity; + } + + size_t remaining_rows(size_t current_rows, size_t current_bytes) const { + size_t estimated = + (current_rows > 0 && current_bytes > 0) ? (current_bytes / current_rows) : 0; + return remaining_rows(current_rows, current_bytes, estimated); + } +}; + +} // namespace doris diff --git a/be/test/common/block_budget_test.cpp b/be/test/common/block_budget_test.cpp new file mode 100644 index 00000000000000..6eb6a7be7a57e0 --- /dev/null +++ b/be/test/common/block_budget_test.cpp @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "util/block_budget.h" + +#include + +namespace doris { + +class BlockBudgetTest : public ::testing::Test {}; + +// ── effective_max_rows ────────────────────────────────────────────────────── + +TEST_F(BlockBudgetTest, EffectiveMaxRowsNoByteBudget) { + BlockBudget b(4096, 0); + EXPECT_EQ(b.effective_max_rows(100), 4096); + EXPECT_EQ(b.effective_max_rows(0), 4096); +} + +TEST_F(BlockBudgetTest, EffectiveMaxRowsZeroEstimate) { + BlockBudget b(4096, 8 * 1024 * 1024); + // When estimate is 0, fall back to max_rows. 
+ EXPECT_EQ(b.effective_max_rows(0), 4096); +} + +TEST_F(BlockBudgetTest, EffectiveMaxRowsByteLimited) { + // 8 MB budget, 10 KB per row → 819 rows (< 4096 max_rows) + BlockBudget b(4096, 8 * 1024 * 1024); + EXPECT_EQ(b.effective_max_rows(10 * 1024), 819); +} + +TEST_F(BlockBudgetTest, EffectiveMaxRowsRowLimited) { + // 8 MB budget, 10 bytes per row → 838860 rows, but max_rows = 4096 + BlockBudget b(4096, 8 * 1024 * 1024); + EXPECT_EQ(b.effective_max_rows(10), 4096); +} + +TEST_F(BlockBudgetTest, EffectiveMaxRowsReturnsAtLeastOne) { + // Huge rows: 100 MB per row, 8 MB budget → 0, but clamped to 1 + BlockBudget b(4096, 8 * 1024 * 1024); + EXPECT_EQ(b.effective_max_rows(100 * 1024 * 1024), 1); +} + +// ── within_budget / exceeded ──────────────────────────────────────────────── + +TEST_F(BlockBudgetTest, WithinBudgetNoByteBudget) { + BlockBudget b(100, 0); + EXPECT_TRUE(b.within_budget(0, 0)); + EXPECT_TRUE(b.within_budget(99, 999999999)); + EXPECT_FALSE(b.within_budget(100, 0)); + EXPECT_FALSE(b.within_budget(200, 0)); +} + +TEST_F(BlockBudgetTest, WithinBudgetWithByteBudget) { + BlockBudget b(100, 1000); + EXPECT_TRUE(b.within_budget(50, 500)); // both under + EXPECT_FALSE(b.within_budget(100, 500)); // rows hit + EXPECT_FALSE(b.within_budget(50, 1000)); // bytes hit + EXPECT_FALSE(b.within_budget(100, 1000)); // both hit +} + +TEST_F(BlockBudgetTest, ExceededIsInverseOfWithinBudget) { + BlockBudget b(100, 1000); + // Note: exceeded uses >=, within_budget uses <, so they should be + // perfect logical inverses. + for (size_t r : {0, 50, 99, 100, 200}) { + for (size_t bytes : {0, 500, 999, 1000, 2000}) { + EXPECT_EQ(b.exceeded(r, bytes), !b.within_budget(r, bytes)) + << "r=" << r << " bytes=" << bytes; + } + } +} + +// ── remaining_rows ────────────────────────────────────────────────────────── + +TEST_F(BlockBudgetTest, RemainingRowsNoByteBudget) { + BlockBudget b(100, 0); + EXPECT_EQ(b.remaining_rows(0, 0), 100); + EXPECT_EQ(b.remaining_rows(60, 9999), 40); + EXPECT_EQ(b.remaining_rows(100, 0), 0); + EXPECT_EQ(b.remaining_rows(200, 0), 0); +} + +TEST_F(BlockBudgetTest, RemainingRowsByteLimited) { + // max_rows=100, max_bytes=1000, current: 50 rows, 600 bytes + // avg = 12 bytes/row, byte_capacity = (1000-600)/12 = 33 + // row_capacity = 100 - 50 = 50 + // result = min(50, 33) = 33 + BlockBudget b(100, 1000); + EXPECT_EQ(b.remaining_rows(50, 600), 33); +} + +TEST_F(BlockBudgetTest, RemainingRowsAlreadyOverByteBudget) { + BlockBudget b(100, 1000); + EXPECT_EQ(b.remaining_rows(50, 1000), 0); + EXPECT_EQ(b.remaining_rows(50, 2000), 0); +} + +TEST_F(BlockBudgetTest, RemainingRowsZeroCurrentRows) { + // No rows yet → can't estimate avg_row_bytes, fall back to row capacity + BlockBudget b(100, 1000); + EXPECT_EQ(b.remaining_rows(0, 0), 100); +} + +TEST_F(BlockBudgetTest, RemainingRowsZeroCurrentBytes) { + // Has rows but zero bytes → can't estimate avg, fall back to row capacity + BlockBudget b(100, 1000); + EXPECT_EQ(b.remaining_rows(50, 0), 50); +} + +} // namespace doris diff --git a/be/test/exec/pipeline/local_exchanger_test.cpp b/be/test/exec/pipeline/local_exchanger_test.cpp index 2a1bb3ddfc2785..68e42efe35c0d4 100644 --- a/be/test/exec/pipeline/local_exchanger_test.cpp +++ b/be/test/exec/pipeline/local_exchanger_test.cpp @@ -134,7 +134,8 @@ TEST_F(LocalExchangerTest, ShuffleExchanger) { auto* get_block_failed_counter = ADD_TIMER(profile, "_get_block_failed_counter" + std::to_string(i)); auto* copy_data_timer = ADD_TIMER(profile, "_copy_data_timer" + std::to_string(i)); - _local_states[i] 
= std::make_unique(nullptr, nullptr); + _local_states[i] = + std::make_unique(_runtime_state.get(), nullptr); _local_states[i]->_exchanger = shared_state->exchanger.get(); _local_states[i]->_get_block_failed_counter = get_block_failed_counter; _local_states[i]->_copy_data_timer = copy_data_timer; @@ -362,7 +363,8 @@ TEST_F(LocalExchangerTest, PassthroughExchanger) { auto* get_block_failed_counter = ADD_TIMER(profile, "_get_block_failed_counter" + std::to_string(i)); auto* copy_data_timer = ADD_TIMER(profile, "_copy_data_timer" + std::to_string(i)); - _local_states[i] = std::make_unique(nullptr, nullptr); + _local_states[i] = + std::make_unique(_runtime_state.get(), nullptr); _local_states[i]->_exchanger = shared_state->exchanger.get(); _local_states[i]->_get_block_failed_counter = get_block_failed_counter; _local_states[i]->_copy_data_timer = copy_data_timer; @@ -562,7 +564,8 @@ TEST_F(LocalExchangerTest, PassToOneExchanger) { auto* get_block_failed_counter = ADD_TIMER(profile, "_get_block_failed_counter" + std::to_string(i)); auto* copy_data_timer = ADD_TIMER(profile, "_copy_data_timer" + std::to_string(i)); - _local_states[i] = std::make_unique(nullptr, nullptr); + _local_states[i] = + std::make_unique(_runtime_state.get(), nullptr); _local_states[i]->_exchanger = shared_state->exchanger.get(); _local_states[i]->_get_block_failed_counter = get_block_failed_counter; _local_states[i]->_copy_data_timer = copy_data_timer; @@ -770,7 +773,8 @@ TEST_F(LocalExchangerTest, BroadcastExchanger) { auto* get_block_failed_counter = ADD_TIMER(profile, "_get_block_failed_counter" + std::to_string(i)); auto* copy_data_timer = ADD_TIMER(profile, "_copy_data_timer" + std::to_string(i)); - _local_states[i] = std::make_unique(nullptr, nullptr); + _local_states[i] = + std::make_unique(_runtime_state.get(), nullptr); _local_states[i]->_exchanger = shared_state->exchanger.get(); _local_states[i]->_get_block_failed_counter = get_block_failed_counter; _local_states[i]->_copy_data_timer = copy_data_timer; @@ -973,7 +977,8 @@ TEST_F(LocalExchangerTest, AdaptivePassthroughExchanger) { auto* get_block_failed_counter = ADD_TIMER(profile, "_get_block_failed_counter" + std::to_string(i)); auto* copy_data_timer = ADD_TIMER(profile, "_copy_data_timer" + std::to_string(i)); - _local_states[i] = std::make_unique(nullptr, nullptr); + _local_states[i] = + std::make_unique(_runtime_state.get(), nullptr); _local_states[i]->_exchanger = shared_state->exchanger.get(); _local_states[i]->_get_block_failed_counter = get_block_failed_counter; _local_states[i]->_copy_data_timer = copy_data_timer; @@ -1208,7 +1213,8 @@ TEST_F(LocalExchangerTest, TestShuffleExchangerWrongMap) { auto* get_block_failed_counter = ADD_TIMER(profile, "_get_block_failed_counter" + std::to_string(i)); auto* copy_data_timer = ADD_TIMER(profile, "_copy_data_timer" + std::to_string(i)); - _local_states[i] = std::make_unique(nullptr, nullptr); + _local_states[i] = + std::make_unique(_runtime_state.get(), nullptr); _local_states[i]->_exchanger = shared_state->exchanger.get(); _local_states[i]->_get_block_failed_counter = get_block_failed_counter; _local_states[i]->_copy_data_timer = copy_data_timer; diff --git a/be/test/format/csv/csv_reader_test.cpp b/be/test/format/csv/csv_reader_test.cpp new file mode 100644 index 00000000000000..498ce615302eef --- /dev/null +++ b/be/test/format/csv/csv_reader_test.cpp @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format/csv/csv_reader.h" + +#include + +#include + +#include "testutil/mock/mock_runtime_state.h" + +namespace doris { + +// Test that set_batch_size stores the value correctly. +TEST(CsvReaderSetBatchSizeTest, SetBatchSizeStoresValue) { + TFileScanRangeParams params; + params.format_type = TFileFormatType::FORMAT_CSV_PLAIN; + params.__isset.file_attributes = true; + params.file_attributes.__isset.text_params = true; + params.file_attributes.text_params.column_separator = ","; + params.file_attributes.text_params.line_delimiter = "\n"; + + TFileRangeDesc range; + range.path = "/nonexistent/test.csv"; + range.start_offset = 0; + range.size = 0; + + auto runtime_state = std::make_unique(); + + std::vector file_slot_descs; + auto reader = CsvReader::create_unique(runtime_state.get(), nullptr, nullptr, params, range, + file_slot_descs, runtime_state->batch_size(), nullptr); + + // Default: _batch_size should be 0 (not set) + // After set_batch_size, it should store the value + reader->set_batch_size(128); + // We can only verify indirectly that it was stored; the value is used + // inside get_next_block(). Since we can't call get_next_block without + // a fully initialized reader, we verify the interface doesn't crash. + + reader->set_batch_size(256); + // Calling set_batch_size multiple times should be safe. + + reader->set_batch_size(0); + // Setting to 0 should revert to default behavior. +} + +// Test that set_batch_size is callable via the GenericReader interface. +TEST(CsvReaderSetBatchSizeTest, SetBatchSizeViaGenericInterface) { + TFileScanRangeParams params; + params.format_type = TFileFormatType::FORMAT_CSV_PLAIN; + params.__isset.file_attributes = true; + params.file_attributes.__isset.text_params = true; + params.file_attributes.text_params.column_separator = ","; + params.file_attributes.text_params.line_delimiter = "\n"; + + TFileRangeDesc range; + range.path = "/nonexistent/test.csv"; + range.start_offset = 0; + range.size = 0; + + auto runtime_state = std::make_unique(); + + std::vector file_slot_descs; + auto reader = CsvReader::create_unique(runtime_state.get(), nullptr, nullptr, params, range, + file_slot_descs, runtime_state->batch_size(), nullptr); + + // Access through base class pointer — this is how FileScanner calls it. + GenericReader* base_reader = reader.get(); + base_reader->set_batch_size(128); + base_reader->set_batch_size(4096); +} + +} // namespace doris diff --git a/be/test/format/json/json_reader_test.cpp b/be/test/format/json/json_reader_test.cpp new file mode 100644 index 00000000000000..920d3ea0f9f041 --- /dev/null +++ b/be/test/format/json/json_reader_test.cpp @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include + +#include "format/json/new_json_reader.h" + +namespace doris { + +static constexpr size_t kDefaultBatchSize = 4064; + +// Test that set_batch_size stores the value correctly. +TEST(NewJsonReaderSetBatchSizeTest, SetBatchSizeStoresValue) { + TFileScanRangeParams params; + params.format_type = TFileFormatType::FORMAT_JSON; + params.__isset.file_attributes = true; + params.file_attributes.__isset.text_params = true; + params.file_attributes.text_params.line_delimiter = "\n"; + + TFileRangeDesc range; + range.path = "/nonexistent/test.json"; + range.start_offset = 0; + range.size = 0; + + std::vector file_slot_descs; + // Use the second constructor (profile, params, range, file_slot_descs, io_ctx) + // to avoid the first constructor's ADD_TIMER(_profile, ...) which crashes on nullptr. + auto reader = NewJsonReader::create_unique(nullptr, params, range, file_slot_descs, + kDefaultBatchSize, nullptr); + + // Default: _batch_size is initialized to _MIN_BATCH_SIZE. + EXPECT_EQ(reader->get_batch_size(), 4064U); + + // After set_batch_size, it should store the value (clamped to >=_MIN_BATCH_SIZE). + reader->set_batch_size(8192); + EXPECT_EQ(reader->get_batch_size(), 8192U); + + // Calling set_batch_size multiple times should update the value. + reader->set_batch_size(16384); + EXPECT_EQ(reader->get_batch_size(), 16384U); + + // Setting below _MIN_BATCH_SIZE (or 0) clamps to 1 so the + // reader never spins on empty blocks. + reader->set_batch_size(0); + EXPECT_EQ(reader->get_batch_size(), 1UL); +} + +// Test that set_batch_size is callable via the GenericReader interface. +TEST(NewJsonReaderSetBatchSizeTest, SetBatchSizeViaGenericInterface) { + TFileScanRangeParams params; + params.format_type = TFileFormatType::FORMAT_JSON; + params.__isset.file_attributes = true; + params.file_attributes.__isset.text_params = true; + params.file_attributes.text_params.line_delimiter = "\n"; + + TFileRangeDesc range; + range.path = "/nonexistent/test.json"; + range.start_offset = 0; + range.size = 0; + + std::vector file_slot_descs; + // Use the second constructor to avoid nullptr profile crash in ADD_TIMER. + auto reader = NewJsonReader::create_unique(nullptr, params, range, file_slot_descs, + kDefaultBatchSize, nullptr); + + // Access through base class pointer — this is how FileScanner calls it. 
+ GenericReader* base_reader = reader.get(); + base_reader->set_batch_size(8192); + EXPECT_EQ(base_reader->get_batch_size(), 8192U); + base_reader->set_batch_size(4096); + EXPECT_EQ(base_reader->get_batch_size(), 4096U); +} + +} // namespace doris diff --git a/be/test/format/orc/orc_convert_dict_test.cpp b/be/test/format/orc/orc_convert_dict_test.cpp index 0e64590e16014c..3a4333f5f27c49 100644 --- a/be/test/format/orc/orc_convert_dict_test.cpp +++ b/be/test/format/orc/orc_convert_dict_test.cpp @@ -81,7 +81,7 @@ TEST_F(OrcReaderConvertDictTest, ConvertDictColumnToStringColumnBasic) { TFileScanRangeParams params; TFileRangeDesc range; - auto reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); // Execute conversion auto result_column = reader->_convert_dict_column_to_string_column( @@ -118,7 +118,7 @@ TEST_F(OrcReaderConvertDictTest, ConvertDictColumnToStringColumnWithNulls) { TFileScanRangeParams params; TFileRangeDesc range; - auto _reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto _reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); // Execute conversion auto result_column = _reader->_convert_dict_column_to_string_column( @@ -150,7 +150,7 @@ TEST_F(OrcReaderConvertDictTest, ConvertDictColumnToStringColumnChar) { auto orc_type_ptr = createPrimitiveType(orc::TypeKind::CHAR); TFileScanRangeParams params; TFileRangeDesc range; - auto _reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto _reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); // Execute conversion auto result_column = _reader->_convert_dict_column_to_string_column( @@ -181,7 +181,7 @@ TEST_F(OrcReaderConvertDictTest, ConvertDictColumnToStringColumnEmpty) { auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING); TFileScanRangeParams params; TFileRangeDesc range; - auto _reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto _reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); // Execute conversion auto result_column = _reader->_convert_dict_column_to_string_column( dict_column.get(), nullptr, string_batch.get(), orc_type_ptr.get()); @@ -213,7 +213,7 @@ TEST_F(OrcReaderConvertDictTest, ConvertDictColumnToStringColumnMixed) { auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING); TFileScanRangeParams params; TFileRangeDesc range; - auto _reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto _reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); // Execute conversion auto result_column = _reader->_convert_dict_column_to_string_column( dict_column.get(), &null_map, string_batch.get(), orc_type_ptr.get()); diff --git a/be/test/format/orc/orc_reader_fill_data_test.cpp b/be/test/format/orc/orc_reader_fill_data_test.cpp index 574f7cf26a7fdb..12c1dd209c585b 100644 --- a/be/test/format/orc/orc_reader_fill_data_test.cpp +++ b/be/test/format/orc/orc_reader_fill_data_test.cpp @@ -80,7 +80,7 @@ TEST_F(OrcReaderFillDataTest, TestFillLongColumn) { TFileScanRangeParams params; TFileRangeDesc range; - auto reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); MutableColumnPtr xx = column->assume_mutable(); @@ -106,7 +106,7 @@ TEST_F(OrcReaderFillDataTest, 
TestFillLongColumnWithNull) { TFileScanRangeParams params; TFileRangeDesc range; - auto reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); MutableColumnPtr xx = column->assume_mutable(); @@ -160,7 +160,7 @@ TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) { TFileScanRangeParams params; TFileRangeDesc range; - auto reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); auto doris_struct_type = std::make_shared( std::vector { @@ -246,7 +246,7 @@ TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) { TFileScanRangeParams params; TFileRangeDesc range; - auto reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); auto doris_struct_type = std::make_shared( std::vector {std::make_shared(), @@ -332,7 +332,7 @@ TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) { TFileScanRangeParams params; TFileRangeDesc range; - auto reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); auto doris_struct_type = std::make_shared( std::vector {std::make_shared(18, 5)}, @@ -446,7 +446,7 @@ TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) { TFileScanRangeParams params; TFileRangeDesc range; - auto reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); auto doris_struct_type = std::make_shared(std::make_shared(), std::make_shared()); diff --git a/be/test/format/orc/orc_reader_init_column_test.cpp b/be/test/format/orc/orc_reader_init_column_test.cpp index 4005edcf8fb7e6..00d165dedfc810 100644 --- a/be/test/format/orc/orc_reader_init_column_test.cpp +++ b/be/test/format/orc/orc_reader_init_column_test.cpp @@ -53,7 +53,7 @@ TEST_F(OrcReaderInitColumnTest, InitReadColumn) { TFileScanRangeParams params; TFileRangeDesc range; - auto reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); reader->_reader = std::move(orc_reader); std::vector tmp; tmp.emplace_back("col1"); @@ -72,7 +72,7 @@ TEST_F(OrcReaderInitColumnTest, CheckAcidSchemaTest) { using namespace orc; TFileScanRangeParams params; TFileRangeDesc range; - auto _reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto _reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); // 1. Test standard ACID schema { // Create standard ACID structure @@ -139,7 +139,7 @@ TEST_F(OrcReaderInitColumnTest, RemoveAcidTest) { using namespace orc; TFileScanRangeParams params; TFileRangeDesc range; - auto _reader = OrcReader::create_unique(params, range, "", nullptr, nullptr, true); + auto _reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); // 1. 
Test removing ACID info from ACID schema { // Create ACID schema diff --git a/be/test/format/orc/orc_reader_test.cpp b/be/test/format/orc/orc_reader_test.cpp index 932b345c461525..dd8ecbe720ec97 100644 --- a/be/test/format/orc/orc_reader_test.cpp +++ b/be/test/format/orc/orc_reader_test.cpp @@ -83,7 +83,7 @@ class OrcReaderTest : public testing::Test { range.path = "./be/test/exec/test_data/orc_scanner/orders.orc"; range.start_offset = 0; range.size = 1293; - auto reader = OrcReader::create_unique(params, range, "", nullptr, &cache, true); + auto reader = OrcReader::create_unique(params, range, 4096, "", nullptr, &cache, true); auto status = reader->init_reader(&column_names, &col_name_to_block_idx, {}, false, tuple_desc, &row_desc, nullptr, nullptr); EXPECT_TRUE(status.ok()); diff --git a/be/test/runtime/runtime_state_block_budget_test.cpp b/be/test/runtime/runtime_state_block_budget_test.cpp new file mode 100644 index 00000000000000..22ebc5ebf8a0ee --- /dev/null +++ b/be/test/runtime/runtime_state_block_budget_test.cpp @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
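+
+// Expected behaviour exercised by the cases below (inferred from these tests,
+// not from the RuntimeState implementation itself):
+//   - batch_size(): defaults to 4062 and is clamped to the range [1, 65535].
+//   - preferred_block_size_bytes(): defaults to 8MB and is clamped to
+//     [1MB, 512MB] while config::enable_adaptive_batch_size is on; when the
+//     config is off it reports the 512MB cap, which effectively disables the
+//     byte budget.
+//   - MockRuntimeState: bypasses the byte clamping (while adaptive is on) so
+//     unit tests can inject arbitrary budgets, and returns its _batch_size
+//     member directly.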
+ +#include + +#include "common/config.h" +#include "runtime/runtime_state.h" +#include "testutil/mock/mock_runtime_state.h" +#include "util/block_budget.h" + +namespace doris { + +// --------------------------------------------------------------------------- +// RuntimeState::batch_size() +// --------------------------------------------------------------------------- + +class RuntimeStateBatchSizeTest : public ::testing::Test { +protected: + RuntimeState state; +}; + +TEST_F(RuntimeStateBatchSizeTest, DefaultWhenUnset) { + EXPECT_EQ(state.batch_size(), 4062); +} + +TEST_F(RuntimeStateBatchSizeTest, NormalValue) { + state._query_options.__set_batch_size(4096); + EXPECT_EQ(state.batch_size(), 4096); +} + +TEST_F(RuntimeStateBatchSizeTest, ClampToMin) { + state._query_options.__set_batch_size(0); + EXPECT_EQ(state.batch_size(), 1); + + state._query_options.__set_batch_size(-100); + EXPECT_EQ(state.batch_size(), 1); +} + +TEST_F(RuntimeStateBatchSizeTest, ClampToMax) { + state._query_options.__set_batch_size(100000); + EXPECT_EQ(state.batch_size(), 65535); +} + +TEST_F(RuntimeStateBatchSizeTest, ExactBoundaries) { + state._query_options.__set_batch_size(1); + EXPECT_EQ(state.batch_size(), 1); + + state._query_options.__set_batch_size(65535); + EXPECT_EQ(state.batch_size(), 65535); +} + +TEST_F(RuntimeStateBatchSizeTest, ConstructedBlockBudgetUsesBatchSizeRows) { + state._query_options.__set_batch_size(4096); + EXPECT_EQ(BlockBudget(state.batch_size(), state.preferred_block_size_bytes()).max_rows, 4096UL); +} + +// --------------------------------------------------------------------------- +// RuntimeState::preferred_block_size_bytes() +// --------------------------------------------------------------------------- + +class RuntimeStateAdaptiveBatchSizeTest : public ::testing::Test { +protected: + void SetUp() override { + _saved_enable_adaptive = config::enable_adaptive_batch_size; + config::enable_adaptive_batch_size = true; + } + + void TearDown() override { config::enable_adaptive_batch_size = _saved_enable_adaptive; } + + bool _saved_enable_adaptive = false; +}; + +class RuntimeStateBlockSizeBytesTest : public RuntimeStateAdaptiveBatchSizeTest { +protected: + RuntimeState state; +}; + +TEST_F(RuntimeStateBlockSizeBytesTest, DefaultWhenUnset) { + // Field not set → default 8MB. + EXPECT_EQ(state.preferred_block_size_bytes(), 8388608UL); +} + +TEST_F(RuntimeStateBlockSizeBytesTest, NormalValue) { + state._query_options.__set_preferred_block_size_bytes(4194304L); // 4MB + EXPECT_EQ(state.preferred_block_size_bytes(), 4194304UL); +} + +TEST_F(RuntimeStateBlockSizeBytesTest, ZeroClampsToMin) { + // FE rejects 0, but BE still clamps direct thrift / mixed-version inputs defensively. + state._query_options.__set_preferred_block_size_bytes(0); + EXPECT_EQ(state.preferred_block_size_bytes(), 1048576UL); +} + +TEST_F(RuntimeStateBlockSizeBytesTest, ClampToMin) { + // Non-zero values below 1MB should be clamped to 1MB. + state._query_options.__set_preferred_block_size_bytes(50); + EXPECT_EQ(state.preferred_block_size_bytes(), 1048576UL); // 1MB +} + +TEST_F(RuntimeStateBlockSizeBytesTest, ClampToMax) { + // Values above 512MB should be clamped to 512MB. 
+ state._query_options.__set_preferred_block_size_bytes(1073741824L); // 1GB + EXPECT_EQ(state.preferred_block_size_bytes(), 536870912UL); // 512MB +} + +TEST_F(RuntimeStateBlockSizeBytesTest, ExactBoundaries) { + state._query_options.__set_preferred_block_size_bytes(1048576L); // 1MB + EXPECT_EQ(state.preferred_block_size_bytes(), 1048576UL); + + state._query_options.__set_preferred_block_size_bytes(536870912L); // 512MB + EXPECT_EQ(state.preferred_block_size_bytes(), 536870912UL); +} + +TEST_F(RuntimeStateBlockSizeBytesTest, DisabledWhenConfigOff) { + config::enable_adaptive_batch_size = false; + state._query_options.__set_preferred_block_size_bytes(8388608L); + EXPECT_EQ(state.preferred_block_size_bytes(), 536870912UL); + EXPECT_EQ(BlockBudget(state.batch_size(), state.preferred_block_size_bytes()).max_bytes, + 536870912UL); +} + +// --------------------------------------------------------------------------- +// MockRuntimeState: verify the test override bypasses clamping +// --------------------------------------------------------------------------- + +class MockRuntimeStateBlockBudgetTest : public RuntimeStateAdaptiveBatchSizeTest { +protected: + MockRuntimeState state; +}; + +TEST_F(MockRuntimeStateBlockBudgetTest, PreferredBlockSizeBypassesClamping) { + state._query_options.__set_preferred_block_size_bytes(50); + EXPECT_EQ(state.preferred_block_size_bytes(), 50UL); +} + +TEST_F(MockRuntimeStateBlockBudgetTest, PreferredBlockSizeDefaultFallback) { + // When not set, falls back to base class default (8MB). + EXPECT_EQ(state.preferred_block_size_bytes(), 8388608UL); +} + +TEST_F(MockRuntimeStateBlockBudgetTest, BatchSizeOverride) { + // MockRuntimeState returns _batch_size member directly. + state._batch_size = 256; + EXPECT_EQ(state.batch_size(), 256); +} + +TEST_F(MockRuntimeStateBlockBudgetTest, ConfigOffStillDisablesAdaptiveBytes) { + config::enable_adaptive_batch_size = false; + state._query_options.__set_preferred_block_size_bytes(50); + EXPECT_EQ(state.preferred_block_size_bytes(), 536870912UL); +} + +} // namespace doris diff --git a/be/test/storage/compaction/segcompaction_mow_test.cpp b/be/test/storage/compaction/segcompaction_mow_test.cpp index 760a5d953aa693..6b57c081c53860 100644 --- a/be/test/storage/compaction/segcompaction_mow_test.cpp +++ b/be/test/storage/compaction/segcompaction_mow_test.cpp @@ -19,9 +19,14 @@ #include #include +#include +#include +#include +#include #include #include #include +#include #include #include "common/config.h" @@ -40,6 +45,7 @@ #include "storage/tablet/tablet_meta.h" #include "storage/tablet/tablet_schema.h" #include "storage/utils.h" +#include "util/debug_points.h" #include "util/slice.h" namespace doris { @@ -94,6 +100,8 @@ class SegCompactionMoWTest : public ::testing::TestWithParam { } void TearDown() { + DebugPoints::instance()->clear(); + config::enable_debug_points = false; config::enable_segcompaction = false; ExecEnv* exec_env = doris::ExecEnv::GetInstance(); s_engine = nullptr; @@ -130,6 +138,35 @@ class SegCompactionMoWTest : public ::testing::TestWithParam { return true; } + bool wait_until(const std::function& pred, int timeout_ms = 10000) { + auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeout_ms); + while (std::chrono::steady_clock::now() < deadline) { + if (pred()) { + return true; + } + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + return pred(); + } + + Block create_int_block(TabletSchemaSPtr tablet_schema, uint32_t segment_id, + uint32_t rows_per_segment) { + Block 
block = tablet_schema->create_block(); + auto columns = block.mutate_columns(); + for (uint32_t rid = 0; rid < rows_per_segment; ++rid) { + uint32_t k1 = rid * 100 + segment_id; + uint32_t k2 = segment_id; + uint32_t k3 = rid; + uint32_t seq = 0; + columns[0]->insert_data(reinterpret_cast(&k1), sizeof(k1)); + columns[1]->insert_data(reinterpret_cast(&k2), sizeof(k2)); + columns[2]->insert_data(reinterpret_cast(&k3), sizeof(k3)); + columns[3]->insert_data(reinterpret_cast(&seq), sizeof(seq)); + } + block.set_columns(std::move(columns)); + return block; + } + // (k1 int, k2 varchar(20), k3 int) keys (k1, k2) void create_tablet_schema(TabletSchemaSPtr tablet_schema) { TabletSchemaPB tablet_schema_pb; @@ -823,6 +860,85 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { total_written_rows, rows_mark_deleted)); } +TEST_F(SegCompactionMoWTest, AsyncDeleteBitmapMustNotReadSegmentsDeletedBySegcompaction) { + config::enable_segcompaction = true; + config::enable_debug_points = true; + config::segcompaction_batch_size = 5; + config::segcompaction_candidate_max_rows = 1000; + config::segcompaction_candidate_max_bytes = 1 << 20; + + TabletSchemaSPtr tablet_schema = std::make_shared(); + create_tablet_schema(tablet_schema); + + RowsetWriterContext writer_context; + const int raw_rsid = 20052; + create_rowset_writer_context(raw_rsid, tablet_schema, &writer_context); + + DeleteBitmapPtr delete_bitmap = std::make_shared(TABLET_ID); + std::shared_ptr rsids {std::make_shared()}; + std::vector rowset_ptrs; + writer_context.mow_context = + std::make_shared(1, 1, rsids, rowset_ptrs, delete_bitmap); + + auto res = RowsetFactory::create_rowset_writer(*s_engine, writer_context, false); + ASSERT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + constexpr int32_t target_segment_id = 3; + std::atomic target_delete_bitmap_task_blocked {false}; + std::atomic release_target_delete_bitmap_task {false}; + DebugPoints::instance()->add_with_callback( + "BaseBetaRowsetWriter::_generate_delete_bitmap.block_before_load_segments", + std::function([&](int32_t segment_id) { + if (segment_id != target_segment_id) { + return; + } + target_delete_bitmap_task_blocked.store(true); + while (!release_target_delete_bitmap_task.load()) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + })); + + auto target_segment_path = fmt::format("{}/{}_{}.dat", lTestDir, raw_rsid, target_segment_id); + bool blocked = false; + bool source_segment_deleted_before_release = false; + std::thread release_thread([&] { + blocked = wait_until([&] { return target_delete_bitmap_task_blocked.load(); }); + if (blocked) { + source_segment_deleted_before_release = + wait_until([&] { return !std::filesystem::exists(target_segment_path); }, 1000); + } + release_target_delete_bitmap_task.store(true); + DebugPoints::instance()->remove( + "BaseBetaRowsetWriter::_generate_delete_bitmap.block_before_load_segments"); + }); + + Status write_status = Status::OK(); + const uint32_t rows_per_segment = 128; + for (int32_t segment_id = 0; segment_id < 5; ++segment_id) { + auto block = create_int_block(tablet_schema, segment_id, rows_per_segment); + write_status = rowset_writer->add_block(&block); + if (!write_status.ok()) { + break; + } + write_status = rowset_writer->flush(); + if (!write_status.ok()) { + break; + } + } + release_thread.join(); + + RowsetSharedPtr rowset; + auto build_status = write_status.ok() ? 
rowset_writer->build(rowset) : write_status; + + ASSERT_TRUE(blocked) << "delete bitmap task did not reach the injected wait point"; + EXPECT_FALSE(source_segment_deleted_before_release) + << "segcompaction deleted source segment before delete bitmap finished: " + << target_segment_path; + EXPECT_TRUE(write_status.ok()) << write_status; + EXPECT_TRUE(build_status.ok()) << build_status; +} + TEST_F(SegCompactionMoWTest, SegCompactionNotTrigger) { config::enable_segcompaction = true; Status s; diff --git a/be/test/storage/compaction/vertical_compaction_test.cpp b/be/test/storage/compaction/vertical_compaction_test.cpp index 3b736857242caf..a39932e2a01fcf 100644 --- a/be/test/storage/compaction/vertical_compaction_test.cpp +++ b/be/test/storage/compaction/vertical_compaction_test.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -47,6 +48,7 @@ #include "io/io_common.h" #include "json2pb/json_to_pb.h" #include "runtime/exec_env.h" +#include "storage/compaction/compaction.h" #include "storage/delete/delete_handler.h" #include "storage/field.h" #include "storage/iterator/vertical_merge_iterator.h" @@ -68,6 +70,7 @@ #include "storage/tablet/tablet.h" #include "storage/tablet/tablet_meta.h" #include "storage/tablet/tablet_schema.h" +#include "storage/txn/txn_manager.h" #include "storage/utils.h" #include "util/uid_util.h" @@ -155,6 +158,67 @@ class VerticalCompactionTest : public ::testing::Test { return tablet_schema; } + TabletSchemaSPtr create_mow_cluster_key_schema(bool has_sequence_col = false) { + TabletSchemaSPtr tablet_schema = std::make_shared(); + TabletSchemaPB tablet_schema_pb; + tablet_schema_pb.set_keys_type(UNIQUE_KEYS); + tablet_schema_pb.set_num_short_key_columns(1); + tablet_schema_pb.set_num_rows_per_row_block(1024); + tablet_schema_pb.set_compress_kind(COMPRESS_NONE); + tablet_schema_pb.set_next_column_unique_id(has_sequence_col ? 5 : 4); + tablet_schema_pb.add_cluster_key_uids(2); + if (has_sequence_col) { + tablet_schema_pb.set_sequence_col_idx(2); + tablet_schema_pb.add_cluster_key_uids(3); + } + + ColumnPB* column_1 = tablet_schema_pb.add_column(); + column_1->set_unique_id(1); + column_1->set_name("c1"); + column_1->set_type("INT"); + column_1->set_is_key(true); + column_1->set_length(4); + column_1->set_index_length(4); + column_1->set_is_nullable(false); + column_1->set_is_bf_column(false); + + ColumnPB* column_2 = tablet_schema_pb.add_column(); + column_2->set_unique_id(2); + column_2->set_name("c2"); + column_2->set_type("INT"); + column_2->set_length(4); + column_2->set_index_length(4); + column_2->set_is_key(false); + column_2->set_is_nullable(false); + column_2->set_is_bf_column(false); + + if (has_sequence_col) { + ColumnPB* column_3 = tablet_schema_pb.add_column(); + column_3->set_unique_id(3); + column_3->set_name("c3"); + column_3->set_type("INT"); + column_3->set_length(4); + column_3->set_index_length(4); + column_3->set_is_key(false); + column_3->set_is_nullable(false); + column_3->set_is_bf_column(false); + column_3->set_aggregation("NONE"); + } + + ColumnPB* delete_sign_column = tablet_schema_pb.add_column(); + delete_sign_column->set_unique_id(has_sequence_col ? 
4 : 3); + delete_sign_column->set_name(DELETE_SIGN); + delete_sign_column->set_type("TINYINT"); + delete_sign_column->set_length(1); + delete_sign_column->set_index_length(1); + delete_sign_column->set_is_key(false); + delete_sign_column->set_is_nullable(false); + delete_sign_column->set_is_bf_column(false); + + tablet_schema->init_from_pb(tablet_schema_pb); + return tablet_schema; + } + TabletSchemaSPtr create_agg_schema() { TabletSchemaSPtr tablet_schema = std::make_shared(); TabletSchemaPB tablet_schema_pb; @@ -206,6 +270,9 @@ class VerticalCompactionTest : public ::testing::Test { rowset_writer_context.version = version; rowset_writer_context.segments_overlap = overlap; rowset_writer_context.max_rows_per_segment = max_rows_per_segment; + rowset_writer_context.enable_unique_key_merge_on_write = + tablet_schema->keys_type() == UNIQUE_KEYS && + !tablet_schema->cluster_key_uids().empty(); inc_id++; return rowset_writer_context; } @@ -268,6 +335,54 @@ class VerticalCompactionTest : public ::testing::Test { return rowset; } + RowsetSharedPtr create_rowset_with_sequence( + TabletSchemaSPtr tablet_schema, const SegmentsOverlapPB& overlap, + std::vector>> rowset_data, + int64_t version) { + if (overlap == NONOVERLAPPING) { + for (auto i = 1; i < rowset_data.size(); i++) { + auto& last_seg_data = rowset_data[i - 1]; + auto& cur_seg_data = rowset_data[i]; + int64_t last_seg_max = std::get<0>(last_seg_data[last_seg_data.size() - 1]); + int64_t cur_seg_min = std::get<0>(cur_seg_data[0]); + EXPECT_LT(last_seg_max, cur_seg_min); + } + } + auto writer_context = create_rowset_writer_context(tablet_schema, overlap, UINT32_MAX, + {version, version}); + + auto res = RowsetFactory::create_rowset_writer(*engine_ref, writer_context, true); + EXPECT_TRUE(res.has_value()) << res.error(); + auto rowset_writer = std::move(res).value(); + + uint32_t num_rows = 0; + for (int i = 0; i < rowset_data.size(); ++i) { + Block block = tablet_schema->create_block(); + auto columns = block.mutate_columns(); + for (int rid = 0; rid < rowset_data[i].size(); ++rid) { + int32_t c1 = std::get<0>(rowset_data[i][rid]); + int32_t c2 = std::get<1>(rowset_data[i][rid]); + int32_t c3 = std::get<2>(rowset_data[i][rid]); + columns[0]->insert_data((const char*)&c1, sizeof(c1)); + columns[1]->insert_data((const char*)&c2, sizeof(c2)); + columns[2]->insert_data((const char*)&c3, sizeof(c3)); + uint8_t num = 0; + columns[3]->insert_data((const char*)&num, sizeof(num)); + num_rows++; + } + auto s = rowset_writer->add_block(&block); + EXPECT_TRUE(s.ok()); + s = rowset_writer->flush(); + EXPECT_TRUE(s.ok()); + } + + RowsetSharedPtr rowset; + EXPECT_EQ(Status::OK(), rowset_writer->build(rowset)); + EXPECT_EQ(rowset_data.size(), rowset->rowset_meta()->num_segments()); + EXPECT_EQ(num_rows, rowset->rowset_meta()->num_rows()); + return rowset; + } + void init_rs_meta(RowsetMetaSharedPtr& rs_meta, int64_t start, int64_t end) { std::string json_rowset_meta = R"({ "rowset_id": 540081, @@ -322,6 +437,12 @@ class VerticalCompactionTest : public ::testing::Test { } else if (tablet_schema.keys_type() == AGG_KEYS) { t_tablet_schema.__set_keys_type(TKeysType::AGG_KEYS); } + for (auto uid : tablet_schema.cluster_key_uids()) { + t_tablet_schema.cluster_key_uids.push_back(uid); + } + if (tablet_schema.has_sequence_col()) { + t_tablet_schema.__set_sequence_col_idx(tablet_schema.sequence_col_idx()); + } t_tablet_schema.__set_storage_type(TStorageType::COLUMN); t_tablet_schema.__set_columns(cols); TabletMetaSharedPtr tablet_meta( @@ -372,12 +493,56 @@ class 
VerticalCompactionTest : public ::testing::Test { } } + void commit_txn_with_delete_bitmap(TabletSharedPtr tablet, const RowsetSharedPtr& rowset, + int64_t txn_id, DeleteBitmapPtr delete_bitmap, + const RowsetIdUnorderedSet& rowset_ids) { + PUniqueId load_id; + load_id.set_hi(txn_id); + load_id.set_lo(txn_id); + auto status = engine_ref->txn_manager()->prepare_txn(tablet->partition_id(), *tablet, + txn_id, load_id); + ASSERT_TRUE(status.ok()) << status; + status = engine_ref->txn_manager()->commit_txn(tablet->partition_id(), *tablet, txn_id, + load_id, rowset, {}, false); + ASSERT_TRUE(status.ok()) << status; + engine_ref->txn_manager()->set_txn_related_delete_bitmap( + tablet->partition_id(), txn_id, tablet->tablet_id(), tablet->tablet_uid(), true, + delete_bitmap, rowset_ids, nullptr); + } + private: const std::string kTestDir = "/ut_dir/vertical_compaction_test"; std::string absolute_dir; DataDir* _data_dir = nullptr; }; +class TestCompactionMixin : public CompactionMixin { +public: + TestCompactionMixin(StorageEngine& engine, TabletSharedPtr tablet) + : CompactionMixin(engine, std::move(tablet), "TestCompactionMixin") {} + + Status prepare_compact() override { return Status::OK(); } + + Status modify_rowsets_for_test(std::vector input_rowsets, + RowsetSharedPtr output_rowset, + std::unique_ptr rowid_conversion) { + _input_rowsets = std::move(input_rowsets); + _output_rowset = std::move(output_rowset); + _rowid_conversion = std::move(rowid_conversion); + _stats.rowid_conversion = _rowid_conversion.get(); + auto st = modify_rowsets(); + if (st.ok()) { + _state = CompactionState::SUCCESS; + } + return st; + } + +private: + std::string_view compaction_name() const override { return "test compaction"; } + + ReaderType compaction_type() const override { return ReaderType::READER_CUMULATIVE_COMPACTION; } +}; + TEST_F(VerticalCompactionTest, TestRowSourcesBuffer) { RowSourcesBuffer buffer(100, absolute_dir, ReaderType::READER_CUMULATIVE_COMPACTION); RowSource s1(0, 0); @@ -745,6 +910,165 @@ TEST_F(VerticalCompactionTest, TestUniqueKeyVerticalMerge) { } } +TEST_F(VerticalCompactionTest, ClusterKeyMowCompactionNeedsOutputRowsetInternalDedup) { + TabletSchemaSPtr tablet_schema = create_mow_cluster_key_schema(); + TabletSharedPtr tablet = create_tablet(*tablet_schema, true); + + std::vector input_rowsets; + input_rowsets.push_back(create_rowset(tablet_schema, NONOVERLAPPING, {{{1, 30}}}, 2)); + input_rowsets.push_back(create_rowset(tablet_schema, NONOVERLAPPING, {{{2, 10}, {1, 20}}}, 3)); + + std::vector input_rs_readers; + for (auto& rowset : input_rowsets) { + RowsetReaderSharedPtr rs_reader; + ASSERT_TRUE(rowset->create_reader(&rs_reader).ok()); + input_rs_readers.push_back(std::move(rs_reader)); + } + + auto writer_context = create_rowset_writer_context(tablet_schema, NONOVERLAPPING, 1024, {2, 3}); + auto res = RowsetFactory::create_rowset_writer(*engine_ref, writer_context, true); + ASSERT_TRUE(res.has_value()) << res.error(); + auto output_rs_writer = std::move(res).value(); + + Merger::Statistics stats; + RowIdConversion rowid_conversion; + stats.rowid_conversion = &rowid_conversion; + auto st = Merger::vertical_merge_rowsets(tablet, ReaderType::READER_CUMULATIVE_COMPACTION, + *tablet_schema, input_rs_readers, + output_rs_writer.get(), 1024, 1, &stats); + ASSERT_TRUE(st.ok()) << st; + + RowsetSharedPtr output_rowset; + ASSERT_EQ(Status::OK(), output_rs_writer->build(output_rowset)); + ASSERT_NE(output_rowset, nullptr); + ASSERT_EQ(1, output_rowset->num_segments()); + ASSERT_EQ(3, 
output_rowset->num_rows()); + ASSERT_EQ(0, stats.merged_rows); + + RowsetReaderContext reader_context; + reader_context.tablet_schema = tablet_schema; + reader_context.need_ordered_result = false; + std::vector return_columns = {0, 1}; + reader_context.return_columns = &return_columns; + RowsetReaderSharedPtr output_rs_reader; + create_and_init_rowset_reader(output_rowset.get(), reader_context, &output_rs_reader); + + std::vector> output_data; + do { + Block output_block = tablet_schema->create_block(); + st = output_rs_reader->next_batch(&output_block); + auto columns = output_block.get_columns_with_type_and_name(); + ASSERT_GE(columns.size(), 2); + for (auto i = 0; i < output_block.rows(); i++) { + output_data.emplace_back(columns[0].column->get_int(i), columns[1].column->get_int(i)); + } + } while (st.ok()); + ASSERT_TRUE(st.is()) << st; + + ASSERT_EQ(3, output_data.size()); + EXPECT_EQ(output_data[0], std::make_tuple(int64_t {2}, int64_t {10})); + EXPECT_EQ(output_data[1], std::make_tuple(int64_t {1}, int64_t {20})); + EXPECT_EQ(output_data[2], std::make_tuple(int64_t {1}, int64_t {30})); + + DeleteBitmap input_delete_bitmap(tablet->tablet_id()); + DeleteBitmap output_delete_bitmap(tablet->tablet_id()); + tablet->calc_compaction_output_rowset_delete_bitmap(input_rowsets, rowid_conversion, 0, + UINT64_MAX, nullptr, nullptr, + input_delete_bitmap, &output_delete_bitmap); + st = tablet->calc_compaction_output_rowset_internal_delete_bitmap( + input_rowsets, output_rowset, rowid_conversion, &output_delete_bitmap); + ASSERT_TRUE(st.ok()) << st; + + std::set visible_keys; + auto deleted_rows = output_delete_bitmap.get_agg({output_rowset->rowset_id(), 0, UINT64_MAX}); + for (uint32_t row_id = 0; row_id < output_data.size(); ++row_id) { + if (deleted_rows->contains(row_id)) { + continue; + } + ASSERT_TRUE(visible_keys.insert(std::get<0>(output_data[row_id])).second) + << "unique key should not be duplicated after cluster-key MOW compaction"; + } +} + +TEST_F(VerticalCompactionTest, + ClusterKeyMowCompactionWithSequenceKeepsTxnInternalDedupDeleteBitmap) { + TabletSchemaSPtr tablet_schema = create_mow_cluster_key_schema(true); + TabletSharedPtr tablet = create_tablet(*tablet_schema, true); + + std::vector input_rowsets; + input_rowsets.push_back( + create_rowset_with_sequence(tablet_schema, NONOVERLAPPING, {{{1, 30, 30}}}, 2)); + input_rowsets.push_back(create_rowset_with_sequence(tablet_schema, NONOVERLAPPING, + {{{2, 10, 10}, {1, 20, 20}}}, 3)); + for (auto& rowset : input_rowsets) { + ASSERT_TRUE(tablet->add_rowset(rowset).ok()); + } + + auto writer_context = create_rowset_writer_context(tablet_schema, NONOVERLAPPING, 1024, {2, 3}); + auto res = RowsetFactory::create_rowset_writer(*engine_ref, writer_context, true); + ASSERT_TRUE(res.has_value()) << res.error(); + auto output_rs_writer = std::move(res).value(); + + Block block = tablet_schema->create_block(); + auto columns = block.mutate_columns(); + std::vector> output_rows = { + {2, 10, 10}, {1, 20, 20}, {1, 30, 30}}; + for (auto& [c1, c2, c3] : output_rows) { + columns[0]->insert_data((const char*)&c1, sizeof(c1)); + columns[1]->insert_data((const char*)&c2, sizeof(c2)); + columns[2]->insert_data((const char*)&c3, sizeof(c3)); + uint8_t delete_sign = 0; + columns[3]->insert_data((const char*)&delete_sign, sizeof(delete_sign)); + } + auto st = output_rs_writer->add_block(&block); + ASSERT_TRUE(st.ok()) << st; + st = output_rs_writer->flush(); + ASSERT_TRUE(st.ok()) << st; + + RowsetSharedPtr output_rowset; + ASSERT_EQ(Status::OK(), 
output_rs_writer->build(output_rowset)); + ASSERT_NE(output_rowset, nullptr); + ASSERT_EQ(3, output_rowset->num_rows()); + + auto rowid_conversion = std::make_unique(); + ASSERT_TRUE(rowid_conversion->init_segment_map(input_rowsets[0]->rowset_id(), {1}).ok()); + ASSERT_TRUE(rowid_conversion->init_segment_map(input_rowsets[1]->rowset_id(), {2}).ok()); + rowid_conversion->set_dst_rowset_id(output_rowset->rowset_id()); + rowid_conversion->add({RowLocation(input_rowsets[1]->rowset_id(), 0, 0), + RowLocation(input_rowsets[1]->rowset_id(), 0, 1), + RowLocation(input_rowsets[0]->rowset_id(), 0, 0)}, + {3}); + + auto committed_rowset = + create_rowset_with_sequence(tablet_schema, NONOVERLAPPING, {{{3, 40, 40}}}, 4); + RowsetIdUnorderedSet txn_rowset_ids; + for (auto& rowset : input_rowsets) { + txn_rowset_ids.insert(rowset->rowset_id()); + } + txn_rowset_ids.insert(committed_rowset->rowset_id()); + auto txn_delete_bitmap = std::make_shared(tablet->tablet_id()); + constexpr int64_t txn_id = 10001; + commit_txn_with_delete_bitmap(tablet, committed_rowset, txn_id, txn_delete_bitmap, + txn_rowset_ids); + + TestCompactionMixin compaction(*engine_ref, tablet); + st = compaction.modify_rowsets_for_test(input_rowsets, output_rowset, + std::move(rowid_conversion)); + ASSERT_TRUE(st.ok()) << st; + + CommitTabletTxnInfoVec commit_tablet_txn_info_vec {}; + engine_ref->txn_manager()->get_all_commit_tablet_txn_info_by_tablet( + *tablet, &commit_tablet_txn_info_vec); + ASSERT_EQ(1, commit_tablet_txn_info_vec.size()); + + auto deleted_rows = commit_tablet_txn_info_vec[0].delete_bitmap->get_agg( + {output_rowset->rowset_id(), 0, UINT64_MAX}); + ASSERT_TRUE(deleted_rows->contains(1)) + << "committed txn delete bitmap must keep the output rowset internal dedup row"; + ASSERT_FALSE(deleted_rows->contains(2)) + << "the higher sequence row should stay visible after compaction"; +} + TEST_F(VerticalCompactionTest, TestDupKeyVerticalMergeWithDelete) { auto num_input_rowset = 2; auto num_segments = 2; diff --git a/be/test/storage/iterator/block_reader_agg_flush_test.cpp b/be/test/storage/iterator/block_reader_agg_flush_test.cpp new file mode 100644 index 00000000000000..a9c0a4a4818d82 --- /dev/null +++ b/be/test/storage/iterator/block_reader_agg_flush_test.cpp @@ -0,0 +1,239 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Verifies that BlockReader's aggregation buffer flush triggered by +// `_stored_row_ref.size() == batch_max_rows()` (block_reader.cpp:639) does not +// corrupt the final aggregated value when a single agg group spans multiple +// flush windows. Drives `_append_agg_data` / `_update_agg_data` directly. 
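+//
+// Rough trace of the interesting case below (batch_max_rows = 4, one group of
+// 10 rows with values 1..10), reconstructed from the assertions rather than
+// from block_reader.cpp:
+//   i = 0..2 : rows accumulate in _stored_row_ref (size 1..3)
+//   i = 3    : size hits 4 -> periodic flush drains 1+2+3+4 into the agg place
+//   i = 4..6 : buffer refills (size 1..3)
+//   i = 7    : size hits 4 -> periodic flush drains 5+6+7+8
+//   i = 8..9 : buffer refills; the is_last flush at i = 9 drains 9+10
+//   group end: _update_agg_data finalizes the place, which must yield 55.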
+ +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wkeyword-macro" +#endif +#define private public +#define protected public +#include "storage/iterator/block_reader.h" +#undef private +#undef protected +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +#include + +#include +#include + +#include "agent/be_exec_version_manager.h" +#include "common/config.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_number.h" +#include "exprs/aggregate/aggregate_function_simple_factory.h" + +namespace doris { + +namespace { + +// Builds a 2-column source block: key (Int64, all set to `key_value`) and value +// (Int64, set to 1..n_rows). The value column is what we aggregate over. +std::unique_ptr make_source_block(size_t n_rows, int64_t key_value) { + auto block = Block::create_unique(); + auto key_type = std::make_shared(); + auto val_type = std::make_shared(); + + auto key_col = ColumnInt64::create(); + auto val_col = ColumnInt64::create(); + for (size_t i = 0; i < n_rows; ++i) { + key_col->insert_value(key_value); + val_col->insert_value(static_cast(i + 1)); + } + block->insert({std::move(key_col), key_type, "k"}); + block->insert({std::move(val_col), val_type, "v"}); + return block; +} + +// Mirror BlockReader::_init_agg_state's _stored_data_columns sizing: cloned +// struct of the source block, pre-filled with `n_rows` default rows so that +// non-variable-length agg columns can be written via replace_column_data. +MutableColumns make_stored_columns(const Block& src_block, size_t n_rows) { + return src_block.create_same_struct_block(n_rows)->mutate_columns(); +} + +MutableColumns make_target_columns() { + MutableColumns cols; + cols.push_back(ColumnInt64::create()); // key (untouched in this test) + cols.push_back(ColumnInt64::create()); // agg result column + return cols; +} + +// Configure `reader` as if it had completed init() for an AGG_KEYS table with +// schema {key: Int64, value: Int64} and SUM aggregation over `value`. +void configure_reader_for_int64_sum(BlockReader& reader, const Block& src_block, + size_t batch_max_rows) { + reader._reader_context.batch_size = batch_max_rows; + // Adaptive disabled so batch_max_rows() == _reader_context.batch_size. + config::enable_adaptive_batch_size = false; + + // Column layout: [0]=key, [1]=agg value. Output layout matches input. + reader._normal_columns_idx = {0}; + reader._agg_columns_idx = {1}; + reader._return_columns_loc = {0, 1}; + + reader._stored_data_columns = make_stored_columns(src_block, batch_max_rows); + reader._stored_has_null_tag.assign(reader._stored_data_columns.size(), false); + reader._stored_has_variable_length_tag.assign(reader._stored_data_columns.size(), false); + + auto fn = AggregateFunctionSimpleFactory::instance().get( + "sum", {std::make_shared()}, std::make_shared(), + /*result_nullable=*/false, BeExecVersionManager::get_newest_version(), + {.column_names = {}}); + ASSERT_TRUE(fn != nullptr); + + auto* place = new char[fn->size_of_data()]; + fn->create(place); + reader._agg_functions.push_back(fn); + reader._agg_places.push_back(place); + // Destructor (BlockReader::~BlockReader) cleans up _agg_places. 
+} + +int64_t read_int64(const IColumn& col, size_t row) { + return assert_cast(col).get_data()[row]; +} + +} // namespace + +class BlockReaderAggFlushTest : public testing::Test { +protected: + void SetUp() override { _saved_enable_adaptive = config::enable_adaptive_batch_size; } + + void TearDown() override { config::enable_adaptive_batch_size = _saved_enable_adaptive; } + + bool _saved_enable_adaptive = false; +}; + +// Sanity baseline: a single group whose size is below batch_max_rows triggers +// no mid-group flush. Verifies the test fixture itself is wired correctly. +TEST_F(BlockReaderAggFlushTest, NoMidGroupFlushAggregatesCorrectly) { + constexpr size_t kBatchMaxRows = 16; + constexpr size_t kRows = 5; // < batch_max_rows, only is_last flush fires + + BlockReader reader; + auto src_block = make_source_block(kRows, /*key_value=*/42); + configure_reader_for_int64_sum(reader, *src_block, kBatchMaxRows); + + auto target_columns = make_target_columns(); + + for (size_t i = 0; i < kRows; ++i) { + reader._next_row.block = std::shared_ptr(src_block.get(), [](Block*) {}); + reader._next_row.row_pos = static_cast(i); + reader._next_row.is_same = (i > 0); + reader._append_agg_data(target_columns); + } + + // is_last flush at i=4 already drained _stored_row_ref into the aggregator + // without finalizing (because _last_agg_data_counter > 0). + EXPECT_EQ(reader._stored_row_ref.size(), 0); + EXPECT_EQ(reader._last_agg_data_counter, 0); + + // Mimic `_agg_key_next_block` end-of-group close. + reader._agg_data_counters.push_back(reader._last_agg_data_counter); + reader._last_agg_data_counter = 0; + reader._update_agg_data(target_columns); + + ASSERT_EQ(target_columns[1]->size(), 1); + EXPECT_EQ(read_int64(*target_columns[1], 0), 1 + 2 + 3 + 4 + 5); +} + +// The interesting case: a single group of 10 rows with batch_max_rows=4 forces +// `_stored_row_ref.size() == batch_max_rows()` to fire mid-group at i=3 and +// i=7, plus an `is_last` flush at i=9. Final close must still emit the full +// sum 1..10 = 55. +TEST_F(BlockReaderAggFlushTest, PeriodicFlushPreservesAggregateAcrossWindows) { + constexpr size_t kBatchMaxRows = 4; + constexpr size_t kRows = 10; + + BlockReader reader; + auto src_block = make_source_block(kRows, /*key_value=*/7); + configure_reader_for_int64_sum(reader, *src_block, kBatchMaxRows); + + auto target_columns = make_target_columns(); + + int flush_count = 0; + int prev_size = 0; + for (size_t i = 0; i < kRows; ++i) { + reader._next_row.block = std::shared_ptr(src_block.get(), [](Block*) {}); + reader._next_row.row_pos = static_cast(i); + reader._next_row.is_same = (i > 0); + reader._append_agg_data(target_columns); + + // A flush happens whenever _stored_row_ref shrinks (it's pushed to + // first, then potentially cleared by _update_agg_data). + int cur_size = static_cast(reader._stored_row_ref.size()); + if (cur_size < prev_size + 1) { + ++flush_count; + } + prev_size = cur_size; + } + + // Expected flushes: at i=3 (size==4), i=7 (size==4), i=9 (is_last). The + // final aggregated state must remain consistent across all three. + EXPECT_GE(flush_count, 3) << "expected at least 3 mid/last flushes"; + EXPECT_EQ(reader._stored_row_ref.size(), 0); + EXPECT_EQ(reader._last_agg_data_counter, 0); + + // Mimic `_agg_key_next_block` end-of-group close. 
+ reader._agg_data_counters.push_back(reader._last_agg_data_counter); + reader._last_agg_data_counter = 0; + reader._update_agg_data(target_columns); + + ASSERT_EQ(target_columns[1]->size(), 1); + int64_t expected = 0; + for (int64_t v = 1; v <= static_cast(kRows); ++v) { + expected += v; + } + EXPECT_EQ(read_int64(*target_columns[1], 0), expected); // 55 +} + +// Stress: a single group long enough to trigger many full periodic flushes, +// followed by a group end. Catches off-by-one bugs in chunked aggregation. +TEST_F(BlockReaderAggFlushTest, PeriodicFlushManyWindowsSingleGroup) { + constexpr size_t kBatchMaxRows = 4; + constexpr size_t kRows = 100; // 25 full windows + + BlockReader reader; + auto src_block = make_source_block(kRows, /*key_value=*/3); + configure_reader_for_int64_sum(reader, *src_block, kBatchMaxRows); + + auto target_columns = make_target_columns(); + for (size_t i = 0; i < kRows; ++i) { + reader._next_row.block = std::shared_ptr(src_block.get(), [](Block*) {}); + reader._next_row.row_pos = static_cast(i); + reader._next_row.is_same = (i > 0); + reader._append_agg_data(target_columns); + } + reader._agg_data_counters.push_back(reader._last_agg_data_counter); + reader._last_agg_data_counter = 0; + reader._update_agg_data(target_columns); + + ASSERT_EQ(target_columns[1]->size(), 1); + int64_t expected = static_cast(kRows) * (kRows + 1) / 2; // 5050 + EXPECT_EQ(read_int64(*target_columns[1], 0), expected); +} + +} // namespace doris diff --git a/be/test/storage/iterator/block_reader_batch_max_rows_test.cpp b/be/test/storage/iterator/block_reader_batch_max_rows_test.cpp new file mode 100644 index 00000000000000..4569cd53cbf33d --- /dev/null +++ b/be/test/storage/iterator/block_reader_batch_max_rows_test.cpp @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
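+
+// Behaviour pinned down by the cases below (stated as observed through the
+// private members, not as a public API guarantee):
+//   - batch_max_rows() always returns _reader_context.batch_size; the byte
+//     budget never raises or lowers the row cap.
+//   - preferred_block_size_bytes() returns the configured byte budget when
+//     config::enable_adaptive_batch_size is on and 0 when it is off.
+//   - _reached_byte_budget(columns) reports true only while adaptive batching
+//     is on and the accumulated column bytes meet or exceed the budget.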
+ +// Use #define private public to access private/protected members for testing +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wkeyword-macro" +#endif +#define private public +#define protected public +#include "storage/iterator/block_reader.h" +#undef private +#undef protected +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +#include + +#include "common/config.h" +#include "core/column/column_vector.h" + +namespace doris { + +namespace { + +constexpr size_t kMinPublicBlockBudgetBytes = 1048576; // 1MB + +MutableColumns make_int64_columns(size_t rows) { + MutableColumns columns; + auto col = ColumnInt64::create(); + for (size_t i = 0; i < rows; ++i) { + col->insert_value(cast_set(i)); + } + columns.push_back(std::move(col)); + return columns; +} + +} // namespace + +class BlockReaderBatchMaxRowsTest : public testing::Test { +protected: + void SetUp() override { _saved_enable_adaptive = config::enable_adaptive_batch_size; } + + void TearDown() override { config::enable_adaptive_batch_size = _saved_enable_adaptive; } + + bool _saved_enable_adaptive = false; +}; + +TEST_F(BlockReaderBatchMaxRowsTest, FallbackToBatchSizeWhenAdaptiveDisabled) { + config::enable_adaptive_batch_size = false; + + BlockReader reader; + reader._reader_context.batch_size = 4096; + + EXPECT_EQ(reader.batch_max_rows(), 4096); +} + +TEST_F(BlockReaderBatchMaxRowsTest, UseBatchSizeWhenAdaptiveEnabled) { + config::enable_adaptive_batch_size = true; + + BlockReader reader; + reader._reader_context.batch_size = 4096; + reader._reader_context.preferred_block_size_bytes = 8388608; // byte budget must be active + + EXPECT_EQ(reader.batch_max_rows(), 4096); +} + +TEST_F(BlockReaderBatchMaxRowsTest, BatchMaxRowsIgnoresByteBudget) { + config::enable_adaptive_batch_size = true; + + BlockReader reader; + reader._reader_context.batch_size = 1024; + reader._reader_context.preferred_block_size_bytes = 8388608; + + EXPECT_EQ(reader.batch_max_rows(), 1024); +} + +TEST_F(BlockReaderBatchMaxRowsTest, PreferredBlockSizeBytesWhenEnabled) { + config::enable_adaptive_batch_size = true; + + BlockReader reader; + reader._reader_context.preferred_block_size_bytes = 8388608; // 8MB + + EXPECT_EQ(reader.preferred_block_size_bytes(), 8388608); +} + +TEST_F(BlockReaderBatchMaxRowsTest, PreferredBlockSizeBytesWhenDisabled) { + config::enable_adaptive_batch_size = false; + + BlockReader reader; + reader._reader_context.preferred_block_size_bytes = 8388608; + + EXPECT_EQ(reader.preferred_block_size_bytes(), 0); +} + +TEST_F(BlockReaderBatchMaxRowsTest, ReachedByteBudgetReturnsFalseWhenDisabled) { + config::enable_adaptive_batch_size = false; + + BlockReader reader; + reader._reader_context.preferred_block_size_bytes = kMinPublicBlockBudgetBytes; + + auto columns = make_int64_columns(200000); // ~1.6MB > 1MB min public budget + + EXPECT_FALSE(reader._reached_byte_budget(columns)); +} + +TEST_F(BlockReaderBatchMaxRowsTest, ReachedByteBudgetReturnsTrueWhenExceeded) { + config::enable_adaptive_batch_size = true; + + BlockReader reader; + reader._reader_context.preferred_block_size_bytes = kMinPublicBlockBudgetBytes; + + auto columns = make_int64_columns(200000); // ~1.6MB >= 1MB min public budget + + EXPECT_TRUE(reader._reached_byte_budget(columns)); +} + +TEST_F(BlockReaderBatchMaxRowsTest, ReachedByteBudgetReturnsFalseWhenUnderBudget) { + config::enable_adaptive_batch_size = true; + + BlockReader reader; + reader._reader_context.preferred_block_size_bytes = kMinPublicBlockBudgetBytes; + + auto 
columns = make_int64_columns(10); // 80 bytes < 1MB min public budget + + EXPECT_FALSE(reader._reached_byte_budget(columns)); +} + +} // namespace doris diff --git a/be/test/storage/iterator/vcollect_iterator_collected_enough_test.cpp b/be/test/storage/iterator/vcollect_iterator_collected_enough_test.cpp new file mode 100644 index 00000000000000..afcf979ccca8d5 --- /dev/null +++ b/be/test/storage/iterator/vcollect_iterator_collected_enough_test.cpp @@ -0,0 +1,383 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include +#include +#include + +#include "common/config.h" +#include "core/block/block.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "storage/iterator/block_reader.h" +#include "storage/iterator/vcollect_iterator.h" + +namespace doris { + +// ============================================================================ +// Part 1: Pure-computation tests for estimate_collected_enough() +// ============================================================================ + +class EstimateCollectedEnoughTest : public testing::Test {}; + +// Budget is 0 → always false (feature disabled). +TEST_F(EstimateCollectedEnoughTest, BudgetZeroReturnsFalse) { + EXPECT_FALSE(estimate_collected_enough(/*present_bytes=*/1000, /*present_rows=*/10, + /*rows_to_merge=*/5, + /*preferred_block_size_bytes=*/0)); +} + +// No rows collected yet → always false (cannot estimate). +TEST_F(EstimateCollectedEnoughTest, ZeroRowsReturnsFalse) { + EXPECT_FALSE(estimate_collected_enough(/*present_bytes=*/0, /*present_rows=*/0, + /*rows_to_merge=*/5, + /*preferred_block_size_bytes=*/1024)); +} + +// Present bytes already exceed budget → true. +TEST_F(EstimateCollectedEnoughTest, PresentBytesExceedBudget) { + EXPECT_TRUE(estimate_collected_enough(/*present_bytes=*/2048, /*present_rows=*/10, + /*rows_to_merge=*/0, + /*preferred_block_size_bytes=*/1024)); +} + +// Present bytes exactly equal budget → true. +TEST_F(EstimateCollectedEnoughTest, PresentBytesEqualBudget) { + EXPECT_TRUE(estimate_collected_enough(/*present_bytes=*/1024, /*present_rows=*/10, + /*rows_to_merge=*/0, + /*preferred_block_size_bytes=*/1024)); +} + +// Prediction: 500 bytes / 10 rows = 50 bytes/row. +// With 10 pending rows → predicted 500 + 500 = 1000 < 1024 → false. +TEST_F(EstimateCollectedEnoughTest, PredictionBelowBudget) { + EXPECT_FALSE(estimate_collected_enough(/*present_bytes=*/500, /*present_rows=*/10, + /*rows_to_merge=*/10, + /*preferred_block_size_bytes=*/1024)); +} + +// Prediction: 500 bytes / 10 rows = 50 bytes/row. +// With 11 pending rows → predicted 500 * 21 / 10 = 1050 >= 1024 → true. 
+TEST_F(EstimateCollectedEnoughTest, PredictionMeetsBudget) { + EXPECT_TRUE(estimate_collected_enough(/*present_bytes=*/500, /*present_rows=*/10, + /*rows_to_merge=*/11, + /*preferred_block_size_bytes=*/1024)); +} + +// Zero pending rows: prediction = present_bytes (no pending rows to flush). +// 500 < 1024 → false. +TEST_F(EstimateCollectedEnoughTest, ZeroPendingRows) { + EXPECT_FALSE(estimate_collected_enough(/*present_bytes=*/500, /*present_rows=*/10, + /*rows_to_merge=*/0, + /*preferred_block_size_bytes=*/1024)); +} + +// Exact boundary: 512 bytes / 8 rows = 64 bytes/row. With 8 pending rows → +// predicted 512 * 16 / 8 = 1024 = budget → true. +TEST_F(EstimateCollectedEnoughTest, ExactBudgetBoundary) { + EXPECT_TRUE(estimate_collected_enough(/*present_bytes=*/512, /*present_rows=*/8, + /*rows_to_merge=*/8, + /*preferred_block_size_bytes=*/1024)); +} + +// One below boundary: 512 bytes / 8 rows = 64 bytes/row. With 7 pending rows → +// predicted 512 * 15 / 8 = 960 < 1024 → false. +TEST_F(EstimateCollectedEnoughTest, OneBelowBudgetBoundary) { + EXPECT_FALSE(estimate_collected_enough(/*present_bytes=*/512, /*present_rows=*/8, + /*rows_to_merge=*/7, + /*preferred_block_size_bytes=*/1024)); +} + +// Overflow guard: present_bytes close to SIZE_MAX; multiplication would wrap → true. +TEST_F(EstimateCollectedEnoughTest, OverflowGuardReturnsTrueForHugeBytes) { + const size_t huge = std::numeric_limits::max() / 2 + 1; + EXPECT_TRUE(estimate_collected_enough(/*present_bytes=*/huge, /*present_rows=*/1, + /*rows_to_merge=*/1, + /*preferred_block_size_bytes=*/1024)); +} + +// Large but no overflow: present_bytes * total_rows fits in size_t. +TEST_F(EstimateCollectedEnoughTest, LargeButNoOverflow) { + // 1GB present, 100 rows, 100 pending → 2GB total predicted. + const size_t one_gb = 1ULL << 30; + EXPECT_TRUE(estimate_collected_enough(/*present_bytes=*/one_gb, /*present_rows=*/100, + /*rows_to_merge=*/100, + /*preferred_block_size_bytes=*/one_gb + 1)); +} + +// Single present row with many pending rows should scale correctly. +// 100 bytes / 1 row → 100 bytes/row. With 99 pending → predicted 100 * 100 / 1 = 10000 >= 5000. +TEST_F(EstimateCollectedEnoughTest, SingleRowScalesCorrectly) { + EXPECT_TRUE(estimate_collected_enough(/*present_bytes=*/100, /*present_rows=*/1, + /*rows_to_merge=*/99, + /*preferred_block_size_bytes=*/5000)); +} + +// ============================================================================ +// Part 2: Integration tests — real MutableColumns + estimate_collected_enough +// +// These tests exercise the same code path as collected_enough_rows(): +// present_bytes = Block::columns_byte_size(columns) +// present_rows = columns[0]->size() +// → estimate_collected_enough(present_bytes, present_rows, rows_to_merge, budget) +// +// Level1Iterator::collected_enough_rows() is a private inner class method, so +// we replicate its logic here with real columns to verify end-to-end correctness. +// ============================================================================ + +class CollectedEnoughWithColumnsTest : public testing::Test { +protected: + void SetUp() override { _saved_adaptive = config::enable_adaptive_batch_size; } + void TearDown() override { config::enable_adaptive_batch_size = _saved_adaptive; } + + // Replicate the logic of Level1Iterator::collected_enough_rows() with a + // configurable budget, so we can test the column-byte integration path + // without instantiating a Level1Iterator (private inner class). 
+ static bool collected_enough_rows_sim(const MutableColumns& columns, int rows_to_merge, + size_t preferred_block_size_bytes) { + if (!config::enable_adaptive_batch_size) { + return false; + } + if (preferred_block_size_bytes == 0) { + return false; + } + const auto present_bytes = Block::columns_byte_size(columns); + const auto present_rows = columns.empty() ? 0 : columns[0]->size(); + return estimate_collected_enough(present_bytes, present_rows, rows_to_merge, + preferred_block_size_bytes); + } + + // Build a MutableColumns with N_cols ColumnInt32 columns, each having `nrows` rows. + // Each Int32 is 4 bytes → total = 4 * nrows * ncols bytes. + static MutableColumns make_int32_columns(size_t ncols, size_t nrows) { + MutableColumns cols; + for (size_t c = 0; c < ncols; ++c) { + auto col = ColumnInt32::create(); + for (size_t r = 0; r < nrows; ++r) { + col->insert_value(static_cast(r)); + } + cols.push_back(std::move(col)); + } + return cols; + } + + bool _saved_adaptive = false; +}; + +// Config disabled → always false regardless of data. +TEST_F(CollectedEnoughWithColumnsTest, DisabledConfigReturnsFalse) { + config::enable_adaptive_batch_size = false; + auto cols = make_int32_columns(2, 100); + // 2 cols × 100 rows × 4 bytes = 800 bytes; budget = 100 bytes (well below) + EXPECT_FALSE(collected_enough_rows_sim(cols, 0, /*budget=*/100)); +} + +// Config enabled, budget = 0 → always false. +TEST_F(CollectedEnoughWithColumnsTest, ZeroBudgetReturnsFalse) { + config::enable_adaptive_batch_size = true; + auto cols = make_int32_columns(2, 100); + EXPECT_FALSE(collected_enough_rows_sim(cols, 0, /*budget=*/0)); +} + +// Empty columns → present_rows = 0 → cannot estimate → false. +TEST_F(CollectedEnoughWithColumnsTest, EmptyColumnsReturnsFalse) { + config::enable_adaptive_batch_size = true; + MutableColumns empty; + EXPECT_FALSE(collected_enough_rows_sim(empty, 10, /*budget=*/1024)); +} + +// Columns with zero rows → present_rows = 0 → false. +TEST_F(CollectedEnoughWithColumnsTest, ZeroRowColumnsReturnsFalse) { + config::enable_adaptive_batch_size = true; + auto cols = make_int32_columns(3, 0); + EXPECT_FALSE(collected_enough_rows_sim(cols, 5, /*budget=*/100)); +} + +// Single Int32 column, 256 rows → 1024 bytes. +// Budget = 1024 → already met → true (no pending rows needed). +TEST_F(CollectedEnoughWithColumnsTest, SingleColumnExactBudgetMet) { + config::enable_adaptive_batch_size = true; + auto cols = make_int32_columns(1, 256); + // 256 rows × 4 bytes = 1024 bytes + EXPECT_EQ(Block::columns_byte_size(cols), 1024); + EXPECT_TRUE(collected_enough_rows_sim(cols, 0, /*budget=*/1024)); +} + +// Single Int32 column, 255 rows → 1020 bytes < 1024 budget. +// No pending rows → not enough → false. +TEST_F(CollectedEnoughWithColumnsTest, SingleColumnBelowBudgetNoPending) { + config::enable_adaptive_batch_size = true; + auto cols = make_int32_columns(1, 255); + EXPECT_EQ(Block::columns_byte_size(cols), 1020); + EXPECT_FALSE(collected_enough_rows_sim(cols, 0, /*budget=*/1024)); +} + +// Single Int32 column, 255 rows → 1020 bytes. With 1 pending row: +// bytes_per_row = 1020/255 = 4, predicted = 1020 * 256 / 255 = 1024 → meets budget. +TEST_F(CollectedEnoughWithColumnsTest, SingleColumnBelowBudgetWithPending) { + config::enable_adaptive_batch_size = true; + auto cols = make_int32_columns(1, 255); + EXPECT_TRUE(collected_enough_rows_sim(cols, 1, /*budget=*/1024)); +} + +// Multi-column: 4 Int32 columns × 100 rows = 1600 bytes. +// Budget = 2000 → not met. 
+// predicted = 1600 * 125 / 100 = 2000 → meets budget.
+TEST_F(CollectedEnoughWithColumnsTest, MultiColumnPredictionMeetsBudget) {
+ config::enable_adaptive_batch_size = true;
+ auto cols = make_int32_columns(4, 100);
+ EXPECT_EQ(Block::columns_byte_size(cols), 1600);
+ EXPECT_FALSE(collected_enough_rows_sim(cols, 0, /*budget=*/2000));
+ EXPECT_TRUE(collected_enough_rows_sim(cols, 25, /*budget=*/2000));
+}
+
+// Multi-column: just one row below the prediction threshold.
+// 4 cols × 100 rows = 1600 bytes, 24 pending rows:
+// predicted = 1600 * 124 / 100 = 1984 < 2000 → false.
+TEST_F(CollectedEnoughWithColumnsTest, MultiColumnOneBelowPrediction) {
+ config::enable_adaptive_batch_size = true;
+ auto cols = make_int32_columns(4, 100);
+ EXPECT_FALSE(collected_enough_rows_sim(cols, 24, /*budget=*/2000));
+}
+
+// Variable-width column: ColumnString with known data sizes.
+// ColumnString::byte_size() = chars_size + offsets_size.
+// Each offset is sizeof(IColumn::Offset) = 8 bytes (64-bit).
+TEST_F(CollectedEnoughWithColumnsTest, StringColumnByteSizeIntegration) {
+ config::enable_adaptive_batch_size = true;
+
+ auto str_col = ColumnString::create();
+ // Insert 10 strings of 10 chars each → chars = 100 bytes, offsets = 10 * 8 = 80.
+ // Total byte_size = 180 bytes.
+ for (int i = 0; i < 10; ++i) {
+ std::string s(10, 'A' + (i % 26));
+ str_col->insert_data(s.data(), s.size());
+ }
+
+ const size_t expected_bytes = 10 * 10 + 10 * sizeof(IColumn::Offset);
+ EXPECT_EQ(str_col->byte_size(), expected_bytes);
+
+ MutableColumns cols;
+ cols.push_back(std::move(str_col));
+
+ // Budget = expected_bytes → met → true.
+ EXPECT_TRUE(collected_enough_rows_sim(cols, 0, /*budget=*/expected_bytes));
+ // Budget = expected_bytes + 1 → not met with 0 pending → false.
+ EXPECT_FALSE(collected_enough_rows_sim(cols, 0, /*budget=*/expected_bytes + 1));
+}
+
+// Mixed columns: Int32 + String together.
+TEST_F(CollectedEnoughWithColumnsTest, MixedColumnTypes) {
+ config::enable_adaptive_batch_size = true;
+
+ auto int_col = ColumnInt32::create();
+ auto str_col = ColumnString::create();
+ for (int i = 0; i < 50; ++i) {
+ int_col->insert_value(static_cast<int32_t>(i));
+ std::string s(20, 'x');
+ str_col->insert_data(s.data(), s.size());
+ }
+ // Int32: 50 × 4 = 200 bytes
+ // String: 50 × 20 chars + 50 × 8 offsets = 1000 + 400 = 1400 bytes
+ // Total: 1600 bytes
+ const size_t int_bytes = 50 * sizeof(Int32);
+ const size_t str_bytes = 50 * 20 + 50 * sizeof(IColumn::Offset);
+ EXPECT_EQ(int_col->byte_size(), int_bytes);
+ EXPECT_EQ(str_col->byte_size(), str_bytes);
+
+ MutableColumns cols;
+ cols.push_back(std::move(int_col));
+ cols.push_back(std::move(str_col));
+
+ const size_t total = int_bytes + str_bytes;
+ EXPECT_EQ(Block::columns_byte_size(cols), total);
+
+ // Budget met exactly → true.
+ EXPECT_TRUE(collected_enough_rows_sim(cols, 0, /*budget=*/total));
+ // Budget slightly above → not met with 0 pending → false.
+ EXPECT_FALSE(collected_enough_rows_sim(cols, 0, /*budget=*/total + 1));
+ // With 1 pending row: predicted = total * 51 / 50 = total + total/50.
+ // So meets budget = total + total/50.
+ EXPECT_TRUE(collected_enough_rows_sim(cols, 1, /*budget=*/total + total / 50));
+}
+
+// Large number of rows to verify no integer issues with real column byte sizes.
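+// (100000 rows × 4 bytes = 400000 present bytes; 400000 * 100001 ≈ 4.0e10, far below
+// the 64-bit size_t limit, so the multiply-then-divide path runs without tripping the
+// overflow guard covered in Part 1.)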
+TEST_F(CollectedEnoughWithColumnsTest, LargeRowCountInt32) { + config::enable_adaptive_batch_size = true; + auto cols = make_int32_columns(1, 100000); + // 100000 × 4 = 400000 bytes + EXPECT_EQ(Block::columns_byte_size(cols), 400000); + EXPECT_TRUE(collected_enough_rows_sim(cols, 0, /*budget=*/400000)); + EXPECT_FALSE(collected_enough_rows_sim(cols, 0, /*budget=*/400001)); + // With 1 pending: predicted = 400000 * 100001 / 100000 = 400004 < 400005 → false + EXPECT_FALSE(collected_enough_rows_sim(cols, 1, /*budget=*/400005)); + // predicted = 400004 >= 400004 → true + EXPECT_TRUE(collected_enough_rows_sim(cols, 1, /*budget=*/400004)); +} + +// ============================================================================ +// Part 3: BlockReader.preferred_block_size_bytes() override tests +// +// BlockReader overrides TabletReader::preferred_block_size_bytes() to gate on +// config::enable_adaptive_batch_size. These tests verify that behavior and +// ensure collected_enough_rows() would receive the correct budget value. +// ============================================================================ + +class BlockReaderByteBudgetTest : public testing::Test { +protected: + void SetUp() override { _saved_adaptive = config::enable_adaptive_batch_size; } + void TearDown() override { config::enable_adaptive_batch_size = _saved_adaptive; } + bool _saved_adaptive = false; +}; + +// When adaptive is enabled, preferred_block_size_bytes() returns the configured value. +TEST_F(BlockReaderByteBudgetTest, ReturnsConfiguredBytesWhenEnabled) { + config::enable_adaptive_batch_size = true; + BlockReader reader; + reader._reader_context.preferred_block_size_bytes = 65536; + EXPECT_EQ(reader.preferred_block_size_bytes(), 65536); +} + +// When adaptive is disabled, preferred_block_size_bytes() returns 0 regardless. +TEST_F(BlockReaderByteBudgetTest, ReturnsZeroWhenDisabled) { + config::enable_adaptive_batch_size = false; + BlockReader reader; + reader._reader_context.preferred_block_size_bytes = 65536; + EXPECT_EQ(reader.preferred_block_size_bytes(), 0); +} + +// Default value of preferred_block_size_bytes in reader context is 8MB. +TEST_F(BlockReaderByteBudgetTest, DefaultIs8MB) { + config::enable_adaptive_batch_size = true; + BlockReader reader; + EXPECT_EQ(reader.preferred_block_size_bytes(), 8388608UL); +} + +// Virtual dispatch: BlockReader override is reachable through a TabletReader pointer. +TEST_F(BlockReaderByteBudgetTest, VirtualDispatchThroughTabletReaderPtr) { + config::enable_adaptive_batch_size = true; + BlockReader concrete; + concrete._reader_context.preferred_block_size_bytes = 99999; + TabletReader* base = &concrete; + // Through the virtual dispatch, BlockReader's override should be called. + EXPECT_EQ(base->preferred_block_size_bytes(), 99999); +} + +} // namespace doris diff --git a/be/test/storage/segment/adaptive_block_size_predictor_test.cpp b/be/test/storage/segment/adaptive_block_size_predictor_test.cpp new file mode 100644 index 00000000000000..60b6f37b8ceeba --- /dev/null +++ b/be/test/storage/segment/adaptive_block_size_predictor_test.cpp @@ -0,0 +1,357 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "storage/segment/adaptive_block_size_predictor.h"
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+#include <memory>
+#include <string>
+
+#include "common/config.h"
+#include "core/block/block.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "storage/olap_common.h"
+
+namespace doris {
+
+// ── helper functions ──────────────────────────────────────────────────────────
+
+// Build a Block with N rows, each containing a single Int32 column of the given value.
+static Block make_int32_block(size_t rows, int32_t value = 42) {
+ auto col = ColumnVector<Int32>::create();
+ col->reserve(rows);
+ for (size_t i = 0; i < rows; ++i) {
+ col->insert_value(value);
+ }
+ Block block;
+ block.insert({std::move(col), std::make_shared<DataTypeInt32>(), "c0"});
+ return block;
+}
+
+// Build a Block with N rows where each row holds a string of |str_len| bytes.
+static Block make_string_block(size_t rows, size_t str_len) {
+ auto col = ColumnString::create();
+ col->reserve(rows);
+ std::string s(str_len, 'x');
+ for (size_t i = 0; i < rows; ++i) {
+ col->insert_data(s.data(), s.size());
+ }
+ Block block;
+ block.insert({std::move(col), std::make_shared<DataTypeString>(), "c0"});
+ return block;
+}
+
+// ── AdaptiveBlockSizePredictorTest ───────────────────────────────────────────
+
+class AdaptiveBlockSizePredictorTest : public testing::Test {
+protected:
+ // 8 MB target
+ static constexpr size_t kBlockBytes = 8 * 1024 * 1024;
+ static constexpr size_t kMaxRows = 4096;
+};
+
+// ── Test 1: no history ────────────────────────────────────────────────────────
+// Before any update, has_history == false and bytes_per_row == 0.
+// After the first update, has_history == true and bytes_per_row == block.bytes()/rows.
+TEST_F(AdaptiveBlockSizePredictorTest, NoHistoryReturnsMaxRows) {
+ AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0);
+
+ // Initially no history.
+ EXPECT_FALSE(pred.has_history_for_test());
+ EXPECT_DOUBLE_EQ(pred.bytes_per_row_for_test(), 0.0);
+
+ // After one update the first sample is stored directly (no EWMA blending).
+ Block blk = make_int32_block(100);
+ std::vector<uint32_t> cols = {0};
+ pred.update(blk);
+
+ EXPECT_TRUE(pred.has_history_for_test());
+ double expected_bpr = static_cast<double>(blk.bytes()) / 100.0;
+ EXPECT_DOUBLE_EQ(pred.bytes_per_row_for_test(), expected_bpr);
+}
+
+// ── Test 2: EWMA convergence ──────────────────────────────────────────────────
+// When every update delivers the same sample, the EWMA stays exactly at that
+// value (0.9*v + 0.1*v == v for any v).
+TEST_F(AdaptiveBlockSizePredictorTest, EwmaConvergence) {
+ AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0);
+
+ std::vector<uint32_t> cols = {0};
+
+ // Compute expected bytes-per-row from an actual block so the test does not
+ // hard-code internal column memory layout assumptions.
+ Block probe = make_string_block(100, 100);
+ double expected_bpr = static_cast<double>(probe.bytes()) / 100.0;
+
+ // First update seeds the EWMA directly.
+ pred.update(probe);
+ EXPECT_DOUBLE_EQ(pred.bytes_per_row_for_test(), expected_bpr);
+
+ // All subsequent updates carry the same sample → EWMA stays constant.
+ for (int i = 1; i < 50; ++i) {
+ Block blk = make_string_block(100, 100);
+ pred.update(blk);
+ }
+ EXPECT_NEAR(pred.bytes_per_row_for_test(), expected_bpr, 0.01);
+}
+
+// ── Test 4: zero rows block is ignored ───────────────────────────────────────
+TEST_F(AdaptiveBlockSizePredictorTest, ZeroRowsBlockIgnored) {
+ AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0);
+
+ // update() with an empty block must be a no-op.
+ Block blk = make_int32_block(0);
+ std::vector<uint32_t> cols = {0};
+ pred.update(blk);
+
+ EXPECT_FALSE(pred.has_history_for_test());
+ EXPECT_DOUBLE_EQ(pred.bytes_per_row_for_test(), 0.0);
+
+ // A subsequent real update must still work normally.
+ Block blk2 = make_int32_block(50);
+ pred.update(blk2);
+ EXPECT_TRUE(pred.has_history_for_test());
+ double expected_bpr = static_cast<double>(blk2.bytes()) / 50.0;
+ EXPECT_DOUBLE_EQ(pred.bytes_per_row_for_test(), expected_bpr);
+}
+
+// ── Test 5: disabled when preferred_block_size_bytes == 0 ────────────────────
+TEST_F(AdaptiveBlockSizePredictorTest, DisabledWhenBlockSizeIsZero) {
+ AdaptiveBlockSizePredictor pred(0, 0.0);
+
+ Block blk = make_int32_block(1000);
+ std::vector<uint32_t> cols = {0};
+ pred.update(blk);
+
+ // update() still records history even when budget == 0.
+ EXPECT_TRUE(pred.has_history_for_test());
+ EXPECT_GT(pred.bytes_per_row_for_test(), 0.0);
+}
+
+// ── Test 6: config flag default is enabled ────────────────────────────────────
+TEST_F(AdaptiveBlockSizePredictorTest, ConfigDefaultEnabled) {
+ EXPECT_TRUE(config::enable_adaptive_batch_size);
+}
+
+// ── predict_next_rows tests ──────────────────────────────────────────────────
+
+// ── Test: _block_size_bytes == 0 returns block_size_rows ─────────────────────
+TEST_F(AdaptiveBlockSizePredictorTest, PredictReturnsBlockSizeRowsWhenDisabled) {
+ AdaptiveBlockSizePredictor pred(0, 0.0);
+
+ EXPECT_EQ(pred.predict_next_rows(), pred.block_size_rows_for_test());
+
+ // Even after update, still returns block_size_rows because block_size_bytes == 0.
+ Block blk = make_int32_block(100);
+ std::vector<uint32_t> cols = {0};
+ pred.update(blk);
+ EXPECT_EQ(pred.predict_next_rows(), pred.block_size_rows_for_test());
+}
+
+// ── Test: no history, no metadata hint → probe_rows ──────────────────────────
+TEST_F(AdaptiveBlockSizePredictorTest, PredictNoHistoryNoHint) {
+ AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0);
+
+ EXPECT_EQ(pred.predict_next_rows(), pred.probe_rows_for_test());
+}
+
+// ── Test: no history fallback is also bounded by the first-batch safety threshold
+TEST_F(AdaptiveBlockSizePredictorTest, PredictNoHistoryNoHintUsesSafetyThreshold) {
+ AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0);
+
+ EXPECT_EQ(pred.predict_next_rows(), pred.probe_rows_for_test());
+}
+
+// ── Test: no history, metadata hint computed successfully ────────────────────
+TEST_F(AdaptiveBlockSizePredictorTest, PredictNoHistoryMetadataHint) {
+ // Simulate: 1 column, 400000 raw bytes in a 1000-row segment.
+ // bytes_per_row_hint = (400000 / 1000) * 1.2 = 480.0
+ // predicted = 8MB / 480.0 = 17476
+ double hint_bpr = (400000.0 / 1000.0) * 1.2; // 480.0
+ AdaptiveBlockSizePredictor pred(kBlockBytes, hint_bpr);
+
+ size_t result = pred.predict_next_rows();
+
+ size_t expected = static_cast<size_t>(static_cast<double>(kBlockBytes) / hint_bpr);
+ // No history: probe_rows clamps the result.
+ expected = std::min(expected, pred.probe_rows_for_test());
+ EXPECT_EQ(result, expected);
+}
+
+// ── Test: no history metadata hint is bounded by the first-batch safety threshold
+TEST_F(AdaptiveBlockSizePredictorTest, PredictNoHistoryMetadataHintUsesSafetyThreshold) {
+ double hint_bpr = (400000.0 / 1000.0) * 1.2; // 480.0
+ AdaptiveBlockSizePredictor pred(kBlockBytes, hint_bpr);
+
+ EXPECT_EQ(pred.predict_next_rows(), pred.probe_rows_for_test());
+}
+
+// ── Test: no history, second call reuses same hint ───────────────────────────
+TEST_F(AdaptiveBlockSizePredictorTest, PredictNoHistoryCachedHint) {
+ double hint_bpr = (400000.0 / 1000.0) * 1.2;
+ AdaptiveBlockSizePredictor pred(kBlockBytes, hint_bpr);
+
+ size_t first = pred.predict_next_rows();
+ size_t second = pred.predict_next_rows();
+
+ EXPECT_EQ(first, second);
+}
+
+// ── Test: has history, uses EWMA bytes_per_row ──────────────────────────────
+TEST_F(AdaptiveBlockSizePredictorTest, PredictWithHistory) {
+ AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0);
+
+ // Inject history: 100 bytes per row.
+ pred.set_has_history_for_test(true, 100.0);
+
+ size_t result = pred.predict_next_rows();
+ // predicted = 8MB / 100.0 = 83886, clamped to block_size_rows = 65535.
+ EXPECT_EQ(result, pred.block_size_rows_for_test());
+}
+
+// ── Test: has history, predicted < block_size_rows (no clamping at upper bound) ────
+TEST_F(AdaptiveBlockSizePredictorTest, PredictWithHistoryNoClamping) {
+ // 8 KB target, not 8 MB, so predicted is small.
+ AdaptiveBlockSizePredictor pred(8 * 1024, 0.0);
+
+ // 100 bytes per row → predicted = 8192 / 100 = 81.
+ pred.set_has_history_for_test(true, 100.0);
+
+ size_t result = pred.predict_next_rows();
+ EXPECT_EQ(result, 81u);
+}
+
+// ── Test: predicted > block_size_rows → clamped to block_size_rows ─────────────
+TEST_F(AdaptiveBlockSizePredictorTest, PredictClampedToBlockSizeRows) {
+ AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0);
+
+ // 1 byte/row → predicted = 8MB / 1 = 8388608 >> block_size_rows.
+ pred.set_has_history_for_test(true, 1.0);
+
+ EXPECT_EQ(pred.predict_next_rows(), pred.block_size_rows_for_test());
+}
+
+// ── Test: huge bytes_per_row → predicted < 1 → clamped to 1 ────────────────
+TEST_F(AdaptiveBlockSizePredictorTest, PredictClampedToOne) {
+ AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0);
+
+ // bytes_per_row so large that predicted rounds to 0.
+ pred.set_has_history_for_test(true, static_cast<double>(kBlockBytes) * 10.0);
+
+ EXPECT_EQ(pred.predict_next_rows(), 1u);
+}
+
+// ── Test: metadata hint with multiple columns ───────────────────────────────
+TEST_F(AdaptiveBlockSizePredictorTest, PredictNoHistoryMultiColumnMetadata) {
+ // Simulate: 2 columns, uid=10 with 200000B, uid=20 with 600000B, 1000 rows.
+ // total_bytes = 800000, hint_bpr = (800000/1000) * 1.2 = 960.0
+ double hint_bpr = (800000.0 / 1000.0) * 1.2; // 960.0
+ AdaptiveBlockSizePredictor pred(kBlockBytes, hint_bpr);
+
+ size_t result = pred.predict_next_rows();
+ size_t expected = static_cast<size_t>(static_cast<double>(kBlockBytes) / hint_bpr);
+ // No history: probe_rows clamps the result.
+ expected = std::min(expected, pred.probe_rows_for_test());
+ EXPECT_EQ(result, expected);
+}
+
+// ── Test: no hint (simulates empty segment with 0 rows) ─────────────────────
+TEST_F(AdaptiveBlockSizePredictorTest, ConstructorHandlesNoHint) {
+ AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0);
+
+ // No hint available → falls back to the default first-batch probe limit.
+ EXPECT_EQ(pred.predict_next_rows(), pred.probe_rows_for_test()); +} + +TEST_F(AdaptiveBlockSizePredictorTest, PredictUsesCustomProbeRowsWithoutHint) { + constexpr size_t custom_probe_rows = 128; + AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0, custom_probe_rows); + + EXPECT_EQ(pred.probe_rows_for_test(), custom_probe_rows); + EXPECT_EQ(pred.predict_next_rows(), custom_probe_rows); +} + +TEST_F(AdaptiveBlockSizePredictorTest, PredictUsesCustomProbeRowsWithHint) { + constexpr size_t custom_probe_rows = 128; + double hint_bpr = 1.0; + AdaptiveBlockSizePredictor pred(kBlockBytes, hint_bpr, custom_probe_rows); + + EXPECT_EQ(pred.predict_next_rows(), custom_probe_rows); +} + +TEST_F(AdaptiveBlockSizePredictorTest, PredictProbeRowsZeroFallsBackToOne) { + AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0, 0); + + EXPECT_EQ(pred.probe_rows_for_test(), 0u); + EXPECT_EQ(pred.predict_next_rows(), 1u); +} + +TEST_F(AdaptiveBlockSizePredictorTest, PredictProbeRowsOneWorks) { + AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0, 1); + + EXPECT_EQ(pred.predict_next_rows(), 1u); +} + +// ── batch_size tests ──────────────────────────────────────────────────────── + +TEST_F(AdaptiveBlockSizePredictorTest, DefaultBlockSizeRows) { + AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0); + + EXPECT_EQ(pred.block_size_rows_for_test(), + AdaptiveBlockSizePredictor::default_block_size_rows_for_test()); + EXPECT_EQ(pred.block_size_rows_for_test(), 65535u); +} + +TEST_F(AdaptiveBlockSizePredictorTest, CustomBlockSizeRows) { + constexpr size_t custom_rows = 1024; + AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0, AdaptiveBlockSizePredictor::kDefaultProbeRows, + custom_rows); + + EXPECT_EQ(pred.block_size_rows_for_test(), custom_rows); + + // With history: 1 byte/row → predicted = 8MB, clamped to custom_rows = 1024. + pred.set_has_history_for_test(true, 1.0); + EXPECT_EQ(pred.predict_next_rows(), custom_rows); +} + +TEST_F(AdaptiveBlockSizePredictorTest, BlockSizeRowsClampsWithHistory) { + constexpr size_t custom_rows = 500; + AdaptiveBlockSizePredictor pred(kBlockBytes, 0.0, AdaptiveBlockSizePredictor::kDefaultProbeRows, + custom_rows); + + // 100 bytes/row → predicted = 8MB/100 = 83886, clamped to custom_rows = 500. + pred.set_has_history_for_test(true, 100.0); + EXPECT_EQ(pred.predict_next_rows(), custom_rows); +} + +TEST_F(AdaptiveBlockSizePredictorTest, BlockSizeRowsDoesNotAffectSmallPrediction) { + constexpr size_t custom_rows = 10000; + // 8 KB target, custom block_size_rows = 10000. + AdaptiveBlockSizePredictor pred(8 * 1024, 0.0, AdaptiveBlockSizePredictor::kDefaultProbeRows, + custom_rows); + + // 100 bytes/row → predicted = 8192/100 = 81 < custom_rows. 
+ pred.set_has_history_for_test(true, 100.0); + EXPECT_EQ(pred.predict_next_rows(), 81u); +} + +} // namespace doris diff --git a/be/test/storage/segment/mock/mock_segment.h b/be/test/storage/segment/mock/mock_segment.h index 211b48c29da5bd..6c715a53dee612 100644 --- a/be/test/storage/segment/mock/mock_segment.h +++ b/be/test/storage/segment/mock/mock_segment.h @@ -56,6 +56,10 @@ class MockSegment : public Segment { void set_footer(std::shared_ptr footer) { _footer = footer; } + void set_column_raw_data_bytes(int32_t uid, uint64_t bytes) { + _column_uid_to_raw_bytes[uid] = bytes; + } + std::shared_ptr get_footer() const { return _footer; } std::shared_ptr _footer; diff --git a/be/test/storage/segment/segment_column_raw_data_bytes_test.cpp b/be/test/storage/segment/segment_column_raw_data_bytes_test.cpp new file mode 100644 index 00000000000000..85203076443ee5 --- /dev/null +++ b/be/test/storage/segment/segment_column_raw_data_bytes_test.cpp @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
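+
+// column_raw_data_bytes() is the per-column raw size that the adaptive batch size
+// predictor's metadata hint is presumably derived from (total raw bytes / segment rows,
+// scaled by a safety factor in the predictor tests above); MockSegment lets tests set
+// it directly without building a real segment.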
+
+#include <gtest/gtest.h>
+
+#include "storage/segment/mock/mock_segment.h"
+
+namespace doris::segment_v2 {
+
+class SegmentColumnRawDataBytesTest : public testing::Test {};
+
+TEST_F(SegmentColumnRawDataBytesTest, ReturnsZeroForUnknownColumn) {
+ MockSegment seg;
+ EXPECT_EQ(seg.column_raw_data_bytes(999), 0);
+}
+
+TEST_F(SegmentColumnRawDataBytesTest, ReturnsSetValue) {
+ MockSegment seg;
+ seg.set_column_raw_data_bytes(1, 1024);
+ seg.set_column_raw_data_bytes(2, 2048);
+
+ EXPECT_EQ(seg.column_raw_data_bytes(1), 1024);
+ EXPECT_EQ(seg.column_raw_data_bytes(2), 2048);
+}
+
+TEST_F(SegmentColumnRawDataBytesTest, OverwritesPreviousValue) {
+ MockSegment seg;
+ seg.set_column_raw_data_bytes(1, 100);
+ EXPECT_EQ(seg.column_raw_data_bytes(1), 100);
+
+ seg.set_column_raw_data_bytes(1, 200);
+ EXPECT_EQ(seg.column_raw_data_bytes(1), 200);
+}
+
+TEST_F(SegmentColumnRawDataBytesTest, HandlesMultipleColumns) {
+ MockSegment seg;
+ for (int32_t uid = 0; uid < 50; uid++) {
+ seg.set_column_raw_data_bytes(uid, uid * 1000);
+ }
+ for (int32_t uid = 0; uid < 50; uid++) {
+ EXPECT_EQ(seg.column_raw_data_bytes(uid), uid * 1000);
+ }
+}
+
+TEST_F(SegmentColumnRawDataBytesTest, HandlesLargeByteValues) {
+ MockSegment seg;
+ uint64_t large_value = 1ULL << 40; // 1 TiB
+ seg.set_column_raw_data_bytes(1, large_value);
+ EXPECT_EQ(seg.column_raw_data_bytes(1), large_value);
+}
+
+} // namespace doris::segment_v2
diff --git a/be/test/storage/test_data/tablet_meta_test.hdr b/be/test/storage/test_data/tablet_meta_test.hdr
new file mode 100644
index 00000000000000..017dbd8e748e0d
Binary files /dev/null and b/be/test/storage/test_data/tablet_meta_test.hdr differ
diff --git a/be/test/testutil/mock/mock_runtime_state.h b/be/test/testutil/mock/mock_runtime_state.h
index e67e7c45ff2c0b..5e05ce8cf8a1f0 100644
--- a/be/test/testutil/mock/mock_runtime_state.h
+++ b/be/test/testutil/mock/mock_runtime_state.h
@@ -73,6 +73,19 @@ class MockRuntimeState : public RuntimeState {
 bool enable_use_hybrid_sort() const override { return false; }

+ // Bypass the [1MB, 512MB] clamping in RuntimeState so tests can use tiny
+ // byte budgets (e.g. 1 or 50) to exercise block-splitting logic.
+ // When adaptive is disabled, fall back to RuntimeState's behavior (kMax)
+ // so the value is always a legal byte budget; tests should gate on
+ // config::enable_adaptive_batch_size directly to detect the disabled state.
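+ // Example (hypothetical test usage): mark the option as set and assign a tiny value,
+ // e.g. via the thrift-generated setter __set_preferred_block_size_bytes(50) on the
+ // query options, and the override below returns 50 instead of the clamped value.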
+ size_t preferred_block_size_bytes() const override { + if (config::enable_adaptive_batch_size && + _query_options.__isset.preferred_block_size_bytes) { + return _query_options.preferred_block_size_bytes; + } + return RuntimeState::preferred_block_size_bytes(); + } + // default batch size int _batch_size = 4096; bool _enable_shared_exchange_sink_buffer = true; diff --git a/be/test/util/profile_spec_test.cpp b/be/test/util/profile_spec_test.cpp index a97816a83ecd15..9d2561416120b7 100644 --- a/be/test/util/profile_spec_test.cpp +++ b/be/test/util/profile_spec_test.cpp @@ -21,8 +21,11 @@ #include #include "common/object_pool.h" +#include "core/column/column_string.h" +#include "core/data_type/data_type_string.h" #include "exec/operator/exchange_sink_operator.h" #include "exec/operator/mock_operator.h" +#include "exec/operator/mock_scan_operator.h" #include "exec/operator/operator.h" #include "runtime/descriptors.h" #include "runtime/runtime_state.h" @@ -62,6 +65,26 @@ class ProfileSpecTest : public testing::Test { sink.__set_dest_node_id(1); } +protected: + template + void init_source_local_state(MockRuntimeState* runtime_state, Operator* op, + RuntimeProfile* parent_profile) { + const auto max_operator_id = op->operator_id() - 1; + runtime_state->resize_op_id_to_local_state(max_operator_id); + runtime_state->set_max_operator_id(max_operator_id); + LocalStateInfo info {parent_profile, {}, nullptr, {}, 0}; + ASSERT_TRUE(op->setup_local_state(runtime_state, info).ok()); + } + + Block make_string_block(std::string value) { + auto col = ColumnString::create(); + col->insert_data(value.data(), value.size()); + Block block; + block.insert( + ColumnWithTypeAndName(std::move(col), std::make_shared(), "c0")); + return block; + } + private: class MockOperatorX : public OperatorX { public: @@ -77,13 +100,27 @@ class ProfileSpecTest : public testing::Test { return Status::OK(); } }; - class MockRuntimeState : public RuntimeState { + class ProducingMockOperatorX : public OperatorX { public: - MockRuntimeState() = default; + ProducingMockOperatorX(ObjectPool* pool, const TPlanNode& tnode, int operator_id, + const DescriptorTbl& descs) + : OperatorX(pool, tnode, operator_id, descs) { + _op_name = "MOCK_OPERATOR"; + } - MOCK_CONST_METHOD0(enable_local_merge_sort, bool()); - }; + void set_output_block(Block block) { _block = std::move(block); } + + Status prepare(RuntimeState* state) override { return Status::OK(); } + Status close(RuntimeState* state) override { return Status::OK(); } + Status get_block(RuntimeState* state, Block* block, bool* eos) override { + *eos = true; + block->swap(_block); + return Status::OK(); + } + private: + Block _block; + }; std::unique_ptr obj_pool = std::make_unique(); TTableDescriptor tbl_desc; TScalarType scalar_type; @@ -110,8 +147,8 @@ TEST_F(ProfileSpecTest, SourceOperatorNameSuffixTest1) { MockOperatorX op(obj_pool.get(), tnode, 1, *descs); - RuntimeState* runtime_state = nullptr; - auto local_state = std::make_unique(runtime_state, &op); + auto runtime_state = std::make_unique(); + auto local_state = std::make_unique(runtime_state.get(), &op); ASSERT_EQ(local_state->name_suffix(), "(id=1)"); } @@ -127,8 +164,8 @@ TEST_F(ProfileSpecTest, SourceOperatorNameSuffixTest2) { MockOperatorX op(obj_pool.get(), tnode, 1, *descs); op._nereids_id = 100; - RuntimeState* runtime_state = nullptr; - auto local_state = std::make_unique(runtime_state, &op); + auto runtime_state = std::make_unique(); + auto local_state = std::make_unique(runtime_state.get(), &op); 
 ASSERT_EQ(local_state->name_suffix(), "(nereids_id=100)(id=1)");
 }

@@ -177,4 +214,30 @@ TEST_F(ProfileSpecTest, CommonCountersCustomCounters) {
 ASSERT_TRUE(local_state->operator_profile()->get_child("CommonCounters") != nullptr);
 }

+TEST_F(ProfileSpecTest, ScanSourceOperatorUpdatesOutputBlockByteCounters) {
+ MockScanOperatorX op;
+ std::unique_ptr<MockRuntimeState> runtime_state = std::make_unique<MockRuntimeState>();
+ RuntimeProfile parent_profile("parent");
+ init_source_local_state(runtime_state.get(), &op, &parent_profile);
+
+ Block expected = make_string_block("scan-output");
+ const auto expected_bytes = static_cast<int64_t>(expected.bytes());
+ op.set_output_block(std::move(expected));
+
+ Block output;
+ bool eos = false;
+ ASSERT_TRUE(op.get_block_after_projects(runtime_state.get(), &output, &eos).ok());
+ ASSERT_TRUE(eos);
+
+ auto* local_state = runtime_state->get_local_state(op.operator_id());
+ EXPECT_EQ(local_state->common_profile()->get_counter("RowsProduced")->value(), 1);
+ EXPECT_EQ(local_state->common_profile()->get_counter("BlocksProduced")->value(), 1);
+ EXPECT_EQ(local_state->common_profile()->get_counter("OutputBlockBytes")->value(),
+ expected_bytes);
+ EXPECT_EQ(local_state->common_profile()->get_counter("MaxOutputBlockBytes")->value(),
+ expected_bytes);
+ EXPECT_EQ(local_state->common_profile()->get_counter("MinOutputBlockBytes")->value(),
+ expected_bytes);
+}
+
 } // namespace doris
diff --git a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java
index e4627c56f57b2e..85655b56016c60 100644
--- a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java
+++ b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java
@@ -168,7 +168,7 @@ public int getNext() throws IOException {
 ArrayWritable value = reader.createValue();
 long startTime = System.nanoTime();
 int numRows = 0;
- for (; numRows < fetchSize; numRows++) {
+ for (; numRows < batchSize; numRows++) {
 if (!reader.next(key, value)) {
 break;
 }
diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/JniScanner.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/JniScanner.java
index 8bb8a664ccf516..4e2e23b5ae0851 100644
--- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/JniScanner.java
+++ b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/JniScanner.java
@@ -78,6 +78,10 @@ protected int getBatchSize() {
 return batchSize;
 }

+ public void setBatchSize(int batchSize) {
+ this.batchSize = batchSize;
+ }
+
 public VectorTable getTable() {
 return vectorTable;
 }
diff --git a/fe/be-java-extensions/java-common/src/test/java/org/apache/doris/common/jni/JniScannerTest.java b/fe/be-java-extensions/java-common/src/test/java/org/apache/doris/common/jni/JniScannerTest.java
index 97a5ad5ef3895b..74683955411801 100644
--- a/fe/be-java-extensions/java-common/src/test/java/org/apache/doris/common/jni/JniScannerTest.java
+++ b/fe/be-java-extensions/java-common/src/test/java/org/apache/doris/common/jni/JniScannerTest.java
@@ -61,4 +61,46 @@ public void testMockJniScanner() throws IOException {
 scanner.releaseTable();
 scanner.close();
 }
+
+ @Test
+ public void testSetBatchSize() throws IOException {
+ OffHeap.setTesting();
+ MockJniScanner scanner = new MockJniScanner(16, new HashMap<String, String>() {
+ {
+ 
put("mock_rows", "64"); + put("required_fields", "int"); + put("columns_types", "int"); + } + }); + scanner.open(); + + // First batch: batchSize = 16 + long metaAddress = scanner.getNextBatchMeta(); + Assert.assertNotEquals(0, metaAddress); + Assert.assertEquals(16, OffHeap.getLong(null, metaAddress)); + scanner.resetTable(); + + // Change batch size to 32 + scanner.setBatchSize(32); + Assert.assertEquals(32, scanner.getBatchSize()); + + // Second batch: should read 32 rows with updated batchSize + metaAddress = scanner.getNextBatchMeta(); + Assert.assertNotEquals(0, metaAddress); + Assert.assertEquals(32, OffHeap.getLong(null, metaAddress)); + scanner.resetTable(); + + // Third batch: only 16 rows remaining + metaAddress = scanner.getNextBatchMeta(); + Assert.assertNotEquals(0, metaAddress); + Assert.assertEquals(16, OffHeap.getLong(null, metaAddress)); + scanner.resetTable(); + + // EOF + metaAddress = scanner.getNextBatchMeta(); + Assert.assertEquals(0, metaAddress); + + scanner.releaseTable(); + scanner.close(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 9576f729584a5f..d8eddbd616bbfd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -162,6 +162,7 @@ public class SessionVariable implements Serializable, Writable { public static final int MIN_EXEC_MEM_LIMIT = 2097152; public static final String BATCH_SIZE = "batch_size"; public static final String BROKER_LOAD_BATCH_SIZE = "broker_load_batch_size"; + public static final String PREFERRED_BLOCK_SIZE_BYTES = "preferred_block_size_bytes"; public static final String DISABLE_STREAMING_PREAGGREGATIONS = "disable_streaming_preaggregations"; public static final String ENABLE_DISTINCT_STREAMING_AGGREGATION = "enable_distinct_streaming_aggregation"; public static final String ENABLE_STREAMING_AGG_HASH_JOIN_FORCE_PASSTHROUGH = @@ -1274,7 +1275,8 @@ public void checkQuerySlotCount(String slotCnt) { @VariableMgr.VarAttr(name = HAVE_QUERY_CACHE, flag = VariableMgr.READ_ONLY) public boolean haveQueryCache = false; - // 4096 minus 16 + 16 bytes padding that in padding pod array + // 8192 minus 16 + 16 bytes padding that in padding pod array. + // This remains the row cap for output blocks even when adaptive byte budgeting is enabled. @VariableMgr.VarAttr(name = BATCH_SIZE, fuzzy = true, checker = "checkBatchSize", needForward = true) public int batchSize = 8160; @@ -1282,7 +1284,18 @@ public void checkQuerySlotCount(String slotCnt) { @VariableMgr.VarAttr(name = BROKER_LOAD_BATCH_SIZE, fuzzy = true, checker = "checkBatchSize") public int brokerLoadBatchSize = 16352; + // Target output block size in bytes for adaptive batch size. + // Valid range: [1MB, 512MB]. Default 8MB. + @VariableMgr.VarAttr(name = PREFERRED_BLOCK_SIZE_BYTES, needForward = true, + checker = "checkPreferredBlockSizeBytes", + description = {"目标输出 Block 字节数上限,自适应 batch size 功能使用。" + + "范围 [1MB, 512MB],默认 8MB", + "Target output block size in bytes for adaptive batch size. " + + "Range [1MB, 512MB]. 
Default 8MB."}) + public long preferredBlockSizeBytes = 8388608L; // 8MB + @VariableMgr.VarAttr(name = DISABLE_STREAMING_PREAGGREGATIONS, fuzzy = true) + public boolean disableStreamPreaggregations = false; @VariableMgr.VarAttr(name = ENABLE_DISTINCT_STREAMING_AGGREGATION, fuzzy = true) @@ -5269,6 +5282,7 @@ public TQueryOptions toThrift() { tResult.setEnableShareHashTableForBroadcastJoin(enableShareHashTableForBroadcastJoin); tResult.setBatchSize(batchSize); + tResult.setPreferredBlockSizeBytes(preferredBlockSizeBytes); tResult.setDisableStreamPreaggregations(disableStreamPreaggregations); tResult.setEnableDistinctStreamingAggregation(enableDistinctStreamingAggregation); tResult.setEnableStreamingAggHashJoinForcePassthrough(enableStreamingAggHashJoinForcePassthrough); @@ -5944,6 +5958,20 @@ public void checkBatchSize(String batchSize) { } } + + private static final long PREFERRED_BLOCK_SIZE_BYTES_MIN = 1048576L; // 1MB + private static final long PREFERRED_BLOCK_SIZE_BYTES_MAX = 536870912L; // 512MB + + public void checkPreferredBlockSizeBytes(String value) { + long v = Long.parseLong(value); + if (v < PREFERRED_BLOCK_SIZE_BYTES_MIN || v > PREFERRED_BLOCK_SIZE_BYTES_MAX) { + throw new InvalidParameterException( + "preferred_block_size_bytes should be between 1MB (" + + PREFERRED_BLOCK_SIZE_BYTES_MIN + ") and 512MB (" + + PREFERRED_BLOCK_SIZE_BYTES_MAX + "), got " + v); + } + } + public void checkSkewRewriteAggBucketNum(String bucketNumStr) { try { long bucketNum = Long.parseLong(bucketNumStr); diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/VariableMgrTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/VariableMgrTest.java index 7004e85b5f2037..2cc9d43b31167b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/qe/VariableMgrTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/qe/VariableMgrTest.java @@ -35,6 +35,7 @@ import org.apache.doris.nereids.trees.plans.commands.SetOptionsCommand; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.types.BigIntType; +import org.apache.doris.thrift.TQueryOptions; import org.apache.doris.utframe.UtFrameUtils; import org.apache.commons.io.FileUtils; @@ -191,4 +192,49 @@ public void testCheckSqlConvertorFeatures() throws DdlException { VariableMgr.setVar(var, setVar); Assert.assertEquals(new String[] {""}, var.getSqlConvertorFeatures()); } + + @Test + public void testAdaptiveBatchSizeDefaultsToThrift() { + SessionVariable var = new SessionVariable(); + TQueryOptions options = var.toThrift(); + + Assert.assertEquals(8160, var.batchSize); + Assert.assertEquals(8160, options.getBatchSize()); + Assert.assertEquals(8388608L, options.getPreferredBlockSizeBytes()); + } + + @Test + public void testAdaptiveBatchSizeSessionVariables() throws Exception { + SessionVariable var = new SessionVariable(); + + VariableMgr.setVar(var, new SetVar(SetType.SESSION, SessionVariable.BATCH_SIZE, + new StringLiteral("12345"))); + VariableMgr.setVar(var, new SetVar(SetType.SESSION, SessionVariable.PREFERRED_BLOCK_SIZE_BYTES, + new StringLiteral("1048576"))); + + TQueryOptions options = var.toThrift(); + Assert.assertEquals(12345, var.batchSize); + Assert.assertEquals(1048576L, var.preferredBlockSizeBytes); + Assert.assertEquals(12345, options.getBatchSize()); + Assert.assertEquals(1048576L, options.getPreferredBlockSizeBytes()); + } + + @Test + public void testAdaptiveBatchSizeRejectsTinyNonZeroBytes() { + SessionVariable var = new SessionVariable(); + DdlException exception = 
Assert.assertThrows(DdlException.class, () -> VariableMgr.setVar(var, + new SetVar(SetType.SESSION, SessionVariable.PREFERRED_BLOCK_SIZE_BYTES, + new StringLiteral("1")))); + Assert.assertTrue(exception.getMessage().contains("preferred_block_size_bytes")); + } + + @Test + public void testAdaptiveBatchSizeRejectsZeroByteValues() { + SessionVariable var = new SessionVariable(); + + DdlException blockSizeException = Assert.assertThrows(DdlException.class, () -> VariableMgr.setVar(var, + new SetVar(SetType.SESSION, SessionVariable.PREFERRED_BLOCK_SIZE_BYTES, + new StringLiteral("0")))); + Assert.assertTrue(blockSizeException.getMessage().contains("preferred_block_size_bytes")); + } } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 3094be0bff3832..ff473f89e53dbe 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -472,10 +472,23 @@ struct TQueryOptions { 184: optional i32 cte_max_recursion_depth; + // Enable hybrid sorting: dynamically selects between PdqSort and TimSort based on // runtime profiling to choose the most efficient algorithm for the data pattern 210: optional bool enable_use_hybrid_sort = false; + 211: optional bool enable_adaptive_scan = false; + 212: optional bool enable_local_exchange_before_agg = true; + 213: optional double max_scan_mem_ratio = 0.3; + + // Use Rust-based Lance reader for FORMAT_LANCE scan ranges + 216: optional bool enable_rust_lance_reader = false; + 217: optional bool new_version_percentile = false + + // Adaptive batch size: target output block size in bytes. Valid range [1MB, 512MB]. + // Default 8MB. Sent by FE session variable preferred_block_size_bytes. + 218: optional i64 preferred_block_size_bytes = 8388608 + // For cloud, to control if the content would be written into file cache // In write path, to control if the content would be written into file cache. // In read path, read from file cache or remote storage when execute query. diff --git a/regression-test/data/query_p0/adaptive_batch_size/adaptive_batch_size.out b/regression-test/data/query_p0/adaptive_batch_size/adaptive_batch_size.out new file mode 100644 index 00000000000000..d42f122b65ff6a --- /dev/null +++ b/regression-test/data/query_p0/adaptive_batch_size/adaptive_batch_size.out @@ -0,0 +1,73 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !wide -- +1 1000 1000 1000 +10 1000 1000 1000 +11 1000 1000 1000 +12 1000 1000 1000 +13 1000 1000 1000 +14 1000 1000 1000 +15 1000 1000 1000 +16 1000 1000 1000 +17 1000 1000 1000 +18 1000 1000 1000 +19 1000 1000 1000 +2 1000 1000 1000 +20 1000 1000 1000 +21 1000 1000 1000 +22 1000 1000 1000 +23 1000 1000 1000 +24 1000 1000 1000 +25 1000 1000 1000 +26 1000 1000 1000 +27 1000 1000 1000 +28 1000 1000 1000 +29 1000 1000 1000 +3 1000 1000 1000 +30 1000 1000 1000 +31 1000 1000 1000 +32 1000 1000 1000 +33 1000 1000 1000 +34 1000 1000 1000 +35 1000 1000 1000 +36 1000 1000 1000 +37 1000 1000 1000 +38 1000 1000 1000 +39 1000 1000 1000 +4 1000 1000 1000 +40 1000 1000 1000 +41 1000 1000 1000 +42 1000 1000 1000 +43 1000 1000 1000 +44 1000 1000 1000 +45 1000 1000 1000 +46 1000 1000 1000 +47 1000 1000 1000 +48 1000 1000 1000 +49 1000 1000 1000 +5 1000 1000 1000 +50 1000 1000 1000 +6 1000 1000 1000 +7 1000 1000 1000 +8 1000 1000 1000 +9 1000 1000 1000 + +-- !narrow -- +24995000 37492500 49990000 + +-- !agg -- +1 3 +10 30 +2 6 +3 9 +4 12 +5 15 +6 18 +7 21 +8 24 +9 27 + +-- !unique -- +3000 4498500 + +-- !flag -- +4950 diff --git a/regression-test/suites/fault_injection_p0/test_skip_calc_between_segments.groovy b/regression-test/suites/fault_injection_p0/test_skip_calc_between_segments.groovy index 5a127335d25298..cdfcf5b4df1020 100644 --- a/regression-test/suites/fault_injection_p0/test_skip_calc_between_segments.groovy +++ b/regression-test/suites/fault_injection_p0/test_skip_calc_between_segments.groovy @@ -95,7 +95,8 @@ suite("test_skip_calc_between_segments", "nonConcurrent") { // to cause multi segments def customBeConfig = [ - doris_scanner_row_bytes : 1 + doris_scanner_row_bytes : 1, + enable_adaptive_batch_size: false ] setBeConfigTemporary(customBeConfig) { diff --git a/regression-test/suites/query_p0/adaptive_batch_size/adaptive_batch_size.groovy b/regression-test/suites/query_p0/adaptive_batch_size/adaptive_batch_size.groovy new file mode 100644 index 00000000000000..644f588a0c8a0e --- /dev/null +++ b/regression-test/suites/query_p0/adaptive_batch_size/adaptive_batch_size.groovy @@ -0,0 +1,208 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Regression tests for the Adaptive Batch Size feature. +// +// Design notes: +// - Each case runs the same query with the feature enabled and disabled, and +// asserts that results are identical (correctness check). +// - We do NOT directly assert internal block byte sizes, because the storage +// layer does not expose them via SQL result columns. Correctness is the +// primary requirement; performance / memory reduction is verified manually +// or via profile counters in a separate benchmark. 
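+//
+// Mechanics of each case (see set_adaptive below): flip the BE config
+// enable_adaptive_batch_size on every backend, pin the session variables
+// preferred_block_size_bytes and batch_size, run the query once per setting,
+// and assert the two result sets are identical. The finally block resets the
+// BE config and session variables to their defaults so later suites are unaffected.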
+ +suite("adaptive_batch_size") { + + // ── helpers ──────────────────────────────────────────────────────────────── + + def set_adaptive = { enabled -> + if (enabled) { + set_be_param("enable_adaptive_batch_size", "true") + sql "set preferred_block_size_bytes = 8388608" // 8 MB (default) + sql "set batch_size = 4096" + } else { + set_be_param("enable_adaptive_batch_size", "false") + sql "set preferred_block_size_bytes = 8388608" + sql "set batch_size = 4096" + } + } + + try { + // ── Test 1: wide table (VARCHAR columns) ────────────────────────────────── + // Each row is ~10 KB; with 4096 rows that is ~40 MB/batch which OOM-risks. + // With adaptive=on the batch is trimmed to ~8 MB worth of rows. + + sql "drop table if exists abs_wide_table" + sql """ + create table abs_wide_table ( + id int not null, + c1 varchar(4096), + c2 varchar(4096), + c3 varchar(4096) + ) + ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1") + """ + + // Insert 1000 rows with ~3 KB data each. + def wide_rows = (1..1000).collect { i -> + "(${i}, '${('a' * 1000)}', '${('b' * 1000)}', '${('c' * 1000)}')" + } + sql "insert into abs_wide_table values ${wide_rows.join(',')}" + + // Run query with adaptive enabled and collect result. + set_adaptive(true) + def res_enabled = sql "select id, length(c1) as l1, length(c2) as l2, length(c3) as l3 from abs_wide_table order by 1, 2, 3, 4" + + order_qt_wide "select id, length(c1) as l1, length(c2) as l2, length(c3) as l3 from abs_wide_table order by 1, 2, 3, 4 limit 50" + + // Run query with adaptive disabled and collect result. + set_adaptive(false) + def res_disabled = sql "select id, length(c1) as l1, length(c2) as l2, length(c3) as l3 from abs_wide_table order by 1, 2, 3, 4" + + // Results must be identical. + assertEquals(res_enabled.size(), res_disabled.size()) + for (int i = 0; i < res_enabled.size(); i++) { + assertEquals(res_enabled[i].toString(), res_disabled[i].toString()) + } + + + // ── Test 2: narrow table (INT columns) ─────────────────────────────────── + // Rows are ~12 bytes each; with adaptive=on the predictor should converge + // toward returning close to batch_size (batch is still row-limited). + + sql "drop table if exists abs_narrow_table" + sql """ + create table abs_narrow_table ( + id int not null, + c1 int, + c2 int, + c3 int + ) + ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1") + """ + + sql "insert into abs_narrow_table select number, number*2, number*3, number*4 from numbers('number'='5000')" + + set_adaptive(true) + def narrow_on = sql "select sum(c1), sum(c2), sum(c3) from abs_narrow_table" + + order_qt_narrow "select sum(c1), sum(c2), sum(c3) from abs_narrow_table" + + set_adaptive(false) + def narrow_off = sql "select sum(c1), sum(c2), sum(c3) from abs_narrow_table" + + assertEquals(narrow_on.toString(), narrow_off.toString()) + + + // ── Test 3: AGG_KEYS table ──────────────────────────────────────────────── + // Verifies that adaptive batch size does not break aggregation correctness + // (the byte check in _agg_key_next_block must only trigger at group boundaries). 
+ + sql "drop table if exists abs_agg_table" + sql """ + create table abs_agg_table ( + id int not null, + val bigint replace + ) + ENGINE=OLAP + AGGREGATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1") + """ + + // 2000 distinct keys, 3 rows per key → 6000 rows total. + def agg_rows = [] + for (int k = 1; k <= 2000; k++) { + agg_rows << "(${k}, ${k})" + agg_rows << "(${k}, ${k * 2})" + agg_rows << "(${k}, ${k * 3})" + } + sql "insert into abs_agg_table values ${agg_rows.join(',')}" + + set_adaptive(true) + def agg_on = sql "select id, val from abs_agg_table order by 1, 2 limit 10" + + order_qt_agg "select id, val from abs_agg_table order by 1, 2 limit 10" + + set_adaptive(false) + def agg_off = sql "select id, val from abs_agg_table order by 1, 2 limit 10" + + assertEquals(agg_on.toString(), agg_off.toString()) + + + // ── Test 4: UNIQUE_KEYS table ───────────────────────────────────────────── + // Verifies that adaptive byte-stop in _unique_key_next_block does not + // cause duplicate or missing rows. + + sql "drop table if exists abs_unique_table" + sql """ + create table abs_unique_table ( + id int not null, + name varchar(200) + ) + ENGINE=OLAP + UNIQUE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1") + """ + + sql "insert into abs_unique_table select number, repeat('x', 100) from numbers('number'='3000')" + + set_adaptive(true) + def uniq_on = sql "select count(*), sum(id) from abs_unique_table" + + order_qt_unique "select count(*), sum(id) from abs_unique_table" + + set_adaptive(false) + def uniq_off = sql "select count(*), sum(id) from abs_unique_table" + + assertEquals(uniq_on.toString(), uniq_off.toString()) + + + // ── Test 5: verify setting enable_adaptive_batch_size = false disables adaptive sizing ── + + sql "drop table if exists abs_flag_table" + sql """ + create table abs_flag_table (id int not null, v int) + ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1") + """ + sql "insert into abs_flag_table select number, number from numbers('number'='100')" + + set_adaptive(false) + def flag_off = sql "select sum(v) from abs_flag_table" + + order_qt_flag "select sum(v) from abs_flag_table" + + set_adaptive(true) + def flag_on = sql "select sum(v) from abs_flag_table" + + assertEquals(flag_off.toString(), flag_on.toString()) + } finally { + set_adaptive(true) + sql "set preferred_block_size_bytes = 8388608" + sql "set batch_size = 8160" + } +} diff --git a/regression-test/suites/unique_with_mow_c_p0/test_compact_multi_segments.groovy b/regression-test/suites/unique_with_mow_c_p0/test_compact_multi_segments.groovy index b9d3a28bfb6352..97ab4d5e8bed6a 100644 --- a/regression-test/suites/unique_with_mow_c_p0/test_compact_multi_segments.groovy +++ b/regression-test/suites/unique_with_mow_c_p0/test_compact_multi_segments.groovy @@ -45,13 +45,17 @@ suite("test_compact_multi_segments", "nonConcurrent") { // batch_size is 4164 in csv_reader.cpp // _batch_size is 8192 in vtablet_writer.cpp + def backendId_to_params = get_be_param("doris_scanner_row_bytes") + def backendId_to_adaptive_batch_size = get_be_param("enable_adaptive_batch_size") onFinish { GetDebugPoint().disableDebugPointForAllBEs("MemTable.need_flush") set_original_be_param("doris_scanner_row_bytes", backendId_to_params) + set_original_be_param("enable_adaptive_batch_size", backendId_to_adaptive_batch_size) } 
GetDebugPoint().enableDebugPointForAllBEs("MemTable.need_flush") set_be_param.call("doris_scanner_row_bytes", "1") + set_be_param.call("enable_adaptive_batch_size", "false") for (int j = 0; j < 2; j++) { tableName = "test_compact_multi_segments_" + j diff --git a/regression-test/suites/unique_with_mow_c_p0/test_schema_change_add_key_column.groovy b/regression-test/suites/unique_with_mow_c_p0/test_schema_change_add_key_column.groovy index f3d9429c74760f..f64bbfc5dc14a5 100644 --- a/regression-test/suites/unique_with_mow_c_p0/test_schema_change_add_key_column.groovy +++ b/regression-test/suites/unique_with_mow_c_p0/test_schema_change_add_key_column.groovy @@ -57,13 +57,16 @@ suite("test_schema_change_add_key_column", "nonConcurrent") { // batch_size is 4164 in csv_reader.cpp // _batch_size is 8192 in vtablet_writer.cpp def backendId_to_params = get_be_param("doris_scanner_row_bytes") + def backendId_to_adaptive_batch_size = get_be_param("enable_adaptive_batch_size") onFinish { GetDebugPoint().clearDebugPointsForAllBEs() set_original_be_param("doris_scanner_row_bytes", backendId_to_params) + set_original_be_param("enable_adaptive_batch_size", backendId_to_adaptive_batch_size) } GetDebugPoint().enableDebugPointForAllBEs("MemTable.need_flush") GetDebugPoint().enableDebugPointForAllBEs("VBaseSchemaChangeWithSorting._inner_process.create_rowset") set_be_param.call("doris_scanner_row_bytes", "1") + set_be_param.call("enable_adaptive_batch_size", "false") // 0: table without sequence_col; add a key col // 1: table without sequence_col; reorder cols