From 39e3b70c9d9aea854b5269010c4d629325d376f9 Mon Sep 17 00:00:00 2001 From: zhaochangle Date: Wed, 6 May 2026 01:28:17 +0800 Subject: [PATCH 1/4] [fix](be) Restore mutable ownership in COW paths Issue Number: N/A Related PR: N/A Problem Summary: assume_mutable now asserts exclusive ownership, so paths that moved columns out of blocks or borrowed shared subcolumns must return ownership explicitly or mutate through owning COW handles. This restores COW-safe mutation in block reuse, variant extraction, parquet/orc conversion, memtable aggregation, compaction readers, result buffering, and affected BE unit tests. None - Test: Unit Test - ./run-be-ut.sh --run -j 100 - ./run-be-ut.sh --run --filter=LocalExchangerTest.* - ./run-be-ut.sh --run --filter=ArrowResultBlockBufferTest.*:BitUtil.CountZero:Parameters/TestRowIdConversion.* - ./run-be-ut.sh --run --filter=VariantColumnWriterReaderTest.*:HierarchicalDataIteratorTest.* - ./run-be-ut.sh --run --filter=VariantColumnWriterReaderTest.test_nested_iter_nullable:VariantColumnWriterReaderTest.test_streaming_compaction_writer_streams_regular_array_paths_across_batches:TabletCooldownTest.normal - 
PATH=/mnt/disk6/common/ldb_toolchain_toucan/bin:$PATH ./build-support/clang-format.sh - git diff --cached --check - 
PATH=/mnt/disk6/common/ldb_toolchain_toucan/bin:$PATH ./build-support/check-format.sh (attempted; blocked by pre-existing formatting drift in be/src/exec/operator/distinct_streaming_aggregation_operator.cpp) - 
PATH=/mnt/disk6/common/ldb_toolchain_toucan/bin:$PATH ./build-support/run-clang-tidy.sh --build-dir be/ut_build_ASAN (attempted; blocked by existing clang-tidy analyzer errors from be/src/util/jni-util.h static_assert(false) and pre-existing diagnostics) - Behavior changed: No - Does this need documentation: No --- 
be/src/core/block/block.cpp | 18 +- be/src/core/column/column.cpp | 5 +- be/src/core/column/column.h | 12 +- be/src/core/column/column_array.cpp | 19 +- be/src/core/column/column_array.h | 14 +- be/src/core/column/column_const.cpp | 14 +- be/src/core/column/column_const.h | 9 +- be/src/core/column/column_map.cpp | 89 ++++++---- be/src/core/column/column_map.h | 5 +- be/src/core/column/column_nullable.cpp | 13 +- be/src/core/column/column_nullable.h | 6 +- be/src/core/column/column_varbinary.h | 7 +- be/src/core/column/column_variant.cpp | 78 +++++--- be/src/core/column/column_variant.h | 6 +- be/src/core/cow.h | 17 +- be/src/core/data_type/data_type_array.cpp | 2 +- be/src/core/data_type/data_type_map.cpp | 4 +- be/src/core/data_type/data_type_struct.cpp | 2 +- .../exec/common/hash_table/hash_map_context.h | 2 +- be/src/exec/common/variant_util.cpp | 11 +- be/src/exec/exchange/local_exchanger.cpp | 7 +- .../operator/aggregation_sink_operator.cpp | 17 +- .../exec/operator/cache_source_operator.cpp | 8 +- ...istinct_streaming_aggregation_operator.cpp | 129 +++++++------- be/src/exec/operator/hashjoin_build_sink.h | 2 +- .../join/process_hash_table_probe_impl.h | 7 +- be/src/exec/operator/operator.cpp | 4 +- be/src/exec/operator/repeat_operator.cpp | 2 + .../exec/operator/table_function_operator.cpp | 1 + be/src/exec/operator/union_sink_operator.h | 3 +- .../exec/operator/union_source_operator.cpp | 3 + be/src/exec/scan/file_scanner.cpp | 1 + be/src/exec/scan/scanner.h | 5 +- be/src/exec/sort/partition_sorter.cpp | 4 + be/src/exec/sort/vsorted_run_merger.cpp | 1 + be/src/exprs/function/cast/cast_to_variant.h | 4 +- be/src/exprs/function/function.cpp | 9 +- be/src/exprs/function/function_bitmap.cpp | 10 +- .../function/function_variant_element.cpp | 21 ++- be/src/format/column_type_convert.h | 74 ++++---- be/src/format/orc/vorc_reader.cpp | 93 +++++++--- .../format/parquet/parquet_column_convert.h | 136 +++++++++----- .../format/parquet/vparquet_column_reader.cpp | 22 
++- .../format/parquet/vparquet_column_reader.h | 1 + be/src/load/memtable/memtable.cpp | 32 ++-- be/src/load/memtable/memtable.h | 6 +- be/src/runtime/query_cache/query_cache.cpp | 7 +- be/src/runtime/result_block_buffer.cpp | 6 +- .../iterator/vertical_block_reader.cpp | 18 +- .../variant/binary_column_extract_iterator.h | 4 +- .../variant/hierarchical_data_iterator.cpp | 42 +++-- .../variant/hierarchical_data_iterator.h | 1 + .../variant/variant_column_writer_impl.cpp | 18 +- .../variant_streaming_compaction_writer.cpp | 13 +- be/test/core/block/column_nullable_test.cpp | 2 +- be/test/core/column/column_array_test.cpp | 3 +- be/test/core/column/column_ip_test.cpp | 98 +++++----- be/test/core/column/column_variant_test.cpp | 11 +- be/test/core/column/common_column_test.h | 12 +- .../data_type/common_data_type_serder_test.h | 2 +- .../data_type_serde_csv_test.cpp | 13 +- .../data_type_serde_struct_test.cpp | 7 +- be/test/exec/column_type_convert_test.cpp | 126 +++++-------- .../exec/common/schema_util_rowset_test.cpp | 1 + be/test/exec/common/schema_util_test.cpp | 7 +- .../exec/connector/vjdbc_connector_test.cpp | 69 +++++-- .../function_variant_element_test.cpp | 4 +- .../native/native_reader_writer_test.cpp | 1 + .../format/parquet/parquet_thrift_test.cpp | 11 +- .../delta_writer_cluster_key_test.cpp | 3 +- .../load/delta_writer/delta_writer_test.cpp | 8 +- .../memtable/memtable_memory_limiter_test.cpp | 1 + be/test/runtime/snapshot_loader_test.cpp | 1 + be/test/runtime/stream_load_parquet_test.cpp | 32 ++-- .../adaptive_thread_pool_controller_test.cpp | 9 +- .../ordered_data_compaction_test.cpp | 2 + .../compaction/segcompaction_mow_test.cpp | 34 ++-- .../storage/compaction/segcompaction_test.cpp | 56 +++--- .../compaction/vertical_compaction_test.cpp | 16 +- .../storage/index/date_bloom_filter_test.cpp | 2 + be/test/storage/index/index_builder_test.cpp | 36 ++++ .../common/inverted_index_gc_binlogs_test.cpp | 1 + .../util/index_compaction_utils.cpp | 4 +- 
be/test/storage/rowid_conversion_test.cpp | 1 + .../storage/segment/segment_cache_test.cpp | 7 +- .../segments_key_bounds_truncation_test.cpp | 1 + .../variant_column_writer_reader_test.cpp | 168 +++++------------- .../storage/tablet/tablet_cooldown_test.cpp | 5 +- be/test/util/bit_util_test.cpp | 32 ++-- 89 files changed, 1081 insertions(+), 749 deletions(-) diff --git a/be/src/core/block/block.cpp b/be/src/core/block/block.cpp index 2bb156325443e3..bde0f01e9d0fdf 100644 --- a/be/src/core/block/block.cpp +++ b/be/src/core/block/block.cpp @@ -581,7 +581,7 @@ MutableColumns Block::mutate_columns() { MutableColumns columns(num_columns); for (size_t i = 0; i < num_columns; ++i) { DCHECK(data[i].type); - columns[i] = data[i].column ? (*std::move(data[i].column)).mutate() + columns[i] = data[i].column ? IColumn::mutate(std::move(data[i].column)) : data[i].type->create_column(); } return columns; @@ -655,9 +655,11 @@ void Block::clear_column_data(int64_t column_size) noexcept { } for (auto& d : data) { if (d.column) { - // Temporarily disable reference count check because a column might be referenced multiple times within a block. 
- // Queries like this: `select c, c from t1;` - (*std::move(d.column)).assume_mutable()->clear(); + if (d.column->is_exclusive()) { + d.column->assume_mutable()->clear(); + } else { + d.column = d.column->clone_empty(); + } } } } @@ -1085,7 +1087,13 @@ void Block::shrink_char_type_column_suffix_zero(const std::vector& char_ for (auto idx : char_type_idx) { if (idx < data.size()) { auto& col_and_name = this->get_by_position(idx); - col_and_name.column->assume_mutable()->shrink_padding_chars(); + if (col_and_name.column->is_exclusive()) { + col_and_name.column->assume_mutable()->shrink_padding_chars(); + } else { + auto mutable_col = std::move(*col_and_name.column).mutate(); + mutable_col->shrink_padding_chars(); + col_and_name.column = std::move(mutable_col); + } } } } diff --git a/be/src/core/column/column.cpp b/be/src/core/column/column.cpp index b0056e3d4377bd..3fea47f93887ec 100644 --- a/be/src/core/column/column.cpp +++ b/be/src/core/column/column.cpp @@ -232,10 +232,11 @@ bool is_column_const(const IColumn& column) { void IColumn::check_const_only_in_top_level() const { ColumnCallback throw_if_const = [&](WrappedPtr& column) { - if (is_column_const(*column)) { + const ColumnPtr& col = const_cast(column); + if (is_column_const(*col)) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "const column is not allowed to be nested, but got {}", - column->get_name()); + col->get_name()); } }; const_cast(this)->for_each_subcolumn(throw_if_const); diff --git a/be/src/core/column/column.h b/be/src/core/column/column.h index d20ecc9d820846..c48c7a55da84cd 100644 --- a/be/src/core/column/column.h +++ b/be/src/core/column/column.h @@ -581,16 +581,20 @@ class IColumn : public COW { MutablePtr mutate() const&& { MutablePtr res = shallow_mutate(); - res->for_each_subcolumn( - [](WrappedPtr& subcolumn) { subcolumn = std::move(*subcolumn).mutate(); }); + res->for_each_subcolumn([](WrappedPtr& subcolumn) { + static_cast(subcolumn) = + 
std::move(*static_cast(subcolumn)).mutate(); + }); return res; } static MutablePtr mutate(Ptr ptr) { MutablePtr res = ptr->shallow_mutate(); /// Now use_count is 2. ptr.reset(); /// Reset use_count to 1. - res->for_each_subcolumn( - [](WrappedPtr& subcolumn) { subcolumn = std::move(*subcolumn).mutate(); }); + res->for_each_subcolumn([](WrappedPtr& subcolumn) { + static_cast(subcolumn) = + std::move(*static_cast(subcolumn)).mutate(); + }); return res; } diff --git a/be/src/core/column/column_array.cpp b/be/src/core/column/column_array.cpp index 6de4d96cc326f7..7022d8f5aaaf70 100644 --- a/be/src/core/column/column_array.cpp +++ b/be/src/core/column/column_array.cpp @@ -63,7 +63,9 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& of // } // #endif check_const_only_in_top_level(); - const auto* offsets_concrete = typeid_cast(offsets.get()); + // Use const access to avoid triggering assume_mutable_ref() during construction. + const auto* offsets_concrete = + typeid_cast(static_cast(offsets).get()); if (!offsets_concrete) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "offsets_column must be a ColumnUInt64"); @@ -98,6 +100,21 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nest offsets = ColumnOffsets::create(); } +ColumnArray::ColumnArray(SharedTag, ColumnPtr nested_column, ColumnPtr offsets_column) { + if (is_column_const(*nested_column)) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "const column is not allowed to be nested, but got {}", + nested_column->get_name()); + } + if (is_column_const(*offsets_column)) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "const column is not allowed to be nested, but got {}", + offsets_column->get_name()); + } + static_cast(data) = std::move(nested_column); + static_cast(offsets) = std::move(offsets_column); +} + void ColumnArray::shrink_padding_chars() { data->shrink_padding_chars(); } diff --git a/be/src/core/column/column_array.h 
b/be/src/core/column/column_array.h index c11547bdbf5e2d..81c75f774d243a 100644 --- a/be/src/core/column/column_array.h +++ b/be/src/core/column/column_array.h @@ -75,6 +75,10 @@ class ColumnArray final : public COWHelper { /** Create an empty column of arrays with the type of values as in the column `nested_column` */ explicit ColumnArray(MutableColumnPtr&& nested_column); + /** Create an array column with shared (possibly non-exclusive) nested column and offsets. */ + struct SharedTag {}; + ColumnArray(SharedTag, ColumnPtr nested_column, ColumnPtr offsets_column); + ColumnArray(const ColumnArray&) = default; ColumnArray() = default; @@ -98,12 +102,16 @@ class ColumnArray final : public COWHelper { using Base = COWHelper; static MutablePtr create(const ColumnPtr& nested_column, const ColumnPtr& offsets_column) { - return ColumnArray::create(nested_column->assume_mutable(), - offsets_column->assume_mutable()); + // Construct with shared columns preserved (no cloning), as create(ColumnPtr) is designed + // to accept immutable/shared arguments per the COW contract. + return Base::create(SharedTag {}, nested_column, offsets_column); } static MutablePtr create(const ColumnPtr& nested_column) { - return ColumnArray::create(nested_column->assume_mutable()); + // Construct with shared columns preserved (no cloning), as create(ColumnPtr) is designed + // to accept immutable/shared arguments per the COW contract. 
+ ColumnPtr empty_offsets = ColumnOffsets::create(); + return Base::create(SharedTag {}, nested_column, std::move(empty_offsets)); } template empty() != create_with_empty) { + const IColumn& col = get_data_column(); + if (col.empty() != create_with_empty) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Incorrect size of nested column in constructor of ColumnConst: {}, " "create_with_empty: {}.", - data->size(), create_with_empty); + col.size(), create_with_empty); } - if (data->size() != 1 && !create_with_empty) { + if (col.size() != 1 && !create_with_empty) { throw doris::Exception( ErrorCode::INTERNAL_ERROR, "Incorrect size of nested column in constructor of ColumnConst: {}, must be 1.", - data->size()); + col.size()); } } @@ -108,7 +109,10 @@ void ColumnConst::get_permutation(bool /*reverse*/, size_t /*limit*/, int /*nan_ } void ColumnConst::replace_float_special_values() { - data->replace_float_special_values(); + // COW: get exclusive ownership of data before mutating + auto mutable_data = IColumn::mutate(std::move(static_cast(data))); + mutable_data->replace_float_special_values(); + data = std::move(mutable_data); } std::pair check_column_const_set_readability(const IColumn& column, diff --git a/be/src/core/column/column_const.h b/be/src/core/column/column_const.h index 92a86628526384..7f648ece468dd1 100644 --- a/be/src/core/column/column_const.h +++ b/be/src/core/column/column_const.h @@ -240,7 +240,8 @@ class ColumnConst final : public COWHelper { bool has_enough_capacity(const IColumn& src) const override { return true; } int compare_at(size_t, size_t, const IColumn& rhs, int nan_direction_hint) const override { - auto rhs_const_column = assert_cast(rhs); + const auto& rhs_const_column = + assert_cast(rhs); const auto* this_nullable = check_and_get_column(data.get()); const auto* rhs_nullable = @@ -321,7 +322,11 @@ class ColumnConst final : public COWHelper { size_t deserialize_impl(const char* pos) override { ++s; - return 
data->deserialize_impl(pos); + ColumnPtr owned = std::move(static_cast(data)); + auto mutable_data = IColumn::mutate(std::move(owned)); + size_t ret = mutable_data->deserialize_impl(pos); + data = std::move(mutable_data); + return ret; } void replace_float_special_values() override; diff --git a/be/src/core/column/column_map.cpp b/be/src/core/column/column_map.cpp index 48db377d888b75..30019da155f3b6 100644 --- a/be/src/core/column/column_map.cpp +++ b/be/src/core/column/column_map.cpp @@ -518,35 +518,54 @@ void ColumnMap::insert_range_from_ignore_overflow(const IColumn& src, size_t sta } ColumnPtr ColumnMap::filter(const Filter& filt, ssize_t result_size_hint) const { - auto k_arr = - ColumnArray::create(keys_column->assume_mutable(), offsets_column->assume_mutable()) - ->filter(filt, result_size_hint); - auto v_arr = - ColumnArray::create(values_column->assume_mutable(), offsets_column->assume_mutable()) - ->filter(filt, result_size_hint); + // For const filter we must clone subcolumns so the original ColumnMap remains intact. + // IColumn::mutate(copy) clones if use_count>1, returns self if exclusive. 
+ auto offsets_mut = IColumn::mutate(static_cast(offsets_column)); + MutableColumnPtr offsets_copy = offsets_mut->clone_empty(); + offsets_copy->insert_range_from(*offsets_mut, 0, offsets_mut->size()); + auto k_arr = ColumnArray::create(IColumn::mutate(static_cast(keys_column)), + std::move(offsets_mut)) + ->filter(filt, result_size_hint); + auto v_arr = ColumnArray::create(IColumn::mutate(static_cast(values_column)), + std::move(offsets_copy)) + ->filter(filt, result_size_hint); return ColumnMap::create(assert_cast(*k_arr).get_data_ptr(), assert_cast(*v_arr).get_data_ptr(), assert_cast(*k_arr).get_offsets_ptr()); } size_t ColumnMap::filter(const Filter& filter) { - MutableColumnPtr copied_off = offsets_column->clone_empty(); - copied_off->insert_range_from(*offsets_column, 0, offsets_column->size()); - ColumnArray::create(keys_column->assume_mutable(), offsets_column->assume_mutable()) - ->filter(filter); - ColumnArray::create(values_column->assume_mutable(), copied_off->assume_mutable()) - ->filter(filter); - return get_offsets().size(); + // Move subcolumns out of this ColumnMap to get exclusive ownership, then write back. 
+ auto keys_mut = IColumn::mutate(std::move(static_cast(keys_column))); + auto offsets_mut = IColumn::mutate(std::move(static_cast(offsets_column))); + auto values_mut = IColumn::mutate(std::move(static_cast(values_column))); + // Clone offsets for values (both keys and values share the same offsets structure) + MutableColumnPtr copied_off = offsets_mut->clone_empty(); + copied_off->insert_range_from(*offsets_mut, 0, offsets_mut->size()); + auto k_arr = ColumnArray::create(std::move(keys_mut), std::move(offsets_mut)); + k_arr->filter(filter); + auto v_arr = ColumnArray::create(std::move(values_mut), std::move(copied_off)); + v_arr->filter(filter); + // Put filtered subcolumns back + static_cast(keys_column) = k_arr->get_data_ptr(); + static_cast(offsets_column) = k_arr->get_offsets_ptr(); + static_cast(values_column) = v_arr->get_data_ptr(); + // Use const access to avoid assume_mutable_ref() on the just-written-back offsets_column + // (k_arr still holds a ref, so use_count > 1 until k_arr goes out of scope) + return static_cast(offsets_column)->size(); } MutableColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const { - // Make a temp column array - auto k_arr = - ColumnArray::create(keys_column->assume_mutable(), offsets_column->assume_mutable()) - ->permute(perm, limit); - auto v_arr = - ColumnArray::create(values_column->assume_mutable(), offsets_column->assume_mutable()) - ->permute(perm, limit); + // Const permute: clone subcolumns so the original ColumnMap remains intact. 
+ auto offsets_mut = IColumn::mutate(static_cast(offsets_column)); + MutableColumnPtr offsets_copy = offsets_mut->clone_empty(); + offsets_copy->insert_range_from(*offsets_mut, 0, offsets_mut->size()); + auto k_arr = ColumnArray::create(IColumn::mutate(static_cast(keys_column)), + std::move(offsets_mut)) + ->permute(perm, limit); + auto v_arr = ColumnArray::create(IColumn::mutate(static_cast(values_column)), + std::move(offsets_copy)) + ->permute(perm, limit); return ColumnMap::create(assert_cast(*k_arr).get_data_ptr(), assert_cast(*v_arr).get_data_ptr(), @@ -554,23 +573,27 @@ MutableColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const } Status ColumnMap::deduplicate_keys(bool recursive) { - const auto inner_rows = keys_column->size(); - const auto rows = offsets_column->size(); + const IColumn& ck = *static_cast(keys_column); + const IColumn& co = *static_cast(offsets_column); + const auto inner_rows = ck.size(); + const auto rows = co.size(); if (recursive) { - auto values_column_ = values_column; + const IColumn::Ptr& values_ptr = static_cast(values_column); + IColumn::Ptr values_column_ = values_ptr; if (values_column_->is_nullable()) { - values_column_ = (assert_cast(*values_column)).get_nested_column_ptr(); + values_column_ = + assert_cast(*values_column_).get_nested_column_ptr(); } if (auto* values_map = check_and_get_column(values_column_.get())) { - RETURN_IF_ERROR(values_map->deduplicate_keys(recursive)); + RETURN_IF_ERROR(const_cast(values_map)->deduplicate_keys(recursive)); } } DorisVector serialized_keys(inner_rows); - const size_t max_one_row_byte_size = keys_column->get_max_row_byte_size(); + const size_t max_one_row_byte_size = ck.get_max_row_byte_size(); size_t total_bytes = max_one_row_byte_size * inner_rows; Arena pool; @@ -579,7 +602,7 @@ Status ColumnMap::deduplicate_keys(bool recursive) { // reach mem limit, don't serialize in batch const char* begin = nullptr; for (size_t i = 0; i != inner_rows; ++i) { - 
serialized_keys[i] = keys_column->serialize_value_into_arena(i, pool, begin); + serialized_keys[i] = ck.serialize_value_into_arena(i, pool, begin); } } else { auto* serialized_key_buffer = reinterpret_cast(pool.alloc(total_bytes)); @@ -590,7 +613,7 @@ Status ColumnMap::deduplicate_keys(bool recursive) { serialized_keys[i].size = 0; } - keys_column->serialize(serialized_keys.data(), inner_rows); + ck.serialize(serialized_keys.data(), inner_rows); } auto new_offsets = COffsets::create(); @@ -598,7 +621,7 @@ Status ColumnMap::deduplicate_keys(bool recursive) { auto& new_offsets_data = new_offsets->get_data(); IColumn::Filter filter(inner_rows, 1); - auto& offsets = get_offsets(); + const auto& offsets = static_cast(this)->get_offsets(); Offset64 offset = 0; bool has_duplicated_key = false; @@ -636,8 +659,12 @@ Status ColumnMap::deduplicate_keys(bool recursive) { if (has_duplicated_key) { offsets_column = std::move(new_offsets); - keys_column->filter(filter); - values_column->filter(filter); + auto keys_mut = IColumn::mutate(std::move(static_cast(keys_column))); + keys_mut->filter(filter); + static_cast(keys_column) = std::move(keys_mut); + auto values_mut = IColumn::mutate(std::move(static_cast(values_column))); + values_mut->filter(filter); + static_cast(values_column) = std::move(values_mut); } return Status::OK(); diff --git a/be/src/core/column/column_map.h b/be/src/core/column/column_map.h index 12f8fe4f8184ab..e1fb77ca9056b6 100644 --- a/be/src/core/column/column_map.h +++ b/be/src/core/column/column_map.h @@ -61,8 +61,9 @@ class ColumnMap final : public COWHelper { static MutablePtr create(const ColumnPtr& keys, const ColumnPtr& values, const ColumnPtr& offsets) { - return ColumnMap::create(keys->assume_mutable(), values->assume_mutable(), - offsets->assume_mutable()); + // Mutate to ensure exclusive ownership required by the constructor's non-const WrappedPtr access. 
+ return ColumnMap::create(IColumn::mutate(keys), IColumn::mutate(values), + IColumn::mutate(offsets)); } template (get_null_map_column()).get_data().data(); if (_nested_column->support_replace_column_null_data()) { // nullmap process is slow, replace null data to default value to avoid nullmap process - _nested_column->assume_mutable()->replace_column_null_data(real_null_data); + // This is an intentional in-place mutation inside a logically-const hash computation: + // null positions are overwritten with defaults so the inner hash loop needs no null checks. + auto nested_mut = std::move(*static_cast(_nested_column)).mutate(); + nested_mut->replace_column_null_data(real_null_data); + static_cast(const_cast(_nested_column)) = std::move(nested_mut); _nested_column->update_crc32c_batch(hashes, nullptr); } else { auto s = size(); @@ -380,12 +384,15 @@ size_t ColumnNullable::filter(const Filter& filter) { Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) { auto* nullable_col_ptr = assert_cast(col_ptr); - WrappedPtr nest_col_ptr = nullable_col_ptr->_nested_column; + // Access the nested column via const path to avoid assume_mutable_ref (which requires + // exclusive ownership). The output col_ptr was just created, so its nested column is exclusive. 
+ IColumn* nest_col_raw = const_cast( + static_cast(nullable_col_ptr->_nested_column).get()); /// `get_null_map_data` will set `_need_update_has_null` to true auto& res_nullmap = nullable_col_ptr->get_null_map_data(); - RETURN_IF_ERROR(get_nested_column().filter_by_selector(sel, sel_size, nest_col_ptr.get())); + RETURN_IF_ERROR(get_nested_column().filter_by_selector(sel, sel_size, nest_col_raw)); DCHECK(res_nullmap.empty()); res_nullmap.resize(sel_size); auto& cur_nullmap = get_null_map_column().get_data(); diff --git a/be/src/core/column/column_nullable.h b/be/src/core/column/column_nullable.h index 4a9d82102a4f45..9566b36cea3c32 100644 --- a/be/src/core/column/column_nullable.h +++ b/be/src/core/column/column_nullable.h @@ -64,8 +64,10 @@ class ColumnNullable final : public COWHelper { */ using Base = COWHelper; static MutablePtr create(const ColumnPtr& nested_column_, const ColumnPtr& null_map_) { - return ColumnNullable::create(nested_column_->assume_mutable(), - null_map_->assume_mutable()); + // Mutate to ensure exclusive ownership: the constructor accesses subcolumns via non-const + // WrappedPtr path which requires use_count() == 1. IColumn::mutate(Ptr) safely clones if + // shared (use_count > 1) or takes ownership if already exclusive. 
+ return ColumnNullable::create(IColumn::mutate(nested_column_), IColumn::mutate(null_map_)); } template diff --git a/be/src/core/column/column_varbinary.h b/be/src/core/column/column_varbinary.h index 673059194face5..caad77e28ad44f 100644 --- a/be/src/core/column/column_varbinary.h +++ b/be/src/core/column/column_varbinary.h @@ -44,7 +44,12 @@ class ColumnVarbinary final : public COWHelper { private: ColumnVarbinary() = default; ColumnVarbinary(const size_t n) : _data(n) {} - ColumnVarbinary(const ColumnVarbinary& src) : _data(src._data.begin(), src._data.end()) {} + ColumnVarbinary(const ColumnVarbinary& src) { + _data.reserve(src._data.size()); + for (const auto& value : src._data) { + insert_data(value.data(), value.size()); + } + } public: std::string get_name() const override { return "ColumnVarbinary"; } diff --git a/be/src/core/column/column_variant.cpp b/be/src/core/column/column_variant.cpp index dd9429c9ebfd5b..7e9aae62fb8469 100644 --- a/be/src/core/column/column_variant.cpp +++ b/be/src/core/column/column_variant.cpp @@ -484,7 +484,7 @@ MutableColumnPtr ColumnVariant::apply_for_columns(Func&& func) const { auto& finalized_object = assert_cast(*finalized); return finalized_object.apply_for_columns(std::forward(func)); } - auto new_root = func(get_root())->assume_mutable(); + auto new_root = std::move(*func(get_root())).mutate(); auto res = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode, get_root_type(), std::move(new_root)); for (const auto& subcolumn : subcolumns) { @@ -492,16 +492,16 @@ MutableColumnPtr ColumnVariant::apply_for_columns(Func&& func) const { continue; } auto new_subcolumn = func(subcolumn->data.get_finalized_column_ptr()); - if (!res->add_sub_column(subcolumn->path, new_subcolumn->assume_mutable(), + if (!res->add_sub_column(subcolumn->path, std::move(*new_subcolumn).mutate(), subcolumn->data.get_least_common_type())) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "add path {} is error", subcolumn->path.get_path()); 
} } auto sparse_column = func(serialized_sparse_column); - res->serialized_sparse_column = sparse_column->assume_mutable(); + res->serialized_sparse_column = IColumn::mutate(std::move(sparse_column)); auto doc_value_column = func(serialized_doc_value_column); - res->serialized_doc_value_column = doc_value_column->assume_mutable(); + res->serialized_doc_value_column = IColumn::mutate(std::move(doc_value_column)); res->num_rows = res->serialized_sparse_column->size(); ENABLE_CHECK_CONSISTENCY(res.get()); return res; @@ -942,6 +942,10 @@ bool ColumnVariant::Subcolumn::is_null_at(size_t n) const { } ind -= part->size(); } + // Remaining rows are pending lazy defaults (current_num_of_defaults suffix). + if (ind < current_num_of_defaults) { + return true; + } throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range", n); } @@ -972,6 +976,11 @@ void ColumnVariant::Subcolumn::get(size_t n, FieldWithDataType& res) const { ind -= part->size(); } + // Remaining rows are pending lazy defaults (current_num_of_defaults suffix). + if (ind < current_num_of_defaults) { + res = FieldWithDataType(Field()); + return; + } throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range", n); } @@ -2059,14 +2068,13 @@ Status ColumnVariant::serialize_sparse_columns( /// directly as NestedGroup data by the writer (VariantColumnWriterImpl). 
void ColumnVariant::unnest(Subcolumns::NodePtr& entry, Subcolumns& res_subcolumns) const { entry->data.finalize(); - auto nested_column = entry->data.get_finalized_column_ptr()->assume_mutable(); + auto nested_column = std::move(*entry->data.get_finalized_column_ptr()).mutate(); auto* nested_column_nullable = assert_cast(nested_column.get()); auto* nested_column_array = - assert_cast(nested_column_nullable->get_nested_column_ptr().get()); + assert_cast(&nested_column_nullable->get_nested_column()); auto& offset = nested_column_array->get_offsets_ptr(); - auto* nested_object_nullable = assert_cast( - nested_column_array->get_data_ptr()->assume_mutable().get()); + auto* nested_object_nullable = assert_cast(&nested_column_array->get_data()); auto& nested_object_column = assert_cast(nested_object_nullable->get_nested_column()); PathInData nested_path = entry->path; @@ -2082,13 +2090,18 @@ void ColumnVariant::unnest(Subcolumns::NodePtr& entry, Subcolumns& res_subcolumn path_builder.append(nested_entry->path.get_parts(), true); auto subnested_column = ColumnArray::create( ColumnNullable::create(nested_entry->data.get_finalized_column_ptr(), - nested_object_nullable->get_null_map_column_ptr()), + static_cast(nested_object_nullable) + ->get_null_map_column() + .get_ptr()), offset); - auto nullable_subnested_column = ColumnNullable::create( - std::move(subnested_column), nested_column_nullable->get_null_map_column_ptr()); + auto nullable_subnested_column = + ColumnNullable::create(std::move(subnested_column), + static_cast(nested_column_nullable) + ->get_null_map_column() + .get_ptr()); auto type = make_nullable( std::make_shared(nested_entry->data.least_common_type.get())); - Subcolumn subcolumn(nullable_subnested_column->assume_mutable(), type, is_nullable); + Subcolumn subcolumn(std::move(nullable_subnested_column), type, is_nullable); res_subcolumns.add(path_builder.build(), subcolumn); } } @@ -2101,7 +2114,7 @@ void ColumnVariant::clear_sparse_column() { } #endif - 
serialized_sparse_column->clear(); + serialized_sparse_column = ColumnPtr(create_binary_column_fn()); } Status ColumnVariant::convert_typed_path_to_storage_type( @@ -2313,7 +2326,7 @@ ColumnPtr ColumnVariant::filter(const Filter& filter, ssize_t count) const { ENABLE_CHECK_CONSISTENCY(res.get()); return res; } - auto new_root = get_root()->filter(filter, count)->assume_mutable(); + auto new_root = std::move(*get_root()->filter(filter, count)).mutate(); auto new_column = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode, get_root_type(), std::move(new_root)); for (const auto& entry : subcolumns) { @@ -2321,7 +2334,7 @@ ColumnPtr ColumnVariant::filter(const Filter& filter, ssize_t count) const { continue; } auto subcolumn = entry->data.get_finalized_column().filter(filter, -1); - new_column->add_sub_column(entry->path, subcolumn->assume_mutable(), + new_column->add_sub_column(entry->path, std::move(*subcolumn).mutate(), entry->data.get_least_common_type()); } new_column->serialized_sparse_column = serialized_sparse_column->filter(filter, count); @@ -2368,8 +2381,10 @@ void ColumnVariant::clear() { // we must keep root column exist empty.create_root(Subcolumn(0, is_nullable, true)); std::swap(empty, subcolumns); - serialized_sparse_column->clear(); - serialized_doc_value_column->clear(); + // Reassign to fresh empty columns to avoid requiring exclusive ownership. + // The existing columns may be shared (use_count > 1) so we cannot clear them in-place. 
+ serialized_sparse_column = ColumnPtr(create_binary_column_fn()); + serialized_doc_value_column = ColumnPtr(create_binary_column_fn()); num_rows = 0; _prev_positions.clear(); ENABLE_CHECK_CONSISTENCY(this); @@ -2769,10 +2784,26 @@ void ColumnVariant::fill_path_column_from_sparse_data(Subcolumn& subcolumn, Null MutableColumnPtr ColumnVariant::clone() const { auto res = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode); + // Copy typed_path_count and nested_path_count so the subcolumn limit logic is consistent. + res->typed_path_count = typed_path_count; + res->nested_path_count = nested_path_count; Subcolumns new_subcolumns; for (const auto& subcolumn : subcolumns) { - auto new_subcolumn = subcolumn->data; - if (subcolumn->data.is_root) { + // Struct-copy all metadata (num_rows, num_of_defaults_in_prefix, + // current_num_of_defaults, data_types, etc.), then deep-clone data WrappedPtrs. + Subcolumn new_subcolumn = subcolumn->data; + for (auto& wp : new_subcolumn.data) { + static_cast(wp) = + std::move(*static_cast(wp)).mutate(); + } + // Flush pending lazy defaults into actual data so that the cloned subcolumn + // is self-consistent (current_num_of_defaults == 0 after clone). 
+ if (new_subcolumn.current_num_of_defaults > 0) { + size_t pending = new_subcolumn.current_num_of_defaults; + new_subcolumn.current_num_of_defaults = 0; + new_subcolumn.insert_many_defaults(pending); + } + if (subcolumn->data.is_root || subcolumn->path.empty()) { new_subcolumns.create_root(std::move(new_subcolumn)); } else if (!new_subcolumns.add(subcolumn->path, std::move(new_subcolumn))) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "add path {} is error in clone()", @@ -2783,13 +2814,8 @@ MutableColumnPtr ColumnVariant::clone() const { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "root is nullptr in clone()"); } res->subcolumns = std::move(new_subcolumns); - auto&& column = serialized_sparse_column->get_ptr(); - auto sparse_column = std::move(*column).mutate(); - res->serialized_sparse_column = sparse_column->assume_mutable(); - - auto&& new_doc_value_column = serialized_doc_value_column->get_ptr(); - auto doc_value_column = std::move(*new_doc_value_column).mutate(); - res->serialized_doc_value_column = doc_value_column->assume_mutable(); + res->serialized_sparse_column = IColumn::mutate(serialized_sparse_column->get_ptr()); + res->serialized_doc_value_column = IColumn::mutate(serialized_doc_value_column->get_ptr()); res->set_num_rows(num_rows); ENABLE_CHECK_CONSISTENCY(res.get()); diff --git a/be/src/core/column/column_variant.h b/be/src/core/column/column_variant.h index 16ced2f529118f..428cf112eb9ef0 100644 --- a/be/src/core/column/column_variant.h +++ b/be/src/core/column/column_variant.h @@ -325,7 +325,7 @@ class ColumnVariant final : public COWHelper { if (subcolumns.empty()) { return nullptr; } - return subcolumns.get_mutable_root()->data.get_finalized_column_ptr()->assume_mutable(); + return std::move(*subcolumns.get_mutable_root()->data.get_finalized_column_ptr()).mutate(); } void serialize_one_row_to_string(int64_t row, std::string* output, @@ -409,8 +409,12 @@ class ColumnVariant final : public COWHelper { ColumnPtr get_sparse_column() 
const { return serialized_sparse_column; } + IColumn& get_sparse_column_mutable() { return *serialized_sparse_column; } + ColumnPtr get_doc_value_column() const { return serialized_doc_value_column; } + IColumn& get_doc_value_column_mutable() { return *serialized_doc_value_column; } + // use sparse_subcolumns_schema to record sparse column's path info and type static MutableColumnPtr create_binary_column_fn() { return ColumnMap::create(ColumnString::create(), ColumnString::create(), diff --git a/be/src/core/cow.h b/be/src/core/cow.h index fcac631aa83ce1..a0dd93bf545d20 100644 --- a/be/src/core/cow.h +++ b/be/src/core/cow.h @@ -25,6 +25,9 @@ #include #include +#include "common/exception.h" +#include "common/status.h" + namespace doris { /** Copy-on-write shared ptr. @@ -313,9 +316,19 @@ class COW { public: MutablePtr mutate() const&& { return shallow_mutate(); } - MutablePtr assume_mutable() const { return const_cast(this)->get_ptr(); } + MutablePtr assume_mutable() const { + if (this->use_count() > 1) { + throw Exception(ErrorCode::INTERNAL_ERROR, "COW::assume_mutable: use_count() > 1"); + } + return const_cast(this)->get_ptr(); + } - Derived& assume_mutable_ref() const { return const_cast(*derived()); } + Derived& assume_mutable_ref() const { + if (this->use_count() > 1) { + throw Exception(ErrorCode::INTERNAL_ERROR, "COW::assume_mutable_ref: use_count() > 1"); + } + return const_cast(*derived()); + } protected: /// It works as immutable_ptr if it is const and as mutable_ptr if it is non const. 
diff --git a/be/src/core/data_type/data_type_array.cpp b/be/src/core/data_type/data_type_array.cpp index c30cabe26c745b..be97fc0c460ff4 100644 --- a/be/src/core/data_type/data_type_array.cpp +++ b/be/src/core/data_type/data_type_array.cpp @@ -120,7 +120,7 @@ const char* DataTypeArray::deserialize(const char* buf, MutableColumnPtr* column memcpy(offsets.data(), buf, sizeof(ColumnArray::Offset64) * real_have_saved_num); buf += sizeof(ColumnArray::Offset64) * real_have_saved_num; // children - auto nested_column = data_column->get_data_ptr()->assume_mutable(); + auto nested_column = std::move(*data_column->get_data_ptr()).mutate(); buf = get_nested_type()->deserialize(buf, &nested_column, be_exec_version); return buf; } diff --git a/be/src/core/data_type/data_type_map.cpp b/be/src/core/data_type/data_type_map.cpp index 0932bf47c218bd..7e633c21490bce 100644 --- a/be/src/core/data_type/data_type_map.cpp +++ b/be/src/core/data_type/data_type_map.cpp @@ -135,8 +135,8 @@ const char* DataTypeMap::deserialize(const char* buf, MutableColumnPtr* column, memcpy(map_offsets.data(), buf, sizeof(ColumnArray::Offset64) * real_have_saved_num); buf += sizeof(ColumnArray::Offset64) * real_have_saved_num; // key value - auto nested_keys_column = map_column->get_keys_ptr()->assume_mutable(); - auto nested_values_column = map_column->get_values_ptr()->assume_mutable(); + auto nested_keys_column = std::move(*map_column->get_keys_ptr()).mutate(); + auto nested_values_column = std::move(*map_column->get_values_ptr()).mutate(); buf = get_key_type()->deserialize(buf, &nested_keys_column, be_exec_version); buf = get_value_type()->deserialize(buf, &nested_values_column, be_exec_version); return buf; diff --git a/be/src/core/data_type/data_type_struct.cpp b/be/src/core/data_type/data_type_struct.cpp index 0770899d661129..6d6ceb856ee1fb 100644 --- a/be/src/core/data_type/data_type_struct.cpp +++ b/be/src/core/data_type/data_type_struct.cpp @@ -214,7 +214,7 @@ const char* 
DataTypeStruct::deserialize(const char* buf, MutableColumnPtr* colum auto* struct_column = assert_cast(origin_column); DCHECK(elems.size() == struct_column->tuple_size()); for (size_t i = 0; i < elems.size(); ++i) { - auto child_column = struct_column->get_column_ptr(i)->assume_mutable(); + auto child_column = std::move(*struct_column->get_column_ptr(i)).mutate(); buf = elems[i]->deserialize(buf, &child_column, be_exec_version); } return buf; diff --git a/be/src/exec/common/hash_table/hash_map_context.h b/be/src/exec/common/hash_table/hash_map_context.h index 479256a475ce3c..6d355e7d9561fa 100644 --- a/be/src/exec/common/hash_table/hash_map_context.h +++ b/be/src/exec/common/hash_table/hash_map_context.h @@ -955,7 +955,7 @@ struct MethodKeysFixed : public MethodBase { const auto* nullmap = assert_cast(*nullmap_columns[j]).get_data().data(); // make sure null cell is filled by 0x0 - key_columns[j]->assume_mutable()->replace_column_null_data(nullmap); + const_cast(key_columns[j])->replace_column_null_data(nullmap); } auto* __restrict current = result_data + offset; for (size_t i = 0; i < row_numbers; ++i) { diff --git a/be/src/exec/common/variant_util.cpp b/be/src/exec/common/variant_util.cpp index 767f107649126c..1fd262e5604bf2 100644 --- a/be/src/exec/common/variant_util.cpp +++ b/be/src/exec/common/variant_util.cpp @@ -435,7 +435,7 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co auto variant = ColumnVariant::create(data_type_object.variant_max_subcolumns_count(), data_type_object.enable_doc_mode()); - variant->create_root(arg.type, arg.column->assume_mutable()); + variant->create_root(arg.type, std::move(*arg.column).mutate()); ColumnPtr nullable = ColumnNullable::create( variant->get_ptr(), check_and_get_column(arg.column.get())->get_null_map_column_ptr()); @@ -2119,9 +2119,8 @@ void parse_json_to_variant_impl(IColumn& column, const char* src, size_t length, } } column_variant.incr_num_rows(); - auto sparse_column = 
column_variant.get_sparse_column(); - if (sparse_column->size() == old_num_rows) { - sparse_column->assume_mutable()->insert_default(); + if (column_variant.get_sparse_column()->size() == old_num_rows) { + column_variant.get_sparse_column_mutable().insert_default(); } #ifndef NDEBUG column_variant.check_consistency(); @@ -2218,10 +2217,10 @@ Status _parse_and_materialize_variant_columns(Block& block, for (size_t i = 0; i < variant_pos.size(); ++i) { auto column_ref = block.get_by_position(variant_pos[i]).column; bool is_nullable = column_ref->is_nullable(); - MutableColumnPtr var_column = column_ref->assume_mutable(); + MutableColumnPtr var_column = std::move(*column_ref).mutate(); if (is_nullable) { const auto& nullable = assert_cast(*column_ref); - var_column = nullable.get_nested_column_ptr()->assume_mutable(); + var_column = std::move(*nullable.get_nested_column_ptr()).mutate(); } auto& var = assert_cast(*var_column); var_column->finalize(); diff --git a/be/src/exec/exchange/local_exchanger.cpp b/be/src/exec/exchange/local_exchanger.cpp index 620aae737050d6..a248940dc63c81 100644 --- a/be/src/exec/exchange/local_exchanger.cpp +++ b/be/src/exec/exchange/local_exchanger.cpp @@ -167,6 +167,7 @@ Status ShuffleExchanger::get_block(RuntimeState* state, Block* block, bool* eos, mutable_block = VectorizedUtils::build_mutable_mem_reuse_block( block, partitioned_block.first->_data_block); RETURN_IF_ERROR(get_data()); + block->set_columns(std::move(mutable_block.mutable_columns())); } return Status::OK(); } @@ -212,7 +213,7 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const std::vectorsize() > 0); + DCHECK(shuffle_idx_to_instance_idx && !shuffle_idx_to_instance_idx->empty()); const auto& map = *shuffle_idx_to_instance_idx; int32_t enqueue_rows = 0; for (const auto& it : map) { @@ -425,6 +426,7 @@ Status BroadcastExchanger::get_block(RuntimeState* state, Block* block, bool* eo RETURN_IF_ERROR(mutable_block.add_rows(&block_wrapper->_data_block, 
partitioned_block.second.offset_start, partitioned_block.second.length)); + block->set_columns(std::move(mutable_block.mutable_columns())); } return Status::OK(); @@ -573,6 +575,9 @@ Status AdaptivePassthroughExchanger::get_block(RuntimeState* state, Block* block mutable_block = VectorizedUtils::build_mutable_mem_reuse_block( block, partitioned_block.first->_data_block); RETURN_IF_ERROR(get_data()); + if (mutable_block.rows() > 0) { + block->set_columns(std::move(mutable_block.mutable_columns())); + } } return Status::OK(); } diff --git a/be/src/exec/operator/aggregation_sink_operator.cpp b/be/src/exec/operator/aggregation_sink_operator.cpp index f6a9c2cdc4211d..0808361ad74f86 100644 --- a/be/src/exec/operator/aggregation_sink_operator.cpp +++ b/be/src/exec/operator/aggregation_sink_operator.cpp @@ -299,16 +299,20 @@ Status AggSinkLocalState::_merge_with_serialized_key_helper(Block* block) { for (int i = 0; i < key_size; ++i) { if constexpr (for_spill) { - key_columns[i] = block->get_by_position(i).column.get(); key_locs[i] = i; } else { int& result_column_id = key_locs[i]; RETURN_IF_ERROR( Base::_shared_state->probe_expr_ctxs[i]->execute(block, &result_column_id)); block->replace_by_position_if_const(result_column_id); - key_columns[i] = block->get_by_position(result_column_id).column.get(); } - key_columns[i]->assume_mutable()->replace_float_special_values(); + { + auto mutable_col = + IColumn::mutate(std::move(block->get_by_position(key_locs[i]).column)); + mutable_col->replace_float_special_values(); + block->get_by_position(key_locs[i]).column = std::move(mutable_col); + key_columns[i] = block->get_by_position(key_locs[i]).column.get(); + } } size_t rows = block->rows(); @@ -491,8 +495,13 @@ Status AggSinkLocalState::_execute_with_serialized_key_helper(Block* block) { block->get_by_position(result_column_id).column = block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); + { + auto mutable_col = + 
IColumn::mutate(std::move(block->get_by_position(result_column_id).column)); + mutable_col->replace_float_special_values(); + block->get_by_position(result_column_id).column = std::move(mutable_col); + } key_columns[i] = block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); } } diff --git a/be/src/exec/operator/cache_source_operator.cpp b/be/src/exec/operator/cache_source_operator.cpp index aec8206f54b682..06731ff8ed54c0 100644 --- a/be/src/exec/operator/cache_source_operator.cpp +++ b/be/src/exec/operator/cache_source_operator.cpp @@ -156,7 +156,9 @@ Status CacheSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* if (need_clone_empty) { *block = output_block->clone_empty(); } - RETURN_IF_ERROR(MutableBlock::build_mutable_block(block).merge(*output_block)); + auto mutable_block = MutableBlock::build_mutable_block(block); + RETURN_IF_ERROR(mutable_block.merge(*output_block)); + block->set_columns(std::move(mutable_block.mutable_columns())); local_state._current_query_cache_rows += output_block->rows(); auto mem_consume = output_block->allocated_bytes(); local_state._current_query_cache_bytes += mem_consume; @@ -179,7 +181,9 @@ Status CacheSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* if (need_clone_empty) { *block = hit_cache_block->clone_empty(); } - RETURN_IF_ERROR(MutableBlock::build_mutable_block(block).merge(*hit_cache_block)); + auto mutable_block = MutableBlock::build_mutable_block(block); + RETURN_IF_ERROR(mutable_block.merge(*hit_cache_block)); + block->set_columns(std::move(mutable_block.mutable_columns())); if (!local_state._hit_cache_column_orders.empty()) { auto datas = block->get_columns_with_type_and_name(); block->clear(); diff --git a/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp b/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp index 298896401d6f3e..63b184889276bf 100644 --- 
a/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp +++ b/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp @@ -76,65 +76,64 @@ bool DistinctStreamingAggLocalState::_should_expand_preagg_hash_tables() { } return std::visit( - Overload { - [&](std::monostate& arg) -> bool { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, "uninited hash table"); - return false; - }, - [&](auto& agg_method) -> bool { - auto& hash_tbl = *agg_method.hash_table; - auto [ht_mem, ht_rows] = - std::pair {hash_tbl.get_buffer_size_in_bytes(), hash_tbl.size()}; - - // Need some rows in tables to have valid statistics. - if (ht_rows == 0) { - return true; - } - - const auto* reduction = _is_single_backend - ? SINGLE_BE_STREAMING_HT_MIN_REDUCTION - : STREAMING_HT_MIN_REDUCTION; - - // Find the appropriate reduction factor in our table for the current hash table sizes. - int cache_level = 0; - while (cache_level + 1 < STREAMING_HT_MIN_REDUCTION_SIZE && - ht_mem >= reduction[cache_level + 1].min_ht_mem) { - ++cache_level; - } - - // Compare the number of rows in the hash table with the number of input rows that - // were aggregated into it. Exclude passed through rows from this calculation since - // they were not in hash tables. - const int64_t input_rows = _input_num_rows; - const int64_t aggregated_input_rows = input_rows - _num_rows_returned; - // TODO chenhao - // const int64_t expected_input_rows = estimated_input_cardinality_ - num_rows_returned_; - double current_reduction = static_cast(aggregated_input_rows) / - static_cast(ht_rows); - - // TODO: workaround for IMPALA-2490: subplan node rows_returned counter may be - // inaccurate, which could lead to a divide by zero below. 
- if (aggregated_input_rows <= 0) { - return true; - } - - // Extrapolate the current reduction factor (r) using the formula - // R = 1 + (N / n) * (r - 1), where R is the reduction factor over the full input data - // set, N is the number of input rows, excluding passed-through rows, and n is the - // number of rows inserted or merged into the hash tables. This is a very rough - // approximation but is good enough to be useful. - // TODO: consider collecting more statistics to better estimate reduction. - // double estimated_reduction = aggregated_input_rows >= expected_input_rows - // ? current_reduction - // : 1 + (expected_input_rows / aggregated_input_rows) * (current_reduction - 1); - double min_reduction = reduction[cache_level].streaming_ht_min_reduction; - - // COUNTER_SET(preagg_estimated_reduction_, estimated_reduction); - // COUNTER_SET(preagg_streaming_ht_min_reduction_, min_reduction); - // return estimated_reduction > min_reduction; - _should_expand_hash_table = current_reduction > min_reduction; - return _should_expand_hash_table; - }}, + Overload {[&](std::monostate& arg) -> bool { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, "uninited hash table"); + return false; + }, + [&](auto& agg_method) -> bool { + auto& hash_tbl = *agg_method.hash_table; + auto [ht_mem, ht_rows] = + std::pair {hash_tbl.get_buffer_size_in_bytes(), hash_tbl.size()}; + + // Need some rows in tables to have valid statistics. + if (ht_rows == 0) { + return true; + } + + const auto* reduction = _is_single_backend + ? SINGLE_BE_STREAMING_HT_MIN_REDUCTION + : STREAMING_HT_MIN_REDUCTION; + + // Find the appropriate reduction factor in our table for the current hash table sizes. + int cache_level = 0; + while (cache_level + 1 < STREAMING_HT_MIN_REDUCTION_SIZE && + ht_mem >= reduction[cache_level + 1].min_ht_mem) { + ++cache_level; + } + + // Compare the number of rows in the hash table with the number of input rows that + // were aggregated into it. 
Exclude passed through rows from this calculation since + // they were not in hash tables. + const int64_t input_rows = _input_num_rows; + const int64_t aggregated_input_rows = input_rows - _num_rows_returned; + // TODO chenhao + // const int64_t expected_input_rows = estimated_input_cardinality_ - num_rows_returned_; + double current_reduction = static_cast(aggregated_input_rows) / + static_cast(ht_rows); + + // TODO: workaround for IMPALA-2490: subplan node rows_returned counter may be + // inaccurate, which could lead to a divide by zero below. + if (aggregated_input_rows <= 0) { + return true; + } + + // Extrapolate the current reduction factor (r) using the formula + // R = 1 + (N / n) * (r - 1), where R is the reduction factor over the full input data + // set, N is the number of input rows, excluding passed-through rows, and n is the + // number of rows inserted or merged into the hash tables. This is a very rough + // approximation but is good enough to be useful. + // TODO: consider collecting more statistics to better estimate reduction. + // double estimated_reduction = aggregated_input_rows >= expected_input_rows + // ? 
current_reduction + // : 1 + (expected_input_rows / aggregated_input_rows) * (current_reduction - 1); + double min_reduction = reduction[cache_level].streaming_ht_min_reduction; + + // COUNTER_SET(preagg_estimated_reduction_, estimated_reduction); + // COUNTER_SET(preagg_streaming_ht_min_reduction_, min_reduction); + // return estimated_reduction > min_reduction; + _should_expand_hash_table = current_reduction > min_reduction; + return _should_expand_hash_table; + }}, _agg_data->method_variant); } @@ -162,7 +161,13 @@ Status DistinctStreamingAggLocalState::_distinct_pre_agg_with_serialized_key( in_block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); key_columns[i] = in_block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); + { + auto mutable_col = IColumn::mutate( + std::move(in_block->get_by_position(result_column_id).column)); + mutable_col->replace_float_special_values(); + in_block->get_by_position(result_column_id).column = std::move(mutable_col); + key_columns[i] = in_block->get_by_position(result_column_id).column.get(); + } result_idxs[i] = result_column_id; } } @@ -219,9 +224,9 @@ Status DistinctStreamingAggLocalState::_distinct_pre_agg_with_serialized_key( } } else { for (int i = 0; i < key_size; ++i) { - auto output_column = out_block->get_by_position(i).column; - auto dst = output_column->assume_mutable(); + auto dst = IColumn::mutate(std::move(out_block->get_by_position(i).column)); key_columns[i]->append_data_by_selector(dst, _distinct_row); + out_block->get_by_position(i).column = std::move(dst); } } } diff --git a/be/src/exec/operator/hashjoin_build_sink.h b/be/src/exec/operator/hashjoin_build_sink.h index dcc76031c2c6df..0da7dddc1106e4 100644 --- a/be/src/exec/operator/hashjoin_build_sink.h +++ b/be/src/exec/operator/hashjoin_build_sink.h @@ -230,7 +230,7 @@ struct ProcessHashTableBuild { // In order to make the null keys equal when using single null 
eq, all null keys need to be set to default value. if (_build_raw_ptrs.size() == 1 && null_map && *has_null_key) { - _build_raw_ptrs[0]->assume_mutable()->replace_column_null_data(null_map->data()); + const_cast(_build_raw_ptrs[0])->replace_column_null_data(null_map->data()); } hash_table_ctx.init_serialized_keys(_build_raw_ptrs, _rows, diff --git a/be/src/exec/operator/join/process_hash_table_probe_impl.h b/be/src/exec/operator/join/process_hash_table_probe_impl.h index 5bfd2ff4e0cbfc..3fc4285517d4eb 100644 --- a/be/src/exec/operator/join/process_hash_table_probe_impl.h +++ b/be/src/exec/operator/join/process_hash_table_probe_impl.h @@ -200,7 +200,8 @@ typename HashTableType::State ProcessHashTableProbe::_init_probe_sid // In order to make the null keys equal when using single null eq, all null keys need to be set to default value. if (_parent->_probe_columns.size() == 1 && null_map) { if (simd::contain_one(null_map, probe_rows)) { - _parent->_probe_columns[0]->assume_mutable()->replace_column_null_data(null_map); + const_cast(_parent->_probe_columns[0]) + ->replace_column_null_data(null_map); } } @@ -650,9 +651,11 @@ Status ProcessHashTableProbe::finalize_block_with_filter(Block* outp ->get_data_column_ptr(); auto& src = source_block->get_by_position(column_id).column; - auto dst = output_block->get_by_position(output_column_id).column->assume_mutable(); + auto dst = IColumn::mutate( + std::move(output_block->get_by_position(output_column_id).column)); dst->clear(); insert_with_indexs(dst, src, container, all_match_one); + output_block->get_by_position(output_column_id).column = std::move(dst); } }; do_lazy_materialize(_right_output_slot_flags, _build_indexs, (int)_right_col_idx, diff --git a/be/src/exec/operator/operator.cpp b/be/src/exec/operator/operator.cpp index 2168c24a262fd7..d03b3898360a7a 100644 --- a/be/src/exec/operator/operator.cpp +++ b/be/src/exec/operator/operator.cpp @@ -362,8 +362,8 @@ Status OperatorXBase::do_projections(RuntimeState* state, 
Block* origin_block, MutableBlock mutable_block = VectorizedUtils::build_mutable_mem_reuse_block(output_block, *_output_row_descriptor); + auto& mutable_columns = mutable_block.mutable_columns(); if (rows != 0) { - auto& mutable_columns = mutable_block.mutable_columns(); DCHECK_EQ(mutable_columns.size(), local_state->_projections.size()) << debug_string(); for (int i = 0; i < mutable_columns.size(); ++i) { ColumnPtr column_ptr; @@ -379,8 +379,8 @@ Status OperatorXBase::do_projections(RuntimeState* state, Block* origin_block, insert_column_datas(mutable_columns[i], column_ptr, rows); } DCHECK(mutable_block.rows() == rows); - output_block->set_columns(std::move(mutable_columns)); } + output_block->set_columns(std::move(mutable_columns)); local_state->_estimate_memory_usage += bytes_usage; diff --git a/be/src/exec/operator/repeat_operator.cpp b/be/src/exec/operator/repeat_operator.cpp index 82ffa633056a41..b0aa6989a35f34 100644 --- a/be/src/exec/operator/repeat_operator.cpp +++ b/be/src/exec/operator/repeat_operator.cpp @@ -154,6 +154,7 @@ Status RepeatLocalState::get_repeated_block(Block* input_block, int repeat_id_id RETURN_IF_ERROR(add_grouping_id_column(rows, cur_col, output_columns, repeat_id_idx)); DCHECK_EQ(cur_col, output_column_size); + output_block->set_columns(std::move(m_block.mutable_columns())); return Status::OK(); } @@ -237,6 +238,7 @@ Status RepeatOperatorX::pull(doris::RuntimeState* state, Block* output_block, bo std::size_t cur_col = 0; RETURN_IF_ERROR( local_state.add_grouping_id_column(rows, cur_col, columns, _repeat_id_idx)); + output_block->set_columns(std::move(m_block.mutable_columns())); _repeat_id_idx++; if (_repeat_id_idx >= _repeat_id_list_size) { diff --git a/be/src/exec/operator/table_function_operator.cpp b/be/src/exec/operator/table_function_operator.cpp index 09e74f580dd1c2..fd97e8d69c68a8 100644 --- a/be/src/exec/operator/table_function_operator.cpp +++ b/be/src/exec/operator/table_function_operator.cpp @@ -560,6 +560,7 @@ Status 
TableFunctionLocalState::get_expanded_block(RuntimeState* state, Block* o for (auto index : p._useless_slot_indexs) { columns[index]->insert_many_defaults(row_size - columns[index]->size()); } + output_block->set_columns(std::move(columns)); { SCOPED_TIMER(_filter_timer); // 3. eval conjuncts diff --git a/be/src/exec/operator/union_sink_operator.h b/be/src/exec/operator/union_sink_operator.h index 4842ab6b243903..bdfb4a7303126e 100644 --- a/be/src/exec/operator/union_sink_operator.h +++ b/be/src/exec/operator/union_sink_operator.h @@ -168,9 +168,10 @@ class UnionSinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorXset_columns(std::move(mblock.mutable_columns())); } return Status::OK(); } }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/exec/operator/union_source_operator.cpp b/be/src/exec/operator/union_source_operator.cpp index a484f1e4a324ba..9547c9a8184bcf 100644 --- a/be/src/exec/operator/union_source_operator.cpp +++ b/be/src/exec/operator/union_source_operator.cpp @@ -177,6 +177,9 @@ Status UnionSourceOperatorX::get_next_const(RuntimeState* state, Block* block) { tmp_block.clear(); } } + if (mblock.rows() > 0) { + block->set_columns(std::move(mblock.mutable_columns())); + } // some insert query like "insert into string_test select 1, repeat('a', 1024 * 1024);" // the const expr will be in output expr cause the union node return a empty block. 
so here we diff --git a/be/src/exec/scan/file_scanner.cpp b/be/src/exec/scan/file_scanner.cpp index 5f1d248c1e1f4d..ad694a412b88c7 100644 --- a/be/src/exec/scan/file_scanner.cpp +++ b/be/src/exec/scan/file_scanner.cpp @@ -865,6 +865,7 @@ Status FileScanner::_convert_to_output_block(Block* block) { mutable_output_columns[j]->insert_range_from(*column_ptr, 0, rows); ctx_idx++; } + block->set_columns(std::move(mutable_output_columns)); // after do the dest block insert operation, clear _src_block to remove the reference of origin column _src_block_ptr->clear(); diff --git a/be/src/exec/scan/scanner.h b/be/src/exec/scan/scanner.h index bd56dc0f08ffd2..ca64b98d24a2e4 100644 --- a/be/src/exec/scan/scanner.h +++ b/be/src/exec/scan/scanner.h @@ -115,8 +115,9 @@ class Scanner { if (_padding_block.empty()) { _padding_block.swap(_origin_block); } else if (_origin_block.rows()) { - RETURN_IF_ERROR( - MutableBlock::build_mutable_block(&_padding_block).merge(_origin_block)); + auto mutable_block = MutableBlock::build_mutable_block(&_padding_block); + RETURN_IF_ERROR(mutable_block.merge(_origin_block)); + _padding_block.set_columns(std::move(mutable_block.mutable_columns())); } return Status::OK(); } diff --git a/be/src/exec/sort/partition_sorter.cpp b/be/src/exec/sort/partition_sorter.cpp index 64422a202c236f..89be3b90dc6fb1 100644 --- a/be/src/exec/sort/partition_sorter.cpp +++ b/be/src/exec/sort/partition_sorter.cpp @@ -121,6 +121,7 @@ Status PartitionSorter::_read_row_num(Block* output_block, bool* eos, int batch_ if (current->impl->is_last(step) && current->impl->pos == 0) { if (merged_rows != 0) { // return directly for next time's read swap whole block + output_block->set_columns(std::move(merged_columns)); return Status::OK(); } // swap and return block directly when we should get all data from cursor @@ -147,6 +148,7 @@ Status PartitionSorter::_read_row_num(Block* output_block, bool* eos, int batch_ } } + output_block->set_columns(std::move(merged_columns)); return 
Status::OK(); } @@ -178,6 +180,7 @@ Status PartitionSorter::_read_row_rank(Block* output_block, bool* eos, int batch // rank() maybe need check when have get a distinct row // so when the cmp_res is get a distinct row, need check have output all rows num if (_get_enough_data()) { + output_block->set_columns(std::move(merged_columns)); return Status::OK(); } *_previous_row = *current; @@ -196,6 +199,7 @@ Status PartitionSorter::_read_row_rank(Block* output_block, bool* eos, int batch } } + output_block->set_columns(std::move(merged_columns)); return Status::OK(); } diff --git a/be/src/exec/sort/vsorted_run_merger.cpp b/be/src/exec/sort/vsorted_run_merger.cpp index ce4440c3178343..b4c142cd4f1287 100644 --- a/be/src/exec/sort/vsorted_run_merger.cpp +++ b/be/src/exec/sort/vsorted_run_merger.cpp @@ -194,6 +194,7 @@ Status VSortedRunMerger::get_next(Block* output_block, bool* eos) { current->next(); if (_need_more_data(current)) { do_insert(); + output_block->set_columns(std::move(merged_columns)); return Status::OK(); } } diff --git a/be/src/exprs/function/cast/cast_to_variant.h b/be/src/exprs/function/cast/cast_to_variant.h index acc8ed9e7f6492..b679c15b870271 100644 --- a/be/src/exprs/function/cast/cast_to_variant.h +++ b/be/src/exprs/function/cast/cast_to_variant.h @@ -152,7 +152,7 @@ struct CastToVariant { auto variant = ColumnVariant::create( variant_type ? variant_type->variant_max_subcolumns_count() : 0, variant_type ? 
variant_type->enable_doc_mode() : false); - variant->create_root(from_type, col_from->assume_mutable()); + variant->create_root(from_type, IColumn::mutate(col_from)); block.replace_by_position(result, std::move(variant)); return Status::OK(); } @@ -186,4 +186,4 @@ WrapperType create_cast_from_variant_wrapper(const DataTypeVariant& from_type, }; } -} // namespace doris::CastWrapper \ No newline at end of file +} // namespace doris::CastWrapper diff --git a/be/src/exprs/function/function.cpp b/be/src/exprs/function/function.cpp index f1e44bb4c2ac40..590eb63829200d 100644 --- a/be/src/exprs/function/function.cpp +++ b/be/src/exprs/function/function.cpp @@ -67,8 +67,7 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum } if (!mutable_result_null_map_column) { - mutable_result_null_map_column = - std::move(result_null_map_column)->assume_mutable(); + mutable_result_null_map_column = (*std::move(result_null_map_column)).mutate(); } NullMap& result_null_map = @@ -80,6 +79,12 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum } } + // Commit merged null map back: result_null_map_column was moved into + // mutable_result_null_map_column when merging 2+ nullable args with nulls. 
+ if (mutable_result_null_map_column) { + result_null_map_column = std::move(mutable_result_null_map_column); + } + if (!result_null_map_column) { if (is_column_const(*src)) { return ColumnConst::create( diff --git a/be/src/exprs/function/function_bitmap.cpp b/be/src/exprs/function/function_bitmap.cpp index 3f2c388efb85bf..35341f297640b0 100644 --- a/be/src/exprs/function/function_bitmap.cpp +++ b/be/src/exprs/function/function_bitmap.cpp @@ -681,11 +681,11 @@ void update_bitmap_op_count(int64_t* __restrict count, const NullMap& null_map) ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block, const ColumnNumbers& args, uint32_t result, size_t input_rows_count) { - auto* nullable = assert_cast(src.get()); - ColumnPtr src_not_nullable = nullable->get_nested_column_ptr(); - MutableColumnPtr src_not_nullable_mutable = (*std::move(src_not_nullable)).assume_mutable(); + MutableColumnPtr mutable_src = IColumn::mutate(std::move(src)); + auto* nullable = assert_cast(mutable_src.get()); + auto* src_not_nullable_mutable = &nullable->get_nested_column(); auto* __restrict count_data = - assert_cast(src_not_nullable_mutable.get())->get_data().data(); + assert_cast(src_not_nullable_mutable)->get_data().data(); for (const auto& arg : args) { const ColumnWithTypeAndName& elem = block.get_by_position(arg); @@ -712,7 +712,7 @@ ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block, } } - return src; + return mutable_src; } Status execute_bitmap_op_count_null_to_zero( diff --git a/be/src/exprs/function/function_variant_element.cpp b/be/src/exprs/function/function_variant_element.cpp index 344f05555c6035..4d56ffc4ea5909 100644 --- a/be/src/exprs/function/function_variant_element.cpp +++ b/be/src/exprs/function/function_variant_element.cpp @@ -148,8 +148,7 @@ class FunctionVariantElement : public IFunction { const auto& src_sparse_data_values = assert_cast(sparse_data_map.get_values()); auto& sparse_data_offsets = - 
assert_cast(*target_ptr->get_sparse_column()->assume_mutable()) - .get_offsets(); + assert_cast(target_ptr->get_sparse_column_mutable()).get_offsets(); auto [sparse_data_paths, sparse_data_values] = target_ptr->get_sparse_data_paths_and_values(); StringRef prefix_ref(path.get_path()); @@ -190,7 +189,7 @@ class FunctionVariantElement : public IFunction { sparse_data_offsets.push_back(sparse_data_paths->size()); } target_ptr->get_subcolumns().create_root(root); - target_ptr->get_doc_value_column()->assume_mutable()->resize(src_ptr->size()); + target_ptr->get_doc_value_column_mutable().resize(src_ptr->size()); target_ptr->set_num_rows(src_ptr->size()); } @@ -211,9 +210,9 @@ class FunctionVariantElement : public IFunction { // Ordinary Variant extraction keeps the selected prefix in sparse data, matching the // source branch behavior. Only doc-mode columns keep extracted data in doc_value. auto& extracted_offsets = - assert_cast(*(write_to_doc_value ? target_ptr->get_doc_value_column() - : target_ptr->get_sparse_column()) - ->assume_mutable()) + assert_cast(write_to_doc_value + ? target_ptr->get_doc_value_column_mutable() + : target_ptr->get_sparse_column_mutable()) .get_offsets(); auto [extracted_paths, extracted_values] = write_to_doc_value ? 
target_ptr->get_doc_value_data_paths_and_values() @@ -251,9 +250,9 @@ class FunctionVariantElement : public IFunction { } target_ptr->get_subcolumns().create_root(root); if (write_to_doc_value) { - target_ptr->get_sparse_column()->assume_mutable()->resize(src_ptr->size()); + target_ptr->get_sparse_column_mutable().resize(src_ptr->size()); } else { - target_ptr->get_doc_value_column()->assume_mutable()->resize(src_ptr->size()); + target_ptr->get_doc_value_column_mutable().resize(src_ptr->size()); } target_ptr->set_num_rows(src_ptr->size()); } @@ -323,7 +322,7 @@ class FunctionVariantElement : public IFunction { if (new_subcolumns.empty() && !nodes.empty()) { CHECK_EQ(nodes.size(), 1); new_subcolumns.create_root(ColumnVariant::Subcolumn { - nodes[0]->data.get_finalized_column_ptr()->assume_mutable(), + IColumn::mutate(nodes[0]->data.get_finalized_column_ptr()), nodes[0]->data.get_least_common_type(), true, true}); auto container = ColumnVariant::create(src.max_subcolumns_count(), src.enable_doc_mode(), @@ -349,12 +348,12 @@ class FunctionVariantElement : public IFunction { } result_col->insert_range_from(*container, 0, container->size()); } - *result = result_col->get_ptr(); // ColumnVariant should be finalized before parsing, finalize maybe modify original column structure - (*result)->assume_mutable()->finalize(); + result_col->finalize(); VLOG_DEBUG << "dump new object " << static_cast(result_col.get())->debug_string() << ", path " << path.get_path(); + *result = std::move(result_col); return Status::OK(); } } diff --git a/be/src/format/column_type_convert.h b/be/src/format/column_type_convert.h index 04003c098f0d30..554e5a0c3662a2 100644 --- a/be/src/format/column_type_convert.h +++ b/be/src/format/column_type_convert.h @@ -44,6 +44,20 @@ namespace doris::converter { enum FileFormat { COMMON, ORC, PARQUET }; +// Helper: get the inner (non-nullable) mutable column from an exclusively-owned dst_col. 
+// - For non-nullable dst_col: returns a raw pointer to the column itself. +// - For nullable dst_col: returns a raw pointer to the nested (non-null) column. +// Must only be called when dst_col has exclusive ownership (use_count == 1). +// Returns IColumn* (raw pointer) to avoid creating a second owning MutableColumnPtr, +// which would violate COW invariant (use_count > 1). +inline IColumn* get_mutable_inner_col(MutableColumnPtr& dst_col) { + if (dst_col->is_nullable()) { + return static_cast(dst_col.get())->get_nested_column_ptr().get(); + } else { + return dst_col.get(); + } +} + template constexpr bool is_decimal_type() { return type == TYPE_DECIMALV2 || type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 || @@ -165,13 +179,13 @@ class IntegerToIntegerConverter : public ColumnTypeConverter { using DstColumnType = typename PrimitiveTypeTraits::ColumnType; using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { if constexpr (sizeof(DstCppType) < sizeof(SrcCppType)) { SrcCppType src_value = src_data[i]; @@ -212,7 +226,7 @@ class NumericToFloatPointConverter : public ColumnTypeConverter { using DstColumnType = typename PrimitiveTypeTraits::ColumnType; using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -223,7 +237,7 @@ class 
NumericToFloatPointConverter : public ColumnTypeConverter { auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { SrcCppType src_value = src_data[i]; if constexpr (is_integer_type()) { @@ -248,11 +262,11 @@ class BooleanToStringConverter : public ColumnTypeConverter { Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); for (int i = 0; i < rows; ++i) { std::string value = src_data[i] != 0 ? 
"TRUE" : "FALSE"; string_col.insert_data(value.data(), value.size()); @@ -269,7 +283,7 @@ class NumericToStringConverter : public ColumnTypeConverter { Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -279,7 +293,7 @@ class NumericToStringConverter : public ColumnTypeConverter { size_t rows = from_col->size(); size_t start_idx = to_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); for (int i = 0; i < rows; ++i) { if constexpr (SrcPrimitiveType == TYPE_FLOAT || SrcPrimitiveType == TYPE_DOUBLE) { if (fileFormat == FileFormat::ORC && std::isnan(src_data[i])) { @@ -318,11 +332,11 @@ class DecimalToStringConverter : public ColumnTypeConverter { Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); for (int i = 0; i < rows; ++i) { std::string value = src_data[i].to_string(_scale); string_col.insert_data(value.data(), value.size()); @@ -339,11 +353,11 @@ class TimeToStringConverter : public ColumnTypeConverter { using SrcCppType = typename PrimitiveTypeTraits::CppType; using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr 
to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); char buf[50]; for (int i = 0; i < rows; ++i) { int len = (reinterpret_cast(src_data[i])).to_buffer(buf); @@ -571,19 +585,19 @@ class CastStringConverter : public ColumnTypeConverter { } NullMap* null_map = nullptr; - MutableColumnPtr to_col = nullptr; + IColumn* to_col = nullptr; if (dst_col->is_nullable()) { auto* nullable = assert_cast(dst_col.get()); - to_col = nullable->get_nested_column_ptr()->assume_mutable(); + to_col = nullable->get_nested_column_ptr().get(); null_map = &nullable->get_null_map_data(); } else { - to_col = dst_col->assume_mutable(); + to_col = dst_col.get(); } size_t rows = string_col->size(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = assert_cast(to_col.get())->get_data(); + auto& data = assert_cast(to_col)->get_data(); CastParameters params; for (int i = 0; i < rows; ++i) { bool can_cast = false; @@ -628,7 +642,7 @@ class DateTimeToNumericConverter : public ColumnTypeConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -639,7 +653,7 @@ class DateTimeToNumericConverter : public ColumnTypeConverter { const auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { const SrcCppType& src_value = src_data[i]; @@ -680,13 +694,13 @@ class TimeV2Converter : 
public ColumnTypeConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { const auto& src_value = reinterpret_cast(src_data[i]); auto& dst_value = reinterpret_cast(data[start_idx + i]); @@ -718,7 +732,7 @@ class NumericToDecimalConverter : public ColumnTypeConverter { using DstDorisType = typename PrimitiveTypeTraits::ColumnType::value_type; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -729,7 +743,7 @@ class NumericToDecimalConverter : public ColumnTypeConverter { auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); auto max_result = DataTypeDecimal::get_max_digits_number(_precision); auto multiplier = DataTypeDecimal::get_scale_multiplier(_scale); @@ -804,13 +818,13 @@ class DecimalToNumericConverter : public ColumnTypeConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + 
rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -889,13 +903,13 @@ class DecimalToDecimalConverter : public ColumnTypeConverter { bool narrow_integral = (_to_precision - _to_scale) < (_from_precision - _from_scale); ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { SrcNativeType src_value = src_data[i].value; @@ -983,15 +997,15 @@ class VarBinaryConverter : public ColumnTypeConverter { from_col = &assert_cast(*src_col); } - MutableColumnPtr to_col = nullptr; + IColumn* to_col = nullptr; // nullmap flag seems have been handled in upper level if (dst_col->is_nullable()) { const auto* nullable = assert_cast(dst_col.get()); - to_col = nullable->get_nested_column_ptr()->assume_mutable(); + to_col = const_cast(nullable)->get_nested_column_ptr().get(); } else { - to_col = dst_col->assume_mutable(); + to_col = dst_col.get(); } - auto* to_dst_column = assert_cast(to_col.get()); + auto* to_dst_column = assert_cast(to_col); for (size_t i = 0; i < from_col->size(); ++i) { auto string_ref = from_col->get_data_at(i); diff --git a/be/src/format/orc/vorc_reader.cpp b/be/src/format/orc/vorc_reader.cpp index bcb1a8d70f4b3f..d20a290a75add7 100644 --- a/be/src/format/orc/vorc_reader.cpp +++ b/be/src/format/orc/vorc_reader.cpp @@ -116,6 +116,39 @@ namespace doris { // TODO: we need to determine it by test. 
static constexpr uint32_t MAX_DICT_CODE_PREDICATE_TO_REWRITE = std::numeric_limits::max(); static constexpr char EMPTY_STRING_FOR_OVERFLOW[ColumnString::MAX_STRINGS_OVERFLOW_SIZE] = ""; + +static void fill_orc_null_map(ColumnNullable* nullable_column, const orc::ColumnVectorBatch* cvb, + size_t num_values) { + NullMap& map_data_column = nullable_column->get_null_map_data(); + const auto origin_size = map_data_column.size(); + map_data_column.resize(origin_size + num_values); + if (cvb->hasNulls) { + const auto* cvb_nulls = cvb->notNull.data(); + for (int i = 0; i < num_values; ++i) { + map_data_column[origin_size + i] = !cvb_nulls[i]; + } + } else { + memset(map_data_column.data() + origin_size, 0, num_values); + } +} + +static void align_orc_null_map(const ColumnPtr& src_column, ColumnNullable* dst_nullable_column, + size_t new_rows) { + auto& dst_null_map = dst_nullable_column->get_null_map_column(); + const size_t old_rows = dst_nullable_column->get_nested_column().size(); + const size_t expected_rows = old_rows + new_rows; + if (dst_null_map.size() == expected_rows) { + return; + } + DCHECK_EQ(dst_null_map.size(), old_rows); + if (src_column->is_nullable()) { + const auto* src_nullable = assert_cast(src_column.get()); + DCHECK_GE(src_nullable->size(), new_rows); + dst_null_map.insert_range_from(src_nullable->get_null_map_column(), 0, new_rows); + } else { + dst_null_map.insert_many_vals(0, new_rows); + } +} // Because HIVE 0.11 & 0.12 does not support precision and scale for decimal // The decimal type of orc file produced by HIVE 0.11 & 0.12 are DECIMAL(0,0) // We should set a default precision and scale for these orc files. 
@@ -2018,13 +2051,14 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, // Handle key column: if still missing, fill with default values if (key_is_missing) { // Fill key column with default values (nulls or empty values) - auto mutable_key_column = doris_key_column->assume_mutable(); + auto mutable_key_column = IColumn::mutate(std::move(doris_key_column)); if (mutable_key_column->is_nullable()) { auto* nullable_column = static_cast(mutable_key_column.get()); nullable_column->insert_many_defaults(element_size); } else { mutable_key_column->insert_many_defaults(element_size); } + doris_key_column = std::move(mutable_key_column); } else { // Normal processing: convert ORC column to Doris column RETURN_IF_ERROR(_orc_column_to_doris_column( @@ -2035,13 +2069,14 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, // Handle value column: if still missing, fill with default values if (value_is_missing) { // Fill value column with default values (nulls or empty values) - auto mutable_value_column = doris_value_column->assume_mutable(); + auto mutable_value_column = IColumn::mutate(std::move(doris_value_column)); if (mutable_value_column->is_nullable()) { auto* nullable_column = static_cast(mutable_value_column.get()); nullable_column->insert_many_defaults(element_size); } else { mutable_value_column->insert_many_defaults(element_size); } + doris_value_column = std::move(mutable_value_column); } else { // Normal processing: convert ORC column to Doris column RETURN_IF_ERROR(_orc_column_to_doris_column( @@ -2106,8 +2141,10 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, "Child field of '{}' is not nullable, but is missing in orc file", col_name); } - reinterpret_cast(doris_field->assume_mutable().get()) + auto mutable_field = IColumn::mutate(std::move(doris_field)); + reinterpret_cast(mutable_field.get()) ->insert_many_defaults(num_values); + doris_field = std::move(mutable_field); } for (auto read_field 
: read_fields) { @@ -2172,45 +2209,61 @@ Status OrcReader::_orc_column_to_doris_column( resolved_column = converter->get_column(src_type, doris_column, data_type); resolved_type = converter->get_type(); - if (resolved_column->is_nullable()) { + MutableColumnPtr mutable_resolved_column; + if (converter->is_consistent()) { + resolved_column.reset(); + mutable_resolved_column = IColumn::mutate(std::move(doris_column)); + } else { + mutable_resolved_column = IColumn::mutate(std::move(resolved_column)); + } + + if (mutable_resolved_column->is_nullable()) { SCOPED_RAW_TIMER(&_statistics.decode_null_map_time); auto* nullable_column = - reinterpret_cast(resolved_column->assume_mutable().get()); + reinterpret_cast(mutable_resolved_column.get()); data_column = nullable_column->get_nested_column_ptr(); - - NullMap& map_data_column = nullable_column->get_null_map_data(); - auto origin_size = map_data_column.size(); - map_data_column.resize(origin_size + num_values); - if (cvb->hasNulls) { - const auto* cvb_nulls = cvb->notNull.data(); - for (int i = 0; i < num_values; ++i) { - map_data_column[origin_size + i] = !cvb_nulls[i]; - } - } else { - memset(map_data_column.data() + origin_size, 0, num_values); - } + fill_orc_null_map(nullable_column, cvb, num_values); } else { if (cvb->hasNulls) { return Status::InternalError("Not nullable column {} has null values in orc file", col_name); } - data_column = resolved_column->assume_mutable(); + data_column = std::move(mutable_resolved_column); } RETURN_IF_ERROR(_fill_doris_data_column( col_name, data_column, remove_nullable(resolved_type), root_node, orc_column_type, cvb, num_values)); - // resolve schema change + + if (mutable_resolved_column) { + data_column.reset(); + resolved_column = std::move(mutable_resolved_column); + } else { + resolved_column = std::move(data_column); + } + + if (converter->is_consistent()) { + doris_column = std::move(resolved_column); + return Status::OK(); + } + + doris_column = 
IColumn::mutate(std::move(doris_column)); auto converted_column = doris_column->assume_mutable(); + if (converted_column->is_nullable()) { + align_orc_null_map(resolved_column, + reinterpret_cast(converted_column.get()), + resolved_column->size()); + } return converter->convert(resolved_column, converted_column); } else { - auto mutable_column = doris_column->assume_mutable(); + auto mutable_column = IColumn::mutate(std::move(doris_column)); if (mutable_column->is_nullable()) { auto* nullable_column = static_cast(mutable_column.get()); nullable_column->insert_many_defaults(num_values); } else { mutable_column->insert_many_defaults(num_values); } + doris_column = std::move(mutable_column); } return Status::OK(); diff --git a/be/src/format/parquet/parquet_column_convert.h b/be/src/format/parquet/parquet_column_convert.h index f56ad295bab968..6608c542e4a79e 100644 --- a/be/src/format/parquet/parquet_column_convert.h +++ b/be/src/format/parquet/parquet_column_convert.h @@ -194,6 +194,46 @@ struct ConvertParams { } }; +inline IColumn* get_mutable_inner_column(ColumnPtr& column) { + column = IColumn::mutate(std::move(column)); + auto mutable_column = column->assume_mutable(); + if (mutable_column->is_nullable()) { + return &assert_cast(mutable_column.get())->get_nested_column(); + } + return mutable_column.get(); +} + +inline size_t get_mutable_inner_column_size(const ColumnPtr& column) { + if (column->is_nullable()) { + const auto* nullable = assert_cast(column.get()); + return nullable->get_nested_column().size(); + } + return column->size(); +} + +inline void align_null_map(ColumnPtr& src_column, ColumnPtr& dst_column, size_t old_rows, + size_t new_rows) { + if (!dst_column->is_nullable()) { + return; + } + + dst_column = IColumn::mutate(std::move(dst_column)); + auto* dst_nullable = assert_cast(dst_column->assume_mutable().get()); + auto& dst_null_map = dst_nullable->get_null_map_column(); + const size_t expected_rows = old_rows + new_rows; + if (dst_null_map.size() 
== expected_rows) { + return; + } + DCHECK_EQ(dst_null_map.size(), old_rows); + if (src_column->is_nullable()) { + const auto* src_nullable = assert_cast(src_column.get()); + DCHECK_GE(src_nullable->size(), new_rows); + dst_null_map.insert_range_from(src_nullable->get_null_map_column(), 0, new_rows); + } else { + dst_null_map.insert_many_vals(0, new_rows); + } +} + /** * Convert parquet physical column to logical column * In parquet document(https://github.com/apache/parquet-format/blob/master/LogicalTypes.md), @@ -254,26 +294,36 @@ class PhysicalToLogicalConverter { PrimitiveType::TYPE_INT, dst_logical_type->is_nullable()); } if (is_consistent() && _logical_converter->is_consistent()) { + dst_logical_col = std::move(src_physical_col); return Status::OK(); } + if (_logical_converter->is_consistent()) { + const size_t old_rows = get_mutable_inner_column_size(dst_logical_col); + RETURN_IF_ERROR(physical_convert(src_physical_col, dst_logical_col)); + align_null_map(src_physical_col, dst_logical_col, old_rows, + get_mutable_inner_column_size(dst_logical_col) - old_rows); + return Status::OK(); + } + ColumnPtr src_logical_column; if (is_consistent()) { - if (dst_logical_type->is_nullable()) { - auto doris_nullable_column = - assert_cast(dst_logical_col.get()); - src_logical_column = - ColumnNullable::create(_cached_src_physical_column, - doris_nullable_column->get_null_map_column_ptr()); - } else { - src_logical_column = _cached_src_physical_column; - } + src_logical_column = src_physical_col; } else { src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_col, dst_logical_type); } + const size_t src_old_rows = get_mutable_inner_column_size(src_logical_column); RETURN_IF_ERROR(physical_convert(src_physical_col, src_logical_column)); + align_null_map(src_physical_col, src_logical_column, src_old_rows, + get_mutable_inner_column_size(src_logical_column) - src_old_rows); + + dst_logical_col = IColumn::mutate(std::move(dst_logical_col)); + const 
size_t dst_old_rows = get_mutable_inner_column_size(dst_logical_col); auto converted_column = dst_logical_col->assume_mutable(); - return _logical_converter->convert(src_logical_column, converted_column); + RETURN_IF_ERROR(_logical_converter->convert(src_logical_column, converted_column)); + align_null_map(src_logical_column, dst_logical_col, dst_old_rows, + get_mutable_inner_column_size(dst_logical_col) - dst_old_rows); + return Status::OK(); } virtual ColumnPtr get_physical_column(tparquet::Type::type src_physical_type, @@ -283,6 +333,11 @@ class PhysicalToLogicalConverter { DataTypePtr& get_physical_type() { return _cached_src_physical_type; } + bool read_directly_into_dst_logical_column() { + return !_convert_params->is_type_compatibility && is_consistent() && + _logical_converter->is_consistent(); + } + virtual bool is_consistent() { return false; } virtual bool support() { return true; } @@ -319,14 +374,14 @@ class LittleIntPhysicalConverter : public PhysicalToLogicalConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; using DstColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); size_t rows = from_col->size(); // always comes from tparquet::Type::INT32 auto& src_data = assert_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = assert_cast(*to_col.get()).get_data(); + auto& data = assert_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { data[start_idx + i] = static_cast(src_data[i]); } @@ -378,13 +433,13 @@ class UnsignedIntegerConverter : public PhysicalToLogicalConverter { using DstColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = 
remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); auto& src_data = assert_cast(from_col.get())->get_data(); size_t rows = src_data.size(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = assert_cast(*to_col.get()).get_data(); + auto& data = assert_cast(*to_col).get_data(); for (int i = 0; i < rows; i++) { StorageCppType src_value = src_data[i]; @@ -405,12 +460,12 @@ class FixedSizeBinaryConverter : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); auto* src_data = assert_cast(from_col.get()); size_t length = src_data->size(); size_t num_values = length / _type_length; - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); auto& offsets = string_col.get_offsets(); auto& chars = string_col.get_chars(); @@ -441,12 +496,12 @@ class Float16PhysicalConverter : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); const auto* src_data = assert_cast(from_col.get()); size_t length = src_data->size(); size_t num_values = length / _type_length; - auto* to_float_column = assert_cast(to_col.get()); + auto* to_float_column = assert_cast(to_col); size_t start_idx = to_float_column->size(); to_float_column->resize(start_idx + num_values); auto& to_float_column_data = to_float_column->get_data(); @@ -528,15 +583,8 @@ class UUIDVarBinaryConverter : public PhysicalToLogicalConverter { 
uint8_col = &assert_cast(*src_physical_col); } - MutableColumnPtr to_col = nullptr; - // nullmap flag seems have been handled in upper level - if (src_logical_column->is_nullable()) { - const auto* nullable = assert_cast(src_logical_column.get()); - to_col = nullable->get_nested_column_ptr()->assume_mutable(); - } else { - to_col = src_logical_column->assume_mutable(); - } - auto* to_varbinary_column = assert_cast(to_col.get()); + IColumn* to_col = get_mutable_inner_column(src_logical_column); + auto* to_varbinary_column = assert_cast(to_col); size_t length = uint8_col->size(); size_t num_values = length / _type_length; const auto* ptr = uint8_col->get_data().data(); @@ -561,7 +609,7 @@ class FixedSizeToDecimal : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); #define M(FixedTypeLength, ValueCopyType) \ case FixedTypeLength: \ @@ -612,13 +660,13 @@ class FixedSizeToDecimal : public PhysicalToLogicalConverter { } template - Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { + Status _convert_internal(ColumnPtr& src_col, IColumn* dst_col) { size_t rows = src_col->size() / fixed_type_length; auto* buf = static_cast(src_col.get())->get_data().data(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = static_cast*>(dst_col.get())->get_data(); + auto& data = static_cast*>(dst_col)->get_data(); size_t offset = 0; for (int i = 0; i < rows; i++) { // When Decimal in parquet is stored in byte arrays, binary and fixed, @@ -645,7 +693,7 @@ class StringToDecimal : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { using ValueCopyType = 
DecimalType::NativeType; ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); auto buf = static_cast(src_col.get())->get_chars().data(); @@ -653,7 +701,7 @@ class StringToDecimal : public PhysicalToLogicalConverter { size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = static_cast*>(dst_col.get())->get_data(); + auto& data = static_cast*>(dst_col)->get_data(); for (int i = 0; i < rows; i++) { size_t len = offset[i] - offset[i - 1]; // When Decimal in parquet is stored in byte arrays, binary and fixed, @@ -678,7 +726,7 @@ class NumberToDecimal : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { using ValueCopyType = typename DecimalType::NativeType; ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); auto* src_data = @@ -686,7 +734,7 @@ class NumberToDecimal : public PhysicalToLogicalConverter { size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto* data = static_cast*>(dst_col.get())->get_data().data(); + auto* data = static_cast*>(dst_col)->get_data().data(); for (int i = 0; i < rows; i++) { ValueCopyType value; @@ -706,14 +754,14 @@ class NumberToDecimal : public PhysicalToLogicalConverter { class Int32ToDate : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = 
src_col->size(); size_t start_idx = dst_col->size(); dst_col->reserve(start_idx + rows); auto& src_data = static_cast(src_col.get())->get_data(); - auto& data = static_cast(dst_col.get())->get_data(); + auto& data = static_cast(dst_col)->get_data(); date_day_offset_dict& date_dict = date_day_offset_dict::get(); for (int i = 0; i < rows; i++) { @@ -727,14 +775,14 @@ class Int32ToDate : public PhysicalToLogicalConverter { struct Int64ToTimestamp : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); auto src_data = static_cast(src_col.get())->get_data().data(); - auto& data = static_cast(dst_col.get())->get_data(); + auto& data = static_cast(dst_col)->get_data(); for (int i = 0; i < rows; i++) { int64_t x = src_data[i]; @@ -760,14 +808,14 @@ struct Int64ToTimestamp : public PhysicalToLogicalConverter { struct Int64ToTimestampTz : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); const auto& src_data = assert_cast(src_col.get())->get_data(); - auto& dest_data = assert_cast(dst_col.get())->get_data(); + auto& dest_data = assert_cast(dst_col)->get_data(); static const cctz::time_zone UTC = cctz::utc_time_zone(); for (int i = 0; i < rows; i++) { @@ -784,14 +832,14 @@ struct Int64ToTimestampTz : public 
PhysicalToLogicalConverter { struct Int96toTimestamp : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size() / sizeof(ParquetInt96); auto& src_data = static_cast(src_col.get())->get_data(); auto ParquetInt96_data = (ParquetInt96*)src_data.data(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = static_cast(dst_col.get())->get_data(); + auto& data = static_cast(dst_col)->get_data(); for (int i = 0; i < rows; i++) { ParquetInt96 src_cell_data = ParquetInt96_data[i]; @@ -818,14 +866,14 @@ struct Int96toTimestamp : public PhysicalToLogicalConverter { struct Int96toTimestampTz : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size() / sizeof(ParquetInt96); const auto& src_data = assert_cast(src_col.get())->get_data(); auto* ParquetInt96_data = (ParquetInt96*)src_data.data(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = assert_cast(dst_col.get())->get_data(); + auto& data = assert_cast(dst_col)->get_data(); static const cctz::time_zone UTC = cctz::utc_time_zone(); for (int i = 0; i < rows; i++) { diff --git a/be/src/format/parquet/vparquet_column_reader.cpp b/be/src/format/parquet/vparquet_column_reader.cpp index ba7d42a5aed84e..1deffec6a04633 100644 --- a/be/src/format/parquet/vparquet_column_reader.cpp +++ b/be/src/format/parquet/vparquet_column_reader.cpp @@ -328,12 +328,11 @@ 
Status ScalarColumnReader::_read_values(size_t num_ MutableColumnPtr data_column; std::vector null_map; NullMap* map_data_column = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { SCOPED_RAW_TIMER(&_decode_null_map_time); - // doris_column either originates from a mutable block in vparquet_group_reader - // or is a newly created ColumnPtr, and therefore can be modified. - auto* nullable_column = - assert_cast(const_cast(doris_column.get())); + auto mutable_column = doris_column->assume_mutable(); + auto* nullable_column = assert_cast(mutable_column.get()); data_column = nullable_column->get_nested_column_ptr(); map_data_column = &(nullable_column->get_null_map_data()); @@ -411,12 +410,11 @@ Status ScalarColumnReader::_read_nested_column( // Handle nullable columns MutableColumnPtr data_column; NullMap* map_data_column = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { SCOPED_RAW_TIMER(&_decode_null_map_time); - // doris_column either originates from a mutable block in vparquet_group_reader - // or is a newly created ColumnPtr, and therefore can be modified. 
- auto* nullable_column = - const_cast(assert_cast(doris_column.get())); + auto mutable_column = doris_column->assume_mutable(); + auto* nullable_column = assert_cast(mutable_column.get()); data_column = nullable_column->get_nested_column_ptr(); map_data_column = &(nullable_column->get_null_map_data()); } else { @@ -550,6 +548,10 @@ Status ScalarColumnReader::read_column_data( ColumnPtr resolved_column = _converter->get_physical_column(_field_schema->physical_type, _field_schema->data_type, doris_column, type, is_dict_filter); + if (_converter->read_directly_into_dst_logical_column()) { + DCHECK_EQ(resolved_column.get(), doris_column.get()); + resolved_column = std::move(doris_column); + } DataTypePtr& resolved_type = _converter->get_physical_type(); _def_levels.clear(); @@ -658,6 +660,7 @@ Status ArrayColumnReader::read_column_data( int64_t real_column_size) { MutableColumnPtr data_column; NullMap* null_map_ptr = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { auto mutable_column = doris_column->assume_mutable(); auto* nullable_column = assert_cast(mutable_column.get()); @@ -713,6 +716,7 @@ Status MapColumnReader::read_column_data( int64_t real_column_size) { MutableColumnPtr data_column; NullMap* null_map_ptr = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { auto mutable_column = doris_column->assume_mutable(); auto* nullable_column = assert_cast(mutable_column.get()); @@ -789,6 +793,7 @@ Status StructColumnReader::read_column_data( int64_t real_column_size) { MutableColumnPtr data_column; NullMap* null_map_ptr = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { auto mutable_column = doris_column->assume_mutable(); auto* nullable_column = assert_cast(mutable_column.get()); @@ -986,6 +991,7 @@ Status StructColumnReader::read_column_data( auto& doris_field = doris_struct.get_column_ptr(idx); auto& 
doris_type = doris_struct_type->get_element(idx); DCHECK(doris_type->is_nullable()); + doris_field = IColumn::mutate(std::move(doris_field)); auto mutable_column = doris_field->assume_mutable(); auto* nullable_column = static_cast(mutable_column.get()); nullable_column->insert_many_defaults(missing_column_sz); diff --git a/be/src/format/parquet/vparquet_column_reader.h b/be/src/format/parquet/vparquet_column_reader.h index 9d9fd2280c88f8..8673361eb46dd6 100644 --- a/be/src/format/parquet/vparquet_column_reader.h +++ b/be/src/format/parquet/vparquet_column_reader.h @@ -482,6 +482,7 @@ class SkipReadingReader : public ParquetColumnReader { // Simulate reading without actually reading data // Fill with default/null values based on column type + doris_column = IColumn::mutate(std::move(doris_column)); MutableColumnPtr data_column = doris_column->assume_mutable(); if (real_column_size > 0) { diff --git a/be/src/load/memtable/memtable.cpp b/be/src/load/memtable/memtable.cpp index 588d8543d7b4b4..3bdcaa1ef961d7 100644 --- a/be/src/load/memtable/memtable.cpp +++ b/be/src/load/memtable/memtable.cpp @@ -453,12 +453,11 @@ void MemTable::_sort_one_column(DorisVector>& row_in } template -void MemTable::_finalize_one_row(RowInBlock* row, const ColumnsWithTypeAndName& block_data, - int row_pos) { +void MemTable::_finalize_one_row(RowInBlock* row, MutableBlock& mutable_block, int row_pos) { // move key columns for (size_t i = 0; i < _tablet_schema->num_key_columns(); ++i) { - _output_mutable_block.get_column_by_position(i)->insert_from(*block_data[i].column.get(), - row->_row_pos); + _output_mutable_block.get_column_by_position(i)->insert_from( + *mutable_block.get_column_by_position(i), row->_row_pos); } if (row->has_init_agg()) { // get value columns from agg_places @@ -490,7 +489,7 @@ void MemTable::_finalize_one_row(RowInBlock* row, const ColumnsWithTypeAndName& // move columns for rows do not need agg for (size_t i = _tablet_schema->num_key_columns(); i < _num_columns; ++i) { 
_output_mutable_block.get_column_by_position(i)->insert_from( - *block_data[i].column.get(), row->_row_pos); + *mutable_block.get_column_by_position(i), row->_row_pos); } } if constexpr (!is_final) { @@ -527,7 +526,6 @@ void MemTable::_aggregate() { Block in_block = _input_mutable_block.to_block(); MutableBlock mutable_block = MutableBlock::build_mutable_block(&in_block); _vec_row_comparator->set_block(&mutable_block); - auto& block_data = in_block.get_columns_with_type_and_name(); DorisVector> temp_row_in_blocks; temp_row_in_blocks.reserve(_last_sorted_pos); //only init agg if needed @@ -558,7 +556,7 @@ void MemTable::_aggregate() { if (!temp_row_in_blocks.empty()) { // The rows from the previous batch of _row_in_blocks have been merged into temp_row_in_blocks, // now call finalize to write the aggregation results into _output_mutable_block. - _finalize_one_row(temp_row_in_blocks.back().get(), block_data, + _finalize_one_row(temp_row_in_blocks.back().get(), mutable_block, row_pos); } temp_row_in_blocks.push_back(cur_row_ptr); @@ -567,15 +565,15 @@ void MemTable::_aggregate() { } if (!temp_row_in_blocks.empty()) { // finalize the last low - _finalize_one_row(temp_row_in_blocks.back().get(), block_data, row_pos); + _finalize_one_row(temp_row_in_blocks.back().get(), mutable_block, row_pos); } } else { DCHECK(_delete_sign_col_idx != -1); if (_seq_col_idx_in_block == -1) { - _aggregate_for_flexible_partial_update_without_seq_col( - block_data, mutable_block, temp_row_in_blocks); + _aggregate_for_flexible_partial_update_without_seq_col(mutable_block, + temp_row_in_blocks); } else { - _aggregate_for_flexible_partial_update_with_seq_col(block_data, mutable_block, + _aggregate_for_flexible_partial_update_with_seq_col(mutable_block, temp_row_in_blocks); } } @@ -593,8 +591,7 @@ void MemTable::_aggregate() { template void MemTable::_aggregate_for_flexible_partial_update_without_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, - DorisVector>& 
temp_row_in_blocks) { + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks) { std::shared_ptr prev_row {nullptr}; int row_pos = -1; auto& skip_bitmaps = @@ -609,12 +606,12 @@ void MemTable::_aggregate_for_flexible_partial_update_without_seq_col( auto finalize_rows = [&]() { if (row_with_delete_sign != nullptr) { temp_row_in_blocks.push_back(row_with_delete_sign); - _finalize_one_row(row_with_delete_sign.get(), block_data, ++row_pos); + _finalize_one_row(row_with_delete_sign.get(), mutable_block, ++row_pos); row_with_delete_sign = nullptr; } if (row_without_delete_sign != nullptr) { temp_row_in_blocks.push_back(row_without_delete_sign); - _finalize_one_row(row_without_delete_sign.get(), block_data, ++row_pos); + _finalize_one_row(row_without_delete_sign.get(), mutable_block, ++row_pos); row_without_delete_sign = nullptr; } // _arena.clear(); @@ -670,15 +667,14 @@ void MemTable::_aggregate_for_flexible_partial_update_without_seq_col( template void MemTable::_aggregate_for_flexible_partial_update_with_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, - DorisVector>& temp_row_in_blocks) { + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks) { // For flexible partial update, when table has sequence column, we don't do any aggregation // in memtable. 
These duplicate rows will be aggregated in VerticalSegmentWriter int row_pos = -1; for (const auto& row_ptr : *_row_in_blocks) { RowInBlock* row = row_ptr.get(); temp_row_in_blocks.push_back(row_ptr); - _finalize_one_row(row, block_data, ++row_pos); + _finalize_one_row(row, mutable_block, ++row_pos); } } diff --git a/be/src/load/memtable/memtable.h b/be/src/load/memtable/memtable.h index 42f96dd4f5f769..ad20667527fed1 100644 --- a/be/src/load/memtable/memtable.h +++ b/be/src/load/memtable/memtable.h @@ -262,7 +262,7 @@ class MemTable { void _sort_one_column(DorisVector>& row_in_blocks, Tie& tie, std::function cmp); template - void _finalize_one_row(RowInBlock* row, const ColumnsWithTypeAndName& block_data, int row_pos); + void _finalize_one_row(RowInBlock* row, MutableBlock& mutable_block, int row_pos); void _init_row_for_agg(RowInBlock* row, MutableBlock& mutable_block); void _clear_row_agg(RowInBlock* row); @@ -271,12 +271,12 @@ class MemTable { template void _aggregate_for_flexible_partial_update_without_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks); template void _aggregate_for_flexible_partial_update_with_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks); Status _put_into_output(Block& in_block); diff --git a/be/src/runtime/query_cache/query_cache.cpp b/be/src/runtime/query_cache/query_cache.cpp index d79acfa7ef788d..06817adf1544ce 100644 --- a/be/src/runtime/query_cache/query_cache.cpp +++ b/be/src/runtime/query_cache/query_cache.cpp @@ -17,6 +17,8 @@ #include "runtime/query_cache/query_cache.h" +#include "common/logging.h" + namespace doris { std::vector* QueryCacheHandle::get_cache_slot_orders() { @@ -43,7 +45,10 @@ void QueryCache::insert(const CacheKey& key, int64_t version, CacheResult& res, CacheResult cache_result; for (auto& block_data : res) { 
cache_result.emplace_back(Block::create_unique())->swap(block_data->clone_empty()); - (void)MutableBlock(cache_result.back().get()).merge(*block_data); + MutableBlock mutable_block(cache_result.back().get()); + auto st = mutable_block.merge(*block_data); + DORIS_CHECK(st.ok()); + cache_result.back()->set_columns(std::move(mutable_block.mutable_columns())); } auto cache_value_ptr = std::make_unique(version, std::move(cache_result), slot_orders); diff --git a/be/src/runtime/result_block_buffer.cpp b/be/src/runtime/result_block_buffer.cpp index ba7f135ce762d5..aebea97ea1ee90 100644 --- a/be/src/runtime/result_block_buffer.cpp +++ b/be/src/runtime/result_block_buffer.cpp @@ -214,10 +214,12 @@ Status ResultBlockBuffer::add_batch(RuntimeState* state, (batch_size + _last_batch_bytes) <= config::thrift_max_message_size) { if constexpr (std::is_same_v) { auto last_block = _result_batch_queue.back(); + auto mutable_columns = last_block->mutate_columns(); for (size_t i = 0; i < last_block->columns(); i++) { - last_block->mutate_columns()[i]->insert_range_from( - *result->get_by_position(i).column, 0, num_rows); + mutable_columns[i]->insert_range_from(*result->get_by_position(i).column, 0, + num_rows); } + last_block->set_columns(std::move(mutable_columns)); } else { std::vector& back_rows = _result_batch_queue.back()->result_batch.rows; diff --git a/be/src/storage/iterator/vertical_block_reader.cpp b/be/src/storage/iterator/vertical_block_reader.cpp index aa90c83ccb0a3d..335584997f0f92 100644 --- a/be/src/storage/iterator/vertical_block_reader.cpp +++ b/be/src/storage/iterator/vertical_block_reader.cpp @@ -413,6 +413,7 @@ Status VerticalBlockReader::_agg_key_next_block(Block* block, bool* eof) { break; } LOG(WARNING) << "next failed: " << res; + block->set_columns(std::move(target_columns)); return res; } DCHECK(_next_row.block->columns() == block->columns()); @@ -484,11 +485,12 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { int 
delete_sign_idx = block->columns() - 1; DCHECK(delete_sign_idx > 0); auto target_columns = block->mutate_columns(); - MutableColumnPtr delete_filter_column = (*std::move(_delete_filter_column)).mutate(); - reinterpret_cast(delete_filter_column.get())->resize(block_rows); + auto delete_filter_column = IColumn::mutate(std::move(_delete_filter_column)); + auto* delete_filter_data_column = + reinterpret_cast(delete_filter_column.get()); + delete_filter_data_column->resize(block_rows); - auto* __restrict filter_data = - reinterpret_cast(delete_filter_column.get())->get_data().data(); + auto* __restrict filter_data = delete_filter_data_column->get_data().data(); auto* __restrict delete_data = reinterpret_cast(target_columns[delete_sign_idx].get()) ->get_data() @@ -517,12 +519,14 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { row_source_idx++; } + const auto column_to_keep = target_columns.size(); + block->set_columns(std::move(target_columns)); + _delete_filter_column = std::move(delete_filter_column); ColumnWithTypeAndName column_with_type_and_name {_delete_filter_column, std::make_shared(), "__DORIS_COMPACTION_FILTER__"}; block->insert(column_with_type_and_name); - RETURN_IF_ERROR( - Block::filter_block(block, target_columns.size(), target_columns.size())); + RETURN_IF_ERROR(Block::filter_block(block, column_to_keep, column_to_keep)); _stats.rows_del_filtered += block_rows - block->rows(); if (UNLIKELY(_reader_context.record_rowids)) { DCHECK_EQ(_block_row_locations.size(), block->rows() + delete_count); @@ -562,6 +566,7 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { RETURN_IF_ERROR(mask_iter->unique_key_next_batch(&batches, _reader_context.batch_size, &actual_rows)); if (actual_rows == 0) { + block->set_columns(std::move(target_columns)); *eof = true; _eof = true; return Status::OK(); @@ -605,6 +610,7 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { break; } 
LOG(WARNING) << "next failed: " << res; + block->set_columns(std::move(target_columns)); return res; } const auto& src_block = _next_row.block; diff --git a/be/src/storage/segment/variant/binary_column_extract_iterator.h b/be/src/storage/segment/variant/binary_column_extract_iterator.h index 0e5632b9853400..8458955dab762c 100644 --- a/be/src/storage/segment/variant/binary_column_extract_iterator.h +++ b/be/src/storage/segment/variant/binary_column_extract_iterator.h @@ -154,8 +154,8 @@ class BinaryColumnExtractIterator : public BaseBinaryColumnProcessor { _sparse_column_cache->binary_column->get_ptr(), 0, _sparse_column_cache->binary_column->size()); var.incr_num_rows(_sparse_column_cache->binary_column->size()); - var.get_sparse_column()->assume_mutable()->resize(var.rows()); - var.get_doc_value_column()->assume_mutable()->resize(var.rows()); + var.get_sparse_column_mutable().resize(var.rows()); + var.get_doc_value_column_mutable().resize(var.rows()); ENABLE_CHECK_CONSISTENCY(&var); } diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp index 052f231b27e68c..41de18d846e28c 100644 --- a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp +++ b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp @@ -203,7 +203,7 @@ Status HierarchicalDataIterator::_process_sub_columns( ColumnVariant& container_variant, const PathsWithColumnAndType& non_nested_subcolumns) { for (const auto& entry : non_nested_subcolumns) { DCHECK(!entry.path.has_nested_part()); - bool add = container_variant.add_sub_column(entry.path, entry.column->assume_mutable(), + bool add = container_variant.add_sub_column(entry.path, IColumn::mutate(entry.column), entry.type); if (!add) { return Status::InternalError("Duplicated {}, type {}", entry.path.get_path(), @@ -225,7 +225,7 @@ Status HierarchicalDataIterator::_process_nested_columns( check_and_get_column(*remove_nullable(entry.second[0].column)); 
MutableColumnPtr nested_object = ColumnVariant::create(0, false, base_array->get_data().size()); - MutableColumnPtr offset = base_array->get_offsets_ptr()->assume_mutable(); + MutableColumnPtr offset = IColumn::mutate(base_array->get_offsets_ptr()); auto* nested_object_ptr = assert_cast(nested_object.get()); // flatten nested arrays for (const auto& subcolumn : entry.second) { @@ -246,7 +246,7 @@ Status HierarchicalDataIterator::_process_nested_columns( subcolumn.path.get_path(), subcolumn.type->get_name()); } #endif - MutableColumnPtr flattend_column = target_array->get_data_ptr()->assume_mutable(); + MutableColumnPtr flattend_column = IColumn::mutate(target_array->get_data_ptr()); DataTypePtr flattend_type = check_and_get_data_type(remove_nullable(type).get()) ->get_nested_type(); @@ -255,14 +255,18 @@ Status HierarchicalDataIterator::_process_nested_columns( subcolumn.path.copy_pop_nfront(entry.first.get_parts().size()), std::move(flattend_column), std::move(flattend_type)); } - nested_object = make_nullable(nested_object->get_ptr())->assume_mutable(); - auto array = - make_nullable(ColumnArray::create(std::move(nested_object), std::move(offset))); + const size_t nested_object_size = nested_object->size(); + nested_object = ColumnNullable::create(std::move(nested_object), + ColumnUInt8::create(nested_object_size, 0)); + auto array = ColumnArray::create(std::move(nested_object), std::move(offset)); + const size_t array_size = array->size(); + auto nullable_array = + ColumnNullable::create(std::move(array), ColumnUInt8::create(array_size, 0)); PathInDataBuilder builder; // add parent prefix builder.append(entry.first.get_parts(), false); PathInData parent_path = builder.build(); - container_variant.add_sub_column(parent_path, array->assume_mutable(), + container_variant.add_sub_column(parent_path, std::move(nullable_array), container_variant.NESTED_TYPE); } return Status::OK(); @@ -283,14 +287,17 @@ Status 
HierarchicalDataIterator::_init_container(MutableColumnPtr& container, si // auto column = root_var.get_root(); // auto type = root_var.get_root_type(); - MutableColumnPtr column = _root_reader->column->get_ptr(); + MutableColumnPtr column = IColumn::mutate(_root_reader->column->get_ptr()); // container_variant.add_sub_column({}, std::move(column), _root_reader->type); DCHECK(column->size() == nrows); - auto nullable_column = make_nullable(column->get_ptr()); + if (!column->is_nullable()) { + const size_t column_size = column->size(); + column = ColumnNullable::create(std::move(column), ColumnUInt8::create(column_size, 0)); + } auto type = make_nullable(_root_reader->type); // make sure the root type is nullable container = ColumnVariant::create(max_subcolumns_count, enable_doc_mode, type, - nullable_column->assume_mutable()); + std::move(column)); } else { DataTypePtr root_type = std::make_shared(); auto column = ColumnNothing::create(nrows); @@ -359,10 +366,10 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container if (_path.get_parts().empty()) { if (_read_type == ReadType::SUBCOLUMNS_AND_SPARSE) { container_variant.set_sparse_column(_binary_column_reader->column->get_ptr()); - container_variant.get_doc_value_column()->assume_mutable()->resize(nrows); + container_variant.get_doc_value_column_mutable().resize(nrows); } else if (_read_type == ReadType::DOC_VALUE_COLUMN) { container_variant.set_doc_value_column(_binary_column_reader->column->get_ptr()); - container_variant.get_sparse_column()->assume_mutable()->resize(nrows); + container_variant.get_sparse_column_mutable().resize(nrows); } else { return Status::InternalError("Invalid read type {}", _read_type); } @@ -378,7 +385,7 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container const auto& src_values = assert_cast(src_map.get_values()); // Clear pre-initialized doc_value offsets (created by ColumnVariant ctor with num_rows) - 
container_variant.get_doc_value_column()->assume_mutable()->clear(); + container_variant.get_doc_value_column_mutable().clear(); auto [dst_paths, dst_values] = container_variant.get_doc_value_data_paths_and_values(); auto& dst_offsets = container_variant.serialized_doc_value_column_offsets(); @@ -419,13 +426,13 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container } dst_offsets.push_back(dst_paths->size()); } - container_variant.get_sparse_column()->assume_mutable()->resize(nrows); + container_variant.get_sparse_column_mutable().resize(nrows); } else { const auto& offsets = assert_cast(*_binary_column_reader->column).get_offsets(); /// Check if there is no data in shared data in current range. if (offsets.back() == offsets[-1]) { - container_variant.get_sparse_column()->assume_mutable()->resize(nrows); + container_variant.get_sparse_column_mutable().resize(nrows); } else { // Read for variant sparse column // Example path: a.b @@ -444,8 +451,7 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container assert_cast(sparse_data_map.get_values()); auto& sparse_data_offsets = - assert_cast( - *container_variant.get_sparse_column()->assume_mutable()) + assert_cast(container_variant.get_sparse_column_mutable()) .get_offsets(); auto [sparse_data_paths, sparse_data_values] = container_variant.get_sparse_data_paths_and_values(); @@ -544,7 +550,7 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container } } } - container_variant.get_doc_value_column()->assume_mutable()->resize(nrows); + container_variant.get_doc_value_column_mutable().resize(nrows); } ENABLE_CHECK_CONSISTENCY(&container_variant); return Status::OK(); diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.h b/be/src/storage/segment/variant/hierarchical_data_iterator.h index 3e3816736a4851..cc7e3f7bd15f7a 100644 --- a/be/src/storage/segment/variant/hierarchical_data_iterator.h +++ 
b/be/src/storage/segment/variant/hierarchical_data_iterator.h @@ -138,6 +138,7 @@ class HierarchicalDataIterator : public ColumnIterator { // process read template Status process_read(ReadFunction&& read_func, MutableColumnPtr& dst, size_t nrows) { + dst = IColumn::mutate(std::move(dst)); // // Read all sub columns, and merge with root column ColumnNullable* nullable_column = nullptr; if (dst->is_nullable()) { diff --git a/be/src/storage/segment/variant/variant_column_writer_impl.cpp b/be/src/storage/segment/variant/variant_column_writer_impl.cpp index 8ad08640ba19bd..9a491aaa8f4f9f 100644 --- a/be/src/storage/segment/variant/variant_column_writer_impl.cpp +++ b/be/src/storage/segment/variant/variant_column_writer_impl.cpp @@ -1220,8 +1220,14 @@ Status VariantColumnWriterImpl::_process_root_column(ColumnVariant* ptr, DCHECK_EQ(ptr->get_root()->get_ptr()->size(), num_rows); converter->add_column_data_convertor(*_tablet_column); const uint8_t* nullmap = nullptr; - auto& nullable_column = assert_cast(*ptr->get_root()->assume_mutable()); - auto root_column = nullable_column.get_nested_column_ptr(); + // get_root() already returns a MutableColumnPtr; store it to avoid dangling ref and + // to avoid calling assume_mutable() again (which would see use_count>1 and throw). + auto root_mut = ptr->get_root(); + auto& nullable_column = assert_cast(*root_mut); + // Use const access to get the nested column ptr without bumping use_count in the + // non-const chameleon_ptr path, then mutate() to get exclusive ownership. + auto root_column = IColumn::mutate( + static_cast(nullable_column).get_nested_column_ptr()); const bool has_root_ng = std::ranges::any_of(_nested_group_routing_plan.ng_only_prefixes, @@ -1233,13 +1239,15 @@ Status VariantColumnWriterImpl::_process_root_column(ColumnVariant* ptr, // If the root variant is nullable, then update the root column null column with the outer null column. 
if (_tablet_column->is_nullable()) { // use outer null column as final null column + // Move root_column (exclusive) directly into create() to avoid sharing ownership. root_column = - ColumnNullable::create(root_column->get_ptr(), ColumnUInt8::create(*_null_column)); + ColumnNullable::create(std::move(root_column), ColumnUInt8::create(*_null_column)); nullmap = _null_column->get_data().data(); } else { // Otherwise setting to all not null. - root_column = ColumnNullable::create(root_column->get_ptr(), - ColumnUInt8::create(root_column->size(), 0)); + size_t col_size = root_column->size(); + root_column = + ColumnNullable::create(std::move(root_column), ColumnUInt8::create(col_size, 0)); } // make sure the root_column is nullable RETURN_IF_ERROR(converter->set_source_content_with_specifid_column( diff --git a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp index 0dcf05e095e302..a05201be9af38f 100644 --- a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp +++ b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp @@ -141,8 +141,10 @@ Status VariantStreamingCompactionWriter::_append_root_column(const ColumnVariant auto expected_root_type = make_nullable(std::make_shared()); variant->ensure_root_node_type(expected_root_type); - auto& nullable_column = assert_cast(*variant->get_root()->assume_mutable()); - auto root_column = nullable_column.get_nested_column_ptr(); + auto root_mut = variant->get_root(); + auto& nullable_column = assert_cast(*root_mut); + auto root_column = IColumn::mutate( + static_cast(nullable_column).get_nested_column_ptr()); const size_t num_rows = chunk_variant.rows(); variant_writer_helpers::maybe_remove_root_jsonb_with_empty_defaults( &root_column, num_rows, _streaming_plan.can_remove_root_jsonb()); @@ -157,10 +159,11 @@ Status VariantStreamingCompactionWriter::_append_root_column(const ColumnVariant } else { 
null_column->insert_many_defaults(num_rows); } - root_column = ColumnNullable::create(root_column->get_ptr(), std::move(null_column)); + root_column = ColumnNullable::create(std::move(root_column), std::move(null_column)); } else { - root_column = ColumnNullable::create(root_column->get_ptr(), - ColumnUInt8::create(root_column->size(), 0)); + const size_t root_column_size = root_column->size(); + root_column = ColumnNullable::create(std::move(root_column), + ColumnUInt8::create(root_column_size, 0)); } auto converter = std::make_unique(); diff --git a/be/test/core/block/column_nullable_test.cpp b/be/test/core/block/column_nullable_test.cpp index dc837c335b13d1..0b92d1813fb8fb 100644 --- a/be/test/core/block/column_nullable_test.cpp +++ b/be/test/core/block/column_nullable_test.cpp @@ -44,7 +44,7 @@ TEST(ColumnNullableTest, HashTest) { nullable_column->update_hash_with_value(0, hashes[1]); EXPECT_EQ(hashes[0].get64(), hashes[1].get64()); - auto& null_map = ((ColumnNullable)(*nullable_column)).get_null_map_data(); + auto& null_map = nullable_column->get_null_map_data(); null_map[1] = true; column->update_hash_with_value(1, hashes[0]); nullable_column->update_hash_with_value(1, hashes[1]); diff --git a/be/test/core/column/column_array_test.cpp b/be/test/core/column/column_array_test.cpp index e8c0bd4467898c..e00bae393856c0 100644 --- a/be/test/core/column/column_array_test.cpp +++ b/be/test/core/column/column_array_test.cpp @@ -688,8 +688,7 @@ TEST_F(ColumnArrayTest, ConvertIfOverflowAndInsertTest) { // check ptr is itself auto ptr = column->convert_column_if_overflow(); EXPECT_EQ(ptr.get(), column.get()); - auto arr_col = - check_and_get_column(remove_nullable(column->assume_mutable()).get()); + auto arr_col = check_and_get_column(remove_nullable(column->get_ptr()).get()); auto nested_col = arr_col->get_data_ptr(); auto array_col1 = check_and_get_column(remove_nullable(ptr).get()); auto nested_col1 = array_col1->get_data_ptr(); diff --git 
a/be/test/core/column/column_ip_test.cpp b/be/test/core/column/column_ip_test.cpp index fc03446e45503b..05cf6034ed37e5 100644 --- a/be/test/core/column/column_ip_test.cpp +++ b/be/test/core/column/column_ip_test.cpp @@ -77,32 +77,32 @@ class ColumnIPTest : public CommonColumnTest { TEST_F(ColumnIPTest, InsertRangeFromTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_range_from_callback); } TEST_F(ColumnIPTest, InsertManyFromTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_many_from_callback); } TEST_F(ColumnIPTest, InsertIndicesFromTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_indices_from_callback); } TEST_F(ColumnIPTest, InsertDefaultTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); // ipv4 default value is '0.0.0.0' and ipv6 default value is '::' check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_default_callback); } @@ -110,31 +110,31 @@ TEST_F(ColumnIPTest, InsertDefaultTest) { TEST_F(ColumnIPTest, 
InsertManyDefaultsTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_many_defaults_callback); } TEST_F(ColumnIPTest, GetDataAtTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_get_data_at_callback); } TEST_F(ColumnIPTest, FieldTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_field_callback); } TEST_F(ColumnIPTest, GetRawDataTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, {serde[1]}, ';', {2}, data_files[0], assert_get_raw_data_callback); } @@ -142,8 +142,8 @@ TEST_F(ColumnIPTest, GetRawDataTest) { TEST_F(ColumnIPTest, SerDeVecTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); ser_deser_vec(ip_cols, {dt_ipv4, dt_ipv6}); } @@ -151,8 +151,8 @@ TEST_F(ColumnIPTest, SerDeVecTest) { TEST_F(ColumnIPTest, serDeserializeWithArenaImpl) { // insert from 
data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); ser_deserialize_with_arena_impl(ip_cols, {dt_ipv4, dt_ipv6}); @@ -161,16 +161,16 @@ TEST_F(ColumnIPTest, serDeserializeWithArenaImpl) { TEST_F(ColumnIPTest, SizeTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_size_callback); } TEST_F(ColumnIPTest, ByteSizeTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_byte_size_callback); } @@ -178,8 +178,8 @@ TEST_F(ColumnIPTest, ByteSizeTest) { TEST_F(ColumnIPTest, AllocateBytesTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_allocated_bytes_callback); } @@ -187,8 +187,8 @@ TEST_F(ColumnIPTest, AllocateBytesTest) { TEST_F(ColumnIPTest, PopbackTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, 
';', {1, 2}, data_files[0], assert_pop_back_callback); } @@ -197,18 +197,18 @@ TEST_F(ColumnIPTest, CloneTest) { // we test the column with clone_resize, clone_empty for assert size and ptr // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); - assert_clone_empty(column_ipv4->assume_mutable_ref()); - assert_clone_empty(column_ipv6->assume_mutable_ref()); + assert_clone_empty(ip_cols[0]->assume_mutable_ref()); + assert_clone_empty(ip_cols[1]->assume_mutable_ref()); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_clone_resized_callback); } TEST_F(ColumnIPTest, CutTest) { MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_cut_callback); } @@ -216,24 +216,24 @@ TEST_F(ColumnIPTest, CutTest) { TEST_F(ColumnIPTest, ResizeTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_resize_callback); } TEST_F(ColumnIPTest, ReserveTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_reserve_callback); } 
TEST_F(ColumnIPTest, ReplaceColumnTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); // replace_column_data @@ -246,26 +246,26 @@ TEST_F(ColumnIPTest, ReplaceColumnTest) { TEST_F(ColumnIPTest, AppendDataBySelectorTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_append_data_by_selector_callback); } TEST_F(ColumnIPTest, PermutationAndSortTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[1], ';', {1, 2}); - assert_column_permutations(column_ipv4->assume_mutable_ref(), dt_ipv4); - assert_column_permutations(column_ipv6->assume_mutable_ref(), dt_ipv6); + assert_column_permutations(ip_cols[0]->assume_mutable_ref(), dt_ipv4); + assert_column_permutations(ip_cols[1]->assume_mutable_ref(), dt_ipv6); } TEST_F(ColumnIPTest, FilterTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_filter_callback); } @@ -274,8 +274,8 @@ TEST_F(ColumnIPTest, HashTest) { // XXHash // insert from data csv and assert insert result 
MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); // update_hashes_with_value diff --git a/be/test/core/column/column_variant_test.cpp b/be/test/core/column/column_variant_test.cpp index ef99a57a257e1c..c5141fe697a0c7 100644 --- a/be/test/core/column/column_variant_test.cpp +++ b/be/test/core/column/column_variant_test.cpp @@ -1178,9 +1178,11 @@ TEST_F(ColumnVariantTest, field_test) { ColumnVariant::MutablePtr obj; obj = ColumnVariant::create(1, false); MutableColumns cols; - cols.push_back(obj->get_ptr()); + cols.push_back(std::move(obj)); const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl"; load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj); + obj = ColumnVariant::cast_to_column_mutptr(assert_cast(cols[0].get())); + cols.clear(); EXPECT_TRUE(!obj->empty()); test_func(obj); } @@ -2122,13 +2124,16 @@ TEST_F(ColumnVariantTest, fill_path_column_from_sparse_data) { ColumnVariant::MutablePtr obj; obj = ColumnVariant::create(1, false); MutableColumns cols; - cols.push_back(obj->get_ptr()); + cols.push_back(std::move(obj)); const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl"; load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj); + obj = ColumnVariant::cast_to_column_mutptr(assert_cast(cols[0].get())); + cols.clear(); EXPECT_TRUE(!obj->empty()); auto sparse_col = obj->get_sparse_column(); auto cloned_sparse = sparse_col->clone_empty(); - auto& offsets = obj->serialized_sparse_column_offsets(); + const auto& offsets = + static_cast(*obj).serialized_sparse_column_offsets(); for (size_t i = 0; i != offsets.size(); ++i) { auto start = offsets[i - 1]; auto end = offsets[i]; diff --git a/be/test/core/column/common_column_test.h 
b/be/test/core/column/common_column_test.h index ac4ed5eff76582..fe0ecf051d0140 100644 --- a/be/test/core/column/common_column_test.h +++ b/be/test/core/column/common_column_test.h @@ -634,11 +634,15 @@ class CommonColumnTest : public ::testing::Test { Block block; for (size_t i = 0; i < load_cols.size(); ++i) { ColumnWithTypeAndName columnTypeAndName; - columnTypeAndName.column = load_cols[i]->assume_mutable(); + columnTypeAndName.column = load_cols[i]->get_ptr(); columnTypeAndName.type = types[i]; block.insert(columnTypeAndName); } MutableBlock mb = MutableBlock::build_mutable_block(&block); + // Rebuild block from load_cols after build_mutable_block stole the column pointers + for (size_t i = 0; i < load_cols.size(); ++i) { + block.get_by_position(i).column = load_cols[i]->get_ptr(); + } // step2. to construct a block for assert_cols Block assert_block; Block empty_block; @@ -691,7 +695,9 @@ class CommonColumnTest : public ::testing::Test { continue; } else if (*pos + *cl > source_column->size()) { if (is_column( - remove_nullable(source_column->assume_mutable()).get())) { + remove_nullable(static_cast(source_column.get()) + ->get_ptr()) + .get())) { // insert_range_from in array has DCHECK_LG continue; } @@ -3544,13 +3550,13 @@ auto assert_column_vector_serialize_vec_callback = [](auto x, if (test_null_map) { cloned_target_column->serialize(input_keys.data(), rows); deser_column_wrapper = cloned_target_column->clone_empty(); - deser_column = ((ColumnNullable*)deser_column_wrapper.get())->get_nested_column_ptr(); } else { target_column->serialize(input_keys.data(), rows); deser_column = source_column->clone_empty(); } if (test_null_map) { deser_column_wrapper->deserialize(input_keys.data(), rows); + deser_column = ((ColumnNullable*)deser_column_wrapper.get())->get_nested_column_ptr(); } else { deser_column->deserialize(input_keys.data(), rows); } diff --git a/be/test/core/data_type/common_data_type_serder_test.h 
b/be/test/core/data_type/common_data_type_serder_test.h index d968cc1213e92d..a7393b9d8eee0c 100644 --- a/be/test/core/data_type/common_data_type_serder_test.h +++ b/be/test/core/data_type/common_data_type_serder_test.h @@ -277,7 +277,7 @@ class CommonDataTypeSerdeTest : public ::testing::Test { jsonb_column->reserve(load_cols[0]->size()); MutableColumns assert_cols; for (size_t i = 0; i < load_cols.size(); ++i) { - assert_cols.push_back(load_cols[i]->assume_mutable()); + assert_cols.push_back(load_cols[i]->clone_empty()); } DataTypeSerDe::FormatOptions options; auto tz = cctz::utc_time_zone(); diff --git a/be/test/core/data_type_serde/data_type_serde_csv_test.cpp b/be/test/core/data_type_serde/data_type_serde_csv_test.cpp index 0478507cab0844..84bce05751a061 100644 --- a/be/test/core/data_type_serde/data_type_serde_csv_test.cpp +++ b/be/test/core/data_type_serde/data_type_serde_csv_test.cpp @@ -512,8 +512,9 @@ TEST(CsvSerde, ComplexTypeSerdeSchemaChangedCsvTest) { DataTypeSerDeSPtr serde = data_type_ptr->get_serde(); Status st = serde->deserialize_one_cell_from_hive_text(*col, slice, formatOptions); EXPECT_EQ(st, Status::OK()); - auto struct_col = static_cast( - static_cast(*col.get()).get_nested_column()); + // Use const access for read-only assertions: avoids assume_mutable_ref() on sub-columns. 
+ const auto& struct_col = static_cast( + static_cast(*col.get()).get_nested_column()); EXPECT_EQ(struct_col.get_column(0).get_data_at(0).to_string(), "false"); EXPECT_EQ(struct_col.get_column(1).get_data_at(0).to_string(), "example"); @@ -537,11 +538,11 @@ TEST(CsvSerde, ComplexTypeSerdeSchemaChangedCsvTest) { DataTypeSerDeSPtr serde = data_type_ptr->get_serde(); Status st = serde->deserialize_one_cell_from_hive_text(*col, slice, formatOptions); EXPECT_EQ(st, Status::OK()); - auto array_col = static_cast( - static_cast(*col.get()).get_nested_column()); + const auto& array_col = static_cast( + static_cast(*col.get()).get_nested_column()); - auto string_col = static_cast( - static_cast(array_col.get_data()).get_nested_column()); + const auto& string_col = static_cast( + static_cast(array_col.get_data()).get_nested_column()); EXPECT_EQ(string_col.get_data_at(0).to_string(), "1\003example"); EXPECT_EQ(string_col.get_data_at(1).to_string(), "2\003test"); } diff --git a/be/test/core/data_type_serde/data_type_serde_struct_test.cpp b/be/test/core/data_type_serde/data_type_serde_struct_test.cpp index e583b50e4302f2..5158ab01c75f12 100644 --- a/be/test/core/data_type_serde/data_type_serde_struct_test.cpp +++ b/be/test/core/data_type_serde/data_type_serde_struct_test.cpp @@ -144,10 +144,9 @@ TEST_F(DataTypeStructSerDeTest, ArrowMemNotAligned) { EXPECT_EQ(string_values_address % 4, 1); // 5.Test read_column_from_arrow - std::vector vector_columns; - vector_columns.emplace_back(ColumnInt32::create()); - vector_columns.emplace_back(ColumnString::create()); - auto ser_col = ColumnStruct::create(vector_columns); + // Create sub-columns exclusively (no extra refs) so that ColumnStruct::get_column() + // non-const path does not find use_count > 1. 
+ auto ser_col = ColumnStruct::create(Columns {ColumnInt32::create(), ColumnString::create()}); cctz::time_zone tz; DataTypeSerDeSPtrs elem_serdes = {serde_int32, serde_str}; Strings field_names = {"int_field", "string_field"}; diff --git a/be/test/exec/column_type_convert_test.cpp b/be/test/exec/column_type_convert_test.cpp index 5178cddbd59d2f..f336a245568cbd 100644 --- a/be/test/exec/column_type_convert_test.cpp +++ b/be/test/exec/column_type_convert_test.cpp @@ -63,8 +63,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerWideningConversions) { src_data.push_back(std::numeric_limits::max()); src_data.push_back(std::numeric_limits::min()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -96,8 +95,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerWideningConversions) { src_data.push_back(std::numeric_limits::max()); src_data.push_back(std::numeric_limits::min()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -130,8 +128,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerNarrowingConversions) { src_data.push_back(std::numeric_limits::max()); src_data.push_back(std::numeric_limits::min()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -160,8 +157,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerNarrowingConversions) { src_data.push_back(std::numeric_limits::max() + 1); src_data.push_back(std::numeric_limits::min() - 1); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = 
dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(!st.ok()); @@ -189,8 +185,7 @@ TEST_F(ColumnTypeConverterTest, TestFloatingPointConversions) { src_data.push_back((1L << 23) - 1); src_data.push_back(1L << 23); src_data.push_back((1L << 23) + 1); - auto dst_nullable_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_nullable_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -232,8 +227,7 @@ TEST_F(ColumnTypeConverterTest, TestFloatingPointConversions) { src_col->insert_data("invalid", 7); // Invalid string src_col->insert_data("", 0); // Empty string - auto dst_nullable_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_nullable_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -289,8 +283,7 @@ TEST_F(ColumnTypeConverterTest, TestFloatingPointConversions) { src_data.push_back(-std::numeric_limits::infinity()); src_data.push_back(std::numeric_limits::quiet_NaN()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -325,8 +318,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal32(12345)); // 123.45 src_data.push_back(Decimal32(-12345)); // -123.45 - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); 
ASSERT_TRUE(st.ok()); @@ -354,8 +346,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal32(12345)); // 123.45 src_data.push_back(Decimal32(-67890)); // -678.90 - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -385,8 +376,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal64(12345678901234)); // Normal value: 1234567890.1234 src_data.push_back(Decimal64(-98765432109876)); // Negative value: -9876543210.9876 - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); // Perform conversion Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); @@ -419,8 +409,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal32(-12345)); // -123.45 src_data.push_back(Decimal32(23345)); // Too large 233.45 - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -458,8 +447,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal128V3(-102345)); src_data.push_back(Decimal128V3(203345)); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -499,8 +487,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { 
src_data.push_back(Decimal256(655363345)); src_data.push_back(Decimal256(3333333333332345)); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -539,8 +526,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-67890); // -678.90 after scaling src_data.push_back(0); // Zero check - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -569,8 +555,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-67890); src_data.push_back(0); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -598,8 +583,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-123); // -678.90 after scaling src_data.push_back(0); // Zero check - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -628,8 +612,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-123); // -678.90 after scaling src_data.push_back(0); // Zero check - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = 
nullable_col.get_null_map_data(); @@ -667,8 +650,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal64(-999999999)); // Edge case: negative max for Decimal32 src_data.push_back(Decimal64(-1000000000)); // Out of range (underflow) - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = nullable_col.get_null_map_data(); @@ -698,9 +680,8 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal64(999999999)); // Edge case: max for Decimal32 src_data.push_back(Decimal64(-999999999)); // Edge case: negative max for Decimal32 ASSERT_EQ(3, src_data.size()); - auto dst_col = nullable_dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); + mutable_dst->resize(0); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -743,9 +724,8 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(std::numeric_limits::infinity()); // Infinity src_data.push_back(std::numeric_limits::quiet_NaN()); // NaN - auto dst_col = nullable_dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); + mutable_dst->resize(0); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -791,9 +771,8 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_col->insert_data("0.0", 3); // Zero value src_col->insert_data("9999999999.99", 13); // Edge case: max valid value within precision - auto dst_col = nullable_dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = 
nullable_dst_type->create_column(); + mutable_dst->resize(0); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -837,8 +816,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(std::numeric_limits::min()); src_data.push_back(0); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -869,8 +847,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(std::numeric_limits::infinity()); src_data.push_back(std::numeric_limits::quiet_NaN()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -901,8 +878,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_col->insert_data("not a number", 11); src_col->insert_data("2147483648", 10); // Greater than INT32_MAX - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -935,9 +911,8 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(Decimal32(-67890)); // -678.90 src_data.push_back(Decimal32(0)); // Zero - auto dst_col = dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); + mutable_dst->resize(0); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -972,9 +947,8 @@ TEST_F(ColumnTypeConverterTest, 
TestStringConversions) { value.unchecked_set_time(2070, 1, 1, 0, 0, 0); src_data.push_back(*reinterpret_cast(&value)); // "2070-01-01" in days format - auto dst_col = dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); + mutable_dst->resize(0); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -1005,8 +979,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(1); // true src_data.push_back(0); // false - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -1041,8 +1014,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_col->insert_data("1.5", 3); // Hive: null (not an integer) src_col->insert_data("", 0); // Hive: null - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast( @@ -1090,8 +1062,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("abc", 3); // Invalid - should be NULL src_col->insert_data("", 0); // Empty - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1137,8 +1108,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("-32769", 6); // Underflow - should be NULL src_col->insert_data("123.45", 6); // Decimal - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = 
dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1182,8 +1152,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("1000000", 7); // Million src_col->insert_data("2147483648", 10); // Overflow - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1227,8 +1196,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("9223372036854775808", 19); // Overflow - should be NULL src_col->insert_data("123abc", 6); // Invalid - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1270,8 +1238,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("0", 1); // Zero src_col->insert_data("123e45", 6); // Scientific notation - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1460,8 +1427,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { // 2024-01-01 00:00:00.123456 auto src_col = make_datetimev2_col({{2024, 1, 1, 0, 0, 0, 123456}}); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = 
converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -1484,8 +1450,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { // 1970-01-01 00:00:00.000000 // 3000-01-01 00:00:00.000000 auto src_col = make_datetimev2_col({{1970, 1, 1, 0, 0, 0, 0}, {3000, 1, 1, 0, 0, 0, 0}}); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = nullable_col.get_null_map_data(); null_map.resize_fill(src_col->size(), 0); @@ -1512,8 +1477,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { // 3000-01-01 00:00:00.000000(会溢出int32) auto src_col = make_datetimev2_col({{3000, 1, 1, 0, 0, 0, 0}}); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_FALSE(st.ok()); @@ -1545,8 +1509,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { src_col->get_data().push_back(parse_datetimev2_str("2022-05-01 13:00:00")); src_col->get_data().push_back(parse_datetimev2_str("2022-05-01 14:00:00")); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = nullable_col.get_null_map_data(); null_map.resize_fill(src_col->size(), 0); @@ -1715,10 +1678,9 @@ TEST_F(ColumnTypeConverterTest, TestEmptyColumnConversions) { ASSERT_FALSE(converter->is_consistent()); auto src_col = ColumnInt32::create(); // Empty column (no data) - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); src_col->resize(0); - dst_col->resize(0); + mutable_dst->resize(0); // 
Perform conversion Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); diff --git a/be/test/exec/common/schema_util_rowset_test.cpp b/be/test/exec/common/schema_util_rowset_test.cpp index cf99c9824956c5..18bc77ccb3f883 100644 --- a/be/test/exec/common/schema_util_rowset_test.cpp +++ b/be/test/exec/common/schema_util_rowset_test.cpp @@ -172,6 +172,7 @@ static void fill_block_with_test_data(Block* block, int size) { auto v4 = Field::create_field(i); columns[4]->insert(v4); } + block->set_columns(std::move(columns)); } static int64_t inc_id = 1000; static RowsetWriterContext rowset_writer_context(const std::unique_ptr& data_dir, diff --git a/be/test/exec/common/schema_util_test.cpp b/be/test/exec/common/schema_util_test.cpp index 63ba645272282c..262e08eaf030ff 100644 --- a/be/test/exec/common/schema_util_test.cpp +++ b/be/test/exec/common/schema_util_test.cpp @@ -817,7 +817,8 @@ TEST_F(SchemaUtilTest, TestCastColumnEdgeCases) { // Test casting from variant to variant auto variant_column = ColumnVariant::create(10, false); - variant_column->create_root(nullable_array_type, nullable_array_column->assume_mutable()); + // nullable_array_column is also stored in array_col.column (use_count=2), so mutate() clones it. 
+ variant_column->create_root(nullable_array_type, IColumn::mutate(nullable_array_column)); ColumnWithTypeAndName variant_col; variant_col.type = variant_type; @@ -1955,14 +1956,14 @@ TEST_F(SchemaUtilTest, parse_and_materialize_variant_columns_ambiguous_paths) { // Prepare the variant column with the string column as root ColumnVariant::Subcolumns dynamic_subcolumns; dynamic_subcolumns.create_root( - ColumnVariant::Subcolumn(string_col->assume_mutable(), string_type, true)); + ColumnVariant::Subcolumn(std::move(string_col), string_type, true)); auto variant_col = ColumnVariant::create(0, false, std::move(dynamic_subcolumns)); auto variant_type = std::make_shared(); // Construct the block Block block; - block.insert(ColumnWithTypeAndName(variant_col->assume_mutable(), variant_type, "v")); + block.insert(ColumnWithTypeAndName(std::move(variant_col), variant_type, "v")); // The variant column is at index 0 std::vector variant_pos = {0}; diff --git a/be/test/exec/connector/vjdbc_connector_test.cpp b/be/test/exec/connector/vjdbc_connector_test.cpp index 16ff8689aafaf2..5ec3fb7046a5a9 100644 --- a/be/test/exec/connector/vjdbc_connector_test.cpp +++ b/be/test/exec/connector/vjdbc_connector_test.cpp @@ -16,6 +16,7 @@ // under the License. 
#include +#include #include #include @@ -33,26 +34,55 @@ class JdbcUtilsTest : public ::testing::Test { void SetUp() override { // Save original config and environment original_jdbc_drivers_dir_ = config::jdbc_drivers_dir; - original_doris_home_ = getenv("DORIS_HOME"); + const char* original_doris_home = getenv("DORIS_HOME"); + if (original_doris_home != nullptr) { + original_doris_home_ = original_doris_home; + has_original_doris_home_ = true; + } // Set DORIS_HOME for testing - setenv("DORIS_HOME", "/tmp/test_doris", 1); + temp_home_ = std::filesystem::temp_directory_path() / + ("doris_jdbc_utils_test_" + std::to_string(::getpid())); + second_temp_home_ = std::filesystem::temp_directory_path() / + ("doris_jdbc_utils_test_second_" + std::to_string(::getpid())); + std::filesystem::remove_all(temp_home_); + std::filesystem::remove_all(second_temp_home_); + std::filesystem::create_directories(temp_home_); + setenv("DORIS_HOME", temp_home_.c_str(), 1); } void TearDown() override { // Restore original config and environment config::jdbc_drivers_dir = original_jdbc_drivers_dir_; - if (original_doris_home_) { - setenv("DORIS_HOME", original_doris_home_, 1); + if (has_original_doris_home_) { + setenv("DORIS_HOME", original_doris_home_.c_str(), 1); } else { unsetenv("DORIS_HOME"); } + std::filesystem::remove_all(temp_home_); + std::filesystem::remove_all(second_temp_home_); + } + + std::string default_driver_dir() const { + return (temp_home_ / "plugins" / "jdbc_drivers").string(); + } + + std::string old_driver_dir() const { return (temp_home_ / "jdbc_drivers").string(); } + + std::string second_default_driver_dir() const { + return (second_temp_home_ / "plugins" / "jdbc_drivers").string(); + } + + std::string second_old_driver_dir() const { + return (second_temp_home_ / "jdbc_drivers").string(); } -private: std::string original_jdbc_drivers_dir_; - const char* original_doris_home_ = nullptr; + std::string original_doris_home_; + bool has_original_doris_home_ = false; + 
std::filesystem::path temp_home_; + std::filesystem::path second_temp_home_; }; // Test resolve_driver_url with absolute URLs @@ -79,10 +109,10 @@ TEST_F(JdbcUtilsTest, TestResolveDriverUrlWithRelativeUrl) { std::string result_url; // Set config to default value to trigger the default directory logic - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); // Create the target directory and file for testing - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/mysql-connector.jar"; // Create directory and file @@ -104,10 +134,10 @@ TEST_F(JdbcUtilsTest, TestResolveDriverUrlWithRelativeUrl) { // Test resolve_driver_url with default directory TEST_F(JdbcUtilsTest, TestResolveWithDefaultConfig) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); // Create the target directory and file for testing - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/mysql-connector.jar"; std::filesystem::create_directories(dir); @@ -138,9 +168,9 @@ TEST_F(JdbcUtilsTest, TestResolveWithCustomConfig) { } TEST_F(JdbcUtilsTest, TestDefaultDirectoryFileExistsPath) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/existing-driver.jar"; std::filesystem::create_directories(dir); @@ -160,10 +190,10 @@ TEST_F(JdbcUtilsTest, TestDefaultDirectoryFileExistsPath) { } TEST_F(JdbcUtilsTest, TestFallbackToOldDirectory) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); // Create only the old directory and file (not the new one) - std::string 
old_dir = "/tmp/test_doris/jdbc_drivers"; + std::string old_dir = old_driver_dir(); std::string file_path = old_dir + "/fallback-driver.jar"; std::filesystem::create_directories(old_dir); @@ -183,10 +213,11 @@ TEST_F(JdbcUtilsTest, TestFallbackToOldDirectory) { } TEST_F(JdbcUtilsTest, TestPathConstruction) { - setenv("DORIS_HOME", "/tmp/test_doris2", 1); - config::jdbc_drivers_dir = "/tmp/test_doris2/plugins/jdbc_drivers"; + std::filesystem::create_directories(second_temp_home_); + setenv("DORIS_HOME", second_temp_home_.c_str(), 1); + config::jdbc_drivers_dir = second_default_driver_dir(); - std::string old_dir = "/tmp/test_doris2/jdbc_drivers"; + std::string old_dir = second_old_driver_dir(); std::string file_path = old_dir + "/test.jar"; std::filesystem::create_directories(old_dir); @@ -223,9 +254,9 @@ TEST_F(JdbcUtilsTest, TestEdgeCases) { } TEST_F(JdbcUtilsTest, TestMultipleCallsConsistency) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/same-driver.jar"; std::filesystem::create_directories(dir); diff --git a/be/test/exprs/function/function_variant_element_test.cpp b/be/test/exprs/function/function_variant_element_test.cpp index d4d413a601aa45..19f85217c6fa37 100644 --- a/be/test/exprs/function/function_variant_element_test.cpp +++ b/be/test/exprs/function/function_variant_element_test.cpp @@ -40,7 +40,7 @@ TEST(function_variant_element_test, extract_from_sparse_column) { sparse_column_offsets.push_back(sparse_column_keys->size()); variant_ptr->get_subcolumn({})->insert_default(); variant_ptr->set_num_rows(1); - variant_ptr->get_doc_value_column()->assume_mutable()->resize(1); + variant_ptr->get_doc_value_column_mutable().resize(1); ColumnPtr result; ColumnPtr index_column_ptr = ColumnString::create(); @@ -61,4 +61,4 @@ TEST(function_variant_element_test, 
extract_from_sparse_column) { EXPECT_EQ(result_string, "{\"age\":\"John\",\"name\":\"John\"}"); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/format/native/native_reader_writer_test.cpp b/be/test/format/native/native_reader_writer_test.cpp index 5d1d7dc207cef7..cf568354925b17 100644 --- a/be/test/format/native/native_reader_writer_test.cpp +++ b/be/test/format/native/native_reader_writer_test.cpp @@ -530,6 +530,7 @@ TEST_F(NativeReaderWriterTest, round_trip_native_file_large_rows) { MutableBlock merged_mutable(&merged_block); Status add_st = merged_mutable.add_rows(&dst_block, 0, read_rows); ASSERT_TRUE(add_st.ok()) << add_st; + merged_block.set_columns(std::move(merged_mutable.mutable_columns())); total_read_rows += read_rows; } } diff --git a/be/test/format/parquet/parquet_thrift_test.cpp b/be/test/format/parquet/parquet_thrift_test.cpp index 2253b6c12cce5f..56d8c9c7b1d36a 100644 --- a/be/test/format/parquet/parquet_thrift_test.cpp +++ b/be/test/format/parquet/parquet_thrift_test.cpp @@ -160,8 +160,8 @@ TEST_F(ParquetThriftReaderTest, complex_nested_file) { static int fill_nullable_column(ColumnPtr& doris_column, level_t* definitions, size_t num_values) { CHECK(doris_column->is_nullable()); - auto* nullable_column = - const_cast(static_cast(doris_column.get())); + doris_column = IColumn::mutate(std::move(doris_column)); + auto* nullable_column = assert_cast(doris_column->assume_mutable().get()); NullMap& map_data = nullable_column->get_null_map_data(); int null_cnt = 0; for (int i = 0; i < num_values; ++i) { @@ -192,6 +192,9 @@ static Status get_column_values(io::FileReaderSPtr file_reader, tparquet::Column ColumnPtr src_column = _converter->get_physical_column( field_schema->physical_type, field_schema->data_type, doris_column, data_type, false); + if (_converter->read_directly_into_dst_logical_column()) { + src_column = std::move(doris_column); + } DataTypePtr& resolved_type = _converter->get_physical_type(); 
io::BufferedFileStreamReader stream_reader(file_reader, start_offset, chunk_size, 1024); @@ -216,10 +219,10 @@ static Status get_column_values(io::FileReaderSPtr file_reader, tparquet::Column if (src_column->is_nullable()) { // fill nullable values fill_nullable_column(src_column, definitions, rows); - auto* nullable_column = - const_cast(static_cast(src_column.get())); + auto* nullable_column = assert_cast(src_column->assume_mutable().get()); data_column = nullable_column->get_nested_column_ptr(); } else { + src_column = IColumn::mutate(std::move(src_column)); data_column = src_column->assume_mutable(); } FilterMap filter_map; diff --git a/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp b/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp index bbfe9e18a35830..c8a40194ff0803 100644 --- a/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp +++ b/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp @@ -214,7 +214,8 @@ static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); int32_t c5 = seq; - columns[4]->insert_data((const char*)&c5, sizeof(c2)); + columns[4]->insert_data((const char*)&c5, sizeof(c5)); + block->set_columns(std::move(columns)); } class TestDeltaWriterClusterKey : public ::testing::Test { diff --git a/be/test/load/delta_writer/delta_writer_test.cpp b/be/test/load/delta_writer/delta_writer_test.cpp index 08cd0f7c7e579a..0ce52ceea706eb 100644 --- a/be/test/load/delta_writer/delta_writer_test.cpp +++ b/be/test/load/delta_writer/delta_writer_test.cpp @@ -461,15 +461,16 @@ static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { {"2020-07-16 19:39:43", 19}, c3, nullptr, p); } int64_t c3_int = c3.to_int64(); - columns[2]->insert_data((const char*)&c3_int, sizeof(c3)); + columns[2]->insert_data((const char*)&c3_int, sizeof(c3_int)); DateV2Value c4; c4.unchecked_set_time(2022, 6, 6, 0, 0, 0, 0); uint32_t c4_int = 
c4.to_date_int_val(); - columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); + columns[3]->insert_data((const char*)&c4_int, sizeof(c4_int)); int32_t c5 = seq; - columns[4]->insert_data((const char*)&c5, sizeof(c2)); + columns[4]->insert_data((const char*)&c5, sizeof(c5)); + block->set_columns(std::move(columns)); } class TestDeltaWriter : public ::testing::Test { @@ -670,6 +671,7 @@ TEST_F(TestDeltaWriter, vec_write) { date_v2_int = date_v2.to_date_int_val(); columns[21]->insert_data((const char*)&date_v2_int, sizeof(date_v2_int)); + block.set_columns(std::move(columns)); res = delta_writer->write(&block, {0}); ASSERT_TRUE(res.ok()); } diff --git a/be/test/load/memtable/memtable_memory_limiter_test.cpp b/be/test/load/memtable/memtable_memory_limiter_test.cpp index 1d5c1238335346..f3566448a1f29b 100644 --- a/be/test/load/memtable/memtable_memory_limiter_test.cpp +++ b/be/test/load/memtable/memtable_memory_limiter_test.cpp @@ -165,6 +165,7 @@ TEST_F(MemTableMemoryLimiterTest, handle_memtable_flush_test) { int32_t k3 = -2147483647; columns[2]->insert_data((const char*)&k3, sizeof(k3)); + block.set_columns(std::move(columns)); res = delta_writer->write(&block, {0}); ASSERT_TRUE(res.ok()); } diff --git a/be/test/runtime/snapshot_loader_test.cpp b/be/test/runtime/snapshot_loader_test.cpp index 6c320d225f5e44..209ab1139a406b 100644 --- a/be/test/runtime/snapshot_loader_test.cpp +++ b/be/test/runtime/snapshot_loader_test.cpp @@ -214,6 +214,7 @@ static void add_rowset(int64_t tablet_id, int32_t schema_hash, int64_t partition auto columns = block.mutate_columns(); int16_t c1 = value; columns[0]->insert_data((const char*)&c1, sizeof(c1)); + block.set_columns(std::move(columns)); Status res = delta_writer->write(&block, {0}); EXPECT_TRUE(res.ok()); diff --git a/be/test/runtime/stream_load_parquet_test.cpp b/be/test/runtime/stream_load_parquet_test.cpp index bf9a35c2a64111..62e280f1e80a64 100644 --- a/be/test/runtime/stream_load_parquet_test.cpp +++ 
b/be/test/runtime/stream_load_parquet_test.cpp @@ -15,6 +15,10 @@ // specific language governing permissions and limitations // under the License. +#include + +#include + #include "gtest/gtest.h" #include "load/load_path_mgr.h" #include "runtime/exec_env.h" @@ -27,18 +31,19 @@ class LoadPathMgrTest : public testing::Test { _exec_env = ExecEnv::GetInstance(); _load_path_mgr = std::make_unique(_exec_env); - // create tmp file - _test_dir = "/tmp/test_clean_file"; - _test_dir1 = "/tmp/test_clean_file/mini_download"; - _test_dir2 = "/tmp/test_clean_file1/mini_download/test.parquet"; - - auto result = io::global_local_filesystem()->delete_directory_or_file(_test_dir1); - result = io::global_local_filesystem()->create_directory(_test_dir1); - EXPECT_TRUE(result.ok()); + auto test_root = std::filesystem::temp_directory_path() / + ("doris_load_path_mgr_test_" + std::to_string(::getpid())); + _test_dir = test_root.string(); + _test_dir1 = _test_dir + "/mini_download"; + _test_dir2 = _test_dir1 + "/test.parquet"; - result = io::global_local_filesystem()->delete_directory_or_file(_test_dir2); - result = io::global_local_filesystem()->create_directory(_test_dir2); - EXPECT_TRUE(result.ok()); + std::error_code ec; + std::filesystem::remove_all(_test_dir, ec); + ASSERT_FALSE(ec) << ec.message(); + std::filesystem::create_directories(_test_dir1, ec); + ASSERT_FALSE(ec) << ec.message(); + std::filesystem::create_directories(_test_dir2, ec); + ASSERT_FALSE(ec) << ec.message(); const_cast&>(_exec_env->store_paths()).emplace_back(_test_dir, 1024); } @@ -46,6 +51,9 @@ class LoadPathMgrTest : public testing::Test { void TearDown() override { const_cast&>(_exec_env->store_paths()).clear(); _load_path_mgr->stop(); + std::error_code ec; + std::filesystem::remove_all(_test_dir, ec); + EXPECT_FALSE(ec) << ec.message(); _exec_env->destroy(); } @@ -96,4 +104,4 @@ TEST_F(LoadPathMgrTest, CheckDiskSpaceTest) { EXPECT_FALSE(exists); } -} // namespace doris \ No newline at end of file +} // 
namespace doris diff --git a/be/test/storage/adaptive_thread_pool_controller_test.cpp b/be/test/storage/adaptive_thread_pool_controller_test.cpp index 06d79629330e14..6c4e42fea05897 100644 --- a/be/test/storage/adaptive_thread_pool_controller_test.cpp +++ b/be/test/storage/adaptive_thread_pool_controller_test.cpp @@ -19,6 +19,7 @@ #include +#include #include #include "common/config.h" @@ -44,15 +45,19 @@ class AdaptiveThreadPoolControllerTest : public testing::Test { void SetUp() override { _original_enable_adaptive = config::enable_adaptive_flush_threads; + int num_cpus = std::thread::hardware_concurrency(); + if (num_cpus <= 0) num_cpus = 1; + int max_threads = std::max(64, num_cpus * 4); + ASSERT_TRUE(ThreadPoolBuilder("TestPool") .set_min_threads(2) - .set_max_threads(64) + .set_max_threads(max_threads) .build(&_pool) .ok()); ASSERT_TRUE(ThreadPoolBuilder("TestPool2") .set_min_threads(2) - .set_max_threads(64) + .set_max_threads(max_threads) .build(&_pool2) .ok()); } diff --git a/be/test/storage/compaction/ordered_data_compaction_test.cpp b/be/test/storage/compaction/ordered_data_compaction_test.cpp index 006d48358c467e..712f74d1394fa5 100644 --- a/be/test/storage/compaction/ordered_data_compaction_test.cpp +++ b/be/test/storage/compaction/ordered_data_compaction_test.cpp @@ -318,6 +318,7 @@ class OrderedDataCompactionTest : public ::testing::Test { } num_rows++; } + block.set_columns(std::move(columns)); auto s = rowset_writer->add_block(&block); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); @@ -587,6 +588,7 @@ TEST_F(OrderedDataCompactionTest, test_index_disk_size) { } num_rows++; } + block.set_columns(std::move(columns)); auto s = rowset_writer->add_block(&block); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); diff --git a/be/test/storage/compaction/segcompaction_mow_test.cpp b/be/test/storage/compaction/segcompaction_mow_test.cpp index 760a5d953aa693..13b836cd5a38d3 100644 --- a/be/test/storage/compaction/segcompaction_mow_test.cpp +++ 
b/be/test/storage/compaction/segcompaction_mow_test.cpp @@ -103,6 +103,14 @@ class SegCompactionMoWTest : public ::testing::TestWithParam { protected: OlapReaderStatistics _stats; + Status add_block_with_columns(RowsetWriter* rowset_writer, Block* block, + MutableColumns* columns) { + block->set_columns(std::move(*columns)); + auto st = rowset_writer->add_block(block); + *columns = block->mutate_columns(); + return st; + } + bool check_dir(std::vector& vec) { std::vector result; for (const auto& entry : std::filesystem::directory_iterator(lTestDir)) { @@ -358,7 +366,7 @@ TEST_P(SegCompactionMoWTest, SegCompactionThenRead) { } } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -458,7 +466,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -486,7 +494,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -514,7 +522,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -542,7 +550,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); 
EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -573,7 +581,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { } unique_keys.emplace(k1, rid); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -610,7 +618,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -688,7 +696,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -716,7 +724,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -744,7 +752,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -772,7 +780,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -800,7 +808,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { 
rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -873,7 +881,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionNotTrigger) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); diff --git a/be/test/storage/compaction/segcompaction_test.cpp b/be/test/storage/compaction/segcompaction_test.cpp index 6c43fea684cb43..15dc86c89d74b2 100644 --- a/be/test/storage/compaction/segcompaction_test.cpp +++ b/be/test/storage/compaction/segcompaction_test.cpp @@ -124,6 +124,14 @@ class SegCompactionTest : public testing::Test { protected: OlapReaderStatistics _stats; + Status add_block_with_columns(RowsetWriter* rowset_writer, Block* block, + MutableColumns* columns) { + block->set_columns(std::move(*columns)); + auto st = rowset_writer->add_block(block); + *columns = block->mutate_columns(); + return st; + } + bool check_dir(std::vector& vec) { std::vector result; for (const auto& entry : std::filesystem::directory_iterator(lTestDir)) { @@ -316,7 +324,7 @@ TEST_F(SegCompactionTest, SegCompactionThenRead) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -437,7 +445,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); 
EXPECT_EQ(Status::OK(), s); @@ -455,7 +463,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -473,7 +481,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -491,7 +499,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -509,7 +517,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -528,7 +536,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); 
EXPECT_EQ(Status::OK(), s); @@ -591,7 +599,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -609,7 +617,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -627,7 +635,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -645,7 +653,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -663,7 +671,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -730,7 +738,7 @@ TEST_F(SegCompactionTest, 
SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -754,7 +762,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -779,7 +787,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -804,7 +812,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -817,7 +825,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -830,7 +838,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, 
sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -997,7 +1005,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1021,7 +1029,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1046,7 +1054,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1071,7 +1079,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1084,7 +1092,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = 
rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1097,7 +1105,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); diff --git a/be/test/storage/compaction/vertical_compaction_test.cpp b/be/test/storage/compaction/vertical_compaction_test.cpp index 3b736857242caf..d744d7aa67e185 100644 --- a/be/test/storage/compaction/vertical_compaction_test.cpp +++ b/be/test/storage/compaction/vertical_compaction_test.cpp @@ -107,6 +107,14 @@ class VerticalCompactionTest : public ::testing::Test { ExecEnv::GetInstance()->set_storage_engine(nullptr); } + Status add_block_with_columns(RowsetWriter* rowset_writer, Block* block, + MutableColumns* columns) { + block->set_columns(std::move(*columns)); + auto st = rowset_writer->add_block(block); + *columns = block->mutate_columns(); + return st; + } + TabletSchemaSPtr create_schema(KeysType keys_type = DUP_KEYS, bool without_key = false) { TabletSchemaSPtr tablet_schema = std::make_shared(); TabletSchemaPB tablet_schema_pb; @@ -255,7 +263,7 @@ class VerticalCompactionTest : public ::testing::Test { } num_rows++; } - auto s = rowset_writer->add_block(&block); + auto s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_TRUE(s.ok()); @@ -1146,7 +1154,7 @@ TEST_F(VerticalCompactionTest, TestUniqueKeyVerticalMergeWithNullableSparseColum columns[2]->insert_data((const char*)&delete_sign, sizeof(delete_sign)); } - auto s = rowset_writer->add_block(&block); + auto s = add_block_with_columns(rowset_writer.get(), &block, &columns); 
ASSERT_TRUE(s.ok()) << s; s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s; @@ -1311,7 +1319,7 @@ TEST_F(VerticalCompactionTest, TestFooterRawDataBytesAccuracy) { columns[0]->insert_data(reinterpret_cast(&int_val), sizeof(int_val)); columns[1]->insert_data(fixed_string.data(), fixed_string.size()); } - ASSERT_TRUE(rowset_writer->add_block(&block).ok()); + ASSERT_TRUE(add_block_with_columns(rowset_writer.get(), &block, &columns).ok()); ASSERT_TRUE(rowset_writer->flush().ok()); RowsetSharedPtr rowset; @@ -1412,7 +1420,7 @@ TEST_F(VerticalCompactionTest, TestFooterRawDataBytesNullableSparse) { columns[1]->insert_default(); // ColumnNullable default is null } } - ASSERT_TRUE(rowset_writer->add_block(&block).ok()); + ASSERT_TRUE(add_block_with_columns(rowset_writer.get(), &block, &columns).ok()); ASSERT_TRUE(rowset_writer->flush().ok()); RowsetSharedPtr rowset; diff --git a/be/test/storage/index/date_bloom_filter_test.cpp b/be/test/storage/index/date_bloom_filter_test.cpp index 636e7a6848d39b..261c49a92d6595 100644 --- a/be/test/storage/index/date_bloom_filter_test.cpp +++ b/be/test/storage/index/date_bloom_filter_test.cpp @@ -146,6 +146,7 @@ TEST_F(DateBloomFilterTest, query_index_test) { olap_datetime_value = datetime.to_olap_datetime(); columns[0]->insert_many_fix_len_data(reinterpret_cast(&olap_date_value), 1); columns[1]->insert_many_fix_len_data(reinterpret_cast(&olap_datetime_value), 1); + block.set_columns(std::move(columns)); Status st; st = rowset_writer->add_block(&block); @@ -240,6 +241,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { olap_datetime_value = datetime.to_olap_datetime(); columns[0]->insert_many_fix_len_data(reinterpret_cast(&olap_date_value), 1); columns[1]->insert_many_fix_len_data(reinterpret_cast(&olap_datetime_value), 1); + block.set_columns(std::move(columns)); EXPECT_TRUE(rowset_writer->add_block(&block).ok()); EXPECT_TRUE(rowset_writer->flush().ok()); diff --git a/be/test/storage/index/index_builder_test.cpp 
b/be/test/storage/index/index_builder_test.cpp index c281fd511477fb..96cc6839390e3a 100644 --- a/be/test/storage/index/index_builder_test.cpp +++ b/be/test/storage/index/index_builder_test.cpp @@ -257,6 +257,8 @@ TEST_F(IndexBuilderTest, DropInvertedIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -545,6 +547,8 @@ TEST_F(IndexBuilderTest, BuildInvertedIndexAfterWritingDataTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -874,6 +878,8 @@ TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1042,6 +1048,8 @@ TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTestV1) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1191,6 +1199,8 @@ TEST_F(IndexBuilderTest, MultiSegmentBuildIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1341,6 +1351,8 @@ TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1515,6 +1527,8 @@ TEST_F(IndexBuilderTest, RenameColumnIndexTest) { 
columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1669,6 +1683,8 @@ TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1841,6 +1857,8 @@ TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTestV1) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1991,6 +2009,8 @@ TEST_F(IndexBuilderTest, NonNullIndexDataTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2115,6 +2135,8 @@ TEST_F(IndexBuilderTest, NonExistentColumnUniqueIdTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2246,6 +2268,8 @@ TEST_F(IndexBuilderTest, DropIndexV1FormatTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2370,6 +2394,8 @@ TEST_F(IndexBuilderTest, ResourceCleanupTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2535,6 +2561,8 @@ 
TEST_F(IndexBuilderTest, ArrayTypeIndexTest) { array_col.insert(Field::create_field(arr)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2631,6 +2659,8 @@ TEST_F(IndexBuilderTest, UniqueKeysTableIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2789,6 +2819,8 @@ TEST_F(IndexBuilderTest, HandleSingleRowsetErrorTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2909,6 +2941,8 @@ TEST_F(IndexBuilderTest, UpdateInvertedIndexInfoErrorTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -3024,6 +3058,8 @@ TEST_F(IndexBuilderTest, DropOneIndexNotAffectOtherIndexesOnSameColumnTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); diff --git a/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp b/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp index 1512212d6f24d0..c9856eeaa53ec1 100644 --- a/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp +++ b/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp @@ -154,6 +154,7 @@ TEST_F(IndexGcBinglogsTest, gc_binlogs_test) { Field v1 = Field::create_field("v1"); columns[0]->insert(key); columns[1]->insert(v1); + block.set_columns(std::move(columns)); 
EXPECT_TRUE(rowset_writer->add_block(&block).ok()); EXPECT_TRUE(rowset_writer->flush().ok()); diff --git a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp index 93670029927b6e..c3c23265de154d 100644 --- a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp +++ b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp @@ -699,6 +699,8 @@ class IndexCompactionUtils { } } + block.set_columns(std::move(columns)); + Status st = rowset_writer->add_block(&block); EXPECT_TRUE(st.ok()) << st.to_string(); st = rowset_writer->flush(); @@ -760,4 +762,4 @@ class IndexCompactionUtils { } }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/storage/rowid_conversion_test.cpp b/be/test/storage/rowid_conversion_test.cpp index 3ec611ca430cf6..0d470d1c7a7e74 100644 --- a/be/test/storage/rowid_conversion_test.cpp +++ b/be/test/storage/rowid_conversion_test.cpp @@ -205,6 +205,7 @@ class TestRowIdConversion : public testing::TestWithParamadd_block(&block); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); diff --git a/be/test/storage/segment/segment_cache_test.cpp b/be/test/storage/segment/segment_cache_test.cpp index 82bfe8242411e2..04b395f0089c89 100644 --- a/be/test/storage/segment/segment_cache_test.cpp +++ b/be/test/storage/segment/segment_cache_test.cpp @@ -198,15 +198,16 @@ static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { {"2020-07-16 19:39:43", 19}, c3, nullptr, p); } int64_t c3_int = c3.to_int64(); - columns[2]->insert_data((const char*)&c3_int, sizeof(c3)); + columns[2]->insert_data((const char*)&c3_int, sizeof(c3_int)); DateV2Value c4; c4.unchecked_set_time(2022, 6, 6, 0, 0, 0, 0); uint32_t c4_int = c4.to_date_int_val(); - columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); + columns[3]->insert_data((const char*)&c4_int, sizeof(c4_int)); int32_t c5 = seq; - 
columns[4]->insert_data((const char*)&c5, sizeof(c2)); + columns[4]->insert_data((const char*)&c5, sizeof(c5)); + block->set_columns(std::move(columns)); } class SegmentCacheTest : public ::testing::Test { diff --git a/be/test/storage/segment/segments_key_bounds_truncation_test.cpp b/be/test/storage/segment/segments_key_bounds_truncation_test.cpp index 0236ac98c9f7be..b9cad3c63b3eb7 100644 --- a/be/test/storage/segment/segments_key_bounds_truncation_test.cpp +++ b/be/test/storage/segment/segments_key_bounds_truncation_test.cpp @@ -188,6 +188,7 @@ class SegmentsKeyBoundsTruncationTest : public testing::Test { columns[1]->insert_data(reinterpret_cast(&const_value), sizeof(const_value)); } + block.set_columns(std::move(columns)); ret.emplace_back(std::move(block)); } return ret; diff --git a/be/test/storage/segment/variant_column_writer_reader_test.cpp b/be/test/storage/segment/variant_column_writer_reader_test.cpp index 3de2feb4b33600..a557b0cfbcf3a4 100644 --- a/be/test/storage/segment/variant_column_writer_reader_test.cpp +++ b/be/test/storage/segment/variant_column_writer_reader_test.cpp @@ -82,6 +82,38 @@ static void construct_tablet_index(TabletIndexPB* tablet_index, int64_t index_id tablet_index->add_col_unique_id(col_unique_id); } +static void fill_nullable_variant_block(Block* block, + std::unordered_map* inserted_jsonstr, + variant_util::PathToNoneNullValues* path_with_size) { + MutableColumnPtr column = IColumn::mutate(block->get_by_position(0).column); + auto* nullable_object = assert_cast(column.get()); + for (int idx = 0; idx < 10; idx++) { + nullable_object->insert_default(); // insert null + { + auto column_object = nullable_object->get_nested_column_ptr(); + auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, + inserted_jsonstr); + path_with_size->insert(res.begin(), res.end()); + } + for (int j = 0; j < 80; ++j) { + Field f = Field::create_field(UInt8(0)); + nullable_object->get_null_map_column().insert(f); + } + 
nullable_object->insert_many_defaults(17); + { + auto column_object = nullable_object->get_nested_column_ptr(); + auto res = VariantUtil::fill_object_column_with_test_data(column_object, 2, + inserted_jsonstr); + path_with_size->insert(res.begin(), res.end()); + } + for (int j = 0; j < 2; ++j) { + Field f = Field::create_field(UInt8(0)); + nullable_object->get_null_map_column().insert(f); + } + } + block->replace_by_position(0, std::move(column)); +} + // MockColumnReaderCache class for testing class MockColumnReaderCache : public segment_v2::ColumnReaderCache { public: @@ -2649,28 +2681,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_nullable) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -2802,28 +2815,9 @@ 
TEST_F(VariantColumnWriterReaderTest, test_write_data_nullable_without_finalize) auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -2897,28 +2891,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_bm_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = 
VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -2992,28 +2967,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_bf_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + 
fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -3089,28 +3045,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_zm_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -3186,28 +3123,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_inverted_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = 
nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -3734,7 +3652,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object1 = ColumnVariant::create(3, false); MutableColumnPtr null_object = - ColumnNullable::create(new_column_object1->assume_mutable(), ColumnUInt8::create()); + ColumnNullable::create(std::move(new_column_object1), ColumnUInt8::create()); size_t n = 1000; st = nested_iter->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) << st.msg(); @@ -3745,8 +3663,8 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) { { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object12 = ColumnVariant::create(3, false); - MutableColumnPtr null_object12 = ColumnNullable::create( - new_column_object12->assume_mutable(), ColumnUInt8::create()); + MutableColumnPtr null_object12 = + ColumnNullable::create(std::move(new_column_object12), ColumnUInt8::create()); st = nested_iter->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) 
<< st.msg(); st = nested_iter->next_batch(&n, null_object12, &has_null); @@ -3778,7 +3696,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object2 = ColumnVariant::create(3, false); MutableColumnPtr null_object2 = - ColumnNullable::create(new_column_object2->assume_mutable(), ColumnUInt8::create()); + ColumnNullable::create(std::move(new_column_object2), ColumnUInt8::create()); size_t nrows = 1000; st = nested_iter2->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) << st.msg(); @@ -3888,7 +3806,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter_nullable) { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object1 = ColumnVariant::create(3, false); MutableColumnPtr null_object = - ColumnNullable::create(new_column_object1->assume_mutable(), ColumnUInt8::create()); + ColumnNullable::create(std::move(new_column_object1), ColumnUInt8::create()); size_t nrows = 1000; st = nested_iter->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) << st.msg(); diff --git a/be/test/storage/tablet/tablet_cooldown_test.cpp b/be/test/storage/tablet/tablet_cooldown_test.cpp index b919aa887834e7..acf16442537fbd 100644 --- a/be/test/storage/tablet/tablet_cooldown_test.cpp +++ b/be/test/storage/tablet/tablet_cooldown_test.cpp @@ -344,7 +344,7 @@ static void write_rowset(TabletSharedPtr* tablet, PUniqueId load_id, int64_t rep columns[1]->insert_data((const char*)&c2, sizeof(c2)); int32_t c3 = 1; - columns[2]->insert_data((const char*)&c3, sizeof(c2)); + columns[2]->insert_data((const char*)&c3, sizeof(c3)); VecDateTimeValue c4; { @@ -354,8 +354,9 @@ static void write_rowset(TabletSharedPtr* tablet, PUniqueId load_id, int64_t rep {"2020-07-16 19:39:43", 19}, c4, nullptr, p); } int64_t c4_int = c4.to_int64(); - columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); + columns[3]->insert_data((const char*)&c4_int, sizeof(c4_int)); + block.set_columns(std::move(columns)); st = 
delta_writer->write(&block, {0}); ASSERT_EQ(Status::OK(), st); } diff --git a/be/test/util/bit_util_test.cpp b/be/test/util/bit_util_test.cpp index af106e3a26011c..6bebd7289182ec 100644 --- a/be/test/util/bit_util_test.cpp +++ b/be/test/util/bit_util_test.cpp @@ -63,14 +63,14 @@ TEST(BitUtil, BigEndianToHost) { void insert_true(ColumnNullable* column, size_t num = 1) { for (int i = 0; i < num; i++) { - assert_cast(column->get_nested_column_ptr().get())->insert_value(1); + assert_cast(column->get_nested_column()).insert_value(1); column->push_false_to_nullmap(1); } } void insert_false(ColumnNullable* column, size_t num = 1) { for (int i = 0; i < num; i++) { - assert_cast(column->get_nested_column_ptr().get())->insert_value(0); + assert_cast(column->get_nested_column()).insert_value(0); column->push_false_to_nullmap(1); } } @@ -102,16 +102,12 @@ TEST(BitUtil, CountZero) { insert_false(column.get(), 54); insert_true(column.get(), 1); insert_false(column.get(), 14); + const auto& nested_data = + assert_cast(column->get_nested_column()).get_data(); ASSERT_EQ( - brute_force_count_zero_num( - assert_cast(column->get_nested_column_ptr().get()) - ->get_data() - .data(), - column->get_null_map_data().data(), column->size()), - simd::count_zero_num((int8_t*)assert_cast( - column->get_nested_column_ptr().get()) - ->get_data() - .data(), + brute_force_count_zero_num(nested_data.data(), column->get_null_map_data().data(), + column->size()), + simd::count_zero_num((int8_t*)nested_data.data(), column->get_null_map_data().data(), (uint32_t)column->size())); } @@ -131,16 +127,12 @@ TEST(BitUtil, CountZero) { } } } + const auto& nested_data = + assert_cast(column->get_nested_column()).get_data(); ASSERT_EQ( - brute_force_count_zero_num( - assert_cast(column->get_nested_column_ptr().get()) - ->get_data() - .data(), - column->get_null_map_data().data(), column->size()), - simd::count_zero_num((int8_t*)assert_cast( - column->get_nested_column_ptr().get()) - ->get_data() - .data(), + 
brute_force_count_zero_num(nested_data.data(), column->get_null_map_data().data(), + column->size()), + simd::count_zero_num((int8_t*)nested_data.data(), column->get_null_map_data().data(), (uint32_t)column->size())); } } From bb81c131a2d9bc415c2640f8aa8cf9b47f88d330 Mon Sep 17 00:00:00 2001 From: zhaochangle Date: Thu, 7 May 2026 11:28:30 +0800 Subject: [PATCH 2/4] fix1 --- be/src/core/column/column_variant.cpp | 18 +++++ be/src/core/column/column_variant.h | 2 + be/src/core/data_type/data_type_array.cpp | 1 + be/src/core/data_type/data_type_map.cpp | 4 +- be/src/core/data_type/data_type_struct.cpp | 1 + .../common/arrow_column_to_doris_column.cpp | 10 +-- be/src/exec/common/variant_util.cpp | 2 +- .../bucketed_aggregation_sink_operator.cpp | 5 +- ...istinct_streaming_aggregation_operator.cpp | 8 ++- be/src/exec/operator/hashjoin_build_sink.cpp | 4 +- .../join/process_hash_table_probe_impl.h | 19 ++++-- .../nested_loop_join_probe_operator.cpp | 4 +- be/src/exec/operator/schema_scan_operator.cpp | 16 +++-- .../streaming_aggregation_operator.cpp | 5 +- be/src/exec/rowid_fetcher.cpp | 4 +- be/src/exec/scan/file_scanner.cpp | 15 +++-- be/src/exec/scan/meta_scanner.cpp | 19 +++--- be/src/exec/scan/scanner.cpp | 2 +- be/src/exec/sink/vtablet_block_convertor.cpp | 26 +++++--- .../aggregate/aggregate_function_java_udaf.h | 9 ++- .../function/array/function_array_flatten.cpp | 11 ++-- be/src/exprs/function/cast/cast_to_variant.h | 27 ++++++-- .../table_function/python_udtf_function.cpp | 5 +- .../table_function/udf_table_function.cpp | 4 +- be/src/exprs/table_function/vexplode.cpp | 10 +-- be/src/exprs/table_function/vexplode_v2.cpp | 9 +-- be/src/exprs/vcase_expr.h | 5 +- be/src/exprs/vcompound_pred.h | 65 +++++++++++-------- be/src/format/arrow/arrow_stream_reader.cpp | 3 +- be/src/format/csv/csv_reader.cpp | 37 ++++++----- be/src/format/csv/csv_reader.h | 6 +- be/src/format/jni/jni_data_bridge.cpp | 42 +++++++----- be/src/format/lance/lance_rust_reader.cpp | 8 ++- 
be/src/format/table/equality_delete.cpp | 8 +-- be/src/format/table/paimon_cpp_reader.cpp | 8 ++- be/src/format/table/remote_doris_reader.cpp | 8 ++- .../schema_active_queries_scanner.cpp | 1 + ...ma_authentication_integrations_scanner.cpp | 1 + .../schema_backend_active_tasks.cpp | 3 +- .../schema_backend_kerberos_ticket_cache.cpp | 1 + ...chema_catalog_meta_cache_stats_scanner.cpp | 1 + .../schema_database_properties_scanner.cpp | 1 + .../schema_file_cache_statistics.cpp | 1 + .../schema_partitions_scanner.cpp | 1 + .../schema_role_mappings_scanner.cpp | 1 + be/src/information_schema/schema_scanner.cpp | 35 +++++++--- .../schema_scanner_helper.cpp | 36 ++++++---- .../schema_sql_block_rule_status_scanner.cpp | 1 + .../schema_table_options_scanner.cpp | 1 + .../schema_table_properties_scanner.cpp | 1 + ...chema_table_stream_consumption_scanner.cpp | 3 +- .../schema_table_streams_scanner.cpp | 3 +- .../schema_view_dependency_scanner.cpp | 1 + .../schema_workload_group_privileges.cpp | 1 + ..._workload_group_resource_usage_scanner.cpp | 3 +- .../schema_workload_groups_scanner.cpp | 1 + .../schema_workload_sched_policy_scanner.cpp | 1 + be/src/service/point_query_executor.cpp | 21 +++--- be/src/storage/iterator/block_reader.cpp | 5 +- be/src/storage/partial_update_info.cpp | 38 +++++++---- .../storage/schema_change/schema_change.cpp | 28 ++++---- be/src/storage/segment/column_reader.cpp | 38 ++++++----- be/src/storage/segment/segment_writer.cpp | 10 +-- be/src/storage/segment/segment_writer.h | 2 +- .../segment/variant/variant_column_reader.cpp | 5 +- .../segment/vertical_segment_writer.cpp | 25 ++++--- .../storage/segment/vertical_segment_writer.h | 2 +- be/src/storage/tablet/base_tablet.cpp | 16 +++-- be/src/util/jsonb/serialize.cpp | 18 +++-- 69 files changed, 476 insertions(+), 260 deletions(-) diff --git a/be/src/core/column/column_variant.cpp b/be/src/core/column/column_variant.cpp index 7e9aae62fb8469..d47d5cb6d2ddb7 100644 --- 
a/be/src/core/column/column_variant.cpp +++ b/be/src/core/column/column_variant.cpp @@ -2117,6 +2117,22 @@ void ColumnVariant::clear_sparse_column() { serialized_sparse_column = ColumnPtr(create_binary_column_fn()); } +void ColumnVariant::ensure_binary_columns_rows() { + auto resize_if_empty = [this](WrappedPtr& column) { + const auto& const_column = static_cast(column); + if (const_column->size() == num_rows) { + return; + } + CHECK(const_column->empty()) << "ColumnVariant binary column size mismatch, rows: " + << num_rows << ", column rows: " << const_column->size(); + auto mutable_column = IColumn::mutate(std::move(static_cast(column))); + mutable_column->resize(num_rows); + column = std::move(mutable_column); + }; + resize_if_empty(serialized_sparse_column); + resize_if_empty(serialized_doc_value_column); +} + Status ColumnVariant::convert_typed_path_to_storage_type( const std::unordered_map& typed_paths) { for (auto&& entry : subcolumns) { @@ -2229,6 +2245,7 @@ Status ColumnVariant::pick_subcolumns_to_sparse_column( } void ColumnVariant::finalize(FinalizeMode mode) { + ensure_binary_columns_rows(); if (is_finalized() && mode == FinalizeMode::READ_MODE) { _prev_positions.clear(); ENABLE_CHECK_CONSISTENCY(this); @@ -2276,6 +2293,7 @@ void ColumnVariant::finalize(FinalizeMode mode) { std::swap(subcolumns, new_subcolumns); _prev_positions.clear(); + ensure_binary_columns_rows(); ENABLE_CHECK_CONSISTENCY(this); } diff --git a/be/src/core/column/column_variant.h b/be/src/core/column/column_variant.h index 428cf112eb9ef0..1ae92afd54cccc 100644 --- a/be/src/core/column/column_variant.h +++ b/be/src/core/column/column_variant.h @@ -354,6 +354,8 @@ class ColumnVariant final : public COWHelper { void clear_sparse_column(); + void ensure_binary_columns_rows(); + // root is null or type nothing bool is_null_root() const; diff --git a/be/src/core/data_type/data_type_array.cpp b/be/src/core/data_type/data_type_array.cpp index be97fc0c460ff4..3c7545c7490d45 100644 --- 
a/be/src/core/data_type/data_type_array.cpp +++ b/be/src/core/data_type/data_type_array.cpp @@ -122,6 +122,7 @@ const char* DataTypeArray::deserialize(const char* buf, MutableColumnPtr* column // children auto nested_column = std::move(*data_column->get_data_ptr()).mutate(); buf = get_nested_type()->deserialize(buf, &nested_column, be_exec_version); + data_column->get_data_ptr() = std::move(nested_column); return buf; } diff --git a/be/src/core/data_type/data_type_map.cpp b/be/src/core/data_type/data_type_map.cpp index 7e633c21490bce..043fd7a70248f3 100644 --- a/be/src/core/data_type/data_type_map.cpp +++ b/be/src/core/data_type/data_type_map.cpp @@ -139,6 +139,8 @@ const char* DataTypeMap::deserialize(const char* buf, MutableColumnPtr* column, auto nested_values_column = std::move(*map_column->get_values_ptr()).mutate(); buf = get_key_type()->deserialize(buf, &nested_keys_column, be_exec_version); buf = get_value_type()->deserialize(buf, &nested_values_column, be_exec_version); + map_column->get_keys_ptr() = std::move(nested_keys_column); + map_column->get_values_ptr() = std::move(nested_values_column); return buf; } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/core/data_type/data_type_struct.cpp b/be/src/core/data_type/data_type_struct.cpp index 6d6ceb856ee1fb..873e3159ae536c 100644 --- a/be/src/core/data_type/data_type_struct.cpp +++ b/be/src/core/data_type/data_type_struct.cpp @@ -216,6 +216,7 @@ const char* DataTypeStruct::deserialize(const char* buf, MutableColumnPtr* colum for (size_t i = 0; i < elems.size(); ++i) { auto child_column = std::move(*struct_column->get_column_ptr(i)).mutate(); buf = elems[i]->deserialize(buf, &child_column, be_exec_version); + struct_column->get_column_ptr(i) = std::move(child_column); } return buf; } diff --git a/be/src/exec/common/arrow_column_to_doris_column.cpp b/be/src/exec/common/arrow_column_to_doris_column.cpp index cd6e959596791b..645376ee12d7a7 100644 --- 
a/be/src/exec/common/arrow_column_to_doris_column.cpp +++ b/be/src/exec/common/arrow_column_to_doris_column.cpp @@ -100,10 +100,12 @@ Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arr Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arrow_batch_cur_idx, ColumnPtr& doris_column, const DataTypePtr& type, size_t num_elements, const cctz::time_zone& ctz) { - RETURN_IF_ERROR(type->get_serde()->read_column_from_arrow( - doris_column->assume_mutable_ref(), arrow_column, arrow_batch_cur_idx, - arrow_batch_cur_idx + num_elements, ctz)); - return Status::OK(); + auto mutable_column = IColumn::mutate(std::move(doris_column)); + auto status = type->get_serde()->read_column_from_arrow( + *mutable_column, arrow_column, arrow_batch_cur_idx, arrow_batch_cur_idx + num_elements, + ctz); + doris_column = std::move(mutable_column); + return status; } } // namespace doris diff --git a/be/src/exec/common/variant_util.cpp b/be/src/exec/common/variant_util.cpp index 1fd262e5604bf2..09ac52075cd9ce 100644 --- a/be/src/exec/common/variant_util.cpp +++ b/be/src/exec/common/variant_util.cpp @@ -2264,7 +2264,7 @@ Status _parse_and_materialize_variant_columns(Block& block, auto expected_root_type = make_nullable(std::make_shared()); var.ensure_root_node_type(expected_root_type); - variant_column = var.assume_mutable(); + variant_column = std::move(var_column); } // Wrap variant with nullmap if it is nullable diff --git a/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp b/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp index 58f47001185983..8cb58b2d532b95 100644 --- a/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp +++ b/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp @@ -175,8 +175,11 @@ Status BucketedAggSinkLocalState::_execute_with_serialized_key(Block* block) { block->get_by_position(result_column_id).column = block->get_by_position(result_column_id) 
.column->convert_to_full_column_if_const(); + auto mutable_column = + IColumn::mutate(std::move(block->get_by_position(result_column_id).column)); + mutable_column->replace_float_special_values(); + block->get_by_position(result_column_id).column = std::move(mutable_column); key_columns[i] = block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); } } diff --git a/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp b/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp index 63b184889276bf..cb115bb1395a25 100644 --- a/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp +++ b/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp @@ -215,12 +215,16 @@ Status DistinctStreamingAggLocalState::_distinct_pre_agg_with_serialized_key( if (out_block->rows() + _distinct_row.size() > batch_size) { size_t split_size = batch_size - out_block->rows(); for (int i = 0; i < key_size; ++i) { - auto output_dst = out_block->get_by_position(i).column->assume_mutable(); + auto output_dst = + IColumn::mutate(std::move(out_block->get_by_position(i).column)); key_columns[i]->append_data_by_selector(output_dst, _distinct_row, 0, split_size); - auto cache_dst = _cache_block.get_by_position(i).column->assume_mutable(); + out_block->get_by_position(i).column = std::move(output_dst); + auto cache_dst = + IColumn::mutate(std::move(_cache_block.get_by_position(i).column)); key_columns[i]->append_data_by_selector(cache_dst, _distinct_row, split_size, _distinct_row.size()); + _cache_block.get_by_position(i).column = std::move(cache_dst); } } else { for (int i = 0; i < key_size; ++i) { diff --git a/be/src/exec/operator/hashjoin_build_sink.cpp b/be/src/exec/operator/hashjoin_build_sink.cpp index 9f1a05876f8bb5..eb68668a043eb9 100644 --- a/be/src/exec/operator/hashjoin_build_sink.cpp +++ b/be/src/exec/operator/hashjoin_build_sink.cpp @@ -574,7 +574,9 @@ Status 
HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state, Blo for (auto& data : block) { data.column = std::move(*data.column).mutate()->convert_column_if_overflow(); if (p._need_finalize_variant_column) { - std::move(*data.column).mutate()->finalize(); + auto mutable_column = IColumn::mutate(std::move(data.column)); + mutable_column->finalize(); + data.column = std::move(mutable_column); } } diff --git a/be/src/exec/operator/join/process_hash_table_probe_impl.h b/be/src/exec/operator/join/process_hash_table_probe_impl.h index 3fc4285517d4eb..23242a82d15080 100644 --- a/be/src/exec/operator/join/process_hash_table_probe_impl.h +++ b/be/src/exec/operator/join/process_hash_table_probe_impl.h @@ -164,7 +164,10 @@ void ProcessHashTableProbe::probe_side_output_column(MutableColumns& for (int i = 0; i < _left_output_slot_flags.size(); ++i) { if (_left_output_slot_flags[i]) { if (_parent_operator->need_finalize_variant_column()) { - std::move(*probe_block.get_by_position(i).column).mutate()->finalize(); + auto mutable_column = + IColumn::mutate(std::move(probe_block.get_by_position(i).column)); + mutable_column->finalize(); + probe_block.get_by_position(i).column = std::move(mutable_column); } } @@ -720,14 +723,16 @@ Status ProcessHashTableProbe::do_mark_join_conjuncts(Block* output_b return Status::OK(); } - auto mark_column_mutable = - output_block->get_by_position(_parent->_mark_column_id).column->assume_mutable(); - auto& mark_column = assert_cast(*mark_column_mutable); - IColumn::Filter& filter = assert_cast(mark_column.get_nested_column()).get_data(); + auto mark_column_mutable = IColumn::mutate( + std::move(output_block->get_by_position(_parent->_mark_column_id).column)); + auto* mark_column = assert_cast(mark_column_mutable.get()); + IColumn::Filter& filter = assert_cast(mark_column->get_nested_column()).get_data(); + auto& null_map_column = mark_column->get_null_map_column(); + output_block->replace_by_position(_parent->_mark_column_id, 
std::move(mark_column_mutable)); RETURN_IF_ERROR(VExprContext::execute_conjuncts(_parent->_mark_join_conjuncts, output_block, - mark_column.get_null_map_column(), filter)); + null_map_column, filter)); uint8_t* mark_filter_data = filter.data(); - uint8_t* mark_null_map = mark_column.get_null_map_data().data(); + uint8_t* mark_null_map = mark_column->get_null_map_data().data(); if (is_null_aware_join) { // For null aware anti/semi join, if the equal conjuncts was not matched and the build side has null value, diff --git a/be/src/exec/operator/nested_loop_join_probe_operator.cpp b/be/src/exec/operator/nested_loop_join_probe_operator.cpp index 7a3be55cbb1988..a3644857bb7312 100644 --- a/be/src/exec/operator/nested_loop_join_probe_operator.cpp +++ b/be/src/exec/operator/nested_loop_join_probe_operator.cpp @@ -78,10 +78,12 @@ Status NestedLoopJoinProbeLocalState::close(RuntimeState* state) { void NestedLoopJoinProbeLocalState::_update_additional_flags(Block* block) { auto& p = _parent->cast(); if (p._is_mark_join) { - auto mark_column = block->get_by_position(block->columns() - 1).column->assume_mutable(); + auto mark_column = IColumn::mutate( + std::move(block->get_by_position(block->columns() - 1).column)); if (mark_column->size() < block->rows()) { ColumnFilterHelper(*mark_column).resize_fill(block->rows(), 1); } + block->replace_by_position(block->columns() - 1, std::move(mark_column)); } } diff --git a/be/src/exec/operator/schema_scan_operator.cpp b/be/src/exec/operator/schema_scan_operator.cpp index 030e49b54d48c0..c44ff030326484 100644 --- a/be/src/exec/operator/schema_scan_operator.cpp +++ b/be/src/exec/operator/schema_scan_operator.cpp @@ -21,6 +21,7 @@ #include +#include "core/column/column_nullable.h" #include "core/data_type/data_type_factory.hpp" #include "exec/operator/operator.h" #include "runtime/runtime_profile.h" @@ -256,10 +257,17 @@ Status SchemaScanOperatorX::get_block(RuntimeState* state, Block* block, bool* e if (src_block.rows()) { // 
block->check_number_of_rows(); for (int i = 0; i < _slot_num; ++i) { - MutableColumnPtr column_ptr = std::move(*block->get_by_position(i).column).mutate(); - column_ptr->insert_range_from( - *src_block.safe_get_by_position(_slot_offsets[i]).column, 0, - src_block.rows()); + MutableColumnPtr column_ptr = + IColumn::mutate(std::move(block->get_by_position(i).column)); + ColumnPtr src_column = + src_block.safe_get_by_position(_slot_offsets[i]) + .column->convert_to_full_column_if_const(); + if (column_ptr->is_nullable() && !src_column->is_nullable()) { + src_column = make_nullable(src_column); + } + DORIS_CHECK(column_ptr->is_nullable() == src_column->is_nullable()); + column_ptr->insert_range_from(*src_column, 0, src_block.rows()); + block->replace_by_position(i, std::move(column_ptr)); } DCHECK_EQ(block->columns(), _dest_tuple_desc->slots().size()); RETURN_IF_ERROR(local_state.filter_block(local_state._conjuncts, block)); diff --git a/be/src/exec/operator/streaming_aggregation_operator.cpp b/be/src/exec/operator/streaming_aggregation_operator.cpp index 5744b288a4487e..b15e6de0d3f70d 100644 --- a/be/src/exec/operator/streaming_aggregation_operator.cpp +++ b/be/src/exec/operator/streaming_aggregation_operator.cpp @@ -330,8 +330,11 @@ Status StreamingAggLocalState::_pre_agg_with_serialized_key(doris::Block* in_blo in_block->get_by_position(result_column_id).column = in_block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); + auto mutable_column = + IColumn::mutate(std::move(in_block->get_by_position(result_column_id).column)); + mutable_column->replace_float_special_values(); + in_block->get_by_position(result_column_id).column = std::move(mutable_column); key_columns[i] = in_block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); } } diff --git a/be/src/exec/rowid_fetcher.cpp b/be/src/exec/rowid_fetcher.cpp index f97bce17a8c6a4..a62cf420794e57 100644 --- 
a/be/src/exec/rowid_fetcher.cpp +++ b/be/src/exec/rowid_fetcher.cpp @@ -1090,7 +1090,8 @@ Status RowIdStorageReader::read_doris_format_row( } } else { for (int x = 0; x < slots.size(); ++x) { - MutableColumnPtr column = result_block.get_by_position(x).column->assume_mutable(); + MutableColumnPtr column = + IColumn::mutate(std::move(result_block.get_by_position(x).column)); IteratorKey iterator_key {.tablet_id = tablet_id, .rowset_id = rowset_id, .segment_id = segment_id, @@ -1106,6 +1107,7 @@ Status RowIdStorageReader::read_doris_format_row( full_read_schema, &slots[x], row_id, column, iterator_item.storage_read_options, iterator_item.iterator)); } + result_block.replace_by_position(x, std::move(column)); } } return Status::OK(); diff --git a/be/src/exec/scan/file_scanner.cpp b/be/src/exec/scan/file_scanner.cpp index ad694a412b88c7..531b02e5fdc032 100644 --- a/be/src/exec/scan/file_scanner.cpp +++ b/be/src/exec/scan/file_scanner.cpp @@ -438,8 +438,10 @@ Status FileScanner::_process_runtime_filters_partition_prune(bool& can_filter_al if (!first_column_filled) { // VExprContext.execute has an optimization, the filtering is executed when block->rows() > 0 // The following process may be tricky and time-consuming, but we have no other way. 
- _runtime_filter_partition_prune_block.get_by_position(0).column->assume_mutable()->resize( - partition_value_column_size); + auto column = IColumn::mutate( + std::move(_runtime_filter_partition_prune_block.get_by_position(0).column)); + column->resize(partition_value_column_size); + _runtime_filter_partition_prune_block.replace_by_position(0, std::move(column)); } IColumn::Filter result_filter(_runtime_filter_partition_prune_block.rows(), 1); RETURN_IF_ERROR(VExprContext::execute_conjuncts(_runtime_filter_partition_prune_ctxs, nullptr, @@ -778,11 +780,13 @@ Status FileScanner::_convert_to_output_block(Block* block) { auto& mutable_output_columns = mutable_output_block.mutable_columns(); std::vector* skip_bitmaps {nullptr}; + MutableColumnPtr skip_bitmap_column; if (_should_process_skip_bitmap_col()) { + skip_bitmap_column = + IColumn::mutate(std::move(_src_block_ptr->get_by_position(_skip_bitmap_col_idx) + .column)); auto* skip_bitmap_nullable_col_ptr = - assert_cast(_src_block_ptr->get_by_position(_skip_bitmap_col_idx) - .column->assume_mutable() - .get()); + assert_cast(skip_bitmap_column.get()); skip_bitmaps = &(assert_cast( skip_bitmap_nullable_col_ptr->get_nested_column_ptr().get()) ->get_data()); @@ -799,6 +803,7 @@ Status FileScanner::_convert_to_output_block(Block* block) { } } } + _src_block_ptr->replace_by_position(_skip_bitmap_col_idx, std::move(skip_bitmap_column)); } // for (auto slot_desc : _output_tuple_desc->slots()) { diff --git a/be/src/exec/scan/meta_scanner.cpp b/be/src/exec/scan/meta_scanner.cpp index adf1aabe4b8903..52892882f7bcbb 100644 --- a/be/src/exec/scan/meta_scanner.cpp +++ b/be/src/exec/scan/meta_scanner.cpp @@ -112,21 +112,14 @@ Status MetaScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eof columns.resize(column_size); for (auto i = 0; i < column_size; i++) { if (mem_reuse) { - columns[i] = block->get_by_position(i).column->assume_mutable(); + columns[i] = 
IColumn::mutate(std::move(block->get_by_position(i).column)); } else { columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column(); } } // fill block RETURN_IF_ERROR(_fill_block_with_remote_data(columns)); - if (_meta_eos == true) { - if (block->rows() == 0) { - *eof = true; - } - break; - } - // Before really use the Block, must clear other ptr of column in block - // So here need do std::move and clear in `columns` + const bool empty_result = columns.empty() || columns.front()->empty(); if (!mem_reuse) { int column_index = 0; for (const auto slot_desc : _tuple_desc->slots()) { @@ -135,7 +128,13 @@ Status MetaScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eof slot_desc->col_name())); } } else { - columns.clear(); + block->set_columns(std::move(columns)); + } + if (_meta_eos == true) { + if (empty_result) { + *eof = true; + } + break; } VLOG_ROW << "VMetaScanNode output rows: " << block->rows(); } while (block->rows() == 0 && !(*eof)); diff --git a/be/src/exec/scan/scanner.cpp b/be/src/exec/scan/scanner.cpp index 97f12d1195c4a0..51199bf71e4917 100644 --- a/be/src/exec/scan/scanner.cpp +++ b/be/src/exec/scan/scanner.cpp @@ -216,7 +216,7 @@ Status Scanner::_do_projections(Block* origin_block, Block* output_block) { if (mutable_columns[i]->is_nullable() != column_ptr->is_nullable()) { throw Exception(ErrorCode::INTERNAL_ERROR, "Nullable mismatch"); } - mutable_columns[i] = column_ptr->assume_mutable(); + mutable_columns[i] = IColumn::mutate(std::move(column_ptr)); } output_block->set_columns(std::move(mutable_columns)); diff --git a/be/src/exec/sink/vtablet_block_convertor.cpp b/be/src/exec/sink/vtablet_block_convertor.cpp index b567b599cfa3bf..95be814299ca9c 100644 --- a/be/src/exec/sink/vtablet_block_convertor.cpp +++ b/be/src/exec/sink/vtablet_block_convertor.cpp @@ -238,8 +238,8 @@ Status OlapTableBlockConvertor::_internal_validate_column(RuntimeState* state, B } } - const auto* tmp_column_ptr = check_and_get_column(*orig_column); - const 
auto& tmp_real_column_ptr = + auto tmp_column_ptr = check_and_get_column(*orig_column); + auto tmp_real_column_ptr = tmp_column_ptr == nullptr ? orig_column : (tmp_column_ptr->get_nested_column_ptr()); const auto* column_string = assert_cast(tmp_real_column_ptr.get()); const auto* null_map = @@ -281,13 +281,21 @@ Status OlapTableBlockConvertor::_internal_validate_column(RuntimeState* state, B {len_column, len_type, "len"}, {nullptr, input_type, "result"}}); RETURN_IF_ERROR(func->execute(nullptr, tmp_block, {0, 1, 2}, 3, row_count)); - column_string = - assert_cast(tmp_block.get_by_position(3).column.get()); - orig_column = - orig_column->is_nullable() - ? ColumnNullable::create(tmp_block.get_by_position(3).column, - tmp_column_ptr->get_null_map_column_ptr()) - : std::move(tmp_block.get_by_position(3).column); + auto result_column = IColumn::mutate(std::move(tmp_block.get_by_position(3).column)); + if (orig_column->is_nullable()) { + orig_column = ColumnNullable::create( + std::move(result_column), + IColumn::mutate(tmp_column_ptr->get_null_map_column_ptr())); + } else { + orig_column = std::move(result_column); + } + tmp_column_ptr = check_and_get_column(*orig_column); + tmp_real_column_ptr = tmp_column_ptr == nullptr + ? orig_column + : tmp_column_ptr->get_nested_column_ptr(); + column_string = assert_cast(tmp_real_column_ptr.get()); + null_map = tmp_column_ptr == nullptr ? nullptr + : tmp_column_ptr->get_null_map_data().data(); } for (size_t j = 0; j < row_count; ++j) { auto row = rows ? 
(*rows)[j] : j; diff --git a/be/src/exprs/aggregate/aggregate_function_java_udaf.h b/be/src/exprs/aggregate/aggregate_function_java_udaf.h index 42b3bc87af6d9d..c79880fc39ce15 100644 --- a/be/src/exprs/aggregate/aggregate_function_java_udaf.h +++ b/be/src/exprs/aggregate/aggregate_function_java_udaf.h @@ -187,7 +187,8 @@ struct AggregateJavaUdafData { RETURN_NOT_OK_STATUS_WITH_WARN(Jni::Env::Get(&env), "Java-Udaf get value function"); Block output_block; - output_block.insert(ColumnWithTypeAndName(to.get_ptr(), result_type, "_result_")); + output_block.insert(ColumnWithTypeAndName(result_type->create_column(), result_type, + "_result_")); auto output_table_schema = JniDataBridge::parse_table_schema(&output_block); std::string output_nullable = result_type->is_nullable() ? "true" : "false"; std::map output_params = {{"is_nullable", output_nullable}, @@ -203,7 +204,11 @@ struct AggregateJavaUdafData { .with_arg(output_map) .call(&output_address)); - return JniDataBridge::fill_block(&output_block, {0}, output_address); + RETURN_IF_ERROR(JniDataBridge::fill_block(&output_block, {0}, output_address)); + const auto& result_column = output_block.get_by_position(0).column; + DORIS_CHECK(result_column->size() == 1); + to.insert_from(*result_column, 0); + return Status::OK(); } private: diff --git a/be/src/exprs/function/array/function_array_flatten.cpp b/be/src/exprs/function/array/function_array_flatten.cpp index 03086f37008788..36be392b154e81 100644 --- a/be/src/exprs/function/array/function_array_flatten.cpp +++ b/be/src/exprs/function/array/function_array_flatten.cpp @@ -55,23 +55,24 @@ class FunctionArrayFlatten : public IFunction { auto src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); auto* src_column_array_ptr = - assert_cast(remove_nullable(src_column)->assume_mutable().get()); - ColumnArray* nested_src_column_array_ptr = src_column_array_ptr; + assert_cast(remove_nullable(src_column).get()); + const ColumnArray* 
nested_src_column_array_ptr = src_column_array_ptr; DataTypePtr src_data_type = block.get_by_position(arguments[0]).type; auto* src_data_type_array = assert_cast(remove_nullable(src_data_type).get()); auto result_column_offsets = - assert_cast(src_column_array_ptr->get_offsets_column()) + assert_cast( + src_column_array_ptr->get_offsets_column()) .clone(); auto* offsets = assert_cast(result_column_offsets.get()) ->get_data() .data(); while (src_data_type_array->get_nested_type()->get_primitive_type() == TYPE_ARRAY) { - nested_src_column_array_ptr = assert_cast( - remove_nullable(src_column_array_ptr->get_data_ptr())->assume_mutable().get()); + nested_src_column_array_ptr = assert_cast( + remove_nullable(src_column_array_ptr->get_data_ptr()).get()); for (size_t i = 0; i < input_rows_count; ++i) { offsets[i] = nested_src_column_array_ptr->get_offsets()[offsets[i] - 1]; diff --git a/be/src/exprs/function/cast/cast_to_variant.h b/be/src/exprs/function/cast/cast_to_variant.h index b679c15b870271..c616a2d764cf20 100644 --- a/be/src/exprs/function/cast/cast_to_variant.h +++ b/be/src/exprs/function/cast/cast_to_variant.h @@ -29,19 +29,34 @@ inline Status cast_from_variant_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count, const NullMap::value_type* null_map, const DataTypePtr& data_type_to) { - const auto& col_with_type_and_name = block.get_by_position(arguments[0]); - const auto& col_from = col_with_type_and_name.column; + auto& col_with_type_and_name = block.get_by_position(arguments[0]); + auto& col_from = col_with_type_and_name.column; const IColumn* variant_column = col_from.get(); if (const auto* nullable = check_and_get_column(*variant_column)) { variant_column = &nullable->get_nested_column(); } - const auto& variant = assert_cast(*variant_column); - ColumnPtr col_to = data_type_to->create_column(); - if (!variant.is_finalized()) { + if (!assert_cast(*variant_column).is_finalized()) { // 
ColumnVariant should be finalized before parsing, finalize maybe modify original column structure - variant.assume_mutable()->finalize(); + auto mutable_column = IColumn::mutate(std::move(col_with_type_and_name.column)); + if (auto* nullable = check_and_get_column(*mutable_column)) { + const auto& const_nullable = assert_cast(*nullable); + auto nested_column = IColumn::mutate(const_nullable.get_nested_column_ptr()); + assert_cast(*nested_column).finalize(); + ColumnPtr nested_column_ptr = std::move(nested_column); + nullable->change_nested_column(nested_column_ptr); + } else { + assert_cast(*mutable_column).finalize(); + } + col_with_type_and_name.column = std::move(mutable_column); + } + + variant_column = col_with_type_and_name.column.get(); + if (const auto* nullable = check_and_get_column(*variant_column)) { + variant_column = &nullable->get_nested_column(); } + const auto& variant = assert_cast(*variant_column); + ColumnPtr col_to = data_type_to->create_column(); // It's important to convert as many elements as possible in this context. 
For instance, // if the root of this variant column is a number column, converting it to a number column diff --git a/be/src/exprs/table_function/python_udtf_function.cpp b/be/src/exprs/table_function/python_udtf_function.cpp index a116a3d6785297..b2e016473189be 100644 --- a/be/src/exprs/table_function/python_udtf_function.cpp +++ b/be/src/exprs/table_function/python_udtf_function.cpp @@ -250,8 +250,7 @@ Status PythonUDTFFunction::_convert_list_array_to_array_column( if (_return_type->is_nullable()) { nullable_col = assert_cast(array_col_ptr.get()); - array_col = assert_cast( - nullable_col->get_nested_column_ptr()->assume_mutable().get()); + array_col = assert_cast(&nullable_col->get_nested_column()); } else { array_col = assert_cast(array_col_ptr.get()); } @@ -265,7 +264,7 @@ Status PythonUDTFFunction::_convert_list_array_to_array_column( // This directly converts Arrow ListArray to Doris ColumnArray // No struct unwrapping needed - Python server sends the correct format! RETURN_IF_ERROR(array_serde->read_column_from_arrow( - array_col->assume_mutable_ref(), list_array.get(), 0, num_input_rows, _timezone_obj)); + *array_col, list_array.get(), 0, num_input_rows, _timezone_obj)); // Handle nullable wrapper: all array elements are non-null // (empty arrays [] are non-null, different from NULL) diff --git a/be/src/exprs/table_function/udf_table_function.cpp b/be/src/exprs/table_function/udf_table_function.cpp index 4b6037f7ab1771..414766ef9157c3 100644 --- a/be/src/exprs/table_function/udf_table_function.cpp +++ b/be/src/exprs/table_function/udf_table_function.cpp @@ -123,10 +123,12 @@ Status UDFTableFunction::process_init(Block* block, RuntimeState* state) { .with_arg(output_map) .call(&output_address)); RETURN_IF_ERROR(JniDataBridge::fill_block(block, {_result_column_idx}, output_address)); + _array_result_column = + IColumn::mutate(std::move(block->get_by_position(_result_column_idx).column)); block->erase(_result_column_idx); if 
(!extract_column_array_info(*_array_result_column, _array_column_detail)) { return Status::NotSupported("column type {} not supported now", - block->get_by_position(_result_column_idx).column->get_name()); + _array_result_column->get_name()); } return Status::OK(); } diff --git a/be/src/exprs/table_function/vexplode.cpp b/be/src/exprs/table_function/vexplode.cpp index 680e5ccff66ed1..0b8556229a4ee3 100644 --- a/be/src/exprs/table_function/vexplode.cpp +++ b/be/src/exprs/table_function/vexplode.cpp @@ -45,7 +45,8 @@ Status VExplodeTableFunction::_process_init_variant(Block* block, int value_colu // explode variant array auto column_without_nullable = remove_nullable(block->get_by_position(value_column_idx).column); auto column = column_without_nullable->convert_to_full_column_if_const(); - auto& variant_column = assert_cast(*(column->assume_mutable())); + auto variant_column_ptr = IColumn::mutate(std::move(column)); + auto& variant_column = assert_cast(*variant_column_ptr); variant_column.finalize(); _detail.output_as_variant = true; _detail.variant_enable_doc_mode = variant_column.enable_doc_mode(); @@ -62,9 +63,10 @@ Status VExplodeTableFunction::_process_init_variant(Block* block, int value_colu _detail.nested_type = array_type->get_nested_type(); } else { // null root, use nothing type - _array_column = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)), - ColumnUInt8::create(0)); - _array_column->assume_mutable()->insert_many_defaults(variant_column.size()); + auto array_column = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)), + ColumnUInt8::create(0)); + array_column->insert_many_defaults(variant_column.size()); + _array_column = std::move(array_column); _detail.nested_type = std::make_shared(); } return Status::OK(); diff --git a/be/src/exprs/table_function/vexplode_v2.cpp b/be/src/exprs/table_function/vexplode_v2.cpp index b21802690a84b8..2e8bf9e2734903 100644 --- a/be/src/exprs/table_function/vexplode_v2.cpp +++ 
b/be/src/exprs/table_function/vexplode_v2.cpp @@ -51,7 +51,8 @@ Status VExplodeV2TableFunction::_process_init_variant(Block* block, int value_co // explode variant array auto column_without_nullable = remove_nullable(block->get_by_position(value_column_idx).column); auto column = column_without_nullable->convert_to_full_column_if_const(); - auto& variant_column = assert_cast(*(column->assume_mutable())); + auto variant_column_ptr = IColumn::mutate(std::move(column)); + auto& variant_column = assert_cast(*variant_column_ptr); variant_column.finalize(); _multi_detail[children_column_idx].output_as_variant = true; _multi_detail[children_column_idx].variant_enable_doc_mode = variant_column.enable_doc_mode(); @@ -68,10 +69,10 @@ Status VExplodeV2TableFunction::_process_init_variant(Block* block, int value_co _multi_detail[children_column_idx].nested_type = array_type->get_nested_type(); } else { // null root, use nothing type - _array_columns[children_column_idx] = ColumnNullable::create( + auto array_column = ColumnNullable::create( ColumnArray::create(ColumnNothing::create(0)), ColumnUInt8::create(0)); - _array_columns[children_column_idx]->assume_mutable()->insert_many_defaults( - variant_column.size()); + array_column->insert_many_defaults(variant_column.size()); + _array_columns[children_column_idx] = std::move(array_column); _multi_detail[children_column_idx].nested_type = std::make_shared(); } return Status::OK(); diff --git a/be/src/exprs/vcase_expr.h b/be/src/exprs/vcase_expr.h index 382193276cad29..dd58476b87cf88 100644 --- a/be/src/exprs/vcase_expr.h +++ b/be/src/exprs/vcase_expr.h @@ -217,9 +217,8 @@ class VCaseExpr final : public VExpr { if (!then_columns[i]) { continue; } - auto* __restrict column_raw_data = - assert_cast( - then_columns[i]->assume_mutable().get()) + const auto* __restrict column_raw_data = + assert_cast(then_columns[i].get()) ->get_data() .data(); if constexpr (std::is_same_v || diff --git a/be/src/exprs/vcompound_pred.h 
b/be/src/exprs/vcompound_pred.h index 5e83b0bb0283b7..af94d11cd40aeb 100644 --- a/be/src/exprs/vcompound_pred.h +++ b/be/src/exprs/vcompound_pred.h @@ -180,8 +180,8 @@ class VCompoundPred : public VectorizedFnCall { } ColumnPtr rhs_column = nullptr; - uint8_t* __restrict rhs_data_column = nullptr; - uint8_t* __restrict rhs_null_map = nullptr; + const uint8_t* __restrict rhs_data_column = nullptr; + const uint8_t* __restrict rhs_null_map = nullptr; bool rhs_is_nullable = false; bool rhs_all_true = false; bool rhs_all_false = false; @@ -216,10 +216,10 @@ class VCompoundPred : public VectorizedFnCall { }; auto create_null_map_column = [&](ColumnPtr& null_map_column, - uint8_t* __restrict null_map_data) { + const uint8_t* __restrict null_map_data) { if (null_map_data == nullptr) { null_map_column = ColumnUInt8::create(size, 0); - null_map_data = assert_cast(null_map_column->assume_mutable().get()) + null_map_data = assert_cast(null_map_column.get()) ->get_data() .data(); } @@ -227,20 +227,26 @@ class VCompoundPred : public VectorizedFnCall { }; auto vector_vector = [&]() { + MutableColumnPtr mutable_result_column; + uint8_t* __restrict result_data_column = nullptr; + const uint8_t* __restrict other_data_column = rhs_data_column; if (lhs_column->use_count() == 1) { - result_column = lhs_column; + mutable_result_column = IColumn::mutate(std::move(lhs_column)); + result_data_column = + assert_cast(mutable_result_column.get())->get_data().data(); } else if (rhs_column->use_count() == 1) { - result_column = rhs_column; - auto tmp_column = rhs_data_column; - rhs_data_column = lhs_data_column; - lhs_data_column = tmp_column; + mutable_result_column = IColumn::mutate(std::move(rhs_column)); + result_data_column = + assert_cast(mutable_result_column.get())->get_data().data(); + other_data_column = lhs_data_column; } else { - auto col_res = lhs_column->clone_resized(size); - lhs_data_column = assert_cast(col_res.get())->get_data().data(); - result_column = std::move(col_res); + 
mutable_result_column = lhs_column->clone_resized(size); + result_data_column = + assert_cast(mutable_result_column.get())->get_data().data(); } - do_not_null_pred(lhs_data_column, rhs_data_column, size); + do_not_null_pred(result_data_column, other_data_column, size); + result_column = std::move(mutable_result_column); }; auto vector_vector_null = [&]() { auto col_res = ColumnUInt8::create(size); @@ -349,7 +355,8 @@ class VCompoundPred : public VectorizedFnCall { } template - void static do_not_null_pred(uint8_t* __restrict lhs, uint8_t* __restrict rhs, size_t size) { + void static do_not_null_pred(uint8_t* __restrict lhs, const uint8_t* __restrict rhs, + size_t size) { #ifdef NDEBUG #if defined(__clang__) #pragma clang loop vectorize(enable) @@ -367,8 +374,10 @@ class VCompoundPred : public VectorizedFnCall { } template - void static do_null_pred(uint8_t* __restrict lhs_data, uint8_t* __restrict lhs_null, - uint8_t* __restrict rhs_data, uint8_t* __restrict rhs_null, + void static do_null_pred(const uint8_t* __restrict lhs_data, + const uint8_t* __restrict lhs_null, + const uint8_t* __restrict rhs_data, + const uint8_t* __restrict rhs_null, uint8_t* __restrict res_data, uint8_t* __restrict res_null, size_t size) { #ifdef NDEBUG @@ -394,22 +403,22 @@ class VCompoundPred : public VectorizedFnCall { [](const VExprSPtr& arg) -> bool { return arg->is_constant(); }); } - std::pair _get_raw_data_and_null_map(ColumnPtr column, - bool has_nullable_column) const { + std::pair _get_raw_data_and_null_map( + const ColumnPtr& column, bool has_nullable_column) const { if (has_nullable_column) { - auto* nullable_column = assert_cast(column->assume_mutable().get()); - auto* data_column = - assert_cast(nullable_column->get_nested_column_ptr().get()) - ->get_data() - .data(); - auto* null_map = - assert_cast(nullable_column->get_null_map_column_ptr().get()) - ->get_data() - .data(); + const auto* nullable_column = assert_cast(column.get()); + auto* data_column = assert_cast( + 
nullable_column->get_nested_column_ptr().get()) + ->get_data() + .data(); + auto* null_map = assert_cast( + nullable_column->get_null_map_column_ptr().get()) + ->get_data() + .data(); return std::make_pair(data_column, null_map); } else { auto* data_column = - assert_cast(column->assume_mutable().get())->get_data().data(); + assert_cast(column.get())->get_data().data(); return std::make_pair(data_column, nullptr); } } diff --git a/be/src/format/arrow/arrow_stream_reader.cpp b/be/src/format/arrow/arrow_stream_reader.cpp index b91608ee3fafa1..d5b53dff3306e5 100644 --- a/be/src/format/arrow/arrow_stream_reader.cpp +++ b/be/src/format/arrow/arrow_stream_reader.cpp @@ -113,7 +113,7 @@ Status ArrowStreamReader::_do_get_next_block(Block* block, size_t* read_rows, bo } RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), column, 0, num_rows, _ctzz)); + *columns[c], column, 0, num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError("Failed to convert from arrow to block: {}", e.what()); } @@ -121,6 +121,7 @@ Status ArrowStreamReader::_do_get_next_block(Block* block, size_t* read_rows, bo *read_rows += batch.num_rows(); } + block->set_columns(std::move(columns)); *eof = (*read_rows == 0); return Status::OK(); } diff --git a/be/src/format/csv/csv_reader.cpp b/be/src/format/csv/csv_reader.cpp index 539132c7c9f003..90340afafe1739 100644 --- a/be/src/format/csv/csv_reader.cpp +++ b/be/src/format/csv/csv_reader.cpp @@ -65,6 +65,19 @@ enum class FileCachePolicy : uint8_t; namespace doris { +namespace { + +size_t columns_byte_size(const std::vector& columns) { + size_t bytes = 0; + for (const auto& column : columns) { + DCHECK(column.get() != nullptr); + bytes += column->byte_size(); + } + return bytes; +} + +} // namespace + void EncloseCsvTextFieldSplitter::do_split(const Slice& line, std::vector* splitted_values) { const char* data = line.data; const auto& column_sep_positions = 
_text_line_reader_ctx->column_sep_positions(); @@ -437,7 +450,8 @@ Status CsvReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) block->set_columns(std::move(mutate_columns)); } else { auto columns = block->mutate_columns(); - while (rows < batch_size && !_line_reader_eof && (block->bytes() < max_block_bytes)) { + while (rows < batch_size && !_line_reader_eof && + (columns_byte_size(columns) < max_block_bytes)) { const uint8_t* ptr = nullptr; size_t size = 0; RETURN_IF_ERROR(_line_reader->read_line(&ptr, &size, &_line_reader_eof, _io_ctx)); @@ -457,7 +471,7 @@ Status CsvReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) } if (size == 0) { if (!_line_reader_eof && _state->is_read_csv_empty_line_as_null()) { - RETURN_IF_ERROR(_fill_empty_line(block, columns, &rows)); + RETURN_IF_ERROR(_fill_empty_line(columns, &rows)); } // Read empty line, continue continue; @@ -467,7 +481,7 @@ Status CsvReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) if (!success) { continue; } - RETURN_IF_ERROR(_fill_dest_columns(Slice(ptr, size), block, columns, &rows)); + RETURN_IF_ERROR(_fill_dest_columns(Slice(ptr, size), columns, &rows)); } block->set_columns(std::move(columns)); } @@ -719,8 +733,8 @@ Status CsvReader::_deserialize_one_cell(DataTypeSerDeSPtr serde, IColumn& column return serde->deserialize_one_cell_from_csv(column, slice, _options); } -Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, - std::vector& columns, size_t* rows) { +Status CsvReader::_fill_dest_columns(const Slice& line, std::vector& columns, + size_t* rows) { bool is_success = false; RETURN_IF_ERROR(_line_split_to_values(line, &is_success)); @@ -738,10 +752,7 @@ Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, IColumn* col_ptr = columns[i].get(); if (!_is_load) { - // block is a Block*, and get_by_position returns a ColumnPtr, - // which is a const pointer. Therefore, using const_cast is permissible. 
- col_ptr = const_cast( - block->get_by_position(_file_slot_idx_map[i]).column.get()); + col_ptr = columns[_file_slot_idx_map[i]].get(); } if (_use_nullable_string_opt[i]) { @@ -758,15 +769,11 @@ Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, return Status::OK(); } -Status CsvReader::_fill_empty_line(Block* block, std::vector& columns, - size_t* rows) { +Status CsvReader::_fill_empty_line(std::vector& columns, size_t* rows) { for (int i = 0; i < _file_slot_descs.size(); ++i) { IColumn* col_ptr = columns[i].get(); if (!_is_load) { - // block is a Block*, and get_by_position returns a ColumnPtr, - // which is a const pointer. Therefore, using const_cast is permissible. - col_ptr = const_cast( - block->get_by_position(_file_slot_idx_map[i]).column.get()); + col_ptr = columns[_file_slot_idx_map[i]].get(); } auto& null_column = assert_cast(*col_ptr); null_column.insert_data(nullptr, 0); diff --git a/be/src/format/csv/csv_reader.h b/be/src/format/csv/csv_reader.h index 077f089e5e9a18..f619ce4d4a85e5 100644 --- a/be/src/format/csv/csv_reader.h +++ b/be/src/format/csv/csv_reader.h @@ -232,9 +232,9 @@ class CsvReader : public TableFormatReader { private: Status _create_decompressor(); Status _create_file_reader(bool need_schema); - Status _fill_dest_columns(const Slice& line, Block* block, - std::vector& columns, size_t* rows); - Status _fill_empty_line(Block* block, std::vector& columns, size_t* rows); + Status _fill_dest_columns(const Slice& line, std::vector& columns, + size_t* rows); + Status _fill_empty_line(std::vector& columns, size_t* rows); Status _line_split_to_values(const Slice& line, bool* success); void _split_line(const Slice& line); void _init_system_properties(); diff --git a/be/src/format/jni/jni_data_bridge.cpp b/be/src/format/jni/jni_data_bridge.cpp index 9d5e37978a0118..80fb94545c97b3 100644 --- a/be/src/format/jni/jni_data_bridge.cpp +++ b/be/src/format/jni/jni_data_bridge.cpp @@ -105,24 +105,28 @@ Status 
JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_co // org.apache.doris.common.jni.vec.ColumnType.Type#UNSUPPORTED will set column address as 0 return Status::InternalError("Unsupported type {} in java side", data_type->get_name()); } + auto mutable_doris_column = IColumn::mutate(std::move(doris_column)); MutableColumnPtr data_column; - if (doris_column->is_nullable()) { - auto* nullable_column = - reinterpret_cast(doris_column->assume_mutable().get()); + if (mutable_doris_column->is_nullable()) { + auto* nullable_column = assert_cast(mutable_doris_column.get()); data_column = nullable_column->get_nested_column_ptr(); NullMap& null_map = nullable_column->get_null_map_data(); size_t origin_size = null_map.size(); null_map.resize(origin_size + num_rows); memcpy(null_map.data() + origin_size, static_cast(null_map_ptr), num_rows); } else { - data_column = doris_column->assume_mutable(); + data_column = mutable_doris_column->get_ptr(); } // Date and DateTime are deprecated and not supported. 
+ Status status = Status::OK(); switch (logical_type) { -#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ - case TYPE_INDEX: \ - return _fill_fixed_length_column( \ - data_column, reinterpret_cast(address.next_meta_as_ptr()), num_rows); +#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ + case TYPE_INDEX: { \ + auto* data = reinterpret_cast(address.next_meta_as_ptr()); \ + status = _fill_fixed_length_column( \ + data_column, data, num_rows); \ + break; \ + } FOR_FIXED_LENGTH_TYPES(DISPATCH) #undef DISPATCH case PrimitiveType::TYPE_STRING: @@ -130,19 +134,27 @@ Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_co case PrimitiveType::TYPE_CHAR: [[fallthrough]]; case PrimitiveType::TYPE_VARCHAR: - return _fill_string_column(address, data_column, num_rows); + status = _fill_string_column(address, data_column, num_rows); + break; case PrimitiveType::TYPE_ARRAY: - return _fill_array_column(address, data_column, data_type, num_rows); + status = _fill_array_column(address, data_column, data_type, num_rows); + break; case PrimitiveType::TYPE_MAP: - return _fill_map_column(address, data_column, data_type, num_rows); + status = _fill_map_column(address, data_column, data_type, num_rows); + break; case PrimitiveType::TYPE_STRUCT: - return _fill_struct_column(address, data_column, data_type, num_rows); + status = _fill_struct_column(address, data_column, data_type, num_rows); + break; case PrimitiveType::TYPE_VARBINARY: - return _fill_varbinary_column(address, data_column, num_rows); + status = _fill_varbinary_column(address, data_column, num_rows); + break; default: - return Status::InvalidArgument("Unsupported type {} in jni scanner", data_type->get_name()); + status = Status::InvalidArgument("Unsupported type {} in jni scanner", + data_type->get_name()); + break; } - return Status::OK(); + doris_column = std::move(mutable_doris_column); + return status; } Status JniDataBridge::_fill_varbinary_column(TableMetaAddress& address, diff --git 
a/be/src/format/lance/lance_rust_reader.cpp b/be/src/format/lance/lance_rust_reader.cpp index 166bbd52dcc519..2eed2356734ca3 100644 --- a/be/src/format/lance/lance_rust_reader.cpp +++ b/be/src/format/lance/lance_rust_reader.cpp @@ -230,6 +230,7 @@ Status LanceRustReader::_do_get_next_block(Block* block, size_t* read_rows, bool const auto num_columns = record_batch->num_columns(); // Convert Arrow columns to Doris Block columns (same pattern as PaimonCppReader) + auto columns = block->mutate_columns(); for (int c = 0; c < num_columns; ++c) { const auto& field = record_batch->schema()->field(c); @@ -238,16 +239,17 @@ Status LanceRustReader::_do_get_next_block(Block* block, size_t* read_rows, bool continue; } - const ColumnWithTypeAndName& column_with_name = block->get_by_position(it->second); + const auto block_pos = it->second; + const ColumnWithTypeAndName& column_with_name = block->get_by_position(block_pos); try { RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), record_batch->column(c).get(), 0, - num_rows, _ctzz)); + *columns[block_pos], record_batch->column(c).get(), 0, num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError("Failed to convert Lance arrow to block: {}", e.what()); } } + block->set_columns(std::move(columns)); *read_rows = num_rows; *eof = false; return Status::OK(); diff --git a/be/src/format/table/equality_delete.cpp b/be/src/format/table/equality_delete.cpp index 82deb7bd59c20a..dc94d8151f2048 100644 --- a/be/src/format/table/equality_delete.cpp +++ b/be/src/format/table/equality_delete.cpp @@ -68,9 +68,8 @@ Status SimpleEqualityDelete::filter_data_block( const NullMap& null_map = reinterpret_cast(column_and_type.column.get()) ->get_null_map_data(); - _hybrid_set->find_batch_nullable( - remove_nullable(column_and_type.column)->assume_mutable_ref(), rows, null_map, - *_single_filter); + 
_hybrid_set->find_batch_nullable(*remove_nullable(column_and_type.column), rows, null_map, + *_single_filter); if (_hybrid_set->contain_null()) { auto* filter_data = _single_filter->data(); for (size_t i = 0; i < rows; ++i) { @@ -78,8 +77,7 @@ Status SimpleEqualityDelete::filter_data_block( } } } else { - _hybrid_set->find_batch(column_and_type.column->assume_mutable_ref(), rows, - *_single_filter); + _hybrid_set->find_batch(*column_and_type.column, rows, *_single_filter); } // should reverse _filter auto* filter_data = filter.data(); diff --git a/be/src/format/table/paimon_cpp_reader.cpp b/be/src/format/table/paimon_cpp_reader.cpp index 4925bbb3e7a9bd..e628c30af737ba 100644 --- a/be/src/format/table/paimon_cpp_reader.cpp +++ b/be/src/format/table/paimon_cpp_reader.cpp @@ -117,6 +117,7 @@ Status PaimonCppReader::_do_get_next_block(Block* block, size_t* read_rows, bool auto record_batch = std::move(import_result).ValueUnsafe(); const auto num_rows = static_cast(record_batch->num_rows()); const auto num_columns = record_batch->num_columns(); + auto columns = block->mutate_columns(); for (int c = 0; c < num_columns; ++c) { const auto& field = record_batch->schema()->field(c); if (field->name() == VALUE_KIND_FIELD) { @@ -128,16 +129,17 @@ Status PaimonCppReader::_do_get_next_block(Block* block, size_t* read_rows, bool // Skip columns that are not in the block (e.g., partition columns handled elsewhere) continue; } - const ColumnWithTypeAndName& column_with_name = block->get_by_position(it->second); + const auto block_pos = it->second; + const ColumnWithTypeAndName& column_with_name = block->get_by_position(block_pos); try { RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), record_batch->column(c).get(), 0, - num_rows, _ctzz)); + *columns[block_pos], record_batch->column(c).get(), 0, num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError("Failed to convert from arrow to block: 
{}", e.what()); } } + block->set_columns(std::move(columns)); *read_rows = num_rows; *eof = false; return Status::OK(); diff --git a/be/src/format/table/remote_doris_reader.cpp b/be/src/format/table/remote_doris_reader.cpp index 5280b655a63ef8..487aad2869b90d 100644 --- a/be/src/format/table/remote_doris_reader.cpp +++ b/be/src/format/table/remote_doris_reader.cpp @@ -72,6 +72,7 @@ Status RemoteDorisReader::_do_get_next_block(Block* block, size_t* read_rows, bo auto batch = chunk.data; auto num_rows = batch->num_rows(); auto num_columns = batch->num_columns(); + auto columns = block->mutate_columns(); for (int c = 0; c < num_columns; ++c) { arrow::Array* column = batch->column(c).get(); @@ -82,10 +83,10 @@ Status RemoteDorisReader::_do_get_next_block(Block* block, size_t* read_rows, bo } try { - const ColumnWithTypeAndName& column_with_name = - block->get_by_position((*_col_name_to_block_idx)[column_name]); + auto block_pos = (*_col_name_to_block_idx)[column_name]; + const ColumnWithTypeAndName& column_with_name = block->get_by_position(block_pos); RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), column, 0, num_rows, _ctzz)); + *columns[block_pos], column, 0, num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError( "Failed to convert from arrow to block, column_name: {}, e: {}", column_name, @@ -93,6 +94,7 @@ Status RemoteDorisReader::_do_get_next_block(Block* block, size_t* read_rows, bo } } + block->set_columns(std::move(columns)); *read_rows += num_rows; return Status::OK(); diff --git a/be/src/information_schema/schema_active_queries_scanner.cpp b/be/src/information_schema/schema_active_queries_scanner.cpp index 00f0c5b5de763e..de0844af8abc93 100644 --- a/be/src/information_schema/schema_active_queries_scanner.cpp +++ b/be/src/information_schema/schema_active_queries_scanner.cpp @@ -133,6 +133,7 @@ Status SchemaActiveQueriesScanner::get_next_block_internal(Block* block, 
bool* e int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_active_query_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_authentication_integrations_scanner.cpp b/be/src/information_schema/schema_authentication_integrations_scanner.cpp index 95359b58264d15..4cbf55b198d31b 100644 --- a/be/src/information_schema/schema_authentication_integrations_scanner.cpp +++ b/be/src/information_schema/schema_authentication_integrations_scanner.cpp @@ -137,6 +137,7 @@ Status SchemaAuthenticationIntegrationsScanner::get_next_block_internal(Block* b MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_authentication_integrations_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_backend_active_tasks.cpp b/be/src/information_schema/schema_backend_active_tasks.cpp index b41f116b7550af..ddb15b84aa409d 100644 --- a/be/src/information_schema/schema_backend_active_tasks.cpp +++ b/be/src/information_schema/schema_backend_active_tasks.cpp @@ -89,10 +89,11 @@ Status SchemaBackendActiveTasksScanner::get_next_block_internal(Block* block, bo int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_task_stats_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git 
a/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp b/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp index 3c7b1ec0bc5c9a..5b25a84304d1bb 100644 --- a/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp +++ b/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp @@ -85,6 +85,7 @@ Status SchemaBackendKerberosTicketCacheScanner::get_next_block_internal(Block* b int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_info_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp b/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp index fec899c252a933..18e490f09b3fed 100644 --- a/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp +++ b/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp @@ -145,6 +145,7 @@ Status SchemaCatalogMetaCacheStatsScanner::get_next_block_internal(Block* block, int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_database_properties_scanner.cpp b/be/src/information_schema/schema_database_properties_scanner.cpp index c73dd9301e056d..d1427fe43e915f 100644 --- a/be/src/information_schema/schema_database_properties_scanner.cpp +++ b/be/src/information_schema/schema_database_properties_scanner.cpp @@ -149,6 +149,7 @@ Status SchemaDatabasePropertiesScanner::get_next_block_internal(Block* block, 
bo int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_dbproperties_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; if (!check_and_mark_eos(eos)) { *eos = false; diff --git a/be/src/information_schema/schema_file_cache_statistics.cpp b/be/src/information_schema/schema_file_cache_statistics.cpp index 0b69766bbeeae9..5be2df30d53b11 100644 --- a/be/src/information_schema/schema_file_cache_statistics.cpp +++ b/be/src/information_schema/schema_file_cache_statistics.cpp @@ -77,6 +77,7 @@ Status SchemaFileCacheStatisticsScanner::get_next_block_internal(Block* block, b int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_stats_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_partitions_scanner.cpp b/be/src/information_schema/schema_partitions_scanner.cpp index 834fd928f7126e..87c0ce078b787d 100644 --- a/be/src/information_schema/schema_partitions_scanner.cpp +++ b/be/src/information_schema/schema_partitions_scanner.cpp @@ -210,6 +210,7 @@ Status SchemaPartitionsScanner::get_next_block_internal(Block* block, bool* eos) int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_partitions_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; if (!check_and_mark_eos(eos)) { diff --git a/be/src/information_schema/schema_role_mappings_scanner.cpp 
b/be/src/information_schema/schema_role_mappings_scanner.cpp index 31e58e6cbe9fb5..84d0e26eb44393 100644 --- a/be/src/information_schema/schema_role_mappings_scanner.cpp +++ b/be/src/information_schema/schema_role_mappings_scanner.cpp @@ -134,6 +134,7 @@ Status SchemaRoleMappingsScanner::get_next_block_internal(Block* block, bool* eo int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_role_mappings_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_scanner.cpp b/be/src/information_schema/schema_scanner.cpp index 981956330ce7f5..d410167e9caa7d 100644 --- a/be/src/information_schema/schema_scanner.cpp +++ b/be/src/information_schema/schema_scanner.cpp @@ -95,6 +95,24 @@ namespace doris { class ObjectPool; +namespace { + +void insert_column_range(ColumnWithTypeAndName* dst, const ColumnWithTypeAndName& src, + size_t start, size_t length) { + DORIS_CHECK(dst->column.get() != nullptr); + DORIS_CHECK(src.column.get() != nullptr); + MutableColumnPtr dst_column = IColumn::mutate(std::move(dst->column)); + ColumnPtr src_column = src.column->convert_to_full_column_if_const(); + if (dst_column->is_nullable() && !src_column->is_nullable()) { + src_column = make_nullable(src_column); + } + DORIS_CHECK(dst_column->is_nullable() == src_column->is_nullable()); + dst_column->insert_range_from(*src_column, start, length); + dst->column = std::move(dst_column); +} + +} // namespace + SchemaScanner::SchemaScanner(const std::vector& columns, TSchemaTableType::type type) : _is_init(false), _columns(columns), _schema_table_type(type) {} @@ -115,10 +133,8 @@ Status SchemaScanner::get_next_block(RuntimeState* state, Block* block, bool* eo DCHECK(_async_thread_running == false); 
RETURN_IF_ERROR(_scanner_status.status()); for (size_t i = 0; i < block->columns(); i++) { - std::move(*block->get_by_position(i).column) - .mutate() - ->insert_range_from(*_data_block->get_by_position(i).column, 0, - _data_block->rows()); + insert_column_range(&block->get_by_position(i), _data_block->get_by_position(i), 0, + _data_block->rows()); } _data_block->clear_column_data(); *eos = _eos; @@ -295,11 +311,10 @@ void SchemaScanner::_init_block(Block* src_block) { Status SchemaScanner::fill_dest_column_for_range(Block* block, size_t pos, const std::vector& datas) { const ColumnDesc& col_desc = _columns[pos]; - MutableColumnPtr column_ptr; - column_ptr = std::move(*block->get_by_position(pos).column).assume_mutable(); + MutableColumnPtr column_ptr = IColumn::mutate(std::move(block->get_by_position(pos).column)); IColumn* col_ptr = column_ptr.get(); - auto* nullable_column = reinterpret_cast(col_ptr); + auto* nullable_column = assert_cast(col_ptr); // Resize in advance to improve insertion efficiency. 
size_t fill_num = datas.size(); @@ -440,6 +455,7 @@ Status SchemaScanner::fill_dest_column_for_range(Block* block, size_t pos, } } } + block->replace_by_position(pos, std::move(column_ptr)); return Status::OK(); } @@ -454,8 +470,8 @@ std::string SchemaScanner::get_db_from_full_name(const std::string& full_name) { Status SchemaScanner::insert_block_column(TCell cell, int col_index, Block* block, PrimitiveType type) { MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = std::move(*block->get_by_position(col_index).column).assume_mutable(); - auto* nullable_column = reinterpret_cast(mutable_col_ptr.get()); + mutable_col_ptr = IColumn::mutate(std::move(block->get_by_position(col_index).column)); + auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); switch (type) { @@ -510,6 +526,7 @@ Status SchemaScanner::insert_block_column(TCell cell, int col_index, Block* bloc } } nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); return Status::OK(); } diff --git a/be/src/information_schema/schema_scanner_helper.cpp b/be/src/information_schema/schema_scanner_helper.cpp index 9ec2cdcd7cbaa2..7907dc264b66fd 100644 --- a/be/src/information_schema/schema_scanner_helper.cpp +++ b/be/src/information_schema/schema_scanner_helper.cpp @@ -19,6 +19,7 @@ #include "cctz/time_zone.h" #include "core/block/block.h" +#include "core/column/column_nullable.h" #include "core/data_type/data_type_factory.hpp" #include "core/data_type/primitive_type.h" #include "core/string_ref.h" @@ -31,29 +32,31 @@ namespace doris { void SchemaScannerHelper::insert_string_value(int col_index, std::string_view str_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = 
assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_data(str_val.data(), str_val.size()); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_datetime_value(int col_index, const std::vector& datas, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); auto data = datas[0]; assert_cast(col_ptr)->insert_data(reinterpret_cast(data), 0); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_datetime_value(int col_index, int64_t timestamp, const cctz::time_zone& ctz, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); @@ -64,41 +67,46 @@ void SchemaScannerHelper::insert_datetime_value(int col_index, int64_t timestamp auto data = datas[0]; assert_cast(col_ptr)->insert_data(reinterpret_cast(data), 0); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_bool_value(int col_index, bool bool_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + 
IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_value(bool_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_int32_value(int col_index, int32_t int_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_value(int_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_int64_value(int col_index, int64_t int_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_value(int_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_double_value(int col_index, double double_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); 
assert_cast(col_ptr)->insert_value(double_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } } // namespace doris diff --git a/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp b/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp index 2a223c144ba5fa..1fcc0cb838ad93 100644 --- a/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp +++ b/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp @@ -170,6 +170,7 @@ Status SchemaSqlBlockRuleStatusScanner::get_next_block_internal(Block* block, bo MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR( mblock.add_rows(_sql_block_rule_status_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_table_options_scanner.cpp b/be/src/information_schema/schema_table_options_scanner.cpp index 096f0860bfc3bd..717cb91cccfa29 100644 --- a/be/src/information_schema/schema_table_options_scanner.cpp +++ b/be/src/information_schema/schema_table_options_scanner.cpp @@ -167,6 +167,7 @@ Status SchemaTableOptionsScanner::get_next_block_internal(Block* block, bool* eo int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_tableoptions_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; if (!check_and_mark_eos(eos)) { diff --git a/be/src/information_schema/schema_table_properties_scanner.cpp b/be/src/information_schema/schema_table_properties_scanner.cpp index 0affe500b35f7b..e89153542a190c 100644 --- a/be/src/information_schema/schema_table_properties_scanner.cpp +++ b/be/src/information_schema/schema_table_properties_scanner.cpp @@ 
-161,6 +161,7 @@ Status SchemaTablePropertiesScanner::get_next_block_internal(Block* block, bool* int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_tableproperties_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; if (!check_and_mark_eos(eos)) { diff --git a/be/src/information_schema/schema_table_stream_consumption_scanner.cpp b/be/src/information_schema/schema_table_stream_consumption_scanner.cpp index c2c5ceab41ceb2..6b3141e404bf27 100644 --- a/be/src/information_schema/schema_table_stream_consumption_scanner.cpp +++ b/be/src/information_schema/schema_table_stream_consumption_scanner.cpp @@ -132,10 +132,11 @@ Status SchemaTableStreamConsumptionScanner::get_next_block_internal(Block* block MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR( mblock.add_rows(_table_stream_consumption_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/information_schema/schema_table_streams_scanner.cpp b/be/src/information_schema/schema_table_streams_scanner.cpp index 288d4e56c9a876..48299c7a1783c6 100644 --- a/be/src/information_schema/schema_table_streams_scanner.cpp +++ b/be/src/information_schema/schema_table_streams_scanner.cpp @@ -132,10 +132,11 @@ Status SchemaTableStreamsScanner::get_next_block_internal(Block* block, bool* eo int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_table_streams_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); 
_row_idx += current_batch_rows; *eos = _row_idx == _total_rows; return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/information_schema/schema_view_dependency_scanner.cpp b/be/src/information_schema/schema_view_dependency_scanner.cpp index 1aa6ce614312f7..3723f4f9e5e2a3 100644 --- a/be/src/information_schema/schema_view_dependency_scanner.cpp +++ b/be/src/information_schema/schema_view_dependency_scanner.cpp @@ -133,6 +133,7 @@ Status SchemaViewDependencyScanner::get_next_block_internal(Block* block, bool* int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_view_dependency_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_workload_group_privileges.cpp b/be/src/information_schema/schema_workload_group_privileges.cpp index d0dab55965c3d1..854e151fd2521d 100644 --- a/be/src/information_schema/schema_workload_group_privileges.cpp +++ b/be/src/information_schema/schema_workload_group_privileges.cpp @@ -128,6 +128,7 @@ Status SchemaWorkloadGroupPrivilegesScanner::get_next_block_internal(Block* bloc MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR( mblock.add_rows(_workload_groups_privs_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp b/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp index 175b1dbd080e81..f790bf913bb75c 100644 --- a/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp +++ 
b/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp @@ -80,10 +80,11 @@ Status SchemaBackendWorkloadGroupResourceUsage::get_next_block_internal(Block* b int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/information_schema/schema_workload_groups_scanner.cpp b/be/src/information_schema/schema_workload_groups_scanner.cpp index 5ad1b744e975e6..b2dd403f48652b 100644 --- a/be/src/information_schema/schema_workload_groups_scanner.cpp +++ b/be/src/information_schema/schema_workload_groups_scanner.cpp @@ -139,6 +139,7 @@ Status SchemaWorkloadGroupsScanner::get_next_block_internal(Block* block, bool* int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_workload_groups_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_workload_sched_policy_scanner.cpp b/be/src/information_schema/schema_workload_sched_policy_scanner.cpp index 040b747bb435c4..bc5d5f9c229e4c 100644 --- a/be/src/information_schema/schema_workload_sched_policy_scanner.cpp +++ b/be/src/information_schema/schema_workload_sched_policy_scanner.cpp @@ -129,6 +129,7 @@ Status SchemaWorkloadSchedulePolicyScanner::get_next_block_internal(Block* block int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); 
RETURN_IF_ERROR(mblock.add_rows(_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index 441284a251b3c8..dcb6555411696e 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -558,20 +558,20 @@ Status PointQueryExecutor::_lookup_row_data() { int pos = _reusable->get_col_uid_to_idx().at(cid); auto row_id = static_cast(row_loc.row_id); MutableColumnPtr column = - _result_block->get_by_position(pos).column->assume_mutable(); + IColumn::mutate(std::move(_result_block->get_by_position(pos).column)); std::unique_ptr iter; SlotDescriptor* slot = _reusable->tuple_desc()->slots()[pos]; StorageReadOptions storage_read_options; storage_read_options.stats = &_read_stats; storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY; - RETURN_IF_ERROR(segment->seek_and_read_by_rowid(*_tablet->tablet_schema(), slot, - row_id, column, - storage_read_options, iter)); - if (_tablet->tablet_schema() - ->column_by_uid(slot->col_unique_id()) - .has_char_type()) { + auto st = segment->seek_and_read_by_rowid(*_tablet->tablet_schema(), slot, row_id, + column, storage_read_options, iter); + if (st.ok() && + _tablet->tablet_schema()->column_by_uid(slot->col_unique_id()).has_char_type()) { column->shrink_padding_chars(); } + _result_block->replace_by_position(pos, std::move(column)); + RETURN_IF_ERROR(st); } } } @@ -583,10 +583,13 @@ Status PointQueryExecutor::_lookup_row_data() { // SlotDescriptor{id=9, col=v2, colUniqueId=2 ...} // thus missing in include_col_uids and missing_col_uids for (size_t i = 0; i < _result_block->columns(); ++i) { - auto column = _result_block->get_by_position(i).column; + const auto& column = _result_block->get_by_position(i).column; int padding_rows = _row_hits - cast_set(column->size()); if 
(padding_rows > 0) { - column->assume_mutable()->insert_many_defaults(padding_rows); + auto mutable_column = + IColumn::mutate(std::move(_result_block->get_by_position(i).column)); + mutable_column->insert_many_defaults(padding_rows); + _result_block->replace_by_position(i, std::move(mutable_column)); } } } diff --git a/be/src/storage/iterator/block_reader.cpp b/be/src/storage/iterator/block_reader.cpp index e50ca8a9c831b1..82358ca7c85899 100644 --- a/be/src/storage/iterator/block_reader.cpp +++ b/be/src/storage/iterator/block_reader.cpp @@ -400,6 +400,7 @@ Status BlockReader::_replace_key_next_block(Block* block, bool* eof) { } } _merged_rows += merged_row; + block->set_columns(std::move(target_columns)); return Status::OK(); } @@ -580,9 +581,10 @@ Status BlockReader::_unique_key_next_block(Block* block, bool* eof) { LOG(WARNING) << "tablet_id: " << tablet()->tablet_id() << " delete sign idx " << delete_sign_idx << " not invalid, skip filter delete in base compaction"; + block->set_columns(std::move(target_columns)); return Status::OK(); } - MutableColumnPtr delete_filter_column = (*std::move(_delete_filter_column)).mutate(); + auto delete_filter_column = IColumn::mutate(std::move(_delete_filter_column)); reinterpret_cast(delete_filter_column.get())->resize(target_block_row); auto* __restrict filter_data = @@ -603,6 +605,7 @@ Status BlockReader::_unique_key_next_block(Block* block, bool* eof) { } } auto target_columns_size = target_columns.size(); + _delete_filter_column = std::move(delete_filter_column); ColumnWithTypeAndName column_with_type_and_name {_delete_filter_column, std::make_shared(), "__DORIS_COMPACTION_FILTER__"}; diff --git a/be/src/storage/partial_update_info.cpp b/be/src/storage/partial_update_info.cpp index 7b97ecfc081167..14c6b400078bba 100644 --- a/be/src/storage/partial_update_info.cpp +++ b/be/src/storage/partial_update_info.cpp @@ -38,6 +38,18 @@ #include "storage/utils.h" namespace doris { +namespace { + +ColumnBitmap* 
get_mutable_skip_bitmap_column(Block* block, size_t skip_bitmap_col_idx) { + auto skip_bitmap_column = + IColumn::mutate(std::move(block->get_by_position(skip_bitmap_col_idx).column)); + auto* skip_bitmap_column_ptr = assert_cast(skip_bitmap_column.get()); + block->replace_by_position(skip_bitmap_col_idx, std::move(skip_bitmap_column)); + return skip_bitmap_column_ptr; +} + +} // namespace + Status PartialUpdateInfo::init(int64_t tablet_id, int64_t txn_id, const TabletSchema& tablet_schema, UniqueKeyUpdateModePB unique_key_update_mode, PartialUpdateNewRowPolicyPB policy, @@ -325,7 +337,10 @@ Status FixedReadPlan::read_columns_by_plan( } } bool has_row_column = tablet_schema.has_row_store_for_all_columns(); - auto mutable_columns = block.mutate_columns(); + MutableColumns mutable_columns; + if (!has_row_column) { + mutable_columns = block.mutate_columns(); + } uint32_t read_idx = 0; for (const auto& [rowset_id, segment_row_mappings] : plan) { for (const auto& [segment_id, mappings] : segment_row_mappings) { @@ -360,7 +375,9 @@ Status FixedReadPlan::read_columns_by_plan( } } } - block.set_columns(std::move(mutable_columns)); + if (!has_row_column) { + block.set_columns(std::move(mutable_columns)); + } return Status::OK(); } @@ -797,8 +814,7 @@ void BlockAggregator::merge_one_row(MutableBlock& dst_block, Block* src_block, i ->get_data() .back(); const auto& new_row_skip_bitmap = - assert_cast( - src_block->get_by_position(cid).column->assume_mutable().get()) + assert_cast(src_block->get_by_position(cid).column.get()) ->get_data()[rid]; cur_skip_bitmap &= new_row_skip_bitmap; continue; @@ -943,11 +959,8 @@ Status BlockAggregator::aggregate_for_sequence_column( DCHECK_EQ(block->columns(), _tablet_schema.num_columns()); // the process logic here is the same as MemTable::_aggregate_for_flexible_partial_update_without_seq_col() // after this function, there will be at most 2 rows for a specified key - std::vector* skip_bitmaps = &( - 
assert_cast(block->get_by_position(_tablet_schema.skip_bitmap_col_idx()) - .column->assume_mutable() - .get()) - ->get_data()); + std::vector* skip_bitmaps = + &get_mutable_skip_bitmap_column(block, _tablet_schema.skip_bitmap_col_idx())->get_data(); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*block, num_rows); auto filtered_block = _tablet_schema.create_block(); @@ -1016,11 +1029,8 @@ Status BlockAggregator::aggregate_for_insert_after_delete( // there will be at most 2 rows for a specified key in block when control flow reaches here // after this function, there will not be duplicate rows in block - std::vector* skip_bitmaps = &( - assert_cast(block->get_by_position(_tablet_schema.skip_bitmap_col_idx()) - .column->assume_mutable() - .get()) - ->get_data()); + std::vector* skip_bitmaps = + &get_mutable_skip_bitmap_column(block, _tablet_schema.skip_bitmap_col_idx())->get_data(); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*block, num_rows); auto filter_column = ColumnUInt8::create(num_rows, 1); diff --git a/be/src/storage/schema_change/schema_change.cpp b/be/src/storage/schema_change/schema_change.cpp index f2583e3bcfc2d2..0f88bf70ec1dcc 100644 --- a/be/src/storage/schema_change/schema_change.cpp +++ b/be/src/storage/schema_change/schema_change.cpp @@ -172,14 +172,18 @@ class MultiBlockMerger { if (i == rows - 1 || _cmp.compare(row_refs[i], row_refs[i + 1])) { for (int j = 0; j < key_number; j++) { - finalized_block.get_by_position(j).column->assume_mutable()->insert_from( - *row_ref.get_column(j), row_ref.position); + auto& column_ptr = finalized_block.get_by_position(j).column; + auto column = column_ptr->assume_mutable(); + column->insert_from(*row_ref.get_column(j), row_ref.position); + column_ptr = std::move(column); } for (int j = key_number; j < columns; j++) { + auto& column_ptr = finalized_block.get_by_position(j).column; + auto column = column_ptr->assume_mutable(); agg_functions[j - 
key_number]->insert_result_into( - agg_places[j - key_number], - finalized_block.get_by_position(j).column->assume_mutable_ref()); + agg_places[j - key_number], *column); + column_ptr = std::move(column); agg_functions[j - key_number]->reset(agg_places[j - key_number]); } @@ -225,12 +229,14 @@ class MultiBlockMerger { int limit = std::min(ALTER_TABLE_BATCH_SIZE, rows - i); for (int idx = 0; idx < columns; idx++) { - auto column = finalized_block.get_by_position(idx).column->assume_mutable(); + auto& column_ptr = finalized_block.get_by_position(idx).column; + auto column = column_ptr->assume_mutable(); for (int j = 0; j < limit; j++) { auto row_ref = pushed_row_refs[i + j]; column->insert_from(*row_ref.get_column(idx), row_ref.position); } + column_ptr = std::move(column); } RETURN_IF_ERROR(rowset_writer->add_block(&finalized_block)); finalized_block.clear_column_data(); @@ -382,6 +388,7 @@ Status BlockChanger::change_block(Block* ref_block, Block* new_block) const { column = column->convert_to_predicate_column_if_dictionary(); column->insert_duplicate_fields(value, row_num); } + new_block->get_by_position(idx).column = std::move(column); } else { // same type, just swap column swap_idx_list.emplace_back(_schema_mapping[idx].ref_column_idx, idx); @@ -398,21 +405,20 @@ Status BlockChanger::change_block(Block* ref_block, Block* new_block) const { if (ref_col_nullable != new_col_nullable) { // not nullable to nullable if (new_col_nullable) { - auto* new_nullable_col = - assert_cast(new_col->assume_mutable().get()); + auto mutable_new_col = new_col->assume_mutable(); + auto* new_nullable_col = assert_cast(mutable_new_col.get()); new_nullable_col->change_nested_column(ref_col); new_nullable_col->get_null_map_data().resize_fill(ref_col->size()); + new_col = std::move(mutable_new_col); } else { // nullable to not nullable: // suppose column `c_phone` is originally varchar(16) NOT NULL, // then do schema change `alter table test modify column c_phone int not null`, // the 
cast expr of schema change is `CastExpr(CAST String to Nullable(Int32))`, // so need to handle nullable to not nullable here - auto* ref_nullable_col = - assert_cast(ref_col->assume_mutable().get()); - - new_col = ref_nullable_col->get_nested_column_ptr(); + const auto& ref_nullable_col = assert_cast(*ref_col); + new_col = ref_nullable_col.get_nested_column_ptr(); } } else { new_block->get_by_position(it.second).column = diff --git a/be/src/storage/segment/column_reader.cpp b/be/src/storage/segment/column_reader.cpp index 20e20879d087b8..716f6af31a1220 100644 --- a/be/src/storage/segment/column_reader.cpp +++ b/be/src/storage/segment/column_reader.cpp @@ -77,6 +77,7 @@ #include "util/bitmap.h" #include "util/block_compression.h" #include "util/concurrency_stats.h" +#include "util/defer_op.h" #include "util/rle_encoding.h" // for RleDecoder #include "util/slice.h" @@ -993,7 +994,8 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* auto& column_map = assert_cast( dst->is_nullable() ? 
static_cast(*dst).get_nested_column() : *dst); - auto column_offsets_ptr = column_map.get_offsets_column().assume_mutable(); + auto column_offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr())); + Defer defer_offsets {[&] { column_map.get_offsets_ptr() = std::move(column_offsets_ptr); }}; bool offsets_has_null = false; ssize_t start = column_offsets_ptr->size(); RETURN_IF_ERROR(_offsets_iterator->next_batch(n, column_offsets_ptr, &offsets_has_null)); @@ -1005,10 +1007,12 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* DCHECK(column_offsets.get_data().back() >= column_offsets.get_data()[start - 1]); size_t num_items = column_offsets.get_data().back() - column_offsets.get_data()[start - 1]; // -1 is valid - auto key_ptr = column_map.get_keys().assume_mutable(); - auto val_ptr = column_map.get_values().assume_mutable(); if (num_items > 0) { + auto key_ptr = IColumn::mutate(std::move(column_map.get_keys_ptr())); + auto val_ptr = IColumn::mutate(std::move(column_map.get_values_ptr())); + Defer defer_keys {[&] { column_map.get_keys_ptr() = std::move(key_ptr); }}; + Defer defer_values {[&] { column_map.get_values_ptr() = std::move(val_ptr); }}; if (read_offset_only()) { // OFFSET_ONLY mode: skip reading actual key/value data, fill with defaults key_ptr->insert_many_defaults(num_items); @@ -1021,9 +1025,6 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* RETURN_IF_ERROR(_val_iterator->next_batch(&num_read, val_ptr, &val_has_null)); DCHECK(num_read == num_items); } - - column_map.get_keys_ptr() = std::move(key_ptr); - column_map.get_values_ptr() = std::move(val_ptr); } if (dst->is_nullable()) { @@ -1078,9 +1079,10 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t return Status::OK(); } // resolve ColumnMap and nullable wrapper - const auto* column_map = check_and_get_column( + auto& column_map = assert_cast( dst->is_nullable() ? 
static_cast(*dst).get_nested_column() : *dst); - auto offsets_ptr = column_map->get_offsets_column().assume_mutable(); + auto offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr())); + Defer defer_offsets {[&] { column_map.get_offsets_ptr() = std::move(offsets_ptr); }}; auto& offsets = static_cast(*offsets_ptr); size_t base = offsets.get_data().empty() ? 0 : offsets.get_data().back(); @@ -1164,8 +1166,10 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t } // 6. read key/value elements for non-empty sizes - auto keys_ptr = column_map->get_keys().assume_mutable(); - auto vals_ptr = column_map->get_values().assume_mutable(); + auto keys_ptr = IColumn::mutate(std::move(column_map.get_keys_ptr())); + auto vals_ptr = IColumn::mutate(std::move(column_map.get_values_ptr())); + Defer defer_keys {[&] { column_map.get_keys_ptr() = std::move(keys_ptr); }}; + Defer defer_values {[&] { column_map.get_values_ptr() = std::move(vals_ptr); }}; size_t this_run = sizes[0]; auto start_idx = starts_data[0]; @@ -1410,12 +1414,14 @@ Status StructFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bo dst->is_nullable() ? 
static_cast(*dst).get_nested_column() : *dst); for (size_t i = 0; i < column_struct.tuple_size(); i++) { size_t num_read = *n; - auto sub_column_ptr = column_struct.get_column(i).assume_mutable(); + auto sub_column_ptr = IColumn::mutate(std::move(column_struct.get_column_ptr(i))); + Defer defer_sub_column {[&] { + column_struct.get_column_ptr(i) = std::move(sub_column_ptr); + }}; bool column_has_null = false; RETURN_IF_ERROR( _sub_column_iterators[i]->next_batch(&num_read, sub_column_ptr, &column_has_null)); DCHECK(num_read == *n); - column_struct.get_column_ptr(i) = std::move(sub_column_ptr); } if (dst->is_nullable()) { @@ -1770,11 +1776,12 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, boo return Status::OK(); } - const auto* column_array = check_and_get_column( + auto& column_array = assert_cast( dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); bool offsets_has_null = false; - auto column_offsets_ptr = column_array->get_offsets_column().assume_mutable(); + auto column_offsets_ptr = IColumn::mutate(std::move(column_array.get_offsets_ptr())); + Defer defer_offsets {[&] { column_array.get_offsets_ptr() = std::move(column_offsets_ptr); }}; ssize_t start = column_offsets_ptr->size(); RETURN_IF_ERROR(_offset_iterator->next_batch(n, column_offsets_ptr, &offsets_has_null)); if (*n == 0) { @@ -1784,8 +1791,9 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, boo RETURN_IF_ERROR(_offset_iterator->_calculate_offsets(start, column_offsets)); size_t num_items = column_offsets.get_data().back() - column_offsets.get_data()[start - 1]; // -1 is valid - auto column_items_ptr = column_array->get_data().assume_mutable(); if (num_items > 0) { + auto column_items_ptr = IColumn::mutate(std::move(column_array.get_data_ptr())); + Defer defer_items {[&] { column_array.get_data_ptr() = std::move(column_items_ptr); }}; if (read_offset_only()) { // OFFSET_ONLY mode: skip reading actual item data, fill with 
defaults column_items_ptr->insert_many_defaults(num_items); diff --git a/be/src/storage/segment/segment_writer.cpp b/be/src/storage/segment/segment_writer.cpp index ac6841a6cf2651..ba9d8ce58e3e47 100644 --- a/be/src/storage/segment/segment_writer.cpp +++ b/be/src/storage/segment/segment_writer.cpp @@ -383,7 +383,7 @@ void SegmentWriter::_maybe_invalid_row_cache(const std::string& key) { } } -void SegmentWriter::_serialize_block_to_row_column(const Block& block) { +void SegmentWriter::_serialize_block_to_row_column(Block& block) { if (block.rows() == 0) { return; } @@ -392,14 +392,14 @@ void SegmentWriter::_serialize_block_to_row_column(const Block& block) { int row_column_id = 0; for (int i = 0; i < _tablet_schema->num_columns(); ++i) { if (_tablet_schema->column(i).is_row_store_column()) { - auto* row_store_column = static_cast( - block.get_by_position(i).column->assume_mutable_ref().assume_mutable().get()); - row_store_column->clear(); + auto row_store_column_ptr = block.get_by_position(i).column->clone_empty(); + auto* row_store_column = static_cast(row_store_column_ptr.get()); DataTypeSerDeSPtrs serdes = create_data_type_serdes(block.get_data_types()); JsonbSerializeUtil::block_to_jsonb(*_tablet_schema, block, *row_store_column, cast_set(_tablet_schema->num_columns()), serdes, {_tablet_schema->row_columns_uids().begin(), _tablet_schema->row_columns_uids().end()}); + block.replace_by_position(i, std::move(row_store_column_ptr)); break; } } @@ -713,7 +713,7 @@ Status SegmentWriter::append_block(const Block* block, size_t row_pos, size_t nu // or it's schema change write(since column data type maybe changed, so we should reubild) if (_opts.write_type == DataWriteType::TYPE_DIRECT || _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE) { - _serialize_block_to_row_column(*block); + _serialize_block_to_row_column(*const_cast(block)); } if (_opts.rowset_ctx->write_type != DataWriteType::TYPE_COMPACTION && diff --git a/be/src/storage/segment/segment_writer.h 
b/be/src/storage/segment/segment_writer.h index 37b4e996448d76..0c88cb193134a2 100644 --- a/be/src/storage/segment/segment_writer.h +++ b/be/src/storage/segment/segment_writer.h @@ -182,7 +182,7 @@ class SegmentWriter { void set_min_max_key(const Slice& key); void set_min_key(const Slice& key); void set_max_key(const Slice& key); - void _serialize_block_to_row_column(const Block& block); + void _serialize_block_to_row_column(Block& block); Status _generate_primary_key_index( const std::vector& primary_key_coders, const std::vector& primary_key_columns, diff --git a/be/src/storage/segment/variant/variant_column_reader.cpp b/be/src/storage/segment/variant/variant_column_reader.cpp index d41775581bf2e5..96c5d514b6fb48 100644 --- a/be/src/storage/segment/variant/variant_column_reader.cpp +++ b/be/src/storage/segment/variant/variant_column_reader.cpp @@ -1601,8 +1601,9 @@ static void fill_nested_with_defaults(MutableColumnPtr& dst, MutableColumnPtr& s } auto new_nested = dst_array->get_data_ptr()->clone_resized(sibling_array->get_data_ptr()->size()); - auto new_array = make_nullable(ColumnArray::create( - new_nested->assume_mutable(), sibling_array->get_offsets_ptr()->assume_mutable())); + ColumnPtr nested_column = std::move(new_nested); + auto new_array = make_nullable( + ColumnArray::create(nested_column, sibling_array->get_offsets_ptr())); dst->insert_range_from(*new_array, 0, new_array->size()); #ifndef NDEBUG if (!dst_array->has_equal_offsets(*sibling_array)) { diff --git a/be/src/storage/segment/vertical_segment_writer.cpp b/be/src/storage/segment/vertical_segment_writer.cpp index 6203bf50b233de..67fff40faa15cd 100644 --- a/be/src/storage/segment/vertical_segment_writer.cpp +++ b/be/src/storage/segment/vertical_segment_writer.cpp @@ -91,6 +91,14 @@ inline std::string vertical_segment_writer_mem_tracker_name(uint32_t segment_id) return "VerticalSegmentWriter:Segment-" + std::to_string(segment_id); } +static ColumnBitmap* get_mutable_skip_bitmap_column(Block* block, 
size_t skip_bitmap_col_idx) { + auto skip_bitmap_column = + IColumn::mutate(std::move(block->get_by_position(skip_bitmap_col_idx).column)); + auto* skip_bitmap_column_ptr = assert_cast(skip_bitmap_column.get()); + block->replace_by_position(skip_bitmap_col_idx, std::move(skip_bitmap_column)); + return skip_bitmap_column_ptr; +} + VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, TabletSchemaSPtr tablet_schema, BaseTabletSPtr tablet, DataDir* data_dir, @@ -356,7 +364,7 @@ void VerticalSegmentWriter::_maybe_invalid_row_cache(const std::string& key) con } } -void VerticalSegmentWriter::_serialize_block_to_row_column(const Block& block) { +void VerticalSegmentWriter::_serialize_block_to_row_column(Block& block) { if (block.rows() == 0) { return; } @@ -365,15 +373,15 @@ void VerticalSegmentWriter::_serialize_block_to_row_column(const Block& block) { int row_column_id = 0; for (int i = 0; i < _tablet_schema->num_columns(); ++i) { if (_tablet_schema->column(i).is_row_store_column()) { - auto* row_store_column = static_cast( - block.get_by_position(i).column->assume_mutable_ref().assume_mutable().get()); - row_store_column->clear(); + auto row_store_column_ptr = block.get_by_position(i).column->clone_empty(); + auto* row_store_column = static_cast(row_store_column_ptr.get()); DataTypeSerDeSPtrs serdes = create_data_type_serdes(block.get_data_types()); std::unordered_set row_store_cids_set(_tablet_schema->row_columns_uids().begin(), _tablet_schema->row_columns_uids().end()); JsonbSerializeUtil::block_to_jsonb(*_tablet_schema, block, *row_store_column, cast_set(_tablet_schema->num_columns()), serdes, row_store_cids_set); + block.replace_by_position(i, std::move(row_store_column_ptr)); break; } } @@ -759,10 +767,9 @@ Status VerticalSegmentWriter::_append_block_with_flexible_partial_content(RowsIn RETURN_IF_ERROR(_block_aggregator.convert_seq_column(const_cast(data.block), data.row_pos, data.num_rows, seq_column)); - std::vector* 
skip_bitmaps = &( - assert_cast( - data.block->get_by_position(skip_bitmap_col_idx).column->assume_mutable().get()) - ->get_data()); + auto* mutable_block = const_cast(data.block); + std::vector* skip_bitmaps = + &get_mutable_skip_bitmap_column(mutable_block, skip_bitmap_col_idx)->get_data(); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*data.block, data.row_pos + data.num_rows); DCHECK(delete_signs != nullptr); @@ -1003,7 +1010,7 @@ Status VerticalSegmentWriter::write_batch() { _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE) { for (auto& data : _batched_blocks) { // TODO: maybe we should pass range to this method - _serialize_block_to_row_column(*data.block); + _serialize_block_to_row_column(*const_cast(data.block)); } } diff --git a/be/src/storage/segment/vertical_segment_writer.h b/be/src/storage/segment/vertical_segment_writer.h index 5c0ec0930e522d..39235811c07880 100644 --- a/be/src/storage/segment/vertical_segment_writer.h +++ b/be/src/storage/segment/vertical_segment_writer.h @@ -158,7 +158,7 @@ class VerticalSegmentWriter { void _set_min_max_key(const Slice& key); void _set_min_key(const Slice& key); void _set_max_key(const Slice& key); - void _serialize_block_to_row_column(const Block& block); + void _serialize_block_to_row_column(Block& block); Status _probe_key_for_mow(std::string key, std::size_t segment_pos, bool have_input_seq_column, bool have_delete_sign, const std::vector& specified_rowsets, diff --git a/be/src/storage/tablet/base_tablet.cpp b/be/src/storage/tablet/base_tablet.cpp index 6802092c74995e..7611bf12874264 100644 --- a/be/src/storage/tablet/base_tablet.cpp +++ b/be/src/storage/tablet/base_tablet.cpp @@ -851,9 +851,10 @@ Status BaseTablet::sort_block(Block& in_block, Block& output_block) { vec_row_comparator->set_block(&mutable_input_block); std::vector> row_in_blocks; - DCHECK(in_block.rows() <= std::numeric_limits::max()); - row_in_blocks.reserve(in_block.rows()); - for (size_t i = 0; i < in_block.rows(); 
++i) { + const auto input_rows = mutable_input_block.rows(); + DCHECK(input_rows <= std::numeric_limits::max()); + row_in_blocks.reserve(input_rows); + for (size_t i = 0; i < input_rows; ++i) { row_in_blocks.emplace_back(std::make_unique(i)); } std::sort(row_in_blocks.begin(), row_in_blocks.end(), @@ -865,12 +866,15 @@ Status BaseTablet::sort_block(Block& in_block, Block& output_block) { return value < 0; }); std::vector row_pos_vec; - row_pos_vec.reserve(in_block.rows()); + row_pos_vec.reserve(input_rows); for (auto& block : row_in_blocks) { row_pos_vec.emplace_back(block->_row_pos); } - return mutable_output_block.add_rows(&in_block, row_pos_vec.data(), - row_pos_vec.data() + in_block.rows()); + in_block.set_columns(std::move(mutable_input_block.mutable_columns())); + RETURN_IF_ERROR(mutable_output_block.add_rows(&in_block, row_pos_vec.data(), + row_pos_vec.data() + input_rows)); + output_block.set_columns(std::move(mutable_output_block.mutable_columns())); + return Status::OK(); } // fetch value by row column diff --git a/be/src/util/jsonb/serialize.cpp b/be/src/util/jsonb/serialize.cpp index 0088c6249f0030..669747b7949941 100644 --- a/be/src/util/jsonb/serialize.cpp +++ b/be/src/util/jsonb/serialize.cpp @@ -102,9 +102,10 @@ Status JsonbSerializeUtil::jsonb_to_block( auto col_it = col_id_to_idx.find(it->getKeyId()); if (col_it != col_id_to_idx.end() && (include_cids.empty() || include_cids.contains(it->getKeyId()))) { - MutableColumnPtr dst_column = - dst.get_by_position(col_it->second).column->assume_mutable(); + auto dst_column = + IColumn::mutate(std::move(dst.get_by_position(col_it->second).column)); serdes[col_it->second]->read_one_cell_from_jsonb(*dst_column, it->value()); + dst.replace_by_position(col_it->second, std::move(dst_column)); ++filled_columns; } } @@ -112,20 +113,25 @@ Status JsonbSerializeUtil::jsonb_to_block( return Status::OK(); } auto fill_column = [&](Block& dst, int pos, size_t old_num_rows) { - MutableColumnPtr dst_column = 
dst.get_by_position(pos).column->assume_mutable(); + auto dst_column = IColumn::mutate(std::move(dst.get_by_position(pos).column)); if (dst_column->size() < old_num_rows + 1) { DCHECK(dst_column->size() == old_num_rows); + Status st = Status::OK(); if (default_values[pos].empty()) { dst_column->insert_default(); } else { Slice value(default_values[pos].data(), default_values[pos].size()); DataTypeSerDe::FormatOptions opt; opt.converted_from_string = true; - RETURN_IF_ERROR( - serdes[pos]->deserialize_one_cell_from_json(*dst_column, value, opt)); + st = serdes[pos]->deserialize_one_cell_from_json(*dst_column, value, opt); } + dst.replace_by_position(pos, std::move(dst_column)); + RETURN_IF_ERROR(st); + DCHECK(dst.get_by_position(pos).column->size() == num_rows + 1); + return Status::OK(); } DCHECK(dst_column->size() == num_rows + 1); + dst.replace_by_position(pos, std::move(dst_column)); return Status::OK(); }; // fill missing column @@ -145,4 +151,4 @@ Status JsonbSerializeUtil::jsonb_to_block( return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris From 0f9867aeb6e0623ae11bb1015058fcc89bea1dab Mon Sep 17 00:00:00 2001 From: zhaochangle Date: Thu, 7 May 2026 22:05:46 +0800 Subject: [PATCH 3/4] [fix](be) Restore equality delete block after mutable merge Issue Number: None Related PR: #63001 Problem Summary: After the COW mutable ownership changes, constructing a MutableBlock from the cached Iceberg equality-delete block transfers the block columns into the mutable block. The equality-delete loader merged rows into the mutable block but did not write the result back, leaving null columns in the cached block and crashing HybridSet while building delete filters. This also applies the requested BE clang-format updates. 
None - Test: Manual test - build-support/check-format.sh - ./build.sh --be -j 100 - git diff --check - Behavior changed: No - Does this need documentation: No --- be/src/core/column/column_variant.cpp | 5 +- ...istinct_streaming_aggregation_operator.cpp | 117 +++++++++--------- .../join/process_hash_table_probe_impl.h | 3 +- .../nested_loop_join_probe_operator.cpp | 4 +- be/src/exec/operator/schema_scan_operator.cpp | 5 +- be/src/exec/scan/file_scanner.cpp | 8 +- be/src/exec/sink/vtablet_block_convertor.cpp | 3 +- .../aggregate/aggregate_function_java_udaf.h | 4 +- .../function/array/function_array_flatten.cpp | 7 +- .../table_function/python_udtf_function.cpp | 4 +- be/src/exprs/table_function/vexplode_v2.cpp | 4 +- be/src/exprs/vcase_expr.h | 3 +- be/src/exprs/vcompound_pred.h | 22 ++-- be/src/format/jni/jni_data_bridge.cpp | 11 +- be/src/format/table/iceberg_reader_mixin.h | 1 + be/src/information_schema/schema_scanner.cpp | 4 +- be/src/service/point_query_executor.cpp | 5 +- be/src/storage/partial_update_info.cpp | 6 +- be/src/storage/segment/column_reader.cpp | 5 +- .../segment/variant/variant_column_reader.cpp | 4 +- be/test/core/data_type/complex_type_test.cpp | 115 +++++++++++++++++ 21 files changed, 227 insertions(+), 113 deletions(-) diff --git a/be/src/core/column/column_variant.cpp b/be/src/core/column/column_variant.cpp index d47d5cb6d2ddb7..48fa8731f45f8f 100644 --- a/be/src/core/column/column_variant.cpp +++ b/be/src/core/column/column_variant.cpp @@ -2123,8 +2123,9 @@ void ColumnVariant::ensure_binary_columns_rows() { if (const_column->size() == num_rows) { return; } - CHECK(const_column->empty()) << "ColumnVariant binary column size mismatch, rows: " - << num_rows << ", column rows: " << const_column->size(); + CHECK(const_column->empty()) + << "ColumnVariant binary column size mismatch, rows: " << num_rows + << ", column rows: " << const_column->size(); auto mutable_column = IColumn::mutate(std::move(static_cast(column))); 
mutable_column->resize(num_rows); column = std::move(mutable_column); diff --git a/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp b/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp index cb115bb1395a25..92c11cf2896154 100644 --- a/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp +++ b/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp @@ -76,64 +76,65 @@ bool DistinctStreamingAggLocalState::_should_expand_preagg_hash_tables() { } return std::visit( - Overload {[&](std::monostate& arg) -> bool { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, "uninited hash table"); - return false; - }, - [&](auto& agg_method) -> bool { - auto& hash_tbl = *agg_method.hash_table; - auto [ht_mem, ht_rows] = - std::pair {hash_tbl.get_buffer_size_in_bytes(), hash_tbl.size()}; - - // Need some rows in tables to have valid statistics. - if (ht_rows == 0) { - return true; - } - - const auto* reduction = _is_single_backend - ? SINGLE_BE_STREAMING_HT_MIN_REDUCTION - : STREAMING_HT_MIN_REDUCTION; - - // Find the appropriate reduction factor in our table for the current hash table sizes. - int cache_level = 0; - while (cache_level + 1 < STREAMING_HT_MIN_REDUCTION_SIZE && - ht_mem >= reduction[cache_level + 1].min_ht_mem) { - ++cache_level; - } - - // Compare the number of rows in the hash table with the number of input rows that - // were aggregated into it. Exclude passed through rows from this calculation since - // they were not in hash tables. - const int64_t input_rows = _input_num_rows; - const int64_t aggregated_input_rows = input_rows - _num_rows_returned; - // TODO chenhao - // const int64_t expected_input_rows = estimated_input_cardinality_ - num_rows_returned_; - double current_reduction = static_cast(aggregated_input_rows) / - static_cast(ht_rows); - - // TODO: workaround for IMPALA-2490: subplan node rows_returned counter may be - // inaccurate, which could lead to a divide by zero below. 
- if (aggregated_input_rows <= 0) { - return true; - } - - // Extrapolate the current reduction factor (r) using the formula - // R = 1 + (N / n) * (r - 1), where R is the reduction factor over the full input data - // set, N is the number of input rows, excluding passed-through rows, and n is the - // number of rows inserted or merged into the hash tables. This is a very rough - // approximation but is good enough to be useful. - // TODO: consider collecting more statistics to better estimate reduction. - // double estimated_reduction = aggregated_input_rows >= expected_input_rows - // ? current_reduction - // : 1 + (expected_input_rows / aggregated_input_rows) * (current_reduction - 1); - double min_reduction = reduction[cache_level].streaming_ht_min_reduction; - - // COUNTER_SET(preagg_estimated_reduction_, estimated_reduction); - // COUNTER_SET(preagg_streaming_ht_min_reduction_, min_reduction); - // return estimated_reduction > min_reduction; - _should_expand_hash_table = current_reduction > min_reduction; - return _should_expand_hash_table; - }}, + Overload { + [&](std::monostate& arg) -> bool { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, "uninited hash table"); + return false; + }, + [&](auto& agg_method) -> bool { + auto& hash_tbl = *agg_method.hash_table; + auto [ht_mem, ht_rows] = + std::pair {hash_tbl.get_buffer_size_in_bytes(), hash_tbl.size()}; + + // Need some rows in tables to have valid statistics. + if (ht_rows == 0) { + return true; + } + + const auto* reduction = _is_single_backend + ? SINGLE_BE_STREAMING_HT_MIN_REDUCTION + : STREAMING_HT_MIN_REDUCTION; + + // Find the appropriate reduction factor in our table for the current hash table sizes. + int cache_level = 0; + while (cache_level + 1 < STREAMING_HT_MIN_REDUCTION_SIZE && + ht_mem >= reduction[cache_level + 1].min_ht_mem) { + ++cache_level; + } + + // Compare the number of rows in the hash table with the number of input rows that + // were aggregated into it. 
Exclude passed through rows from this calculation since + // they were not in hash tables. + const int64_t input_rows = _input_num_rows; + const int64_t aggregated_input_rows = input_rows - _num_rows_returned; + // TODO chenhao + // const int64_t expected_input_rows = estimated_input_cardinality_ - num_rows_returned_; + double current_reduction = static_cast(aggregated_input_rows) / + static_cast(ht_rows); + + // TODO: workaround for IMPALA-2490: subplan node rows_returned counter may be + // inaccurate, which could lead to a divide by zero below. + if (aggregated_input_rows <= 0) { + return true; + } + + // Extrapolate the current reduction factor (r) using the formula + // R = 1 + (N / n) * (r - 1), where R is the reduction factor over the full input data + // set, N is the number of input rows, excluding passed-through rows, and n is the + // number of rows inserted or merged into the hash tables. This is a very rough + // approximation but is good enough to be useful. + // TODO: consider collecting more statistics to better estimate reduction. + // double estimated_reduction = aggregated_input_rows >= expected_input_rows + // ? 
current_reduction + // : 1 + (expected_input_rows / aggregated_input_rows) * (current_reduction - 1); + double min_reduction = reduction[cache_level].streaming_ht_min_reduction; + + // COUNTER_SET(preagg_estimated_reduction_, estimated_reduction); + // COUNTER_SET(preagg_streaming_ht_min_reduction_, min_reduction); + // return estimated_reduction > min_reduction; + _should_expand_hash_table = current_reduction > min_reduction; + return _should_expand_hash_table; + }}, _agg_data->method_variant); } diff --git a/be/src/exec/operator/join/process_hash_table_probe_impl.h b/be/src/exec/operator/join/process_hash_table_probe_impl.h index 23242a82d15080..bcc4408906bf54 100644 --- a/be/src/exec/operator/join/process_hash_table_probe_impl.h +++ b/be/src/exec/operator/join/process_hash_table_probe_impl.h @@ -726,7 +726,8 @@ Status ProcessHashTableProbe::do_mark_join_conjuncts(Block* output_b auto mark_column_mutable = IColumn::mutate( std::move(output_block->get_by_position(_parent->_mark_column_id).column)); auto* mark_column = assert_cast(mark_column_mutable.get()); - IColumn::Filter& filter = assert_cast(mark_column->get_nested_column()).get_data(); + IColumn::Filter& filter = + assert_cast(mark_column->get_nested_column()).get_data(); auto& null_map_column = mark_column->get_null_map_column(); output_block->replace_by_position(_parent->_mark_column_id, std::move(mark_column_mutable)); RETURN_IF_ERROR(VExprContext::execute_conjuncts(_parent->_mark_join_conjuncts, output_block, diff --git a/be/src/exec/operator/nested_loop_join_probe_operator.cpp b/be/src/exec/operator/nested_loop_join_probe_operator.cpp index a3644857bb7312..b83178b4f4e816 100644 --- a/be/src/exec/operator/nested_loop_join_probe_operator.cpp +++ b/be/src/exec/operator/nested_loop_join_probe_operator.cpp @@ -78,8 +78,8 @@ Status NestedLoopJoinProbeLocalState::close(RuntimeState* state) { void NestedLoopJoinProbeLocalState::_update_additional_flags(Block* block) { auto& p = _parent->cast(); if 
(p._is_mark_join) { - auto mark_column = IColumn::mutate( - std::move(block->get_by_position(block->columns() - 1).column)); + auto mark_column = + IColumn::mutate(std::move(block->get_by_position(block->columns() - 1).column)); if (mark_column->size() < block->rows()) { ColumnFilterHelper(*mark_column).resize_fill(block->rows(), 1); } diff --git a/be/src/exec/operator/schema_scan_operator.cpp b/be/src/exec/operator/schema_scan_operator.cpp index c44ff030326484..3d5922573b90e4 100644 --- a/be/src/exec/operator/schema_scan_operator.cpp +++ b/be/src/exec/operator/schema_scan_operator.cpp @@ -259,9 +259,8 @@ Status SchemaScanOperatorX::get_block(RuntimeState* state, Block* block, bool* e for (int i = 0; i < _slot_num; ++i) { MutableColumnPtr column_ptr = IColumn::mutate(std::move(block->get_by_position(i).column)); - ColumnPtr src_column = - src_block.safe_get_by_position(_slot_offsets[i]) - .column->convert_to_full_column_if_const(); + ColumnPtr src_column = src_block.safe_get_by_position(_slot_offsets[i]) + .column->convert_to_full_column_if_const(); if (column_ptr->is_nullable() && !src_column->is_nullable()) { src_column = make_nullable(src_column); } diff --git a/be/src/exec/scan/file_scanner.cpp b/be/src/exec/scan/file_scanner.cpp index 531b02e5fdc032..f621050e337d8f 100644 --- a/be/src/exec/scan/file_scanner.cpp +++ b/be/src/exec/scan/file_scanner.cpp @@ -782,11 +782,9 @@ Status FileScanner::_convert_to_output_block(Block* block) { std::vector* skip_bitmaps {nullptr}; MutableColumnPtr skip_bitmap_column; if (_should_process_skip_bitmap_col()) { - skip_bitmap_column = - IColumn::mutate(std::move(_src_block_ptr->get_by_position(_skip_bitmap_col_idx) - .column)); - auto* skip_bitmap_nullable_col_ptr = - assert_cast(skip_bitmap_column.get()); + skip_bitmap_column = IColumn::mutate( + std::move(_src_block_ptr->get_by_position(_skip_bitmap_col_idx).column)); + auto* skip_bitmap_nullable_col_ptr = assert_cast(skip_bitmap_column.get()); skip_bitmaps = &(assert_cast( 
skip_bitmap_nullable_col_ptr->get_nested_column_ptr().get()) ->get_data()); diff --git a/be/src/exec/sink/vtablet_block_convertor.cpp b/be/src/exec/sink/vtablet_block_convertor.cpp index 95be814299ca9c..e59fa923375998 100644 --- a/be/src/exec/sink/vtablet_block_convertor.cpp +++ b/be/src/exec/sink/vtablet_block_convertor.cpp @@ -281,7 +281,8 @@ Status OlapTableBlockConvertor::_internal_validate_column(RuntimeState* state, B {len_column, len_type, "len"}, {nullptr, input_type, "result"}}); RETURN_IF_ERROR(func->execute(nullptr, tmp_block, {0, 1, 2}, 3, row_count)); - auto result_column = IColumn::mutate(std::move(tmp_block.get_by_position(3).column)); + auto result_column = + IColumn::mutate(std::move(tmp_block.get_by_position(3).column)); if (orig_column->is_nullable()) { orig_column = ColumnNullable::create( std::move(result_column), diff --git a/be/src/exprs/aggregate/aggregate_function_java_udaf.h b/be/src/exprs/aggregate/aggregate_function_java_udaf.h index c79880fc39ce15..cbd929824d21d6 100644 --- a/be/src/exprs/aggregate/aggregate_function_java_udaf.h +++ b/be/src/exprs/aggregate/aggregate_function_java_udaf.h @@ -187,8 +187,8 @@ struct AggregateJavaUdafData { RETURN_NOT_OK_STATUS_WITH_WARN(Jni::Env::Get(&env), "Java-Udaf get value function"); Block output_block; - output_block.insert(ColumnWithTypeAndName(result_type->create_column(), result_type, - "_result_")); + output_block.insert( + ColumnWithTypeAndName(result_type->create_column(), result_type, "_result_")); auto output_table_schema = JniDataBridge::parse_table_schema(&output_block); std::string output_nullable = result_type->is_nullable() ? 
"true" : "false"; std::map output_params = {{"is_nullable", output_nullable}, diff --git a/be/src/exprs/function/array/function_array_flatten.cpp b/be/src/exprs/function/array/function_array_flatten.cpp index 36be392b154e81..3f76bcfb015e4a 100644 --- a/be/src/exprs/function/array/function_array_flatten.cpp +++ b/be/src/exprs/function/array/function_array_flatten.cpp @@ -62,10 +62,9 @@ class FunctionArrayFlatten : public IFunction { auto* src_data_type_array = assert_cast(remove_nullable(src_data_type).get()); - auto result_column_offsets = - assert_cast( - src_column_array_ptr->get_offsets_column()) - .clone(); + auto result_column_offsets = assert_cast( + src_column_array_ptr->get_offsets_column()) + .clone(); auto* offsets = assert_cast(result_column_offsets.get()) ->get_data() .data(); diff --git a/be/src/exprs/table_function/python_udtf_function.cpp b/be/src/exprs/table_function/python_udtf_function.cpp index b2e016473189be..50313a2aa31287 100644 --- a/be/src/exprs/table_function/python_udtf_function.cpp +++ b/be/src/exprs/table_function/python_udtf_function.cpp @@ -263,8 +263,8 @@ Status PythonUDTFFunction::_convert_list_array_to_array_column( // Use read_column_from_arrow for optimized conversion // This directly converts Arrow ListArray to Doris ColumnArray // No struct unwrapping needed - Python server sends the correct format! 
- RETURN_IF_ERROR(array_serde->read_column_from_arrow( - *array_col, list_array.get(), 0, num_input_rows, _timezone_obj)); + RETURN_IF_ERROR(array_serde->read_column_from_arrow(*array_col, list_array.get(), 0, + num_input_rows, _timezone_obj)); // Handle nullable wrapper: all array elements are non-null // (empty arrays [] are non-null, different from NULL) diff --git a/be/src/exprs/table_function/vexplode_v2.cpp b/be/src/exprs/table_function/vexplode_v2.cpp index 2e8bf9e2734903..62a4ab1d66ae92 100644 --- a/be/src/exprs/table_function/vexplode_v2.cpp +++ b/be/src/exprs/table_function/vexplode_v2.cpp @@ -69,8 +69,8 @@ Status VExplodeV2TableFunction::_process_init_variant(Block* block, int value_co _multi_detail[children_column_idx].nested_type = array_type->get_nested_type(); } else { // null root, use nothing type - auto array_column = ColumnNullable::create( - ColumnArray::create(ColumnNothing::create(0)), ColumnUInt8::create(0)); + auto array_column = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)), + ColumnUInt8::create(0)); array_column->insert_many_defaults(variant_column.size()); _array_columns[children_column_idx] = std::move(array_column); _multi_detail[children_column_idx].nested_type = std::make_shared(); diff --git a/be/src/exprs/vcase_expr.h b/be/src/exprs/vcase_expr.h index dd58476b87cf88..b8e274be82a7bb 100644 --- a/be/src/exprs/vcase_expr.h +++ b/be/src/exprs/vcase_expr.h @@ -218,7 +218,8 @@ class VCaseExpr final : public VExpr { continue; } const auto* __restrict column_raw_data = - assert_cast(then_columns[i].get()) + assert_cast( + then_columns[i].get()) ->get_data() .data(); if constexpr (std::is_same_v || diff --git a/be/src/exprs/vcompound_pred.h b/be/src/exprs/vcompound_pred.h index af94d11cd40aeb..4a53c29dc5da9d 100644 --- a/be/src/exprs/vcompound_pred.h +++ b/be/src/exprs/vcompound_pred.h @@ -219,9 +219,8 @@ class VCompoundPred : public VectorizedFnCall { const uint8_t* __restrict null_map_data) { if (null_map_data == 
nullptr) { null_map_column = ColumnUInt8::create(size, 0); - null_map_data = assert_cast(null_map_column.get()) - ->get_data() - .data(); + null_map_data = + assert_cast(null_map_column.get())->get_data().data(); } return null_map_data; }; @@ -374,10 +373,8 @@ class VCompoundPred : public VectorizedFnCall { } template - void static do_null_pred(const uint8_t* __restrict lhs_data, - const uint8_t* __restrict lhs_null, - const uint8_t* __restrict rhs_data, - const uint8_t* __restrict rhs_null, + void static do_null_pred(const uint8_t* __restrict lhs_data, const uint8_t* __restrict lhs_null, + const uint8_t* __restrict rhs_data, const uint8_t* __restrict rhs_null, uint8_t* __restrict res_data, uint8_t* __restrict res_null, size_t size) { #ifdef NDEBUG @@ -407,18 +404,17 @@ class VCompoundPred : public VectorizedFnCall { const ColumnPtr& column, bool has_nullable_column) const { if (has_nullable_column) { const auto* nullable_column = assert_cast(column.get()); - auto* data_column = assert_cast( - nullable_column->get_nested_column_ptr().get()) - ->get_data() - .data(); + auto* data_column = + assert_cast(nullable_column->get_nested_column_ptr().get()) + ->get_data() + .data(); auto* null_map = assert_cast( nullable_column->get_null_map_column_ptr().get()) ->get_data() .data(); return std::make_pair(data_column, null_map); } else { - auto* data_column = - assert_cast(column.get())->get_data().data(); + auto* data_column = assert_cast(column.get())->get_data().data(); return std::make_pair(data_column, nullptr); } } diff --git a/be/src/format/jni/jni_data_bridge.cpp b/be/src/format/jni/jni_data_bridge.cpp index 80fb94545c97b3..fb1052c761e874 100644 --- a/be/src/format/jni/jni_data_bridge.cpp +++ b/be/src/format/jni/jni_data_bridge.cpp @@ -120,12 +120,11 @@ Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_co // Date and DateTime are deprecated and not supported. 
Status status = Status::OK(); switch (logical_type) { -#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ - case TYPE_INDEX: { \ - auto* data = reinterpret_cast(address.next_meta_as_ptr()); \ - status = _fill_fixed_length_column( \ - data_column, data, num_rows); \ - break; \ +#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ + case TYPE_INDEX: { \ + auto* data = reinterpret_cast(address.next_meta_as_ptr()); \ + status = _fill_fixed_length_column(data_column, data, num_rows); \ + break; \ } FOR_FIXED_LENGTH_TYPES(DISPATCH) #undef DISPATCH diff --git a/be/src/format/table/iceberg_reader_mixin.h b/be/src/format/table/iceberg_reader_mixin.h index 42c80c9b7d4ddc..565c77b5ab0255 100644 --- a/be/src/format/table/iceberg_reader_mixin.h +++ b/be/src/format/table/iceberg_reader_mixin.h @@ -554,6 +554,7 @@ Status IcebergReaderMixin::_equality_delete_base( if (read_rows > 0) { MutableBlock mutable_block(&eq_file_block); RETURN_IF_ERROR(mutable_block.merge(tmp_block)); + eq_file_block = mutable_block.to_block(); } } } diff --git a/be/src/information_schema/schema_scanner.cpp b/be/src/information_schema/schema_scanner.cpp index d410167e9caa7d..9526f21d7796f9 100644 --- a/be/src/information_schema/schema_scanner.cpp +++ b/be/src/information_schema/schema_scanner.cpp @@ -97,8 +97,8 @@ class ObjectPool; namespace { -void insert_column_range(ColumnWithTypeAndName* dst, const ColumnWithTypeAndName& src, - size_t start, size_t length) { +void insert_column_range(ColumnWithTypeAndName* dst, const ColumnWithTypeAndName& src, size_t start, + size_t length) { DORIS_CHECK(dst->column.get() != nullptr); DORIS_CHECK(src.column.get() != nullptr); MutableColumnPtr dst_column = IColumn::mutate(std::move(dst->column)); diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index dcb6555411696e..7ab3c89e10d6de 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -566,8 +566,9 @@ Status 
PointQueryExecutor::_lookup_row_data() { storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY; auto st = segment->seek_and_read_by_rowid(*_tablet->tablet_schema(), slot, row_id, column, storage_read_options, iter); - if (st.ok() && - _tablet->tablet_schema()->column_by_uid(slot->col_unique_id()).has_char_type()) { + if (st.ok() && _tablet->tablet_schema() + ->column_by_uid(slot->col_unique_id()) + .has_char_type()) { column->shrink_padding_chars(); } _result_block->replace_by_position(pos, std::move(column)); diff --git a/be/src/storage/partial_update_info.cpp b/be/src/storage/partial_update_info.cpp index 14c6b400078bba..fcd5bde0f8395f 100644 --- a/be/src/storage/partial_update_info.cpp +++ b/be/src/storage/partial_update_info.cpp @@ -960,7 +960,8 @@ Status BlockAggregator::aggregate_for_sequence_column( // the process logic here is the same as MemTable::_aggregate_for_flexible_partial_update_without_seq_col() // after this function, there will be at most 2 rows for a specified key std::vector* skip_bitmaps = - &get_mutable_skip_bitmap_column(block, _tablet_schema.skip_bitmap_col_idx())->get_data(); + &get_mutable_skip_bitmap_column(block, _tablet_schema.skip_bitmap_col_idx()) + ->get_data(); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*block, num_rows); auto filtered_block = _tablet_schema.create_block(); @@ -1030,7 +1031,8 @@ Status BlockAggregator::aggregate_for_insert_after_delete( // after this function, there will not be duplicate rows in block std::vector* skip_bitmaps = - &get_mutable_skip_bitmap_column(block, _tablet_schema.skip_bitmap_col_idx())->get_data(); + &get_mutable_skip_bitmap_column(block, _tablet_schema.skip_bitmap_col_idx()) + ->get_data(); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*block, num_rows); auto filter_column = ColumnUInt8::create(num_rows, 1); diff --git a/be/src/storage/segment/column_reader.cpp b/be/src/storage/segment/column_reader.cpp index 
716f6af31a1220..5c1574e85e446d 100644 --- a/be/src/storage/segment/column_reader.cpp +++ b/be/src/storage/segment/column_reader.cpp @@ -1415,9 +1415,8 @@ Status StructFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bo for (size_t i = 0; i < column_struct.tuple_size(); i++) { size_t num_read = *n; auto sub_column_ptr = IColumn::mutate(std::move(column_struct.get_column_ptr(i))); - Defer defer_sub_column {[&] { - column_struct.get_column_ptr(i) = std::move(sub_column_ptr); - }}; + Defer defer_sub_column { + [&] { column_struct.get_column_ptr(i) = std::move(sub_column_ptr); }}; bool column_has_null = false; RETURN_IF_ERROR( _sub_column_iterators[i]->next_batch(&num_read, sub_column_ptr, &column_has_null)); diff --git a/be/src/storage/segment/variant/variant_column_reader.cpp b/be/src/storage/segment/variant/variant_column_reader.cpp index 96c5d514b6fb48..1ac88b2479ef16 100644 --- a/be/src/storage/segment/variant/variant_column_reader.cpp +++ b/be/src/storage/segment/variant/variant_column_reader.cpp @@ -1602,8 +1602,8 @@ static void fill_nested_with_defaults(MutableColumnPtr& dst, MutableColumnPtr& s auto new_nested = dst_array->get_data_ptr()->clone_resized(sibling_array->get_data_ptr()->size()); ColumnPtr nested_column = std::move(new_nested); - auto new_array = make_nullable( - ColumnArray::create(nested_column, sibling_array->get_offsets_ptr())); + auto new_array = + make_nullable(ColumnArray::create(nested_column, sibling_array->get_offsets_ptr())); dst->insert_range_from(*new_array, 0, new_array->size()); #ifndef NDEBUG if (!dst_array->has_equal_offsets(*sibling_array)) { diff --git a/be/test/core/data_type/complex_type_test.cpp b/be/test/core/data_type/complex_type_test.cpp index 54dc360e2a8fa1..9d3baa87edecd0 100644 --- a/be/test/core/data_type/complex_type_test.cpp +++ b/be/test/core/data_type/complex_type_test.cpp @@ -20,8 +20,16 @@ #include #include +#include +#include "agent/be_exec_version_manager.h" +#include "core/assert_cast.h" 
#include "core/column/column.h" +#include "core/column/column_array.h" +#include "core/column/column_map.h" +#include "core/column/column_string.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" #include "core/data_type/data_type.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_map.h" @@ -34,6 +42,25 @@ namespace doris { +namespace { + +std::vector serialize_column(const DataTypePtr& type, const ColumnPtr& column) { + const int be_exec_version = BeExecVersionManager::get_newest_version(); + std::vector buf(type->get_uncompressed_serialized_bytes(*column, be_exec_version)); + char* end = type->serialize(*column, buf.data(), be_exec_version); + EXPECT_EQ(buf.data() + buf.size(), end); + return buf; +} + +void deserialize_column(const DataTypePtr& type, const std::vector& buf, + MutableColumnPtr* column) { + const int be_exec_version = BeExecVersionManager::get_newest_version(); + const char* end = type->deserialize(buf.data(), column, be_exec_version); + EXPECT_EQ(buf.data() + buf.size(), end); +} + +} // namespace + TEST(ComplexTypeTest, CreateColumnConstWithDefaultValue) { DataTypePtr n1 = std::make_shared(std::make_shared()); DataTypePtr n2 = std::make_shared(std::make_shared()); @@ -70,4 +97,92 @@ TEST(ComplexTypeTest, CreateColumnConstWithDefaultValue) { col_a->get(0, af); EXPECT_EQ(PrimitiveType::TYPE_ARRAY, af.get_type()); } + +TEST(ComplexTypeTest, DeserializeArrayWritesBackSharedNestedColumn) { + DataTypePtr nested_type = std::make_shared(); + DataTypePtr array_type = std::make_shared(nested_type); + + auto src_column = array_type->create_column(); + src_column->insert(Field::create_field( + Array {Field::create_field(1), Field::create_field(2)})); + src_column->insert(Field::create_field(Array {Field::create_field(3)})); + auto buf = serialize_column(array_type, src_column->get_ptr()); + + ColumnPtr shared_nested_column = ColumnInt32::create(); + MutableColumnPtr dst_column = 
ColumnArray::create(shared_nested_column); + deserialize_column(array_type, buf, &dst_column); + + const auto& array_column = assert_cast(*dst_column); + EXPECT_EQ(2, array_column.size()); + EXPECT_EQ(0, shared_nested_column->size()); + EXPECT_EQ(3, array_column.get_data().size()); + EXPECT_EQ(2, array_column.get_offsets()[0]); + EXPECT_EQ(3, array_column.get_offsets()[1]); + + const auto& data = assert_cast(array_column.get_data()).get_data(); + EXPECT_EQ(1, data[0]); + EXPECT_EQ(2, data[1]); + EXPECT_EQ(3, data[2]); +} + +TEST(ComplexTypeTest, DeserializeMapWritesBackSharedKeyAndValueColumns) { + DataTypePtr key_type = std::make_shared(); + DataTypePtr value_type = std::make_shared(); + DataTypePtr map_type = std::make_shared(key_type, value_type); + + auto src_column = map_type->create_column(); + Map map; + map.push_back(Field::create_field( + Array {Field::create_field(10), Field::create_field(20)})); + map.push_back(Field::create_field( + Array {Field::create_field("a"), Field::create_field("b")})); + src_column->insert(Field::create_field(map)); + auto buf = serialize_column(map_type, src_column->get_ptr()); + + ColumnPtr shared_keys_column = ColumnInt32::create(); + ColumnPtr shared_values_column = ColumnString::create(); + ColumnPtr offsets_column = ColumnArray::ColumnOffsets::create(); + MutableColumnPtr dst_column = + ColumnMap::create(shared_keys_column, shared_values_column, offsets_column); + deserialize_column(map_type, buf, &dst_column); + + const auto& map_column = assert_cast(*dst_column); + EXPECT_EQ(1, map_column.size()); + EXPECT_EQ(0, shared_keys_column->size()); + EXPECT_EQ(0, shared_values_column->size()); + EXPECT_EQ(2, map_column.get_keys().size()); + EXPECT_EQ(2, map_column.get_values().size()); + + const auto& keys = assert_cast(map_column.get_keys()).get_data(); + EXPECT_EQ(10, keys[0]); + EXPECT_EQ(20, keys[1]); + EXPECT_EQ("a", map_column.get_values().get_data_at(0).to_string()); + EXPECT_EQ("b", 
map_column.get_values().get_data_at(1).to_string()); +} + +TEST(ComplexTypeTest, DeserializeStructWritesBackSharedChildren) { + DataTypes children_types {std::make_shared(), + std::make_shared()}; + DataTypePtr struct_type = std::make_shared(children_types); + + auto src_column = struct_type->create_column(); + src_column->insert(Field::create_field( + Tuple {Field::create_field(7), Field::create_field("seven")})); + auto buf = serialize_column(struct_type, src_column->get_ptr()); + + ColumnPtr shared_int_column = ColumnInt32::create(); + ColumnPtr shared_string_column = ColumnString::create(); + Columns shared_columns {shared_int_column, shared_string_column}; + MutableColumnPtr dst_column = ColumnStruct::create(shared_columns); + deserialize_column(struct_type, buf, &dst_column); + + const auto& struct_column = assert_cast(*dst_column); + EXPECT_EQ(1, struct_column.size()); + EXPECT_EQ(0, shared_int_column->size()); + EXPECT_EQ(0, shared_string_column->size()); + + const auto& ints = assert_cast(struct_column.get_column(0)).get_data(); + EXPECT_EQ(7, ints[0]); + EXPECT_EQ("seven", struct_column.get_column(1).get_data_at(0).to_string()); +} } // namespace doris From 41845976c748b0533bbf2a0b794488a3e517f6e5 Mon Sep 17 00:00:00 2001 From: zhaochangle Date: Fri, 8 May 2026 00:11:46 +0800 Subject: [PATCH 4/4] [fix](be) Restore COW ownership in sort aggregate and nullable conversion Issue Number: None Related PR: #63001 Problem Summary: After assume_mutable started asserting exclusive ownership, sort aggregate state still appended through immutable Block columns on the hot add/merge path, and nullable schema-change conversion could copy a stale null-map prefix after ORC/Parquet COW cloning. Keep the sort aggregate state as a MutableBlock, remove a read-only mutable assertion, and copy nullable null-map slices from the appended source range. 
Release note None Check List (For Author) - Test: Unit Test - ./run-be-ut.sh --run --filter=AggregateFunctionSortDataTest.merge_does_not_share_rhs_block:OrcReaderFillDataTest.SchemaChangeNullableNullMapUsesAppendedSlice:ParquetColumnConvertTest.AlignNullMapUsesAppendedSourceSlice -j 100 - ./run-regression-test.sh --run -f regression-test/suites/query_p0/runtimefilterV2/rfv2.groovy - ./build.sh --be -j 100 - build-support/clang-format.sh - build-support/check-format.sh - git diff --check - Local cluster restart and select 1/show backends - build-support/run-clang-tidy.sh --build-dir be/build_ASAN attempted; changed-line findings were fixed, remaining failures are existing ORC/test include/jni-util baseline or tooling diagnostics - Behavior changed: No - Does this need documentation: No --- .../aggregate/aggregate_function_null_v2.h | 3 +- .../exprs/aggregate/aggregate_function_sort.h | 62 +++++++++++++------ be/src/format/column_type_convert.cpp | 8 +-- be/src/format/orc/vorc_reader.cpp | 12 ++-- .../format/parquet/parquet_column_convert.h | 9 +-- .../exprs/aggregate/agg_array_agg_test.cpp | 29 +++++++++ .../format/orc/orc_reader_fill_data_test.cpp | 40 +++++++++++- .../parquet/parquet_column_convert_test.cpp | 38 ++++++++++++ 8 files changed, 166 insertions(+), 35 deletions(-) diff --git a/be/src/exprs/aggregate/aggregate_function_null_v2.h b/be/src/exprs/aggregate/aggregate_function_null_v2.h index aa2c9f3bb39792..a3b513d6014116 100644 --- a/be/src/exprs/aggregate/aggregate_function_null_v2.h +++ b/be/src/exprs/aggregate/aggregate_function_null_v2.h @@ -259,8 +259,7 @@ class AggregateFunctionNullBaseInlineV2 : public IAggregateFunctionHelperget_nested_column().assume_mutable().get(); + const IColumn* src_nested_column = &src_nullable_col->get_nested_column(); if (src_nullable_col->has_null()) { for (size_t i = 0; i < num_rows; ++i) { if (!src_null_map_data[i]) { diff --git a/be/src/exprs/aggregate/aggregate_function_sort.h 
b/be/src/exprs/aggregate/aggregate_function_sort.h index e001cb0c4c419d..2a7530e817fd3b 100644 --- a/be/src/exprs/aggregate/aggregate_function_sort.h +++ b/be/src/exprs/aggregate/aggregate_function_sort.h @@ -46,33 +46,27 @@ namespace doris { struct AggregateFunctionSortData { const SortDescription sort_desc; - Block block; + // The aggregate state is the sole owner of these columns and appends rows in add(), which is + // a hot path. Keep the long-lived state as MutableBlock and only materialize temporary Block + // views for APIs that require immutable Block input. + MutableBlock block; // The construct only support the template compiler, useless AggregateFunctionSortData() : sort_desc() {}; AggregateFunctionSortData(SortDescription sort_desc, const Block& block) : sort_desc(std::move(sort_desc)), block(block.clone_empty()) {} - void merge(const AggregateFunctionSortData& rhs) { - if (block.rows() == 0) { - block = rhs.block; - } else { - for (size_t i = 0; i < block.columns(); i++) { - auto column = block.get_by_position(i).column->assume_mutable(); - auto column_rhs = rhs.block.get_by_position(i).column; - column->insert_range_from(*column_rhs, 0, rhs.block.rows()); - } - } - } + void merge(const AggregateFunctionSortData& rhs) { append_block(rhs, 0, rhs.block.rows()); } void serialize(const RuntimeState* state, BufferWritable& buf) const { PBlock pblock; size_t uncompressed_bytes = 0; size_t compressed_bytes = 0; int64_t compressed_time = 0; - auto st = block.serialize(state->be_exec_version(), &pblock, &uncompressed_bytes, - &compressed_bytes, &compressed_time, - segment_v2::CompressionTypePB::NO_COMPRESSION); + auto block_view = to_block_view(); + auto st = block_view.serialize(state->be_exec_version(), &pblock, &uncompressed_bytes, + &compressed_bytes, &compressed_time, + segment_v2::CompressionTypePB::NO_COMPRESSION); if (!st.ok()) { throw doris::Exception(st); } @@ -88,12 +82,14 @@ struct AggregateFunctionSortData { pblock.ParseFromString(data); 
[[maybe_unused]] size_t uncompressed_size = 0; [[maybe_unused]] int64_t uncompressed_time = 0; - auto st = block.deserialize(pblock, &uncompressed_size, &uncompressed_time); + Block deserialized_block; + auto st = deserialized_block.deserialize(pblock, &uncompressed_size, &uncompressed_time); // If memory allocate failed during deserialize, st is not ok, throw exception here to // stop the query. if (!st.ok()) { throw doris::Exception(st); } + block = MutableBlock(std::move(deserialized_block)); } void add(const IColumn** columns, size_t columns_num, size_t row_num) { @@ -102,14 +98,40 @@ struct AggregateFunctionSortData { block.columns(), columns_num); for (size_t i = 0; i < columns_num; ++i) { - auto column = block.get_by_position(i).column->assume_mutable(); - column->insert_from(*columns[i], row_num); + block.get_column_by_position(i)->insert_from(*columns[i], row_num); } } void sort() { + auto block_view = to_block_view(); + auto sorted_block = block_view.clone_empty(); HybridSorter hybrid_sorter; - sort_block(block, block, sort_desc, hybrid_sorter, block.rows()); + sort_block(block_view, sorted_block, sort_desc, hybrid_sorter, block_view.rows()); + block = MutableBlock(std::move(sorted_block)); + } + +private: + void append_block(const AggregateFunctionSortData& rhs, size_t start, size_t length) { + DCHECK_EQ(block.columns(), rhs.block.columns()); + for (size_t i = 0; i < block.columns(); ++i) { + DCHECK(block.get_datatype_by_position(i)->equals( + *rhs.block.get_datatype_by_position(i))) + << "lhs type: " << block.get_datatype_by_position(i)->get_name() + << ", rhs type: " << rhs.block.get_datatype_by_position(i)->get_name(); + block.get_column_by_position(i)->insert_range_from(*rhs.block.get_column_by_position(i), + start, length); + } + } + + Block to_block_view() const { + ColumnsWithTypeAndName columns_with_schema; + columns_with_schema.reserve(block.columns()); + for (size_t i = 0; i < block.columns(); ++i) { + columns_with_schema.emplace_back( + 
static_cast(*block.get_column_by_position(i)).get_ptr(), + block.get_datatype_by_position(i), ""); + } + return {std::move(columns_with_schema)}; } }; @@ -177,7 +199,7 @@ class AggregateFunctionSort final ColumnRawPtrs arguments_nested; for (int i = 0; i < _arguments.size() - _sort_desc.size(); i++) { arguments_nested.emplace_back( - this->data(place).block.get_by_position(i).column.get()); + this->data(place).block.get_column_by_position(i).get()); } _nested_func->add_batch_single_place(arguments_nested[0]->size(), diff --git a/be/src/format/column_type_convert.cpp b/be/src/format/column_type_convert.cpp index cd71ffb5babb33..b7a8388b5be771 100644 --- a/be/src/format/column_type_convert.cpp +++ b/be/src/format/column_type_convert.cpp @@ -117,10 +117,10 @@ ColumnPtr ColumnTypeConverter::get_column(const DataTypePtr& src_type, ColumnPtr _cached_src_column->assume_mutable()->clear(); if (dst_type->is_nullable()) { - // In order to share null map between parquet converted src column and dst column to avoid copying. It is very tricky that will - // call mutable function `doris_nullable_column->get_null_map_column_ptr()` which will set `_need_update_has_null = true`. - // Because some operations such as agg will call `has_null()` to set `_need_update_has_null = false`. - auto* doris_nullable_column = static_cast(dst_column.get()); + // Seed the source nullable wrapper with the destination's current null map. Under the + // assert-mutability COW contract ColumnNullable::create() mutates/clones the subcolumns, so + // readers that append file nulls must copy back only the newly appended null-map slice. 
+ const auto* doris_nullable_column = static_cast(dst_column.get()); return ColumnNullable::create(_cached_src_column, doris_nullable_column->get_null_map_column_ptr()); } diff --git a/be/src/format/orc/vorc_reader.cpp b/be/src/format/orc/vorc_reader.cpp index d20a290a75add7..cff7d595110263 100644 --- a/be/src/format/orc/vorc_reader.cpp +++ b/be/src/format/orc/vorc_reader.cpp @@ -133,7 +133,7 @@ static void fill_orc_null_map(ColumnNullable* nullable_column, const orc::Column } static void align_orc_null_map(const ColumnPtr& src_column, ColumnNullable* dst_nullable_column, - size_t new_rows) { + size_t src_null_map_start, size_t new_rows) { auto& dst_null_map = dst_nullable_column->get_null_map_column(); const size_t old_rows = dst_nullable_column->get_nested_column().size(); const size_t expected_rows = old_rows + new_rows; @@ -143,8 +143,9 @@ static void align_orc_null_map(const ColumnPtr& src_column, ColumnNullable* dst_ DCHECK_EQ(dst_null_map.size(), old_rows); if (src_column->is_nullable()) { const auto* src_nullable = assert_cast(src_column.get()); - DCHECK_GE(src_nullable->size(), new_rows); - dst_null_map.insert_range_from(src_nullable->get_null_map_column(), 0, new_rows); + DCHECK_GE(src_nullable->get_null_map_column().size(), src_null_map_start + new_rows); + dst_null_map.insert_range_from(src_nullable->get_null_map_column(), src_null_map_start, + new_rows); } else { dst_null_map.insert_many_vals(0, new_rows); } @@ -2217,11 +2218,13 @@ Status OrcReader::_orc_column_to_doris_column( mutable_resolved_column = IColumn::mutate(std::move(resolved_column)); } + size_t src_null_map_start = 0; if (mutable_resolved_column->is_nullable()) { SCOPED_RAW_TIMER(&_statistics.decode_null_map_time); auto* nullable_column = reinterpret_cast(mutable_resolved_column.get()); data_column = nullable_column->get_nested_column_ptr(); + src_null_map_start = nullable_column->get_null_map_column().size(); fill_orc_null_map(nullable_column, cvb, num_values); } else { if 
(cvb->hasNulls) { @@ -2250,9 +2253,10 @@ Status OrcReader::_orc_column_to_doris_column( doris_column = IColumn::mutate(std::move(doris_column)); auto converted_column = doris_column->assume_mutable(); if (converted_column->is_nullable()) { + const size_t new_rows = remove_nullable(resolved_column)->size(); align_orc_null_map(resolved_column, reinterpret_cast(converted_column.get()), - resolved_column->size()); + src_null_map_start, new_rows); } return converter->convert(resolved_column, converted_column); } else { diff --git a/be/src/format/parquet/parquet_column_convert.h b/be/src/format/parquet/parquet_column_convert.h index 6608c542e4a79e..ee7dfd577733b6 100644 --- a/be/src/format/parquet/parquet_column_convert.h +++ b/be/src/format/parquet/parquet_column_convert.h @@ -212,7 +212,7 @@ inline size_t get_mutable_inner_column_size(const ColumnPtr& column) { } inline void align_null_map(ColumnPtr& src_column, ColumnPtr& dst_column, size_t old_rows, - size_t new_rows) { + size_t new_rows, size_t src_null_map_start = 0) { if (!dst_column->is_nullable()) { return; } @@ -227,8 +227,9 @@ inline void align_null_map(ColumnPtr& src_column, ColumnPtr& dst_column, size_t DCHECK_EQ(dst_null_map.size(), old_rows); if (src_column->is_nullable()) { const auto* src_nullable = assert_cast(src_column.get()); - DCHECK_GE(src_nullable->size(), new_rows); - dst_null_map.insert_range_from(src_nullable->get_null_map_column(), 0, new_rows); + DCHECK_GE(src_nullable->get_null_map_column().size(), src_null_map_start + new_rows); + dst_null_map.insert_range_from(src_nullable->get_null_map_column(), src_null_map_start, + new_rows); } else { dst_null_map.insert_many_vals(0, new_rows); } @@ -322,7 +323,7 @@ class PhysicalToLogicalConverter { auto converted_column = dst_logical_col->assume_mutable(); RETURN_IF_ERROR(_logical_converter->convert(src_logical_column, converted_column)); align_null_map(src_logical_column, dst_logical_col, dst_old_rows, - get_mutable_inner_column_size(dst_logical_col) 
- dst_old_rows); + get_mutable_inner_column_size(dst_logical_col) - dst_old_rows, src_old_rows); return Status::OK(); } diff --git a/be/test/exprs/aggregate/agg_array_agg_test.cpp b/be/test/exprs/aggregate/agg_array_agg_test.cpp index 101328496df850..6b27a2b55b03fd 100644 --- a/be/test/exprs/aggregate/agg_array_agg_test.cpp +++ b/be/test/exprs/aggregate/agg_array_agg_test.cpp @@ -42,6 +42,7 @@ #include "exprs/aggregate/agg_function_test.h" #include "exprs/aggregate/aggregate_function.h" #include "exprs/aggregate/aggregate_function_simple_factory.h" +#include "exprs/aggregate/aggregate_function_sort.h" #include "gtest/gtest_pred_impl.h" namespace doris { @@ -192,4 +193,32 @@ TEST_F(AggregateFunctionArrayAggTest, test_array_agg_aint64_foreach) { ColumnWithTypeAndName(std::move(array_array_column), array_array_data_type, "column")); } +TEST(AggregateFunctionSortDataTest, merge_does_not_share_rhs_block) { + auto data_type = std::make_shared(); + Block prototype({ColumnWithTypeAndName(data_type->create_column(), data_type, "value"), + ColumnWithTypeAndName(data_type->create_column(), data_type, "sort_key")}); + SortDescription sort_desc {SortColumnDescription(1, 1, 1)}; + + AggregateFunctionSortData lhs(sort_desc, prototype); + AggregateFunctionSortData rhs1(sort_desc, prototype); + AggregateFunctionSortData rhs2(sort_desc, prototype); + + auto values = ColumnInt64::create(); + values->insert_value(10); + values->insert_value(20); + auto sort_keys = ColumnInt64::create(); + sort_keys->insert_value(2); + sort_keys->insert_value(1); + const IColumn* row0[] = {values.get(), sort_keys.get()}; + const IColumn* row1[] = {values.get(), sort_keys.get()}; + + rhs1.add(row0, 2, 0); + rhs2.add(row1, 2, 1); + + lhs.merge(rhs1); + ASSERT_NO_THROW(lhs.merge(rhs2)); + ASSERT_EQ(lhs.block.rows(), 2); + ASSERT_EQ(rhs1.block.rows(), 1); +} + } // namespace doris diff --git a/be/test/format/orc/orc_reader_fill_data_test.cpp b/be/test/format/orc/orc_reader_fill_data_test.cpp index 
12c1dd209c585b..4fdc44b36bbebb 100644 --- a/be/test/format/orc/orc_reader_fill_data_test.cpp +++ b/be/test/format/orc/orc_reader_fill_data_test.cpp @@ -19,6 +19,7 @@ #include +#include "core/assert_cast.h" #include "core/column/column_array.h" #include "core/column/column_struct.h" #include "core/data_type/data_type_array.h" @@ -124,6 +125,43 @@ TEST_F(OrcReaderFillDataTest, TestFillLongColumnWithNull) { } } +TEST_F(OrcReaderFillDataTest, SchemaChangeNullableNullMapUsesAppendedSlice) { + std::vector values = {10, 20, 30}; + std::vector nulls = {true, false, true}; + auto batch = create_long_batch(values.size(), values, nulls); + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::LONG); + + auto nested_column = ColumnFloat64::create(); + nested_column->insert_value(1); + nested_column->insert_value(2); + auto null_map_column = ColumnUInt8::create(); + null_map_column->insert_value(0); + null_map_column->insert_value(0); + ColumnPtr doris_column = + ColumnNullable::create(std::move(nested_column), std::move(null_map_column)); + auto data_type = make_nullable(std::make_shared()); + + TFileScanRangeParams params; + TFileRangeDesc range; + auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); + + Status status = reader->_orc_column_to_doris_column( + "test_schema_change_nullable", doris_column, data_type, const_node, orc_type_ptr.get(), + batch.get(), values.size()); + + ASSERT_TRUE(status.ok()) << status.to_string(); + const auto* nullable_column = assert_cast(doris_column.get()); + ASSERT_EQ(nullable_column->size(), 5); + + const auto& null_map = nullable_column->get_null_map_data(); + ASSERT_EQ(null_map.size(), 5); + EXPECT_EQ(null_map[0], 0); + EXPECT_EQ(null_map[1], 0); + EXPECT_EQ(null_map[2], 1); + EXPECT_EQ(null_map[3], 0); + EXPECT_EQ(null_map[4], 1); +} + TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) { // Array类型测试 { @@ -478,4 +516,4 @@ TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) { 
"+-------------------+\n"); } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/format/parquet/parquet_column_convert_test.cpp b/be/test/format/parquet/parquet_column_convert_test.cpp index e50d28ef0e7930..64b821b694013f 100644 --- a/be/test/format/parquet/parquet_column_convert_test.cpp +++ b/be/test/format/parquet/parquet_column_convert_test.cpp @@ -22,6 +22,9 @@ #include #include +#include "core/assert_cast.h" +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" #include "util/timezone_utils.h" namespace doris::parquet { @@ -119,4 +122,39 @@ TEST(ParquetColumnConvertTest, LookupPathMatchesOriginal) { } } +TEST(ParquetColumnConvertTest, AlignNullMapUsesAppendedSourceSlice) { + auto dst_nested_column = ColumnFloat64::create(); + dst_nested_column->insert_value(1); + dst_nested_column->insert_value(2); + auto dst_null_map_column = ColumnUInt8::create(); + dst_null_map_column->insert_value(0); + dst_null_map_column->insert_value(0); + ColumnPtr dst_column = + ColumnNullable::create(std::move(dst_nested_column), std::move(dst_null_map_column)); + + auto src_nested_column = ColumnInt64::create(); + for (int i = 0; i < 5; ++i) { + src_nested_column->insert_value(i); + } + auto src_null_map_column = ColumnUInt8::create(); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(1); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(1); + ColumnPtr src_column = + ColumnNullable::create(std::move(src_nested_column), std::move(src_null_map_column)); + + align_null_map(src_column, dst_column, 2, 3, 2); + + const auto* nullable_column = assert_cast(dst_column.get()); + const auto& null_map = nullable_column->get_null_map_data(); + ASSERT_EQ(null_map.size(), 5); + EXPECT_EQ(null_map[0], 0); + EXPECT_EQ(null_map[1], 0); + EXPECT_EQ(null_map[2], 1); + EXPECT_EQ(null_map[3], 0); + EXPECT_EQ(null_map[4], 1); +} + } // 
namespace doris::parquet