diff --git a/be/src/core/block/block.cpp b/be/src/core/block/block.cpp index 2bb156325443e3..bde0f01e9d0fdf 100644 --- a/be/src/core/block/block.cpp +++ b/be/src/core/block/block.cpp @@ -581,7 +581,7 @@ MutableColumns Block::mutate_columns() { MutableColumns columns(num_columns); for (size_t i = 0; i < num_columns; ++i) { DCHECK(data[i].type); - columns[i] = data[i].column ? (*std::move(data[i].column)).mutate() + columns[i] = data[i].column ? IColumn::mutate(std::move(data[i].column)) : data[i].type->create_column(); } return columns; @@ -655,9 +655,11 @@ void Block::clear_column_data(int64_t column_size) noexcept { } for (auto& d : data) { if (d.column) { - // Temporarily disable reference count check because a column might be referenced multiple times within a block. - // Queries like this: `select c, c from t1;` - (*std::move(d.column)).assume_mutable()->clear(); + if (d.column->is_exclusive()) { + d.column->assume_mutable()->clear(); + } else { + d.column = d.column->clone_empty(); + } } } } @@ -1085,7 +1087,13 @@ void Block::shrink_char_type_column_suffix_zero(const std::vector& char_ for (auto idx : char_type_idx) { if (idx < data.size()) { auto& col_and_name = this->get_by_position(idx); - col_and_name.column->assume_mutable()->shrink_padding_chars(); + if (col_and_name.column->is_exclusive()) { + col_and_name.column->assume_mutable()->shrink_padding_chars(); + } else { + auto mutable_col = std::move(*col_and_name.column).mutate(); + mutable_col->shrink_padding_chars(); + col_and_name.column = std::move(mutable_col); + } } } } diff --git a/be/src/core/column/column.cpp b/be/src/core/column/column.cpp index b0056e3d4377bd..3fea47f93887ec 100644 --- a/be/src/core/column/column.cpp +++ b/be/src/core/column/column.cpp @@ -232,10 +232,11 @@ bool is_column_const(const IColumn& column) { void IColumn::check_const_only_in_top_level() const { ColumnCallback throw_if_const = [&](WrappedPtr& column) { - if (is_column_const(*column)) { + const ColumnPtr& col = const_cast(column); + if (is_column_const(*col)) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "const column is not allowed to be nested, but got {}", - column->get_name()); + col->get_name()); } }; const_cast(this)->for_each_subcolumn(throw_if_const); diff --git a/be/src/core/column/column.h b/be/src/core/column/column.h index d20ecc9d820846..c48c7a55da84cd 100644 --- a/be/src/core/column/column.h +++ b/be/src/core/column/column.h @@ -581,16 +581,20 @@ class IColumn : public COW { MutablePtr mutate() const&& { MutablePtr res = shallow_mutate(); - res->for_each_subcolumn( - [](WrappedPtr& subcolumn) { subcolumn = std::move(*subcolumn).mutate(); }); + res->for_each_subcolumn([](WrappedPtr& subcolumn) { + static_cast(subcolumn) = + std::move(*static_cast(subcolumn)).mutate(); + }); return res; } static MutablePtr mutate(Ptr ptr) { MutablePtr res = ptr->shallow_mutate(); /// Now use_count is 2. ptr.reset(); /// Reset use_count to 1. - res->for_each_subcolumn( - [](WrappedPtr& subcolumn) { subcolumn = std::move(*subcolumn).mutate(); }); + res->for_each_subcolumn([](WrappedPtr& subcolumn) { + static_cast(subcolumn) = + std::move(*static_cast(subcolumn)).mutate(); + }); return res; } diff --git a/be/src/core/column/column_array.cpp b/be/src/core/column/column_array.cpp index 6de4d96cc326f7..7022d8f5aaaf70 100644 --- a/be/src/core/column/column_array.cpp +++ b/be/src/core/column/column_array.cpp @@ -63,7 +63,9 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& of // } // #endif check_const_only_in_top_level(); - const auto* offsets_concrete = typeid_cast(offsets.get()); + // Use const access to avoid triggering assume_mutable_ref() during construction. + const auto* offsets_concrete = + typeid_cast(static_cast(offsets).get()); if (!offsets_concrete) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "offsets_column must be a ColumnUInt64"); @@ -98,6 +100,21 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nest offsets = ColumnOffsets::create(); } +ColumnArray::ColumnArray(SharedTag, ColumnPtr nested_column, ColumnPtr offsets_column) { + if (is_column_const(*nested_column)) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "const column is not allowed to be nested, but got {}", + nested_column->get_name()); + } + if (is_column_const(*offsets_column)) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "const column is not allowed to be nested, but got {}", + offsets_column->get_name()); + } + static_cast(data) = std::move(nested_column); + static_cast(offsets) = std::move(offsets_column); +} + void ColumnArray::shrink_padding_chars() { data->shrink_padding_chars(); } diff --git a/be/src/core/column/column_array.h b/be/src/core/column/column_array.h index c11547bdbf5e2d..81c75f774d243a 100644 --- a/be/src/core/column/column_array.h +++ b/be/src/core/column/column_array.h @@ -75,6 +75,10 @@ class ColumnArray final : public COWHelper { /** Create an empty column of arrays with the type of values as in the column `nested_column` */ explicit ColumnArray(MutableColumnPtr&& nested_column); + /** Create an array column with shared (possibly non-exclusive) nested column and offsets. */ + struct SharedTag {}; + ColumnArray(SharedTag, ColumnPtr nested_column, ColumnPtr offsets_column); + ColumnArray(const ColumnArray&) = default; ColumnArray() = default; @@ -98,12 +102,16 @@ class ColumnArray final : public COWHelper { using Base = COWHelper; static MutablePtr create(const ColumnPtr& nested_column, const ColumnPtr& offsets_column) { - return ColumnArray::create(nested_column->assume_mutable(), - offsets_column->assume_mutable()); + // Construct with shared columns preserved (no cloning), as create(ColumnPtr) is designed + // to accept immutable/shared arguments per the COW contract. + return Base::create(SharedTag {}, nested_column, offsets_column); } static MutablePtr create(const ColumnPtr& nested_column) { - return ColumnArray::create(nested_column->assume_mutable()); + // Construct with shared columns preserved (no cloning), as create(ColumnPtr) is designed + // to accept immutable/shared arguments per the COW contract. + ColumnPtr empty_offsets = ColumnOffsets::create(); + return Base::create(SharedTag {}, nested_column, std::move(empty_offsets)); } template empty() != create_with_empty) { + const IColumn& col = get_data_column(); + if (col.empty() != create_with_empty) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Incorrect size of nested column in constructor of ColumnConst: {}, " "create_with_empty: {}.", - data->size(), create_with_empty); + col.size(), create_with_empty); } - if (data->size() != 1 && !create_with_empty) { + if (col.size() != 1 && !create_with_empty) { throw doris::Exception( ErrorCode::INTERNAL_ERROR, "Incorrect size of nested column in constructor of ColumnConst: {}, must be 1.", - data->size()); + col.size()); } } @@ -108,7 +109,10 @@ void ColumnConst::get_permutation(bool /*reverse*/, size_t /*limit*/, int /*nan_ } void ColumnConst::replace_float_special_values() { - data->replace_float_special_values(); + // COW: get exclusive ownership of data before mutating + auto mutable_data = IColumn::mutate(std::move(static_cast(data))); + mutable_data->replace_float_special_values(); + data = std::move(mutable_data); } std::pair check_column_const_set_readability(const IColumn& column, diff --git a/be/src/core/column/column_const.h b/be/src/core/column/column_const.h index 92a86628526384..7f648ece468dd1 100644 --- a/be/src/core/column/column_const.h +++ b/be/src/core/column/column_const.h @@ -240,7 +240,8 @@ class ColumnConst final : public COWHelper { bool has_enough_capacity(const IColumn& src) const override { return true; } int compare_at(size_t, size_t, const IColumn& rhs, int nan_direction_hint) const override { - auto rhs_const_column = assert_cast(rhs); + const auto& rhs_const_column = + assert_cast(rhs); const auto* this_nullable = check_and_get_column(data.get()); const auto* rhs_nullable = @@ -321,7 +322,11 @@ class ColumnConst final : public COWHelper { size_t deserialize_impl(const char* pos) override { ++s; - return data->deserialize_impl(pos); + ColumnPtr owned = std::move(static_cast(data)); + auto mutable_data = IColumn::mutate(std::move(owned)); + size_t ret = mutable_data->deserialize_impl(pos); + data = std::move(mutable_data); + return ret; } void replace_float_special_values() override; diff --git a/be/src/core/column/column_map.cpp b/be/src/core/column/column_map.cpp index 48db377d888b75..30019da155f3b6 100644 --- a/be/src/core/column/column_map.cpp +++ b/be/src/core/column/column_map.cpp @@ -518,35 +518,54 @@ void ColumnMap::insert_range_from_ignore_overflow(const IColumn& src, size_t sta } ColumnPtr ColumnMap::filter(const Filter& filt, ssize_t result_size_hint) const { - auto k_arr = - ColumnArray::create(keys_column->assume_mutable(), offsets_column->assume_mutable()) - ->filter(filt, result_size_hint); - auto v_arr = - ColumnArray::create(values_column->assume_mutable(), offsets_column->assume_mutable()) - ->filter(filt, result_size_hint); + // For const filter we must clone subcolumns so the original ColumnMap remains intact. + // IColumn::mutate(copy) clones if use_count>1, returns self if exclusive. + auto offsets_mut = IColumn::mutate(static_cast(offsets_column)); + MutableColumnPtr offsets_copy = offsets_mut->clone_empty(); + offsets_copy->insert_range_from(*offsets_mut, 0, offsets_mut->size()); + auto k_arr = ColumnArray::create(IColumn::mutate(static_cast(keys_column)), + std::move(offsets_mut)) + ->filter(filt, result_size_hint); + auto v_arr = ColumnArray::create(IColumn::mutate(static_cast(values_column)), + std::move(offsets_copy)) + ->filter(filt, result_size_hint); return ColumnMap::create(assert_cast(*k_arr).get_data_ptr(), assert_cast(*v_arr).get_data_ptr(), assert_cast(*k_arr).get_offsets_ptr()); } size_t ColumnMap::filter(const Filter& filter) { - MutableColumnPtr copied_off = offsets_column->clone_empty(); - copied_off->insert_range_from(*offsets_column, 0, offsets_column->size()); - ColumnArray::create(keys_column->assume_mutable(), offsets_column->assume_mutable()) - ->filter(filter); - ColumnArray::create(values_column->assume_mutable(), copied_off->assume_mutable()) - ->filter(filter); - return get_offsets().size(); + // Move subcolumns out of this ColumnMap to get exclusive ownership, then write back. + auto keys_mut = IColumn::mutate(std::move(static_cast(keys_column))); + auto offsets_mut = IColumn::mutate(std::move(static_cast(offsets_column))); + auto values_mut = IColumn::mutate(std::move(static_cast(values_column))); + // Clone offsets for values (both keys and values share the same offsets structure) + MutableColumnPtr copied_off = offsets_mut->clone_empty(); + copied_off->insert_range_from(*offsets_mut, 0, offsets_mut->size()); + auto k_arr = ColumnArray::create(std::move(keys_mut), std::move(offsets_mut)); + k_arr->filter(filter); + auto v_arr = ColumnArray::create(std::move(values_mut), std::move(copied_off)); + v_arr->filter(filter); + // Put filtered subcolumns back + static_cast(keys_column) = k_arr->get_data_ptr(); + static_cast(offsets_column) = k_arr->get_offsets_ptr(); + static_cast(values_column) = v_arr->get_data_ptr(); + // Use const access to avoid assume_mutable_ref() on the just-written-back offsets_column + // (k_arr still holds a ref, so use_count > 1 until k_arr goes out of scope) + return static_cast(offsets_column)->size(); } MutableColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const { - // Make a temp column array - auto k_arr = - ColumnArray::create(keys_column->assume_mutable(), offsets_column->assume_mutable()) - ->permute(perm, limit); - auto v_arr = - ColumnArray::create(values_column->assume_mutable(), offsets_column->assume_mutable()) - ->permute(perm, limit); + // Const permute: clone subcolumns so the original ColumnMap remains intact. + auto offsets_mut = IColumn::mutate(static_cast(offsets_column)); + MutableColumnPtr offsets_copy = offsets_mut->clone_empty(); + offsets_copy->insert_range_from(*offsets_mut, 0, offsets_mut->size()); + auto k_arr = ColumnArray::create(IColumn::mutate(static_cast(keys_column)), + std::move(offsets_mut)) + ->permute(perm, limit); + auto v_arr = ColumnArray::create(IColumn::mutate(static_cast(values_column)), + std::move(offsets_copy)) + ->permute(perm, limit); return ColumnMap::create(assert_cast(*k_arr).get_data_ptr(), assert_cast(*v_arr).get_data_ptr(), @@ -554,23 +573,27 @@ MutableColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const } Status ColumnMap::deduplicate_keys(bool recursive) { - const auto inner_rows = keys_column->size(); - const auto rows = offsets_column->size(); + const IColumn& ck = *static_cast(keys_column); + const IColumn& co = *static_cast(offsets_column); + const auto inner_rows = ck.size(); + const auto rows = co.size(); if (recursive) { - auto values_column_ = values_column; + const IColumn::Ptr& values_ptr = static_cast(values_column); + IColumn::Ptr values_column_ = values_ptr; if (values_column_->is_nullable()) { - values_column_ = (assert_cast(*values_column)).get_nested_column_ptr(); + values_column_ = + assert_cast(*values_column_).get_nested_column_ptr(); } if (auto* values_map = check_and_get_column(values_column_.get())) { - RETURN_IF_ERROR(values_map->deduplicate_keys(recursive)); + RETURN_IF_ERROR(const_cast(values_map)->deduplicate_keys(recursive)); } } DorisVector serialized_keys(inner_rows); - const size_t max_one_row_byte_size = keys_column->get_max_row_byte_size(); + const size_t max_one_row_byte_size = ck.get_max_row_byte_size(); size_t total_bytes = max_one_row_byte_size * inner_rows; Arena pool; @@ -579,7 +602,7 @@ Status ColumnMap::deduplicate_keys(bool recursive) { // reach mem limit, don't serialize in batch const char* begin = nullptr; for (size_t i = 0; i != inner_rows; ++i) { - serialized_keys[i] = keys_column->serialize_value_into_arena(i, pool, begin); + serialized_keys[i] = ck.serialize_value_into_arena(i, pool, begin); } } else { auto* serialized_key_buffer = reinterpret_cast(pool.alloc(total_bytes)); @@ -590,7 +613,7 @@ Status ColumnMap::deduplicate_keys(bool recursive) { serialized_keys[i].size = 0; } - keys_column->serialize(serialized_keys.data(), inner_rows); + ck.serialize(serialized_keys.data(), inner_rows); } auto new_offsets = COffsets::create(); @@ -598,7 +621,7 @@ Status ColumnMap::deduplicate_keys(bool recursive) { auto& new_offsets_data = new_offsets->get_data(); IColumn::Filter filter(inner_rows, 1); - auto& offsets = get_offsets(); + const auto& offsets = static_cast(this)->get_offsets(); Offset64 offset = 0; bool has_duplicated_key = false; @@ -636,8 +659,12 @@ Status ColumnMap::deduplicate_keys(bool recursive) { if (has_duplicated_key) { offsets_column = std::move(new_offsets); - keys_column->filter(filter); - values_column->filter(filter); + auto keys_mut = IColumn::mutate(std::move(static_cast(keys_column))); + keys_mut->filter(filter); + static_cast(keys_column) = std::move(keys_mut); + auto values_mut = IColumn::mutate(std::move(static_cast(values_column))); + values_mut->filter(filter); + static_cast(values_column) = std::move(values_mut); } return Status::OK(); diff --git a/be/src/core/column/column_map.h b/be/src/core/column/column_map.h index 12f8fe4f8184ab..e1fb77ca9056b6 100644 --- a/be/src/core/column/column_map.h +++ b/be/src/core/column/column_map.h @@ -61,8 +61,9 @@ class ColumnMap final : public COWHelper { static MutablePtr create(const ColumnPtr& keys, const ColumnPtr& values, const ColumnPtr& offsets) { - return ColumnMap::create(keys->assume_mutable(), values->assume_mutable(), - offsets->assume_mutable()); + // Mutate to ensure exclusive ownership required by the constructor's non-const WrappedPtr access. + return ColumnMap::create(IColumn::mutate(keys), IColumn::mutate(values), + IColumn::mutate(offsets)); } template (get_null_map_column()).get_data().data(); if (_nested_column->support_replace_column_null_data()) { // nullmap process is slow, replace null data to default value to avoid nullmap process - _nested_column->assume_mutable()->replace_column_null_data(real_null_data); + // This is an intentional in-place mutation inside a logically-const hash computation: + // null positions are overwritten with defaults so the inner hash loop needs no null checks. + auto nested_mut = std::move(*static_cast(_nested_column)).mutate(); + nested_mut->replace_column_null_data(real_null_data); + static_cast(const_cast(_nested_column)) = std::move(nested_mut); _nested_column->update_crc32c_batch(hashes, nullptr); } else { auto s = size(); @@ -380,12 +384,15 @@ size_t ColumnNullable::filter(const Filter& filter) { Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) { auto* nullable_col_ptr = assert_cast(col_ptr); - WrappedPtr nest_col_ptr = nullable_col_ptr->_nested_column; + // Access the nested column via const path to avoid assume_mutable_ref (which requires + // exclusive ownership). The output col_ptr was just created, so its nested column is exclusive. + IColumn* nest_col_raw = const_cast( + static_cast(nullable_col_ptr->_nested_column).get()); /// `get_null_map_data` will set `_need_update_has_null` to true auto& res_nullmap = nullable_col_ptr->get_null_map_data(); - RETURN_IF_ERROR(get_nested_column().filter_by_selector(sel, sel_size, nest_col_ptr.get())); + RETURN_IF_ERROR(get_nested_column().filter_by_selector(sel, sel_size, nest_col_raw)); DCHECK(res_nullmap.empty()); res_nullmap.resize(sel_size); auto& cur_nullmap = get_null_map_column().get_data(); diff --git a/be/src/core/column/column_nullable.h b/be/src/core/column/column_nullable.h index 4a9d82102a4f45..9566b36cea3c32 100644 --- a/be/src/core/column/column_nullable.h +++ b/be/src/core/column/column_nullable.h @@ -64,8 +64,10 @@ class ColumnNullable final : public COWHelper { */ using Base = COWHelper; static MutablePtr create(const ColumnPtr& nested_column_, const ColumnPtr& null_map_) { - return ColumnNullable::create(nested_column_->assume_mutable(), - null_map_->assume_mutable()); + // Mutate to ensure exclusive ownership: the constructor accesses subcolumns via non-const + // WrappedPtr path which requires use_count() == 1. IColumn::mutate(Ptr) safely clones if + // shared (use_count > 1) or takes ownership if already exclusive. + return ColumnNullable::create(IColumn::mutate(nested_column_), IColumn::mutate(null_map_)); } template diff --git a/be/src/core/column/column_varbinary.h b/be/src/core/column/column_varbinary.h index 673059194face5..caad77e28ad44f 100644 --- a/be/src/core/column/column_varbinary.h +++ b/be/src/core/column/column_varbinary.h @@ -44,7 +44,12 @@ class ColumnVarbinary final : public COWHelper { private: ColumnVarbinary() = default; ColumnVarbinary(const size_t n) : _data(n) {} - ColumnVarbinary(const ColumnVarbinary& src) : _data(src._data.begin(), src._data.end()) {} + ColumnVarbinary(const ColumnVarbinary& src) { + _data.reserve(src._data.size()); + for (const auto& value : src._data) { + insert_data(value.data(), value.size()); + } + } public: std::string get_name() const override { return "ColumnVarbinary"; } diff --git a/be/src/core/column/column_variant.cpp b/be/src/core/column/column_variant.cpp index dd9429c9ebfd5b..48fa8731f45f8f 100644 --- a/be/src/core/column/column_variant.cpp +++ b/be/src/core/column/column_variant.cpp @@ -484,7 +484,7 @@ MutableColumnPtr ColumnVariant::apply_for_columns(Func&& func) const { auto& finalized_object = assert_cast(*finalized); return finalized_object.apply_for_columns(std::forward(func)); } - auto new_root = func(get_root())->assume_mutable(); + auto new_root = std::move(*func(get_root())).mutate(); auto res = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode, get_root_type(), std::move(new_root)); for (const auto& subcolumn : subcolumns) { @@ -492,16 +492,16 @@ MutableColumnPtr ColumnVariant::apply_for_columns(Func&& func) const { continue; } auto new_subcolumn = func(subcolumn->data.get_finalized_column_ptr()); - if (!res->add_sub_column(subcolumn->path, new_subcolumn->assume_mutable(), + if (!res->add_sub_column(subcolumn->path, std::move(*new_subcolumn).mutate(), subcolumn->data.get_least_common_type())) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "add path {} is error", subcolumn->path.get_path()); } } auto sparse_column = func(serialized_sparse_column); - res->serialized_sparse_column = sparse_column->assume_mutable(); + res->serialized_sparse_column = IColumn::mutate(std::move(sparse_column)); auto doc_value_column = func(serialized_doc_value_column); - res->serialized_doc_value_column = doc_value_column->assume_mutable(); + res->serialized_doc_value_column = IColumn::mutate(std::move(doc_value_column)); res->num_rows = res->serialized_sparse_column->size(); ENABLE_CHECK_CONSISTENCY(res.get()); return res; @@ -942,6 +942,10 @@ bool ColumnVariant::Subcolumn::is_null_at(size_t n) const { } ind -= part->size(); } + // Remaining rows are pending lazy defaults (current_num_of_defaults suffix). + if (ind < current_num_of_defaults) { + return true; + } throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range", n); } @@ -972,6 +976,11 @@ void ColumnVariant::Subcolumn::get(size_t n, FieldWithDataType& res) const { ind -= part->size(); } + // Remaining rows are pending lazy defaults (current_num_of_defaults suffix). + if (ind < current_num_of_defaults) { + res = FieldWithDataType(Field()); + return; + } throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range", n); } @@ -2059,14 +2068,13 @@ Status ColumnVariant::serialize_sparse_columns( /// directly as NestedGroup data by the writer (VariantColumnWriterImpl). void ColumnVariant::unnest(Subcolumns::NodePtr& entry, Subcolumns& res_subcolumns) const { entry->data.finalize(); - auto nested_column = entry->data.get_finalized_column_ptr()->assume_mutable(); + auto nested_column = std::move(*entry->data.get_finalized_column_ptr()).mutate(); auto* nested_column_nullable = assert_cast(nested_column.get()); auto* nested_column_array = - assert_cast(nested_column_nullable->get_nested_column_ptr().get()); + assert_cast(&nested_column_nullable->get_nested_column()); auto& offset = nested_column_array->get_offsets_ptr(); - auto* nested_object_nullable = assert_cast( - nested_column_array->get_data_ptr()->assume_mutable().get()); + auto* nested_object_nullable = assert_cast(&nested_column_array->get_data()); auto& nested_object_column = assert_cast(nested_object_nullable->get_nested_column()); PathInData nested_path = entry->path; @@ -2082,13 +2090,18 @@ void ColumnVariant::unnest(Subcolumns::NodePtr& entry, Subcolumns& res_subcolumn path_builder.append(nested_entry->path.get_parts(), true); auto subnested_column = ColumnArray::create( ColumnNullable::create(nested_entry->data.get_finalized_column_ptr(), - nested_object_nullable->get_null_map_column_ptr()), + static_cast(nested_object_nullable) + ->get_null_map_column() + .get_ptr()), offset); - auto nullable_subnested_column = ColumnNullable::create( - std::move(subnested_column), nested_column_nullable->get_null_map_column_ptr()); + auto nullable_subnested_column = + ColumnNullable::create(std::move(subnested_column), + static_cast(nested_column_nullable) + ->get_null_map_column() + .get_ptr()); auto type = make_nullable( std::make_shared(nested_entry->data.least_common_type.get())); - Subcolumn subcolumn(nullable_subnested_column->assume_mutable(), type, is_nullable); + Subcolumn subcolumn(std::move(nullable_subnested_column), type, is_nullable); res_subcolumns.add(path_builder.build(), subcolumn); } } @@ -2101,7 +2114,24 @@ void ColumnVariant::clear_sparse_column() { } #endif - serialized_sparse_column->clear(); + serialized_sparse_column = ColumnPtr(create_binary_column_fn()); +} + +void ColumnVariant::ensure_binary_columns_rows() { + auto resize_if_empty = [this](WrappedPtr& column) { + const auto& const_column = static_cast(column); + if (const_column->size() == num_rows) { + return; + } + CHECK(const_column->empty()) + << "ColumnVariant binary column size mismatch, rows: " << num_rows + << ", column rows: " << const_column->size(); + auto mutable_column = IColumn::mutate(std::move(static_cast(column))); + mutable_column->resize(num_rows); + column = std::move(mutable_column); + }; + resize_if_empty(serialized_sparse_column); + resize_if_empty(serialized_doc_value_column); } Status ColumnVariant::convert_typed_path_to_storage_type( @@ -2216,6 +2246,7 @@ Status ColumnVariant::pick_subcolumns_to_sparse_column( } void ColumnVariant::finalize(FinalizeMode mode) { + ensure_binary_columns_rows(); if (is_finalized() && mode == FinalizeMode::READ_MODE) { _prev_positions.clear(); ENABLE_CHECK_CONSISTENCY(this); @@ -2263,6 +2294,7 @@ void ColumnVariant::finalize(FinalizeMode mode) { std::swap(subcolumns, new_subcolumns); _prev_positions.clear(); + ensure_binary_columns_rows(); ENABLE_CHECK_CONSISTENCY(this); } @@ -2313,7 +2345,7 @@ ColumnPtr ColumnVariant::filter(const Filter& filter, ssize_t count) const { ENABLE_CHECK_CONSISTENCY(res.get()); return res; } - auto new_root = get_root()->filter(filter, count)->assume_mutable(); + auto new_root = std::move(*get_root()->filter(filter, count)).mutate(); auto new_column = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode, get_root_type(), std::move(new_root)); for (const auto& entry : subcolumns) { @@ -2321,7 +2353,7 @@ ColumnPtr ColumnVariant::filter(const Filter& filter, ssize_t count) const { continue; } auto subcolumn = entry->data.get_finalized_column().filter(filter, -1); - new_column->add_sub_column(entry->path, subcolumn->assume_mutable(), + new_column->add_sub_column(entry->path, std::move(*subcolumn).mutate(), entry->data.get_least_common_type()); } new_column->serialized_sparse_column = serialized_sparse_column->filter(filter, count); @@ -2368,8 +2400,10 @@ void ColumnVariant::clear() { // we must keep root column exist empty.create_root(Subcolumn(0, is_nullable, true)); std::swap(empty, subcolumns); - serialized_sparse_column->clear(); - serialized_doc_value_column->clear(); + // Reassign to fresh empty columns to avoid requiring exclusive ownership. + // The existing columns may be shared (use_count > 1) so we cannot clear them in-place. + serialized_sparse_column = ColumnPtr(create_binary_column_fn()); + serialized_doc_value_column = ColumnPtr(create_binary_column_fn()); num_rows = 0; _prev_positions.clear(); ENABLE_CHECK_CONSISTENCY(this); @@ -2769,10 +2803,26 @@ void ColumnVariant::fill_path_column_from_sparse_data(Subcolumn& subcolumn, Null MutableColumnPtr ColumnVariant::clone() const { auto res = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode); + // Copy typed_path_count and nested_path_count so the subcolumn limit logic is consistent. + res->typed_path_count = typed_path_count; + res->nested_path_count = nested_path_count; Subcolumns new_subcolumns; for (const auto& subcolumn : subcolumns) { - auto new_subcolumn = subcolumn->data; - if (subcolumn->data.is_root) { + // Struct-copy all metadata (num_rows, num_of_defaults_in_prefix, + // current_num_of_defaults, data_types, etc.), then deep-clone data WrappedPtrs. + Subcolumn new_subcolumn = subcolumn->data; + for (auto& wp : new_subcolumn.data) { + static_cast(wp) = + std::move(*static_cast(wp)).mutate(); + } + // Flush pending lazy defaults into actual data so that the cloned subcolumn + // is self-consistent (current_num_of_defaults == 0 after clone). + if (new_subcolumn.current_num_of_defaults > 0) { + size_t pending = new_subcolumn.current_num_of_defaults; + new_subcolumn.current_num_of_defaults = 0; + new_subcolumn.insert_many_defaults(pending); + } + if (subcolumn->data.is_root || subcolumn->path.empty()) { new_subcolumns.create_root(std::move(new_subcolumn)); } else if (!new_subcolumns.add(subcolumn->path, std::move(new_subcolumn))) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "add path {} is error in clone()", @@ -2783,13 +2833,8 @@ MutableColumnPtr ColumnVariant::clone() const { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "root is nullptr in clone()"); } res->subcolumns = std::move(new_subcolumns); - auto&& column = serialized_sparse_column->get_ptr(); - auto sparse_column = std::move(*column).mutate(); - res->serialized_sparse_column = sparse_column->assume_mutable(); - - auto&& new_doc_value_column = serialized_doc_value_column->get_ptr(); - auto doc_value_column = std::move(*new_doc_value_column).mutate(); - res->serialized_doc_value_column = doc_value_column->assume_mutable(); + res->serialized_sparse_column = IColumn::mutate(serialized_sparse_column->get_ptr()); + res->serialized_doc_value_column = IColumn::mutate(serialized_doc_value_column->get_ptr()); res->set_num_rows(num_rows); ENABLE_CHECK_CONSISTENCY(res.get()); diff --git a/be/src/core/column/column_variant.h b/be/src/core/column/column_variant.h index 16ced2f529118f..1ae92afd54cccc 100644 --- a/be/src/core/column/column_variant.h +++ b/be/src/core/column/column_variant.h @@ -325,7 +325,7 @@ class ColumnVariant final : public COWHelper { if (subcolumns.empty()) { return nullptr; } - return subcolumns.get_mutable_root()->data.get_finalized_column_ptr()->assume_mutable(); + return std::move(*subcolumns.get_mutable_root()->data.get_finalized_column_ptr()).mutate(); } void serialize_one_row_to_string(int64_t row, std::string* output, @@ -354,6 +354,8 @@ class ColumnVariant final : public COWHelper { void clear_sparse_column(); + void ensure_binary_columns_rows(); + // root is null or type nothing bool is_null_root() const; @@ -409,8 +411,12 @@ class ColumnVariant final : public COWHelper { ColumnPtr get_sparse_column() const { return serialized_sparse_column; } + IColumn& get_sparse_column_mutable() { return *serialized_sparse_column; } + ColumnPtr get_doc_value_column() const { return serialized_doc_value_column; } + IColumn& get_doc_value_column_mutable() { return *serialized_doc_value_column; } + // use sparse_subcolumns_schema to record sparse column's path info and type static MutableColumnPtr create_binary_column_fn() { return ColumnMap::create(ColumnString::create(), ColumnString::create(), diff --git a/be/src/core/cow.h b/be/src/core/cow.h index fcac631aa83ce1..a0dd93bf545d20 100644 --- a/be/src/core/cow.h +++ b/be/src/core/cow.h @@ -25,6 +25,9 @@ #include #include +#include "common/exception.h" +#include "common/status.h" + namespace doris { /** Copy-on-write shared ptr. @@ -313,9 +316,19 @@ class COW { public: MutablePtr mutate() const&& { return shallow_mutate(); } - MutablePtr assume_mutable() const { return const_cast(this)->get_ptr(); } + MutablePtr assume_mutable() const { + if (this->use_count() > 1) { + throw Exception(ErrorCode::INTERNAL_ERROR, "COW::assume_mutable: use_count() > 1"); + } + return const_cast(this)->get_ptr(); + } - Derived& assume_mutable_ref() const { return const_cast(*derived()); } + Derived& assume_mutable_ref() const { + if (this->use_count() > 1) { + throw Exception(ErrorCode::INTERNAL_ERROR, "COW::assume_mutable: use_count() > 1"); + } + return const_cast(*derived()); + } protected: /// It works as immutable_ptr if it is const and as mutable_ptr if it is non const. diff --git a/be/src/core/data_type/data_type_array.cpp b/be/src/core/data_type/data_type_array.cpp index c30cabe26c745b..3c7545c7490d45 100644 --- a/be/src/core/data_type/data_type_array.cpp +++ b/be/src/core/data_type/data_type_array.cpp @@ -120,8 +120,9 @@ const char* DataTypeArray::deserialize(const char* buf, MutableColumnPtr* column memcpy(offsets.data(), buf, sizeof(ColumnArray::Offset64) * real_have_saved_num); buf += sizeof(ColumnArray::Offset64) * real_have_saved_num; // children - auto nested_column = data_column->get_data_ptr()->assume_mutable(); + auto nested_column = std::move(*data_column->get_data_ptr()).mutate(); buf = get_nested_type()->deserialize(buf, &nested_column, be_exec_version); + data_column->get_data_ptr() = std::move(nested_column); return buf; } diff --git a/be/src/core/data_type/data_type_map.cpp b/be/src/core/data_type/data_type_map.cpp index 0932bf47c218bd..043fd7a70248f3 100644 --- a/be/src/core/data_type/data_type_map.cpp +++ b/be/src/core/data_type/data_type_map.cpp @@ -135,10 +135,12 @@ const char* DataTypeMap::deserialize(const char* buf, MutableColumnPtr* column, memcpy(map_offsets.data(), buf, sizeof(ColumnArray::Offset64) * real_have_saved_num); buf += sizeof(ColumnArray::Offset64) * real_have_saved_num; // key value - auto nested_keys_column = map_column->get_keys_ptr()->assume_mutable(); - auto nested_values_column = map_column->get_values_ptr()->assume_mutable(); + auto nested_keys_column = std::move(*map_column->get_keys_ptr()).mutate(); + auto nested_values_column = std::move(*map_column->get_values_ptr()).mutate(); buf = get_key_type()->deserialize(buf, &nested_keys_column, be_exec_version); buf = get_value_type()->deserialize(buf, &nested_values_column, be_exec_version); + map_column->get_keys_ptr() = std::move(nested_keys_column); + map_column->get_values_ptr() = std::move(nested_values_column); return buf; } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/core/data_type/data_type_struct.cpp b/be/src/core/data_type/data_type_struct.cpp index 0770899d661129..873e3159ae536c 100644 --- a/be/src/core/data_type/data_type_struct.cpp +++ b/be/src/core/data_type/data_type_struct.cpp @@ -214,8 +214,9 @@ const char* DataTypeStruct::deserialize(const char* buf, MutableColumnPtr* colum auto* struct_column = assert_cast(origin_column); DCHECK(elems.size() == struct_column->tuple_size()); for (size_t i = 0; i < elems.size(); ++i) { - auto child_column = struct_column->get_column_ptr(i)->assume_mutable(); + auto child_column = std::move(*struct_column->get_column_ptr(i)).mutate(); buf = elems[i]->deserialize(buf, &child_column, be_exec_version); + struct_column->get_column_ptr(i) = std::move(child_column); } return buf; } diff --git a/be/src/exec/common/arrow_column_to_doris_column.cpp b/be/src/exec/common/arrow_column_to_doris_column.cpp index cd6e959596791b..645376ee12d7a7 100644 --- a/be/src/exec/common/arrow_column_to_doris_column.cpp +++ b/be/src/exec/common/arrow_column_to_doris_column.cpp @@ -100,10 +100,12 @@ Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arr Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arrow_batch_cur_idx, ColumnPtr& doris_column, const DataTypePtr& type, size_t num_elements, const cctz::time_zone& ctz) { - RETURN_IF_ERROR(type->get_serde()->read_column_from_arrow( - doris_column->assume_mutable_ref(), arrow_column, arrow_batch_cur_idx, - arrow_batch_cur_idx + num_elements, ctz)); - return Status::OK(); + auto mutable_column = IColumn::mutate(std::move(doris_column)); + auto status = type->get_serde()->read_column_from_arrow( + *mutable_column, arrow_column, arrow_batch_cur_idx, arrow_batch_cur_idx + num_elements, + ctz); + doris_column = std::move(mutable_column); + return status; } } // namespace doris diff --git a/be/src/exec/common/hash_table/hash_map_context.h b/be/src/exec/common/hash_table/hash_map_context.h index 479256a475ce3c..6d355e7d9561fa 100644 --- a/be/src/exec/common/hash_table/hash_map_context.h +++ b/be/src/exec/common/hash_table/hash_map_context.h @@ -955,7 +955,7 @@ struct MethodKeysFixed : public MethodBase { const auto* nullmap = assert_cast(*nullmap_columns[j]).get_data().data(); // make sure null cell is filled by 0x0 - key_columns[j]->assume_mutable()->replace_column_null_data(nullmap); + const_cast(key_columns[j])->replace_column_null_data(nullmap); } auto* __restrict current = result_data + offset; for (size_t i = 0; i < row_numbers; ++i) { diff --git a/be/src/exec/common/variant_util.cpp b/be/src/exec/common/variant_util.cpp index 767f107649126c..09ac52075cd9ce 100644 --- a/be/src/exec/common/variant_util.cpp +++ b/be/src/exec/common/variant_util.cpp @@ -435,7 +435,7 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co auto variant = ColumnVariant::create(data_type_object.variant_max_subcolumns_count(), data_type_object.enable_doc_mode()); - variant->create_root(arg.type, arg.column->assume_mutable()); + variant->create_root(arg.type, std::move(*arg.column).mutate()); ColumnPtr nullable = ColumnNullable::create( variant->get_ptr(), check_and_get_column(arg.column.get())->get_null_map_column_ptr()); @@ -2119,9 +2119,8 @@ void parse_json_to_variant_impl(IColumn& column, const char* src, size_t length, } } column_variant.incr_num_rows(); - auto sparse_column = column_variant.get_sparse_column(); - if (sparse_column->size() == old_num_rows) { - sparse_column->assume_mutable()->insert_default(); + if (column_variant.get_sparse_column()->size() == old_num_rows) { + column_variant.get_sparse_column_mutable().insert_default(); } #ifndef NDEBUG column_variant.check_consistency(); @@ -2218,10 +2217,10 @@ Status _parse_and_materialize_variant_columns(Block& block, for (size_t i = 0; i < variant_pos.size(); ++i) { auto column_ref = block.get_by_position(variant_pos[i]).column; bool is_nullable = column_ref->is_nullable(); - MutableColumnPtr var_column = column_ref->assume_mutable(); + MutableColumnPtr var_column = std::move(*column_ref).mutate(); if (is_nullable) { const auto& nullable = assert_cast(*column_ref); - var_column = nullable.get_nested_column_ptr()->assume_mutable(); + var_column = std::move(*nullable.get_nested_column_ptr()).mutate(); } auto& var = assert_cast(*var_column); var_column->finalize(); @@ -2265,7 +2264,7 @@ Status _parse_and_materialize_variant_columns(Block& block, auto expected_root_type = make_nullable(std::make_shared()); var.ensure_root_node_type(expected_root_type); - variant_column = var.assume_mutable(); + variant_column = std::move(var_column); } // Wrap variant with nullmap if it is nullable diff --git a/be/src/exec/exchange/local_exchanger.cpp b/be/src/exec/exchange/local_exchanger.cpp index 620aae737050d6..a248940dc63c81 100644 --- a/be/src/exec/exchange/local_exchanger.cpp +++ b/be/src/exec/exchange/local_exchanger.cpp @@ -167,6 +167,7 @@ Status ShuffleExchanger::get_block(RuntimeState* state, Block* block, bool* eos, mutable_block = VectorizedUtils::build_mutable_mem_reuse_block( block, partitioned_block.first->_data_block); RETURN_IF_ERROR(get_data()); + block->set_columns(std::move(mutable_block.mutable_columns())); } return Status::OK(); } @@ -212,7 +213,7 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const std::vectorsize() > 0); + DCHECK(shuffle_idx_to_instance_idx && !shuffle_idx_to_instance_idx->empty()); const auto& map = *shuffle_idx_to_instance_idx; int32_t enqueue_rows = 0; for (const auto& it : map) { @@ -425,6 +426,7 @@ Status BroadcastExchanger::get_block(RuntimeState* state, Block* block, bool* eo RETURN_IF_ERROR(mutable_block.add_rows(&block_wrapper->_data_block, partitioned_block.second.offset_start, partitioned_block.second.length)); + block->set_columns(std::move(mutable_block.mutable_columns())); } return Status::OK(); @@ -573,6 +575,9 @@ Status AdaptivePassthroughExchanger::get_block(RuntimeState* state, Block* block mutable_block = VectorizedUtils::build_mutable_mem_reuse_block( block, partitioned_block.first->_data_block); RETURN_IF_ERROR(get_data()); + if (mutable_block.rows() > 0) { + block->set_columns(std::move(mutable_block.mutable_columns())); + } } return Status::OK(); } diff --git a/be/src/exec/operator/aggregation_sink_operator.cpp b/be/src/exec/operator/aggregation_sink_operator.cpp index f6a9c2cdc4211d..0808361ad74f86 100644 --- a/be/src/exec/operator/aggregation_sink_operator.cpp +++ b/be/src/exec/operator/aggregation_sink_operator.cpp @@ -299,16 +299,20 @@ Status AggSinkLocalState::_merge_with_serialized_key_helper(Block* block) { for (int i = 0; i < key_size; ++i) { if constexpr (for_spill) { - key_columns[i] = block->get_by_position(i).column.get(); key_locs[i] = i; } else { int& result_column_id = key_locs[i]; RETURN_IF_ERROR( Base::_shared_state->probe_expr_ctxs[i]->execute(block, &result_column_id)); block->replace_by_position_if_const(result_column_id); - key_columns[i] = block->get_by_position(result_column_id).column.get(); } - key_columns[i]->assume_mutable()->replace_float_special_values(); + { + auto mutable_col = + IColumn::mutate(std::move(block->get_by_position(key_locs[i]).column)); + mutable_col->replace_float_special_values(); + block->get_by_position(key_locs[i]).column = std::move(mutable_col); + key_columns[i] = block->get_by_position(key_locs[i]).column.get(); + } } size_t rows = block->rows(); @@ -491,8 +495,13 @@ Status AggSinkLocalState::_execute_with_serialized_key_helper(Block* block) { block->get_by_position(result_column_id).column = block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); + { + auto mutable_col = + IColumn::mutate(std::move(block->get_by_position(result_column_id).column)); + mutable_col->replace_float_special_values(); + block->get_by_position(result_column_id).column = std::move(mutable_col); + } key_columns[i] = block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); } } diff --git a/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp b/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp index 58f47001185983..8cb58b2d532b95 100644 --- a/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp +++ b/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp @@ -175,8 +175,11 @@ Status BucketedAggSinkLocalState::_execute_with_serialized_key(Block* block) { block->get_by_position(result_column_id).column = block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); + auto mutable_column = + IColumn::mutate(std::move(block->get_by_position(result_column_id).column)); + mutable_column->replace_float_special_values(); + block->get_by_position(result_column_id).column = std::move(mutable_column); key_columns[i] = block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); } } diff --git a/be/src/exec/operator/cache_source_operator.cpp b/be/src/exec/operator/cache_source_operator.cpp index aec8206f54b682..06731ff8ed54c0 100644 --- a/be/src/exec/operator/cache_source_operator.cpp +++ b/be/src/exec/operator/cache_source_operator.cpp @@ -156,7 +156,9 @@ Status CacheSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* if (need_clone_empty) { *block = output_block->clone_empty(); } - RETURN_IF_ERROR(MutableBlock::build_mutable_block(block).merge(*output_block)); + auto mutable_block = MutableBlock::build_mutable_block(block); + RETURN_IF_ERROR(mutable_block.merge(*output_block)); + block->set_columns(std::move(mutable_block.mutable_columns())); local_state._current_query_cache_rows += output_block->rows(); auto mem_consume = output_block->allocated_bytes(); local_state._current_query_cache_bytes += mem_consume; @@ -179,7 +181,9 @@ Status CacheSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* if (need_clone_empty) { *block = hit_cache_block->clone_empty(); } - RETURN_IF_ERROR(MutableBlock::build_mutable_block(block).merge(*hit_cache_block)); + auto mutable_block = MutableBlock::build_mutable_block(block); + RETURN_IF_ERROR(mutable_block.merge(*hit_cache_block)); + block->set_columns(std::move(mutable_block.mutable_columns())); if (!local_state._hit_cache_column_orders.empty()) { auto datas = block->get_columns_with_type_and_name(); block->clear(); diff --git a/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp b/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp index 298896401d6f3e..92c11cf2896154 100644 --- a/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp +++ b/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp @@ -162,7 +162,13 @@ Status DistinctStreamingAggLocalState::_distinct_pre_agg_with_serialized_key( in_block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); key_columns[i] = in_block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); + { + auto mutable_col = IColumn::mutate( + std::move(in_block->get_by_position(result_column_id).column)); + mutable_col->replace_float_special_values(); + in_block->get_by_position(result_column_id).column = std::move(mutable_col); + key_columns[i] = in_block->get_by_position(result_column_id).column.get(); + } result_idxs[i] = result_column_id; } } @@ -210,18 +216,22 @@ Status DistinctStreamingAggLocalState::_distinct_pre_agg_with_serialized_key( if (out_block->rows() + _distinct_row.size() > batch_size) { size_t split_size = batch_size - out_block->rows(); for (int i = 0; i < key_size; ++i) { - auto output_dst = out_block->get_by_position(i).column->assume_mutable(); + auto output_dst = + IColumn::mutate(std::move(out_block->get_by_position(i).column)); key_columns[i]->append_data_by_selector(output_dst, _distinct_row, 0, split_size); - auto cache_dst = _cache_block.get_by_position(i).column->assume_mutable(); + out_block->get_by_position(i).column = std::move(output_dst); + auto cache_dst = + IColumn::mutate(std::move(_cache_block.get_by_position(i).column)); key_columns[i]->append_data_by_selector(cache_dst, _distinct_row, split_size, _distinct_row.size()); + _cache_block.get_by_position(i).column = std::move(cache_dst); } } else { for (int i = 0; i < key_size; ++i) { - auto output_column = out_block->get_by_position(i).column; - auto dst = output_column->assume_mutable(); + auto dst = IColumn::mutate(std::move(out_block->get_by_position(i).column)); key_columns[i]->append_data_by_selector(dst, _distinct_row); + out_block->get_by_position(i).column = std::move(dst); } } } diff --git a/be/src/exec/operator/hashjoin_build_sink.cpp b/be/src/exec/operator/hashjoin_build_sink.cpp index 9f1a05876f8bb5..eb68668a043eb9 100644 --- a/be/src/exec/operator/hashjoin_build_sink.cpp +++ b/be/src/exec/operator/hashjoin_build_sink.cpp @@ -574,7 +574,9 @@ Status HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state, Blo for (auto& data : block) { data.column = std::move(*data.column).mutate()->convert_column_if_overflow(); if (p._need_finalize_variant_column) { - std::move(*data.column).mutate()->finalize(); + auto mutable_column = IColumn::mutate(std::move(data.column)); + mutable_column->finalize(); + data.column = std::move(mutable_column); } } diff --git a/be/src/exec/operator/hashjoin_build_sink.h b/be/src/exec/operator/hashjoin_build_sink.h index dcc76031c2c6df..0da7dddc1106e4 100644 --- a/be/src/exec/operator/hashjoin_build_sink.h +++ b/be/src/exec/operator/hashjoin_build_sink.h @@ -230,7 +230,7 @@ struct ProcessHashTableBuild { // In order to make the null keys equal when using single null eq, all null keys need to be set to default value. if (_build_raw_ptrs.size() == 1 && null_map && *has_null_key) { - _build_raw_ptrs[0]->assume_mutable()->replace_column_null_data(null_map->data()); + const_cast(_build_raw_ptrs[0])->replace_column_null_data(null_map->data()); } hash_table_ctx.init_serialized_keys(_build_raw_ptrs, _rows, diff --git a/be/src/exec/operator/join/process_hash_table_probe_impl.h b/be/src/exec/operator/join/process_hash_table_probe_impl.h index 5bfd2ff4e0cbfc..bcc4408906bf54 100644 --- a/be/src/exec/operator/join/process_hash_table_probe_impl.h +++ b/be/src/exec/operator/join/process_hash_table_probe_impl.h @@ -164,7 +164,10 @@ void ProcessHashTableProbe::probe_side_output_column(MutableColumns& for (int i = 0; i < _left_output_slot_flags.size(); ++i) { if (_left_output_slot_flags[i]) { if (_parent_operator->need_finalize_variant_column()) { - std::move(*probe_block.get_by_position(i).column).mutate()->finalize(); + auto mutable_column = + IColumn::mutate(std::move(probe_block.get_by_position(i).column)); + mutable_column->finalize(); + probe_block.get_by_position(i).column = std::move(mutable_column); } } @@ -200,7 +203,8 @@ typename HashTableType::State ProcessHashTableProbe::_init_probe_sid // In order to make the null keys equal when using single null eq, all null keys need to be set to default value. if (_parent->_probe_columns.size() == 1 && null_map) { if (simd::contain_one(null_map, probe_rows)) { - _parent->_probe_columns[0]->assume_mutable()->replace_column_null_data(null_map); + const_cast(_parent->_probe_columns[0]) + ->replace_column_null_data(null_map); } } @@ -650,9 +654,11 @@ Status ProcessHashTableProbe::finalize_block_with_filter(Block* outp ->get_data_column_ptr(); auto& src = source_block->get_by_position(column_id).column; - auto dst = output_block->get_by_position(output_column_id).column->assume_mutable(); + auto dst = IColumn::mutate( + std::move(output_block->get_by_position(output_column_id).column)); dst->clear(); insert_with_indexs(dst, src, container, all_match_one); + output_block->get_by_position(output_column_id).column = std::move(dst); } }; do_lazy_materialize(_right_output_slot_flags, _build_indexs, (int)_right_col_idx, @@ -717,14 +723,17 @@ Status ProcessHashTableProbe::do_mark_join_conjuncts(Block* output_b return Status::OK(); } - auto mark_column_mutable = - output_block->get_by_position(_parent->_mark_column_id).column->assume_mutable(); - auto& mark_column = assert_cast(*mark_column_mutable); - IColumn::Filter& filter = assert_cast(mark_column.get_nested_column()).get_data(); + auto mark_column_mutable = IColumn::mutate( + std::move(output_block->get_by_position(_parent->_mark_column_id).column)); + auto* mark_column = assert_cast(mark_column_mutable.get()); + IColumn::Filter& filter = + assert_cast(mark_column->get_nested_column()).get_data(); + auto& null_map_column = mark_column->get_null_map_column(); + output_block->replace_by_position(_parent->_mark_column_id, std::move(mark_column_mutable)); RETURN_IF_ERROR(VExprContext::execute_conjuncts(_parent->_mark_join_conjuncts, output_block, - mark_column.get_null_map_column(), filter)); + null_map_column, filter)); uint8_t* mark_filter_data = filter.data(); - uint8_t* mark_null_map = mark_column.get_null_map_data().data(); + uint8_t* mark_null_map = mark_column->get_null_map_data().data(); if (is_null_aware_join) { // For null aware anti/semi join, if the equal conjuncts was not matched and the build side has null value, diff --git a/be/src/exec/operator/nested_loop_join_probe_operator.cpp b/be/src/exec/operator/nested_loop_join_probe_operator.cpp index 7a3be55cbb1988..b83178b4f4e816 100644 --- a/be/src/exec/operator/nested_loop_join_probe_operator.cpp +++ b/be/src/exec/operator/nested_loop_join_probe_operator.cpp @@ -78,10 +78,12 @@ Status NestedLoopJoinProbeLocalState::close(RuntimeState* state) { void NestedLoopJoinProbeLocalState::_update_additional_flags(Block* block) { auto& p = _parent->cast(); if (p._is_mark_join) { - auto mark_column = block->get_by_position(block->columns() - 1).column->assume_mutable(); + auto mark_column = + IColumn::mutate(std::move(block->get_by_position(block->columns() - 1).column)); if (mark_column->size() < block->rows()) { ColumnFilterHelper(*mark_column).resize_fill(block->rows(), 1); } + block->replace_by_position(block->columns() - 1, std::move(mark_column)); } } diff --git a/be/src/exec/operator/operator.cpp b/be/src/exec/operator/operator.cpp index 2168c24a262fd7..d03b3898360a7a 100644 --- a/be/src/exec/operator/operator.cpp +++ b/be/src/exec/operator/operator.cpp @@ -362,8 +362,8 @@ Status OperatorXBase::do_projections(RuntimeState* state, Block* origin_block, MutableBlock mutable_block = VectorizedUtils::build_mutable_mem_reuse_block(output_block, *_output_row_descriptor); + auto& mutable_columns = mutable_block.mutable_columns(); if (rows != 0) { - auto& mutable_columns = mutable_block.mutable_columns(); DCHECK_EQ(mutable_columns.size(), local_state->_projections.size()) << debug_string(); for (int i = 0; i < mutable_columns.size(); ++i) { ColumnPtr column_ptr; @@ -379,8 +379,8 @@ Status OperatorXBase::do_projections(RuntimeState* state, Block* origin_block, insert_column_datas(mutable_columns[i], column_ptr, rows); } DCHECK(mutable_block.rows() == rows); - output_block->set_columns(std::move(mutable_columns)); } + output_block->set_columns(std::move(mutable_columns)); local_state->_estimate_memory_usage += bytes_usage; diff --git a/be/src/exec/operator/repeat_operator.cpp b/be/src/exec/operator/repeat_operator.cpp index 82ffa633056a41..b0aa6989a35f34 100644 --- a/be/src/exec/operator/repeat_operator.cpp +++ b/be/src/exec/operator/repeat_operator.cpp @@ -154,6 +154,7 @@ Status RepeatLocalState::get_repeated_block(Block* input_block, int repeat_id_id RETURN_IF_ERROR(add_grouping_id_column(rows, cur_col, output_columns, repeat_id_idx)); DCHECK_EQ(cur_col, output_column_size); + output_block->set_columns(std::move(m_block.mutable_columns())); return Status::OK(); } @@ -237,6 +238,7 @@ Status RepeatOperatorX::pull(doris::RuntimeState* state, Block* output_block, bo std::size_t cur_col = 0; RETURN_IF_ERROR( local_state.add_grouping_id_column(rows, cur_col, columns, _repeat_id_idx)); + output_block->set_columns(std::move(m_block.mutable_columns())); _repeat_id_idx++; if (_repeat_id_idx >= _repeat_id_list_size) { diff --git a/be/src/exec/operator/schema_scan_operator.cpp b/be/src/exec/operator/schema_scan_operator.cpp index 030e49b54d48c0..3d5922573b90e4 100644 --- a/be/src/exec/operator/schema_scan_operator.cpp +++ b/be/src/exec/operator/schema_scan_operator.cpp @@ -21,6 +21,7 @@ #include +#include "core/column/column_nullable.h" #include "core/data_type/data_type_factory.hpp" #include "exec/operator/operator.h" #include "runtime/runtime_profile.h" @@ -256,10 +257,16 @@ Status SchemaScanOperatorX::get_block(RuntimeState* state, Block* block, bool* e if (src_block.rows()) { // block->check_number_of_rows(); for (int i = 0; i < _slot_num; ++i) { - MutableColumnPtr column_ptr = std::move(*block->get_by_position(i).column).mutate(); - column_ptr->insert_range_from( - *src_block.safe_get_by_position(_slot_offsets[i]).column, 0, - src_block.rows()); + MutableColumnPtr column_ptr = + IColumn::mutate(std::move(block->get_by_position(i).column)); + ColumnPtr src_column = src_block.safe_get_by_position(_slot_offsets[i]) + .column->convert_to_full_column_if_const(); + if (column_ptr->is_nullable() && !src_column->is_nullable()) { + src_column = make_nullable(src_column); + } + DORIS_CHECK(column_ptr->is_nullable() == src_column->is_nullable()); + column_ptr->insert_range_from(*src_column, 0, src_block.rows()); + block->replace_by_position(i, std::move(column_ptr)); } DCHECK_EQ(block->columns(), _dest_tuple_desc->slots().size()); RETURN_IF_ERROR(local_state.filter_block(local_state._conjuncts, block)); diff --git a/be/src/exec/operator/streaming_aggregation_operator.cpp b/be/src/exec/operator/streaming_aggregation_operator.cpp index 5744b288a4487e..b15e6de0d3f70d 100644 --- a/be/src/exec/operator/streaming_aggregation_operator.cpp +++ b/be/src/exec/operator/streaming_aggregation_operator.cpp @@ -330,8 +330,11 @@ Status StreamingAggLocalState::_pre_agg_with_serialized_key(doris::Block* in_blo in_block->get_by_position(result_column_id).column = in_block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); + auto mutable_column = + IColumn::mutate(std::move(in_block->get_by_position(result_column_id).column)); + mutable_column->replace_float_special_values(); + in_block->get_by_position(result_column_id).column = std::move(mutable_column); key_columns[i] = in_block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); } } diff --git a/be/src/exec/operator/table_function_operator.cpp b/be/src/exec/operator/table_function_operator.cpp index 09e74f580dd1c2..fd97e8d69c68a8 100644 --- a/be/src/exec/operator/table_function_operator.cpp +++ b/be/src/exec/operator/table_function_operator.cpp @@ -560,6 +560,7 @@ Status TableFunctionLocalState::get_expanded_block(RuntimeState* state, Block* o for (auto index : p._useless_slot_indexs) { columns[index]->insert_many_defaults(row_size - columns[index]->size()); } + output_block->set_columns(std::move(columns)); { SCOPED_TIMER(_filter_timer); // 3. eval conjuncts diff --git a/be/src/exec/operator/union_sink_operator.h b/be/src/exec/operator/union_sink_operator.h index 4842ab6b243903..bdfb4a7303126e 100644 --- a/be/src/exec/operator/union_sink_operator.h +++ b/be/src/exec/operator/union_sink_operator.h @@ -168,9 +168,10 @@ class UnionSinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorXset_columns(std::move(mblock.mutable_columns())); } return Status::OK(); } }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/exec/operator/union_source_operator.cpp b/be/src/exec/operator/union_source_operator.cpp index a484f1e4a324ba..9547c9a8184bcf 100644 --- a/be/src/exec/operator/union_source_operator.cpp +++ b/be/src/exec/operator/union_source_operator.cpp @@ -177,6 +177,9 @@ Status UnionSourceOperatorX::get_next_const(RuntimeState* state, Block* block) { tmp_block.clear(); } } + if (mblock.rows() > 0) { + block->set_columns(std::move(mblock.mutable_columns())); + } // some insert query like "insert into string_test select 1, repeat('a', 1024 * 1024);" // the const expr will be in output expr cause the union node return a empty block. so here we diff --git a/be/src/exec/rowid_fetcher.cpp b/be/src/exec/rowid_fetcher.cpp index f97bce17a8c6a4..a62cf420794e57 100644 --- a/be/src/exec/rowid_fetcher.cpp +++ b/be/src/exec/rowid_fetcher.cpp @@ -1090,7 +1090,8 @@ Status RowIdStorageReader::read_doris_format_row( } } else { for (int x = 0; x < slots.size(); ++x) { - MutableColumnPtr column = result_block.get_by_position(x).column->assume_mutable(); + MutableColumnPtr column = + IColumn::mutate(std::move(result_block.get_by_position(x).column)); IteratorKey iterator_key {.tablet_id = tablet_id, .rowset_id = rowset_id, .segment_id = segment_id, @@ -1106,6 +1107,7 @@ Status RowIdStorageReader::read_doris_format_row( full_read_schema, &slots[x], row_id, column, iterator_item.storage_read_options, iterator_item.iterator)); } + result_block.replace_by_position(x, std::move(column)); } } return Status::OK(); diff --git a/be/src/exec/scan/file_scanner.cpp b/be/src/exec/scan/file_scanner.cpp index 5f1d248c1e1f4d..f621050e337d8f 100644 --- a/be/src/exec/scan/file_scanner.cpp +++ b/be/src/exec/scan/file_scanner.cpp @@ -438,8 +438,10 @@ Status FileScanner::_process_runtime_filters_partition_prune(bool& can_filter_al if (!first_column_filled) { // VExprContext.execute has an optimization, the filtering is executed when block->rows() > 0 // The following process may be tricky and time-consuming, but we have no other way. - _runtime_filter_partition_prune_block.get_by_position(0).column->assume_mutable()->resize( - partition_value_column_size); + auto column = IColumn::mutate( + std::move(_runtime_filter_partition_prune_block.get_by_position(0).column)); + column->resize(partition_value_column_size); + _runtime_filter_partition_prune_block.replace_by_position(0, std::move(column)); } IColumn::Filter result_filter(_runtime_filter_partition_prune_block.rows(), 1); RETURN_IF_ERROR(VExprContext::execute_conjuncts(_runtime_filter_partition_prune_ctxs, nullptr, @@ -778,11 +780,11 @@ Status FileScanner::_convert_to_output_block(Block* block) { auto& mutable_output_columns = mutable_output_block.mutable_columns(); std::vector* skip_bitmaps {nullptr}; + MutableColumnPtr skip_bitmap_column; if (_should_process_skip_bitmap_col()) { - auto* skip_bitmap_nullable_col_ptr = - assert_cast(_src_block_ptr->get_by_position(_skip_bitmap_col_idx) - .column->assume_mutable() - .get()); + skip_bitmap_column = IColumn::mutate( + std::move(_src_block_ptr->get_by_position(_skip_bitmap_col_idx).column)); + auto* skip_bitmap_nullable_col_ptr = assert_cast(skip_bitmap_column.get()); skip_bitmaps = &(assert_cast( skip_bitmap_nullable_col_ptr->get_nested_column_ptr().get()) ->get_data()); @@ -799,6 +801,7 @@ Status FileScanner::_convert_to_output_block(Block* block) { } } } + _src_block_ptr->replace_by_position(_skip_bitmap_col_idx, std::move(skip_bitmap_column)); } // for (auto slot_desc : _output_tuple_desc->slots()) { @@ -865,6 +868,7 @@ Status FileScanner::_convert_to_output_block(Block* block) { mutable_output_columns[j]->insert_range_from(*column_ptr, 0, rows); ctx_idx++; } + block->set_columns(std::move(mutable_output_columns)); // after do the dest block insert operation, clear _src_block to remove the reference of origin column _src_block_ptr->clear(); diff --git a/be/src/exec/scan/meta_scanner.cpp b/be/src/exec/scan/meta_scanner.cpp index adf1aabe4b8903..52892882f7bcbb 100644 --- a/be/src/exec/scan/meta_scanner.cpp +++ b/be/src/exec/scan/meta_scanner.cpp @@ -112,21 +112,14 @@ Status MetaScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eof columns.resize(column_size); for (auto i = 0; i < column_size; i++) { if (mem_reuse) { - columns[i] = block->get_by_position(i).column->assume_mutable(); + columns[i] = IColumn::mutate(std::move(block->get_by_position(i).column)); } else { columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column(); } } // fill block RETURN_IF_ERROR(_fill_block_with_remote_data(columns)); - if (_meta_eos == true) { - if (block->rows() == 0) { - *eof = true; - } - break; - } - // Before really use the Block, must clear other ptr of column in block - // So here need do std::move and clear in `columns` + const bool empty_result = columns.empty() || columns.front()->empty(); if (!mem_reuse) { int column_index = 0; for (const auto slot_desc : _tuple_desc->slots()) { @@ -135,7 +128,13 @@ Status MetaScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eof slot_desc->col_name())); } } else { - columns.clear(); + block->set_columns(std::move(columns)); + } + if (_meta_eos == true) { + if (empty_result) { + *eof = true; + } + break; } VLOG_ROW << "VMetaScanNode output rows: " << block->rows(); } while (block->rows() == 0 && !(*eof)); diff --git a/be/src/exec/scan/scanner.cpp b/be/src/exec/scan/scanner.cpp index 97f12d1195c4a0..51199bf71e4917 100644 --- a/be/src/exec/scan/scanner.cpp +++ b/be/src/exec/scan/scanner.cpp @@ -216,7 +216,7 @@ Status Scanner::_do_projections(Block* origin_block, Block* output_block) { if (mutable_columns[i]->is_nullable() != column_ptr->is_nullable()) { throw Exception(ErrorCode::INTERNAL_ERROR, "Nullable mismatch"); } - mutable_columns[i] = column_ptr->assume_mutable(); + mutable_columns[i] = IColumn::mutate(std::move(column_ptr)); } output_block->set_columns(std::move(mutable_columns)); diff --git a/be/src/exec/scan/scanner.h b/be/src/exec/scan/scanner.h index bd56dc0f08ffd2..ca64b98d24a2e4 100644 --- a/be/src/exec/scan/scanner.h +++ b/be/src/exec/scan/scanner.h @@ -115,8 +115,9 @@ class Scanner { if (_padding_block.empty()) { _padding_block.swap(_origin_block); } else if (_origin_block.rows()) { - RETURN_IF_ERROR( - MutableBlock::build_mutable_block(&_padding_block).merge(_origin_block)); + auto mutable_block = MutableBlock::build_mutable_block(&_padding_block); + RETURN_IF_ERROR(mutable_block.merge(_origin_block)); + _padding_block.set_columns(std::move(mutable_block.mutable_columns())); } return Status::OK(); } diff --git a/be/src/exec/sink/vtablet_block_convertor.cpp b/be/src/exec/sink/vtablet_block_convertor.cpp index b567b599cfa3bf..e59fa923375998 100644 --- a/be/src/exec/sink/vtablet_block_convertor.cpp +++ b/be/src/exec/sink/vtablet_block_convertor.cpp @@ -238,8 +238,8 @@ Status OlapTableBlockConvertor::_internal_validate_column(RuntimeState* state, B } } - const auto* tmp_column_ptr = check_and_get_column(*orig_column); - const auto& tmp_real_column_ptr = + auto tmp_column_ptr = check_and_get_column(*orig_column); + auto tmp_real_column_ptr = tmp_column_ptr == nullptr ? orig_column : (tmp_column_ptr->get_nested_column_ptr()); const auto* column_string = assert_cast(tmp_real_column_ptr.get()); const auto* null_map = @@ -281,13 +281,22 @@ Status OlapTableBlockConvertor::_internal_validate_column(RuntimeState* state, B {len_column, len_type, "len"}, {nullptr, input_type, "result"}}); RETURN_IF_ERROR(func->execute(nullptr, tmp_block, {0, 1, 2}, 3, row_count)); - column_string = - assert_cast(tmp_block.get_by_position(3).column.get()); - orig_column = - orig_column->is_nullable() - ? ColumnNullable::create(tmp_block.get_by_position(3).column, - tmp_column_ptr->get_null_map_column_ptr()) - : std::move(tmp_block.get_by_position(3).column); + auto result_column = + IColumn::mutate(std::move(tmp_block.get_by_position(3).column)); + if (orig_column->is_nullable()) { + orig_column = ColumnNullable::create( + std::move(result_column), + IColumn::mutate(tmp_column_ptr->get_null_map_column_ptr())); + } else { + orig_column = std::move(result_column); + } + tmp_column_ptr = check_and_get_column(*orig_column); + tmp_real_column_ptr = tmp_column_ptr == nullptr + ? orig_column + : tmp_column_ptr->get_nested_column_ptr(); + column_string = assert_cast(tmp_real_column_ptr.get()); + null_map = tmp_column_ptr == nullptr ? nullptr + : tmp_column_ptr->get_null_map_data().data(); } for (size_t j = 0; j < row_count; ++j) { auto row = rows ? (*rows)[j] : j; diff --git a/be/src/exec/sort/partition_sorter.cpp b/be/src/exec/sort/partition_sorter.cpp index 64422a202c236f..89be3b90dc6fb1 100644 --- a/be/src/exec/sort/partition_sorter.cpp +++ b/be/src/exec/sort/partition_sorter.cpp @@ -121,6 +121,7 @@ Status PartitionSorter::_read_row_num(Block* output_block, bool* eos, int batch_ if (current->impl->is_last(step) && current->impl->pos == 0) { if (merged_rows != 0) { // return directly for next time's read swap whole block + output_block->set_columns(std::move(merged_columns)); return Status::OK(); } // swap and return block directly when we should get all data from cursor @@ -147,6 +148,7 @@ Status PartitionSorter::_read_row_num(Block* output_block, bool* eos, int batch_ } } + output_block->set_columns(std::move(merged_columns)); return Status::OK(); } @@ -178,6 +180,7 @@ Status PartitionSorter::_read_row_rank(Block* output_block, bool* eos, int batch // rank() maybe need check when have get a distinct row // so when the cmp_res is get a distinct row, need check have output all rows num if (_get_enough_data()) { + output_block->set_columns(std::move(merged_columns)); return Status::OK(); } *_previous_row = *current; @@ -196,6 +199,7 @@ Status PartitionSorter::_read_row_rank(Block* output_block, bool* eos, int batch } } + output_block->set_columns(std::move(merged_columns)); return Status::OK(); } diff --git a/be/src/exec/sort/vsorted_run_merger.cpp b/be/src/exec/sort/vsorted_run_merger.cpp index ce4440c3178343..b4c142cd4f1287 100644 --- a/be/src/exec/sort/vsorted_run_merger.cpp +++ b/be/src/exec/sort/vsorted_run_merger.cpp @@ -194,6 +194,7 @@ Status VSortedRunMerger::get_next(Block* output_block, bool* eos) { current->next(); if (_need_more_data(current)) { do_insert(); + output_block->set_columns(std::move(merged_columns)); return Status::OK(); } } diff --git a/be/src/exprs/aggregate/aggregate_function_java_udaf.h b/be/src/exprs/aggregate/aggregate_function_java_udaf.h index 42b3bc87af6d9d..cbd929824d21d6 100644 --- a/be/src/exprs/aggregate/aggregate_function_java_udaf.h +++ b/be/src/exprs/aggregate/aggregate_function_java_udaf.h @@ -187,7 +187,8 @@ struct AggregateJavaUdafData { RETURN_NOT_OK_STATUS_WITH_WARN(Jni::Env::Get(&env), "Java-Udaf get value function"); Block output_block; - output_block.insert(ColumnWithTypeAndName(to.get_ptr(), result_type, "_result_")); + output_block.insert( + ColumnWithTypeAndName(result_type->create_column(), result_type, "_result_")); auto output_table_schema = JniDataBridge::parse_table_schema(&output_block); std::string output_nullable = result_type->is_nullable() ? "true" : "false"; std::map output_params = {{"is_nullable", output_nullable}, @@ -203,7 +204,11 @@ struct AggregateJavaUdafData { .with_arg(output_map) .call(&output_address)); - return JniDataBridge::fill_block(&output_block, {0}, output_address); + RETURN_IF_ERROR(JniDataBridge::fill_block(&output_block, {0}, output_address)); + const auto& result_column = output_block.get_by_position(0).column; + DORIS_CHECK(result_column->size() == 1); + to.insert_from(*result_column, 0); + return Status::OK(); } private: diff --git a/be/src/exprs/aggregate/aggregate_function_null_v2.h b/be/src/exprs/aggregate/aggregate_function_null_v2.h index aa2c9f3bb39792..a3b513d6014116 100644 --- a/be/src/exprs/aggregate/aggregate_function_null_v2.h +++ b/be/src/exprs/aggregate/aggregate_function_null_v2.h @@ -259,8 +259,7 @@ class AggregateFunctionNullBaseInlineV2 : public IAggregateFunctionHelperget_nested_column().assume_mutable().get(); + const IColumn* src_nested_column = &src_nullable_col->get_nested_column(); if (src_nullable_col->has_null()) { for (size_t i = 0; i < num_rows; ++i) { if (!src_null_map_data[i]) { diff --git a/be/src/exprs/aggregate/aggregate_function_sort.h b/be/src/exprs/aggregate/aggregate_function_sort.h index e001cb0c4c419d..2a7530e817fd3b 100644 --- a/be/src/exprs/aggregate/aggregate_function_sort.h +++ b/be/src/exprs/aggregate/aggregate_function_sort.h @@ -46,33 +46,27 @@ namespace doris { struct AggregateFunctionSortData { const SortDescription sort_desc; - Block block; + // The aggregate state is the sole owner of these columns and appends rows in add(), which is + // a hot path. Keep the long-lived state as MutableBlock and only materialize temporary Block + // views for APIs that require immutable Block input. + MutableBlock block; // The construct only support the template compiler, useless AggregateFunctionSortData() : sort_desc() {}; AggregateFunctionSortData(SortDescription sort_desc, const Block& block) : sort_desc(std::move(sort_desc)), block(block.clone_empty()) {} - void merge(const AggregateFunctionSortData& rhs) { - if (block.rows() == 0) { - block = rhs.block; - } else { - for (size_t i = 0; i < block.columns(); i++) { - auto column = block.get_by_position(i).column->assume_mutable(); - auto column_rhs = rhs.block.get_by_position(i).column; - column->insert_range_from(*column_rhs, 0, rhs.block.rows()); - } - } - } + void merge(const AggregateFunctionSortData& rhs) { append_block(rhs, 0, rhs.block.rows()); } void serialize(const RuntimeState* state, BufferWritable& buf) const { PBlock pblock; size_t uncompressed_bytes = 0; size_t compressed_bytes = 0; int64_t compressed_time = 0; - auto st = block.serialize(state->be_exec_version(), &pblock, &uncompressed_bytes, - &compressed_bytes, &compressed_time, - segment_v2::CompressionTypePB::NO_COMPRESSION); + auto block_view = to_block_view(); + auto st = block_view.serialize(state->be_exec_version(), &pblock, &uncompressed_bytes, + &compressed_bytes, &compressed_time, + segment_v2::CompressionTypePB::NO_COMPRESSION); if (!st.ok()) { throw doris::Exception(st); } @@ -88,12 +82,14 @@ struct AggregateFunctionSortData { pblock.ParseFromString(data); [[maybe_unused]] size_t uncompressed_size = 0; [[maybe_unused]] int64_t uncompressed_time = 0; - auto st = block.deserialize(pblock, &uncompressed_size, &uncompressed_time); + Block deserialized_block; + auto st = deserialized_block.deserialize(pblock, &uncompressed_size, &uncompressed_time); // If memory allocate failed during deserialize, st is not ok, throw exception here to // stop the query. if (!st.ok()) { throw doris::Exception(st); } + block = MutableBlock(std::move(deserialized_block)); } void add(const IColumn** columns, size_t columns_num, size_t row_num) { @@ -102,14 +98,40 @@ struct AggregateFunctionSortData { block.columns(), columns_num); for (size_t i = 0; i < columns_num; ++i) { - auto column = block.get_by_position(i).column->assume_mutable(); - column->insert_from(*columns[i], row_num); + block.get_column_by_position(i)->insert_from(*columns[i], row_num); } } void sort() { + auto block_view = to_block_view(); + auto sorted_block = block_view.clone_empty(); HybridSorter hybrid_sorter; - sort_block(block, block, sort_desc, hybrid_sorter, block.rows()); + sort_block(block_view, sorted_block, sort_desc, hybrid_sorter, block_view.rows()); + block = MutableBlock(std::move(sorted_block)); + } + +private: + void append_block(const AggregateFunctionSortData& rhs, size_t start, size_t length) { + DCHECK_EQ(block.columns(), rhs.block.columns()); + for (size_t i = 0; i < block.columns(); ++i) { + DCHECK(block.get_datatype_by_position(i)->equals( + *rhs.block.get_datatype_by_position(i))) + << "lhs type: " << block.get_datatype_by_position(i)->get_name() + << ", rhs type: " << rhs.block.get_datatype_by_position(i)->get_name(); + block.get_column_by_position(i)->insert_range_from(*rhs.block.get_column_by_position(i), + start, length); + } + } + + Block to_block_view() const { + ColumnsWithTypeAndName columns_with_schema; + columns_with_schema.reserve(block.columns()); + for (size_t i = 0; i < block.columns(); ++i) { + columns_with_schema.emplace_back( + static_cast(*block.get_column_by_position(i)).get_ptr(), + block.get_datatype_by_position(i), ""); + } + return {std::move(columns_with_schema)}; } }; @@ -177,7 +199,7 @@ class AggregateFunctionSort final ColumnRawPtrs arguments_nested; for (int i = 0; i < _arguments.size() - _sort_desc.size(); i++) { arguments_nested.emplace_back( - this->data(place).block.get_by_position(i).column.get()); + this->data(place).block.get_column_by_position(i).get()); } _nested_func->add_batch_single_place(arguments_nested[0]->size(), diff --git a/be/src/exprs/function/array/function_array_flatten.cpp b/be/src/exprs/function/array/function_array_flatten.cpp index 03086f37008788..3f76bcfb015e4a 100644 --- a/be/src/exprs/function/array/function_array_flatten.cpp +++ b/be/src/exprs/function/array/function_array_flatten.cpp @@ -55,23 +55,23 @@ class FunctionArrayFlatten : public IFunction { auto src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); auto* src_column_array_ptr = - assert_cast(remove_nullable(src_column)->assume_mutable().get()); - ColumnArray* nested_src_column_array_ptr = src_column_array_ptr; + assert_cast(remove_nullable(src_column).get()); + const ColumnArray* nested_src_column_array_ptr = src_column_array_ptr; DataTypePtr src_data_type = block.get_by_position(arguments[0]).type; auto* src_data_type_array = assert_cast(remove_nullable(src_data_type).get()); - auto result_column_offsets = - assert_cast(src_column_array_ptr->get_offsets_column()) - .clone(); + auto result_column_offsets = assert_cast( + src_column_array_ptr->get_offsets_column()) + .clone(); auto* offsets = assert_cast(result_column_offsets.get()) ->get_data() .data(); while (src_data_type_array->get_nested_type()->get_primitive_type() == TYPE_ARRAY) { - nested_src_column_array_ptr = assert_cast( - remove_nullable(src_column_array_ptr->get_data_ptr())->assume_mutable().get()); + nested_src_column_array_ptr = assert_cast( + remove_nullable(src_column_array_ptr->get_data_ptr()).get()); for (size_t i = 0; i < input_rows_count; ++i) { offsets[i] = nested_src_column_array_ptr->get_offsets()[offsets[i] - 1]; diff --git a/be/src/exprs/function/cast/cast_to_variant.h b/be/src/exprs/function/cast/cast_to_variant.h index acc8ed9e7f6492..c616a2d764cf20 100644 --- a/be/src/exprs/function/cast/cast_to_variant.h +++ b/be/src/exprs/function/cast/cast_to_variant.h @@ -29,19 +29,34 @@ inline Status cast_from_variant_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count, const NullMap::value_type* null_map, const DataTypePtr& data_type_to) { - const auto& col_with_type_and_name = block.get_by_position(arguments[0]); - const auto& col_from = col_with_type_and_name.column; + auto& col_with_type_and_name = block.get_by_position(arguments[0]); + auto& col_from = col_with_type_and_name.column; const IColumn* variant_column = col_from.get(); if (const auto* nullable = check_and_get_column(*variant_column)) { variant_column = &nullable->get_nested_column(); } - const auto& variant = assert_cast(*variant_column); - ColumnPtr col_to = data_type_to->create_column(); - if (!variant.is_finalized()) { + if (!assert_cast(*variant_column).is_finalized()) { // ColumnVariant should be finalized before parsing, finalize maybe modify original column structure - variant.assume_mutable()->finalize(); + auto mutable_column = IColumn::mutate(std::move(col_with_type_and_name.column)); + if (auto* nullable = check_and_get_column(*mutable_column)) { + const auto& const_nullable = assert_cast(*nullable); + auto nested_column = IColumn::mutate(const_nullable.get_nested_column_ptr()); + assert_cast(*nested_column).finalize(); + ColumnPtr nested_column_ptr = std::move(nested_column); + nullable->change_nested_column(nested_column_ptr); + } else { + assert_cast(*mutable_column).finalize(); + } + col_with_type_and_name.column = std::move(mutable_column); + } + + variant_column = col_with_type_and_name.column.get(); + if (const auto* nullable = check_and_get_column(*variant_column)) { + variant_column = &nullable->get_nested_column(); } + const auto& variant = assert_cast(*variant_column); + ColumnPtr col_to = data_type_to->create_column(); // It's important to convert as many elements as possible in this context. For instance, // if the root of this variant column is a number column, converting it to a number column @@ -152,7 +167,7 @@ struct CastToVariant { auto variant = ColumnVariant::create( variant_type ? variant_type->variant_max_subcolumns_count() : 0, variant_type ? variant_type->enable_doc_mode() : false); - variant->create_root(from_type, col_from->assume_mutable()); + variant->create_root(from_type, IColumn::mutate(col_from)); block.replace_by_position(result, std::move(variant)); return Status::OK(); } @@ -186,4 +201,4 @@ WrapperType create_cast_from_variant_wrapper(const DataTypeVariant& from_type, }; } -} // namespace doris::CastWrapper \ No newline at end of file +} // namespace doris::CastWrapper diff --git a/be/src/exprs/function/function.cpp b/be/src/exprs/function/function.cpp index f1e44bb4c2ac40..590eb63829200d 100644 --- a/be/src/exprs/function/function.cpp +++ b/be/src/exprs/function/function.cpp @@ -67,8 +67,7 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum } if (!mutable_result_null_map_column) { - mutable_result_null_map_column = - std::move(result_null_map_column)->assume_mutable(); + mutable_result_null_map_column = (*std::move(result_null_map_column)).mutate(); } NullMap& result_null_map = @@ -80,6 +79,12 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum } } + // Commit merged null map back: result_null_map_column was moved into + // mutable_result_null_map_column when merging 2+ nullable args with nulls. + if (mutable_result_null_map_column) { + result_null_map_column = std::move(mutable_result_null_map_column); + } + if (!result_null_map_column) { if (is_column_const(*src)) { return ColumnConst::create( diff --git a/be/src/exprs/function/function_bitmap.cpp b/be/src/exprs/function/function_bitmap.cpp index 3f2c388efb85bf..35341f297640b0 100644 --- a/be/src/exprs/function/function_bitmap.cpp +++ b/be/src/exprs/function/function_bitmap.cpp @@ -681,11 +681,11 @@ void update_bitmap_op_count(int64_t* __restrict count, const NullMap& null_map) ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block, const ColumnNumbers& args, uint32_t result, size_t input_rows_count) { - auto* nullable = assert_cast(src.get()); - ColumnPtr src_not_nullable = nullable->get_nested_column_ptr(); - MutableColumnPtr src_not_nullable_mutable = (*std::move(src_not_nullable)).assume_mutable(); + MutableColumnPtr mutable_src = IColumn::mutate(std::move(src)); + auto* nullable = assert_cast(mutable_src.get()); + auto* src_not_nullable_mutable = &nullable->get_nested_column(); auto* __restrict count_data = - assert_cast(src_not_nullable_mutable.get())->get_data().data(); + assert_cast(src_not_nullable_mutable)->get_data().data(); for (const auto& arg : args) { const ColumnWithTypeAndName& elem = block.get_by_position(arg); @@ -712,7 +712,7 @@ ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block, } } - return src; + return mutable_src; } Status execute_bitmap_op_count_null_to_zero( diff --git a/be/src/exprs/function/function_variant_element.cpp b/be/src/exprs/function/function_variant_element.cpp index 344f05555c6035..4d56ffc4ea5909 100644 --- a/be/src/exprs/function/function_variant_element.cpp +++ b/be/src/exprs/function/function_variant_element.cpp @@ -148,8 +148,7 @@ class FunctionVariantElement : public IFunction { const auto& src_sparse_data_values = assert_cast(sparse_data_map.get_values()); auto& sparse_data_offsets = - assert_cast(*target_ptr->get_sparse_column()->assume_mutable()) - .get_offsets(); + assert_cast(target_ptr->get_sparse_column_mutable()).get_offsets(); auto [sparse_data_paths, sparse_data_values] = target_ptr->get_sparse_data_paths_and_values(); StringRef prefix_ref(path.get_path()); @@ -190,7 +189,7 @@ class FunctionVariantElement : public IFunction { sparse_data_offsets.push_back(sparse_data_paths->size()); } target_ptr->get_subcolumns().create_root(root); - target_ptr->get_doc_value_column()->assume_mutable()->resize(src_ptr->size()); + target_ptr->get_doc_value_column_mutable().resize(src_ptr->size()); target_ptr->set_num_rows(src_ptr->size()); } @@ -211,9 +210,9 @@ class FunctionVariantElement : public IFunction { // Ordinary Variant extraction keeps the selected prefix in sparse data, matching the // source branch behavior. Only doc-mode columns keep extracted data in doc_value. auto& extracted_offsets = - assert_cast(*(write_to_doc_value ? target_ptr->get_doc_value_column() - : target_ptr->get_sparse_column()) - ->assume_mutable()) + assert_cast(write_to_doc_value + ? target_ptr->get_doc_value_column_mutable() + : target_ptr->get_sparse_column_mutable()) .get_offsets(); auto [extracted_paths, extracted_values] = write_to_doc_value ? target_ptr->get_doc_value_data_paths_and_values() @@ -251,9 +250,9 @@ class FunctionVariantElement : public IFunction { } target_ptr->get_subcolumns().create_root(root); if (write_to_doc_value) { - target_ptr->get_sparse_column()->assume_mutable()->resize(src_ptr->size()); + target_ptr->get_sparse_column_mutable().resize(src_ptr->size()); } else { - target_ptr->get_doc_value_column()->assume_mutable()->resize(src_ptr->size()); + target_ptr->get_doc_value_column_mutable().resize(src_ptr->size()); } target_ptr->set_num_rows(src_ptr->size()); } @@ -323,7 +322,7 @@ class FunctionVariantElement : public IFunction { if (new_subcolumns.empty() && !nodes.empty()) { CHECK_EQ(nodes.size(), 1); new_subcolumns.create_root(ColumnVariant::Subcolumn { - nodes[0]->data.get_finalized_column_ptr()->assume_mutable(), + IColumn::mutate(nodes[0]->data.get_finalized_column_ptr()), nodes[0]->data.get_least_common_type(), true, true}); auto container = ColumnVariant::create(src.max_subcolumns_count(), src.enable_doc_mode(), @@ -349,12 +348,12 @@ class FunctionVariantElement : public IFunction { } result_col->insert_range_from(*container, 0, container->size()); } - *result = result_col->get_ptr(); // ColumnVariant should be finalized before parsing, finalize maybe modify original column structure - (*result)->assume_mutable()->finalize(); + result_col->finalize(); VLOG_DEBUG << "dump new object " << static_cast(result_col.get())->debug_string() << ", path " << path.get_path(); + *result = std::move(result_col); return Status::OK(); } } diff --git a/be/src/exprs/table_function/python_udtf_function.cpp b/be/src/exprs/table_function/python_udtf_function.cpp index a116a3d6785297..50313a2aa31287 100644 --- a/be/src/exprs/table_function/python_udtf_function.cpp +++ b/be/src/exprs/table_function/python_udtf_function.cpp @@ -250,8 +250,7 @@ Status PythonUDTFFunction::_convert_list_array_to_array_column( if (_return_type->is_nullable()) { nullable_col = assert_cast(array_col_ptr.get()); - array_col = assert_cast( - nullable_col->get_nested_column_ptr()->assume_mutable().get()); + array_col = assert_cast(&nullable_col->get_nested_column()); } else { array_col = assert_cast(array_col_ptr.get()); } @@ -264,8 +263,8 @@ Status PythonUDTFFunction::_convert_list_array_to_array_column( // Use read_column_from_arrow for optimized conversion // This directly converts Arrow ListArray to Doris ColumnArray // No struct unwrapping needed - Python server sends the correct format! - RETURN_IF_ERROR(array_serde->read_column_from_arrow( - array_col->assume_mutable_ref(), list_array.get(), 0, num_input_rows, _timezone_obj)); + RETURN_IF_ERROR(array_serde->read_column_from_arrow(*array_col, list_array.get(), 0, + num_input_rows, _timezone_obj)); // Handle nullable wrapper: all array elements are non-null // (empty arrays [] are non-null, different from NULL) diff --git a/be/src/exprs/table_function/udf_table_function.cpp b/be/src/exprs/table_function/udf_table_function.cpp index 4b6037f7ab1771..414766ef9157c3 100644 --- a/be/src/exprs/table_function/udf_table_function.cpp +++ b/be/src/exprs/table_function/udf_table_function.cpp @@ -123,10 +123,12 @@ Status UDFTableFunction::process_init(Block* block, RuntimeState* state) { .with_arg(output_map) .call(&output_address)); RETURN_IF_ERROR(JniDataBridge::fill_block(block, {_result_column_idx}, output_address)); + _array_result_column = + IColumn::mutate(std::move(block->get_by_position(_result_column_idx).column)); block->erase(_result_column_idx); if (!extract_column_array_info(*_array_result_column, _array_column_detail)) { return Status::NotSupported("column type {} not supported now", - block->get_by_position(_result_column_idx).column->get_name()); + _array_result_column->get_name()); } return Status::OK(); } diff --git a/be/src/exprs/table_function/vexplode.cpp b/be/src/exprs/table_function/vexplode.cpp index 680e5ccff66ed1..0b8556229a4ee3 100644 --- a/be/src/exprs/table_function/vexplode.cpp +++ b/be/src/exprs/table_function/vexplode.cpp @@ -45,7 +45,8 @@ Status VExplodeTableFunction::_process_init_variant(Block* block, int value_colu // explode variant array auto column_without_nullable = remove_nullable(block->get_by_position(value_column_idx).column); auto column = column_without_nullable->convert_to_full_column_if_const(); - auto& variant_column = assert_cast(*(column->assume_mutable())); + auto variant_column_ptr = IColumn::mutate(std::move(column)); + auto& variant_column = assert_cast(*variant_column_ptr); variant_column.finalize(); _detail.output_as_variant = true; _detail.variant_enable_doc_mode = variant_column.enable_doc_mode(); @@ -62,9 +63,10 @@ Status VExplodeTableFunction::_process_init_variant(Block* block, int value_colu _detail.nested_type = array_type->get_nested_type(); } else { // null root, use nothing type - _array_column = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)), - ColumnUInt8::create(0)); - _array_column->assume_mutable()->insert_many_defaults(variant_column.size()); + auto array_column = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)), + ColumnUInt8::create(0)); + array_column->insert_many_defaults(variant_column.size()); + _array_column = std::move(array_column); _detail.nested_type = std::make_shared(); } return Status::OK(); diff --git a/be/src/exprs/table_function/vexplode_v2.cpp b/be/src/exprs/table_function/vexplode_v2.cpp index b21802690a84b8..62a4ab1d66ae92 100644 --- a/be/src/exprs/table_function/vexplode_v2.cpp +++ b/be/src/exprs/table_function/vexplode_v2.cpp @@ -51,7 +51,8 @@ Status VExplodeV2TableFunction::_process_init_variant(Block* block, int value_co // explode variant array auto column_without_nullable = remove_nullable(block->get_by_position(value_column_idx).column); auto column = column_without_nullable->convert_to_full_column_if_const(); - auto& variant_column = assert_cast(*(column->assume_mutable())); + auto variant_column_ptr = IColumn::mutate(std::move(column)); + auto& variant_column = assert_cast(*variant_column_ptr); variant_column.finalize(); _multi_detail[children_column_idx].output_as_variant = true; _multi_detail[children_column_idx].variant_enable_doc_mode = variant_column.enable_doc_mode(); @@ -68,10 +69,10 @@ Status VExplodeV2TableFunction::_process_init_variant(Block* block, int value_co _multi_detail[children_column_idx].nested_type = array_type->get_nested_type(); } else { // null root, use nothing type - _array_columns[children_column_idx] = ColumnNullable::create( - ColumnArray::create(ColumnNothing::create(0)), ColumnUInt8::create(0)); - _array_columns[children_column_idx]->assume_mutable()->insert_many_defaults( - variant_column.size()); + auto array_column = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)), + ColumnUInt8::create(0)); + array_column->insert_many_defaults(variant_column.size()); + _array_columns[children_column_idx] = std::move(array_column); _multi_detail[children_column_idx].nested_type = std::make_shared(); } return Status::OK(); diff --git a/be/src/exprs/vcase_expr.h b/be/src/exprs/vcase_expr.h index 382193276cad29..b8e274be82a7bb 100644 --- a/be/src/exprs/vcase_expr.h +++ b/be/src/exprs/vcase_expr.h @@ -217,9 +217,9 @@ class VCaseExpr final : public VExpr { if (!then_columns[i]) { continue; } - auto* __restrict column_raw_data = - assert_cast( - then_columns[i]->assume_mutable().get()) + const auto* __restrict column_raw_data = + assert_cast( + then_columns[i].get()) ->get_data() .data(); if constexpr (std::is_same_v || diff --git a/be/src/exprs/vcompound_pred.h b/be/src/exprs/vcompound_pred.h index 5e83b0bb0283b7..4a53c29dc5da9d 100644 --- a/be/src/exprs/vcompound_pred.h +++ b/be/src/exprs/vcompound_pred.h @@ -180,8 +180,8 @@ class VCompoundPred : public VectorizedFnCall { } ColumnPtr rhs_column = nullptr; - uint8_t* __restrict rhs_data_column = nullptr; - uint8_t* __restrict rhs_null_map = nullptr; + const uint8_t* __restrict rhs_data_column = nullptr; + const uint8_t* __restrict rhs_null_map = nullptr; bool rhs_is_nullable = false; bool rhs_all_true = false; bool rhs_all_false = false; @@ -216,31 +216,36 @@ class VCompoundPred : public VectorizedFnCall { }; auto create_null_map_column = [&](ColumnPtr& null_map_column, - uint8_t* __restrict null_map_data) { + const uint8_t* __restrict null_map_data) { if (null_map_data == nullptr) { null_map_column = ColumnUInt8::create(size, 0); - null_map_data = assert_cast(null_map_column->assume_mutable().get()) - ->get_data() - .data(); + null_map_data = + assert_cast(null_map_column.get())->get_data().data(); } return null_map_data; }; auto vector_vector = [&]() { + MutableColumnPtr mutable_result_column; + uint8_t* __restrict result_data_column = nullptr; + const uint8_t* __restrict other_data_column = rhs_data_column; if (lhs_column->use_count() == 1) { - result_column = lhs_column; + mutable_result_column = IColumn::mutate(std::move(lhs_column)); + result_data_column = + assert_cast(mutable_result_column.get())->get_data().data(); } else if (rhs_column->use_count() == 1) { - result_column = rhs_column; - auto tmp_column = rhs_data_column; - rhs_data_column = lhs_data_column; - lhs_data_column = tmp_column; + mutable_result_column = IColumn::mutate(std::move(rhs_column)); + result_data_column = + assert_cast(mutable_result_column.get())->get_data().data(); + other_data_column = lhs_data_column; } else { - auto col_res = lhs_column->clone_resized(size); - lhs_data_column = assert_cast(col_res.get())->get_data().data(); - result_column = std::move(col_res); + mutable_result_column = lhs_column->clone_resized(size); + result_data_column = + assert_cast(mutable_result_column.get())->get_data().data(); } - do_not_null_pred(lhs_data_column, rhs_data_column, size); + do_not_null_pred(result_data_column, other_data_column, size); + result_column = std::move(mutable_result_column); }; auto vector_vector_null = [&]() { auto col_res = ColumnUInt8::create(size); @@ -349,7 +354,8 @@ class VCompoundPred : public VectorizedFnCall { } template - void static do_not_null_pred(uint8_t* __restrict lhs, uint8_t* __restrict rhs, size_t size) { + void static do_not_null_pred(uint8_t* __restrict lhs, const uint8_t* __restrict rhs, + size_t size) { #ifdef NDEBUG #if defined(__clang__) #pragma clang loop vectorize(enable) @@ -367,8 +373,8 @@ class VCompoundPred : public VectorizedFnCall { } template - void static do_null_pred(uint8_t* __restrict lhs_data, uint8_t* __restrict lhs_null, - uint8_t* __restrict rhs_data, uint8_t* __restrict rhs_null, + void static do_null_pred(const uint8_t* __restrict lhs_data, const uint8_t* __restrict lhs_null, + const uint8_t* __restrict rhs_data, const uint8_t* __restrict rhs_null, uint8_t* __restrict res_data, uint8_t* __restrict res_null, size_t size) { #ifdef NDEBUG @@ -394,22 +400,21 @@ class VCompoundPred : public VectorizedFnCall { [](const VExprSPtr& arg) -> bool { return arg->is_constant(); }); } - std::pair _get_raw_data_and_null_map(ColumnPtr column, - bool has_nullable_column) const { + std::pair _get_raw_data_and_null_map( + const ColumnPtr& column, bool has_nullable_column) const { if (has_nullable_column) { - auto* nullable_column = assert_cast(column->assume_mutable().get()); + const auto* nullable_column = assert_cast(column.get()); auto* data_column = - assert_cast(nullable_column->get_nested_column_ptr().get()) - ->get_data() - .data(); - auto* null_map = - assert_cast(nullable_column->get_null_map_column_ptr().get()) + assert_cast(nullable_column->get_nested_column_ptr().get()) ->get_data() .data(); + auto* null_map = assert_cast( + nullable_column->get_null_map_column_ptr().get()) + ->get_data() + .data(); return std::make_pair(data_column, null_map); } else { - auto* data_column = - assert_cast(column->assume_mutable().get())->get_data().data(); + auto* data_column = assert_cast(column.get())->get_data().data(); return std::make_pair(data_column, nullptr); } } diff --git a/be/src/format/arrow/arrow_stream_reader.cpp b/be/src/format/arrow/arrow_stream_reader.cpp index b91608ee3fafa1..d5b53dff3306e5 100644 --- a/be/src/format/arrow/arrow_stream_reader.cpp +++ b/be/src/format/arrow/arrow_stream_reader.cpp @@ -113,7 +113,7 @@ Status ArrowStreamReader::_do_get_next_block(Block* block, size_t* read_rows, bo } RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), column, 0, num_rows, _ctzz)); + *columns[c], column, 0, num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError("Failed to convert from arrow to block: {}", e.what()); } @@ -121,6 +121,7 @@ Status ArrowStreamReader::_do_get_next_block(Block* block, size_t* read_rows, bo *read_rows += batch.num_rows(); } + block->set_columns(std::move(columns)); *eof = (*read_rows == 0); return Status::OK(); } diff --git a/be/src/format/column_type_convert.cpp b/be/src/format/column_type_convert.cpp index cd71ffb5babb33..b7a8388b5be771 100644 --- a/be/src/format/column_type_convert.cpp +++ b/be/src/format/column_type_convert.cpp @@ -117,10 +117,10 @@ ColumnPtr ColumnTypeConverter::get_column(const DataTypePtr& src_type, ColumnPtr _cached_src_column->assume_mutable()->clear(); if (dst_type->is_nullable()) { - // In order to share null map between parquet converted src column and dst column to avoid copying. It is very tricky that will - // call mutable function `doris_nullable_column->get_null_map_column_ptr()` which will set `_need_update_has_null = true`. - // Because some operations such as agg will call `has_null()` to set `_need_update_has_null = false`. - auto* doris_nullable_column = static_cast(dst_column.get()); + // Seed the source nullable wrapper with the destination's current null map. Under the + // assert-mutability COW contract ColumnNullable::create() mutates/clones the subcolumns, so + // readers that append file nulls must copy back only the newly appended null-map slice. + const auto* doris_nullable_column = static_cast(dst_column.get()); return ColumnNullable::create(_cached_src_column, doris_nullable_column->get_null_map_column_ptr()); } diff --git a/be/src/format/column_type_convert.h b/be/src/format/column_type_convert.h index 04003c098f0d30..554e5a0c3662a2 100644 --- a/be/src/format/column_type_convert.h +++ b/be/src/format/column_type_convert.h @@ -44,6 +44,20 @@ namespace doris::converter { enum FileFormat { COMMON, ORC, PARQUET }; +// Helper: get the inner (non-nullable) mutable column from an exclusively-owned dst_col. +// - For non-nullable dst_col: returns a raw pointer to the column itself. +// - For nullable dst_col: returns a raw pointer to the nested (non-null) column. +// Must only be called when dst_col has exclusive ownership (use_count == 1). +// Returns IColumn* (raw pointer) to avoid creating a second owning MutableColumnPtr, +// which would violate COW invariant (use_count > 1). +inline IColumn* get_mutable_inner_col(MutableColumnPtr& dst_col) { + if (dst_col->is_nullable()) { + return static_cast(dst_col.get())->get_nested_column_ptr().get(); + } else { + return dst_col.get(); + } +} + template constexpr bool is_decimal_type() { return type == TYPE_DECIMALV2 || type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 || @@ -165,13 +179,13 @@ class IntegerToIntegerConverter : public ColumnTypeConverter { using DstColumnType = typename PrimitiveTypeTraits::ColumnType; using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { if constexpr (sizeof(DstCppType) < sizeof(SrcCppType)) { SrcCppType src_value = src_data[i]; @@ -212,7 +226,7 @@ class NumericToFloatPointConverter : public ColumnTypeConverter { using DstColumnType = typename PrimitiveTypeTraits::ColumnType; using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -223,7 +237,7 @@ class NumericToFloatPointConverter : public ColumnTypeConverter { auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { SrcCppType src_value = src_data[i]; if constexpr (is_integer_type()) { @@ -248,11 +262,11 @@ class BooleanToStringConverter : public ColumnTypeConverter { Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); for (int i = 0; i < rows; ++i) { std::string value = src_data[i] != 0 ? "TRUE" : "FALSE"; string_col.insert_data(value.data(), value.size()); @@ -269,7 +283,7 @@ class NumericToStringConverter : public ColumnTypeConverter { Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -279,7 +293,7 @@ class NumericToStringConverter : public ColumnTypeConverter { size_t rows = from_col->size(); size_t start_idx = to_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); for (int i = 0; i < rows; ++i) { if constexpr (SrcPrimitiveType == TYPE_FLOAT || SrcPrimitiveType == TYPE_DOUBLE) { if (fileFormat == FileFormat::ORC && std::isnan(src_data[i])) { @@ -318,11 +332,11 @@ class DecimalToStringConverter : public ColumnTypeConverter { Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); for (int i = 0; i < rows; ++i) { std::string value = src_data[i].to_string(_scale); string_col.insert_data(value.data(), value.size()); @@ -339,11 +353,11 @@ class TimeToStringConverter : public ColumnTypeConverter { using SrcCppType = typename PrimitiveTypeTraits::CppType; using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); char buf[50]; for (int i = 0; i < rows; ++i) { int len = (reinterpret_cast(src_data[i])).to_buffer(buf); @@ -571,19 +585,19 @@ class CastStringConverter : public ColumnTypeConverter { } NullMap* null_map = nullptr; - MutableColumnPtr to_col = nullptr; + IColumn* to_col = nullptr; if (dst_col->is_nullable()) { auto* nullable = assert_cast(dst_col.get()); - to_col = nullable->get_nested_column_ptr()->assume_mutable(); + to_col = nullable->get_nested_column_ptr().get(); null_map = &nullable->get_null_map_data(); } else { - to_col = dst_col->assume_mutable(); + to_col = dst_col.get(); } size_t rows = string_col->size(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = assert_cast(to_col.get())->get_data(); + auto& data = assert_cast(to_col)->get_data(); CastParameters params; for (int i = 0; i < rows; ++i) { bool can_cast = false; @@ -628,7 +642,7 @@ class DateTimeToNumericConverter : public ColumnTypeConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -639,7 +653,7 @@ class DateTimeToNumericConverter : public ColumnTypeConverter { const auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { const SrcCppType& src_value = src_data[i]; @@ -680,13 +694,13 @@ class TimeV2Converter : public ColumnTypeConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { const auto& src_value = reinterpret_cast(src_data[i]); auto& dst_value = reinterpret_cast(data[start_idx + i]); @@ -718,7 +732,7 @@ class NumericToDecimalConverter : public ColumnTypeConverter { using DstDorisType = typename PrimitiveTypeTraits::ColumnType::value_type; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -729,7 +743,7 @@ class NumericToDecimalConverter : public ColumnTypeConverter { auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); auto max_result = DataTypeDecimal::get_max_digits_number(_precision); auto multiplier = DataTypeDecimal::get_scale_multiplier(_scale); @@ -804,13 +818,13 @@ class DecimalToNumericConverter : public ColumnTypeConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -889,13 +903,13 @@ class DecimalToDecimalConverter : public ColumnTypeConverter { bool narrow_integral = (_to_precision - _to_scale) < (_from_precision - _from_scale); ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { SrcNativeType src_value = src_data[i].value; @@ -983,15 +997,15 @@ class VarBinaryConverter : public ColumnTypeConverter { from_col = &assert_cast(*src_col); } - MutableColumnPtr to_col = nullptr; + IColumn* to_col = nullptr; // nullmap flag seems have been handled in upper level if (dst_col->is_nullable()) { const auto* nullable = assert_cast(dst_col.get()); - to_col = nullable->get_nested_column_ptr()->assume_mutable(); + to_col = const_cast(nullable)->get_nested_column_ptr().get(); } else { - to_col = dst_col->assume_mutable(); + to_col = dst_col.get(); } - auto* to_dst_column = assert_cast(to_col.get()); + auto* to_dst_column = assert_cast(to_col); for (size_t i = 0; i < from_col->size(); ++i) { auto string_ref = from_col->get_data_at(i); diff --git a/be/src/format/csv/csv_reader.cpp b/be/src/format/csv/csv_reader.cpp index 539132c7c9f003..90340afafe1739 100644 --- a/be/src/format/csv/csv_reader.cpp +++ b/be/src/format/csv/csv_reader.cpp @@ -65,6 +65,19 @@ enum class FileCachePolicy : uint8_t; namespace doris { +namespace { + +size_t columns_byte_size(const std::vector& columns) { + size_t bytes = 0; + for (const auto& column : columns) { + DCHECK(column.get() != nullptr); + bytes += column->byte_size(); + } + return bytes; +} + +} // namespace + void EncloseCsvTextFieldSplitter::do_split(const Slice& line, std::vector* splitted_values) { const char* data = line.data; const auto& column_sep_positions = _text_line_reader_ctx->column_sep_positions(); @@ -437,7 +450,8 @@ Status CsvReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) block->set_columns(std::move(mutate_columns)); } else { auto columns = block->mutate_columns(); - while (rows < batch_size && !_line_reader_eof && (block->bytes() < max_block_bytes)) { + while (rows < batch_size && !_line_reader_eof && + (columns_byte_size(columns) < max_block_bytes)) { const uint8_t* ptr = nullptr; size_t size = 0; RETURN_IF_ERROR(_line_reader->read_line(&ptr, &size, &_line_reader_eof, _io_ctx)); @@ -457,7 +471,7 @@ Status CsvReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) } if (size == 0) { if (!_line_reader_eof && _state->is_read_csv_empty_line_as_null()) { - RETURN_IF_ERROR(_fill_empty_line(block, columns, &rows)); + RETURN_IF_ERROR(_fill_empty_line(columns, &rows)); } // Read empty line, continue continue; @@ -467,7 +481,7 @@ Status CsvReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) if (!success) { continue; } - RETURN_IF_ERROR(_fill_dest_columns(Slice(ptr, size), block, columns, &rows)); + RETURN_IF_ERROR(_fill_dest_columns(Slice(ptr, size), columns, &rows)); } block->set_columns(std::move(columns)); } @@ -719,8 +733,8 @@ Status CsvReader::_deserialize_one_cell(DataTypeSerDeSPtr serde, IColumn& column return serde->deserialize_one_cell_from_csv(column, slice, _options); } -Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, - std::vector& columns, size_t* rows) { +Status CsvReader::_fill_dest_columns(const Slice& line, std::vector& columns, + size_t* rows) { bool is_success = false; RETURN_IF_ERROR(_line_split_to_values(line, &is_success)); @@ -738,10 +752,7 @@ Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, IColumn* col_ptr = columns[i].get(); if (!_is_load) { - // block is a Block*, and get_by_position returns a ColumnPtr, - // which is a const pointer. Therefore, using const_cast is permissible. - col_ptr = const_cast( - block->get_by_position(_file_slot_idx_map[i]).column.get()); + col_ptr = columns[_file_slot_idx_map[i]].get(); } if (_use_nullable_string_opt[i]) { @@ -758,15 +769,11 @@ Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, return Status::OK(); } -Status CsvReader::_fill_empty_line(Block* block, std::vector& columns, - size_t* rows) { +Status CsvReader::_fill_empty_line(std::vector& columns, size_t* rows) { for (int i = 0; i < _file_slot_descs.size(); ++i) { IColumn* col_ptr = columns[i].get(); if (!_is_load) { - // block is a Block*, and get_by_position returns a ColumnPtr, - // which is a const pointer. Therefore, using const_cast is permissible. - col_ptr = const_cast( - block->get_by_position(_file_slot_idx_map[i]).column.get()); + col_ptr = columns[_file_slot_idx_map[i]].get(); } auto& null_column = assert_cast(*col_ptr); null_column.insert_data(nullptr, 0); diff --git a/be/src/format/csv/csv_reader.h b/be/src/format/csv/csv_reader.h index 077f089e5e9a18..f619ce4d4a85e5 100644 --- a/be/src/format/csv/csv_reader.h +++ b/be/src/format/csv/csv_reader.h @@ -232,9 +232,9 @@ class CsvReader : public TableFormatReader { private: Status _create_decompressor(); Status _create_file_reader(bool need_schema); - Status _fill_dest_columns(const Slice& line, Block* block, - std::vector& columns, size_t* rows); - Status _fill_empty_line(Block* block, std::vector& columns, size_t* rows); + Status _fill_dest_columns(const Slice& line, std::vector& columns, + size_t* rows); + Status _fill_empty_line(std::vector& columns, size_t* rows); Status _line_split_to_values(const Slice& line, bool* success); void _split_line(const Slice& line); void _init_system_properties(); diff --git a/be/src/format/jni/jni_data_bridge.cpp b/be/src/format/jni/jni_data_bridge.cpp index 9d5e37978a0118..fb1052c761e874 100644 --- a/be/src/format/jni/jni_data_bridge.cpp +++ b/be/src/format/jni/jni_data_bridge.cpp @@ -105,24 +105,27 @@ Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_co // org.apache.doris.common.jni.vec.ColumnType.Type#UNSUPPORTED will set column address as 0 return Status::InternalError("Unsupported type {} in java side", data_type->get_name()); } + auto mutable_doris_column = IColumn::mutate(std::move(doris_column)); MutableColumnPtr data_column; - if (doris_column->is_nullable()) { - auto* nullable_column = - reinterpret_cast(doris_column->assume_mutable().get()); + if (mutable_doris_column->is_nullable()) { + auto* nullable_column = assert_cast(mutable_doris_column.get()); data_column = nullable_column->get_nested_column_ptr(); NullMap& null_map = nullable_column->get_null_map_data(); size_t origin_size = null_map.size(); null_map.resize(origin_size + num_rows); memcpy(null_map.data() + origin_size, static_cast(null_map_ptr), num_rows); } else { - data_column = doris_column->assume_mutable(); + data_column = mutable_doris_column->get_ptr(); } // Date and DateTime are deprecated and not supported. + Status status = Status::OK(); switch (logical_type) { -#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ - case TYPE_INDEX: \ - return _fill_fixed_length_column( \ - data_column, reinterpret_cast(address.next_meta_as_ptr()), num_rows); +#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ + case TYPE_INDEX: { \ + auto* data = reinterpret_cast(address.next_meta_as_ptr()); \ + status = _fill_fixed_length_column(data_column, data, num_rows); \ + break; \ + } FOR_FIXED_LENGTH_TYPES(DISPATCH) #undef DISPATCH case PrimitiveType::TYPE_STRING: @@ -130,19 +133,27 @@ Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_co case PrimitiveType::TYPE_CHAR: [[fallthrough]]; case PrimitiveType::TYPE_VARCHAR: - return _fill_string_column(address, data_column, num_rows); + status = _fill_string_column(address, data_column, num_rows); + break; case PrimitiveType::TYPE_ARRAY: - return _fill_array_column(address, data_column, data_type, num_rows); + status = _fill_array_column(address, data_column, data_type, num_rows); + break; case PrimitiveType::TYPE_MAP: - return _fill_map_column(address, data_column, data_type, num_rows); + status = _fill_map_column(address, data_column, data_type, num_rows); + break; case PrimitiveType::TYPE_STRUCT: - return _fill_struct_column(address, data_column, data_type, num_rows); + status = _fill_struct_column(address, data_column, data_type, num_rows); + break; case PrimitiveType::TYPE_VARBINARY: - return _fill_varbinary_column(address, data_column, num_rows); + status = _fill_varbinary_column(address, data_column, num_rows); + break; default: - return Status::InvalidArgument("Unsupported type {} in jni scanner", data_type->get_name()); + status = Status::InvalidArgument("Unsupported type {} in jni scanner", + data_type->get_name()); + break; } - return Status::OK(); + doris_column = std::move(mutable_doris_column); + return status; } Status JniDataBridge::_fill_varbinary_column(TableMetaAddress& address, diff --git a/be/src/format/lance/lance_rust_reader.cpp b/be/src/format/lance/lance_rust_reader.cpp index 166bbd52dcc519..2eed2356734ca3 100644 --- a/be/src/format/lance/lance_rust_reader.cpp +++ b/be/src/format/lance/lance_rust_reader.cpp @@ -230,6 +230,7 @@ Status LanceRustReader::_do_get_next_block(Block* block, size_t* read_rows, bool const auto num_columns = record_batch->num_columns(); // Convert Arrow columns to Doris Block columns (same pattern as PaimonCppReader) + auto columns = block->mutate_columns(); for (int c = 0; c < num_columns; ++c) { const auto& field = record_batch->schema()->field(c); @@ -238,16 +239,17 @@ Status LanceRustReader::_do_get_next_block(Block* block, size_t* read_rows, bool continue; } - const ColumnWithTypeAndName& column_with_name = block->get_by_position(it->second); + const auto block_pos = it->second; + const ColumnWithTypeAndName& column_with_name = block->get_by_position(block_pos); try { RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), record_batch->column(c).get(), 0, - num_rows, _ctzz)); + *columns[block_pos], record_batch->column(c).get(), 0, num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError("Failed to convert Lance arrow to block: {}", e.what()); } } + block->set_columns(std::move(columns)); *read_rows = num_rows; *eof = false; return Status::OK(); diff --git a/be/src/format/orc/vorc_reader.cpp b/be/src/format/orc/vorc_reader.cpp index bcb1a8d70f4b3f..cff7d595110263 100644 --- a/be/src/format/orc/vorc_reader.cpp +++ b/be/src/format/orc/vorc_reader.cpp @@ -116,6 +116,40 @@ namespace doris { // TODO: we need to determine it by test. static constexpr uint32_t MAX_DICT_CODE_PREDICATE_TO_REWRITE = std::numeric_limits::max(); static constexpr char EMPTY_STRING_FOR_OVERFLOW[ColumnString::MAX_STRINGS_OVERFLOW_SIZE] = ""; + +static void fill_orc_null_map(ColumnNullable* nullable_column, const orc::ColumnVectorBatch* cvb, + size_t num_values) { + NullMap& map_data_column = nullable_column->get_null_map_data(); + const auto origin_size = map_data_column.size(); + map_data_column.resize(origin_size + num_values); + if (cvb->hasNulls) { + const auto* cvb_nulls = cvb->notNull.data(); + for (int i = 0; i < num_values; ++i) { + map_data_column[origin_size + i] = !cvb_nulls[i]; + } + } else { + memset(map_data_column.data() + origin_size, 0, num_values); + } +} + +static void align_orc_null_map(const ColumnPtr& src_column, ColumnNullable* dst_nullable_column, + size_t src_null_map_start, size_t new_rows) { + auto& dst_null_map = dst_nullable_column->get_null_map_column(); + const size_t old_rows = dst_nullable_column->get_nested_column().size(); + const size_t expected_rows = old_rows + new_rows; + if (dst_null_map.size() == expected_rows) { + return; + } + DCHECK_EQ(dst_null_map.size(), old_rows); + if (src_column->is_nullable()) { + const auto* src_nullable = assert_cast(src_column.get()); + DCHECK_GE(src_nullable->get_null_map_column().size(), src_null_map_start + new_rows); + dst_null_map.insert_range_from(src_nullable->get_null_map_column(), src_null_map_start, + new_rows); + } else { + dst_null_map.insert_many_vals(0, new_rows); + } +} // Because HIVE 0.11 & 0.12 does not support precision and scale for decimal // The decimal type of orc file produced by HIVE 0.11 & 0.12 are DECIMAL(0,0) // We should set a default precision and scale for these orc files. @@ -2018,13 +2052,14 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, // Handle key column: if still missing, fill with default values if (key_is_missing) { // Fill key column with default values (nulls or empty values) - auto mutable_key_column = doris_key_column->assume_mutable(); + auto mutable_key_column = IColumn::mutate(std::move(doris_key_column)); if (mutable_key_column->is_nullable()) { auto* nullable_column = static_cast(mutable_key_column.get()); nullable_column->insert_many_defaults(element_size); } else { mutable_key_column->insert_many_defaults(element_size); } + doris_key_column = std::move(mutable_key_column); } else { // Normal processing: convert ORC column to Doris column RETURN_IF_ERROR(_orc_column_to_doris_column( @@ -2035,13 +2070,14 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, // Handle value column: if still missing, fill with default values if (value_is_missing) { // Fill value column with default values (nulls or empty values) - auto mutable_value_column = doris_value_column->assume_mutable(); + auto mutable_value_column = IColumn::mutate(std::move(doris_value_column)); if (mutable_value_column->is_nullable()) { auto* nullable_column = static_cast(mutable_value_column.get()); nullable_column->insert_many_defaults(element_size); } else { mutable_value_column->insert_many_defaults(element_size); } + doris_value_column = std::move(mutable_value_column); } else { // Normal processing: convert ORC column to Doris column RETURN_IF_ERROR(_orc_column_to_doris_column( @@ -2106,8 +2142,10 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, "Child field of '{}' is not nullable, but is missing in orc file", col_name); } - reinterpret_cast(doris_field->assume_mutable().get()) + auto mutable_field = IColumn::mutate(std::move(doris_field)); + reinterpret_cast(mutable_field.get()) ->insert_many_defaults(num_values); + doris_field = std::move(mutable_field); } for (auto read_field : read_fields) { @@ -2172,45 +2210,64 @@ Status OrcReader::_orc_column_to_doris_column( resolved_column = converter->get_column(src_type, doris_column, data_type); resolved_type = converter->get_type(); - if (resolved_column->is_nullable()) { + MutableColumnPtr mutable_resolved_column; + if (converter->is_consistent()) { + resolved_column.reset(); + mutable_resolved_column = IColumn::mutate(std::move(doris_column)); + } else { + mutable_resolved_column = IColumn::mutate(std::move(resolved_column)); + } + + size_t src_null_map_start = 0; + if (mutable_resolved_column->is_nullable()) { SCOPED_RAW_TIMER(&_statistics.decode_null_map_time); auto* nullable_column = - reinterpret_cast(resolved_column->assume_mutable().get()); + reinterpret_cast(mutable_resolved_column.get()); data_column = nullable_column->get_nested_column_ptr(); - - NullMap& map_data_column = nullable_column->get_null_map_data(); - auto origin_size = map_data_column.size(); - map_data_column.resize(origin_size + num_values); - if (cvb->hasNulls) { - const auto* cvb_nulls = cvb->notNull.data(); - for (int i = 0; i < num_values; ++i) { - map_data_column[origin_size + i] = !cvb_nulls[i]; - } - } else { - memset(map_data_column.data() + origin_size, 0, num_values); - } + src_null_map_start = nullable_column->get_null_map_column().size(); + fill_orc_null_map(nullable_column, cvb, num_values); } else { if (cvb->hasNulls) { return Status::InternalError("Not nullable column {} has null values in orc file", col_name); } - data_column = resolved_column->assume_mutable(); + data_column = std::move(mutable_resolved_column); } RETURN_IF_ERROR(_fill_doris_data_column( col_name, data_column, remove_nullable(resolved_type), root_node, orc_column_type, cvb, num_values)); - // resolve schema change + + if (mutable_resolved_column) { + data_column.reset(); + resolved_column = std::move(mutable_resolved_column); + } else { + resolved_column = std::move(data_column); + } + + if (converter->is_consistent()) { + doris_column = std::move(resolved_column); + return Status::OK(); + } + + doris_column = IColumn::mutate(std::move(doris_column)); auto converted_column = doris_column->assume_mutable(); + if (converted_column->is_nullable()) { + const size_t new_rows = remove_nullable(resolved_column)->size(); + align_orc_null_map(resolved_column, + reinterpret_cast(converted_column.get()), + src_null_map_start, new_rows); + } return converter->convert(resolved_column, converted_column); } else { - auto mutable_column = doris_column->assume_mutable(); + auto mutable_column = IColumn::mutate(std::move(doris_column)); if (mutable_column->is_nullable()) { auto* nullable_column = static_cast(mutable_column.get()); nullable_column->insert_many_defaults(num_values); } else { mutable_column->insert_many_defaults(num_values); } + doris_column = std::move(mutable_column); } return Status::OK(); diff --git a/be/src/format/parquet/parquet_column_convert.h b/be/src/format/parquet/parquet_column_convert.h index f56ad295bab968..ee7dfd577733b6 100644 --- a/be/src/format/parquet/parquet_column_convert.h +++ b/be/src/format/parquet/parquet_column_convert.h @@ -194,6 +194,47 @@ struct ConvertParams { } }; +inline IColumn* get_mutable_inner_column(ColumnPtr& column) { + column = IColumn::mutate(std::move(column)); + auto mutable_column = column->assume_mutable(); + if (mutable_column->is_nullable()) { + return &assert_cast(mutable_column.get())->get_nested_column(); + } + return mutable_column.get(); +} + +inline size_t get_mutable_inner_column_size(const ColumnPtr& column) { + if (column->is_nullable()) { + const auto* nullable = assert_cast(column.get()); + return nullable->get_nested_column().size(); + } + return column->size(); +} + +inline void align_null_map(ColumnPtr& src_column, ColumnPtr& dst_column, size_t old_rows, + size_t new_rows, size_t src_null_map_start = 0) { + if (!dst_column->is_nullable()) { + return; + } + + dst_column = IColumn::mutate(std::move(dst_column)); + auto* dst_nullable = assert_cast(dst_column->assume_mutable().get()); + auto& dst_null_map = dst_nullable->get_null_map_column(); + const size_t expected_rows = old_rows + new_rows; + if (dst_null_map.size() == expected_rows) { + return; + } + DCHECK_EQ(dst_null_map.size(), old_rows); + if (src_column->is_nullable()) { + const auto* src_nullable = assert_cast(src_column.get()); + DCHECK_GE(src_nullable->get_null_map_column().size(), src_null_map_start + new_rows); + dst_null_map.insert_range_from(src_nullable->get_null_map_column(), src_null_map_start, + new_rows); + } else { + dst_null_map.insert_many_vals(0, new_rows); + } +} + /** * Convert parquet physical column to logical column * In parquet document(https://github.com/apache/parquet-format/blob/master/LogicalTypes.md), @@ -254,26 +295,36 @@ class PhysicalToLogicalConverter { PrimitiveType::TYPE_INT, dst_logical_type->is_nullable()); } if (is_consistent() && _logical_converter->is_consistent()) { + dst_logical_col = std::move(src_physical_col); return Status::OK(); } + if (_logical_converter->is_consistent()) { + const size_t old_rows = get_mutable_inner_column_size(dst_logical_col); + RETURN_IF_ERROR(physical_convert(src_physical_col, dst_logical_col)); + align_null_map(src_physical_col, dst_logical_col, old_rows, + get_mutable_inner_column_size(dst_logical_col) - old_rows); + return Status::OK(); + } + ColumnPtr src_logical_column; if (is_consistent()) { - if (dst_logical_type->is_nullable()) { - auto doris_nullable_column = - assert_cast(dst_logical_col.get()); - src_logical_column = - ColumnNullable::create(_cached_src_physical_column, - doris_nullable_column->get_null_map_column_ptr()); - } else { - src_logical_column = _cached_src_physical_column; - } + src_logical_column = src_physical_col; } else { src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_col, dst_logical_type); } + const size_t src_old_rows = get_mutable_inner_column_size(src_logical_column); RETURN_IF_ERROR(physical_convert(src_physical_col, src_logical_column)); + align_null_map(src_physical_col, src_logical_column, src_old_rows, + get_mutable_inner_column_size(src_logical_column) - src_old_rows); + + dst_logical_col = IColumn::mutate(std::move(dst_logical_col)); + const size_t dst_old_rows = get_mutable_inner_column_size(dst_logical_col); auto converted_column = dst_logical_col->assume_mutable(); - return _logical_converter->convert(src_logical_column, converted_column); + RETURN_IF_ERROR(_logical_converter->convert(src_logical_column, converted_column)); + align_null_map(src_logical_column, dst_logical_col, dst_old_rows, + get_mutable_inner_column_size(dst_logical_col) - dst_old_rows, src_old_rows); + return Status::OK(); } virtual ColumnPtr get_physical_column(tparquet::Type::type src_physical_type, @@ -283,6 +334,11 @@ class PhysicalToLogicalConverter { DataTypePtr& get_physical_type() { return _cached_src_physical_type; } + bool read_directly_into_dst_logical_column() { + return !_convert_params->is_type_compatibility && is_consistent() && + _logical_converter->is_consistent(); + } + virtual bool is_consistent() { return false; } virtual bool support() { return true; } @@ -319,14 +375,14 @@ class LittleIntPhysicalConverter : public PhysicalToLogicalConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; using DstColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); size_t rows = from_col->size(); // always comes from tparquet::Type::INT32 auto& src_data = assert_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = assert_cast(*to_col.get()).get_data(); + auto& data = assert_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { data[start_idx + i] = static_cast(src_data[i]); } @@ -378,13 +434,13 @@ class UnsignedIntegerConverter : public PhysicalToLogicalConverter { using DstColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); auto& src_data = assert_cast(from_col.get())->get_data(); size_t rows = src_data.size(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = assert_cast(*to_col.get()).get_data(); + auto& data = assert_cast(*to_col).get_data(); for (int i = 0; i < rows; i++) { StorageCppType src_value = src_data[i]; @@ -405,12 +461,12 @@ class FixedSizeBinaryConverter : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); auto* src_data = assert_cast(from_col.get()); size_t length = src_data->size(); size_t num_values = length / _type_length; - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); auto& offsets = string_col.get_offsets(); auto& chars = string_col.get_chars(); @@ -441,12 +497,12 @@ class Float16PhysicalConverter : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); const auto* src_data = assert_cast(from_col.get()); size_t length = src_data->size(); size_t num_values = length / _type_length; - auto* to_float_column = assert_cast(to_col.get()); + auto* to_float_column = assert_cast(to_col); size_t start_idx = to_float_column->size(); to_float_column->resize(start_idx + num_values); auto& to_float_column_data = to_float_column->get_data(); @@ -528,15 +584,8 @@ class UUIDVarBinaryConverter : public PhysicalToLogicalConverter { uint8_col = &assert_cast(*src_physical_col); } - MutableColumnPtr to_col = nullptr; - // nullmap flag seems have been handled in upper level - if (src_logical_column->is_nullable()) { - const auto* nullable = assert_cast(src_logical_column.get()); - to_col = nullable->get_nested_column_ptr()->assume_mutable(); - } else { - to_col = src_logical_column->assume_mutable(); - } - auto* to_varbinary_column = assert_cast(to_col.get()); + IColumn* to_col = get_mutable_inner_column(src_logical_column); + auto* to_varbinary_column = assert_cast(to_col); size_t length = uint8_col->size(); size_t num_values = length / _type_length; const auto* ptr = uint8_col->get_data().data(); @@ -561,7 +610,7 @@ class FixedSizeToDecimal : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); #define M(FixedTypeLength, ValueCopyType) \ case FixedTypeLength: \ @@ -612,13 +661,13 @@ class FixedSizeToDecimal : public PhysicalToLogicalConverter { } template - Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { + Status _convert_internal(ColumnPtr& src_col, IColumn* dst_col) { size_t rows = src_col->size() / fixed_type_length; auto* buf = static_cast(src_col.get())->get_data().data(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = static_cast*>(dst_col.get())->get_data(); + auto& data = static_cast*>(dst_col)->get_data(); size_t offset = 0; for (int i = 0; i < rows; i++) { // When Decimal in parquet is stored in byte arrays, binary and fixed, @@ -645,7 +694,7 @@ class StringToDecimal : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { using ValueCopyType = DecimalType::NativeType; ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); auto buf = static_cast(src_col.get())->get_chars().data(); @@ -653,7 +702,7 @@ class StringToDecimal : public PhysicalToLogicalConverter { size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = static_cast*>(dst_col.get())->get_data(); + auto& data = static_cast*>(dst_col)->get_data(); for (int i = 0; i < rows; i++) { size_t len = offset[i] - offset[i - 1]; // When Decimal in parquet is stored in byte arrays, binary and fixed, @@ -678,7 +727,7 @@ class NumberToDecimal : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { using ValueCopyType = typename DecimalType::NativeType; ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); auto* src_data = @@ -686,7 +735,7 @@ class NumberToDecimal : public PhysicalToLogicalConverter { size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto* data = static_cast*>(dst_col.get())->get_data().data(); + auto* data = static_cast*>(dst_col)->get_data().data(); for (int i = 0; i < rows; i++) { ValueCopyType value; @@ -706,14 +755,14 @@ class NumberToDecimal : public PhysicalToLogicalConverter { class Int32ToDate : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); size_t start_idx = dst_col->size(); dst_col->reserve(start_idx + rows); auto& src_data = static_cast(src_col.get())->get_data(); - auto& data = static_cast(dst_col.get())->get_data(); + auto& data = static_cast(dst_col)->get_data(); date_day_offset_dict& date_dict = date_day_offset_dict::get(); for (int i = 0; i < rows; i++) { @@ -727,14 +776,14 @@ class Int32ToDate : public PhysicalToLogicalConverter { struct Int64ToTimestamp : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); auto src_data = static_cast(src_col.get())->get_data().data(); - auto& data = static_cast(dst_col.get())->get_data(); + auto& data = static_cast(dst_col)->get_data(); for (int i = 0; i < rows; i++) { int64_t x = src_data[i]; @@ -760,14 +809,14 @@ struct Int64ToTimestamp : public PhysicalToLogicalConverter { struct Int64ToTimestampTz : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); const auto& src_data = assert_cast(src_col.get())->get_data(); - auto& dest_data = assert_cast(dst_col.get())->get_data(); + auto& dest_data = assert_cast(dst_col)->get_data(); static const cctz::time_zone UTC = cctz::utc_time_zone(); for (int i = 0; i < rows; i++) { @@ -784,14 +833,14 @@ struct Int64ToTimestampTz : public PhysicalToLogicalConverter { struct Int96toTimestamp : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size() / sizeof(ParquetInt96); auto& src_data = static_cast(src_col.get())->get_data(); auto ParquetInt96_data = (ParquetInt96*)src_data.data(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = static_cast(dst_col.get())->get_data(); + auto& data = static_cast(dst_col)->get_data(); for (int i = 0; i < rows; i++) { ParquetInt96 src_cell_data = ParquetInt96_data[i]; @@ -818,14 +867,14 @@ struct Int96toTimestamp : public PhysicalToLogicalConverter { struct Int96toTimestampTz : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size() / sizeof(ParquetInt96); const auto& src_data = assert_cast(src_col.get())->get_data(); auto* ParquetInt96_data = (ParquetInt96*)src_data.data(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = assert_cast(dst_col.get())->get_data(); + auto& data = assert_cast(dst_col)->get_data(); static const cctz::time_zone UTC = cctz::utc_time_zone(); for (int i = 0; i < rows; i++) { diff --git a/be/src/format/parquet/vparquet_column_reader.cpp b/be/src/format/parquet/vparquet_column_reader.cpp index ba7d42a5aed84e..1deffec6a04633 100644 --- a/be/src/format/parquet/vparquet_column_reader.cpp +++ b/be/src/format/parquet/vparquet_column_reader.cpp @@ -328,12 +328,11 @@ Status ScalarColumnReader::_read_values(size_t num_ MutableColumnPtr data_column; std::vector null_map; NullMap* map_data_column = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { SCOPED_RAW_TIMER(&_decode_null_map_time); - // doris_column either originates from a mutable block in vparquet_group_reader - // or is a newly created ColumnPtr, and therefore can be modified. - auto* nullable_column = - assert_cast(const_cast(doris_column.get())); + auto mutable_column = doris_column->assume_mutable(); + auto* nullable_column = assert_cast(mutable_column.get()); data_column = nullable_column->get_nested_column_ptr(); map_data_column = &(nullable_column->get_null_map_data()); @@ -411,12 +410,11 @@ Status ScalarColumnReader::_read_nested_column( // Handle nullable columns MutableColumnPtr data_column; NullMap* map_data_column = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { SCOPED_RAW_TIMER(&_decode_null_map_time); - // doris_column either originates from a mutable block in vparquet_group_reader - // or is a newly created ColumnPtr, and therefore can be modified. - auto* nullable_column = - const_cast(assert_cast(doris_column.get())); + auto mutable_column = doris_column->assume_mutable(); + auto* nullable_column = assert_cast(mutable_column.get()); data_column = nullable_column->get_nested_column_ptr(); map_data_column = &(nullable_column->get_null_map_data()); } else { @@ -550,6 +548,10 @@ Status ScalarColumnReader::read_column_data( ColumnPtr resolved_column = _converter->get_physical_column(_field_schema->physical_type, _field_schema->data_type, doris_column, type, is_dict_filter); + if (_converter->read_directly_into_dst_logical_column()) { + DCHECK_EQ(resolved_column.get(), doris_column.get()); + resolved_column = std::move(doris_column); + } DataTypePtr& resolved_type = _converter->get_physical_type(); _def_levels.clear(); @@ -658,6 +660,7 @@ Status ArrayColumnReader::read_column_data( int64_t real_column_size) { MutableColumnPtr data_column; NullMap* null_map_ptr = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { auto mutable_column = doris_column->assume_mutable(); auto* nullable_column = assert_cast(mutable_column.get()); @@ -713,6 +716,7 @@ Status MapColumnReader::read_column_data( int64_t real_column_size) { MutableColumnPtr data_column; NullMap* null_map_ptr = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { auto mutable_column = doris_column->assume_mutable(); auto* nullable_column = assert_cast(mutable_column.get()); @@ -789,6 +793,7 @@ Status StructColumnReader::read_column_data( int64_t real_column_size) { MutableColumnPtr data_column; NullMap* null_map_ptr = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { auto mutable_column = doris_column->assume_mutable(); auto* nullable_column = assert_cast(mutable_column.get()); @@ -986,6 +991,7 @@ Status StructColumnReader::read_column_data( auto& doris_field = doris_struct.get_column_ptr(idx); auto& doris_type = doris_struct_type->get_element(idx); DCHECK(doris_type->is_nullable()); + doris_field = IColumn::mutate(std::move(doris_field)); auto mutable_column = doris_field->assume_mutable(); auto* nullable_column = static_cast(mutable_column.get()); nullable_column->insert_many_defaults(missing_column_sz); diff --git a/be/src/format/parquet/vparquet_column_reader.h b/be/src/format/parquet/vparquet_column_reader.h index 9d9fd2280c88f8..8673361eb46dd6 100644 --- a/be/src/format/parquet/vparquet_column_reader.h +++ b/be/src/format/parquet/vparquet_column_reader.h @@ -482,6 +482,7 @@ class SkipReadingReader : public ParquetColumnReader { // Simulate reading without actually reading data // Fill with default/null values based on column type + doris_column = IColumn::mutate(std::move(doris_column)); MutableColumnPtr data_column = doris_column->assume_mutable(); if (real_column_size > 0) { diff --git a/be/src/format/table/equality_delete.cpp b/be/src/format/table/equality_delete.cpp index 82deb7bd59c20a..dc94d8151f2048 100644 --- a/be/src/format/table/equality_delete.cpp +++ b/be/src/format/table/equality_delete.cpp @@ -68,9 +68,8 @@ Status SimpleEqualityDelete::filter_data_block( const NullMap& null_map = reinterpret_cast(column_and_type.column.get()) ->get_null_map_data(); - _hybrid_set->find_batch_nullable( - remove_nullable(column_and_type.column)->assume_mutable_ref(), rows, null_map, - *_single_filter); + _hybrid_set->find_batch_nullable(*remove_nullable(column_and_type.column), rows, null_map, + *_single_filter); if (_hybrid_set->contain_null()) { auto* filter_data = _single_filter->data(); for (size_t i = 0; i < rows; ++i) { @@ -78,8 +77,7 @@ Status SimpleEqualityDelete::filter_data_block( } } } else { - _hybrid_set->find_batch(column_and_type.column->assume_mutable_ref(), rows, - *_single_filter); + _hybrid_set->find_batch(*column_and_type.column, rows, *_single_filter); } // should reverse _filter auto* filter_data = filter.data(); diff --git a/be/src/format/table/iceberg_reader_mixin.h b/be/src/format/table/iceberg_reader_mixin.h index 42c80c9b7d4ddc..565c77b5ab0255 100644 --- a/be/src/format/table/iceberg_reader_mixin.h +++ b/be/src/format/table/iceberg_reader_mixin.h @@ -554,6 +554,7 @@ Status IcebergReaderMixin::_equality_delete_base( if (read_rows > 0) { MutableBlock mutable_block(&eq_file_block); RETURN_IF_ERROR(mutable_block.merge(tmp_block)); + eq_file_block = mutable_block.to_block(); } } } diff --git a/be/src/format/table/paimon_cpp_reader.cpp b/be/src/format/table/paimon_cpp_reader.cpp index 4925bbb3e7a9bd..e628c30af737ba 100644 --- a/be/src/format/table/paimon_cpp_reader.cpp +++ b/be/src/format/table/paimon_cpp_reader.cpp @@ -117,6 +117,7 @@ Status PaimonCppReader::_do_get_next_block(Block* block, size_t* read_rows, bool auto record_batch = std::move(import_result).ValueUnsafe(); const auto num_rows = static_cast(record_batch->num_rows()); const auto num_columns = record_batch->num_columns(); + auto columns = block->mutate_columns(); for (int c = 0; c < num_columns; ++c) { const auto& field = record_batch->schema()->field(c); if (field->name() == VALUE_KIND_FIELD) { @@ -128,16 +129,17 @@ Status PaimonCppReader::_do_get_next_block(Block* block, size_t* read_rows, bool // Skip columns that are not in the block (e.g., partition columns handled elsewhere) continue; } - const ColumnWithTypeAndName& column_with_name = block->get_by_position(it->second); + const auto block_pos = it->second; + const ColumnWithTypeAndName& column_with_name = block->get_by_position(block_pos); try { RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), record_batch->column(c).get(), 0, - num_rows, _ctzz)); + *columns[block_pos], record_batch->column(c).get(), 0, num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError("Failed to convert from arrow to block: {}", e.what()); } } + block->set_columns(std::move(columns)); *read_rows = num_rows; *eof = false; return Status::OK(); diff --git a/be/src/format/table/remote_doris_reader.cpp b/be/src/format/table/remote_doris_reader.cpp index 5280b655a63ef8..487aad2869b90d 100644 --- a/be/src/format/table/remote_doris_reader.cpp +++ b/be/src/format/table/remote_doris_reader.cpp @@ -72,6 +72,7 @@ Status RemoteDorisReader::_do_get_next_block(Block* block, size_t* read_rows, bo auto batch = chunk.data; auto num_rows = batch->num_rows(); auto num_columns = batch->num_columns(); + auto columns = block->mutate_columns(); for (int c = 0; c < num_columns; ++c) { arrow::Array* column = batch->column(c).get(); @@ -82,10 +83,10 @@ Status RemoteDorisReader::_do_get_next_block(Block* block, size_t* read_rows, bo } try { - const ColumnWithTypeAndName& column_with_name = - block->get_by_position((*_col_name_to_block_idx)[column_name]); + auto block_pos = (*_col_name_to_block_idx)[column_name]; + const ColumnWithTypeAndName& column_with_name = block->get_by_position(block_pos); RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), column, 0, num_rows, _ctzz)); + *columns[block_pos], column, 0, num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError( "Failed to convert from arrow to block, column_name: {}, e: {}", column_name, @@ -93,6 +94,7 @@ Status RemoteDorisReader::_do_get_next_block(Block* block, size_t* read_rows, bo } } + block->set_columns(std::move(columns)); *read_rows += num_rows; return Status::OK(); diff --git a/be/src/information_schema/schema_active_queries_scanner.cpp b/be/src/information_schema/schema_active_queries_scanner.cpp index 00f0c5b5de763e..de0844af8abc93 100644 --- a/be/src/information_schema/schema_active_queries_scanner.cpp +++ b/be/src/information_schema/schema_active_queries_scanner.cpp @@ -133,6 +133,7 @@ Status SchemaActiveQueriesScanner::get_next_block_internal(Block* block, bool* e int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_active_query_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_authentication_integrations_scanner.cpp b/be/src/information_schema/schema_authentication_integrations_scanner.cpp index 95359b58264d15..4cbf55b198d31b 100644 --- a/be/src/information_schema/schema_authentication_integrations_scanner.cpp +++ b/be/src/information_schema/schema_authentication_integrations_scanner.cpp @@ -137,6 +137,7 @@ Status SchemaAuthenticationIntegrationsScanner::get_next_block_internal(Block* b MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_authentication_integrations_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_backend_active_tasks.cpp b/be/src/information_schema/schema_backend_active_tasks.cpp index b41f116b7550af..ddb15b84aa409d 100644 --- a/be/src/information_schema/schema_backend_active_tasks.cpp +++ b/be/src/information_schema/schema_backend_active_tasks.cpp @@ -89,10 +89,11 @@ Status SchemaBackendActiveTasksScanner::get_next_block_internal(Block* block, bo int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_task_stats_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp b/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp index 3c7b1ec0bc5c9a..5b25a84304d1bb 100644 --- a/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp +++ b/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp @@ -85,6 +85,7 @@ Status SchemaBackendKerberosTicketCacheScanner::get_next_block_internal(Block* b int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_info_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp b/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp index fec899c252a933..18e490f09b3fed 100644 --- a/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp +++ b/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp @@ -145,6 +145,7 @@ Status SchemaCatalogMetaCacheStatsScanner::get_next_block_internal(Block* block, int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_database_properties_scanner.cpp b/be/src/information_schema/schema_database_properties_scanner.cpp index c73dd9301e056d..d1427fe43e915f 100644 --- a/be/src/information_schema/schema_database_properties_scanner.cpp +++ b/be/src/information_schema/schema_database_properties_scanner.cpp @@ -149,6 +149,7 @@ Status SchemaDatabasePropertiesScanner::get_next_block_internal(Block* block, bo int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_dbproperties_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; if (!check_and_mark_eos(eos)) { *eos = false; diff --git a/be/src/information_schema/schema_file_cache_statistics.cpp b/be/src/information_schema/schema_file_cache_statistics.cpp index 0b69766bbeeae9..5be2df30d53b11 100644 --- a/be/src/information_schema/schema_file_cache_statistics.cpp +++ b/be/src/information_schema/schema_file_cache_statistics.cpp @@ -77,6 +77,7 @@ Status SchemaFileCacheStatisticsScanner::get_next_block_internal(Block* block, b int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_stats_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_partitions_scanner.cpp b/be/src/information_schema/schema_partitions_scanner.cpp index 834fd928f7126e..87c0ce078b787d 100644 --- a/be/src/information_schema/schema_partitions_scanner.cpp +++ b/be/src/information_schema/schema_partitions_scanner.cpp @@ -210,6 +210,7 @@ Status SchemaPartitionsScanner::get_next_block_internal(Block* block, bool* eos) int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_partitions_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; if (!check_and_mark_eos(eos)) { diff --git a/be/src/information_schema/schema_role_mappings_scanner.cpp b/be/src/information_schema/schema_role_mappings_scanner.cpp index 31e58e6cbe9fb5..84d0e26eb44393 100644 --- a/be/src/information_schema/schema_role_mappings_scanner.cpp +++ b/be/src/information_schema/schema_role_mappings_scanner.cpp @@ -134,6 +134,7 @@ Status SchemaRoleMappingsScanner::get_next_block_internal(Block* block, bool* eo int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_role_mappings_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_scanner.cpp b/be/src/information_schema/schema_scanner.cpp index 981956330ce7f5..9526f21d7796f9 100644 --- a/be/src/information_schema/schema_scanner.cpp +++ b/be/src/information_schema/schema_scanner.cpp @@ -95,6 +95,24 @@ namespace doris { class ObjectPool; +namespace { + +void insert_column_range(ColumnWithTypeAndName* dst, const ColumnWithTypeAndName& src, size_t start, + size_t length) { + DORIS_CHECK(dst->column.get() != nullptr); + DORIS_CHECK(src.column.get() != nullptr); + MutableColumnPtr dst_column = IColumn::mutate(std::move(dst->column)); + ColumnPtr src_column = src.column->convert_to_full_column_if_const(); + if (dst_column->is_nullable() && !src_column->is_nullable()) { + src_column = make_nullable(src_column); + } + DORIS_CHECK(dst_column->is_nullable() == src_column->is_nullable()); + dst_column->insert_range_from(*src_column, start, length); + dst->column = std::move(dst_column); +} + +} // namespace + SchemaScanner::SchemaScanner(const std::vector& columns, TSchemaTableType::type type) : _is_init(false), _columns(columns), _schema_table_type(type) {} @@ -115,10 +133,8 @@ Status SchemaScanner::get_next_block(RuntimeState* state, Block* block, bool* eo DCHECK(_async_thread_running == false); RETURN_IF_ERROR(_scanner_status.status()); for (size_t i = 0; i < block->columns(); i++) { - std::move(*block->get_by_position(i).column) - .mutate() - ->insert_range_from(*_data_block->get_by_position(i).column, 0, - _data_block->rows()); + insert_column_range(&block->get_by_position(i), _data_block->get_by_position(i), 0, + _data_block->rows()); } _data_block->clear_column_data(); *eos = _eos; @@ -295,11 +311,10 @@ void SchemaScanner::_init_block(Block* src_block) { Status SchemaScanner::fill_dest_column_for_range(Block* block, size_t pos, const std::vector& datas) { const ColumnDesc& col_desc = _columns[pos]; - MutableColumnPtr column_ptr; - column_ptr = std::move(*block->get_by_position(pos).column).assume_mutable(); + MutableColumnPtr column_ptr = IColumn::mutate(std::move(block->get_by_position(pos).column)); IColumn* col_ptr = column_ptr.get(); - auto* nullable_column = reinterpret_cast(col_ptr); + auto* nullable_column = assert_cast(col_ptr); // Resize in advance to improve insertion efficiency. size_t fill_num = datas.size(); @@ -440,6 +455,7 @@ Status SchemaScanner::fill_dest_column_for_range(Block* block, size_t pos, } } } + block->replace_by_position(pos, std::move(column_ptr)); return Status::OK(); } @@ -454,8 +470,8 @@ std::string SchemaScanner::get_db_from_full_name(const std::string& full_name) { Status SchemaScanner::insert_block_column(TCell cell, int col_index, Block* block, PrimitiveType type) { MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = std::move(*block->get_by_position(col_index).column).assume_mutable(); - auto* nullable_column = reinterpret_cast(mutable_col_ptr.get()); + mutable_col_ptr = IColumn::mutate(std::move(block->get_by_position(col_index).column)); + auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); switch (type) { @@ -510,6 +526,7 @@ Status SchemaScanner::insert_block_column(TCell cell, int col_index, Block* bloc } } nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); return Status::OK(); } diff --git a/be/src/information_schema/schema_scanner_helper.cpp b/be/src/information_schema/schema_scanner_helper.cpp index 9ec2cdcd7cbaa2..7907dc264b66fd 100644 --- a/be/src/information_schema/schema_scanner_helper.cpp +++ b/be/src/information_schema/schema_scanner_helper.cpp @@ -19,6 +19,7 @@ #include "cctz/time_zone.h" #include "core/block/block.h" +#include "core/column/column_nullable.h" #include "core/data_type/data_type_factory.hpp" #include "core/data_type/primitive_type.h" #include "core/string_ref.h" @@ -31,29 +32,31 @@ namespace doris { void SchemaScannerHelper::insert_string_value(int col_index, std::string_view str_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_data(str_val.data(), str_val.size()); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_datetime_value(int col_index, const std::vector& datas, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); auto data = datas[0]; assert_cast(col_ptr)->insert_data(reinterpret_cast(data), 0); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_datetime_value(int col_index, int64_t timestamp, const cctz::time_zone& ctz, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); @@ -64,41 +67,46 @@ void SchemaScannerHelper::insert_datetime_value(int col_index, int64_t timestamp auto data = datas[0]; assert_cast(col_ptr)->insert_data(reinterpret_cast(data), 0); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_bool_value(int col_index, bool bool_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_value(bool_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_int32_value(int col_index, int32_t int_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_value(int_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_int64_value(int col_index, int64_t int_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_value(int_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_double_value(int col_index, double double_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_value(double_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } } // namespace doris diff --git a/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp b/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp index 2a223c144ba5fa..1fcc0cb838ad93 100644 --- a/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp +++ b/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp @@ -170,6 +170,7 @@ Status SchemaSqlBlockRuleStatusScanner::get_next_block_internal(Block* block, bo MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR( mblock.add_rows(_sql_block_rule_status_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_table_options_scanner.cpp b/be/src/information_schema/schema_table_options_scanner.cpp index 096f0860bfc3bd..717cb91cccfa29 100644 --- a/be/src/information_schema/schema_table_options_scanner.cpp +++ b/be/src/information_schema/schema_table_options_scanner.cpp @@ -167,6 +167,7 @@ Status SchemaTableOptionsScanner::get_next_block_internal(Block* block, bool* eo int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_tableoptions_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; if (!check_and_mark_eos(eos)) { diff --git a/be/src/information_schema/schema_table_properties_scanner.cpp b/be/src/information_schema/schema_table_properties_scanner.cpp index 0affe500b35f7b..e89153542a190c 100644 --- a/be/src/information_schema/schema_table_properties_scanner.cpp +++ b/be/src/information_schema/schema_table_properties_scanner.cpp @@ -161,6 +161,7 @@ Status SchemaTablePropertiesScanner::get_next_block_internal(Block* block, bool* int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_tableproperties_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; if (!check_and_mark_eos(eos)) { diff --git a/be/src/information_schema/schema_table_stream_consumption_scanner.cpp b/be/src/information_schema/schema_table_stream_consumption_scanner.cpp index c2c5ceab41ceb2..6b3141e404bf27 100644 --- a/be/src/information_schema/schema_table_stream_consumption_scanner.cpp +++ b/be/src/information_schema/schema_table_stream_consumption_scanner.cpp @@ -132,10 +132,11 @@ Status SchemaTableStreamConsumptionScanner::get_next_block_internal(Block* block MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR( mblock.add_rows(_table_stream_consumption_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/information_schema/schema_table_streams_scanner.cpp b/be/src/information_schema/schema_table_streams_scanner.cpp index 288d4e56c9a876..48299c7a1783c6 100644 --- a/be/src/information_schema/schema_table_streams_scanner.cpp +++ b/be/src/information_schema/schema_table_streams_scanner.cpp @@ -132,10 +132,11 @@ Status SchemaTableStreamsScanner::get_next_block_internal(Block* block, bool* eo int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_table_streams_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/information_schema/schema_view_dependency_scanner.cpp b/be/src/information_schema/schema_view_dependency_scanner.cpp index 1aa6ce614312f7..3723f4f9e5e2a3 100644 --- a/be/src/information_schema/schema_view_dependency_scanner.cpp +++ b/be/src/information_schema/schema_view_dependency_scanner.cpp @@ -133,6 +133,7 @@ Status SchemaViewDependencyScanner::get_next_block_internal(Block* block, bool* int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_view_dependency_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_workload_group_privileges.cpp b/be/src/information_schema/schema_workload_group_privileges.cpp index d0dab55965c3d1..854e151fd2521d 100644 --- a/be/src/information_schema/schema_workload_group_privileges.cpp +++ b/be/src/information_schema/schema_workload_group_privileges.cpp @@ -128,6 +128,7 @@ Status SchemaWorkloadGroupPrivilegesScanner::get_next_block_internal(Block* bloc MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR( mblock.add_rows(_workload_groups_privs_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp b/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp index 175b1dbd080e81..f790bf913bb75c 100644 --- a/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp +++ b/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp @@ -80,10 +80,11 @@ Status SchemaBackendWorkloadGroupResourceUsage::get_next_block_internal(Block* b int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/information_schema/schema_workload_groups_scanner.cpp b/be/src/information_schema/schema_workload_groups_scanner.cpp index 5ad1b744e975e6..b2dd403f48652b 100644 --- a/be/src/information_schema/schema_workload_groups_scanner.cpp +++ b/be/src/information_schema/schema_workload_groups_scanner.cpp @@ -139,6 +139,7 @@ Status SchemaWorkloadGroupsScanner::get_next_block_internal(Block* block, bool* int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_workload_groups_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/information_schema/schema_workload_sched_policy_scanner.cpp b/be/src/information_schema/schema_workload_sched_policy_scanner.cpp index 040b747bb435c4..bc5d5f9c229e4c 100644 --- a/be/src/information_schema/schema_workload_sched_policy_scanner.cpp +++ b/be/src/information_schema/schema_workload_sched_policy_scanner.cpp @@ -129,6 +129,7 @@ Status SchemaWorkloadSchedulePolicyScanner::get_next_block_internal(Block* block int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); MutableBlock mblock = MutableBlock::build_mutable_block(block); RETURN_IF_ERROR(mblock.add_rows(_block.get(), _row_idx, current_batch_rows)); + block->set_columns(std::move(mblock.mutable_columns())); _row_idx += current_batch_rows; *eos = _row_idx == _total_rows; diff --git a/be/src/load/memtable/memtable.cpp b/be/src/load/memtable/memtable.cpp index 588d8543d7b4b4..3bdcaa1ef961d7 100644 --- a/be/src/load/memtable/memtable.cpp +++ b/be/src/load/memtable/memtable.cpp @@ -453,12 +453,11 @@ void MemTable::_sort_one_column(DorisVector>& row_in } template -void MemTable::_finalize_one_row(RowInBlock* row, const ColumnsWithTypeAndName& block_data, - int row_pos) { +void MemTable::_finalize_one_row(RowInBlock* row, MutableBlock& mutable_block, int row_pos) { // move key columns for (size_t i = 0; i < _tablet_schema->num_key_columns(); ++i) { - _output_mutable_block.get_column_by_position(i)->insert_from(*block_data[i].column.get(), - row->_row_pos); + _output_mutable_block.get_column_by_position(i)->insert_from( + *mutable_block.get_column_by_position(i), row->_row_pos); } if (row->has_init_agg()) { // get value columns from agg_places @@ -490,7 +489,7 @@ void MemTable::_finalize_one_row(RowInBlock* row, const ColumnsWithTypeAndName& // move columns for rows do not need agg for (size_t i = _tablet_schema->num_key_columns(); i < _num_columns; ++i) { _output_mutable_block.get_column_by_position(i)->insert_from( - *block_data[i].column.get(), row->_row_pos); + *mutable_block.get_column_by_position(i), row->_row_pos); } } if constexpr (!is_final) { @@ -527,7 +526,6 @@ void MemTable::_aggregate() { Block in_block = _input_mutable_block.to_block(); MutableBlock mutable_block = MutableBlock::build_mutable_block(&in_block); _vec_row_comparator->set_block(&mutable_block); - auto& block_data = in_block.get_columns_with_type_and_name(); DorisVector> temp_row_in_blocks; temp_row_in_blocks.reserve(_last_sorted_pos); //only init agg if needed @@ -558,7 +556,7 @@ void MemTable::_aggregate() { if (!temp_row_in_blocks.empty()) { // The rows from the previous batch of _row_in_blocks have been merged into temp_row_in_blocks, // now call finalize to write the aggregation results into _output_mutable_block. - _finalize_one_row(temp_row_in_blocks.back().get(), block_data, + _finalize_one_row(temp_row_in_blocks.back().get(), mutable_block, row_pos); } temp_row_in_blocks.push_back(cur_row_ptr); @@ -567,15 +565,15 @@ void MemTable::_aggregate() { } if (!temp_row_in_blocks.empty()) { // finalize the last low - _finalize_one_row(temp_row_in_blocks.back().get(), block_data, row_pos); + _finalize_one_row(temp_row_in_blocks.back().get(), mutable_block, row_pos); } } else { DCHECK(_delete_sign_col_idx != -1); if (_seq_col_idx_in_block == -1) { - _aggregate_for_flexible_partial_update_without_seq_col( - block_data, mutable_block, temp_row_in_blocks); + _aggregate_for_flexible_partial_update_without_seq_col(mutable_block, + temp_row_in_blocks); } else { - _aggregate_for_flexible_partial_update_with_seq_col(block_data, mutable_block, + _aggregate_for_flexible_partial_update_with_seq_col(mutable_block, temp_row_in_blocks); } } @@ -593,8 +591,7 @@ void MemTable::_aggregate() { template void MemTable::_aggregate_for_flexible_partial_update_without_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, - DorisVector>& temp_row_in_blocks) { + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks) { std::shared_ptr prev_row {nullptr}; int row_pos = -1; auto& skip_bitmaps = @@ -609,12 +606,12 @@ void MemTable::_aggregate_for_flexible_partial_update_without_seq_col( auto finalize_rows = [&]() { if (row_with_delete_sign != nullptr) { temp_row_in_blocks.push_back(row_with_delete_sign); - _finalize_one_row(row_with_delete_sign.get(), block_data, ++row_pos); + _finalize_one_row(row_with_delete_sign.get(), mutable_block, ++row_pos); row_with_delete_sign = nullptr; } if (row_without_delete_sign != nullptr) { temp_row_in_blocks.push_back(row_without_delete_sign); - _finalize_one_row(row_without_delete_sign.get(), block_data, ++row_pos); + _finalize_one_row(row_without_delete_sign.get(), mutable_block, ++row_pos); row_without_delete_sign = nullptr; } // _arena.clear(); @@ -670,15 +667,14 @@ void MemTable::_aggregate_for_flexible_partial_update_without_seq_col( template void MemTable::_aggregate_for_flexible_partial_update_with_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, - DorisVector>& temp_row_in_blocks) { + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks) { // For flexible partial update, when table has sequence column, we don't do any aggregation // in memtable. These duplicate rows will be aggregated in VerticalSegmentWriter int row_pos = -1; for (const auto& row_ptr : *_row_in_blocks) { RowInBlock* row = row_ptr.get(); temp_row_in_blocks.push_back(row_ptr); - _finalize_one_row(row, block_data, ++row_pos); + _finalize_one_row(row, mutable_block, ++row_pos); } } diff --git a/be/src/load/memtable/memtable.h b/be/src/load/memtable/memtable.h index 42f96dd4f5f769..ad20667527fed1 100644 --- a/be/src/load/memtable/memtable.h +++ b/be/src/load/memtable/memtable.h @@ -262,7 +262,7 @@ class MemTable { void _sort_one_column(DorisVector>& row_in_blocks, Tie& tie, std::function cmp); template - void _finalize_one_row(RowInBlock* row, const ColumnsWithTypeAndName& block_data, int row_pos); + void _finalize_one_row(RowInBlock* row, MutableBlock& mutable_block, int row_pos); void _init_row_for_agg(RowInBlock* row, MutableBlock& mutable_block); void _clear_row_agg(RowInBlock* row); @@ -271,12 +271,12 @@ class MemTable { template void _aggregate_for_flexible_partial_update_without_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks); template void _aggregate_for_flexible_partial_update_with_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks); Status _put_into_output(Block& in_block); diff --git a/be/src/runtime/query_cache/query_cache.cpp b/be/src/runtime/query_cache/query_cache.cpp index d79acfa7ef788d..06817adf1544ce 100644 --- a/be/src/runtime/query_cache/query_cache.cpp +++ b/be/src/runtime/query_cache/query_cache.cpp @@ -17,6 +17,8 @@ #include "runtime/query_cache/query_cache.h" +#include "common/logging.h" + namespace doris { std::vector* QueryCacheHandle::get_cache_slot_orders() { @@ -43,7 +45,10 @@ void QueryCache::insert(const CacheKey& key, int64_t version, CacheResult& res, CacheResult cache_result; for (auto& block_data : res) { cache_result.emplace_back(Block::create_unique())->swap(block_data->clone_empty()); - (void)MutableBlock(cache_result.back().get()).merge(*block_data); + MutableBlock mutable_block(cache_result.back().get()); + auto st = mutable_block.merge(*block_data); + DORIS_CHECK(st.ok()); + cache_result.back()->set_columns(std::move(mutable_block.mutable_columns())); } auto cache_value_ptr = std::make_unique(version, std::move(cache_result), slot_orders); diff --git a/be/src/runtime/result_block_buffer.cpp b/be/src/runtime/result_block_buffer.cpp index ba7f135ce762d5..aebea97ea1ee90 100644 --- a/be/src/runtime/result_block_buffer.cpp +++ b/be/src/runtime/result_block_buffer.cpp @@ -214,10 +214,12 @@ Status ResultBlockBuffer::add_batch(RuntimeState* state, (batch_size + _last_batch_bytes) <= config::thrift_max_message_size) { if constexpr (std::is_same_v) { auto last_block = _result_batch_queue.back(); + auto mutable_columns = last_block->mutate_columns(); for (size_t i = 0; i < last_block->columns(); i++) { - last_block->mutate_columns()[i]->insert_range_from( - *result->get_by_position(i).column, 0, num_rows); + mutable_columns[i]->insert_range_from(*result->get_by_position(i).column, 0, + num_rows); } + last_block->set_columns(std::move(mutable_columns)); } else { std::vector& back_rows = _result_batch_queue.back()->result_batch.rows; diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index 441284a251b3c8..7ab3c89e10d6de 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -558,20 +558,21 @@ Status PointQueryExecutor::_lookup_row_data() { int pos = _reusable->get_col_uid_to_idx().at(cid); auto row_id = static_cast(row_loc.row_id); MutableColumnPtr column = - _result_block->get_by_position(pos).column->assume_mutable(); + IColumn::mutate(std::move(_result_block->get_by_position(pos).column)); std::unique_ptr iter; SlotDescriptor* slot = _reusable->tuple_desc()->slots()[pos]; StorageReadOptions storage_read_options; storage_read_options.stats = &_read_stats; storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY; - RETURN_IF_ERROR(segment->seek_and_read_by_rowid(*_tablet->tablet_schema(), slot, - row_id, column, - storage_read_options, iter)); - if (_tablet->tablet_schema() - ->column_by_uid(slot->col_unique_id()) - .has_char_type()) { + auto st = segment->seek_and_read_by_rowid(*_tablet->tablet_schema(), slot, row_id, + column, storage_read_options, iter); + if (st.ok() && _tablet->tablet_schema() + ->column_by_uid(slot->col_unique_id()) + .has_char_type()) { column->shrink_padding_chars(); } + _result_block->replace_by_position(pos, std::move(column)); + RETURN_IF_ERROR(st); } } } @@ -583,10 +584,13 @@ Status PointQueryExecutor::_lookup_row_data() { // SlotDescriptor{id=9, col=v2, colUniqueId=2 ...} // thus missing in include_col_uids and missing_col_uids for (size_t i = 0; i < _result_block->columns(); ++i) { - auto column = _result_block->get_by_position(i).column; + const auto& column = _result_block->get_by_position(i).column; int padding_rows = _row_hits - cast_set(column->size()); if (padding_rows > 0) { - column->assume_mutable()->insert_many_defaults(padding_rows); + auto mutable_column = + IColumn::mutate(std::move(_result_block->get_by_position(i).column)); + mutable_column->insert_many_defaults(padding_rows); + _result_block->replace_by_position(i, std::move(mutable_column)); } } } diff --git a/be/src/storage/iterator/block_reader.cpp b/be/src/storage/iterator/block_reader.cpp index e50ca8a9c831b1..82358ca7c85899 100644 --- a/be/src/storage/iterator/block_reader.cpp +++ b/be/src/storage/iterator/block_reader.cpp @@ -400,6 +400,7 @@ Status BlockReader::_replace_key_next_block(Block* block, bool* eof) { } } _merged_rows += merged_row; + block->set_columns(std::move(target_columns)); return Status::OK(); } @@ -580,9 +581,10 @@ Status BlockReader::_unique_key_next_block(Block* block, bool* eof) { LOG(WARNING) << "tablet_id: " << tablet()->tablet_id() << " delete sign idx " << delete_sign_idx << " not invalid, skip filter delete in base compaction"; + block->set_columns(std::move(target_columns)); return Status::OK(); } - MutableColumnPtr delete_filter_column = (*std::move(_delete_filter_column)).mutate(); + auto delete_filter_column = IColumn::mutate(std::move(_delete_filter_column)); reinterpret_cast(delete_filter_column.get())->resize(target_block_row); auto* __restrict filter_data = @@ -603,6 +605,7 @@ Status BlockReader::_unique_key_next_block(Block* block, bool* eof) { } } auto target_columns_size = target_columns.size(); + _delete_filter_column = std::move(delete_filter_column); ColumnWithTypeAndName column_with_type_and_name {_delete_filter_column, std::make_shared(), "__DORIS_COMPACTION_FILTER__"}; diff --git a/be/src/storage/iterator/vertical_block_reader.cpp b/be/src/storage/iterator/vertical_block_reader.cpp index aa90c83ccb0a3d..335584997f0f92 100644 --- a/be/src/storage/iterator/vertical_block_reader.cpp +++ b/be/src/storage/iterator/vertical_block_reader.cpp @@ -413,6 +413,7 @@ Status VerticalBlockReader::_agg_key_next_block(Block* block, bool* eof) { break; } LOG(WARNING) << "next failed: " << res; + block->set_columns(std::move(target_columns)); return res; } DCHECK(_next_row.block->columns() == block->columns()); @@ -484,11 +485,12 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { int delete_sign_idx = block->columns() - 1; DCHECK(delete_sign_idx > 0); auto target_columns = block->mutate_columns(); - MutableColumnPtr delete_filter_column = (*std::move(_delete_filter_column)).mutate(); - reinterpret_cast(delete_filter_column.get())->resize(block_rows); + auto delete_filter_column = IColumn::mutate(std::move(_delete_filter_column)); + auto* delete_filter_data_column = + reinterpret_cast(delete_filter_column.get()); + delete_filter_data_column->resize(block_rows); - auto* __restrict filter_data = - reinterpret_cast(delete_filter_column.get())->get_data().data(); + auto* __restrict filter_data = delete_filter_data_column->get_data().data(); auto* __restrict delete_data = reinterpret_cast(target_columns[delete_sign_idx].get()) ->get_data() @@ -517,12 +519,14 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { row_source_idx++; } + const auto column_to_keep = target_columns.size(); + block->set_columns(std::move(target_columns)); + _delete_filter_column = std::move(delete_filter_column); ColumnWithTypeAndName column_with_type_and_name {_delete_filter_column, std::make_shared(), "__DORIS_COMPACTION_FILTER__"}; block->insert(column_with_type_and_name); - RETURN_IF_ERROR( - Block::filter_block(block, target_columns.size(), target_columns.size())); + RETURN_IF_ERROR(Block::filter_block(block, column_to_keep, column_to_keep)); _stats.rows_del_filtered += block_rows - block->rows(); if (UNLIKELY(_reader_context.record_rowids)) { DCHECK_EQ(_block_row_locations.size(), block->rows() + delete_count); @@ -562,6 +566,7 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { RETURN_IF_ERROR(mask_iter->unique_key_next_batch(&batches, _reader_context.batch_size, &actual_rows)); if (actual_rows == 0) { + block->set_columns(std::move(target_columns)); *eof = true; _eof = true; return Status::OK(); @@ -605,6 +610,7 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { break; } LOG(WARNING) << "next failed: " << res; + block->set_columns(std::move(target_columns)); return res; } const auto& src_block = _next_row.block; diff --git a/be/src/storage/partial_update_info.cpp b/be/src/storage/partial_update_info.cpp index 7b97ecfc081167..fcd5bde0f8395f 100644 --- a/be/src/storage/partial_update_info.cpp +++ b/be/src/storage/partial_update_info.cpp @@ -38,6 +38,18 @@ #include "storage/utils.h" namespace doris { +namespace { + +ColumnBitmap* get_mutable_skip_bitmap_column(Block* block, size_t skip_bitmap_col_idx) { + auto skip_bitmap_column = + IColumn::mutate(std::move(block->get_by_position(skip_bitmap_col_idx).column)); + auto* skip_bitmap_column_ptr = assert_cast(skip_bitmap_column.get()); + block->replace_by_position(skip_bitmap_col_idx, std::move(skip_bitmap_column)); + return skip_bitmap_column_ptr; +} + +} // namespace + Status PartialUpdateInfo::init(int64_t tablet_id, int64_t txn_id, const TabletSchema& tablet_schema, UniqueKeyUpdateModePB unique_key_update_mode, PartialUpdateNewRowPolicyPB policy, @@ -325,7 +337,10 @@ Status FixedReadPlan::read_columns_by_plan( } } bool has_row_column = tablet_schema.has_row_store_for_all_columns(); - auto mutable_columns = block.mutate_columns(); + MutableColumns mutable_columns; + if (!has_row_column) { + mutable_columns = block.mutate_columns(); + } uint32_t read_idx = 0; for (const auto& [rowset_id, segment_row_mappings] : plan) { for (const auto& [segment_id, mappings] : segment_row_mappings) { @@ -360,7 +375,9 @@ Status FixedReadPlan::read_columns_by_plan( } } } - block.set_columns(std::move(mutable_columns)); + if (!has_row_column) { + block.set_columns(std::move(mutable_columns)); + } return Status::OK(); } @@ -797,8 +814,7 @@ void BlockAggregator::merge_one_row(MutableBlock& dst_block, Block* src_block, i ->get_data() .back(); const auto& new_row_skip_bitmap = - assert_cast( - src_block->get_by_position(cid).column->assume_mutable().get()) + assert_cast(src_block->get_by_position(cid).column.get()) ->get_data()[rid]; cur_skip_bitmap &= new_row_skip_bitmap; continue; @@ -943,11 +959,9 @@ Status BlockAggregator::aggregate_for_sequence_column( DCHECK_EQ(block->columns(), _tablet_schema.num_columns()); // the process logic here is the same as MemTable::_aggregate_for_flexible_partial_update_without_seq_col() // after this function, there will be at most 2 rows for a specified key - std::vector* skip_bitmaps = &( - assert_cast(block->get_by_position(_tablet_schema.skip_bitmap_col_idx()) - .column->assume_mutable() - .get()) - ->get_data()); + std::vector* skip_bitmaps = + &get_mutable_skip_bitmap_column(block, _tablet_schema.skip_bitmap_col_idx()) + ->get_data(); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*block, num_rows); auto filtered_block = _tablet_schema.create_block(); @@ -1016,11 +1030,9 @@ Status BlockAggregator::aggregate_for_insert_after_delete( // there will be at most 2 rows for a specified key in block when control flow reaches here // after this function, there will not be duplicate rows in block - std::vector* skip_bitmaps = &( - assert_cast(block->get_by_position(_tablet_schema.skip_bitmap_col_idx()) - .column->assume_mutable() - .get()) - ->get_data()); + std::vector* skip_bitmaps = + &get_mutable_skip_bitmap_column(block, _tablet_schema.skip_bitmap_col_idx()) + ->get_data(); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*block, num_rows); auto filter_column = ColumnUInt8::create(num_rows, 1); diff --git a/be/src/storage/schema_change/schema_change.cpp b/be/src/storage/schema_change/schema_change.cpp index f2583e3bcfc2d2..0f88bf70ec1dcc 100644 --- a/be/src/storage/schema_change/schema_change.cpp +++ b/be/src/storage/schema_change/schema_change.cpp @@ -172,14 +172,18 @@ class MultiBlockMerger { if (i == rows - 1 || _cmp.compare(row_refs[i], row_refs[i + 1])) { for (int j = 0; j < key_number; j++) { - finalized_block.get_by_position(j).column->assume_mutable()->insert_from( - *row_ref.get_column(j), row_ref.position); + auto& column_ptr = finalized_block.get_by_position(j).column; + auto column = column_ptr->assume_mutable(); + column->insert_from(*row_ref.get_column(j), row_ref.position); + column_ptr = std::move(column); } for (int j = key_number; j < columns; j++) { + auto& column_ptr = finalized_block.get_by_position(j).column; + auto column = column_ptr->assume_mutable(); agg_functions[j - key_number]->insert_result_into( - agg_places[j - key_number], - finalized_block.get_by_position(j).column->assume_mutable_ref()); + agg_places[j - key_number], *column); + column_ptr = std::move(column); agg_functions[j - key_number]->reset(agg_places[j - key_number]); } @@ -225,12 +229,14 @@ class MultiBlockMerger { int limit = std::min(ALTER_TABLE_BATCH_SIZE, rows - i); for (int idx = 0; idx < columns; idx++) { - auto column = finalized_block.get_by_position(idx).column->assume_mutable(); + auto& column_ptr = finalized_block.get_by_position(idx).column; + auto column = column_ptr->assume_mutable(); for (int j = 0; j < limit; j++) { auto row_ref = pushed_row_refs[i + j]; column->insert_from(*row_ref.get_column(idx), row_ref.position); } + column_ptr = std::move(column); } RETURN_IF_ERROR(rowset_writer->add_block(&finalized_block)); finalized_block.clear_column_data(); @@ -382,6 +388,7 @@ Status BlockChanger::change_block(Block* ref_block, Block* new_block) const { column = column->convert_to_predicate_column_if_dictionary(); column->insert_duplicate_fields(value, row_num); } + new_block->get_by_position(idx).column = std::move(column); } else { // same type, just swap column swap_idx_list.emplace_back(_schema_mapping[idx].ref_column_idx, idx); @@ -398,21 +405,20 @@ Status BlockChanger::change_block(Block* ref_block, Block* new_block) const { if (ref_col_nullable != new_col_nullable) { // not nullable to nullable if (new_col_nullable) { - auto* new_nullable_col = - assert_cast(new_col->assume_mutable().get()); + auto mutable_new_col = new_col->assume_mutable(); + auto* new_nullable_col = assert_cast(mutable_new_col.get()); new_nullable_col->change_nested_column(ref_col); new_nullable_col->get_null_map_data().resize_fill(ref_col->size()); + new_col = std::move(mutable_new_col); } else { // nullable to not nullable: // suppose column `c_phone` is originally varchar(16) NOT NULL, // then do schema change `alter table test modify column c_phone int not null`, // the cast expr of schema change is `CastExpr(CAST String to Nullable(Int32))`, // so need to handle nullable to not nullable here - auto* ref_nullable_col = - assert_cast(ref_col->assume_mutable().get()); - - new_col = ref_nullable_col->get_nested_column_ptr(); + const auto& ref_nullable_col = assert_cast(*ref_col); + new_col = ref_nullable_col.get_nested_column_ptr(); } } else { new_block->get_by_position(it.second).column = diff --git a/be/src/storage/segment/column_reader.cpp b/be/src/storage/segment/column_reader.cpp index 20e20879d087b8..5c1574e85e446d 100644 --- a/be/src/storage/segment/column_reader.cpp +++ b/be/src/storage/segment/column_reader.cpp @@ -77,6 +77,7 @@ #include "util/bitmap.h" #include "util/block_compression.h" #include "util/concurrency_stats.h" +#include "util/defer_op.h" #include "util/rle_encoding.h" // for RleDecoder #include "util/slice.h" @@ -993,7 +994,8 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* auto& column_map = assert_cast( dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); - auto column_offsets_ptr = column_map.get_offsets_column().assume_mutable(); + auto column_offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr())); + Defer defer_offsets {[&] { column_map.get_offsets_ptr() = std::move(column_offsets_ptr); }}; bool offsets_has_null = false; ssize_t start = column_offsets_ptr->size(); RETURN_IF_ERROR(_offsets_iterator->next_batch(n, column_offsets_ptr, &offsets_has_null)); @@ -1005,10 +1007,12 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* DCHECK(column_offsets.get_data().back() >= column_offsets.get_data()[start - 1]); size_t num_items = column_offsets.get_data().back() - column_offsets.get_data()[start - 1]; // -1 is valid - auto key_ptr = column_map.get_keys().assume_mutable(); - auto val_ptr = column_map.get_values().assume_mutable(); if (num_items > 0) { + auto key_ptr = IColumn::mutate(std::move(column_map.get_keys_ptr())); + auto val_ptr = IColumn::mutate(std::move(column_map.get_values_ptr())); + Defer defer_keys {[&] { column_map.get_keys_ptr() = std::move(key_ptr); }}; + Defer defer_values {[&] { column_map.get_values_ptr() = std::move(val_ptr); }}; if (read_offset_only()) { // OFFSET_ONLY mode: skip reading actual key/value data, fill with defaults key_ptr->insert_many_defaults(num_items); @@ -1021,9 +1025,6 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* RETURN_IF_ERROR(_val_iterator->next_batch(&num_read, val_ptr, &val_has_null)); DCHECK(num_read == num_items); } - - column_map.get_keys_ptr() = std::move(key_ptr); - column_map.get_values_ptr() = std::move(val_ptr); } if (dst->is_nullable()) { @@ -1078,9 +1079,10 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t return Status::OK(); } // resolve ColumnMap and nullable wrapper - const auto* column_map = check_and_get_column( + auto& column_map = assert_cast( dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); - auto offsets_ptr = column_map->get_offsets_column().assume_mutable(); + auto offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr())); + Defer defer_offsets {[&] { column_map.get_offsets_ptr() = std::move(offsets_ptr); }}; auto& offsets = static_cast(*offsets_ptr); size_t base = offsets.get_data().empty() ? 0 : offsets.get_data().back(); @@ -1164,8 +1166,10 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t } // 6. read key/value elements for non-empty sizes - auto keys_ptr = column_map->get_keys().assume_mutable(); - auto vals_ptr = column_map->get_values().assume_mutable(); + auto keys_ptr = IColumn::mutate(std::move(column_map.get_keys_ptr())); + auto vals_ptr = IColumn::mutate(std::move(column_map.get_values_ptr())); + Defer defer_keys {[&] { column_map.get_keys_ptr() = std::move(keys_ptr); }}; + Defer defer_values {[&] { column_map.get_values_ptr() = std::move(vals_ptr); }}; size_t this_run = sizes[0]; auto start_idx = starts_data[0]; @@ -1410,12 +1414,13 @@ Status StructFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bo dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); for (size_t i = 0; i < column_struct.tuple_size(); i++) { size_t num_read = *n; - auto sub_column_ptr = column_struct.get_column(i).assume_mutable(); + auto sub_column_ptr = IColumn::mutate(std::move(column_struct.get_column_ptr(i))); + Defer defer_sub_column { + [&] { column_struct.get_column_ptr(i) = std::move(sub_column_ptr); }}; bool column_has_null = false; RETURN_IF_ERROR( _sub_column_iterators[i]->next_batch(&num_read, sub_column_ptr, &column_has_null)); DCHECK(num_read == *n); - column_struct.get_column_ptr(i) = std::move(sub_column_ptr); } if (dst->is_nullable()) { @@ -1770,11 +1775,12 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, boo return Status::OK(); } - const auto* column_array = check_and_get_column( + auto& column_array = assert_cast( dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); bool offsets_has_null = false; - auto column_offsets_ptr = column_array->get_offsets_column().assume_mutable(); + auto column_offsets_ptr = IColumn::mutate(std::move(column_array.get_offsets_ptr())); + Defer defer_offsets {[&] { column_array.get_offsets_ptr() = std::move(column_offsets_ptr); }}; ssize_t start = column_offsets_ptr->size(); RETURN_IF_ERROR(_offset_iterator->next_batch(n, column_offsets_ptr, &offsets_has_null)); if (*n == 0) { @@ -1784,8 +1790,9 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, boo RETURN_IF_ERROR(_offset_iterator->_calculate_offsets(start, column_offsets)); size_t num_items = column_offsets.get_data().back() - column_offsets.get_data()[start - 1]; // -1 is valid - auto column_items_ptr = column_array->get_data().assume_mutable(); if (num_items > 0) { + auto column_items_ptr = IColumn::mutate(std::move(column_array.get_data_ptr())); + Defer defer_items {[&] { column_array.get_data_ptr() = std::move(column_items_ptr); }}; if (read_offset_only()) { // OFFSET_ONLY mode: skip reading actual item data, fill with defaults column_items_ptr->insert_many_defaults(num_items); diff --git a/be/src/storage/segment/segment_writer.cpp b/be/src/storage/segment/segment_writer.cpp index ac6841a6cf2651..ba9d8ce58e3e47 100644 --- a/be/src/storage/segment/segment_writer.cpp +++ b/be/src/storage/segment/segment_writer.cpp @@ -383,7 +383,7 @@ void SegmentWriter::_maybe_invalid_row_cache(const std::string& key) { } } -void SegmentWriter::_serialize_block_to_row_column(const Block& block) { +void SegmentWriter::_serialize_block_to_row_column(Block& block) { if (block.rows() == 0) { return; } @@ -392,14 +392,14 @@ void SegmentWriter::_serialize_block_to_row_column(const Block& block) { int row_column_id = 0; for (int i = 0; i < _tablet_schema->num_columns(); ++i) { if (_tablet_schema->column(i).is_row_store_column()) { - auto* row_store_column = static_cast( - block.get_by_position(i).column->assume_mutable_ref().assume_mutable().get()); - row_store_column->clear(); + auto row_store_column_ptr = block.get_by_position(i).column->clone_empty(); + auto* row_store_column = static_cast(row_store_column_ptr.get()); DataTypeSerDeSPtrs serdes = create_data_type_serdes(block.get_data_types()); JsonbSerializeUtil::block_to_jsonb(*_tablet_schema, block, *row_store_column, cast_set(_tablet_schema->num_columns()), serdes, {_tablet_schema->row_columns_uids().begin(), _tablet_schema->row_columns_uids().end()}); + block.replace_by_position(i, std::move(row_store_column_ptr)); break; } } @@ -713,7 +713,7 @@ Status SegmentWriter::append_block(const Block* block, size_t row_pos, size_t nu // or it's schema change write(since column data type maybe changed, so we should reubild) if (_opts.write_type == DataWriteType::TYPE_DIRECT || _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE) { - _serialize_block_to_row_column(*block); + _serialize_block_to_row_column(*const_cast(block)); } if (_opts.rowset_ctx->write_type != DataWriteType::TYPE_COMPACTION && diff --git a/be/src/storage/segment/segment_writer.h b/be/src/storage/segment/segment_writer.h index 37b4e996448d76..0c88cb193134a2 100644 --- a/be/src/storage/segment/segment_writer.h +++ b/be/src/storage/segment/segment_writer.h @@ -182,7 +182,7 @@ class SegmentWriter { void set_min_max_key(const Slice& key); void set_min_key(const Slice& key); void set_max_key(const Slice& key); - void _serialize_block_to_row_column(const Block& block); + void _serialize_block_to_row_column(Block& block); Status _generate_primary_key_index( const std::vector& primary_key_coders, const std::vector& primary_key_columns, diff --git a/be/src/storage/segment/variant/binary_column_extract_iterator.h b/be/src/storage/segment/variant/binary_column_extract_iterator.h index 0e5632b9853400..8458955dab762c 100644 --- a/be/src/storage/segment/variant/binary_column_extract_iterator.h +++ b/be/src/storage/segment/variant/binary_column_extract_iterator.h @@ -154,8 +154,8 @@ class BinaryColumnExtractIterator : public BaseBinaryColumnProcessor { _sparse_column_cache->binary_column->get_ptr(), 0, _sparse_column_cache->binary_column->size()); var.incr_num_rows(_sparse_column_cache->binary_column->size()); - var.get_sparse_column()->assume_mutable()->resize(var.rows()); - var.get_doc_value_column()->assume_mutable()->resize(var.rows()); + var.get_sparse_column_mutable().resize(var.rows()); + var.get_doc_value_column_mutable().resize(var.rows()); ENABLE_CHECK_CONSISTENCY(&var); } diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp index 052f231b27e68c..41de18d846e28c 100644 --- a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp +++ b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp @@ -203,7 +203,7 @@ Status HierarchicalDataIterator::_process_sub_columns( ColumnVariant& container_variant, const PathsWithColumnAndType& non_nested_subcolumns) { for (const auto& entry : non_nested_subcolumns) { DCHECK(!entry.path.has_nested_part()); - bool add = container_variant.add_sub_column(entry.path, entry.column->assume_mutable(), + bool add = container_variant.add_sub_column(entry.path, IColumn::mutate(entry.column), entry.type); if (!add) { return Status::InternalError("Duplicated {}, type {}", entry.path.get_path(), @@ -225,7 +225,7 @@ Status HierarchicalDataIterator::_process_nested_columns( check_and_get_column(*remove_nullable(entry.second[0].column)); MutableColumnPtr nested_object = ColumnVariant::create(0, false, base_array->get_data().size()); - MutableColumnPtr offset = base_array->get_offsets_ptr()->assume_mutable(); + MutableColumnPtr offset = IColumn::mutate(base_array->get_offsets_ptr()); auto* nested_object_ptr = assert_cast(nested_object.get()); // flatten nested arrays for (const auto& subcolumn : entry.second) { @@ -246,7 +246,7 @@ Status HierarchicalDataIterator::_process_nested_columns( subcolumn.path.get_path(), subcolumn.type->get_name()); } #endif - MutableColumnPtr flattend_column = target_array->get_data_ptr()->assume_mutable(); + MutableColumnPtr flattend_column = IColumn::mutate(target_array->get_data_ptr()); DataTypePtr flattend_type = check_and_get_data_type(remove_nullable(type).get()) ->get_nested_type(); @@ -255,14 +255,18 @@ Status HierarchicalDataIterator::_process_nested_columns( subcolumn.path.copy_pop_nfront(entry.first.get_parts().size()), std::move(flattend_column), std::move(flattend_type)); } - nested_object = make_nullable(nested_object->get_ptr())->assume_mutable(); - auto array = - make_nullable(ColumnArray::create(std::move(nested_object), std::move(offset))); + const size_t nested_object_size = nested_object->size(); + nested_object = ColumnNullable::create(std::move(nested_object), + ColumnUInt8::create(nested_object_size, 0)); + auto array = ColumnArray::create(std::move(nested_object), std::move(offset)); + const size_t array_size = array->size(); + auto nullable_array = + ColumnNullable::create(std::move(array), ColumnUInt8::create(array_size, 0)); PathInDataBuilder builder; // add parent prefix builder.append(entry.first.get_parts(), false); PathInData parent_path = builder.build(); - container_variant.add_sub_column(parent_path, array->assume_mutable(), + container_variant.add_sub_column(parent_path, std::move(nullable_array), container_variant.NESTED_TYPE); } return Status::OK(); @@ -283,14 +287,17 @@ Status HierarchicalDataIterator::_init_container(MutableColumnPtr& container, si // auto column = root_var.get_root(); // auto type = root_var.get_root_type(); - MutableColumnPtr column = _root_reader->column->get_ptr(); + MutableColumnPtr column = IColumn::mutate(_root_reader->column->get_ptr()); // container_variant.add_sub_column({}, std::move(column), _root_reader->type); DCHECK(column->size() == nrows); - auto nullable_column = make_nullable(column->get_ptr()); + if (!column->is_nullable()) { + const size_t column_size = column->size(); + column = ColumnNullable::create(std::move(column), ColumnUInt8::create(column_size, 0)); + } auto type = make_nullable(_root_reader->type); // make sure the root type is nullable container = ColumnVariant::create(max_subcolumns_count, enable_doc_mode, type, - nullable_column->assume_mutable()); + std::move(column)); } else { DataTypePtr root_type = std::make_shared(); auto column = ColumnNothing::create(nrows); @@ -359,10 +366,10 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container if (_path.get_parts().empty()) { if (_read_type == ReadType::SUBCOLUMNS_AND_SPARSE) { container_variant.set_sparse_column(_binary_column_reader->column->get_ptr()); - container_variant.get_doc_value_column()->assume_mutable()->resize(nrows); + container_variant.get_doc_value_column_mutable().resize(nrows); } else if (_read_type == ReadType::DOC_VALUE_COLUMN) { container_variant.set_doc_value_column(_binary_column_reader->column->get_ptr()); - container_variant.get_sparse_column()->assume_mutable()->resize(nrows); + container_variant.get_sparse_column_mutable().resize(nrows); } else { return Status::InternalError("Invalid read type {}", _read_type); } @@ -378,7 +385,7 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container const auto& src_values = assert_cast(src_map.get_values()); // Clear pre-initialized doc_value offsets (created by ColumnVariant ctor with num_rows) - container_variant.get_doc_value_column()->assume_mutable()->clear(); + container_variant.get_doc_value_column_mutable().clear(); auto [dst_paths, dst_values] = container_variant.get_doc_value_data_paths_and_values(); auto& dst_offsets = container_variant.serialized_doc_value_column_offsets(); @@ -419,13 +426,13 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container } dst_offsets.push_back(dst_paths->size()); } - container_variant.get_sparse_column()->assume_mutable()->resize(nrows); + container_variant.get_sparse_column_mutable().resize(nrows); } else { const auto& offsets = assert_cast(*_binary_column_reader->column).get_offsets(); /// Check if there is no data in shared data in current range. if (offsets.back() == offsets[-1]) { - container_variant.get_sparse_column()->assume_mutable()->resize(nrows); + container_variant.get_sparse_column_mutable().resize(nrows); } else { // Read for variant sparse column // Example path: a.b @@ -444,8 +451,7 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container assert_cast(sparse_data_map.get_values()); auto& sparse_data_offsets = - assert_cast( - *container_variant.get_sparse_column()->assume_mutable()) + assert_cast(container_variant.get_sparse_column_mutable()) .get_offsets(); auto [sparse_data_paths, sparse_data_values] = container_variant.get_sparse_data_paths_and_values(); @@ -544,7 +550,7 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container } } } - container_variant.get_doc_value_column()->assume_mutable()->resize(nrows); + container_variant.get_doc_value_column_mutable().resize(nrows); } ENABLE_CHECK_CONSISTENCY(&container_variant); return Status::OK(); diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.h b/be/src/storage/segment/variant/hierarchical_data_iterator.h index 3e3816736a4851..cc7e3f7bd15f7a 100644 --- a/be/src/storage/segment/variant/hierarchical_data_iterator.h +++ b/be/src/storage/segment/variant/hierarchical_data_iterator.h @@ -138,6 +138,7 @@ class HierarchicalDataIterator : public ColumnIterator { // process read template Status process_read(ReadFunction&& read_func, MutableColumnPtr& dst, size_t nrows) { + dst = IColumn::mutate(std::move(dst)); // // Read all sub columns, and merge with root column ColumnNullable* nullable_column = nullptr; if (dst->is_nullable()) { diff --git a/be/src/storage/segment/variant/variant_column_reader.cpp b/be/src/storage/segment/variant/variant_column_reader.cpp index d41775581bf2e5..1ac88b2479ef16 100644 --- a/be/src/storage/segment/variant/variant_column_reader.cpp +++ b/be/src/storage/segment/variant/variant_column_reader.cpp @@ -1601,8 +1601,9 @@ static void fill_nested_with_defaults(MutableColumnPtr& dst, MutableColumnPtr& s } auto new_nested = dst_array->get_data_ptr()->clone_resized(sibling_array->get_data_ptr()->size()); - auto new_array = make_nullable(ColumnArray::create( - new_nested->assume_mutable(), sibling_array->get_offsets_ptr()->assume_mutable())); + ColumnPtr nested_column = std::move(new_nested); + auto new_array = + make_nullable(ColumnArray::create(nested_column, sibling_array->get_offsets_ptr())); dst->insert_range_from(*new_array, 0, new_array->size()); #ifndef NDEBUG if (!dst_array->has_equal_offsets(*sibling_array)) { diff --git a/be/src/storage/segment/variant/variant_column_writer_impl.cpp b/be/src/storage/segment/variant/variant_column_writer_impl.cpp index 8ad08640ba19bd..9a491aaa8f4f9f 100644 --- a/be/src/storage/segment/variant/variant_column_writer_impl.cpp +++ b/be/src/storage/segment/variant/variant_column_writer_impl.cpp @@ -1220,8 +1220,14 @@ Status VariantColumnWriterImpl::_process_root_column(ColumnVariant* ptr, DCHECK_EQ(ptr->get_root()->get_ptr()->size(), num_rows); converter->add_column_data_convertor(*_tablet_column); const uint8_t* nullmap = nullptr; - auto& nullable_column = assert_cast(*ptr->get_root()->assume_mutable()); - auto root_column = nullable_column.get_nested_column_ptr(); + // get_root() already returns a MutableColumnPtr; store it to avoid dangling ref and + // to avoid calling assume_mutable() again (which would see use_count>1 and throw). + auto root_mut = ptr->get_root(); + auto& nullable_column = assert_cast(*root_mut); + // Use const access to get the nested column ptr without bumping use_count in the + // non-const chameleon_ptr path, then mutate() to get exclusive ownership. + auto root_column = IColumn::mutate( + static_cast(nullable_column).get_nested_column_ptr()); const bool has_root_ng = std::ranges::any_of(_nested_group_routing_plan.ng_only_prefixes, @@ -1233,13 +1239,15 @@ Status VariantColumnWriterImpl::_process_root_column(ColumnVariant* ptr, // If the root variant is nullable, then update the root column null column with the outer null column. if (_tablet_column->is_nullable()) { // use outer null column as final null column + // Move root_column (exclusive) directly into create() to avoid sharing ownership. root_column = - ColumnNullable::create(root_column->get_ptr(), ColumnUInt8::create(*_null_column)); + ColumnNullable::create(std::move(root_column), ColumnUInt8::create(*_null_column)); nullmap = _null_column->get_data().data(); } else { // Otherwise setting to all not null. - root_column = ColumnNullable::create(root_column->get_ptr(), - ColumnUInt8::create(root_column->size(), 0)); + size_t col_size = root_column->size(); + root_column = + ColumnNullable::create(std::move(root_column), ColumnUInt8::create(col_size, 0)); } // make sure the root_column is nullable RETURN_IF_ERROR(converter->set_source_content_with_specifid_column( diff --git a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp index 0dcf05e095e302..a05201be9af38f 100644 --- a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp +++ b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp @@ -141,8 +141,10 @@ Status VariantStreamingCompactionWriter::_append_root_column(const ColumnVariant auto expected_root_type = make_nullable(std::make_shared()); variant->ensure_root_node_type(expected_root_type); - auto& nullable_column = assert_cast(*variant->get_root()->assume_mutable()); - auto root_column = nullable_column.get_nested_column_ptr(); + auto root_mut = variant->get_root(); + auto& nullable_column = assert_cast(*root_mut); + auto root_column = IColumn::mutate( + static_cast(nullable_column).get_nested_column_ptr()); const size_t num_rows = chunk_variant.rows(); variant_writer_helpers::maybe_remove_root_jsonb_with_empty_defaults( &root_column, num_rows, _streaming_plan.can_remove_root_jsonb()); @@ -157,10 +159,11 @@ Status VariantStreamingCompactionWriter::_append_root_column(const ColumnVariant } else { null_column->insert_many_defaults(num_rows); } - root_column = ColumnNullable::create(root_column->get_ptr(), std::move(null_column)); + root_column = ColumnNullable::create(std::move(root_column), std::move(null_column)); } else { - root_column = ColumnNullable::create(root_column->get_ptr(), - ColumnUInt8::create(root_column->size(), 0)); + const size_t root_column_size = root_column->size(); + root_column = ColumnNullable::create(std::move(root_column), + ColumnUInt8::create(root_column_size, 0)); } auto converter = std::make_unique(); diff --git a/be/src/storage/segment/vertical_segment_writer.cpp b/be/src/storage/segment/vertical_segment_writer.cpp index 6203bf50b233de..67fff40faa15cd 100644 --- a/be/src/storage/segment/vertical_segment_writer.cpp +++ b/be/src/storage/segment/vertical_segment_writer.cpp @@ -91,6 +91,14 @@ inline std::string vertical_segment_writer_mem_tracker_name(uint32_t segment_id) return "VerticalSegmentWriter:Segment-" + std::to_string(segment_id); } +static ColumnBitmap* get_mutable_skip_bitmap_column(Block* block, size_t skip_bitmap_col_idx) { + auto skip_bitmap_column = + IColumn::mutate(std::move(block->get_by_position(skip_bitmap_col_idx).column)); + auto* skip_bitmap_column_ptr = assert_cast(skip_bitmap_column.get()); + block->replace_by_position(skip_bitmap_col_idx, std::move(skip_bitmap_column)); + return skip_bitmap_column_ptr; +} + VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, TabletSchemaSPtr tablet_schema, BaseTabletSPtr tablet, DataDir* data_dir, @@ -356,7 +364,7 @@ void VerticalSegmentWriter::_maybe_invalid_row_cache(const std::string& key) con } } -void VerticalSegmentWriter::_serialize_block_to_row_column(const Block& block) { +void VerticalSegmentWriter::_serialize_block_to_row_column(Block& block) { if (block.rows() == 0) { return; } @@ -365,15 +373,15 @@ void VerticalSegmentWriter::_serialize_block_to_row_column(const Block& block) { int row_column_id = 0; for (int i = 0; i < _tablet_schema->num_columns(); ++i) { if (_tablet_schema->column(i).is_row_store_column()) { - auto* row_store_column = static_cast( - block.get_by_position(i).column->assume_mutable_ref().assume_mutable().get()); - row_store_column->clear(); + auto row_store_column_ptr = block.get_by_position(i).column->clone_empty(); + auto* row_store_column = static_cast(row_store_column_ptr.get()); DataTypeSerDeSPtrs serdes = create_data_type_serdes(block.get_data_types()); std::unordered_set row_store_cids_set(_tablet_schema->row_columns_uids().begin(), _tablet_schema->row_columns_uids().end()); JsonbSerializeUtil::block_to_jsonb(*_tablet_schema, block, *row_store_column, cast_set(_tablet_schema->num_columns()), serdes, row_store_cids_set); + block.replace_by_position(i, std::move(row_store_column_ptr)); break; } } @@ -759,10 +767,9 @@ Status VerticalSegmentWriter::_append_block_with_flexible_partial_content(RowsIn RETURN_IF_ERROR(_block_aggregator.convert_seq_column(const_cast(data.block), data.row_pos, data.num_rows, seq_column)); - std::vector* skip_bitmaps = &( - assert_cast( - data.block->get_by_position(skip_bitmap_col_idx).column->assume_mutable().get()) - ->get_data()); + auto* mutable_block = const_cast(data.block); + std::vector* skip_bitmaps = + &get_mutable_skip_bitmap_column(mutable_block, skip_bitmap_col_idx)->get_data(); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*data.block, data.row_pos + data.num_rows); DCHECK(delete_signs != nullptr); @@ -1003,7 +1010,7 @@ Status VerticalSegmentWriter::write_batch() { _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE) { for (auto& data : _batched_blocks) { // TODO: maybe we should pass range to this method - _serialize_block_to_row_column(*data.block); + _serialize_block_to_row_column(*const_cast(data.block)); } } diff --git a/be/src/storage/segment/vertical_segment_writer.h b/be/src/storage/segment/vertical_segment_writer.h index 5c0ec0930e522d..39235811c07880 100644 --- a/be/src/storage/segment/vertical_segment_writer.h +++ b/be/src/storage/segment/vertical_segment_writer.h @@ -158,7 +158,7 @@ class VerticalSegmentWriter { void _set_min_max_key(const Slice& key); void _set_min_key(const Slice& key); void _set_max_key(const Slice& key); - void _serialize_block_to_row_column(const Block& block); + void _serialize_block_to_row_column(Block& block); Status _probe_key_for_mow(std::string key, std::size_t segment_pos, bool have_input_seq_column, bool have_delete_sign, const std::vector& specified_rowsets, diff --git a/be/src/storage/tablet/base_tablet.cpp b/be/src/storage/tablet/base_tablet.cpp index 6802092c74995e..7611bf12874264 100644 --- a/be/src/storage/tablet/base_tablet.cpp +++ b/be/src/storage/tablet/base_tablet.cpp @@ -851,9 +851,10 @@ Status BaseTablet::sort_block(Block& in_block, Block& output_block) { vec_row_comparator->set_block(&mutable_input_block); std::vector> row_in_blocks; - DCHECK(in_block.rows() <= std::numeric_limits::max()); - row_in_blocks.reserve(in_block.rows()); - for (size_t i = 0; i < in_block.rows(); ++i) { + const auto input_rows = mutable_input_block.rows(); + DCHECK(input_rows <= std::numeric_limits::max()); + row_in_blocks.reserve(input_rows); + for (size_t i = 0; i < input_rows; ++i) { row_in_blocks.emplace_back(std::make_unique(i)); } std::sort(row_in_blocks.begin(), row_in_blocks.end(), @@ -865,12 +866,15 @@ Status BaseTablet::sort_block(Block& in_block, Block& output_block) { return value < 0; }); std::vector row_pos_vec; - row_pos_vec.reserve(in_block.rows()); + row_pos_vec.reserve(input_rows); for (auto& block : row_in_blocks) { row_pos_vec.emplace_back(block->_row_pos); } - return mutable_output_block.add_rows(&in_block, row_pos_vec.data(), - row_pos_vec.data() + in_block.rows()); + in_block.set_columns(std::move(mutable_input_block.mutable_columns())); + RETURN_IF_ERROR(mutable_output_block.add_rows(&in_block, row_pos_vec.data(), + row_pos_vec.data() + input_rows)); + output_block.set_columns(std::move(mutable_output_block.mutable_columns())); + return Status::OK(); } // fetch value by row column diff --git a/be/src/util/jsonb/serialize.cpp b/be/src/util/jsonb/serialize.cpp index 0088c6249f0030..669747b7949941 100644 --- a/be/src/util/jsonb/serialize.cpp +++ b/be/src/util/jsonb/serialize.cpp @@ -102,9 +102,10 @@ Status JsonbSerializeUtil::jsonb_to_block( auto col_it = col_id_to_idx.find(it->getKeyId()); if (col_it != col_id_to_idx.end() && (include_cids.empty() || include_cids.contains(it->getKeyId()))) { - MutableColumnPtr dst_column = - dst.get_by_position(col_it->second).column->assume_mutable(); + auto dst_column = + IColumn::mutate(std::move(dst.get_by_position(col_it->second).column)); serdes[col_it->second]->read_one_cell_from_jsonb(*dst_column, it->value()); + dst.replace_by_position(col_it->second, std::move(dst_column)); ++filled_columns; } } @@ -112,20 +113,25 @@ Status JsonbSerializeUtil::jsonb_to_block( return Status::OK(); } auto fill_column = [&](Block& dst, int pos, size_t old_num_rows) { - MutableColumnPtr dst_column = dst.get_by_position(pos).column->assume_mutable(); + auto dst_column = IColumn::mutate(std::move(dst.get_by_position(pos).column)); if (dst_column->size() < old_num_rows + 1) { DCHECK(dst_column->size() == old_num_rows); + Status st = Status::OK(); if (default_values[pos].empty()) { dst_column->insert_default(); } else { Slice value(default_values[pos].data(), default_values[pos].size()); DataTypeSerDe::FormatOptions opt; opt.converted_from_string = true; - RETURN_IF_ERROR( - serdes[pos]->deserialize_one_cell_from_json(*dst_column, value, opt)); + st = serdes[pos]->deserialize_one_cell_from_json(*dst_column, value, opt); } + dst.replace_by_position(pos, std::move(dst_column)); + RETURN_IF_ERROR(st); + DCHECK(dst.get_by_position(pos).column->size() == num_rows + 1); + return Status::OK(); } DCHECK(dst_column->size() == num_rows + 1); + dst.replace_by_position(pos, std::move(dst_column)); return Status::OK(); }; // fill missing column @@ -145,4 +151,4 @@ Status JsonbSerializeUtil::jsonb_to_block( return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/core/block/column_nullable_test.cpp b/be/test/core/block/column_nullable_test.cpp index dc837c335b13d1..0b92d1813fb8fb 100644 --- a/be/test/core/block/column_nullable_test.cpp +++ b/be/test/core/block/column_nullable_test.cpp @@ -44,7 +44,7 @@ TEST(ColumnNullableTest, HashTest) { nullable_column->update_hash_with_value(0, hashes[1]); EXPECT_EQ(hashes[0].get64(), hashes[1].get64()); - auto& null_map = ((ColumnNullable)(*nullable_column)).get_null_map_data(); + auto& null_map = nullable_column->get_null_map_data(); null_map[1] = true; column->update_hash_with_value(1, hashes[0]); nullable_column->update_hash_with_value(1, hashes[1]); diff --git a/be/test/core/column/column_array_test.cpp b/be/test/core/column/column_array_test.cpp index e8c0bd4467898c..e00bae393856c0 100644 --- a/be/test/core/column/column_array_test.cpp +++ b/be/test/core/column/column_array_test.cpp @@ -688,8 +688,7 @@ TEST_F(ColumnArrayTest, ConvertIfOverflowAndInsertTest) { // check ptr is itself auto ptr = column->convert_column_if_overflow(); EXPECT_EQ(ptr.get(), column.get()); - auto arr_col = - check_and_get_column(remove_nullable(column->assume_mutable()).get()); + auto arr_col = check_and_get_column(remove_nullable(column->get_ptr()).get()); auto nested_col = arr_col->get_data_ptr(); auto array_col1 = check_and_get_column(remove_nullable(ptr).get()); auto nested_col1 = array_col1->get_data_ptr(); diff --git a/be/test/core/column/column_ip_test.cpp b/be/test/core/column/column_ip_test.cpp index fc03446e45503b..05cf6034ed37e5 100644 --- a/be/test/core/column/column_ip_test.cpp +++ b/be/test/core/column/column_ip_test.cpp @@ -77,32 +77,32 @@ class ColumnIPTest : public CommonColumnTest { TEST_F(ColumnIPTest, InsertRangeFromTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_range_from_callback); } TEST_F(ColumnIPTest, InsertManyFromTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_many_from_callback); } TEST_F(ColumnIPTest, InsertIndicesFromTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_indices_from_callback); } TEST_F(ColumnIPTest, InsertDefaultTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); // ipv4 default value is '0.0.0.0' and ipv6 default value is '::' check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_default_callback); } @@ -110,31 +110,31 @@ TEST_F(ColumnIPTest, InsertDefaultTest) { TEST_F(ColumnIPTest, InsertManyDefaultsTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_many_defaults_callback); } TEST_F(ColumnIPTest, GetDataAtTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_get_data_at_callback); } TEST_F(ColumnIPTest, FieldTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_field_callback); } TEST_F(ColumnIPTest, GetRawDataTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, {serde[1]}, ';', {2}, data_files[0], assert_get_raw_data_callback); } @@ -142,8 +142,8 @@ TEST_F(ColumnIPTest, GetRawDataTest) { TEST_F(ColumnIPTest, SerDeVecTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); ser_deser_vec(ip_cols, {dt_ipv4, dt_ipv6}); } @@ -151,8 +151,8 @@ TEST_F(ColumnIPTest, SerDeVecTest) { TEST_F(ColumnIPTest, serDeserializeWithArenaImpl) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); ser_deserialize_with_arena_impl(ip_cols, {dt_ipv4, dt_ipv6}); @@ -161,16 +161,16 @@ TEST_F(ColumnIPTest, serDeserializeWithArenaImpl) { TEST_F(ColumnIPTest, SizeTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_size_callback); } TEST_F(ColumnIPTest, ByteSizeTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_byte_size_callback); } @@ -178,8 +178,8 @@ TEST_F(ColumnIPTest, ByteSizeTest) { TEST_F(ColumnIPTest, AllocateBytesTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_allocated_bytes_callback); } @@ -187,8 +187,8 @@ TEST_F(ColumnIPTest, AllocateBytesTest) { TEST_F(ColumnIPTest, PopbackTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_pop_back_callback); } @@ -197,18 +197,18 @@ TEST_F(ColumnIPTest, CloneTest) { // we test the column with clone_resize, clone_empty for assert size and ptr // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); - assert_clone_empty(column_ipv4->assume_mutable_ref()); - assert_clone_empty(column_ipv6->assume_mutable_ref()); + assert_clone_empty(ip_cols[0]->assume_mutable_ref()); + assert_clone_empty(ip_cols[1]->assume_mutable_ref()); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_clone_resized_callback); } TEST_F(ColumnIPTest, CutTest) { MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_cut_callback); } @@ -216,24 +216,24 @@ TEST_F(ColumnIPTest, CutTest) { TEST_F(ColumnIPTest, ResizeTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_resize_callback); } TEST_F(ColumnIPTest, ReserveTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_reserve_callback); } TEST_F(ColumnIPTest, ReplaceColumnTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); // replace_column_data @@ -246,26 +246,26 @@ TEST_F(ColumnIPTest, ReplaceColumnTest) { TEST_F(ColumnIPTest, AppendDataBySelectorTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_append_data_by_selector_callback); } TEST_F(ColumnIPTest, PermutationAndSortTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[1], ';', {1, 2}); - assert_column_permutations(column_ipv4->assume_mutable_ref(), dt_ipv4); - assert_column_permutations(column_ipv6->assume_mutable_ref(), dt_ipv6); + assert_column_permutations(ip_cols[0]->assume_mutable_ref(), dt_ipv4); + assert_column_permutations(ip_cols[1]->assume_mutable_ref(), dt_ipv6); } TEST_F(ColumnIPTest, FilterTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_filter_callback); } @@ -274,8 +274,8 @@ TEST_F(ColumnIPTest, HashTest) { // XXHash // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); // update_hashes_with_value diff --git a/be/test/core/column/column_variant_test.cpp b/be/test/core/column/column_variant_test.cpp index ef99a57a257e1c..c5141fe697a0c7 100644 --- a/be/test/core/column/column_variant_test.cpp +++ b/be/test/core/column/column_variant_test.cpp @@ -1178,9 +1178,11 @@ TEST_F(ColumnVariantTest, field_test) { ColumnVariant::MutablePtr obj; obj = ColumnVariant::create(1, false); MutableColumns cols; - cols.push_back(obj->get_ptr()); + cols.push_back(std::move(obj)); const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl"; load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj); + obj = ColumnVariant::cast_to_column_mutptr(assert_cast(cols[0].get())); + cols.clear(); EXPECT_TRUE(!obj->empty()); test_func(obj); } @@ -2122,13 +2124,16 @@ TEST_F(ColumnVariantTest, fill_path_column_from_sparse_data) { ColumnVariant::MutablePtr obj; obj = ColumnVariant::create(1, false); MutableColumns cols; - cols.push_back(obj->get_ptr()); + cols.push_back(std::move(obj)); const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl"; load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj); + obj = ColumnVariant::cast_to_column_mutptr(assert_cast(cols[0].get())); + cols.clear(); EXPECT_TRUE(!obj->empty()); auto sparse_col = obj->get_sparse_column(); auto cloned_sparse = sparse_col->clone_empty(); - auto& offsets = obj->serialized_sparse_column_offsets(); + const auto& offsets = + static_cast(*obj).serialized_sparse_column_offsets(); for (size_t i = 0; i != offsets.size(); ++i) { auto start = offsets[i - 1]; auto end = offsets[i]; diff --git a/be/test/core/column/common_column_test.h b/be/test/core/column/common_column_test.h index ac4ed5eff76582..fe0ecf051d0140 100644 --- a/be/test/core/column/common_column_test.h +++ b/be/test/core/column/common_column_test.h @@ -634,11 +634,15 @@ class CommonColumnTest : public ::testing::Test { Block block; for (size_t i = 0; i < load_cols.size(); ++i) { ColumnWithTypeAndName columnTypeAndName; - columnTypeAndName.column = load_cols[i]->assume_mutable(); + columnTypeAndName.column = load_cols[i]->get_ptr(); columnTypeAndName.type = types[i]; block.insert(columnTypeAndName); } MutableBlock mb = MutableBlock::build_mutable_block(&block); + // Rebuild block from load_cols after build_mutable_block stole the column pointers + for (size_t i = 0; i < load_cols.size(); ++i) { + block.get_by_position(i).column = load_cols[i]->get_ptr(); + } // step2. to construct a block for assert_cols Block assert_block; Block empty_block; @@ -691,7 +695,9 @@ class CommonColumnTest : public ::testing::Test { continue; } else if (*pos + *cl > source_column->size()) { if (is_column( - remove_nullable(source_column->assume_mutable()).get())) { + remove_nullable(static_cast(source_column.get()) + ->get_ptr()) + .get())) { // insert_range_from in array has DCHECK_LG continue; } @@ -3544,13 +3550,13 @@ auto assert_column_vector_serialize_vec_callback = [](auto x, if (test_null_map) { cloned_target_column->serialize(input_keys.data(), rows); deser_column_wrapper = cloned_target_column->clone_empty(); - deser_column = ((ColumnNullable*)deser_column_wrapper.get())->get_nested_column_ptr(); } else { target_column->serialize(input_keys.data(), rows); deser_column = source_column->clone_empty(); } if (test_null_map) { deser_column_wrapper->deserialize(input_keys.data(), rows); + deser_column = ((ColumnNullable*)deser_column_wrapper.get())->get_nested_column_ptr(); } else { deser_column->deserialize(input_keys.data(), rows); } diff --git a/be/test/core/data_type/common_data_type_serder_test.h b/be/test/core/data_type/common_data_type_serder_test.h index d968cc1213e92d..a7393b9d8eee0c 100644 --- a/be/test/core/data_type/common_data_type_serder_test.h +++ b/be/test/core/data_type/common_data_type_serder_test.h @@ -277,7 +277,7 @@ class CommonDataTypeSerdeTest : public ::testing::Test { jsonb_column->reserve(load_cols[0]->size()); MutableColumns assert_cols; for (size_t i = 0; i < load_cols.size(); ++i) { - assert_cols.push_back(load_cols[i]->assume_mutable()); + assert_cols.push_back(load_cols[i]->clone_empty()); } DataTypeSerDe::FormatOptions options; auto tz = cctz::utc_time_zone(); diff --git a/be/test/core/data_type/complex_type_test.cpp b/be/test/core/data_type/complex_type_test.cpp index 54dc360e2a8fa1..9d3baa87edecd0 100644 --- a/be/test/core/data_type/complex_type_test.cpp +++ b/be/test/core/data_type/complex_type_test.cpp @@ -20,8 +20,16 @@ #include #include +#include +#include "agent/be_exec_version_manager.h" +#include "core/assert_cast.h" #include "core/column/column.h" +#include "core/column/column_array.h" +#include "core/column/column_map.h" +#include "core/column/column_string.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" #include "core/data_type/data_type.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_map.h" @@ -34,6 +42,25 @@ namespace doris { +namespace { + +std::vector serialize_column(const DataTypePtr& type, const ColumnPtr& column) { + const int be_exec_version = BeExecVersionManager::get_newest_version(); + std::vector buf(type->get_uncompressed_serialized_bytes(*column, be_exec_version)); + char* end = type->serialize(*column, buf.data(), be_exec_version); + EXPECT_EQ(buf.data() + buf.size(), end); + return buf; +} + +void deserialize_column(const DataTypePtr& type, const std::vector& buf, + MutableColumnPtr* column) { + const int be_exec_version = BeExecVersionManager::get_newest_version(); + const char* end = type->deserialize(buf.data(), column, be_exec_version); + EXPECT_EQ(buf.data() + buf.size(), end); +} + +} // namespace + TEST(ComplexTypeTest, CreateColumnConstWithDefaultValue) { DataTypePtr n1 = std::make_shared(std::make_shared()); DataTypePtr n2 = std::make_shared(std::make_shared()); @@ -70,4 +97,92 @@ TEST(ComplexTypeTest, CreateColumnConstWithDefaultValue) { col_a->get(0, af); EXPECT_EQ(PrimitiveType::TYPE_ARRAY, af.get_type()); } + +TEST(ComplexTypeTest, DeserializeArrayWritesBackSharedNestedColumn) { + DataTypePtr nested_type = std::make_shared(); + DataTypePtr array_type = std::make_shared(nested_type); + + auto src_column = array_type->create_column(); + src_column->insert(Field::create_field( + Array {Field::create_field(1), Field::create_field(2)})); + src_column->insert(Field::create_field(Array {Field::create_field(3)})); + auto buf = serialize_column(array_type, src_column->get_ptr()); + + ColumnPtr shared_nested_column = ColumnInt32::create(); + MutableColumnPtr dst_column = ColumnArray::create(shared_nested_column); + deserialize_column(array_type, buf, &dst_column); + + const auto& array_column = assert_cast(*dst_column); + EXPECT_EQ(2, array_column.size()); + EXPECT_EQ(0, shared_nested_column->size()); + EXPECT_EQ(3, array_column.get_data().size()); + EXPECT_EQ(2, array_column.get_offsets()[0]); + EXPECT_EQ(3, array_column.get_offsets()[1]); + + const auto& data = assert_cast(array_column.get_data()).get_data(); + EXPECT_EQ(1, data[0]); + EXPECT_EQ(2, data[1]); + EXPECT_EQ(3, data[2]); +} + +TEST(ComplexTypeTest, DeserializeMapWritesBackSharedKeyAndValueColumns) { + DataTypePtr key_type = std::make_shared(); + DataTypePtr value_type = std::make_shared(); + DataTypePtr map_type = std::make_shared(key_type, value_type); + + auto src_column = map_type->create_column(); + Map map; + map.push_back(Field::create_field( + Array {Field::create_field(10), Field::create_field(20)})); + map.push_back(Field::create_field( + Array {Field::create_field("a"), Field::create_field("b")})); + src_column->insert(Field::create_field(map)); + auto buf = serialize_column(map_type, src_column->get_ptr()); + + ColumnPtr shared_keys_column = ColumnInt32::create(); + ColumnPtr shared_values_column = ColumnString::create(); + ColumnPtr offsets_column = ColumnArray::ColumnOffsets::create(); + MutableColumnPtr dst_column = + ColumnMap::create(shared_keys_column, shared_values_column, offsets_column); + deserialize_column(map_type, buf, &dst_column); + + const auto& map_column = assert_cast(*dst_column); + EXPECT_EQ(1, map_column.size()); + EXPECT_EQ(0, shared_keys_column->size()); + EXPECT_EQ(0, shared_values_column->size()); + EXPECT_EQ(2, map_column.get_keys().size()); + EXPECT_EQ(2, map_column.get_values().size()); + + const auto& keys = assert_cast(map_column.get_keys()).get_data(); + EXPECT_EQ(10, keys[0]); + EXPECT_EQ(20, keys[1]); + EXPECT_EQ("a", map_column.get_values().get_data_at(0).to_string()); + EXPECT_EQ("b", map_column.get_values().get_data_at(1).to_string()); +} + +TEST(ComplexTypeTest, DeserializeStructWritesBackSharedChildren) { + DataTypes children_types {std::make_shared(), + std::make_shared()}; + DataTypePtr struct_type = std::make_shared(children_types); + + auto src_column = struct_type->create_column(); + src_column->insert(Field::create_field( + Tuple {Field::create_field(7), Field::create_field("seven")})); + auto buf = serialize_column(struct_type, src_column->get_ptr()); + + ColumnPtr shared_int_column = ColumnInt32::create(); + ColumnPtr shared_string_column = ColumnString::create(); + Columns shared_columns {shared_int_column, shared_string_column}; + MutableColumnPtr dst_column = ColumnStruct::create(shared_columns); + deserialize_column(struct_type, buf, &dst_column); + + const auto& struct_column = assert_cast(*dst_column); + EXPECT_EQ(1, struct_column.size()); + EXPECT_EQ(0, shared_int_column->size()); + EXPECT_EQ(0, shared_string_column->size()); + + const auto& ints = assert_cast(struct_column.get_column(0)).get_data(); + EXPECT_EQ(7, ints[0]); + EXPECT_EQ("seven", struct_column.get_column(1).get_data_at(0).to_string()); +} } // namespace doris diff --git a/be/test/core/data_type_serde/data_type_serde_csv_test.cpp b/be/test/core/data_type_serde/data_type_serde_csv_test.cpp index 0478507cab0844..84bce05751a061 100644 --- a/be/test/core/data_type_serde/data_type_serde_csv_test.cpp +++ b/be/test/core/data_type_serde/data_type_serde_csv_test.cpp @@ -512,8 +512,9 @@ TEST(CsvSerde, ComplexTypeSerdeSchemaChangedCsvTest) { DataTypeSerDeSPtr serde = data_type_ptr->get_serde(); Status st = serde->deserialize_one_cell_from_hive_text(*col, slice, formatOptions); EXPECT_EQ(st, Status::OK()); - auto struct_col = static_cast( - static_cast(*col.get()).get_nested_column()); + // Use const access for read-only assertions: avoids assume_mutable_ref() on sub-columns. + const auto& struct_col = static_cast( + static_cast(*col.get()).get_nested_column()); EXPECT_EQ(struct_col.get_column(0).get_data_at(0).to_string(), "false"); EXPECT_EQ(struct_col.get_column(1).get_data_at(0).to_string(), "example"); @@ -537,11 +538,11 @@ TEST(CsvSerde, ComplexTypeSerdeSchemaChangedCsvTest) { DataTypeSerDeSPtr serde = data_type_ptr->get_serde(); Status st = serde->deserialize_one_cell_from_hive_text(*col, slice, formatOptions); EXPECT_EQ(st, Status::OK()); - auto array_col = static_cast( - static_cast(*col.get()).get_nested_column()); + const auto& array_col = static_cast( + static_cast(*col.get()).get_nested_column()); - auto string_col = static_cast( - static_cast(array_col.get_data()).get_nested_column()); + const auto& string_col = static_cast( + static_cast(array_col.get_data()).get_nested_column()); EXPECT_EQ(string_col.get_data_at(0).to_string(), "1\003example"); EXPECT_EQ(string_col.get_data_at(1).to_string(), "2\003test"); } diff --git a/be/test/core/data_type_serde/data_type_serde_struct_test.cpp b/be/test/core/data_type_serde/data_type_serde_struct_test.cpp index e583b50e4302f2..5158ab01c75f12 100644 --- a/be/test/core/data_type_serde/data_type_serde_struct_test.cpp +++ b/be/test/core/data_type_serde/data_type_serde_struct_test.cpp @@ -144,10 +144,9 @@ TEST_F(DataTypeStructSerDeTest, ArrowMemNotAligned) { EXPECT_EQ(string_values_address % 4, 1); // 5.Test read_column_from_arrow - std::vector vector_columns; - vector_columns.emplace_back(ColumnInt32::create()); - vector_columns.emplace_back(ColumnString::create()); - auto ser_col = ColumnStruct::create(vector_columns); + // Create sub-columns exclusively (no extra refs) so that ColumnStruct::get_column() + // non-const path does not find use_count > 1. + auto ser_col = ColumnStruct::create(Columns {ColumnInt32::create(), ColumnString::create()}); cctz::time_zone tz; DataTypeSerDeSPtrs elem_serdes = {serde_int32, serde_str}; Strings field_names = {"int_field", "string_field"}; diff --git a/be/test/exec/column_type_convert_test.cpp b/be/test/exec/column_type_convert_test.cpp index 5178cddbd59d2f..f336a245568cbd 100644 --- a/be/test/exec/column_type_convert_test.cpp +++ b/be/test/exec/column_type_convert_test.cpp @@ -63,8 +63,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerWideningConversions) { src_data.push_back(std::numeric_limits::max()); src_data.push_back(std::numeric_limits::min()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -96,8 +95,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerWideningConversions) { src_data.push_back(std::numeric_limits::max()); src_data.push_back(std::numeric_limits::min()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -130,8 +128,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerNarrowingConversions) { src_data.push_back(std::numeric_limits::max()); src_data.push_back(std::numeric_limits::min()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -160,8 +157,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerNarrowingConversions) { src_data.push_back(std::numeric_limits::max() + 1); src_data.push_back(std::numeric_limits::min() - 1); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(!st.ok()); @@ -189,8 +185,7 @@ TEST_F(ColumnTypeConverterTest, TestFloatingPointConversions) { src_data.push_back((1L << 23) - 1); src_data.push_back(1L << 23); src_data.push_back((1L << 23) + 1); - auto dst_nullable_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_nullable_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -232,8 +227,7 @@ TEST_F(ColumnTypeConverterTest, TestFloatingPointConversions) { src_col->insert_data("invalid", 7); // Invalid string src_col->insert_data("", 0); // Empty string - auto dst_nullable_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_nullable_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -289,8 +283,7 @@ TEST_F(ColumnTypeConverterTest, TestFloatingPointConversions) { src_data.push_back(-std::numeric_limits::infinity()); src_data.push_back(std::numeric_limits::quiet_NaN()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -325,8 +318,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal32(12345)); // 123.45 src_data.push_back(Decimal32(-12345)); // -123.45 - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -354,8 +346,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal32(12345)); // 123.45 src_data.push_back(Decimal32(-67890)); // -678.90 - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -385,8 +376,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal64(12345678901234)); // Normal value: 1234567890.1234 src_data.push_back(Decimal64(-98765432109876)); // Negative value: -9876543210.9876 - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); // Perform conversion Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); @@ -419,8 +409,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal32(-12345)); // -123.45 src_data.push_back(Decimal32(23345)); // Too large 233.45 - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -458,8 +447,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal128V3(-102345)); src_data.push_back(Decimal128V3(203345)); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -499,8 +487,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal256(655363345)); src_data.push_back(Decimal256(3333333333332345)); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -539,8 +526,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-67890); // -678.90 after scaling src_data.push_back(0); // Zero check - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -569,8 +555,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-67890); src_data.push_back(0); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -598,8 +583,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-123); // -678.90 after scaling src_data.push_back(0); // Zero check - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -628,8 +612,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-123); // -678.90 after scaling src_data.push_back(0); // Zero check - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = nullable_col.get_null_map_data(); @@ -667,8 +650,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal64(-999999999)); // Edge case: negative max for Decimal32 src_data.push_back(Decimal64(-1000000000)); // Out of range (underflow) - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = nullable_col.get_null_map_data(); @@ -698,9 +680,8 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal64(999999999)); // Edge case: max for Decimal32 src_data.push_back(Decimal64(-999999999)); // Edge case: negative max for Decimal32 ASSERT_EQ(3, src_data.size()); - auto dst_col = nullable_dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); + mutable_dst->resize(0); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -743,9 +724,8 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(std::numeric_limits::infinity()); // Infinity src_data.push_back(std::numeric_limits::quiet_NaN()); // NaN - auto dst_col = nullable_dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); + mutable_dst->resize(0); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -791,9 +771,8 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_col->insert_data("0.0", 3); // Zero value src_col->insert_data("9999999999.99", 13); // Edge case: max valid value within precision - auto dst_col = nullable_dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); + mutable_dst->resize(0); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -837,8 +816,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(std::numeric_limits::min()); src_data.push_back(0); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -869,8 +847,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(std::numeric_limits::infinity()); src_data.push_back(std::numeric_limits::quiet_NaN()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -901,8 +878,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_col->insert_data("not a number", 11); src_col->insert_data("2147483648", 10); // Greater than INT32_MAX - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -935,9 +911,8 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(Decimal32(-67890)); // -678.90 src_data.push_back(Decimal32(0)); // Zero - auto dst_col = dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); + mutable_dst->resize(0); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -972,9 +947,8 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { value.unchecked_set_time(2070, 1, 1, 0, 0, 0); src_data.push_back(*reinterpret_cast(&value)); // "2070-01-01" in days format - auto dst_col = dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); + mutable_dst->resize(0); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -1005,8 +979,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(1); // true src_data.push_back(0); // false - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -1041,8 +1014,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_col->insert_data("1.5", 3); // Hive: null (not an integer) src_col->insert_data("", 0); // Hive: null - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast( @@ -1090,8 +1062,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("abc", 3); // Invalid - should be NULL src_col->insert_data("", 0); // Empty - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1137,8 +1108,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("-32769", 6); // Underflow - should be NULL src_col->insert_data("123.45", 6); // Decimal - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1182,8 +1152,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("1000000", 7); // Million src_col->insert_data("2147483648", 10); // Overflow - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1227,8 +1196,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("9223372036854775808", 19); // Overflow - should be NULL src_col->insert_data("123abc", 6); // Invalid - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1270,8 +1238,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("0", 1); // Zero src_col->insert_data("123e45", 6); // Scientific notation - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1460,8 +1427,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { // 2024-01-01 00:00:00.123456 auto src_col = make_datetimev2_col({{2024, 1, 1, 0, 0, 0, 123456}}); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -1484,8 +1450,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { // 1970-01-01 00:00:00.000000 // 3000-01-01 00:00:00.000000 auto src_col = make_datetimev2_col({{1970, 1, 1, 0, 0, 0, 0}, {3000, 1, 1, 0, 0, 0, 0}}); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = nullable_col.get_null_map_data(); null_map.resize_fill(src_col->size(), 0); @@ -1512,8 +1477,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { // 3000-01-01 00:00:00.000000(会溢出int32) auto src_col = make_datetimev2_col({{3000, 1, 1, 0, 0, 0, 0}}); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_FALSE(st.ok()); @@ -1545,8 +1509,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { src_col->get_data().push_back(parse_datetimev2_str("2022-05-01 13:00:00")); src_col->get_data().push_back(parse_datetimev2_str("2022-05-01 14:00:00")); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = nullable_col.get_null_map_data(); null_map.resize_fill(src_col->size(), 0); @@ -1715,10 +1678,9 @@ TEST_F(ColumnTypeConverterTest, TestEmptyColumnConversions) { ASSERT_FALSE(converter->is_consistent()); auto src_col = ColumnInt32::create(); // Empty column (no data) - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); src_col->resize(0); - dst_col->resize(0); + mutable_dst->resize(0); // Perform conversion Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); diff --git a/be/test/exec/common/schema_util_rowset_test.cpp b/be/test/exec/common/schema_util_rowset_test.cpp index cf99c9824956c5..18bc77ccb3f883 100644 --- a/be/test/exec/common/schema_util_rowset_test.cpp +++ b/be/test/exec/common/schema_util_rowset_test.cpp @@ -172,6 +172,7 @@ static void fill_block_with_test_data(Block* block, int size) { auto v4 = Field::create_field(i); columns[4]->insert(v4); } + block->set_columns(std::move(columns)); } static int64_t inc_id = 1000; static RowsetWriterContext rowset_writer_context(const std::unique_ptr& data_dir, diff --git a/be/test/exec/common/schema_util_test.cpp b/be/test/exec/common/schema_util_test.cpp index 63ba645272282c..262e08eaf030ff 100644 --- a/be/test/exec/common/schema_util_test.cpp +++ b/be/test/exec/common/schema_util_test.cpp @@ -817,7 +817,8 @@ TEST_F(SchemaUtilTest, TestCastColumnEdgeCases) { // Test casting from variant to variant auto variant_column = ColumnVariant::create(10, false); - variant_column->create_root(nullable_array_type, nullable_array_column->assume_mutable()); + // nullable_array_column is also stored in array_col.column (use_count=2), so mutate() clones it. + variant_column->create_root(nullable_array_type, IColumn::mutate(nullable_array_column)); ColumnWithTypeAndName variant_col; variant_col.type = variant_type; @@ -1955,14 +1956,14 @@ TEST_F(SchemaUtilTest, parse_and_materialize_variant_columns_ambiguous_paths) { // Prepare the variant column with the string column as root ColumnVariant::Subcolumns dynamic_subcolumns; dynamic_subcolumns.create_root( - ColumnVariant::Subcolumn(string_col->assume_mutable(), string_type, true)); + ColumnVariant::Subcolumn(std::move(string_col), string_type, true)); auto variant_col = ColumnVariant::create(0, false, std::move(dynamic_subcolumns)); auto variant_type = std::make_shared(); // Construct the block Block block; - block.insert(ColumnWithTypeAndName(variant_col->assume_mutable(), variant_type, "v")); + block.insert(ColumnWithTypeAndName(std::move(variant_col), variant_type, "v")); // The variant column is at index 0 std::vector variant_pos = {0}; diff --git a/be/test/exec/connector/vjdbc_connector_test.cpp b/be/test/exec/connector/vjdbc_connector_test.cpp index 16ff8689aafaf2..5ec3fb7046a5a9 100644 --- a/be/test/exec/connector/vjdbc_connector_test.cpp +++ b/be/test/exec/connector/vjdbc_connector_test.cpp @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include @@ -33,26 +34,55 @@ class JdbcUtilsTest : public ::testing::Test { void SetUp() override { // Save original config and environment original_jdbc_drivers_dir_ = config::jdbc_drivers_dir; - original_doris_home_ = getenv("DORIS_HOME"); + const char* original_doris_home = getenv("DORIS_HOME"); + if (original_doris_home != nullptr) { + original_doris_home_ = original_doris_home; + has_original_doris_home_ = true; + } // Set DORIS_HOME for testing - setenv("DORIS_HOME", "/tmp/test_doris", 1); + temp_home_ = std::filesystem::temp_directory_path() / + ("doris_jdbc_utils_test_" + std::to_string(::getpid())); + second_temp_home_ = std::filesystem::temp_directory_path() / + ("doris_jdbc_utils_test_second_" + std::to_string(::getpid())); + std::filesystem::remove_all(temp_home_); + std::filesystem::remove_all(second_temp_home_); + std::filesystem::create_directories(temp_home_); + setenv("DORIS_HOME", temp_home_.c_str(), 1); } void TearDown() override { // Restore original config and environment config::jdbc_drivers_dir = original_jdbc_drivers_dir_; - if (original_doris_home_) { - setenv("DORIS_HOME", original_doris_home_, 1); + if (has_original_doris_home_) { + setenv("DORIS_HOME", original_doris_home_.c_str(), 1); } else { unsetenv("DORIS_HOME"); } + std::filesystem::remove_all(temp_home_); + std::filesystem::remove_all(second_temp_home_); + } + + std::string default_driver_dir() const { + return (temp_home_ / "plugins" / "jdbc_drivers").string(); + } + + std::string old_driver_dir() const { return (temp_home_ / "jdbc_drivers").string(); } + + std::string second_default_driver_dir() const { + return (second_temp_home_ / "plugins" / "jdbc_drivers").string(); + } + + std::string second_old_driver_dir() const { + return (second_temp_home_ / "jdbc_drivers").string(); } -private: std::string original_jdbc_drivers_dir_; - const char* original_doris_home_ = nullptr; + std::string original_doris_home_; + bool has_original_doris_home_ = false; + std::filesystem::path temp_home_; + std::filesystem::path second_temp_home_; }; // Test resolve_driver_url with absolute URLs @@ -79,10 +109,10 @@ TEST_F(JdbcUtilsTest, TestResolveDriverUrlWithRelativeUrl) { std::string result_url; // Set config to default value to trigger the default directory logic - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); // Create the target directory and file for testing - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/mysql-connector.jar"; // Create directory and file @@ -104,10 +134,10 @@ TEST_F(JdbcUtilsTest, TestResolveDriverUrlWithRelativeUrl) { // Test resolve_driver_url with default directory TEST_F(JdbcUtilsTest, TestResolveWithDefaultConfig) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); // Create the target directory and file for testing - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/mysql-connector.jar"; std::filesystem::create_directories(dir); @@ -138,9 +168,9 @@ TEST_F(JdbcUtilsTest, TestResolveWithCustomConfig) { } TEST_F(JdbcUtilsTest, TestDefaultDirectoryFileExistsPath) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/existing-driver.jar"; std::filesystem::create_directories(dir); @@ -160,10 +190,10 @@ TEST_F(JdbcUtilsTest, TestDefaultDirectoryFileExistsPath) { } TEST_F(JdbcUtilsTest, TestFallbackToOldDirectory) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); // Create only the old directory and file (not the new one) - std::string old_dir = "/tmp/test_doris/jdbc_drivers"; + std::string old_dir = old_driver_dir(); std::string file_path = old_dir + "/fallback-driver.jar"; std::filesystem::create_directories(old_dir); @@ -183,10 +213,11 @@ TEST_F(JdbcUtilsTest, TestFallbackToOldDirectory) { } TEST_F(JdbcUtilsTest, TestPathConstruction) { - setenv("DORIS_HOME", "/tmp/test_doris2", 1); - config::jdbc_drivers_dir = "/tmp/test_doris2/plugins/jdbc_drivers"; + std::filesystem::create_directories(second_temp_home_); + setenv("DORIS_HOME", second_temp_home_.c_str(), 1); + config::jdbc_drivers_dir = second_default_driver_dir(); - std::string old_dir = "/tmp/test_doris2/jdbc_drivers"; + std::string old_dir = second_old_driver_dir(); std::string file_path = old_dir + "/test.jar"; std::filesystem::create_directories(old_dir); @@ -223,9 +254,9 @@ TEST_F(JdbcUtilsTest, TestEdgeCases) { } TEST_F(JdbcUtilsTest, TestMultipleCallsConsistency) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/same-driver.jar"; std::filesystem::create_directories(dir); diff --git a/be/test/exprs/aggregate/agg_array_agg_test.cpp b/be/test/exprs/aggregate/agg_array_agg_test.cpp index 101328496df850..6b27a2b55b03fd 100644 --- a/be/test/exprs/aggregate/agg_array_agg_test.cpp +++ b/be/test/exprs/aggregate/agg_array_agg_test.cpp @@ -42,6 +42,7 @@ #include "exprs/aggregate/agg_function_test.h" #include "exprs/aggregate/aggregate_function.h" #include "exprs/aggregate/aggregate_function_simple_factory.h" +#include "exprs/aggregate/aggregate_function_sort.h" #include "gtest/gtest_pred_impl.h" namespace doris { @@ -192,4 +193,32 @@ TEST_F(AggregateFunctionArrayAggTest, test_array_agg_aint64_foreach) { ColumnWithTypeAndName(std::move(array_array_column), array_array_data_type, "column")); } +TEST(AggregateFunctionSortDataTest, merge_does_not_share_rhs_block) { + auto data_type = std::make_shared(); + Block prototype({ColumnWithTypeAndName(data_type->create_column(), data_type, "value"), + ColumnWithTypeAndName(data_type->create_column(), data_type, "sort_key")}); + SortDescription sort_desc {SortColumnDescription(1, 1, 1)}; + + AggregateFunctionSortData lhs(sort_desc, prototype); + AggregateFunctionSortData rhs1(sort_desc, prototype); + AggregateFunctionSortData rhs2(sort_desc, prototype); + + auto values = ColumnInt64::create(); + values->insert_value(10); + values->insert_value(20); + auto sort_keys = ColumnInt64::create(); + sort_keys->insert_value(2); + sort_keys->insert_value(1); + const IColumn* row0[] = {values.get(), sort_keys.get()}; + const IColumn* row1[] = {values.get(), sort_keys.get()}; + + rhs1.add(row0, 2, 0); + rhs2.add(row1, 2, 1); + + lhs.merge(rhs1); + ASSERT_NO_THROW(lhs.merge(rhs2)); + ASSERT_EQ(lhs.block.rows(), 2); + ASSERT_EQ(rhs1.block.rows(), 1); +} + } // namespace doris diff --git a/be/test/exprs/function/function_variant_element_test.cpp b/be/test/exprs/function/function_variant_element_test.cpp index d4d413a601aa45..19f85217c6fa37 100644 --- a/be/test/exprs/function/function_variant_element_test.cpp +++ b/be/test/exprs/function/function_variant_element_test.cpp @@ -40,7 +40,7 @@ TEST(function_variant_element_test, extract_from_sparse_column) { sparse_column_offsets.push_back(sparse_column_keys->size()); variant_ptr->get_subcolumn({})->insert_default(); variant_ptr->set_num_rows(1); - variant_ptr->get_doc_value_column()->assume_mutable()->resize(1); + variant_ptr->get_doc_value_column_mutable().resize(1); ColumnPtr result; ColumnPtr index_column_ptr = ColumnString::create(); @@ -61,4 +61,4 @@ TEST(function_variant_element_test, extract_from_sparse_column) { EXPECT_EQ(result_string, "{\"age\":\"John\",\"name\":\"John\"}"); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/format/native/native_reader_writer_test.cpp b/be/test/format/native/native_reader_writer_test.cpp index 5d1d7dc207cef7..cf568354925b17 100644 --- a/be/test/format/native/native_reader_writer_test.cpp +++ b/be/test/format/native/native_reader_writer_test.cpp @@ -530,6 +530,7 @@ TEST_F(NativeReaderWriterTest, round_trip_native_file_large_rows) { MutableBlock merged_mutable(&merged_block); Status add_st = merged_mutable.add_rows(&dst_block, 0, read_rows); ASSERT_TRUE(add_st.ok()) << add_st; + merged_block.set_columns(std::move(merged_mutable.mutable_columns())); total_read_rows += read_rows; } } diff --git a/be/test/format/orc/orc_reader_fill_data_test.cpp b/be/test/format/orc/orc_reader_fill_data_test.cpp index 12c1dd209c585b..4fdc44b36bbebb 100644 --- a/be/test/format/orc/orc_reader_fill_data_test.cpp +++ b/be/test/format/orc/orc_reader_fill_data_test.cpp @@ -19,6 +19,7 @@ #include +#include "core/assert_cast.h" #include "core/column/column_array.h" #include "core/column/column_struct.h" #include "core/data_type/data_type_array.h" @@ -124,6 +125,43 @@ TEST_F(OrcReaderFillDataTest, TestFillLongColumnWithNull) { } } +TEST_F(OrcReaderFillDataTest, SchemaChangeNullableNullMapUsesAppendedSlice) { + std::vector values = {10, 20, 30}; + std::vector nulls = {true, false, true}; + auto batch = create_long_batch(values.size(), values, nulls); + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::LONG); + + auto nested_column = ColumnFloat64::create(); + nested_column->insert_value(1); + nested_column->insert_value(2); + auto null_map_column = ColumnUInt8::create(); + null_map_column->insert_value(0); + null_map_column->insert_value(0); + ColumnPtr doris_column = + ColumnNullable::create(std::move(nested_column), std::move(null_map_column)); + auto data_type = make_nullable(std::make_shared()); + + TFileScanRangeParams params; + TFileRangeDesc range; + auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); + + Status status = reader->_orc_column_to_doris_column( + "test_schema_change_nullable", doris_column, data_type, const_node, orc_type_ptr.get(), + batch.get(), values.size()); + + ASSERT_TRUE(status.ok()) << status.to_string(); + const auto* nullable_column = assert_cast(doris_column.get()); + ASSERT_EQ(nullable_column->size(), 5); + + const auto& null_map = nullable_column->get_null_map_data(); + ASSERT_EQ(null_map.size(), 5); + EXPECT_EQ(null_map[0], 0); + EXPECT_EQ(null_map[1], 0); + EXPECT_EQ(null_map[2], 1); + EXPECT_EQ(null_map[3], 0); + EXPECT_EQ(null_map[4], 1); +} + TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) { // Array类型测试 { @@ -478,4 +516,4 @@ TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) { "+-------------------+\n"); } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/format/parquet/parquet_column_convert_test.cpp b/be/test/format/parquet/parquet_column_convert_test.cpp index e50d28ef0e7930..64b821b694013f 100644 --- a/be/test/format/parquet/parquet_column_convert_test.cpp +++ b/be/test/format/parquet/parquet_column_convert_test.cpp @@ -22,6 +22,9 @@ #include #include +#include "core/assert_cast.h" +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" #include "util/timezone_utils.h" namespace doris::parquet { @@ -119,4 +122,39 @@ TEST(ParquetColumnConvertTest, LookupPathMatchesOriginal) { } } +TEST(ParquetColumnConvertTest, AlignNullMapUsesAppendedSourceSlice) { + auto dst_nested_column = ColumnFloat64::create(); + dst_nested_column->insert_value(1); + dst_nested_column->insert_value(2); + auto dst_null_map_column = ColumnUInt8::create(); + dst_null_map_column->insert_value(0); + dst_null_map_column->insert_value(0); + ColumnPtr dst_column = + ColumnNullable::create(std::move(dst_nested_column), std::move(dst_null_map_column)); + + auto src_nested_column = ColumnInt64::create(); + for (int i = 0; i < 5; ++i) { + src_nested_column->insert_value(i); + } + auto src_null_map_column = ColumnUInt8::create(); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(1); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(1); + ColumnPtr src_column = + ColumnNullable::create(std::move(src_nested_column), std::move(src_null_map_column)); + + align_null_map(src_column, dst_column, 2, 3, 2); + + const auto* nullable_column = assert_cast(dst_column.get()); + const auto& null_map = nullable_column->get_null_map_data(); + ASSERT_EQ(null_map.size(), 5); + EXPECT_EQ(null_map[0], 0); + EXPECT_EQ(null_map[1], 0); + EXPECT_EQ(null_map[2], 1); + EXPECT_EQ(null_map[3], 0); + EXPECT_EQ(null_map[4], 1); +} + } // namespace doris::parquet diff --git a/be/test/format/parquet/parquet_thrift_test.cpp b/be/test/format/parquet/parquet_thrift_test.cpp index 2253b6c12cce5f..56d8c9c7b1d36a 100644 --- a/be/test/format/parquet/parquet_thrift_test.cpp +++ b/be/test/format/parquet/parquet_thrift_test.cpp @@ -160,8 +160,8 @@ TEST_F(ParquetThriftReaderTest, complex_nested_file) { static int fill_nullable_column(ColumnPtr& doris_column, level_t* definitions, size_t num_values) { CHECK(doris_column->is_nullable()); - auto* nullable_column = - const_cast(static_cast(doris_column.get())); + doris_column = IColumn::mutate(std::move(doris_column)); + auto* nullable_column = assert_cast(doris_column->assume_mutable().get()); NullMap& map_data = nullable_column->get_null_map_data(); int null_cnt = 0; for (int i = 0; i < num_values; ++i) { @@ -192,6 +192,9 @@ static Status get_column_values(io::FileReaderSPtr file_reader, tparquet::Column ColumnPtr src_column = _converter->get_physical_column( field_schema->physical_type, field_schema->data_type, doris_column, data_type, false); + if (_converter->read_directly_into_dst_logical_column()) { + src_column = std::move(doris_column); + } DataTypePtr& resolved_type = _converter->get_physical_type(); io::BufferedFileStreamReader stream_reader(file_reader, start_offset, chunk_size, 1024); @@ -216,10 +219,10 @@ static Status get_column_values(io::FileReaderSPtr file_reader, tparquet::Column if (src_column->is_nullable()) { // fill nullable values fill_nullable_column(src_column, definitions, rows); - auto* nullable_column = - const_cast(static_cast(src_column.get())); + auto* nullable_column = assert_cast(src_column->assume_mutable().get()); data_column = nullable_column->get_nested_column_ptr(); } else { + src_column = IColumn::mutate(std::move(src_column)); data_column = src_column->assume_mutable(); } FilterMap filter_map; diff --git a/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp b/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp index bbfe9e18a35830..c8a40194ff0803 100644 --- a/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp +++ b/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp @@ -214,7 +214,8 @@ static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); int32_t c5 = seq; - columns[4]->insert_data((const char*)&c5, sizeof(c2)); + columns[4]->insert_data((const char*)&c5, sizeof(c5)); + block->set_columns(std::move(columns)); } class TestDeltaWriterClusterKey : public ::testing::Test { diff --git a/be/test/load/delta_writer/delta_writer_test.cpp b/be/test/load/delta_writer/delta_writer_test.cpp index 08cd0f7c7e579a..0ce52ceea706eb 100644 --- a/be/test/load/delta_writer/delta_writer_test.cpp +++ b/be/test/load/delta_writer/delta_writer_test.cpp @@ -461,15 +461,16 @@ static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { {"2020-07-16 19:39:43", 19}, c3, nullptr, p); } int64_t c3_int = c3.to_int64(); - columns[2]->insert_data((const char*)&c3_int, sizeof(c3)); + columns[2]->insert_data((const char*)&c3_int, sizeof(c3_int)); DateV2Value c4; c4.unchecked_set_time(2022, 6, 6, 0, 0, 0, 0); uint32_t c4_int = c4.to_date_int_val(); - columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); + columns[3]->insert_data((const char*)&c4_int, sizeof(c4_int)); int32_t c5 = seq; - columns[4]->insert_data((const char*)&c5, sizeof(c2)); + columns[4]->insert_data((const char*)&c5, sizeof(c5)); + block->set_columns(std::move(columns)); } class TestDeltaWriter : public ::testing::Test { @@ -670,6 +671,7 @@ TEST_F(TestDeltaWriter, vec_write) { date_v2_int = date_v2.to_date_int_val(); columns[21]->insert_data((const char*)&date_v2_int, sizeof(date_v2_int)); + block.set_columns(std::move(columns)); res = delta_writer->write(&block, {0}); ASSERT_TRUE(res.ok()); } diff --git a/be/test/load/memtable/memtable_memory_limiter_test.cpp b/be/test/load/memtable/memtable_memory_limiter_test.cpp index 1d5c1238335346..f3566448a1f29b 100644 --- a/be/test/load/memtable/memtable_memory_limiter_test.cpp +++ b/be/test/load/memtable/memtable_memory_limiter_test.cpp @@ -165,6 +165,7 @@ TEST_F(MemTableMemoryLimiterTest, handle_memtable_flush_test) { int32_t k3 = -2147483647; columns[2]->insert_data((const char*)&k3, sizeof(k3)); + block.set_columns(std::move(columns)); res = delta_writer->write(&block, {0}); ASSERT_TRUE(res.ok()); } diff --git a/be/test/runtime/snapshot_loader_test.cpp b/be/test/runtime/snapshot_loader_test.cpp index 6c320d225f5e44..209ab1139a406b 100644 --- a/be/test/runtime/snapshot_loader_test.cpp +++ b/be/test/runtime/snapshot_loader_test.cpp @@ -214,6 +214,7 @@ static void add_rowset(int64_t tablet_id, int32_t schema_hash, int64_t partition auto columns = block.mutate_columns(); int16_t c1 = value; columns[0]->insert_data((const char*)&c1, sizeof(c1)); + block.set_columns(std::move(columns)); Status res = delta_writer->write(&block, {0}); EXPECT_TRUE(res.ok()); diff --git a/be/test/runtime/stream_load_parquet_test.cpp b/be/test/runtime/stream_load_parquet_test.cpp index bf9a35c2a64111..62e280f1e80a64 100644 --- a/be/test/runtime/stream_load_parquet_test.cpp +++ b/be/test/runtime/stream_load_parquet_test.cpp @@ -15,6 +15,10 @@ // specific language governing permissions and limitations // under the License. +#include + +#include + #include "gtest/gtest.h" #include "load/load_path_mgr.h" #include "runtime/exec_env.h" @@ -27,18 +31,19 @@ class LoadPathMgrTest : public testing::Test { _exec_env = ExecEnv::GetInstance(); _load_path_mgr = std::make_unique(_exec_env); - // create tmp file - _test_dir = "/tmp/test_clean_file"; - _test_dir1 = "/tmp/test_clean_file/mini_download"; - _test_dir2 = "/tmp/test_clean_file1/mini_download/test.parquet"; - - auto result = io::global_local_filesystem()->delete_directory_or_file(_test_dir1); - result = io::global_local_filesystem()->create_directory(_test_dir1); - EXPECT_TRUE(result.ok()); + auto test_root = std::filesystem::temp_directory_path() / + ("doris_load_path_mgr_test_" + std::to_string(::getpid())); + _test_dir = test_root.string(); + _test_dir1 = _test_dir + "/mini_download"; + _test_dir2 = _test_dir1 + "/test.parquet"; - result = io::global_local_filesystem()->delete_directory_or_file(_test_dir2); - result = io::global_local_filesystem()->create_directory(_test_dir2); - EXPECT_TRUE(result.ok()); + std::error_code ec; + std::filesystem::remove_all(_test_dir, ec); + ASSERT_FALSE(ec) << ec.message(); + std::filesystem::create_directories(_test_dir1, ec); + ASSERT_FALSE(ec) << ec.message(); + std::filesystem::create_directories(_test_dir2, ec); + ASSERT_FALSE(ec) << ec.message(); const_cast&>(_exec_env->store_paths()).emplace_back(_test_dir, 1024); } @@ -46,6 +51,9 @@ class LoadPathMgrTest : public testing::Test { void TearDown() override { const_cast&>(_exec_env->store_paths()).clear(); _load_path_mgr->stop(); + std::error_code ec; + std::filesystem::remove_all(_test_dir, ec); + EXPECT_FALSE(ec) << ec.message(); _exec_env->destroy(); } @@ -96,4 +104,4 @@ TEST_F(LoadPathMgrTest, CheckDiskSpaceTest) { EXPECT_FALSE(exists); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/storage/adaptive_thread_pool_controller_test.cpp b/be/test/storage/adaptive_thread_pool_controller_test.cpp index 06d79629330e14..6c4e42fea05897 100644 --- a/be/test/storage/adaptive_thread_pool_controller_test.cpp +++ b/be/test/storage/adaptive_thread_pool_controller_test.cpp @@ -19,6 +19,7 @@ #include +#include #include #include "common/config.h" @@ -44,15 +45,19 @@ class AdaptiveThreadPoolControllerTest : public testing::Test { void SetUp() override { _original_enable_adaptive = config::enable_adaptive_flush_threads; + int num_cpus = std::thread::hardware_concurrency(); + if (num_cpus <= 0) num_cpus = 1; + int max_threads = std::max(64, num_cpus * 4); + ASSERT_TRUE(ThreadPoolBuilder("TestPool") .set_min_threads(2) - .set_max_threads(64) + .set_max_threads(max_threads) .build(&_pool) .ok()); ASSERT_TRUE(ThreadPoolBuilder("TestPool2") .set_min_threads(2) - .set_max_threads(64) + .set_max_threads(max_threads) .build(&_pool2) .ok()); } diff --git a/be/test/storage/compaction/ordered_data_compaction_test.cpp b/be/test/storage/compaction/ordered_data_compaction_test.cpp index 006d48358c467e..712f74d1394fa5 100644 --- a/be/test/storage/compaction/ordered_data_compaction_test.cpp +++ b/be/test/storage/compaction/ordered_data_compaction_test.cpp @@ -318,6 +318,7 @@ class OrderedDataCompactionTest : public ::testing::Test { } num_rows++; } + block.set_columns(std::move(columns)); auto s = rowset_writer->add_block(&block); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); @@ -587,6 +588,7 @@ TEST_F(OrderedDataCompactionTest, test_index_disk_size) { } num_rows++; } + block.set_columns(std::move(columns)); auto s = rowset_writer->add_block(&block); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); diff --git a/be/test/storage/compaction/segcompaction_mow_test.cpp b/be/test/storage/compaction/segcompaction_mow_test.cpp index 760a5d953aa693..13b836cd5a38d3 100644 --- a/be/test/storage/compaction/segcompaction_mow_test.cpp +++ b/be/test/storage/compaction/segcompaction_mow_test.cpp @@ -103,6 +103,14 @@ class SegCompactionMoWTest : public ::testing::TestWithParam { protected: OlapReaderStatistics _stats; + Status add_block_with_columns(RowsetWriter* rowset_writer, Block* block, + MutableColumns* columns) { + block->set_columns(std::move(*columns)); + auto st = rowset_writer->add_block(block); + *columns = block->mutate_columns(); + return st; + } + bool check_dir(std::vector& vec) { std::vector result; for (const auto& entry : std::filesystem::directory_iterator(lTestDir)) { @@ -358,7 +366,7 @@ TEST_P(SegCompactionMoWTest, SegCompactionThenRead) { } } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -458,7 +466,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -486,7 +494,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -514,7 +522,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -542,7 +550,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -573,7 +581,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { } unique_keys.emplace(k1, rid); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -610,7 +618,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -688,7 +696,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -716,7 +724,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -744,7 +752,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -772,7 +780,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -800,7 +808,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -873,7 +881,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionNotTrigger) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); diff --git a/be/test/storage/compaction/segcompaction_test.cpp b/be/test/storage/compaction/segcompaction_test.cpp index 6c43fea684cb43..15dc86c89d74b2 100644 --- a/be/test/storage/compaction/segcompaction_test.cpp +++ b/be/test/storage/compaction/segcompaction_test.cpp @@ -124,6 +124,14 @@ class SegCompactionTest : public testing::Test { protected: OlapReaderStatistics _stats; + Status add_block_with_columns(RowsetWriter* rowset_writer, Block* block, + MutableColumns* columns) { + block->set_columns(std::move(*columns)); + auto st = rowset_writer->add_block(block); + *columns = block->mutate_columns(); + return st; + } + bool check_dir(std::vector& vec) { std::vector result; for (const auto& entry : std::filesystem::directory_iterator(lTestDir)) { @@ -316,7 +324,7 @@ TEST_F(SegCompactionTest, SegCompactionThenRead) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -437,7 +445,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -455,7 +463,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -473,7 +481,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -491,7 +499,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -509,7 +517,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -528,7 +536,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -591,7 +599,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -609,7 +617,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -627,7 +635,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -645,7 +653,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -663,7 +671,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -730,7 +738,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -754,7 +762,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -779,7 +787,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -804,7 +812,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -817,7 +825,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -830,7 +838,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -997,7 +1005,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1021,7 +1029,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1046,7 +1054,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1071,7 +1079,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1084,7 +1092,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1097,7 +1105,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); diff --git a/be/test/storage/compaction/vertical_compaction_test.cpp b/be/test/storage/compaction/vertical_compaction_test.cpp index 3b736857242caf..d744d7aa67e185 100644 --- a/be/test/storage/compaction/vertical_compaction_test.cpp +++ b/be/test/storage/compaction/vertical_compaction_test.cpp @@ -107,6 +107,14 @@ class VerticalCompactionTest : public ::testing::Test { ExecEnv::GetInstance()->set_storage_engine(nullptr); } + Status add_block_with_columns(RowsetWriter* rowset_writer, Block* block, + MutableColumns* columns) { + block->set_columns(std::move(*columns)); + auto st = rowset_writer->add_block(block); + *columns = block->mutate_columns(); + return st; + } + TabletSchemaSPtr create_schema(KeysType keys_type = DUP_KEYS, bool without_key = false) { TabletSchemaSPtr tablet_schema = std::make_shared(); TabletSchemaPB tablet_schema_pb; @@ -255,7 +263,7 @@ class VerticalCompactionTest : public ::testing::Test { } num_rows++; } - auto s = rowset_writer->add_block(&block); + auto s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_TRUE(s.ok()); @@ -1146,7 +1154,7 @@ TEST_F(VerticalCompactionTest, TestUniqueKeyVerticalMergeWithNullableSparseColum columns[2]->insert_data((const char*)&delete_sign, sizeof(delete_sign)); } - auto s = rowset_writer->add_block(&block); + auto s = add_block_with_columns(rowset_writer.get(), &block, &columns); ASSERT_TRUE(s.ok()) << s; s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s; @@ -1311,7 +1319,7 @@ TEST_F(VerticalCompactionTest, TestFooterRawDataBytesAccuracy) { columns[0]->insert_data(reinterpret_cast(&int_val), sizeof(int_val)); columns[1]->insert_data(fixed_string.data(), fixed_string.size()); } - ASSERT_TRUE(rowset_writer->add_block(&block).ok()); + ASSERT_TRUE(add_block_with_columns(rowset_writer.get(), &block, &columns).ok()); ASSERT_TRUE(rowset_writer->flush().ok()); RowsetSharedPtr rowset; @@ -1412,7 +1420,7 @@ TEST_F(VerticalCompactionTest, TestFooterRawDataBytesNullableSparse) { columns[1]->insert_default(); // ColumnNullable default is null } } - ASSERT_TRUE(rowset_writer->add_block(&block).ok()); + ASSERT_TRUE(add_block_with_columns(rowset_writer.get(), &block, &columns).ok()); ASSERT_TRUE(rowset_writer->flush().ok()); RowsetSharedPtr rowset; diff --git a/be/test/storage/index/date_bloom_filter_test.cpp b/be/test/storage/index/date_bloom_filter_test.cpp index 636e7a6848d39b..261c49a92d6595 100644 --- a/be/test/storage/index/date_bloom_filter_test.cpp +++ b/be/test/storage/index/date_bloom_filter_test.cpp @@ -146,6 +146,7 @@ TEST_F(DateBloomFilterTest, query_index_test) { olap_datetime_value = datetime.to_olap_datetime(); columns[0]->insert_many_fix_len_data(reinterpret_cast(&olap_date_value), 1); columns[1]->insert_many_fix_len_data(reinterpret_cast(&olap_datetime_value), 1); + block.set_columns(std::move(columns)); Status st; st = rowset_writer->add_block(&block); @@ -240,6 +241,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { olap_datetime_value = datetime.to_olap_datetime(); columns[0]->insert_many_fix_len_data(reinterpret_cast(&olap_date_value), 1); columns[1]->insert_many_fix_len_data(reinterpret_cast(&olap_datetime_value), 1); + block.set_columns(std::move(columns)); EXPECT_TRUE(rowset_writer->add_block(&block).ok()); EXPECT_TRUE(rowset_writer->flush().ok()); diff --git a/be/test/storage/index/index_builder_test.cpp b/be/test/storage/index/index_builder_test.cpp index c281fd511477fb..96cc6839390e3a 100644 --- a/be/test/storage/index/index_builder_test.cpp +++ b/be/test/storage/index/index_builder_test.cpp @@ -257,6 +257,8 @@ TEST_F(IndexBuilderTest, DropInvertedIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -545,6 +547,8 @@ TEST_F(IndexBuilderTest, BuildInvertedIndexAfterWritingDataTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -874,6 +878,8 @@ TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1042,6 +1048,8 @@ TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTestV1) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1191,6 +1199,8 @@ TEST_F(IndexBuilderTest, MultiSegmentBuildIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1341,6 +1351,8 @@ TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1515,6 +1527,8 @@ TEST_F(IndexBuilderTest, RenameColumnIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1669,6 +1683,8 @@ TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1841,6 +1857,8 @@ TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTestV1) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1991,6 +2009,8 @@ TEST_F(IndexBuilderTest, NonNullIndexDataTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2115,6 +2135,8 @@ TEST_F(IndexBuilderTest, NonExistentColumnUniqueIdTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2246,6 +2268,8 @@ TEST_F(IndexBuilderTest, DropIndexV1FormatTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2370,6 +2394,8 @@ TEST_F(IndexBuilderTest, ResourceCleanupTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2535,6 +2561,8 @@ TEST_F(IndexBuilderTest, ArrayTypeIndexTest) { array_col.insert(Field::create_field(arr)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2631,6 +2659,8 @@ TEST_F(IndexBuilderTest, UniqueKeysTableIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2789,6 +2819,8 @@ TEST_F(IndexBuilderTest, HandleSingleRowsetErrorTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2909,6 +2941,8 @@ TEST_F(IndexBuilderTest, UpdateInvertedIndexInfoErrorTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -3024,6 +3058,8 @@ TEST_F(IndexBuilderTest, DropOneIndexNotAffectOtherIndexesOnSameColumnTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); diff --git a/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp b/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp index 1512212d6f24d0..c9856eeaa53ec1 100644 --- a/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp +++ b/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp @@ -154,6 +154,7 @@ TEST_F(IndexGcBinglogsTest, gc_binlogs_test) { Field v1 = Field::create_field("v1"); columns[0]->insert(key); columns[1]->insert(v1); + block.set_columns(std::move(columns)); EXPECT_TRUE(rowset_writer->add_block(&block).ok()); EXPECT_TRUE(rowset_writer->flush().ok()); diff --git a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp index 93670029927b6e..c3c23265de154d 100644 --- a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp +++ b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp @@ -699,6 +699,8 @@ class IndexCompactionUtils { } } + block.set_columns(std::move(columns)); + Status st = rowset_writer->add_block(&block); EXPECT_TRUE(st.ok()) << st.to_string(); st = rowset_writer->flush(); @@ -760,4 +762,4 @@ class IndexCompactionUtils { } }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/storage/rowid_conversion_test.cpp b/be/test/storage/rowid_conversion_test.cpp index 3ec611ca430cf6..0d470d1c7a7e74 100644 --- a/be/test/storage/rowid_conversion_test.cpp +++ b/be/test/storage/rowid_conversion_test.cpp @@ -205,6 +205,7 @@ class TestRowIdConversion : public testing::TestWithParamadd_block(&block); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); diff --git a/be/test/storage/segment/segment_cache_test.cpp b/be/test/storage/segment/segment_cache_test.cpp index 82bfe8242411e2..04b395f0089c89 100644 --- a/be/test/storage/segment/segment_cache_test.cpp +++ b/be/test/storage/segment/segment_cache_test.cpp @@ -198,15 +198,16 @@ static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { {"2020-07-16 19:39:43", 19}, c3, nullptr, p); } int64_t c3_int = c3.to_int64(); - columns[2]->insert_data((const char*)&c3_int, sizeof(c3)); + columns[2]->insert_data((const char*)&c3_int, sizeof(c3_int)); DateV2Value c4; c4.unchecked_set_time(2022, 6, 6, 0, 0, 0, 0); uint32_t c4_int = c4.to_date_int_val(); - columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); + columns[3]->insert_data((const char*)&c4_int, sizeof(c4_int)); int32_t c5 = seq; - columns[4]->insert_data((const char*)&c5, sizeof(c2)); + columns[4]->insert_data((const char*)&c5, sizeof(c5)); + block->set_columns(std::move(columns)); } class SegmentCacheTest : public ::testing::Test { diff --git a/be/test/storage/segment/segments_key_bounds_truncation_test.cpp b/be/test/storage/segment/segments_key_bounds_truncation_test.cpp index 0236ac98c9f7be..b9cad3c63b3eb7 100644 --- a/be/test/storage/segment/segments_key_bounds_truncation_test.cpp +++ b/be/test/storage/segment/segments_key_bounds_truncation_test.cpp @@ -188,6 +188,7 @@ class SegmentsKeyBoundsTruncationTest : public testing::Test { columns[1]->insert_data(reinterpret_cast(&const_value), sizeof(const_value)); } + block.set_columns(std::move(columns)); ret.emplace_back(std::move(block)); } return ret; diff --git a/be/test/storage/segment/variant_column_writer_reader_test.cpp b/be/test/storage/segment/variant_column_writer_reader_test.cpp index 3de2feb4b33600..a557b0cfbcf3a4 100644 --- a/be/test/storage/segment/variant_column_writer_reader_test.cpp +++ b/be/test/storage/segment/variant_column_writer_reader_test.cpp @@ -82,6 +82,38 @@ static void construct_tablet_index(TabletIndexPB* tablet_index, int64_t index_id tablet_index->add_col_unique_id(col_unique_id); } +static void fill_nullable_variant_block(Block* block, + std::unordered_map* inserted_jsonstr, + variant_util::PathToNoneNullValues* path_with_size) { + MutableColumnPtr column = IColumn::mutate(block->get_by_position(0).column); + auto* nullable_object = assert_cast(column.get()); + for (int idx = 0; idx < 10; idx++) { + nullable_object->insert_default(); // insert null + { + auto column_object = nullable_object->get_nested_column_ptr(); + auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, + inserted_jsonstr); + path_with_size->insert(res.begin(), res.end()); + } + for (int j = 0; j < 80; ++j) { + Field f = Field::create_field(UInt8(0)); + nullable_object->get_null_map_column().insert(f); + } + nullable_object->insert_many_defaults(17); + { + auto column_object = nullable_object->get_nested_column_ptr(); + auto res = VariantUtil::fill_object_column_with_test_data(column_object, 2, + inserted_jsonstr); + path_with_size->insert(res.begin(), res.end()); + } + for (int j = 0; j < 2; ++j) { + Field f = Field::create_field(UInt8(0)); + nullable_object->get_null_map_column().insert(f); + } + } + block->replace_by_position(0, std::move(column)); +} + // MockColumnReaderCache class for testing class MockColumnReaderCache : public segment_v2::ColumnReaderCache { public: @@ -2649,28 +2681,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_nullable) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -2802,28 +2815,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_nullable_without_finalize) auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -2897,28 +2891,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_bm_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -2992,28 +2967,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_bf_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -3089,28 +3045,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_zm_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -3186,28 +3123,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_inverted_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -3734,7 +3652,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object1 = ColumnVariant::create(3, false); MutableColumnPtr null_object = - ColumnNullable::create(new_column_object1->assume_mutable(), ColumnUInt8::create()); + ColumnNullable::create(std::move(new_column_object1), ColumnUInt8::create()); size_t n = 1000; st = nested_iter->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) << st.msg(); @@ -3745,8 +3663,8 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) { { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object12 = ColumnVariant::create(3, false); - MutableColumnPtr null_object12 = ColumnNullable::create( - new_column_object12->assume_mutable(), ColumnUInt8::create()); + MutableColumnPtr null_object12 = + ColumnNullable::create(std::move(new_column_object12), ColumnUInt8::create()); st = nested_iter->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) << st.msg(); st = nested_iter->next_batch(&n, null_object12, &has_null); @@ -3778,7 +3696,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object2 = ColumnVariant::create(3, false); MutableColumnPtr null_object2 = - ColumnNullable::create(new_column_object2->assume_mutable(), ColumnUInt8::create()); + ColumnNullable::create(std::move(new_column_object2), ColumnUInt8::create()); size_t nrows = 1000; st = nested_iter2->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) << st.msg(); @@ -3888,7 +3806,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter_nullable) { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object1 = ColumnVariant::create(3, false); MutableColumnPtr null_object = - ColumnNullable::create(new_column_object1->assume_mutable(), ColumnUInt8::create()); + ColumnNullable::create(std::move(new_column_object1), ColumnUInt8::create()); size_t nrows = 1000; st = nested_iter->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) << st.msg(); diff --git a/be/test/storage/tablet/tablet_cooldown_test.cpp b/be/test/storage/tablet/tablet_cooldown_test.cpp index b919aa887834e7..acf16442537fbd 100644 --- a/be/test/storage/tablet/tablet_cooldown_test.cpp +++ b/be/test/storage/tablet/tablet_cooldown_test.cpp @@ -344,7 +344,7 @@ static void write_rowset(TabletSharedPtr* tablet, PUniqueId load_id, int64_t rep columns[1]->insert_data((const char*)&c2, sizeof(c2)); int32_t c3 = 1; - columns[2]->insert_data((const char*)&c3, sizeof(c2)); + columns[2]->insert_data((const char*)&c3, sizeof(c3)); VecDateTimeValue c4; { @@ -354,8 +354,9 @@ static void write_rowset(TabletSharedPtr* tablet, PUniqueId load_id, int64_t rep {"2020-07-16 19:39:43", 19}, c4, nullptr, p); } int64_t c4_int = c4.to_int64(); - columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); + columns[3]->insert_data((const char*)&c4_int, sizeof(c4_int)); + block.set_columns(std::move(columns)); st = delta_writer->write(&block, {0}); ASSERT_EQ(Status::OK(), st); } diff --git a/be/test/util/bit_util_test.cpp b/be/test/util/bit_util_test.cpp index af106e3a26011c..6bebd7289182ec 100644 --- a/be/test/util/bit_util_test.cpp +++ b/be/test/util/bit_util_test.cpp @@ -63,14 +63,14 @@ TEST(BitUtil, BigEndianToHost) { void insert_true(ColumnNullable* column, size_t num = 1) { for (int i = 0; i < num; i++) { - assert_cast(column->get_nested_column_ptr().get())->insert_value(1); + assert_cast(column->get_nested_column()).insert_value(1); column->push_false_to_nullmap(1); } } void insert_false(ColumnNullable* column, size_t num = 1) { for (int i = 0; i < num; i++) { - assert_cast(column->get_nested_column_ptr().get())->insert_value(0); + assert_cast(column->get_nested_column()).insert_value(0); column->push_false_to_nullmap(1); } } @@ -102,16 +102,12 @@ TEST(BitUtil, CountZero) { insert_false(column.get(), 54); insert_true(column.get(), 1); insert_false(column.get(), 14); + const auto& nested_data = + assert_cast(column->get_nested_column()).get_data(); ASSERT_EQ( - brute_force_count_zero_num( - assert_cast(column->get_nested_column_ptr().get()) - ->get_data() - .data(), - column->get_null_map_data().data(), column->size()), - simd::count_zero_num((int8_t*)assert_cast( - column->get_nested_column_ptr().get()) - ->get_data() - .data(), + brute_force_count_zero_num(nested_data.data(), column->get_null_map_data().data(), + column->size()), + simd::count_zero_num((int8_t*)nested_data.data(), column->get_null_map_data().data(), (uint32_t)column->size())); } @@ -131,16 +127,12 @@ TEST(BitUtil, CountZero) { } } } + const auto& nested_data = + assert_cast(column->get_nested_column()).get_data(); ASSERT_EQ( - brute_force_count_zero_num( - assert_cast(column->get_nested_column_ptr().get()) - ->get_data() - .data(), - column->get_null_map_data().data(), column->size()), - simd::count_zero_num((int8_t*)assert_cast( - column->get_nested_column_ptr().get()) - ->get_data() - .data(), + brute_force_count_zero_num(nested_data.data(), column->get_null_map_data().data(), + column->size()), + simd::count_zero_num((int8_t*)nested_data.data(), column->get_null_map_data().data(), (uint32_t)column->size())); } }