From c1eeb5202011519d85e84ec1cc61994ad076d3ba Mon Sep 17 00:00:00 2001 From: zhiqiang Date: Fri, 1 May 2026 10:05:04 +0800 Subject: [PATCH] [refactor](ann) Avoid copying ANN search results (#62924) Issue Number: close #xxx Related PR: #xxx Problem Summary: ANN TopN copied distance results into an intermediate vector before materializing the virtual column, and row id ownership was moved through several wrappers. This refactors ANN search result ownership to share the generated buffers directly and removes the redundant update_result helper. None - Test: Unit Test - ./run-be-ut.sh --run --filter=AnnTopNRuntimeEvaluateTopN:AnnTopNRuntimeEvaluate_DimensionMismatch:AnnIndexReaderTest.TestQueryWithoutLoadIndex:AnnIndexReaderTest.TestQueryIVFWithoutLoadIndex:VirtualColumnIteratorTest.* - Behavior changed: No - Does this need documentation: No --- be/src/storage/index/ann/ann_index_reader.cpp | 24 ++---- be/src/storage/index/ann/ann_index_reader.h | 3 - be/src/storage/index/ann/ann_search_params.h | 17 ++-- be/src/storage/index/ann/ann_topn_runtime.cpp | 8 +- be/src/storage/index/ann/ann_topn_runtime.h | 2 +- be/src/storage/index/ann/faiss_ann_index.cpp | 20 ++--- be/src/storage/segment/segment_iterator.cpp | 5 +- .../segment/virtual_column_iterator.cpp | 4 +- .../storage/segment/virtual_column_iterator.h | 4 +- .../index/ann/ann_index_reader_test.cpp | 36 -------- .../index/ann/ann_range_search_test.cpp | 85 +------------------ .../index/ann/ann_topn_descriptor_test.cpp | 8 +- .../ann/ann_topn_runtime_negative_test.cpp | 2 +- .../ann/virtual_column_iterator_test.cpp | 48 +++++------ 14 files changed, 67 insertions(+), 199 deletions(-) diff --git a/be/src/storage/index/ann/ann_index_reader.cpp b/be/src/storage/index/ann/ann_index_reader.cpp index 8844e8aaec9e8d..80850c65901281 100644 --- a/be/src/storage/index/ann/ann_index_reader.cpp +++ b/be/src/storage/index/ann/ann_index_reader.cpp @@ -35,20 +35,6 @@ #include "util/once.h" namespace doris::segment_v2 { -#include "common/compile_check_begin.h" -void AnnIndexReader::update_result(const IndexSearchResult& search_result, - std::vector& distance, roaring::Roaring& roaring) { - DCHECK(search_result.distances != nullptr); - DCHECK(search_result.roaring != nullptr); - size_t limit = search_result.roaring->cardinality(); - // Use search result to update distance and row_id - distance.resize(limit); - for (size_t i = 0; i < limit; ++i) { - distance[i] = search_result.distances[i]; - } - roaring = *search_result.roaring; -} - AnnIndexReader::AnnIndexReader(const TabletIndex* index_meta, std::shared_ptr index_file_reader) : _index_meta(*index_meta), _index_file_reader(index_file_reader) { @@ -176,12 +162,12 @@ Status AnnIndexReader::query(io::IOContext* io_ctx, AnnTopNParam* param, AnnInde DORIS_CHECK(index_search_result.roaring != nullptr); DORIS_CHECK(index_search_result.distances != nullptr); DORIS_CHECK(index_search_result.row_ids != nullptr); - param->distance = std::make_unique>(); { SCOPED_TIMER(&(stats->result_process_costs_ns)); - update_result(index_search_result, *param->distance, *param->roaring); + param->distance = index_search_result.distances; + *param->roaring = *index_search_result.roaring; } - param->row_ids = std::move(index_search_result.row_ids); + param->row_ids = index_search_result.row_ids; } double search_costs_ms = static_cast(stats->search_costs_ns.value()) / 1000.0; @@ -267,13 +253,13 @@ Status AnnIndexReader::range_search(const AnnRangeSearchParams& params, DCHECK(search_result.row_ids->size() == search_result.roaring->cardinality()) << "Row ids size: " << search_result.row_ids->size() << ", roaring size: " << search_result.roaring->cardinality(); - result->row_ids = std::move(search_result.row_ids); + result->row_ids = search_result.row_ids; } else { result->row_ids = nullptr; } if (search_result.distances != nullptr) { - result->distance = std::move(search_result.distances); + result->distance = search_result.distances; } else { result->distance = nullptr; } diff --git a/be/src/storage/index/ann/ann_index_reader.h b/be/src/storage/index/ann/ann_index_reader.h index 0f2d5839e72323..a170c962dcc958 100644 --- a/be/src/storage/index/ann/ann_index_reader.h +++ b/be/src/storage/index/ann/ann_index_reader.h @@ -42,9 +42,6 @@ class AnnIndexReader : public IndexReader { std::shared_ptr index_file_reader); ~AnnIndexReader() override = default; - static void update_result(const IndexSearchResult&, std::vector& distance, - roaring::Roaring& row_id); - Status load_index(io::IOContext* io_ctx); // Try to load index, return true if successful, false if failed diff --git a/be/src/storage/index/ann/ann_search_params.h b/be/src/storage/index/ann/ann_search_params.h index 4cccac296aa586..95d8d11dd835ea 100644 --- a/be/src/storage/index/ann/ann_search_params.h +++ b/be/src/storage/index/ann/ann_search_params.h @@ -33,8 +33,10 @@ #include #include +#include #include #include +#include #include "exec/scan/vector_search_user_params.h" #include "runtime/runtime_profile.h" @@ -116,8 +118,8 @@ struct AnnTopNParam { doris::VectorSearchUserParams _user_params; roaring::Roaring* roaring; size_t rows_of_segment = 0; - std::unique_ptr> distance = nullptr; - std::unique_ptr> row_ids = nullptr; + std::shared_ptr distance = nullptr; + std::shared_ptr> row_ids = nullptr; std::unique_ptr stats = nullptr; }; @@ -136,22 +138,23 @@ struct AnnRangeSearchParams { struct AnnRangeSearchResult { std::shared_ptr roaring; - std::unique_ptr> row_ids; - std::unique_ptr distance; + std::shared_ptr> row_ids; + std::shared_ptr distance; }; /* This struct is used to wrap the search result of a vector index. roaring is a bitmap that contains the row ids that satisfy the search condition. -row_ids is a vector of row ids that are returned by the search, it could be used by virtual_column_iterator to do column filter. +row_ids is an ordered vector of row ids returned by the search. row_ids[i] is aligned with +distances[i], so virtual_column_iterator can map each distance back to its segment row id. distances is a vector of distances that are returned by the search. For range search, is condition is not le_or_lt, the row_ids and distances will be nullptr. */ struct IndexSearchResult { IndexSearchResult() = default; - std::unique_ptr distances = nullptr; - std::unique_ptr> row_ids = nullptr; + std::shared_ptr distances = nullptr; + std::shared_ptr> row_ids = nullptr; std::shared_ptr roaring = nullptr; // Internal engine timings (ns) int64_t engine_search_ns = 0; // time spent in the underlying index search call diff --git a/be/src/storage/index/ann/ann_topn_runtime.cpp b/be/src/storage/index/ann/ann_topn_runtime.cpp index f07ff39cc80649..4ac4042395fed4 100644 --- a/be/src/storage/index/ann/ann_topn_runtime.cpp +++ b/be/src/storage/index/ann/ann_topn_runtime.cpp @@ -183,7 +183,7 @@ Status AnnTopNRuntime::prepare(RuntimeState* state, const RowDescriptor& row_des Status AnnTopNRuntime::evaluate_vector_ann_search(segment_v2::AnnIndexIterator* ann_index_iterator, roaring::Roaring* roaring, size_t rows_of_segment, IColumn::MutablePtr& result_column, - std::unique_ptr>& row_ids, + std::shared_ptr>& row_ids, segment_v2::AnnIndexStats& ann_index_stats) { DCHECK(ann_index_iterator != nullptr); DCHECK(_order_by_expr_ctx != nullptr); @@ -220,13 +220,13 @@ Status AnnTopNRuntime::evaluate_vector_ann_search(segment_v2::AnnIndexIterator* DCHECK(ann_query_params.distance != nullptr); DCHECK(ann_query_params.row_ids != nullptr); - size_t num_results = ann_query_params.distance->size(); + size_t num_results = ann_query_params.row_ids->size(); auto result_column_float = ColumnFloat32::create(num_results); for (size_t i = 0; i < num_results; ++i) { - result_column_float->get_data()[i] = (*ann_query_params.distance)[i]; + result_column_float->get_data()[i] = ann_query_params.distance[i]; } result_column = std::move(result_column_float); - row_ids = std::move(ann_query_params.row_ids); + row_ids = ann_query_params.row_ids; ann_index_stats = *ann_query_params.stats; return Status::OK(); } diff --git a/be/src/storage/index/ann/ann_topn_runtime.h b/be/src/storage/index/ann/ann_topn_runtime.h index 8715225bad8c76..63e04cc30b6256 100644 --- a/be/src/storage/index/ann/ann_topn_runtime.h +++ b/be/src/storage/index/ann/ann_topn_runtime.h @@ -118,7 +118,7 @@ class AnnTopNRuntime { Status evaluate_vector_ann_search(segment_v2::AnnIndexIterator* ann_index_iterator, roaring::Roaring* row_bitmap, size_t rows_of_segment, IColumn::MutablePtr& result_column, - std::unique_ptr>& row_ids, + std::shared_ptr>& row_ids, segment_v2::AnnIndexStats& ann_index_stats); /** diff --git a/be/src/storage/index/ann/faiss_ann_index.cpp b/be/src/storage/index/ann/faiss_ann_index.cpp index 1072a2ea2c5be2..b07447b33b9515 100644 --- a/be/src/storage/index/ann/faiss_ann_index.cpp +++ b/be/src/storage/index/ann/faiss_ann_index.cpp @@ -726,8 +726,8 @@ doris::Status FaissVectorIndex::ann_topn_search(const float* query_vec, int k, result.roaring = std::make_shared(); update_roaring(labels, k, *result.roaring); size_t roaring_cardinality = result.roaring->cardinality(); - result.distances = std::make_unique(roaring_cardinality); - result.row_ids = std::make_unique>(); + result.distances = std::shared_ptr(new float[roaring_cardinality]); + result.row_ids = std::make_shared>(); result.row_ids->resize(roaring_cardinality); if (_metric == AnnIndexMetric::L2) { @@ -837,17 +837,17 @@ doris::Status FaissVectorIndex::range_search(const float* query_vec, const float size_t begin = native_search_result.lims[0]; size_t end = native_search_result.lims[1]; - auto row_ids = std::make_unique>(); + auto row_ids = std::make_shared>(); row_ids->resize(end - begin); if (params.is_le_or_lt) { if (_metric == AnnIndexMetric::L2) { - std::unique_ptr distances_ptr; + std::shared_ptr distances_ptr; float* distances = nullptr; auto roaring = std::make_shared(); { // Engine convert: build roaring, row_ids, distances from FAISS result SCOPED_RAW_TIMER(&result.engine_convert_ns); - distances_ptr = std::make_unique(end - begin); + distances_ptr = std::shared_ptr(new float[end - begin]); distances = distances_ptr.get(); // The distance returned by Faiss is actually the squared distance. // So we need to take the square root of the squared distance. @@ -857,8 +857,8 @@ doris::Status FaissVectorIndex::range_search(const float* query_vec, const float distances[i - begin] = sqrt(native_search_result.distances[i]); } } - result.distances = std::move(distances_ptr); - result.row_ids = std::move(row_ids); + result.distances = distances_ptr; + result.row_ids = row_ids; result.roaring = roaring; DCHECK(result.row_ids->size() == result.roaring->cardinality()) @@ -908,7 +908,7 @@ doris::Status FaissVectorIndex::range_search(const float* query_vec, const float // For inner product, we can use the distance directly. // range search on ip gets all vectors with inner product greater than or equal to the radius. // when query condition is not le_or_lt, we can use the roaring and distance directly. - std::unique_ptr distances_ptr = std::make_unique(end - begin); + std::shared_ptr distances_ptr(new float[end - begin]); float* distances = distances_ptr.get(); auto roaring = std::make_shared(); // The distance returned by Faiss is actually the squared distance. @@ -918,8 +918,8 @@ doris::Status FaissVectorIndex::range_search(const float* query_vec, const float roaring->add(cast_set(native_search_result.labels[i])); distances[i - begin] = native_search_result.distances[i]; } - result.distances = std::move(distances_ptr); - result.row_ids = std::move(row_ids); + result.distances = distances_ptr; + result.row_ids = row_ids; result.roaring = roaring; DCHECK(result.row_ids->size() == result.roaring->cardinality()) diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 5de85f07f7aaf4..f7d0107db88322 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -918,7 +918,7 @@ Status SegmentIterator::_apply_ann_topn_predicate() { return Status::OK(); } IColumn::MutablePtr result_column; - std::unique_ptr> result_row_ids; + std::shared_ptr> result_row_ids; segment_v2::AnnIndexStats ann_index_stats; // Try to load ANN index before search @@ -976,8 +976,7 @@ Status SegmentIterator::_apply_ann_topn_predicate() { "Virtual column iterator, column_idx {}, is materialized with {} rows", dst_col_idx, result_row_ids->size()); // reference count of result_column should be 1, so move will not issue any data copy. - virtual_column_iter->prepare_materialization(std::move(result_column), - std::move(result_row_ids)); + virtual_column_iter->prepare_materialization(std::move(result_column), result_row_ids); _need_read_data_indices[src_cid] = false; VLOG_DEBUG << fmt::format( diff --git a/be/src/storage/segment/virtual_column_iterator.cpp b/be/src/storage/segment/virtual_column_iterator.cpp index ee9e833f0b7ac7..3c905705addbb3 100644 --- a/be/src/storage/segment/virtual_column_iterator.cpp +++ b/be/src/storage/segment/virtual_column_iterator.cpp @@ -37,7 +37,7 @@ Status VirtualColumnIterator::init(const ColumnIteratorOptions& opts) { } void VirtualColumnIterator::prepare_materialization(IColumn::Ptr column, - std::unique_ptr> labels) { + std::shared_ptr> labels) { DCHECK(labels->size() == column->size()) << "labels size: " << labels->size() << ", materialized column size: " << column->size(); // 1. do sort to labels @@ -165,4 +165,4 @@ Status VirtualColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t return Status::OK(); } -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2 diff --git a/be/src/storage/segment/virtual_column_iterator.h b/be/src/storage/segment/virtual_column_iterator.h index d24ed2614f9238..28a7b5bbf77a4b 100644 --- a/be/src/storage/segment/virtual_column_iterator.h +++ b/be/src/storage/segment/virtual_column_iterator.h @@ -34,7 +34,7 @@ class VirtualColumnIterator : public ColumnIterator { ~VirtualColumnIterator() override = default; MOCK_FUNCTION void prepare_materialization(IColumn::Ptr column, - std::unique_ptr> labels); + std::shared_ptr> labels); Status init(const ColumnIteratorOptions& opts) override; @@ -61,4 +61,4 @@ class VirtualColumnIterator : public ColumnIterator { ordinal_t _current_ordinal = 0; }; -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2 diff --git a/be/test/storage/index/ann/ann_index_reader_test.cpp b/be/test/storage/index/ann/ann_index_reader_test.cpp index b620557934da72..b193f355c2ac61 100644 --- a/be/test/storage/index/ann/ann_index_reader_test.cpp +++ b/be/test/storage/index/ann/ann_index_reader_test.cpp @@ -384,42 +384,6 @@ TEST_F(AnnIndexReaderTest, TestRangeSearchIVFWithoutLoadIndex) { } } -TEST_F(AnnIndexReaderTest, TestUpdateResultStatic) { - // Test the static update_result method - segment_v2::IndexSearchResult search_result; - - // Set up test data - auto roaring = std::make_shared(); - roaring->add(10); - roaring->add(20); - roaring->add(30); - - size_t num_results = 3; - auto distances = std::make_unique(num_results); - distances[0] = 1.5f; - distances[1] = 2.3f; - distances[2] = 3.1f; - - search_result.roaring = roaring; - search_result.distances = std::move(distances); - - // Call update_result - std::vector distance_vec; - roaring::Roaring result_roaring; - - segment_v2::AnnIndexReader::update_result(search_result, distance_vec, result_roaring); - - // Verify results - EXPECT_EQ(distance_vec.size(), num_results); - EXPECT_FLOAT_EQ(distance_vec[0], 1.5f); - EXPECT_FLOAT_EQ(distance_vec[1], 2.3f); - EXPECT_FLOAT_EQ(distance_vec[2], 3.1f); - EXPECT_EQ(result_roaring.cardinality(), num_results); - EXPECT_TRUE(result_roaring.contains(10)); - EXPECT_TRUE(result_roaring.contains(20)); - EXPECT_TRUE(result_roaring.contains(30)); -} - TEST_F(AnnIndexReaderTest, TestRangeSearchWithDifferentParameters) { auto reader = std::make_unique(_tablet_index.get(), _mock_index_file_reader); diff --git a/be/test/storage/index/ann/ann_range_search_test.cpp b/be/test/storage/index/ann/ann_range_search_test.cpp index cc8d5cf7475969..0cbf9ee6e9ed60 100644 --- a/be/test/storage/index/ann/ann_range_search_test.cpp +++ b/be/test/storage/index/ann/ann_range_search_test.cpp @@ -251,12 +251,12 @@ TEST_F(VectorSearchTest, TestEvaluateAnnRangeSearch2) { doris::segment_v2::AnnIndexStats* stats) { size_t num_results = 10; result->roaring = std::make_shared(); - result->row_ids = std::make_unique>(); + result->row_ids = std::make_shared>(); for (size_t i = 0; i < num_results; ++i) { result->roaring->add(i * 10); result->row_ids->push_back(i * 10); } - result->distance = std::make_unique(10); + result->distance = std::shared_ptr(new float[10]); return Status::OK(); })); @@ -431,44 +431,6 @@ TEST_F(VectorSearchTest, TestAnnIndexIteratorSuccessCases) { EXPECT_TRUE(status.ok()); } -TEST_F(VectorSearchTest, TestAnnIndexReaderUpdateResult) { - // Test AnnIndexReader::update_result method - std::map properties; - properties["index_type"] = "hnsw"; - properties["metric_type"] = "l2_distance"; - auto pair = vector_search_utils::create_tmp_ann_index_reader(properties); - auto reader = pair.second; - - // Create mock IndexSearchResult - doris::segment_v2::IndexSearchResult search_result; - search_result.roaring = std::make_shared(); - search_result.roaring->add(1); - search_result.roaring->add(5); - search_result.roaring->add(10); - - // Create distance array - size_t num_results = 3; - search_result.distances = std::make_unique(num_results); - search_result.distances[0] = 1.5f; - search_result.distances[1] = 2.3f; - search_result.distances[2] = 4.1f; - - // Call update_result - std::vector distance_vector; - roaring::Roaring result_roaring; - reader->update_result(search_result, distance_vector, result_roaring); - - // Verify results - EXPECT_EQ(distance_vector.size(), 3); - EXPECT_FLOAT_EQ(distance_vector[0], 1.5f); - EXPECT_FLOAT_EQ(distance_vector[1], 2.3f); - EXPECT_FLOAT_EQ(distance_vector[2], 4.1f); - EXPECT_EQ(result_roaring.cardinality(), 3); - EXPECT_TRUE(result_roaring.contains(1)); - EXPECT_TRUE(result_roaring.contains(5)); - EXPECT_TRUE(result_roaring.contains(10)); -} - TEST_F(VectorSearchTest, TestAnnIndexReaderNewIterator) { // Test AnnIndexReader::new_iterator method std::map properties; @@ -635,49 +597,6 @@ TEST_F(VectorSearchTest, TestAnnIndexReaderConstructor) { EXPECT_EQ(ip_reader->get_metric_type(), doris::segment_v2::AnnIndexMetric::IP); } -TEST_F(VectorSearchTest, TestAnnIndexReader_UpdateResult) { - // Test AnnIndexReader::update_result method - std::map properties; - properties["index_type"] = "hnsw"; - properties["metric_type"] = "l2_distance"; - auto pair = vector_search_utils::create_tmp_ann_index_reader(properties); - auto reader = pair.second; - - // Create a search result to test update_result - doris::segment_v2::IndexSearchResult search_result; - - // Set up test data - size_t num_results = 3; - auto roaring = std::make_shared(); - roaring->add(10); - roaring->add(20); - roaring->add(30); - - auto distances = std::make_unique(num_results); - distances[0] = 1.5f; - distances[1] = 2.3f; - distances[2] = 3.1f; - - search_result.roaring = roaring; - search_result.distances = std::move(distances); - - // Test update_result method - std::vector distance_vec; - roaring::Roaring result_roaring; - - reader->update_result(search_result, distance_vec, result_roaring); - - // Verify results - EXPECT_EQ(distance_vec.size(), num_results); - EXPECT_FLOAT_EQ(distance_vec[0], 1.5f); - EXPECT_FLOAT_EQ(distance_vec[1], 2.3f); - EXPECT_FLOAT_EQ(distance_vec[2], 3.1f); - EXPECT_EQ(result_roaring.cardinality(), num_results); - EXPECT_TRUE(result_roaring.contains(10)); - EXPECT_TRUE(result_roaring.contains(20)); - EXPECT_TRUE(result_roaring.contains(30)); -} - TEST_F(VectorSearchTest, TestAnnIndexReader_NewIterator) { // Test new_iterator method std::map properties; diff --git a/be/test/storage/index/ann/ann_topn_descriptor_test.cpp b/be/test/storage/index/ann/ann_topn_descriptor_test.cpp index de6dc623e105f9..2cb9f293ee583b 100644 --- a/be/test/storage/index/ann/ann_topn_descriptor_test.cpp +++ b/be/test/storage/index/ann/ann_topn_descriptor_test.cpp @@ -157,17 +157,17 @@ TEST_F(VectorSearchTest, AnnTopNRuntimeEvaluateTopN) { .Times(1) .WillOnce(testing::Invoke([](const segment_v2::IndexParam& value) { auto* ann_param = std::get(value); - ann_param->distance = std::make_unique>(); - ann_param->row_ids = std::make_unique>(); + ann_param->distance = std::shared_ptr(new float[10]); + ann_param->row_ids = std::make_shared>(); for (size_t i = 0; i < 10; ++i) { - ann_param->distance->push_back(static_cast(i)); + ann_param->distance[i] = static_cast(i); ann_param->row_ids->push_back(i); } return Status::OK(); })); _result_column = ColumnFloat32::create(0, 0); - std::unique_ptr> row_ids = std::make_unique>(); + std::shared_ptr> row_ids = std::make_shared>(); roaring::Roaring roaring; doris::segment_v2::AnnIndexStats ann_index_stats; diff --git a/be/test/storage/index/ann/ann_topn_runtime_negative_test.cpp b/be/test/storage/index/ann/ann_topn_runtime_negative_test.cpp index dd84c0f22ff0a3..91d03ea1c8e4be 100644 --- a/be/test/storage/index/ann/ann_topn_runtime_negative_test.cpp +++ b/be/test/storage/index/ann/ann_topn_runtime_negative_test.cpp @@ -134,7 +134,7 @@ TEST_F(VectorSearchTest, AnnTopNRuntimeEvaluate_DimensionMismatch) { roaring::Roaring bitmap; IColumn::MutablePtr result_col = ColumnFloat32::create(0); - std::unique_ptr> row_ids; + std::shared_ptr> row_ids; doris::segment_v2::AnnIndexStats stats; Status st = runtime->evaluate_vector_ann_search(_ann_index_iterator.get(), &bitmap, 10, result_col, row_ids, stats); diff --git a/be/test/storage/index/ann/virtual_column_iterator_test.cpp b/be/test/storage/index/ann/virtual_column_iterator_test.cpp index cee2624374845b..22e8c7b18e9ed1 100644 --- a/be/test/storage/index/ann/virtual_column_iterator_test.cpp +++ b/be/test/storage/index/ann/virtual_column_iterator_test.cpp @@ -60,14 +60,14 @@ TEST_F(VirtualColumnIteratorTest, ReadByRowIdsint32_tColumn) { // Create a materialized int32_t column with values [10, 20, 30, 40, 50] auto int_column = ColumnVector::create(); - std::unique_ptr> labels = std::make_unique>(); + std::shared_ptr> labels = std::make_shared>(); for (int i = 0; i < 5; i++) { int_column->insert_value(10 * (i + 1)); labels->push_back(i); } // Set the materialized column - iterator.prepare_materialization(std::move(int_column), std::move(labels)); + iterator.prepare_materialization(std::move(int_column), labels); // Create destination column MutableColumnPtr dst = ColumnVector::create(); @@ -98,13 +98,13 @@ TEST_F(VirtualColumnIteratorTest, ReadByRowIdsStringColumn) { string_column->insert_value("cherry"); string_column->insert_value("date"); string_column->insert_value("elderberry"); - auto labels = std::make_unique>(); + auto labels = std::make_shared>(); for (int i = 0; i < 5; i++) { labels->push_back(i); } // Set the materialized column - iterator.prepare_materialization(std::move(string_column), std::move(labels)); + iterator.prepare_materialization(std::move(string_column), labels); // Create destination column MutableColumnPtr dst = ColumnString::create(); @@ -129,14 +129,14 @@ TEST_F(VirtualColumnIteratorTest, ReadByRowIdsEmptyRowIds) { // Create a materialized int32_t column with values [10, 20, 30, 40, 50] auto int_column = ColumnVector::create(); - auto labels = std::make_unique>(); + auto labels = std::make_shared>(); for (int i = 0; i < 5; i++) { int_column->insert_value(10 * (i + 1)); labels->push_back(i); } // Set the materialized column - iterator.prepare_materialization(std::move(int_column), std::move(labels)); + iterator.prepare_materialization(std::move(int_column), labels); // Create destination column MutableColumnPtr dst = ColumnVector::create(); @@ -159,7 +159,7 @@ TEST_F(VirtualColumnIteratorTest, TestLargeRowset) { // Create a large materialized int32_t column (1000 values) auto int_column = ColumnVector::create(); - auto labels = std::make_unique>(); + auto labels = std::make_shared>(); for (int i = 0; i < 1000; i++) { int_column->insert_value(i); @@ -167,7 +167,7 @@ TEST_F(VirtualColumnIteratorTest, TestLargeRowset) { } // Set the materialized column - iterator.prepare_materialization(std::move(int_column), std::move(labels)); + iterator.prepare_materialization(std::move(int_column), labels); // Create destination column MutableColumnPtr dst = ColumnVector::create(); @@ -193,7 +193,7 @@ TEST_F(VirtualColumnIteratorTest, TestLargeRowset) { TEST_F(VirtualColumnIteratorTest, ReadByRowIdsNoContinueRowIds) { // Create a column with 1000 values (0-999) auto column = ColumnVector::create(); - auto labels = std::make_unique>(); + auto labels = std::make_shared>(); // Generate non-consecutive row IDs by multiplying by 2 (0,2,4,...) for (size_t i = 0; i < 1000; i++) { @@ -202,7 +202,7 @@ TEST_F(VirtualColumnIteratorTest, ReadByRowIdsNoContinueRowIds) { } VirtualColumnIterator iterator; - iterator.prepare_materialization(std::move(column), std::move(labels)); + iterator.prepare_materialization(std::move(column), labels); // Verify row_id_to_idx mapping is correct for (size_t i = 0; i < 1000; i++) { @@ -291,12 +291,12 @@ TEST_F(VirtualColumnIteratorTest, NextBatchTest1) { // Construct an int32 column with 100 rows, values from 0 to 99 auto int_column = ColumnVector::create(); - auto labels = std::make_unique>(); + auto labels = std::make_shared>(); for (int i = 0; i < 100; ++i) { int_column->insert_value(i); labels->push_back(i); } - iterator.prepare_materialization(std::move(int_column), std::move(labels)); + iterator.prepare_materialization(std::move(int_column), labels); // 1. Seek to row 10, next_batch reads 10 rows { @@ -364,14 +364,14 @@ TEST_F(VirtualColumnIteratorTest, TestPrepare1) { int_column->insert_value(30); int_column->insert_value(40); int_column->insert_value(50); - auto labels = std::make_unique>(); + auto labels = std::make_shared>(); labels->push_back(100); labels->push_back(11); labels->push_back(33); labels->push_back(22); labels->push_back(55); // Set the materialized column - iterator.prepare_materialization(std::move(int_column), std::move(labels)); + iterator.prepare_materialization(std::move(int_column), labels); // Verify row_id_to_idx mapping const auto& row_id_to_idx = iterator.get_row_id_to_idx(); @@ -401,14 +401,14 @@ TEST_F(VirtualColumnIteratorTest, TestColumnNothing) { int_column->insert_value(30); int_column->insert_value(40); int_column->insert_value(50); - auto labels = std::make_unique>(); + auto labels = std::make_shared>(); labels->push_back(100); labels->push_back(11); labels->push_back(33); labels->push_back(22); labels->push_back(55); // Set the materialized column - iterator.prepare_materialization(std::move(int_column), std::move(labels)); + iterator.prepare_materialization(std::move(int_column), labels); // Create destination column MutableColumnPtr dst = ColumnNothing::create(0); @@ -445,12 +445,12 @@ TEST_F(VirtualColumnIteratorTest, SeekAndNextBatchCombination) { int_column->insert_value(val); } - auto labels = std::make_unique>(); + auto labels = std::make_shared>(); for (uint64_t id : global_row_ids) { labels->push_back(id); } - iterator.prepare_materialization(std::move(int_column), std::move(labels)); + iterator.prepare_materialization(std::move(int_column), labels); // Row IDs are already in order: [0, 1, 2, 3] // Corresponding values: [100, 200, 300, 400] @@ -512,12 +512,12 @@ TEST_F(VirtualColumnIteratorTest, ReadByRowidsComprehensive) { int_column->insert_value(val); } - auto labels = std::make_unique>(); + auto labels = std::make_shared>(); for (uint64_t id : global_row_ids) { labels->push_back(id); } - iterator.prepare_materialization(std::move(int_column), std::move(labels)); + iterator.prepare_materialization(std::move(int_column), labels); // After sorting by global_row_id: [25, 50, 100, 200] // Corresponding original values: [4000, 2000, 1000, 3000] @@ -560,12 +560,12 @@ TEST_F(VirtualColumnIteratorTest, MixedOperationsCombination) { int_column->insert_value(val); } - auto labels = std::make_unique>(); + auto labels = std::make_shared>(); for (uint64_t id : global_row_ids) { labels->push_back(id); } - iterator.prepare_materialization(std::move(int_column), std::move(labels)); + iterator.prepare_materialization(std::move(int_column), labels); // Row IDs are consecutive: [0, 1, 2, 3], values [10, 20, 30, 40] // _row_id_to_idx: {0->0, 1->1, 2->2, 3->3} @@ -610,12 +610,12 @@ TEST_F(VirtualColumnIteratorTest, DstColumnNothingHandling) { int_column->insert_value(val); } - auto labels = std::make_unique>(); + auto labels = std::make_shared>(); for (uint64_t id : global_row_ids) { labels->push_back(id); } - iterator.prepare_materialization(std::move(int_column), std::move(labels)); + iterator.prepare_materialization(std::move(int_column), labels); // Row IDs are consecutive: [0, 1, 2] -> [100, 200, 300]