diff --git a/dbms/src/Common/CurrentMetrics.cpp b/dbms/src/Common/CurrentMetrics.cpp index 03fea7b8393..5cb62909ae1 100644 --- a/dbms/src/Common/CurrentMetrics.cpp +++ b/dbms/src/Common/CurrentMetrics.cpp @@ -85,6 +85,7 @@ M(StoragePoolUniPS) \ M(RegionPersisterRunMode) \ M(S3Requests) \ + M(S3RandomAccessFile) \ M(GlobalStorageRunMode) \ M(GlobalThread) \ M(GlobalThreadActive) \ diff --git a/dbms/src/Common/ProfileEvents.cpp b/dbms/src/Common/ProfileEvents.cpp index 884a0c51acf..01bad5fac97 100644 --- a/dbms/src/Common/ProfileEvents.cpp +++ b/dbms/src/Common/ProfileEvents.cpp @@ -139,7 +139,9 @@ M(S3GetObjectRetry) \ M(S3PutObjectRetry) \ M(S3IORead) \ + M(S3IOReadError) \ M(S3IOSeek) \ + M(S3IOSeekError) \ M(S3IOSeekBackward) \ M(FileCacheHit) \ M(FileCacheMiss) \ diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 8dd02574fd6..fbfaf903c28 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -224,6 +224,22 @@ static_assert(RAFT_REGION_BIG_WRITE_THRES * 4 < RAFT_REGION_BIG_WRITE_MAX, "Inva F(type_uni_page_ids, {"type", "uni_page_ids"}), \ F(type_versioned_entries, {"type", "versioned_entries"})) \ M(tiflash_storage_read_tasks_count, "Total number of storage engine read tasks", Counter) \ + M(tiflash_storage_place_index_count, \ + "Total number of place index operations", \ + Counter, \ + F(type_reuse, {"type", "reuse"}), \ + F(type_placed, {"type", "placed"}), \ + F(type_placed_fully_indexed, {"type", "placed_fully_indexed"}), \ + F(type_placed_fully_saved, {"type", "placed_fully_saved"})) \ + M(tiflash_storage_place_index_stats_count, \ + "Bucketed histogram of number of rows/deletes of index placement operations", \ + Histogram, \ + F(type_rows_newly_placed, {{"type", "rows_newly_placed"}}, ExpBuckets{1000, 2, 10}), \ + F(type_deletes_newly_placed, {{"type", "deletes_newly_placed"}}, ExpBucketsWithRange{1, 2, 100}), \ + F(type_rows_after_placed, {{"type", "rows_after_placed"}}, 
ExpBuckets{1000, 2, 10}), \ + F(type_deletes_after_placed, {{"type", "deletes_after_placed"}}, ExpBucketsWithRange{1, 2, 100}), \ + F(type_rows_reuse_placed, {{"type", "rows_reuse_placed"}}, ExpBuckets{1000, 2, 10}), \ + F(type_deletes_reuse_placed, {{"type", "deletes_reuse_placed"}}, ExpBucketsWithRange{1, 2, 100})) \ M(tiflash_storage_command_count, \ "Total number of storage's command, such as delete range / shutdown /startup", \ Counter, \ @@ -246,18 +262,18 @@ static_assert(RAFT_REGION_BIG_WRITE_THRES * 4 < RAFT_REGION_BIG_WRITE_MAX, "Inva F(type_place_index_update, {"type", "place_index_update"})) \ M(tiflash_storage_subtask_duration_seconds, \ "Bucketed histogram of storage's sub task duration", \ - Histogram, \ - F(type_delta_merge_bg, {{"type", "delta_merge_bg"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_delta_merge_bg_gc, {{"type", "delta_merge_bg_gc"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_delta_merge_fg, {{"type", "delta_merge_fg"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_delta_merge_manual, {{"type", "delta_merge_manual"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_delta_compact, {{"type", "delta_compact"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_delta_flush, {{"type", "delta_flush"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_seg_split_bg, {{"type", "seg_split_bg"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_seg_split_fg, {{"type", "seg_split_fg"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_seg_split_ingest, {{"type", "seg_split_ingest"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_seg_merge_bg_gc, {{"type", "seg_merge_bg_gc"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_place_index_update, {{"type", "place_index_update"}}, ExpBuckets{0.001, 2, 20})) \ + Histogram, /* increase the bucket from 10ms to 87 minutes */ \ + F(type_delta_merge_bg, {{"type", "delta_merge_bg"}}, ExpBuckets{0.010, 2, 20}), \ + F(type_delta_merge_bg_gc, {{"type", "delta_merge_bg_gc"}}, ExpBuckets{0.010, 2, 20}), \ + F(type_delta_merge_fg, {{"type", "delta_merge_fg"}}, ExpBuckets{0.010, 2, 20}), \ + 
F(type_delta_merge_manual, {{"type", "delta_merge_manual"}}, ExpBuckets{0.010, 2, 20}), \ + F(type_delta_compact, {{"type", "delta_compact"}}, ExpBuckets{0.010, 2, 20}), \ + F(type_delta_flush, {{"type", "delta_flush"}}, ExpBuckets{0.010, 2, 20}), \ + F(type_seg_split_bg, {{"type", "seg_split_bg"}}, ExpBuckets{0.010, 2, 20}), \ + F(type_seg_split_fg, {{"type", "seg_split_fg"}}, ExpBuckets{0.010, 2, 20}), \ + F(type_seg_split_ingest, {{"type", "seg_split_ingest"}}, ExpBuckets{0.010, 2, 20}), \ + F(type_seg_merge_bg_gc, {{"type", "seg_merge_bg_gc"}}, ExpBuckets{0.010, 2, 20}), \ + F(type_place_index_update, {{"type", "place_index_update"}}, ExpBuckets{0.010, 2, 20})) \ M(tiflash_storage_subtask_throughput_bytes, \ "Calculate the throughput of (maybe foreground) tasks of storage in bytes", \ Counter, /**/ \ @@ -677,12 +693,17 @@ static_assert(RAFT_REGION_BIG_WRITE_THRES * 4 < RAFT_REGION_BIG_WRITE_MAX, "Inva M(tiflash_storage_read_thread_gauge, \ "The gauge of storage read thread", \ Gauge, \ + F(type_read_task_pool, {"type", "read_task_pool"}), \ + F(type_read_task, {"type", "read_task"}), \ + F(type_read_task_active, {"type", "read_task_active"}), \ F(type_merged_task, {"type", "merged_task"}), \ - F(type_merged_task_active, {"type", "merged_task_active"})) \ + F(type_merged_task_units, /* num of merged task segments */ {"type", "merged_task_units"}), \ + F(type_merged_task_active, /* num of merged task actively reading by SegmentReader */ \ + {"type", "merged_task_active"})) \ M(tiflash_storage_read_thread_seconds, \ "Bucketed histogram of read thread", \ - Histogram, \ - F(type_merged_task, {{"type", "merged_task"}}, ExpBuckets{0.001, 2, 20})) \ + Histogram, /* increase the bucket from 10ms to 87 minutes */ \ + F(type_merged_task, {{"type", "merged_task"}}, ExpBuckets{0.010, 2, 20})) \ M(tiflash_mpp_task_manager, \ "The gauge of mpp task manager", \ Gauge, \ @@ -767,7 +788,8 @@ static_assert(RAFT_REGION_BIG_WRITE_THRES * 4 < RAFT_REGION_BIG_WRITE_MAX, "Inva 
F(type_list_objects, {{"type", "list_objects"}}, ExpBuckets{0.001, 2, 20}), \ F(type_delete_object, {{"type", "delete_object"}}, ExpBuckets{0.001, 2, 20}), \ F(type_head_object, {{"type", "head_object"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_read_stream, {{"type", "read_stream"}}, ExpBuckets{0.0001, 2, 20})) \ + F(type_read_stream, {{"type", "read_stream"}}, ExpBuckets{0.0001, 2, 20}), \ + F(type_read_stream_err, {{"type", "read_stream_err"}}, ExpBuckets{0.0001, 2, 20})) \ M(tiflash_storage_s3_http_request_seconds, \ "S3 request duration breakdown in seconds", \ Histogram, \ diff --git a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp index e63760aa411..2857c67d546 100644 --- a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB::DM { diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.h b/dbms/src/Storages/DeltaMerge/File/DMFile.h index 14e3d688da3..f72876149f6 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index 9ae28e93107..61408939a3f 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -23,6 +23,7 @@ #include #include #include +#include namespace DB::DM { diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/MergedTask.h b/dbms/src/Storages/DeltaMerge/ReadThread/MergedTask.h index 1c68922dc15..ca4b6f41a73 100644 --- a/dbms/src/Storages/DeltaMerge/ReadThread/MergedTask.h +++ b/dbms/src/Storages/DeltaMerge/ReadThread/MergedTask.h @@ -79,10 +79,12 @@ class MergedTask { passive_merged_segments.fetch_add(units.size() - 1, std::memory_order_relaxed); 
GET_METRIC(tiflash_storage_read_thread_gauge, type_merged_task).Increment(); + GET_METRIC(tiflash_storage_read_thread_gauge, type_merged_task_units).Increment(units.size()); } ~MergedTask() { passive_merged_segments.fetch_sub(units.size() - 1, std::memory_order_relaxed); + GET_METRIC(tiflash_storage_read_thread_gauge, type_merged_task_units).Decrement(units.size()); GET_METRIC(tiflash_storage_read_thread_gauge, type_merged_task).Decrement(); GET_METRIC(tiflash_storage_read_thread_seconds, type_merged_task).Observe(sw.elapsedSeconds()); } diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 8ba58192771..46041df77ac 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -2666,6 +2666,7 @@ Segment::ReadInfo Segment::getReadInfo( bool ok = segment_snap->delta->getSharedDeltaIndex()->updateIfAdvanced(*my_delta_index); if (ok) { + GET_METRIC(tiflash_storage_place_index_count, type_placed_fully_saved).Increment(); LOG_DEBUG( segment_snap->log, "Segment updated delta index, my_delta_index={} {}", @@ -2796,6 +2797,9 @@ std::pair Segment::ensurePlace( start_ts)) { // We can reuse the shared-delta-index + GET_METRIC(tiflash_storage_place_index_count, type_reuse).Increment(); + GET_METRIC(tiflash_storage_place_index_stats_count, type_rows_reuse_placed).Observe(my_placed_rows); + GET_METRIC(tiflash_storage_place_index_stats_count, type_deletes_reuse_placed).Observe(my_placed_deletes); return {my_delta_index, false}; } @@ -2815,6 +2819,8 @@ std::pair Segment::ensurePlace( delta_snap->getDeletes()); bool fully_indexed = true; + size_t new_placed_rows = 0; + size_t new_placed_deletes = 0; for (auto & v : items) { if (v.isBlock()) @@ -2851,6 +2857,7 @@ std::pair Segment::ensurePlace( relevant_place); my_placed_rows += rows; + new_placed_rows += rows; } else { @@ -2874,6 +2881,7 @@ std::pair Segment::ensurePlace( relevant_place); ++my_placed_deletes; + ++new_placed_deletes; } } @@ 
-2887,11 +2895,25 @@ std::pair Segment::ensurePlace( my_delta_index->update(my_delta_tree, my_placed_rows, my_placed_deletes); + GET_METRIC(tiflash_storage_place_index_count, type_placed).Increment(); + GET_METRIC(tiflash_storage_place_index_stats_count, type_rows_newly_placed).Observe(new_placed_rows); + GET_METRIC(tiflash_storage_place_index_stats_count, type_deletes_newly_placed).Observe(new_placed_deletes); + GET_METRIC(tiflash_storage_place_index_stats_count, type_rows_after_placed).Observe(my_placed_rows); + GET_METRIC(tiflash_storage_place_index_stats_count, type_deletes_after_placed).Observe(my_placed_deletes); + if (fully_indexed) + GET_METRIC(tiflash_storage_place_index_count, type_placed_fully_indexed).Increment(); LOG_DEBUG( segment_snap->log, - "Finish segment ensurePlace, read_ranges={} placed_items={} shared_delta_index={} my_delta_index={} {}", + "Finish segment ensurePlace, read_ranges={} placed_items={} " + "new_placed_rows={} new_placed_deletes={} my_placed_rows={} my_placed_deletes={} fully_indexed={} " + "shared_delta_index={} my_delta_index={} {}", read_ranges, items.size(), + new_placed_rows, + new_placed_deletes, + my_placed_rows, + my_placed_deletes, + fully_indexed, delta_snap->getSharedDeltaIndex()->toString(), my_delta_index->toString(), simpleInfo()); diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp b/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp index 5dc1402f033..188f88acc99 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp @@ -56,7 +56,8 @@ SegmentReadTask::SegmentReadTask( , dm_context(dm_context_) , ranges(ranges_) { - CurrentMetrics::add(CurrentMetrics::DT_SegmentReadTasks); + CurrentMetrics::add(CurrentMetrics::DT_SegmentReadTasks); // keep for compatibility. 
+ GET_METRIC(tiflash_storage_read_thread_gauge, type_read_task).Increment(); } SegmentReadTask::SegmentReadTask( @@ -74,7 +75,8 @@ SegmentReadTask::SegmentReadTask( size_t establish_disagg_task_resp_size) : store_id(store_id_) { - CurrentMetrics::add(CurrentMetrics::DT_SegmentReadTasks); + CurrentMetrics::add(CurrentMetrics::DT_SegmentReadTasks); // keep for compatibility. + GET_METRIC(tiflash_storage_read_thread_gauge, type_read_task).Increment(); auto tracing_id = fmt::format( "{} segment_id={} epoch={} delta_epoch={}", log->identifier(), @@ -181,7 +183,8 @@ SegmentReadTask::SegmentReadTask( SegmentReadTask::~SegmentReadTask() { - CurrentMetrics::sub(CurrentMetrics::DT_SegmentReadTasks); + CurrentMetrics::sub(CurrentMetrics::DT_SegmentReadTasks); // keep for compatibility. + GET_METRIC(tiflash_storage_read_thread_gauge, type_read_task).Decrement(); } void SegmentReadTask::addRange(const RowKeyRange & range) diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp index 983dd9f8df3..57fcce557bd 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp @@ -25,6 +25,10 @@ namespace DB::FailPoints extern const char pause_when_reading_from_dt_stream[]; } // namespace DB::FailPoints +namespace CurrentMetrics +{ +extern const Metric DT_SegmentReadTasks; +} // namespace CurrentMetrics namespace DB::DM { SegmentReadTasksWrapper::SegmentReadTasksWrapper(bool enable_read_thread_, SegmentReadTasks && ordered_tasks_) @@ -151,6 +155,7 @@ SegmentReadTaskPool::SegmentReadTaskPool( , keyspace_id(keyspace_id_) , res_group_name(res_group_name_) { + GET_METRIC(tiflash_storage_read_thread_gauge, type_read_task_pool).Increment(); if (tasks_wrapper.empty()) { q.finish(); @@ -159,6 +164,7 @@ SegmentReadTaskPool::SegmentReadTaskPool( SegmentReadTaskPool::~SegmentReadTaskPool() { + GET_METRIC(tiflash_storage_read_thread_gauge, type_read_task_pool).Decrement(); 
auto [pop_times, pop_empty_times, peak_blocks_in_queue] = q.getStat(); auto pop_empty_ratio = pop_times > 0 ? pop_empty_times * 1.0 / pop_times : 0.0; auto total_count = blk_stat.totalCount(); @@ -198,7 +204,8 @@ void SegmentReadTaskPool::finishSegment(const SegmentReadTaskPtr & seg) active_segment_ids.erase(seg->getGlobalSegmentID()); pool_finished = active_segment_ids.empty() && tasks_wrapper.empty(); } - LOG_DEBUG(log, "finishSegment pool_id={} segment={} pool_finished={}", pool_id, seg, pool_finished); + GET_METRIC(tiflash_storage_read_thread_gauge, type_read_task_active).Decrement(); + LOG_INFO(log, "finishSegment pool_id={} segment={} pool_finished={}", pool_id, seg, pool_finished); if (pool_finished) { q.finish(); @@ -217,8 +224,14 @@ SegmentReadTaskPtr SegmentReadTaskPool::getTask(const GlobalSegmentID & seg_id) std::lock_guard lock(mutex); auto t = tasks_wrapper.getTask(seg_id); RUNTIME_CHECK(t != nullptr, pool_id, seg_id); + auto no_task_left = tasks_wrapper.empty(); active_segment_ids.insert(seg_id); + GET_METRIC(tiflash_storage_read_thread_gauge, type_read_task_active).Increment(); peak_active_segments = std::max(peak_active_segments, active_segment_ids.size()); + if (no_task_left) + { + LOG_INFO(log, "pool_id={} all tasks scheduled, active_segment_size={}", pool_id, active_segment_ids.size()); + } return t; } diff --git a/dbms/src/Storages/KVStore/FFI/ProxyFFIStatusService.cpp b/dbms/src/Storages/KVStore/FFI/ProxyFFIStatusService.cpp index 8161387c408..5817d2820dd 100644 --- a/dbms/src/Storages/KVStore/FFI/ProxyFFIStatusService.cpp +++ b/dbms/src/Storages/KVStore/FFI/ProxyFFIStatusService.cpp @@ -358,6 +358,83 @@ HttpRequestRes HandleHttpRequestRemoteGC( return buildOkResp(api_name, std::move(body)); } +std::tuple, String> parseStoreIds(std::string_view path) +{ + std::vector store_ids; + String err_msg; + + if (path.empty() || path == "/") + { + // empty store_ids means all store ids + return {store_ids, err_msg}; + } + + if (path[0] == '/') + 
path.remove_prefix(1); // remove leading '/' + + std::vector parts; + boost::split(parts, path, boost::is_any_of(",")); + for (const auto & part : parts) + { + try + { + StoreID store_id = std::stoull(part); + store_ids.push_back(store_id); + } + catch (...) + { + err_msg = fmt::format("invalid store_id in request: {}", path); + return {std::vector{}, err_msg}; + } + } + return {store_ids, err_msg}; +} + +HttpRequestRes HandleHttpRequestRemoteInfo( + EngineStoreServerWrap * server, + std::string_view path, + const std::string & api_name, + std::string_view, + std::string_view) +{ + auto & global_ctx = server->tmt->getContext(); + if (auto err_resp = allowDisaggAPI(global_ctx, api_name, DisaggregatedMode::Storage, "can not get remote info"); + err_resp) + { + return err_resp.value(); + } + + auto [store_ids, err_msg] = parseStoreIds(path.substr(api_name.size())); + if (!err_msg.empty()) + { + auto body = fmt::format(R"json({{"message":"{}"}})json", err_msg); + return buildRespWithCode(HttpRequestStatus::BadRequest, api_name, std::move(body)); + } + + const auto & gc_mgr = server->tmt->getS3GCManager(); + if (!gc_mgr) + { + auto body = fmt::format(R"json({{"message":"S3 GC Manager is not enabled"}})json"); + return buildRespWithCode(HttpRequestStatus::InternalError, api_name, std::move(body)); + } + + auto log = Logger::get("HandleHttpRequestRemoteInfo"); + try + { + auto details = gc_mgr->getS3StorageSummary(store_ids); + std::stringstream ss; + details.toJson()->stringify(ss); + auto json_str = ss.str(); + return buildOkResp(api_name, std::move(json_str)); + } + catch (...) 
+ { + auto body = fmt::format(R"json({{"message":"Exception occurred when getting remote info"}})json"); + tryLogCurrentWarningException(log); + return buildRespWithCode(HttpRequestStatus::InternalError, api_name, std::move(body)); + } +} + // Parse Http query string_view into a map HttpQueryMap parseHttpQueryMap(std::string_view query) { @@ -758,6 +835,7 @@ static const std::map AVAILABLE_HTTP_URI = {"/tiflash/remote/owner/resign", HandleHttpRequestRemoteOwnerResign}, {"/tiflash/remote/gc", HandleHttpRequestRemoteGC}, {"/tiflash/remote/upload", HandleHttpRequestRemoteReUpload}, + {"/tiflash/remote/info", HandleHttpRequestRemoteInfo}, {"/tiflash/remote/cache/evict", HandleHttpRequestRemoteCacheEvict}, {"/tiflash/remote/cache/info", HandleHttpRequestRemoteCacheInfo}, }; diff --git a/dbms/src/Storages/KVStore/FFI/ProxyFFIStatusService.h b/dbms/src/Storages/KVStore/FFI/ProxyFFIStatusService.h index 75c26b54688..72c00a846dc 100644 --- a/dbms/src/Storages/KVStore/FFI/ProxyFFIStatusService.h +++ b/dbms/src/Storages/KVStore/FFI/ProxyFFIStatusService.h @@ -40,6 +40,8 @@ struct RemoteCacheEvictRequest RemoteCacheEvictRequest parseEvictRequest(std::string_view path, std::string_view api_name, std::string_view query); +std::tuple, String> parseStoreIds(std::string_view path); + } // namespace DB template <> diff --git a/dbms/src/Storages/S3/CheckpointManifestS3Set.h b/dbms/src/Storages/S3/CheckpointManifestS3Set.h index 7dd8662a5e1..6032e66195d 100644 --- a/dbms/src/Storages/S3/CheckpointManifestS3Set.h +++ b/dbms/src/Storages/S3/CheckpointManifestS3Set.h @@ -40,6 +40,7 @@ class CheckpointManifestS3Set static CheckpointManifestS3Set create(const std::vector & manifest_keys); + ALWAYS_INLINE size_t size() const { return manifests.size(); } ALWAYS_INLINE bool empty() const { return manifests.empty(); } UInt64 latestUploadSequence() const diff --git a/dbms/src/Storages/S3/S3Common.cpp b/dbms/src/Storages/S3/S3Common.cpp index 7867f41ac40..f04c595a13d 100644 --- 
a/dbms/src/Storages/S3/S3Common.cpp +++ b/dbms/src/Storages/S3/S3Common.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/Storages/S3/S3Common.h b/dbms/src/Storages/S3/S3Common.h index 3125a06ba6c..456d8c002c2 100644 --- a/dbms/src/Storages/S3/S3Common.h +++ b/dbms/src/Storages/S3/S3Common.h @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/S3/S3GCManager.cpp b/dbms/src/Storages/S3/S3GCManager.cpp index af90add0998..d66b15040e3 100644 --- a/dbms/src/Storages/S3/S3GCManager.cpp +++ b/dbms/src/Storages/S3/S3GCManager.cpp @@ -55,6 +55,40 @@ extern const int TIMEOUT_EXCEEDED; namespace DB::S3 { +Poco::JSON::Object::Ptr S3StoreStorageSummary::toJson() const +{ + Poco::JSON::Object::Ptr obj = new Poco::JSON::Object(); + obj->set("store_id", store_id); + obj->set("num_manifests", manifests.size()); + obj->set("num_keys", num_keys); + + Poco::JSON::Object::Ptr data_file_obj = new Poco::JSON::Object(); + data_file_obj->set("num", data_file.num); + data_file_obj->set("num_delmark", data_file.num_delmark); + data_file_obj->set("bytes", data_file.bytes); + obj->set("data_file", data_file_obj); + + Poco::JSON::Object::Ptr dt_file_obj = new Poco::JSON::Object(); + dt_file_obj->set("num", dt_file.num); + dt_file_obj->set("num_keys", dt_file.num_keys); + dt_file_obj->set("num_delmark", dt_file.num_delmark); + dt_file_obj->set("bytes", dt_file.bytes); + obj->set("dt_file", dt_file_obj); + + // never return a nullptr + return obj; +} + +Poco::JSON::Object::Ptr S3StorageSummary::toJson() const +{ + Poco::JSON::Object::Ptr obj = new Poco::JSON::Object(); + Poco::JSON::Array::Ptr stores_arr = new Poco::JSON::Array(); + for (const auto & store_details : stores) + stores_arr->add(store_details.toJson()); + obj->set("stores", stores_arr); + return obj; +} + S3GCManager::S3GCManager( pingcap::pd::ClientPtr pd_client_, OwnerManagerPtr gc_owner_manager_, @@ -776,6 +810,131 
@@ void S3GCManager::removeOutdatedManifest( } } +namespace details +{ +String parseDTFileKeyFromDataSubpath(std::string_view data_subpath) +{ + // data_subpath=ks_1_t_333/dmf_664135/8.size.dat + // parse the last dmfile key part by removing the "/" suffix + auto pos = data_subpath.find_last_of('/'); + if (pos == String::npos) + return ""; + return String(data_subpath.substr(0, pos)); +} +} // namespace details + +S3StorageSummary S3GCManager::getS3StorageSummary(std::vector store_ids) +{ + S3StorageSummary summary; + // if no store_id specified, get all store_ids with data stored on S3 + if (store_ids.empty()) + store_ids = getAllStoreIds(); + + LOG_INFO(log, "getS3StorageSummary run on store_ids={}", store_ids); + + for (const auto store_id : store_ids) + summary.stores.emplace_back(getStoreStorageSummary(store_id)); + return summary; +} + +S3StoreStorageSummary S3GCManager::getStoreStorageSummary(StoreID store_id) +{ + auto client = S3::ClientFactory::instance().sharedTiFlashClient(); + + Stopwatch watch; + S3StoreStorageSummary summary{ + .store_id = store_id, + .manifests = CheckpointManifestS3Set::getFromS3(*client, store_id), + }; + + const auto prefix = S3Filename::fromStoreId(store_id).toDataPrefix(); + + // TODO: collect the locks belong to the store_id + // collect the CheckpointDataFile and StableFiles belong to the store_id + + double last_elapsed = 0.0; + constexpr double log_interval_seconds = 30.0; + size_t num_processed_keys = 0; + String last_dtfile_key; + size_t num_dtfile_keys_for_last_dtfile = 0; + S3::listPrefix(*client, prefix, [&](const Aws::S3::Model::Object & object) { + const auto & key = object.GetKey(); + const auto view = S3FilenameView::fromKey(key); + if (watch.elapsedSeconds() - last_elapsed > log_interval_seconds) + { + last_elapsed = watch.elapsedSeconds(); + LOG_INFO( + log, + "getS3StorageSummary processing, processed_keys={} current_key={} details={}", + num_processed_keys, + key, + summary); + } + + LOG_DEBUG( + log, + 
"getS3StorageSummary store_id={} key={} type={} isDMFile={} data_subpath={} processed_keys={}", + store_id, + object.GetKey(), + magic_enum::enum_name(view.type), + view.isDMFile(), + view.data_subpath, + num_processed_keys); + + if (view.isDataFile()) + { + if (view.isDMFile()) + { + // key=s273/data/ks_1_t_333/dmf_664135/8.size.dat + // view.data_subpath=ks_1_t_333/dmf_664135/8.size.dat + auto curr_dtfile_key = details::parseDTFileKeyFromDataSubpath(view.data_subpath); + if (curr_dtfile_key.empty()) + { + // log warning and ignore the parsed curr_dtfile_key + LOG_WARNING( + log, + "getS3StorageSummary failed to parse dtfile_key, store_id={} key={} data_subpath={}", + store_id, + key, + view.data_subpath); + } + else if (last_dtfile_key != curr_dtfile_key) + { + summary.dt_file.num += 1; + LOG_DEBUG( + log, + "getS3StorageSummary meet new dtfile_key={} last_dtfile_key={} num_keys_for_last_dtfile={} " + "store_id={}", + curr_dtfile_key, + last_dtfile_key, + num_dtfile_keys_for_last_dtfile, + store_id); + last_dtfile_key = curr_dtfile_key; + num_dtfile_keys_for_last_dtfile = 0; + } + num_dtfile_keys_for_last_dtfile += 1; + summary.dt_file.num_keys += 1; + summary.dt_file.bytes += object.GetSize(); + } + else + { + summary.data_file.num += 1; + summary.data_file.bytes += object.GetSize(); + } + } + else if (view.isDelMark()) + { + auto datafile_view = view.asDataFile(); + datafile_view.isDMFile() ? 
summary.dt_file.num_delmark += 1 : summary.data_file.num_delmark += 1; + } + num_processed_keys += 1; + return PageResult{.num_keys = 1, .more = true}; + }); + summary.num_keys = num_processed_keys; + LOG_INFO(log, "getS3StorageSummary finish, elapsed={:.3f}s summary={}", watch.elapsedSeconds(), summary); + return summary; +} + /// Service /// S3GCManagerService::S3GCManagerService( @@ -830,4 +989,13 @@ void S3GCManagerService::wake() const } } +S3StorageSummary S3GCManagerService::getS3StorageSummary(std::vector store_ids) +{ + if (manager) + { + return manager->getS3StorageSummary(store_ids); + } + return S3StorageSummary{}; +} + } // namespace DB::S3 diff --git a/dbms/src/Storages/S3/S3GCManager.h b/dbms/src/Storages/S3/S3GCManager.h index d12861acca9..045bb80556e 100644 --- a/dbms/src/Storages/S3/S3GCManager.h +++ b/dbms/src/Storages/S3/S3GCManager.h @@ -22,6 +22,12 @@ #include #include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#include +#pragma GCC diagnostic pop + #include #include @@ -93,6 +99,37 @@ struct GcStats double duration_scan_then_clean_data_files = 0.0; }; + +struct S3StoreStorageSummary +{ + StoreID store_id = InvalidStoreID; + CheckpointManifestS3Set manifests; + struct DataFile + { + size_t num = 0; + size_t num_delmark = 0; + size_t bytes = 0; + } data_file; + struct DTFile + { + size_t num = 0; + size_t num_keys = 0; + size_t num_delmark = 0; + size_t bytes = 0; + } dt_file; + + size_t num_keys = 0; + + Poco::JSON::Object::Ptr toJson() const; +}; + +struct S3StorageSummary +{ + std::vector stores; + + Poco::JSON::Object::Ptr toJson() const; +}; + class S3GCManager { public: @@ -109,6 +146,9 @@ class S3GCManager void shutdown() { shutdown_called = true; } + S3StoreStorageSummary getStoreStorageSummary(StoreID store_id); + S3StorageSummary getS3StorageSummary(std::vector store_ids); + // private: void runForStore(UInt64 gc_store_id, LoggerPtr slogger); @@ -188,6 +228,8 @@ class 
S3GCManagerService void wake() const; + S3StorageSummary getS3StorageSummary(std::vector store_ids); + private: Context & global_ctx; std::unique_ptr manager; @@ -215,3 +257,30 @@ struct fmt::formatter stat.duration_scan_then_clean_data_files); } }; + +template <> +struct fmt::formatter +{ + static constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); } + + template + auto format(const DB::S3::S3StoreStorageSummary & details, FormatContext & ctx) const + { + return fmt::format_to( + ctx.out(), + "{{store_id={} num_manifests={} num_keys={} " + "data_file={{num={} num_delmark={} bytes={}}} " + "dt_file={{num={} num_keys={} num_delmark={} bytes={}}} " + "}}", + details.store_id, + details.manifests.size(), + details.num_keys, + details.data_file.num, + details.data_file.num_delmark, + details.data_file.bytes, + details.dt_file.num, + details.dt_file.num_keys, + details.dt_file.num_delmark, + details.dt_file.bytes); + } +}; diff --git a/dbms/src/Storages/S3/S3RandomAccessFile.cpp b/dbms/src/Storages/S3/S3RandomAccessFile.cpp index 97a3f3a88cd..e9bfa3f8208 100644 --- a/dbms/src/Storages/S3/S3RandomAccessFile.cpp +++ b/dbms/src/Storages/S3/S3RandomAccessFile.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include #include #include @@ -33,13 +34,19 @@ #include #include +namespace CurrentMetrics +{ +extern const Metric S3RandomAccessFile; +} namespace ProfileEvents { extern const Event S3GetObject; extern const Event S3ReadBytes; extern const Event S3GetObjectRetry; extern const Event S3IORead; +extern const Event S3IOReadError; extern const Event S3IOSeek; +extern const Event S3IOSeekError; extern const Event S3IOSeekBackward; } // namespace ProfileEvents namespace DB::FailPoints @@ -67,6 +74,12 @@ S3RandomAccessFile::S3RandomAccessFile( { RUNTIME_CHECK(client_ptr != nullptr); initialize("init file"); + CurrentMetrics::add(CurrentMetrics::S3RandomAccessFile); +} + +S3RandomAccessFile::~S3RandomAccessFile() +{ + CurrentMetrics::sub(CurrentMetrics::S3RandomAccessFile); } std::string S3RandomAccessFile::getFileName() const @@ -122,11 +135,14 @@ ssize_t S3RandomAccessFile::readImpl(char * buf, size_t size) // It's just a double check for more safety. if (gcount < size && (!istr.eof() || cur_offset + gcount != static_cast(content_length))) { + ProfileEvents::increment(ProfileEvents::S3IOReadError); auto state = istr.rdstate(); + auto elapsed_secs = sw.elapsedSeconds(); + GET_METRIC(tiflash_storage_s3_request_seconds, type_read_stream_err).Observe(elapsed_secs); LOG_WARNING( log, - "Cannot read from istream, size={} gcount={} state=0x{:02X} cur_offset={} content_length={} errno={} " - "errmsg={} cost={}ns", + "Cannot read from istream, size={} gcount={} state=0x{:02X} cur_offset={} content_length={} " + "errno={} errmsg={} cost={:.6f}s", size, gcount, state, @@ -134,9 +150,10 @@ ssize_t S3RandomAccessFile::readImpl(char * buf, size_t size) content_length, errno, strerror(errno), - sw.elapsed()); + elapsed_secs); return (state & std::ios_base::failbit || state & std::ios_base::badbit) ? 
S3StreamError : S3UnknownError; } + auto elapsed_secs = sw.elapsedSeconds(); if (scan_context) { @@ -206,16 +223,20 @@ off_t S3RandomAccessFile::seekImpl(off_t offset_, int whence) auto & istr = read_result.GetBody(); if (!istr.ignore(offset_ - cur_offset)) { + ProfileEvents::increment(ProfileEvents::S3IOSeekError); auto state = istr.rdstate(); + auto elapsed_secs = sw.elapsedSeconds(); + GET_METRIC(tiflash_storage_s3_request_seconds, type_read_stream_err).Observe(elapsed_secs); LOG_WARNING( log, - "Cannot ignore from istream, state=0x{:02X}, errno={} errmsg={} cost={}ns", + "Cannot ignore from istream, state=0x{:02X}, errno={} errmsg={} cost={:.6f}s", state, errno, strerror(errno), - sw.elapsed()); + elapsed_secs); return (state & std::ios_base::failbit || state & std::ios_base::badbit) ? S3StreamError : S3UnknownError; } + auto elapsed_secs = sw.elapsedSeconds(); if (scan_context) { diff --git a/dbms/src/Storages/S3/S3RandomAccessFile.h b/dbms/src/Storages/S3/S3RandomAccessFile.h index 6817912d2c2..e700c7491dd 100644 --- a/dbms/src/Storages/S3/S3RandomAccessFile.h +++ b/dbms/src/Storages/S3/S3RandomAccessFile.h @@ -50,6 +50,8 @@ class S3RandomAccessFile final : public RandomAccessFile const String & remote_fname_, const DM::ScanContextPtr & scan_context_); + ~S3RandomAccessFile() override; + // Can only seek forward. 
[[nodiscard]] off_t seek(off_t offset, int whence) override; diff --git a/dbms/src/Storages/S3/tests/gtest_s3gcmanager.cpp b/dbms/src/Storages/S3/tests/gtest_s3gcmanager.cpp index 32e9cb8457f..b8f2b135459 100644 --- a/dbms/src/Storages/S3/tests/gtest_s3gcmanager.cpp +++ b/dbms/src/Storages/S3/tests/gtest_s3gcmanager.cpp @@ -447,6 +447,52 @@ try } CATCH +TEST_F(S3GCManagerTest, GetStorageSummary) +{ + StoreID store_id = 20; + auto prefix = S3Filename::fromStoreId(store_id).toDataPrefix(); + + // prepare some empty files for scanning + { + for (auto seq : {2, 3}) + { + auto m = S3Filename::newCheckpointManifest(store_id, seq).toFullKey(); + uploadEmptyFile(*mock_s3_client, m); + } + + for (auto idx : {1, 2, 3}) + { + auto fname = S3Filename::newCheckpointData(store_id, 1, idx); + uploadEmptyFile(*mock_s3_client, fname.toFullKey()); + if (idx == 1 || idx == 2) + uploadEmptyFile(*mock_s3_client, fname.toView().getDelMarkKey()); + } + + for (auto file_id : std::vector{10, 11, 12}) + { + auto fname = S3Filename::fromDMFileOID( // + DMFileOID{ + .store_id = store_id, + .table_id = 1000, + .file_id = file_id, + }); + auto dmf = fname.toFullKey(); + uploadEmptyFile(*mock_s3_client, dmf + "/meta"); + uploadEmptyFile(*mock_s3_client, dmf + "/8.size.dat"); + if (file_id == 10 || file_id == 11) + uploadEmptyFile(*mock_s3_client, fname.toView().getDelMarkKey()); + } + } + + auto details = gc_mgr->getStoreStorageSummary(store_id); + ASSERT_EQ(details.manifests.size(), 2); + ASSERT_EQ(details.data_file.num, 3); + ASSERT_EQ(details.data_file.num_delmark, 2); + ASSERT_EQ(details.dt_file.num, 3); + ASSERT_EQ(details.dt_file.num_keys, 2 * 3); // each dmfile has 2 objects + ASSERT_EQ(details.dt_file.num_delmark, 2); +} + TEST_F(S3GCManagerTest, RemoveLockOfDMFile) try { diff --git a/docs/tiflash_http_api.md b/docs/tiflash_http_api.md index 5b643ce388b..a618f6e4f29 100644 --- a/docs/tiflash_http_api.md +++ b/docs/tiflash_http_api.md @@ -113,7 +113,7 @@ curl 
"http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/debug/pprof/memory_status" Get the current status of TiFlash ```bash -curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tifash/store-status" +curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tiflash/store-status" ``` ### Response @@ -124,8 +124,8 @@ Return a string represent the current status of TiFlash. The returned result is ## TiFlash replica syncing status ```bash -curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tifash/sync-status/${table_id}" -curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tifash/sync-status/keyspace/${keyspace_id}/table/${table_id}" +curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tiflash/sync-status/${table_id}" +curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tiflash/sync-status/keyspace/${keyspace_id}/table/${table_id}" ``` ### Parameters @@ -147,7 +147,7 @@ curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tifash/sync-status/keyspace/${ ## TiFlash write node remote gc owner info under disaggregated arch ```bash -curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tifash/remote/owner/info" +curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tiflash/remote/owner/info" ``` ### Response @@ -170,7 +170,7 @@ curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tifash/remote/owner/info" ## Resign the TiFlash write node remote gc owner under disaggregated arch ```bash -curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tifash/remote/owner/resign" +curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tiflash/remote/owner/resign" ``` ### Response @@ -187,10 +187,30 @@ curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tifash/remote/owner/resign" } ``` +## Fetch the remote storage summary from TiFlash write node + +```bash +# Fetch the remote storage summary of all TiFlash write node store_ids +curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tiflash/remote/info" +# Fetch the remote storage summary of given TiFlash write node store_ids. 
+# Multiple store_ids are separated by "," +curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tiflash/remote/info/store1_id,store2_id" +``` + +### Response +```json +{ + "stores":[ + {"data_file":{"bytes":131509,"num":391,"num_delmark":311},"dt_file":{"bytes":598122428067,"num":7648,"num_delmark":2705,"num_keys":73148},"num_keys":76555,"num_manifests":90,"store_id":272}, + {"data_file":{"bytes":129603,"num":385,"num_delmark":311},"dt_file":{"bytes":597606637064,"num":7690,"num_delmark":2700,"num_keys":73317},"num_keys":76713,"num_manifests":83,"store_id":273} + ] +} +``` + ## Execute TiFlash write node remote gc under disaggregated arch ```bash -curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tifash/remote/gc" +curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tiflash/remote/gc" ``` ### Response @@ -214,7 +234,7 @@ curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tifash/remote/gc" ## Execute TiFlash write node upload under disaggregated arch ```bash -curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tifash/remote/upload" +curl "http://${TIFLASH_IP}:${TIFLASH_STATUS_PORT}/tiflash/remote/upload" ``` ### Response diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 146cea5eb5a..40d3c22f52f 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -52,7 +52,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1757395110033, + "iteration": 1766901450632, "links": [], "panels": [ { @@ -3096,7 +3096,7 @@ "targets": [ { "exemplar": true, - "expr": "sum by (instance) (rate(tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"SegmentReader.*\", instance=~\"$tiflash_role\"}[1m]))", + "expr": "sum by (instance) (rate(tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"SegmentReader.*\", instance=~\"$tiflash_role\", instance=~\"$proxy_instance\"}[1m]))", "format": 
"time_series", "hide": false, "instant": false, @@ -3108,7 +3108,7 @@ }, { "exemplar": true, - "expr": "count by (instance) (tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"SegmentReader.*\", instance=~\"$tiflash_role\"})", + "expr": "count by (instance) (tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"SegmentReader.*\", instance=~\"$tiflash_role\", instance=~\"$proxy_instance\"})", "hide": false, "interval": "", "intervalFactor": 2, @@ -8149,6 +8149,7 @@ "alignAsTable": true, "avg": false, "current": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -8178,16 +8179,6 @@ "stack": false, "steppedLine": false, "targets": [ - { - "exemplar": true, - "expr": "tiflash_system_asynchronous_metric_MaxDTDeltaOldestSnapshotLifetime{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "max_snapshot_lifetime-{{instance}}", - "refId": "K" - }, { "exemplar": true, "expr": "tiflash_system_current_metric_DT_SegmentReadTasks{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", @@ -8199,93 +8190,13 @@ }, { "exemplar": true, - "expr": "tiflash_system_current_metric_PSMVCCSnapshotsList{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", + "expr": "tiflash_system_asynchronous_metric_MaxDTDeltaOldestSnapshotLifetime{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", "format": "time_series", - "hide": true, + "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "snapshot_list-{{instance}}", - "refId": "A" - }, - { - "expr": 
"tiflash_system_current_metric_PSMVCCNumSnapshots{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "heatmap", - "hide": true, - "intervalFactor": 1, - "legendFormat": "num_snapshot-{{instance}}", - "refId": "B" - }, - { - "expr": "tiflash_system_current_metric_DT_SnapshotOfRead{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "read-{{instance}}", - "refId": "C" - }, - { - "expr": "tiflash_system_current_metric_DT_SnapshotOfReadRaw{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "read_raw-{{instance}}", - "refId": "D" - }, - { - "expr": "tiflash_system_current_metric_DT_SnapshotOfDeltaMerge{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "delta_merge-{{instance}}", - "refId": "E" - }, - { - "expr": "tiflash_system_current_metric_DT_SnapshotOfDeltaCompact{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "delta_compact-{{instance}}", - "refId": "J" - }, - { - "expr": "tiflash_system_current_metric_DT_SnapshotOfSegmentMerge{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "seg_merge-{{instance}}", - "refId": "F" - }, - { - "expr": "tiflash_system_current_metric_DT_SnapshotOfSegmentSplit{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "seg_split-{{instance}}", - "refId": "G" - }, - { - "expr": "tiflash_system_current_metric_DT_SnapshotOfPlaceIndex{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "place_index-{{instance}}", - "refId": "H" - }, - { - "expr": "tiflash_system_asynchronous_metric_MaxDTStableOldestSnapshotLifetime{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "max_snapshot_lifetime_stable-{{instance}}", - "refId": "L" - }, - { - "expr": "tiflash_system_asynchronous_metric_MaxDTMetaOldestSnapshotLifetime{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "max_snapshot_lifetime_meta-{{instance}}", - "refId": "M" + "legendFormat": "max_snapshot_lifetime-{{instance}}", + "refId": "N" } ], "thresholds": [], @@ -9282,7 +9193,7 @@ }, "yaxes": [ { - "format": "binBps", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -9459,13 +9370,29 @@ "targets": [ { "exemplar": false, - "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type!~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval]))) by (le,type) / 1000000000)", + "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", instance=~\"$tiflash_role\", type!~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval]))) by (le,type, $additional_groupby) / 1000000000)", "format": "time_series", - "hide": false, + "hide": true, "interval": "", "intervalFactor": 2, - "legendFormat": "max-{{type}}", + "legendFormat": "max-{{type}} {{$additional_groupby}}", "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.9999, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type!~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (le, type, $additional_groupby))", + "hide": false, + "interval": "", + "legendFormat": "9999-{{type}} {{$additional_groupby}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type!~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (le, type, $additional_groupby))", + "hide": false, + "interval": "", + "legendFormat": "99-{{type}} {{$additional_groupby}}", + "refId": "C" } ], "thresholds": [], @@ -9666,13 +9593,29 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval]))) by (le,type) / 1000000000)", + "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval]))) by (le,type,
$additional_groupby) / 1000000000)", "format": "time_series", - "hide": false, + "hide": true, "interval": "", "intervalFactor": 2, - "legendFormat": "max-{{type}}", + "legendFormat": "max-{{type}} {{$additional_groupby}}", "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.9999, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (le, type, $additional_groupby))", + "hide": false, + "interval": "", + "legendFormat": "9999-{{type}} {{$additional_groupby}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (le, type, $additional_groupby))", + "hide": false, + "interval": "", + "legendFormat": "99-{{type}} {{$additional_groupby}}", + "refId": "C" } ], "thresholds": [], @@ -11019,6 +10962,7 @@ "alignAsTable": true, "avg": false, "current": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -11138,22 +11082,6 @@ "intervalFactor": 1, "legendFormat": "max_snapshot_lifetime-{{instance}}", "refId": "K" - }, - { - "expr": "tiflash_system_asynchronous_metric_MaxDTStableOldestSnapshotLifetime{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "max_snapshot_lifetime_stable-{{instance}}", - "refId": "L" - }, - { - "expr": "tiflash_system_asynchronous_metric_MaxDTMetaOldestSnapshotLifetime{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series",
- "hide": true, - "intervalFactor": 1, - "legendFormat": "max_snapshot_lifetime_meta-{{instance}}", - "refId": "M" } ], "thresholds": [], @@ -11163,7 +11091,7 @@ "title": "Read Snapshots", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -11267,6 +11195,7 @@ "exemplar": true, "expr": "histogram_quantile(0.95, sum(rate(tiflash_read_thread_internal_us_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", "format": "time_series", + "hide": true, "interval": "", "intervalFactor": 1, "legendFormat": "95-{{type}}", @@ -11276,6 +11205,7 @@ "exemplar": true, "expr": "histogram_quantile(0.80, sum(rate(tiflash_read_thread_internal_us_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", "format": "time_series", + "hide": true, "interval": "", "intervalFactor": 1, "legendFormat": "80-{{type}}", @@ -11445,7 +11375,7 @@ "fillGradient": 0, "gridPos": { "h": 8, - "w": 12, + "w": 8, "x": 0, "y": 26 }, @@ -11561,7 +11491,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Errors of DeltaIndex", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] @@ -11570,16 +11500,17 @@ "fillGradient": 0, "gridPos": { "h": 8, - "w": 12, - "x": 12, + "w": 8, + "x": 8, "y": 26 }, "hiddenSeries": false, - "id": 237, + "id": 361, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -11599,19 +11530,24 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/cache_hit_ratio/", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": 
"sum(rate(tiflash_system_profile_event_DTDeltaIndexError{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (instance)", + "expr": "sum by (type,$additional_groupby) (tiflash_storage_read_thread_gauge{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"})", "format": "time_series", "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "DeltaIndexError-{{instance}}", + "intervalFactor": 2, + "legendFormat": "{{type}} {{$additional_groupby}}", "refId": "A" } ], @@ -11619,10 +11555,10 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "DeltaIndexError", + "title": "Segment MergedTask", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -11636,7 +11572,7 @@ "yaxes": [ { "decimals": null, - "format": "cps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -11644,12 +11580,12 @@ "show": true }, { - "format": "opm", + "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": "0", - "show": false + "show": true } ], "yaxis": { @@ -11671,16 +11607,17 @@ "fillGradient": 0, "gridPos": { "h": 8, - "w": 12, - "x": 0, - "y": 34 + "w": 8, + "x": 16, + "y": 26 }, "hiddenSeries": false, - "id": 301, + "id": 362, "legend": { "alignAsTable": true, "avg": true, "current": false, + "hideZero": true, "max": false, "min": false, "rightSide": true, @@ -11707,49 +11644,38 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tiflash_storage_version_chain_ms_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", + "expr": "histogram_quantile(0.999, sum(rate(tiflash_storage_read_thread_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) 
by (le, type,$additional_groupby))", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "999-{{type}}", + "legendFormat": "999-{{type}} {{$additional_groupby}}", "refId": "A" }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_version_chain_ms_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_read_thread_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type,$additional_groupby))", "format": "time_series", - "hide": true, "interval": "", "intervalFactor": 1, - "legendFormat": "99-{{type}}", + "legendFormat": "99-{{type}} {{$additional_groupby}}", "refId": "B" }, { "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tiflash_storage_version_chain_ms_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", + "expr": "histogram_quantile(0.80, sum(rate(tiflash_storage_read_thread_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type,$additional_groupby))", "format": "time_series", "hide": true, "interval": "", "intervalFactor": 1, - "legendFormat": "95-{{type}}", + "legendFormat": "80-{{type}} {{$additional_groupby}}", "refId": "C" - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.80, sum(rate(tiflash_storage_version_chain_ms_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "80-{{type}}", - "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], 
"timeShift": null, - "title": "VersionChain", + "title": "Segment MergedTask Duration", "tooltip": { "shared": true, "sort": 2, @@ -11765,7 +11691,7 @@ }, "yaxes": [ { - "format": "ms", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -11785,56 +11711,35 @@ "align": false, "alignLevel": null } - } - ], - "title": "Storage Read Pool & Data Sharing", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 119, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The disk usage of PageStorage instances in each TiFlash node", - "editable": true, - "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 12 + "y": 34 }, "hiddenSeries": false, - "id": 128, + "id": 301, "legend": { "alignAsTable": true, - "avg": false, - "current": true, + "avg": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -11850,65 +11755,57 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/^valid_rate/", - "yaxis": 2 - }, - { - "alias": "/size/", - "linewidth": 3 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "tiflash_system_asynchronous_metric_BlobDiskBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "hide": false, + "expr": "histogram_quantile(0.999, sum(rate(tiflash_storage_version_chain_ms_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", + "format": "time_series", "interval": "", - 
"intervalFactor": 2, - "legendFormat": "blob_disk_size-{{instance}}", + "intervalFactor": 1, + "legendFormat": "999-{{type}}", "refId": "A" }, { "exemplar": true, - "expr": "sum(tiflash_system_asynchronous_metric_BlobValidBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", - "hide": false, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_version_chain_ms_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", + "format": "time_series", + "hide": true, "interval": "", - "intervalFactor": 2, - "legendFormat": "blob_valid_size-{{instance}}", + "intervalFactor": 1, + "legendFormat": "99-{{type}}", "refId": "B" }, { "exemplar": true, - "expr": "sum((tiflash_system_asynchronous_metric_BlobValidBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) / (tiflash_system_asynchronous_metric_BlobDiskBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"})) by (instance)", - "hide": false, + "expr": "histogram_quantile(0.95, sum(rate(tiflash_storage_version_chain_ms_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", + "format": "time_series", + "hide": true, "interval": "", - "legendFormat": "blob_valid_rate-{{instance}}", + "intervalFactor": 1, + "legendFormat": "95-{{type}}", "refId": "C" }, { "exemplar": true, - "expr": "tiflash_system_asynchronous_metric_LogDiskBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", + "expr": "histogram_quantile(0.80, sum(rate(tiflash_storage_version_chain_ms_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
instance=~\"$tiflash_role\"}[1m])) by (le, type))", "format": "time_series", - "hide": false, + "hide": true, "interval": "", - "intervalFactor": 2, - "legendFormat": "log_size-{{instance}}", - "refId": "E", - "step": 10 + "intervalFactor": 1, + "legendFormat": "80-{{type}}", + "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PageStorage Disk Usage", + "title": "VersionChain", "tooltip": { - "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" @@ -11923,7 +11820,7 @@ }, "yaxes": [ { - "format": "bytes", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -11931,12 +11828,12 @@ "show": true }, { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, - "max": "1.1", - "min": "0", - "show": true + "max": null, + "min": null, + "show": false } ], "yaxis": { @@ -11950,34 +11847,29 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The number of files of PageStorage instances in each TiFlash node", - "editable": true, - "error": false, + "description": "Errors of DeltaIndex", "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 0, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 12 + "y": 34 }, "hiddenSeries": false, - "id": 129, + "id": 237, "legend": { "alignAsTable": true, "avg": false, - "current": true, - "max": false, + "current": false, + "max": true, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -12000,33 +11892,23 @@ "targets": [ { "exemplar": true, - "expr": "sum(tiflash_system_asynchronous_metric_BlobFileNums{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "expr": "sum(rate(tiflash_system_profile_event_DTDeltaIndexError{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
instance=~\"$tiflash_role\"}[1m])) by (instance)", "format": "time_series", "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "blob_file-{{instance}}", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "sum(tiflash_system_asynchronous_metric_LogNums{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", - "hide": false, - "interval": "", - "legendFormat": "log_file-{{instance}}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "DeltaIndexError-{{instance}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PageStorage File Num", + "title": "DeltaIndexError", "tooltip": { - "msResolution": false, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -12039,7 +11921,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "cps", "label": null, "logBase": 1, "max": null, @@ -12047,117 +11930,68 @@ "show": true }, { - "format": "percentunit", + "format": "opm", "label": null, "logBase": 1, - "max": "1.1", + "max": null, "min": "0", - "show": true + "show": false } ], "yaxis": { "align": false, "alignLevel": null } - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 20 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 133, - "legend": { - "show": true - }, - "pluginVersion": "6.1.6", - "reverseYBuckets": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(delta(tiflash_storage_page_write_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"v3\"}[1m])) by (le)", - "format": "heatmap", - "interval": "", - "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "PageStorage WriteBatch Size", - "tooltip": { - "show": true, - "showHistogram": true - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "bytes", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null - }, + } + ], + "title": "Storage Read Pool & Data Sharing", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 119, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The disk usage of PageStorage instances in each TiFlash node", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 20 + "x": 0, + "y": 43 }, "hiddenSeries": false, - "id": 158, + "id": 128, "legend": { "alignAsTable": true, "avg": false, "current": true, - "max": true, + "max": false, "min": false, "rightSide": true, "show": true, - "sort": "max", - "sortDesc": true, + "sideWidth": null, "total": false, "values": true }, @@ -12173,62 +12007,65 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/^valid_rate/", + "yaxis": 2 + }, + { + "alias": "/size/", + "linewidth": 3 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "histogram_quantile(1.00, 
sum(round(1000000000*rate(tiflash_storage_page_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, type) / 1000000000)", - "format": "time_series", - "hide": true, + "expr": "tiflash_system_asynchronous_metric_BlobDiskBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", + "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}-max", + "intervalFactor": 2, + "legendFormat": "blob_disk_size-{{instance}}", "refId": "A" }, { "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tiflash_storage_page_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", - "format": "time_series", + "expr": "sum(tiflash_system_asynchronous_metric_BlobValidBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}-999", + "intervalFactor": 2, + "legendFormat": "blob_valid_size-{{instance}}", "refId": "B" }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_page_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", - "hide": true, + "expr": "sum((tiflash_system_asynchronous_metric_BlobValidBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) / (tiflash_system_asynchronous_metric_BlobDiskBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"})) by (instance)", + "hide": false, "interval": "", - "legendFormat": "{{type}}-99", + 
"legendFormat": "blob_valid_rate-{{instance}}", "refId": "C" }, { "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tiflash_storage_page_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", - "hide": true, - "interval": "", - "legendFormat": "{{type}}-95", - "refId": "D" - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.80, sum(rate(tiflash_storage_page_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", - "hide": true, + "expr": "tiflash_system_asynchronous_metric_LogDiskBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", + "format": "time_series", + "hide": false, "interval": "", - "legendFormat": "{{type}}-80", - "refId": "E" + "intervalFactor": 2, + "legendFormat": "log_size-{{instance}}", + "refId": "E", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Page write Duration", + "title": "PageStorage Disk Usage", "tooltip": { + "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" @@ -12243,7 +12080,7 @@ }, "yaxes": [ { - "format": "s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -12251,11 +12088,11 @@ "show": true }, { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, - "min": null, + "max": "1.1", + "min": "0", "show": true } ], @@ -12270,34 +12107,36 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The number of files of PageStorage instances in each TiFlash node", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { "h": 8, "w": 
12, - "x": 0, - "y": 28 + "x": 12, + "y": 43 }, "hiddenSeries": false, - "id": 163, + "id": 129, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": true, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, - "sort": "max", - "sortDesc": true, + "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -12318,22 +12157,31 @@ "targets": [ { "exemplar": true, - "expr": "sum(increase(tiflash_storage_page_gc_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[$__rate_interval])) by (type)", + "expr": "sum(tiflash_system_asynchronous_metric_BlobFileNums{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", "format": "time_series", "hide": false, - "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "blob_file-{{instance}}", + "refId": "A", + "step": 10 + }, + { + "exemplar": true, + "expr": "sum(tiflash_system_asynchronous_metric_LogNums{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "log_file-{{instance}}", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Page GC Tasks OPM", + "title": "PageStorage File Num", "tooltip": { + "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" @@ -12348,7 +12196,7 @@ }, "yaxes": [ { - "format": "opm", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -12356,11 +12204,11 @@ "show": true }, { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, - "min": 
null, + "max": "1.1", + "min": "0", "show": true } ], @@ -12369,6 +12217,74 @@ "alignLevel": null } }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 51 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 133, + "legend": { + "show": true + }, + "pluginVersion": "6.1.6", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(delta(tiflash_storage_page_write_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"v3\"}[1m])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "PageStorage WriteBatch Size", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "bytes", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, @@ -12385,16 +12301,14 @@ "h": 8, "w": 12, "x": 12, - "y": 28 + "y": 51 }, "hiddenSeries": false, - "id": 162, + "id": 158, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -12423,10 +12337,9 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_page_gc_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, type) / 1000000000)", + "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_page_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, type) / 1000000000)", "format": "time_series", - "hide": false, - "instant": false, + "hide": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{type}}-max", @@ -12434,20 +12347,44 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_page_gc_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", + "expr": "histogram_quantile(0.999, sum(rate(tiflash_storage_page_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", "format": "time_series", - "hide": true, + "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}-99", + "legendFormat": "{{type}}-999", "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_page_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", + "hide": true, + "interval": "", + "legendFormat": "{{type}}-99", + "refId": "C" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tiflash_storage_page_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", + "hide": true, + "interval": "", + "legendFormat": "{{type}}-95", + "refId": "D" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.80, 
sum(rate(tiflash_storage_page_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", + "hide": true, + "interval": "", + "legendFormat": "{{type}}-80", + "refId": "E" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Page GC Duration", + "title": "Page write Duration", "tooltip": { "shared": true, "sort": 2, @@ -12490,38 +12427,34 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The number of pages of all TiFlash instance", - "editable": true, - "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 36 + "y": 59 }, "hiddenSeries": false, - "id": 164, + "id": 163, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, + "current": false, + "hideEmpty": false, + "hideZero": true, "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, - "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -12542,33 +12475,22 @@ "targets": [ { "exemplar": true, - "expr": "tiflash_system_asynchronous_metric_PagesInMem{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "num_pages-{{instance}}", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "tiflash_system_asynchronous_metric_VersionedEntries{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", + "expr": "sum(increase(tiflash_storage_page_gc_count{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[$__rate_interval])) by (type)", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "num_entries-{{instance}}", - "refId": "B", - "step": 10 + "intervalFactor": 1, + "legendFormat": "{{type}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Numer of Pages", + "title": "Page GC Tasks OPM", "tooltip": { - "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" @@ -12583,7 +12505,7 @@ }, "yaxes": [ { - "format": "short", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -12610,32 +12532,30 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The num of pending writers in PageStorage", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 36 + "y": 59 }, "hiddenSeries": false, - "id": 231, + "id": 162, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": false, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sideWidth": 250, "sort": "max", "sortDesc": true, "total": false, @@ -12644,7 +12564,7 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, @@ -12660,23 +12580,34 @@ "targets": [ { "exemplar": true, - "expr": "sum(tiflash_system_current_metric_PSPendingWriterNum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_page_gc_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by 
(le, type) / 1000000000)", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "size-{{instance}}", + "legendFormat": "{{type}}-max", "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_page_gc_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type))", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-99", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PageStorage Pending Writers Num", + "title": "Page GC Duration", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -12689,7 +12620,7 @@ }, "yaxes": [ { - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -12702,7 +12633,7 @@ "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { @@ -12717,27 +12648,253 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "", + "description": "The number of pages of all TiFlash instance", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 44 + "y": 67 }, - "height": "", "hiddenSeries": false, - "id": 198, + "id": 164, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tiflash_system_asynchronous_metric_PagesInMem{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "num_pages-{{instance}}", + "refId": "A", + "step": 10 + }, + { + "exemplar": true, + "expr": "tiflash_system_asynchronous_metric_VersionedEntries{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "num_entries-{{instance}}", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Numer of Pages", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The num of pending writers in PageStorage", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "hiddenSeries": false, + "id": 231, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": 
false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 250, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tiflash_system_current_metric_PSPendingWriterNum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "size-{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "PageStorage Pending Writers Num", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 75 + }, + "height": "", + "hiddenSeries": false, + "id": 198, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, "hideZero": true, "max": true, "min": 
false, @@ -12841,7 +12998,7 @@ "h": 8, "w": 12, "x": 12, - "y": 44 + "y": 75 }, "hiddenSeries": false, "id": 123, @@ -12972,7 +13129,7 @@ "h": 9, "w": 24, "x": 0, - "y": 52 + "y": 83 }, "hiddenSeries": false, "id": 232, @@ -13079,7 +13236,7 @@ "h": 9, "w": 24, "x": 0, - "y": 61 + "y": 92 }, "hiddenSeries": false, "id": 345, @@ -13201,7 +13358,7 @@ "h": 8, "w": 12, "x": 0, - "y": 11 + "y": 44 }, "hiddenSeries": false, "id": 84, @@ -13306,7 +13463,7 @@ "h": 8, "w": 12, "x": 12, - "y": 11 + "y": 44 }, "hiddenSeries": false, "id": 305, @@ -13412,7 +13569,7 @@ "h": 8, "w": 12, "x": 0, - "y": 19 + "y": 52 }, "hiddenSeries": false, "id": 266, @@ -13522,7 +13679,7 @@ "h": 8, "w": 12, "x": 12, - "y": 19 + "y": 52 }, "hiddenSeries": false, "id": 86, @@ -13694,7 +13851,7 @@ "h": 8, "w": 24, "x": 0, - "y": 11 + "y": 13 }, "hiddenSeries": false, "id": 62, @@ -13813,7 +13970,7 @@ "h": 8, "w": 12, "x": 0, - "y": 19 + "y": 21 }, "height": "", "hiddenSeries": false, @@ -13932,7 +14089,7 @@ "h": 8, "w": 12, "x": 12, - "y": 19 + "y": 21 }, "height": "", "hiddenSeries": false, @@ -14049,7 +14206,7 @@ "h": 9, "w": 24, "x": 0, - "y": 27 + "y": 29 }, "height": "", "hiddenSeries": false, @@ -14171,7 +14328,7 @@ "h": 9, "w": 24, "x": 0, - "y": 36 + "y": 38 }, "hiddenSeries": false, "id": 90, @@ -17772,10 +17929,661 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Rough Set Filter Rate", + "title": "Rough Set Filter Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "decimals": null, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + 
"cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 97, + "legend": { + "show": false + }, + "pluginVersion": "6.1.6", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(delta(tiflash_storage_rough_set_filter_rate_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Rough Set Filter Rate Histogram", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "percent", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + } + ], + "title": "Rough Set Filter Rate Histogram", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 171, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "PageStorage Checkpoint Duration", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "hiddenSeries": false, + "id": 187, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": 
true, + "min": false, + "rightSide": true, + "show": true, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_checkpoint_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, type, $additional_groupby) / 1000000000)", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}} {{$additional_groupby}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(tiflash_storage_checkpoint_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "hide": false, + "interval": "", + "legendFormat": "{{type}}-999 {{$additional_groupby}}", + "refId": "C" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_checkpoint_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "hide": true, + "interval": "", + "legendFormat": "{{type}}-99 {{$additional_groupby}}", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Checkpoint Upload Duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": 
null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The flow of checkpoint operations", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "height": "", + "hiddenSeries": false, + "id": 174, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeatedByRow": true, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_checkpoint_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"incremental\"}[1m])) by ($additional_groupby)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "incremental {{$additional_groupby}}", + "refId": "A", + "step": 10 + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_checkpoint_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", 
type=\"compaction\"}[1m])) by ($additional_groupby)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "compaction {{$additional_groupby}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Checkpoint Upload flow", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The keys of checkpoint operations. All keys are uploaded in the checkpoint. 
Grouped by key types.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "height": "", + "hiddenSeries": false, + "id": 196, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeatedByRow": true, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_checkpoint_keys_by_types{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}} {{$additional_groupby}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Checkpoint Upload keys speed by type (all)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The flow of checkpoint operations. Group by key types", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "height": "", + "hiddenSeries": false, + "id": 197, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeatedByRow": true, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_checkpoint_flow_by_types{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}} {{$additional_groupby}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Checkpoint Upload flow by type (incremental+compaction)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + 
} + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The number of files of owned by each TiFlash node", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "hiddenSeries": false, + "id": 176, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tiflash_storage_remote_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"num_files\"}) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "checkpoint_data-{{instance}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Remote File Num", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -17788,8 +18596,7 @@ }, "yaxes": [ { - "decimals": null, - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -17797,12 +18604,11 @@ "show": true }, { - "decimals": null, - "format": "short", - "label": "", + "format": "percentunit", + "label": null, 
"logBase": 1, - "max": null, - "min": null, + "max": "1.1", + "min": "0", "show": true } ], @@ -17811,120 +18617,40 @@ "alignLevel": null } }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateOranges", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 97, - "legend": { - "show": false - }, - "pluginVersion": "6.1.6", - "reverseYBuckets": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(delta(tiflash_storage_rough_set_filter_rate_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le)", - "format": "heatmap", - "interval": "", - "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "Rough Set Filter Rate Histogram", - "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "percent", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null - } - ], - "title": "Rough Set Filter Rate Histogram", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 16 - }, - "id": 171, - "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "PageStorage Checkpoint Duration", + "decimals": 1, + "description": "The remote store usage owned by each TiFlash node", + "editable": true, + "error": false, 
"fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 17 + "x": 12, + "y": 33 }, "hiddenSeries": false, - "id": 187, + "id": 175, "legend": { "alignAsTable": true, "avg": false, "current": true, - "max": true, + "max": false, "min": false, "rightSide": true, "show": true, - "sort": "max", - "sortDesc": true, + "sideWidth": null, "total": false, "values": true }, @@ -17940,44 +18666,54 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/^valid_rate/", + "yaxis": 2 + }, + { + "alias": "/size/", + "linewidth": 3 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_checkpoint_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, type, $additional_groupby) / 1000000000)", - "format": "time_series", - "hide": true, + "expr": "sum(tiflash_storage_remote_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"total_size\"}) by (instance)", + "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}} {{$additional_groupby}}", + "intervalFactor": 2, + "legendFormat": "remote_size-{{instance}}", "refId": "A" }, { "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tiflash_storage_checkpoint_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "expr": "sum(tiflash_storage_remote_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"valid_size\"}) by (instance)", "hide": false, "interval": "", 
- "legendFormat": "{{type}}-999 {{$additional_groupby}}", - "refId": "C" + "intervalFactor": 2, + "legendFormat": "valid_size-{{instance}}", + "refId": "B" }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_checkpoint_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "expr": "sum((tiflash_storage_remote_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"valid_size\"}) / (tiflash_storage_remote_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"total_size\"})) by (instance)", "hide": true, "interval": "", - "legendFormat": "{{type}}-99 {{$additional_groupby}}", - "refId": "D" + "legendFormat": "valid_rate-{{instance}}", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Checkpoint Upload Duration", + "title": "Remote Store Usage", "tooltip": { + "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" @@ -17992,7 +18728,7 @@ }, "yaxes": [ { - "format": "s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -18000,11 +18736,11 @@ "show": true }, { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, - "min": null, + "max": "1.1", + "min": "0", "show": true } ], @@ -18019,36 +18755,30 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The flow of checkpoint operations", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 17 + "x": 0, + "y": 41 }, - "height": "", "hiddenSeries": false, - "id": 174, + "id": 189, "legend": { "alignAsTable": true, "avg": false, "current": true, - 
"hideEmpty": false, - "hideZero": true, + "hideEmpty": true, + "hideZero": false, "max": true, "min": false, "rightSide": true, "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, @@ -18064,7 +18794,6 @@ "pointradius": 5, "points": false, "renderer": "flot", - "repeatedByRow": true, "seriesOverrides": [], "spaceLength": 10, "stack": false, @@ -18072,33 +18801,22 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_storage_checkpoint_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"incremental\"}[1m])) by ($additional_groupby)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "incremental {{$additional_groupby}}", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_storage_checkpoint_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"compaction\"}[1m])) by ($additional_groupby)", + "expr": "sum(rate(tiflash_disaggregated_object_lock_request_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "compaction {{$additional_groupby}}", - "refId": "B" + "legendFormat": "{{type}} {{$additional_groupby}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Checkpoint Upload flow", + "title": "Remote Object Lock Request QPS", "tooltip": { "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -18111,7 +18829,8 @@ }, "yaxes": [ { - "format": "binBps", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -18119,11 +18838,11 @@ "show": true }, { - "format": 
"short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -18138,52 +18857,45 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The keys of checkpoint operations. All keys are uploaded in the checkpoint. Grouped by key types.", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 25 + "x": 12, + "y": 41 }, - "height": "", "hiddenSeries": false, - "id": 196, + "id": 191, "legend": { "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": false, - "hideZero": true, + "current": false, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", - "repeatedByRow": true, "seriesOverrides": [], "spaceLength": 10, "stack": false, @@ -18191,21 +18903,18 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_storage_checkpoint_keys_by_types{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "format": "time_series", - "hide": false, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_disaggregated_object_lock_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}} {{$additional_groupby}}", - "refId": "A", - "step": 10 + "legendFormat": "99%-{{type}} {{$additional_groupby}}", + "queryType": 
"randomWalk", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Checkpoint Upload keys speed by type (all)", + "title": "Remote Object Lock Duration", "tooltip": { "shared": true, "sort": 2, @@ -18221,7 +18930,7 @@ }, "yaxes": [ { - "format": "ops", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -18233,7 +18942,7 @@ "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -18248,74 +18957,86 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The flow of checkpoint operations. Group by key types", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 25 + "x": 0, + "y": 49 }, - "height": "", "hiddenSeries": false, - "id": 197, + "id": 193, "legend": { "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": false, - "hideZero": true, + "current": false, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", - "repeatedByRow": true, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/total/", + "yaxis": 2 + }, + { + "alias": "/one_store/", + "yaxis": 2 + }, + { + "alias": "/clean_locks/", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_storage_checkpoint_flow_by_types{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "format": "time_series", + "expr": 
"histogram_quantile(0.999, sum(rate(tiflash_storage_s3_gc_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "hide": true, + "interval": "", + "legendFormat": "99%-{{type}} {{$additional_groupby}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_s3_gc_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}} {{$additional_groupby}}", - "refId": "A", - "step": 10 + "legendFormat": "90%-{{type}} {{$additional_groupby}}", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Checkpoint Upload flow by type (incremental+compaction)", + "title": "Remote GC Duration Breakdown", "tooltip": { "shared": true, "sort": 2, @@ -18331,7 +19052,7 @@ }, "yaxes": [ { - "format": "binBps", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -18339,11 +19060,11 @@ "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -18359,7 +19080,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "The number of files of owned by each TiFlash node", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -18372,20 +19093,24 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 33 + "x": 12, + "y": 49 }, "hiddenSeries": false, - "id": 176, + "id": 195, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": false, + "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": 
false, "values": true }, @@ -18394,7 +19119,7 @@ "links": [], "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": false }, "percentage": false, "pluginVersion": "7.5.11", @@ -18408,12 +19133,11 @@ "targets": [ { "exemplar": true, - "expr": "sum(tiflash_storage_remote_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"num_files\"}) by (instance)", + "expr": "sum(tiflash_storage_s3_gc_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance,type)", "format": "time_series", - "hide": false, "interval": "", "intervalFactor": 2, - "legendFormat": "checkpoint_data-{{instance}}", + "legendFormat": "{{instance}}-{{type}}", "refId": "A", "step": 10 } @@ -18422,7 +19146,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Remote File Num", + "title": "Remote GC Status", "tooltip": { "msResolution": false, "shared": true, @@ -18447,11 +19171,11 @@ "show": true }, { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, - "max": "1.1", - "min": "0", + "max": null, + "min": null, "show": true } ], @@ -18466,25 +19190,21 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The remote store usage owned by each TiFlash node", - "editable": true, - "error": false, + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 0, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 33 + "x": 0, + "y": 57 }, "hiddenSeries": false, - "id": 175, + "id": 251, "legend": { "alignAsTable": true, "avg": false, @@ -18493,7 +19213,6 @@ "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -18511,12 +19230,8 @@ "renderer": "flot", "seriesOverrides": [ { - "alias": "/^valid_rate/", + "alias": "/hit_ratio/", 
"yaxis": 2 - }, - { - "alias": "/size/", - "linewidth": 3 } ], "spaceLength": 10, @@ -18525,40 +19240,23 @@ "targets": [ { "exemplar": true, - "expr": "sum(tiflash_storage_remote_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"total_size\"}) by (instance)", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "remote_size-{{instance}}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "sum(tiflash_storage_remote_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"valid_size\"}) by (instance)", + "expr": "sum(rate(tiflash_fap_task_result{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": "time_series", "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "valid_size-{{instance}}", + "intervalFactor": 1, + "legendFormat": "{{type}} {{$additional_groupby}}", "refId": "B" - }, - { - "exemplar": true, - "expr": "sum((tiflash_storage_remote_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"valid_size\"}) / (tiflash_storage_remote_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"total_size\"})) by (instance)", - "hide": true, - "interval": "", - "legendFormat": "valid_rate-{{instance}}", - "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Remote Store Usage", + "title": "FAP result", "tooltip": { - "msResolution": false, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -18571,7 +19269,8 @@ }, "yaxes": [ { - "format": "bytes", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, 
@@ -18582,7 +19281,7 @@ "format": "percentunit", "label": null, "logBase": 1, - "max": "1.1", + "max": null, "min": "0", "show": true } @@ -18598,6 +19297,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] @@ -18607,18 +19307,16 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 41 + "x": 12, + "y": 57 }, "hiddenSeries": false, - "id": 189, + "id": 252, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": true, - "hideZero": false, - "max": true, + "max": false, "min": false, "rightSide": true, "show": true, @@ -18628,7 +19326,7 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, @@ -18637,26 +19335,32 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/hit_ratio/", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_disaggregated_object_lock_request_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "sum(rate(tiflash_fap_task_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "format": "time_series", + "hide": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{type}} {{$additional_groupby}}", - "refId": "A" + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Remote Object Lock Request QPS", + "title": "FAP state", "tooltip": { "shared": true, "sort": 0, @@ -18673,7 +19377,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -18681,11 
+19385,11 @@ "show": true }, { - "format": "none", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true } ], @@ -18700,6 +19404,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] @@ -18709,58 +19414,63 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 41 + "x": 0, + "y": 65 }, "hiddenSeries": false, - "id": 191, + "id": 254, "legend": { "alignAsTable": true, "avg": false, - "current": false, - "max": true, + "current": true, + "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/hit_ratio/", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tiflash_disaggregated_object_lock_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "expr": "histogram_quantile(0.999, sum(round(1000000000*rate(tiflash_fap_task_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, type, $additional_groupby) / 1000000000)", + "format": "time_series", + "hide": false, "interval": "", - "legendFormat": "99%-{{type}} {{$additional_groupby}}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{type}} {{$additional_groupby}}", + "refId": 
"B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Remote Object Lock Duration", + "title": "FAP time by stage", "tooltip": { "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -18773,6 +19483,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -18781,11 +19492,11 @@ "show": true }, { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true } ], @@ -18800,6 +19511,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] @@ -18809,69 +19521,52 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 49 + "x": 12, + "y": 65 }, "hiddenSeries": false, - "id": 193, + "id": 253, "legend": { "alignAsTable": true, "avg": false, - "current": false, - "max": true, + "current": true, + "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { - "alias": "/total/", - "yaxis": 2 - }, - { - "alias": "/one_store/", - "yaxis": 2 - }, - { - "alias": "/clean_locks/", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tiflash_storage_s3_gc_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", - "hide": true, - "interval": "", - "legendFormat": "99%-{{type}} {{$additional_groupby}}", - 
"queryType": "randomWalk", - "refId": "A" - }, + "alias": "/hit_ratio/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_s3_gc_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "expr": "sum(rate(tiflash_fap_nomatch_reason{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": "time_series", "hide": false, "interval": "", - "legendFormat": "90%-{{type}} {{$additional_groupby}}", + "intervalFactor": 1, + "legendFormat": "{{type}} {{$additional_groupby}}", "refId": "B" } ], @@ -18879,10 +19574,10 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Remote GC Duration Breakdown", + "title": "FAP no match reason", "tooltip": { "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -18895,7 +19590,8 @@ }, "yaxes": [ { - "format": "s", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -18903,11 +19599,11 @@ "show": true }, { - "format": "s", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true } ], @@ -18915,58 +19611,65 @@ "align": false, "alignLevel": null } - }, + } + ], + "title": "Disaggregated-Write", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 347, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", - "editable": true, - "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 0, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, - "w": 
12, - "x": 12, - "y": 49 + "w": 24, + "x": 0, + "y": 18 }, "hiddenSeries": false, - "id": 195, + "id": 173, "legend": { "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": false, - "hideZero": true, - "max": false, + "current": false, + "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { - "alertThreshold": false + "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -18976,22 +19679,19 @@ "targets": [ { "exemplar": true, - "expr": "sum(tiflash_storage_s3_gc_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance,type)", - "format": "time_series", + "expr": "histogram_quantile(0.99, sum(rate(tiflash_disaggregated_breakdown_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}-{{type}}", - "refId": "A", - "step": 10 + "legendFormat": "99%-{{type}} {{$additional_groupby}}", + "queryType": "randomWalk", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Remote GC Status", + "title": "Read Duration Breakdown", "tooltip": { - "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" @@ -19006,7 +19706,7 @@ }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -19033,7 +19733,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "Remote Cache 
Operations", "fieldConfig": { "defaults": {}, "overrides": [] @@ -19044,18 +19744,21 @@ "h": 8, "w": 12, "x": 0, - "y": 57 + "y": 26 }, "hiddenSeries": false, - "id": 251, + "id": 185, "legend": { "alignAsTable": true, "avg": false, "current": true, - "max": false, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, @@ -19073,7 +19776,11 @@ "renderer": "flot", "seriesOverrides": [ { - "alias": "/hit_ratio/", + "alias": "dtfile_cache_hit_ratio", + "yaxis": 2 + }, + { + "alias": "page_cache_hit_ratio", "yaxis": 2 } ], @@ -19083,23 +19790,39 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_fap_task_result{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "sum(rate(tiflash_storage_remote_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{type}} {{$additional_groupby}}", "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_remote_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"dtfile_hit\"}[1m]))/sum(rate(tiflash_storage_remote_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"dtfile_hit|dtfile_miss\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "dtfile_cache_hit_ratio", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_remote_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", 
type=~\"page_hit\"}[1m]))/sum(rate(tiflash_storage_remote_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"page_hit|page_miss\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "page_cache_hit_ratio", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "FAP result", + "title": "Remote Cache Operations", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -19140,7 +19863,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "Remote Cache Flow", "fieldConfig": { "defaults": {}, "overrides": [] @@ -19151,18 +19874,21 @@ "h": 8, "w": 12, "x": 12, - "y": 57 + "y": 26 }, "hiddenSeries": false, - "id": 252, + "id": 186, "legend": { "alignAsTable": true, "avg": false, "current": true, - "max": false, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, @@ -19178,19 +19904,14 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/hit_ratio/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_fap_task_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "sum(rate(tiflash_storage_remote_cache_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "format": "time_series", "hide": false, "interval": "", @@ -19203,10 +19924,10 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "FAP state", + "title": "Remote Cache Flow", "tooltip": 
{ "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -19220,7 +19941,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -19233,7 +19954,7 @@ "logBase": 1, "max": null, "min": "0", - "show": true + "show": false } ], "yaxis": { @@ -19247,7 +19968,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "Remote Cache Usage", "fieldConfig": { "defaults": {}, "overrides": [] @@ -19258,18 +19979,21 @@ "h": 8, "w": 12, "x": 0, - "y": 65 + "y": 34 }, "hiddenSeries": false, - "id": 254, + "id": 188, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, "total": false, "values": true }, @@ -19285,35 +20009,53 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/hit_ratio/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.999, sum(round(1000000000*rate(tiflash_fap_task_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, type, $additional_groupby) / 1000000000)", + "expr": "sum(tiflash_system_current_metric_DTFileCacheCapacity{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", "format": "time_series", "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}} {{$additional_groupby}}", + "legendFormat": "DTFileCapacity-{{instance}}", "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(tiflash_system_current_metric_DTFileCacheUsed{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "DTFileUsed-{{instance}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(tiflash_system_current_metric_PageCacheCapacity{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "PageCapacity-{{instance}}", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(tiflash_system_current_metric_PageCacheUsed{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "PageUsed-{{instance}}", + "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "FAP time by stage", + "title": "Remote Cache Usage", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -19327,7 +20069,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -19340,7 +20082,7 @@ "logBase": 1, "max": null, "min": "0", - "show": true + "show": false } ], "yaxis": { @@ -19354,7 +20096,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "Memory Usage of Storage Tasks", "fieldConfig": { "defaults": {}, "overrides": [] @@ -19365,15 +20107,16 @@ "h": 8, "w": 12, "x": 12, - "y": 65 + "y": 34 }, "hiddenSeries": false, - "id": 253, + "id": 233, "legend": { "alignAsTable": true, "avg": false, - "current": true, - "max": false, + "current": false, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, @@ -19392,35 +20135,52 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/hit_ratio/", - "yaxis": 2 - } - ], + "seriesOverrides": [], 
"spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_fap_nomatch_reason{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "format": "time_series", + "expr": "sum(tiflash_system_current_metric_MemoryTrackingQueryStorageTask{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}} {{$additional_groupby}}", - "refId": "B" + "legendFormat": "MemoryTrackingQueryStorageTask-{{instance}}", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(tiflash_system_current_metric_MemoryTrackingFetchPages{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "MemoryTrackingFetchPages-{{instance}}", + "refId": "D" + }, + { + "exemplar": true, + "expr": "sum(tiflash_system_current_metric_DT_DeltaIndexCacheSize{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "DeltaIndexCacheSize-{{instance}}", + "refId": "E" + }, + { + "exemplar": true, + "expr": "sum(tiflash_system_current_metric_MemoryTrackingSharedColumnData{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "SharedColumnData-{{instance}}", + "refId": "F" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "FAP no match reason", + "title": "Memory Usage of Storage Tasks", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": 
"graph", @@ -19434,7 +20194,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -19447,35 +20207,21 @@ "logBase": 1, "max": null, "min": "0", - "show": true + "show": false } ], "yaxis": { "align": false, "alignLevel": null } - } - ], - "title": "Disaggregated-Write", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "id": 347, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "description": "DeltaIndex cache of ReadNodes", "fieldConfig": { "defaults": {}, "overrides": [] @@ -19483,49 +20229,63 @@ "fill": 0, "fillGradient": 0, "gridPos": { - "h": 8, - "w": 24, + "h": 7, + "w": 12, "x": 0, - "y": 18 + "y": 42 }, "hiddenSeries": false, - "id": 173, + "id": 236, "legend": { "alignAsTable": true, - "avg": false, - "current": false, - "max": true, + "avg": true, + "current": true, + "hideZero": true, + "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/hit_ratio/", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tiflash_disaggregated_breakdown_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "expr": "sum(rate(tiflash_storage_mvcc_index_cache{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, instance)", + "format": "time_series", + "hide": false, "interval": "", - "legendFormat": "99%-{{type}} {{$additional_groupby}}", - "queryType": "randomWalk", + "intervalFactor": 1, + "legendFormat": "{{type}}-{{instance}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_mvcc_index_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"hit\"}[1m])) by (instance) /sum(rate(tiflash_storage_mvcc_index_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "hit_ratio-{{instance}}", "refId": "A" } ], @@ -19533,7 +20293,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Read Duration Breakdown", + "title": "MVCCIndexCache", "tooltip": { "shared": true, "sort": 2, @@ -19549,7 +20309,8 @@ }, "yaxes": [ { - "format": "s", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -19557,11 +20318,11 @@ "show": true }, { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true } ], @@ -19576,7 +20337,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Remote Cache Operations", + "description": "Duration of storage's internal sub tasks", "fieldConfig": { "defaults": {}, "overrides": [] @@ -19584,24 +20345,23 @@ "fill": 0, "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 26 + "x": 12, + "y": 42 }, "hiddenSeries": false, - "id": 185, + "id": 356, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "max", - "sortDesc": true, + "sort": null, + 
"sortDesc": null, "total": false, "values": true }, @@ -19617,44 +20377,35 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "dtfile_cache_hit_ratio", - "yaxis": 2 - }, - { - "alias": "page_cache_hit_ratio", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tiflash_storage_remote_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "exemplar": false, + "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"place_index_update\"}[$__rate_interval]))) by (le,type, $additional_groupby) / 1000000000)", "format": "time_series", - "hide": false, + "hide": true, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}} {{$additional_groupby}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "max-{{type}} {{$additional_groupby}}", + "refId": "A" }, { "exemplar": true, - "expr": "sum(rate(tiflash_storage_remote_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"dtfile_hit\"}[1m]))/sum(rate(tiflash_storage_remote_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"dtfile_hit|dtfile_miss\"}[1m]))", + "expr": "histogram_quantile(0.9999, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"place_index_update\"}[$__rate_interval])) by (le, type, $additional_groupby))", "hide": false, "interval": "", - "legendFormat": "dtfile_cache_hit_ratio", - 
"refId": "A" + "legendFormat": "9999-{{type}} {{$additional_groupby}}", + "refId": "B" }, { "exemplar": true, - "expr": "sum(rate(tiflash_storage_remote_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"page_hit\"}[1m]))/sum(rate(tiflash_storage_remote_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"page_hit|page_miss\"}[1m]))", + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=\"place_index_update\"}[$__rate_interval])) by (le, type, $additional_groupby))", "hide": false, "interval": "", - "legendFormat": "page_cache_hit_ratio", + "legendFormat": "99-{{type}} {{$additional_groupby}}", "refId": "C" } ], @@ -19662,10 +20413,10 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Remote Cache Operations", + "title": "PlaceIndex Tasks Duration", "tooltip": { "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -19678,8 +20429,8 @@ }, "yaxes": [ { - "decimals": null, - "format": "ops", + "decimals": 1, + "format": "s", "label": null, "logBase": 1, "max": null, @@ -19687,12 +20438,12 @@ "show": true }, { - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, "min": "0", - "show": true + "show": false } ], "yaxis": { @@ -19706,7 +20457,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Remote Cache Flow", + "description": "Total number of storage's internal sub tasks", "fieldConfig": { "defaults": {}, "overrides": [] @@ -19714,24 +20465,22 @@ "fill": 0, "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 26 + "x": 0, + "y": 49 }, "hiddenSeries": false, - "id": 186, + "id": 353, "legend": { 
"alignAsTable": true, "avg": false, "current": true, - "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "max", - "sortDesc": true, + "sideWidth": null, "total": false, "values": true }, @@ -19750,15 +20499,23 @@ "seriesOverrides": [], "spaceLength": 10, "stack": false, - "steppedLine": false, + "steppedLine": true, "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_storage_remote_cache_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "sum(rate(tiflash_storage_place_index_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[$__rate_interval])) by (type, $additional_groupby)", "format": "time_series", "hide": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, + "legendFormat": "{{type}} {{$additional_groupby}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_subtask_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"place_index_update\"}[$__rate_interval])) by (type, $additional_groupby)", + "hide": false, + "interval": "", "legendFormat": "{{type}} {{$additional_groupby}}", "refId": "B" } @@ -19767,10 +20524,10 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Remote Cache Flow", + "title": "PlaceIndexTask/Reuse OPS", "tooltip": { "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -19783,8 +20540,8 @@ }, "yaxes": [ { - "decimals": null, - "format": "binBps", + "decimals": 1, + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -19792,7 +20549,7 @@ "show": true }, { - "format": "percentunit", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -19811,31 +20568,31 @@ "dashLength": 10, "dashes": false, "datasource": 
"${DS_TEST-CLUSTER}", - "description": "Remote Cache Usage", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 34 + "x": 12, + "y": 49 }, "hiddenSeries": false, - "id": 188, + "id": 358, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideZero": true, - "max": false, + "hideEmpty": false, + "hideZero": false, + "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -19859,35 +20616,32 @@ "targets": [ { "exemplar": true, - "expr": "sum(tiflash_system_current_metric_DTFileCacheCapacity{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_place_index_stats_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, $additional_groupby) / 1000000000)", "format": "time_series", - "hide": false, - "interval": "", - "legendFormat": "DTFileCapacity-{{instance}}", - "refId": "B" - }, - { - "exemplar": true, - "expr": "sum(tiflash_system_current_metric_DTFileCacheUsed{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", - "hide": false, + "hide": true, "interval": "", - "legendFormat": "DTFileUsed-{{instance}}", + "intervalFactor": 1, + "legendFormat": "max {{$additional_groupby}}", "refId": "A" }, { "exemplar": true, - "expr": "sum(tiflash_system_current_metric_PageCacheCapacity{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", - "hide": false, + "expr": "histogram_quantile(0.99, 
sum(rate(tiflash_storage_place_index_stats_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "format": "time_series", + "hide": true, "interval": "", - "legendFormat": "PageCapacity-{{instance}}", + "intervalFactor": 1, + "legendFormat": "99-{{type}} {{$additional_groupby}}", "refId": "C" }, { "exemplar": true, - "expr": "sum(tiflash_system_current_metric_PageCacheUsed{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "expr": "sum by (type, $additional_groupby) (rate(tiflash_storage_place_index_stats_count_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))\n/\nsum by (type, $additional_groupby) (rate(tiflash_storage_place_index_stats_count_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))", + "format": "time_series", "hide": false, "interval": "", - "legendFormat": "PageUsed-{{instance}}", + "intervalFactor": 1, + "legendFormat": "avg-{{type}} {{$additional_groupby}}", "refId": "D" } ], @@ -19895,7 +20649,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Remote Cache Usage", + "title": "PlaceIndex update rows/deletes", "tooltip": { "shared": true, "sort": 2, @@ -19911,8 +20665,7 @@ }, "yaxes": [ { - "decimals": null, - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -19920,26 +20673,42 @@ "show": true }, { - "format": "percentunit", + "decimals": 2, + "format": "opm", "label": null, "logBase": 1, "max": null, "min": "0", - "show": false + "show": true } ], "yaxis": { "align": false, "alignLevel": null } - }, + } + ], + "title": "Disaggregated-Compute", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 
24, + "x": 0, + "y": 18 + }, + "id": 172, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "DeltaIndex cache of ReadNodes", + "description": "S3 read/write throughput", "fieldConfig": { "defaults": {}, "overrides": [] @@ -19949,20 +20718,21 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 34 + "x": 0, + "y": 19 }, "hiddenSeries": false, - "id": 236, + "id": 178, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideZero": true, - "max": false, + "max": true, "min": false, "rightSide": true, "show": true, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, @@ -19978,43 +20748,46 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/hit_ratio/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_storage_mvcc_index_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, instance)", + "expr": "sum(rate(tiflash_system_profile_event_S3WriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}-{{instance}}", + "legendFormat": "S3WriteBytes {{$additional_groupby}}", "refId": "B" }, { "exemplar": true, - "expr": "sum(rate(tiflash_storage_mvcc_index_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\", type=~\"hit\"}[1m])) by (instance) /sum(rate(tiflash_storage_mvcc_index_cache{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (instance)", + "expr": 
"sum(rate(tiflash_system_profile_event_S3ReadBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "hide": false, "interval": "", - "legendFormat": "hit_ratio-{{instance}}", + "legendFormat": "S3ReadBytes {{$additional_groupby}}", "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3WriteDMFileBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "S3WriteDMFileBytes {{$additional_groupby}}", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "MVCCIndexCache", + "title": "S3 Bytes", "tooltip": { "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -20028,7 +20801,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -20036,7 +20809,7 @@ "show": true }, { - "format": "percentunit", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -20055,7 +20828,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Memory Usage of Storage Tasks", + "description": "S3 OPS", "fieldConfig": { "defaults": {}, "overrides": [] @@ -20065,20 +20838,21 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 42 + "x": 12, + "y": 19 }, "hiddenSeries": false, - "id": 233, + "id": 179, "legend": { "alignAsTable": true, "avg": false, - "current": false, - "hideZero": true, + "current": true, "max": true, "min": false, "rightSide": true, "show": true, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, @@ -20101,42 +20875,108 @@ "targets": [ { "exemplar": true, - "expr": "sum(tiflash_system_current_metric_MemoryTrackingQueryStorageTask{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "expr": "sum(rate(tiflash_system_profile_event_S3PutObject{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": "time_series", "hide": false, "interval": "", - "legendFormat": "MemoryTrackingQueryStorageTask-{{instance}}", + "intervalFactor": 1, + "legendFormat": "S3PutObject {{$additional_groupby}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3GetObject{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "S3GetObject {{$additional_groupby}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3HeadObject{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "S3HeadObject {{$additional_groupby}}", "refId": "C" }, { "exemplar": true, - "expr": "sum(tiflash_system_current_metric_MemoryTrackingFetchPages{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "expr": "sum(rate(tiflash_system_profile_event_S3ListObjects{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "hide": false, "interval": "", - "legendFormat": "MemoryTrackingFetchPages-{{instance}}", + "legendFormat": "S3ListObjects {{$additional_groupby}}", "refId": "D" }, { "exemplar": true, - "expr": "sum(tiflash_system_current_metric_DT_DeltaIndexCacheSize{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "expr": "sum(rate(tiflash_system_profile_event_S3DeleteObject{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "hide": false, "interval": "", - "legendFormat": "DeltaIndexCacheSize-{{instance}}", + "legendFormat": "S3DeleteObject {{$additional_groupby}}", "refId": "E" }, { "exemplar": true, - "expr": "sum(tiflash_system_current_metric_MemoryTrackingSharedColumnData{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) by (instance)", + "expr": "sum(rate(tiflash_system_profile_event_S3CopyObject{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "hide": false, "interval": "", - "legendFormat": "SharedColumnData-{{instance}}", + "legendFormat": "S3CopyObject {{$additional_groupby}}", "refId": "F" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3CreateMultipartUpload{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "S3CreateMultipartUpload {{$additional_groupby}}", + "refId": "G" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3UploadPart{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "S3UploadPart {{$additional_groupby}}", + "refId": "H" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3CompleteMultipartUpload{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "S3CompleteMultipartUpload {{$additional_groupby}}", + "refId": "I" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3PutDMFile{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "S3PutDMFile {{$additional_groupby}}", + "refId": "J" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3IORead{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": true, + "interval": "", + "legendFormat": "S3IORead {{$additional_groupby}}", + "refId": "K" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3IOSeek{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": true, + "interval": "", + "legendFormat": "S3IOSeek {{$additional_groupby}}", + "refId": "L" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Memory Usage of Storage Tasks", + "title": "S3 OPS", "tooltip": { "shared": true, "sort": 2, @@ -20153,7 +20993,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -20161,41 +21001,26 @@ "show": true }, { - "format": "percentunit", + "format": "opm", "label": null, "logBase": 1, "max": null, "min": "0", - "show": false + "show": true } ], "yaxis": { "align": false, "alignLevel": null } - } - ], - "title": "Disaggregated-Compute", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 172, - "panels": [ + }, { 
"aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "S3 read/write throughput", + "description": "S3 Retry OPS", "fieldConfig": { "defaults": {}, "overrides": [] @@ -20206,10 +21031,10 @@ "h": 8, "w": 12, "x": 0, - "y": 19 + "y": 27 }, "hiddenSeries": false, - "id": 178, + "id": 182, "legend": { "alignAsTable": true, "avg": false, @@ -20242,36 +21067,66 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3WriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "sum(rate(tiflash_system_profile_event_S3GetObjectRetry{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "S3WriteBytes {{$additional_groupby}}", + "legendFormat": "S3GetObjectRetry {{$additional_groupby}}", "refId": "B" }, { "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3ReadBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "sum(rate(tiflash_system_profile_event_S3PutObjectRetry{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "hide": false, "interval": "", - "legendFormat": "S3ReadBytes {{$additional_groupby}}", + "legendFormat": "S3PutObjectRetry {{$additional_groupby}}", "refId": "A" }, { "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3WriteDMFileBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": 
"sum(rate(tiflash_system_profile_event_S3PutDMFileRetry{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "hide": false, "interval": "", - "legendFormat": "S3WriteDMFileBytes {{$additional_groupby}}", + "legendFormat": "S3PutDMFileRetry {{$additional_groupby}}", "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3IOReadError{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "S3IOReadError {{$additional_groupby}}", + "refId": "D" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3IOSeekError{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "S3IOSeekError {{$additional_groupby}}", + "refId": "E" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3IOSeekBackward{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "S3IOSeekBackward {{$additional_groupby}}", + "refId": "F" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "S3 Bytes", + "title": "S3 Retry OPS", "tooltip": { "shared": true, "sort": 0, @@ -20288,7 +21143,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -20315,21 +21170,21 @@ "dashLength": 10, "dashes": false, "datasource": 
"${DS_TEST-CLUSTER}", - "description": "S3 OPS", + "description": "S3 Request Duration", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 19 + "y": 27 }, "hiddenSeries": false, - "id": 179, + "id": 180, "legend": { "alignAsTable": true, "avg": false, @@ -20362,92 +21217,38 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3PutObject{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_s3_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, type, $additional_groupby) / 1000000000)", "format": "time_series", - "hide": false, + "hide": true, "interval": "", "intervalFactor": 1, - "legendFormat": "S3PutObject {{$additional_groupby}}", - "refId": "B" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3GetObject{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "S3GetObject {{$additional_groupby}}", + "legendFormat": "{{type}}-max {{$additional_groupby}}", "refId": "A" }, { "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3HeadObject{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "S3HeadObject {{$additional_groupby}}", - "refId": "C" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3ListObjects{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "S3ListObjects {{$additional_groupby}}", - "refId": "D" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3DeleteObject{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "S3DeleteObject {{$additional_groupby}}", - "refId": "E" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3CopyObject{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "S3CopyObject {{$additional_groupby}}", - "refId": "F" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3CreateMultipartUpload{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "S3CreateMultipartUpload {{$additional_groupby}}", - "refId": "G" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3UploadPart{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "S3UploadPart {{$additional_groupby}}", - "refId": "H" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3CompleteMultipartUpload{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "histogram_quantile(0.9999, sum(rate(tiflash_storage_s3_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "format": "time_series", "hide": false, "interval": "", - "legendFormat": "S3CompleteMultipartUpload {{$additional_groupby}}", - "refId": "I" + "intervalFactor": 1, + "legendFormat": "{{type}}-9999 {{$additional_groupby}}", + "refId": "B" }, { "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3PutDMFile{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_s3_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", "hide": false, "interval": "", - "legendFormat": "S3PutDMFile {{$additional_groupby}}", - "refId": "J" + "legendFormat": "{{type}}-99 {{$additional_groupby}}", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "S3 OPS", + "title": "S3 Request Duration", "tooltip": { "shared": true, "sort": 2, @@ -20463,8 +21264,7 @@ }, "yaxes": [ { - "decimals": null, - "format": "ops", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -20472,11 +21272,11 @@ "show": true }, { - "format": "opm", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -20491,7 +21291,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "S3 Retry OPS", + "description": "S3 HTTP OPS", "fieldConfig": { "defaults": {}, "overrides": [] @@ -20502,10 +21302,10 @@ "h": 8, "w": 12, "x": 0, - "y": 27 + "y": 35 }, "hiddenSeries": false, - "id": 182, + "id": 343, "legend": { "alignAsTable": true, "avg": false, @@ -20538,39 +21338,93 @@ "targets": [ { "exemplar": true, - "expr": 
"sum(rate(tiflash_system_profile_event_S3GetObjectRetry{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "format": "time_series", + "expr": "sum(rate(tiflash_system_profile_event_S3ReadRequestsCount{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "S3GetObjectRetry {{$additional_groupby}}", - "refId": "B" + "legendFormat": "read-count {{$additional_groupby}}", + "refId": "K" }, { "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3PutObjectRetry{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "sum(rate(tiflash_system_profile_event_S3WriteRequestsCount{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "hide": false, "interval": "", - "legendFormat": "S3PutObjectRetry {{$additional_groupby}}", + "legendFormat": "write-count {{$additional_groupby}}", + "refId": "L" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3ReadRequestsErrors{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "read-error {{$additional_groupby}}", + "refId": "M" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3WriteRequestsErrors{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "write-error 
{{$additional_groupby}}", + "refId": "N" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3ReadRequestsThrottling{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "read-throttling {{$additional_groupby}}", + "refId": "O" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3WriteRequestsThrottling{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "write-throttling {{$additional_groupby}}", + "refId": "P" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3ReadRequestsRedirects{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "read-redirects {{$additional_groupby}}", + "refId": "Q" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3WriteRequestsRedirects{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "write-redirects {{$additional_groupby}}", + "refId": "R" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3ReadRequestsNotFound{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "hide": false, + "interval": "", + "legendFormat": "read-notfound {{$additional_groupby}}", "refId": "A" }, { "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3PutDMFileRetry{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "sum(rate(tiflash_system_profile_event_S3WriteRequestsNotFound{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "hide": false, "interval": "", - "legendFormat": "S3PutDMFileRetry {{$additional_groupby}}", - "refId": "C" + "legendFormat": "write-notfound {{$additional_groupby}}", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "S3 Retry OPS", + "title": "S3 HTTP OPS", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -20611,7 +21465,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "S3 Request Duration", + "description": "S3 HTTP Request Duration", "fieldConfig": { "defaults": {}, "overrides": [] @@ -20622,10 +21476,10 @@ "h": 8, "w": 12, "x": 12, - "y": 27 + "y": 35 }, "hiddenSeries": false, - "id": 180, + "id": 344, "legend": { "alignAsTable": true, "avg": false, @@ -20658,7 +21512,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_s3_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, type, $additional_groupby) / 1000000000)", + "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_s3_http_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, type, $additional_groupby) / 1000000000)", "format": "time_series", "hide": true, "interval": "", @@ -20668,28 +21522,26 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.9999, 
sum(rate(tiflash_storage_s3_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", - "format": "time_series", + "expr": "histogram_quantile(0.9999, sum(rate(tiflash_storage_s3_http_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", "hide": false, "interval": "", - "intervalFactor": 1, "legendFormat": "{{type}}-9999 {{$additional_groupby}}", - "refId": "B" + "refId": "C" }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_s3_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_s3_http_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", "hide": false, "interval": "", "legendFormat": "{{type}}-99 {{$additional_groupby}}", - "refId": "C" + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "S3 Request Duration", + "title": "S3 HTTP Request Duration", "tooltip": { "shared": true, "sort": 2, @@ -20743,10 +21595,10 @@ "h": 8, "w": 12, "x": 0, - "y": 35 + "y": 43 }, "hiddenSeries": false, - "id": 343, + "id": 360, "legend": { "alignAsTable": true, "avg": false, @@ -20779,82 +21631,18 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3ReadRequestsCount{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "read-count 
{{$additional_groupby}}", - "refId": "K" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3WriteRequestsCount{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "write-count {{$additional_groupby}}", - "refId": "L" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3ReadRequestsErrors{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "read-error {{$additional_groupby}}", - "refId": "M" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3WriteRequestsErrors{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "write-error {{$additional_groupby}}", - "refId": "N" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3ReadRequestsThrottling{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "read-throttling {{$additional_groupby}}", - "refId": "O" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3WriteRequestsThrottling{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "write-throttling {{$additional_groupby}}", - "refId": "P" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3ReadRequestsRedirects{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "read-redirects {{$additional_groupby}}", - "refId": "Q" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3WriteRequestsRedirects{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", - "hide": false, - "interval": "", - "legendFormat": "write-redirects {{$additional_groupby}}", - "refId": "R" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3ReadRequestsNotFound{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "sum by (type, $additional_groupby) (tiflash_system_current_metric_S3Requests{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"})", "hide": false, "interval": "", - "legendFormat": "read-notfound {{$additional_groupby}}", + "legendFormat": "S3Requests {{$additional_groupby}}", "refId": "A" }, { "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_S3WriteRequestsNotFound{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "expr": "sum by (type, $additional_groupby) (tiflash_system_current_metric_S3RandomAccessFile{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}) ", "hide": false, "interval": "", - "legendFormat": "write-notfound {{$additional_groupby}}", + "legendFormat": "S3RandomAccessFile {{$additional_groupby}}", "refId": "B" } ], @@ -20862,7 +21650,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "S3 HTTP OPS", + "title": "S3 on-going 
instances", "tooltip": { "shared": true, "sort": 2, @@ -20879,7 +21667,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -20906,25 +21694,26 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "S3 HTTP Request Duration", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 35 + "y": 43 }, "hiddenSeries": false, - "id": 344, + "id": 359, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideZero": false, "max": true, "min": false, "rightSide": true, @@ -20953,28 +21742,52 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_s3_http_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m]))) by (le, type, $additional_groupby) / 1000000000)", + "expr": "sum(rate(tiflash_system_profile_event_S3IOReadError{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", "format": "time_series", - "hide": true, + "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}-max {{$additional_groupby}}", - "refId": "A" + "legendFormat": "S3IOReadError {{$additional_groupby}}", + "refId": "D" }, { "exemplar": true, - "expr": "histogram_quantile(0.9999, sum(rate(tiflash_storage_s3_http_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "expr": "sum(rate(tiflash_system_profile_event_S3IOSeekError{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": 
"time_series", "hide": false, "interval": "", - "legendFormat": "{{type}}-9999 {{$additional_groupby}}", - "refId": "C" + "intervalFactor": 1, + "legendFormat": "S3IOSeekError {{$additional_groupby}}", + "refId": "E" }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_s3_http_request_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (le, type, $additional_groupby))", + "expr": "sum(rate(tiflash_system_profile_event_S3IOSeekBackward{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": "time_series", "hide": false, "interval": "", - "legendFormat": "{{type}}-99 {{$additional_groupby}}", + "intervalFactor": 1, + "legendFormat": "S3IOSeekBackward {{$additional_groupby}}", + "refId": "F" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3IORead{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "S3IORead {{$additional_groupby}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_S3IOSeek{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", instance=~\"$tiflash_role\"}[1m])) by (type, $additional_groupby)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "S3IOSeek {{$additional_groupby}}", "refId": "B" } ], @@ -20982,7 +21795,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "S3 HTTP Request Duration", + "title": "S3RandomAccessFile OPS", "tooltip": { "shared": true, "sort": 2, @@ -20998,7 +21811,8 @@ }, "yaxes": [ { - "format": "s", + "decimals": null, + 
"format": "ops", "label": null, "logBase": 1, "max": null, @@ -21006,11 +21820,11 @@ "show": true }, { - "format": "short", + "format": "opm", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true } ], @@ -22199,7 +23013,7 @@ "h": 8, "w": 12, "x": 0, - "y": 53 + "y": 21 }, "hiddenSeries": false, "id": 246, @@ -22366,7 +23180,7 @@ "h": 8, "w": 12, "x": 12, - "y": 53 + "y": 21 }, "hiddenSeries": false, "id": 201, @@ -22774,7 +23588,7 @@ "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 23 }, "hiddenSeries": false, "id": 286, @@ -22895,7 +23709,7 @@ "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 23 }, "hiddenSeries": false, "id": 288, @@ -23024,7 +23838,7 @@ "h": 8, "w": 12, "x": 0, - "y": 24 + "y": 31 }, "hiddenSeries": false, "id": 282, @@ -23151,7 +23965,7 @@ "h": 8, "w": 12, "x": 12, - "y": 24 + "y": 31 }, "hiddenSeries": false, "id": 284,