From f6aeff4f509875831d02a8f40bc54a74dd71d445 Mon Sep 17 00:00:00 2001 From: Andrei Smirnov Date: Wed, 11 Mar 2026 22:29:29 +0300 Subject: [PATCH] app: improved loki client (#4389) Improved loki client: buffered input channel + dropping and counting entries when the buffer is full. category: refactor ticket: none --- app/log/loki/client.go | 5 ++++- app/log/loki/metrics.go | 16 ++++++++++++++++ docs/metrics.md | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 app/log/loki/metrics.go diff --git a/app/log/loki/client.go b/app/log/loki/client.go index 6a493ffc20..ab894d96c0 100644 --- a/app/log/loki/client.go +++ b/app/log/loki/client.go @@ -32,6 +32,7 @@ const ( batchWait = 1 * time.Second batchMax = 5 * 1 << 20 // 5MB maxLogLineLen = 4 << 10 // 4096B + inputBuffer = 1000 // Buffered channel capacity for log lines ) // lazyLabelsFunc abstracts lazy loading of labels, logs will only be sent when it returns true. @@ -59,7 +60,7 @@ func newInternal(endpoint string, serviceLabel string, batchWait time.Duration, endpoint: endpoint, done: make(chan struct{}), quit: make(chan struct{}), - input: make(chan string), + input: make(chan string, inputBuffer), batchMax: batchMax, batchWait: batchWait, maxLogLineLen: maxLogLineLen, @@ -156,6 +157,8 @@ func (c *Client) Add(line string) { select { case c.input <- line: case <-c.quit: + default: + droppedTotal.Inc() } } diff --git a/app/log/loki/metrics.go b/app/log/loki/metrics.go new file mode 100644 index 0000000000..5ae1a7994f --- /dev/null +++ b/app/log/loki/metrics.go @@ -0,0 +1,16 @@ +// Copyright © 2022-2026 Obol Labs Inc. Licensed under the terms of a Business Source License 1.1 + +package loki + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/obolnetwork/charon/app/promauto" +) + +var droppedTotal = promauto.NewCounter(prometheus.CounterOpts{ + Namespace: "app", + Subsystem: "log_loki", + Name: "dropped_total", + Help: "Total count of dropped log lines due to full buffer", +}) diff --git a/docs/metrics.md b/docs/metrics.md index 4dcd87fdae..2c4032b51c 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -29,6 +29,7 @@ when storing metrics from multiple nodes or clusters in one Prometheus instance. | `app_health_checks` | Gauge | Application health checks by name and severity. Set to 1 for failing, 0 for ok. | `severity, name` | | `app_health_metrics_high_cardinality` | Gauge | Metrics with high cardinality by name. | `name` | | `app_log_error_total` | Counter | Total count of logged errors by topic | `topic` | +| `app_log_loki_dropped_total` | Counter | Total count of dropped log lines due to full buffer | | | `app_log_warn_total` | Counter | Total count of logged warnings by topic | `topic` | | `app_monitoring_readyz` | Gauge | Set to 1 if the node is operational and monitoring api `/readyz` endpoint is returning 200s. Else `/readyz` is returning 500s and this metric is either set to 2 if the beacon node is down, or3 if the beacon node is syncing, or4 if quorum peers are not connected. | | | `app_peer_name` | Gauge | Constant gauge with label set to the name of the cluster peer | `peer_name` |