From f6aeff4f509875831d02a8f40bc54a74dd71d445 Mon Sep 17 00:00:00 2001
From: Andrei Smirnov <andrei@obol.tech>
Date: Wed, 11 Mar 2026 22:29:29 +0300
Subject: [PATCH] app: improved loki client (#4389)

Improve the Loki client: make the input channel buffered, and drop (and count) log lines when the buffer is full instead of blocking the caller.

category: refactor
ticket: none
---
 app/log/loki/client.go  |  5 ++++-
 app/log/loki/metrics.go | 16 ++++++++++++++++
 docs/metrics.md         |  1 +
 3 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 app/log/loki/metrics.go

diff --git a/app/log/loki/client.go b/app/log/loki/client.go
index 6a493ffc20..ab894d96c0 100644
--- a/app/log/loki/client.go
+++ b/app/log/loki/client.go
@@ -32,6 +32,7 @@ const (
 	batchWait     = 1 * time.Second
 	batchMax      = 5 * 1 << 20 // 5MB
 	maxLogLineLen = 4 << 10     // 4096B
+	inputBuffer   = 1000        // Buffered channel capacity for log lines
 )
 
 // lazyLabelsFunc abstracts lazy loading of labels, logs will only be sent when it returns true.
@@ -59,7 +60,7 @@ func newInternal(endpoint string, serviceLabel string, batchWait time.Duration,
 		endpoint:       endpoint,
 		done:           make(chan struct{}),
 		quit:           make(chan struct{}),
-		input:          make(chan string),
+		input:          make(chan string, inputBuffer),
 		batchMax:       batchMax,
 		batchWait:      batchWait,
 		maxLogLineLen:  maxLogLineLen,
@@ -156,6 +157,8 @@ func (c *Client) Add(line string) {
 	select {
 	case c.input <- line:
 	case <-c.quit:
+	default:
+		droppedTotal.Inc()
 	}
 }
 
diff --git a/app/log/loki/metrics.go b/app/log/loki/metrics.go
new file mode 100644
index 0000000000..5ae1a7994f
--- /dev/null
+++ b/app/log/loki/metrics.go
@@ -0,0 +1,16 @@
+// Copyright © 2022-2026 Obol Labs Inc. Licensed under the terms of a Business Source License 1.1
+
+package loki
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+
+	"github.com/obolnetwork/charon/app/promauto"
+)
+
+var droppedTotal = promauto.NewCounter(prometheus.CounterOpts{
+	Namespace: "app",
+	Subsystem: "log_loki",
+	Name:      "dropped_total",
+	Help:      "Total count of dropped log lines due to full buffer",
+})
diff --git a/docs/metrics.md b/docs/metrics.md
index 4dcd87fdae..2c4032b51c 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -29,6 +29,7 @@ when storing metrics from multiple nodes or clusters in one Prometheus instance.
 | `app_health_checks` | Gauge | Application health checks by name and severity. Set to 1 for failing, 0 for ok. | `severity, name` |
 | `app_health_metrics_high_cardinality` | Gauge | Metrics with high cardinality by name. | `name` |
 | `app_log_error_total` | Counter | Total count of logged errors by topic | `topic` |
+| `app_log_loki_dropped_total` | Counter | Total count of dropped log lines due to full buffer |  |
 | `app_log_warn_total` | Counter | Total count of logged warnings by topic | `topic` |
 | `app_monitoring_readyz` | Gauge | Set to 1 if the node is operational and monitoring api `/readyz` endpoint is returning 200s. Else `/readyz` is returning 500s and this metric is either set to 2 if the beacon node is down, or3 if the beacon node is syncing, or4 if quorum peers are not connected. |  |
 | `app_peer_name` | Gauge | Constant gauge with label set to the name of the cluster peer | `peer_name` |