Skip to content

Commit c25991b

Browse files
committed
Add new JSON log formatter QW_LOG_FORMAT=DDG
The existing JSON formatter (`QW_LOG_FORMAT=json`) produces deeply nested JSON with separate span objects. This makes logs hard to search and filter. The new DDG formatter outputs a flat JSON object with four fields: - `timestamp`: RFC 3339 - `level`: log level (INFO, WARN, ERROR, ...) - `service`: crate name extracted from the tracing target - `message`: the full text-formatted log line (level, target, spans, fields) The message reuses tracing's standard Full formatter, so it matches the regular text output minus the timestamp.
1 parent 290d055 commit c25991b

2 files changed

Lines changed: 303 additions & 16 deletions

File tree

quickwit/quickwit-cli/src/logger.rs

Lines changed: 302 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ use time::format_description::BorrowedFormatItem;
2929
use tracing::{Event, Level, Subscriber};
3030
use tracing_subscriber::EnvFilter;
3131
use tracing_subscriber::field::RecordFields;
32-
use tracing_subscriber::fmt::FmtContext;
3332
use tracing_subscriber::fmt::format::{
3433
DefaultFields, Format, FormatEvent, FormatFields, Full, Json, JsonFields, Writer,
3534
};
3635
use tracing_subscriber::fmt::time::UtcTime;
36+
use tracing_subscriber::fmt::FmtContext;
3737
use tracing_subscriber::layer::SubscriberExt;
3838
use tracing_subscriber::prelude::*;
3939
use tracing_subscriber::registry::LookupSpan;
@@ -174,30 +174,37 @@ fn time_formatter() -> UtcTime<Vec<BorrowedFormatItem<'static>>> {
174174
enum EventFormat<'a> {
175175
Full(Format<Full, UtcTime<Vec<BorrowedFormatItem<'a>>>>),
176176
Json(Format<Json>),
177+
Ddg(DdgFormat),
177178
}
178179

179180
impl EventFormat<'_> {
180-
/// Gets the log format from the environment variable `QW_LOG_FORMAT`. Returns a JSON
181-
/// formatter if the variable is set to `json`, otherwise returns a full formatter.
181+
/// Gets the log format from the environment variable `QW_LOG_FORMAT`.
182182
fn get_from_env() -> Self {
183-
if get_from_env_opt::<String>("QW_LOG_FORMAT", false)
184-
.map(|log_format| log_format.eq_ignore_ascii_case("json"))
185-
.unwrap_or(false)
183+
match get_from_env_opt::<String>("QW_LOG_FORMAT", false)
184+
.as_deref()
185+
.map(str::to_ascii_lowercase)
186+
.as_deref()
186187
{
187-
let json_format = tracing_subscriber::fmt::format().json();
188-
EventFormat::Json(json_format)
189-
} else {
190-
let full_format = tracing_subscriber::fmt::format()
191-
.with_target(true)
192-
.with_timer(time_formatter());
193-
194-
EventFormat::Full(full_format)
188+
Some("json") => {
189+
EventFormat::Json(tracing_subscriber::fmt::format().json())
190+
}
191+
Some("ddg") => {
192+
EventFormat::Ddg(DdgFormat::new())
193+
}
194+
_ => {
195+
let full_format = tracing_subscriber::fmt::format()
196+
.with_target(true)
197+
.with_timer(time_formatter());
198+
EventFormat::Full(full_format)
199+
}
195200
}
196201
}
197202

198203
fn format_fields(&self) -> FieldFormat {
199204
match self {
200-
EventFormat::Full(_) => FieldFormat::Default(DefaultFields::new()),
205+
EventFormat::Full(_) | EventFormat::Ddg(_) => {
206+
FieldFormat::Default(DefaultFields::new())
207+
}
201208
EventFormat::Json(_) => FieldFormat::Json(JsonFields::new()),
202209
}
203210
}
@@ -217,10 +224,70 @@ where
217224
match self {
218225
EventFormat::Full(format) => format.format_event(ctx, writer, event),
219226
EventFormat::Json(format) => format.format_event(ctx, writer, event),
227+
EventFormat::Ddg(format) => format.format_event(ctx, writer, event),
220228
}
221229
}
222230
}
223231

232+
/// Outputs JSON with `timestamp`, `level`, `service`, and `message` fields.
233+
/// The `service` is extracted from the tracing target (crate name).
234+
/// The `message` is formatted using the regular text formatter (level, target, spans, fields).
235+
///
236+
/// Example output:
237+
/// ```json
238+
/// {"timestamp":"2025-03-23T14:30:45Z","level":"INFO","service":"quickwit_search","message":"INFO quickwit_search: hello"}
239+
/// ```
240+
struct DdgFormat {
241+
text_format: Format<Full, ()>,
242+
}
243+
244+
impl DdgFormat {
245+
fn new() -> Self {
246+
Self {
247+
text_format: tracing_subscriber::fmt::format()
248+
.with_target(true)
249+
.without_time(),
250+
}
251+
}
252+
}
253+
254+
impl<S, N> FormatEvent<S, N> for DdgFormat
255+
where
256+
S: Subscriber + for<'a> LookupSpan<'a>,
257+
N: for<'a> FormatFields<'a> + 'static,
258+
{
259+
fn format_event(
260+
&self,
261+
ctx: &FmtContext<'_, S, N>,
262+
mut writer: Writer<'_>,
263+
event: &Event<'_>,
264+
) -> fmt::Result {
265+
// Render the event as text using the Full formatter (without timestamp)
266+
let mut message = String::with_capacity(256);
267+
self.text_format
268+
.format_event(ctx, Writer::new(&mut message), event)?;
269+
let message = message.trim();
270+
271+
// Timestamp (RFC 3339)
272+
let timestamp = time::OffsetDateTime::now_utc()
273+
.format(&time::format_description::well_known::Rfc3339)
274+
.map_err(|_| fmt::Error)?;
275+
276+
let level = event.metadata().level().as_str();
277+
278+
// Extract crate name from the target (e.g. "quickwit_cli::logger" -> "quickwit_cli")
279+
let target = event.metadata().target();
280+
let service = target.split("::").next().unwrap_or(target);
281+
282+
// Write JSON with properly escaped message
283+
let escaped_message = serde_json::to_string(message).map_err(|_| fmt::Error)?;
284+
writeln!(
285+
writer,
286+
r#"{{"timestamp":"{timestamp}","level":"{level}","service":"{service}","message":{escaped_message}}}"#
287+
)
288+
}
289+
}
290+
224291
enum FieldFormat {
225292
Default(DefaultFields),
226293
Json(JsonFields),
@@ -363,3 +430,223 @@ pub(super) mod jemalloc_profiled {
363430
))
364431
}
365432
}
433+
434+
#[cfg(test)]
435+
mod tests {
436+
use std::sync::{Arc, Mutex};
437+
438+
use tracing_subscriber::layer::SubscriberExt;
439+
440+
use super::*;
441+
442+
/// A shared buffer writer for capturing log output in tests.
443+
#[derive(Clone, Default)]
444+
struct TestMakeWriter(Arc<Mutex<Vec<u8>>>);
445+
446+
impl TestMakeWriter {
447+
fn get_string(&self) -> String {
448+
String::from_utf8(self.0.lock().unwrap().clone()).unwrap()
449+
}
450+
}
451+
452+
impl<'a> tracing_subscriber::fmt::MakeWriter<'a> for TestMakeWriter {
453+
type Writer = TestWriter;
454+
455+
fn make_writer(&'a self) -> Self::Writer {
456+
TestWriter(self.0.clone())
457+
}
458+
}
459+
460+
struct TestWriter(Arc<Mutex<Vec<u8>>>);
461+
462+
impl std::io::Write for TestWriter {
463+
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
464+
self.0.lock().unwrap().write_all(buf)?;
465+
Ok(buf.len())
466+
}
467+
468+
fn flush(&mut self) -> std::io::Result<()> {
469+
Ok(())
470+
}
471+
}
472+
473+
/// Sets up a subscriber with `DdgFormat` and captures the output.
474+
fn capture_ddg_log<F: FnOnce()>(f: F) -> serde_json::Value {
475+
let writer = TestMakeWriter::default();
476+
let subscriber = tracing_subscriber::registry().with(
477+
tracing_subscriber::fmt::layer()
478+
.event_format(DdgFormat::new())
479+
.fmt_fields(FieldFormat::Default(DefaultFields::new()))
480+
.with_ansi(false)
481+
.with_writer(writer.clone()),
482+
);
483+
tracing::subscriber::with_default(subscriber, f);
484+
let output = writer.get_string();
485+
serde_json::from_str(&output).expect("output should be valid JSON")
486+
}
487+
488+
const TARGET: &str = "quickwit_cli::logger::tests";
489+
490+
#[test]
491+
fn test_ddg_format_has_expected_fields() {
492+
let json = capture_ddg_log(|| tracing::info!("hello"));
493+
let obj = json.as_object().unwrap();
494+
assert_eq!(obj.len(), 4, "{obj:?}");
495+
assert!(obj.contains_key("timestamp"));
496+
assert!(obj.contains_key("level"));
497+
assert!(obj.contains_key("service"));
498+
assert!(obj.contains_key("message"));
499+
}
500+
501+
#[test]
502+
fn test_ddg_format_basic_message() {
503+
let json = capture_ddg_log(|| tracing::info!("hello world"));
504+
assert_eq!(json["level"], "INFO");
505+
assert_eq!(json["service"], "quickwit_cli");
506+
assert_eq!(
507+
json["message"].as_str().unwrap(),
508+
format!("INFO {TARGET}: hello world")
509+
);
510+
}
511+
512+
#[test]
513+
fn test_ddg_format_with_fields() {
514+
let json = capture_ddg_log(|| {
515+
tracing::info!(key = "value", count = 42, "processing request");
516+
});
517+
assert_eq!(
518+
json["message"].as_str().unwrap(),
519+
format!("INFO {TARGET}: processing request key=\"value\" count=42")
520+
);
521+
}
522+
523+
#[test]
524+
fn test_ddg_format_with_span() {
525+
let json = capture_ddg_log(|| {
526+
let span = tracing::info_span!("my_span", id = 123);
527+
let _guard = span.enter();
528+
tracing::info!("inside span");
529+
});
530+
assert_eq!(
531+
json["message"].as_str().unwrap(),
532+
format!("INFO my_span{{id=123}}: {TARGET}: inside span")
533+
);
534+
}
535+
536+
/// Captures raw text output using the production Full formatter (with timestamp, no ANSI).
537+
fn capture_full_log<F: FnOnce()>(f: F) -> String {
538+
let writer = TestMakeWriter::default();
539+
let full_format = tracing_subscriber::fmt::format()
540+
.with_target(true)
541+
.with_timer(time_formatter());
542+
let subscriber = tracing_subscriber::registry().with(
543+
tracing_subscriber::fmt::layer()
544+
.event_format(full_format)
545+
.fmt_fields(DefaultFields::new())
546+
.with_ansi(false)
547+
.with_writer(writer.clone()),
548+
);
549+
tracing::subscriber::with_default(subscriber, f);
550+
writer.get_string().trim_end().to_string()
551+
}
552+
553+
#[test]
554+
fn test_ddg_format_with_nested_spans() {
555+
let make_event = || {
556+
let outer = tracing::info_span!("outer", req_id = 42);
557+
let _outer_guard = outer.enter();
558+
let inner = tracing::info_span!("inner", step = "parse");
559+
let _inner_guard = inner.enter();
560+
tracing::info!("deep inside");
561+
};
562+
563+
// Compare DDG message against production Full formatter output.
564+
// The only difference is the leading timestamp.
565+
let full_output = capture_full_log(make_event);
566+
let json = capture_ddg_log(make_event);
567+
let ddg_message = json["message"].as_str().unwrap();
568+
569+
// Full output: "2025-03-23T14:30:45.123Z INFO outer{...}: target: deep inside"
570+
// DDG message: "INFO outer{...}: target: deep inside"
571+
// The timestamp adds one extra space of padding, so we trim both and compare.
572+
let full_without_timestamp = full_output
573+
.find(" ")
574+
.map(|pos| &full_output[pos..])
575+
.unwrap_or(&full_output);
576+
assert_eq!(
577+
ddg_message.trim_start(),
578+
full_without_timestamp.trim_start(),
579+
);
580+
581+
assert_eq!(
582+
ddg_message,
583+
format!(
584+
"INFO outer{{req_id=42}}:inner{{step=\"parse\"}}: {TARGET}: deep inside"
585+
)
586+
);
587+
}
588+
589+
#[test]
590+
fn test_ddg_format_escapes_special_chars() {
591+
let json = capture_ddg_log(|| {
592+
tracing::info!(r#"hello "world" with\backslash"#);
593+
});
594+
assert_eq!(
595+
json["message"].as_str().unwrap(),
596+
format!(r#"INFO {TARGET}: hello "world" with\backslash"#)
597+
);
598+
}
599+
600+
#[test]
601+
fn test_ddg_format_escapes_newlines() {
602+
let json = capture_ddg_log(|| {
603+
tracing::info!("line1\nline2\ttab");
604+
});
605+
assert_eq!(
606+
json["message"].as_str().unwrap(),
607+
format!("INFO {TARGET}: line1\nline2\ttab")
608+
);
609+
}
610+
611+
#[test]
612+
fn test_ddg_format_levels() {
613+
for (expected_level, log_fn) in [
614+
("WARN", Box::new(|| tracing::warn!("w")) as Box<dyn FnOnce()>),
615+
("ERROR", Box::new(|| tracing::error!("e"))),
616+
("INFO", Box::new(|| tracing::info!("i"))),
617+
] {
618+
let json = capture_ddg_log(log_fn);
619+
assert_eq!(json["level"], expected_level);
620+
}
621+
}
622+
623+
#[test]
624+
fn test_ddg_format_timestamp_is_rfc3339() {
625+
let json = capture_ddg_log(|| tracing::info!("hello"));
626+
let ts = json["timestamp"].as_str().unwrap();
627+
time::OffsetDateTime::parse(ts, &time::format_description::well_known::Rfc3339)
628+
.unwrap_or_else(|err| panic!("timestamp is not valid RFC 3339: {ts}: {err}"));
629+
}
630+
631+
#[test]
632+
fn test_ddg_format_with_bool_and_float_fields() {
633+
let json = capture_ddg_log(|| {
634+
tracing::info!(enabled = true, ratio = 0.75, "status check");
635+
});
636+
assert_eq!(
637+
json["message"].as_str().unwrap(),
638+
format!("INFO {TARGET}: status check enabled=true ratio=0.75")
639+
);
640+
}
641+
642+
#[test]
643+
fn test_ddg_format_fields_only() {
644+
let json = capture_ddg_log(|| {
645+
tracing::info!(action = "ping");
646+
});
647+
assert_eq!(
648+
json["message"].as_str().unwrap(),
649+
format!("INFO {TARGET}: action=\"ping\"")
650+
);
651+
}
652+
}

quickwit/quickwit-config/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,5 +46,5 @@ quickwit-proto = { workspace = true, features = ["testsuite"] }
4646
quickwit-common = { workspace = true, features = ["testsuite"] }
4747

4848
[features]
49-
testsuite = []
49+
testsuite = ["quickwit-proto/testsuite"]
5050
vrl = ["dep:vrl"]

0 commit comments

Comments
 (0)