From e2652fba3021f20c7c6de2293643a7f13e8f8d96 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Wed, 20 May 2026 11:24:11 +0100 Subject: [PATCH 1/3] [.Net][ADR] OTel auto-instrumentation proposal Big OTel rock. Tribe pattern say AddXxxInstrumentation() on provider, kill switch env var. Agent Framework miss. ADR 0026 land options + decision. Recommended: embed extensions in core assemblies. AddAgentFrameworkInstrumentation() on TracerProviderBuilder + MeterProviderBuilder. Dep small: OpenTelemetry.Api.ProviderBuilderExtensions only, 2 net-new transitive packages. AsAIAgent() factories auto-wrap via IServiceProvider param when AgentFrameworkInstrumentationOptions registered. Workflow.AsAIAgent() join same. Workflow internal spans still opt-in via WorkflowBuilder.WithOpenTelemetry(). Kill switch: OTEL_DOTNET_AGENTFRAMEWORK_INSTRUMENTATION_ENABLED. Multi-call: last-wins. ADR-0003 not superseded. Source-naming questions deferred to follow-on ADR. Refs #5852 --- .../0026-agent-otel-auto-instrumentation.md | 363 ++++++++++++++++++ 1 file changed, 363 insertions(+) create mode 100644 docs/decisions/0026-agent-otel-auto-instrumentation.md diff --git a/docs/decisions/0026-agent-otel-auto-instrumentation.md b/docs/decisions/0026-agent-otel-auto-instrumentation.md new file mode 100644 index 00000000000..7ccf734f523 --- /dev/null +++ b/docs/decisions/0026-agent-otel-auto-instrumentation.md @@ -0,0 +1,363 @@ +--- +status: proposed +contact: rogerbarreto +date: 2026-05-20 +deciders: stephentoub, markwallace-microsoft, rogerbarreto, westey-m +consulted: +informed: +--- + +# Agent Framework OpenTelemetry Auto-Instrumentation + +## Context and Problem Statement + +Today: telemetry on agent = manual `UseOpenTelemetry()` per agent (see +[ADR-0003](./0003-agent-opentelemetry-instrumentation.md)). Wrapper pattern +fine for explicit control. No match for .NET OTel ecosystem convention. 
+ASP.NET Core, EF Core, SqlClient = one `AddXxxInstrumentation()` on provider +builder, done. + +Three frictions: + +1. No convention entry point. User know `AddAspNetCoreInstrumentation()`. No + `AddAgentFrameworkInstrumentation()` exist. +2. DI scenarios = per-agent ceremony. Many `AsAIAgent()` factories in + provider packages (`OpenAI`, `Anthropic`, `Foundry`, `A2A`, etc.) not + reached by any DI wrap. +3. No kill switch. Ops cannot disable Agent Framework telemetry without + rebuild. `OTEL_DOTNET_AUTO_<NAME>_INSTRUMENTATION_ENABLED` is the + convention. + +Need: + +- Single call on `TracerProviderBuilder` / `MeterProviderBuilder`. Done. +- `AsAIAgent()` factories auto-wrap when DI present. +- Full parity with `UseOpenTelemetry()` knobs (source name, sensitive data). + No worse-API trap. +- Env-var kill switch. +- Keep `OpenTelemetryAgent` + `UseOpenTelemetry()` as primitives. + +## Decision Drivers + +- **Convention alignment.** Match `AddXxxInstrumentation()` shape. User + reflex wins discovery. +- **Dep hygiene.** Small + stable deps only. Use + `OpenTelemetry.Api.ProviderBuilderExtensions` — the package OTel publishes + exactly for library authors who expose provider-builder extensions + without SDK pull. +- **Layer separation.** Core knows zero workflow. Workflow knows zero core + internals. Each owns own surface. +- **Parity with `UseOpenTelemetry()`.** Every knob today reachable through + new entry point. No regression. +- **No surprise.** Explicit user call. No env-var SDK bootstrap (spec + forbid). +- **Auto-wire `AsAIAgent()`.** Factory result wrapped when caller pass + `IServiceProvider`. +- **Back-compat.** Existing `UseOpenTelemetry()` unchanged. + +## Considered Options + +- Status quo. Keep `UseOpenTelemetry()` only. +- **Embed extensions in core assemblies.** Recommended. +- New dedicated instrumentation packages. +- Auto-wrap on generic `OTEL_*` env vars. +- Service-collection-only extension. 
+ +## Decision Outcome + +Chosen: **Embed extensions in core assemblies.** + +Add `OpenTelemetry.Api.ProviderBuilderExtensions` to `Microsoft.Agents.AI`. +Bump `Microsoft.Agents.AI.Workflows` from `OpenTelemetry.Api` to same. +Each assembly expose own `AddAgentFrameworkInstrumentation()` (workflows +flavor: `AddAgentFrameworkWorkflowsInstrumentation()`) on +`TracerProviderBuilder` + `MeterProviderBuilder`. Namespaces: +`OpenTelemetry.Trace` / `OpenTelemetry.Metrics` per ASP.NET Core +convention. Each extension subscribe only its own source. Neither know +other layer. + +New `AgentFrameworkInstrumentationOptions` class. Two props mirror +`UseOpenTelemetry()` exactly: `SourceName` override + `ConfigureAgent` +callback. Activation register options as DI singleton. Options presence += active signal. No separate marker type. + +`AsAIAgent()` overloads gain optional `IServiceProvider? services = null` +parameter. When services pass + options registered → factory wrap result +with `OpenTelemetryAgent`, invoke `ConfigureAgent` callback, return. +Idempotent: skip if already wrapped. `Workflow.AsAIAgent()` join same +pattern via agents marker (its `WorkflowHostAgent` IS `AIAgent`). +Workflow internal spans still opt-in via `WorkflowBuilder.WithOpenTelemetry()` +at build time; activation only subscribe source. + +Env-var kill switch: `OTEL_DOTNET_AGENTFRAMEWORK_INSTRUMENTATION_ENABLED` +(default `true`). When `false` → `AddAgentFrameworkInstrumentation()` +no-op. No source subscribe, no DI register, no auto-wire downstream. +Read once at extension-call time. + +Multi-call semantics: **last-wins** via plain +`services.AddSingleton(options)`. App-level config beat library-level +default. Library cannot silently suppress explicit app override. 
+ +### Open question for reviewers + +One sub-decision deliberately open: + +**Do `OpenTelemetryAgent`, `UseOpenTelemetry()`, `OpenTelemetryConsts` +stay in core or move to new dedicated packages?** + +- Recommended option above = **stay in core**. Activation extensions + added alongside. No public types removed. +- "New dedicated instrumentation packages" alternative = **move out**. + Clean separation. Breaking change for current `UseOpenTelemetry()` + callers (need new package reference). + +Two endpoints coherent. Middle ground = split-assembly awkward, not +recommended either way. + +### Consequences + +- **Good:** One-line `AddAgentFrameworkInstrumentation()` matches user + ecosystem reflex. +- **Good:** `AsAIAgent()` factories auto-wrap under DI. Zero per-agent + ceremony. +- **Good:** Full parity with `UseOpenTelemetry()`. No regression for + migrating users. +- **Good:** Dep cost small + stable. Two net-new transitive packages + (`OpenTelemetry.Api`, `OpenTelemetry.Api.ProviderBuilderExtensions`). + Both `Stable` per OTel versioning. `M.E.DependencyInjection.Abstractions` + + `System.Diagnostics.DiagnosticSource` already in core's closure, + reused. +- **Good:** Ops get documented kill switch via single env var. +- **Good:** Non-breaking. Existing `UseOpenTelemetry()` chain unchanged. +- **Neutral:** Workflow internal spans still need + `WorkflowBuilder.WithOpenTelemetry()` at build time. Activation only + subscribe source. Document in samples. +- **Neutral:** Agents + workflows have separate activation methods. "No + workflow knowledge in core" principle precludes unified method. +- **Bad:** Adds optional `IServiceProvider? services = null` to every + `AsAIAgent()` overload across ~10–15 provider packages. + Source-compatible + binary-compatible. Coordinated release needed. +- **Bad:** Core public API grows: one options class + four extensions + per affected assembly. Reviewers minimizing core surface may prefer + dedicated-packages alternative. 
+ +## Validation + +- **Unit tests** assert `AddAgentFrameworkInstrumentation()` register + correct source + meter. Options singleton resolvable via DI with + configured values. +- **Unit tests** assert `AsAIAgent()` with options-registered DI → + outermost wrapper = `OpenTelemetryAgent`. `ConfigureAgent` invoked. +- **Unit tests** assert `AsAIAgent()` without DI or without options + registration → unwrapped agent (current behavior preserved). +- **Unit tests** assert already-wrapped input not double-wrapped. +- **Unit tests** assert last-wins on repeated activation: most recent + options instance resolved. +- **Unit tests** assert + `OTEL_DOTNET_AGENTFRAMEWORK_INSTRUMENTATION_ENABLED=false` → no-op + activation. +- **Integration test** capture `invoke_agent` activity via in-memory + exporter through new path. End-to-end. No explicit `UseOpenTelemetry()`. +- **Sample** demonstrate new activation alongside existing manual sample. + README cross-link explain when to use each. + +## Pros and Cons of the Options + +### Status quo + +Continue per-agent `AIAgentBuilder.UseOpenTelemetry()`. No new packages. +No new APIs. + +- **Good:** Zero work. Zero new deps. +- **Good:** Explicit control preserved. +- **Bad:** Diverge from universal .NET OTel convention. New user reflex + miss. +- **Bad:** Per-agent ceremony bad at DI scale. `AsAIAgent()` factories + produce telemetry-less agents without per-callsite work. +- **Bad:** No kill switch. +- **Bad:** Exact problem this ADR fix. + +### Embed extensions in core assemblies (recommended) + +Add `OpenTelemetry.Api.ProviderBuilderExtensions` to +`Microsoft.Agents.AI` (workflows bump from `OpenTelemetry.Api` to same). +Add `AddAgentFrameworkInstrumentation()` + +`AddAgentFrameworkWorkflowsInstrumentation()` per assembly. Add options +class, DI auto-wire mechanism, kill-switch env var. Keep +`OpenTelemetryAgent`, `UseOpenTelemetry()`, `OpenTelemetryConsts` in +core unchanged. 
+ +- **Good:** Single-package install with convention-aligned extension. +- **Good:** Non-breaking. Existing `UseOpenTelemetry()` work as-is. +- **Good:** Dep surface minimal + stable. Two net-new transitive + packages (`OpenTelemetry.Api`, `OpenTelemetry.Api.ProviderBuilderExtensions`). + Verified by NuGet resolution against current + `Microsoft.Agents.AI` graph. + `M.E.DependencyInjection.Abstractions` + `System.Diagnostics.DiagnosticSource` + already present, reused. +- **Good:** Workflows already depend on `OpenTelemetry.Api`. Bump to PBE + = extend existing pattern, not new coupling. +- **Good:** Each assembly own its source name only. No cross-layer + knowledge. +- **Good:** Auto-wire reuse `OpenTelemetryAgent`. No parallel pipeline. +- **Neutral:** Wrapper stays in same assembly as activation extension. + Some reviewer may prefer split (see alternative). +- **Bad:** Core public API grows: one options class + four extensions + (two per builder type) per affected assembly. +- **Bad:** Every `AsAIAgent()` overload across provider packages need + optional `IServiceProvider? services = null`. Source + binary + compatible. Breadth = coordinated release. + +### New dedicated instrumentation packages + +Move `OpenTelemetryAgent`, `UseOpenTelemetry()`, `OpenTelemetryConsts` +out of core to new `Microsoft.Agents.AI.OpenTelemetry` + +`Microsoft.Agents.AI.Workflows.OpenTelemetry` packages. New packages +depend on respective core assembly + `OpenTelemetry.Api.ProviderBuilderExtensions`. +Expose activation extensions alongside moved wrapper. + +- **Good:** Strictest separation. Core assemblies regain zero OTel-aware + code. +- **Good:** Match ASP.NET Core / EF Core packaging discipline most + literally. +- **Good:** Library authors who reference `Microsoft.Agents.AI` but + never want OTel pay nothing. +- **Bad:** Breaking-change release for current `UseOpenTelemetry()` + callers. Need new package ref. 
`Microsoft.Agents.AI` still pre-GA so + window open, but migration cost real. +- **Bad:** Two additional packages to publish + maintain. Own CHANGELOG, + release cadence, version compat. For small amount of code. +- **Bad:** User friction higher. Two-package install for activation entry + point vs one in recommended. + +### Auto-wrap on generic `OTEL_*` env vars + +Library detect standard OTel env vars (`OTEL_SERVICE_NAME`, +`OTEL_EXPORTER_OTLP_ENDPOINT`, ...) and auto-wrap every constructed +agent with `OpenTelemetryAgent`. No user activation call needed. + +- **Good:** Zero user activation code in apps with standard env vars. +- **Bad:** Violate OTel spec: library not start SDK from env. App start + SDK. +- **Bad:** Env vars in question = SDK config not "instrument me" signal. + Presence in environment not intended for Agent Framework telemetry + (e.g. CI configured for other app) silently activate. +- **Bad:** Serious double-wrap risk if user also call `UseOpenTelemetry()`. + Detect "already wrapped" possible but fragile. +- **Bad:** Diverge from how every other .NET OTel instrumentation lib + activate. + +### Service-collection-only extension + +Single `services.AddAgentFrameworkInstrumentation()` on `IServiceCollection`. +Register auto-wire decorator. Subscribe zero sources. User still subscribe +agent source manually on `TracerProviderBuilder`. + +- **Good:** No new OTel package dep in core + (`M.E.DependencyInjection.Abstractions` already there). +- **Bad:** Abandon most discoverable half of convention. User expect + activation on builder where they configured tracing + metrics. +- **Bad:** User still call + `tracerProviderBuilder.AddSource("Experimental.Microsoft.Agents.AI")` + by hand. Defeat point of convention. +- **Bad:** Strict subset of recommended option's surface. Marginal dep + savings. + +## More Information + +Additive to +[ADR-0003 (Agent OpenTelemetry Instrumentation)](./0003-agent-opentelemetry-instrumentation.md). 
+`OpenTelemetryAgent` + `UseOpenTelemetry()` remain underlying mechanism +for per-agent telemetry. This ADR add convention-aligned activation on +top. ADR-0003 not superseded. + +Separate follow-on ADR anticipated for source-naming questions out of +scope here: + +- Workflows source name carry `"Experimental."` prefix? (Currently no; + agents source has it.) +- Consolidate workflows source under agents source? Or keep separate? +- When/how drop `"Experimental."` prefix in coordination with upstream + gen-AI semantic convention stabilization. + +`"Experimental."` prefix on agents source intentional. Mirror +`Microsoft.Extensions.AI` (`Experimental.Microsoft.Extensions.AI`). +Signal emitted telemetry conform to semconv still experimental. Drop +prefix prematurely = falsely advertise stability upstream not yet +provide. + +Implementation tracked by GitHub issue +[#5852](https://github.com/microsoft/agent-framework/issues/5852). +Source-naming ADR implementation tracked separately when opened. + +### Reference API shape + +Recommended option produce approximately this user-facing API. +Signatures illustrative; final shape match +`OpenTelemetry.Instrumentation.AspNetCore` overload set. + +```csharp +namespace Microsoft.Agents.AI; + +public class AgentFrameworkInstrumentationOptions +{ + /// <summary> + /// Overrides the default activity source name. When null, the default + /// ("Experimental.Microsoft.Agents.AI") is used. + /// </summary> + public string? SourceName { get; set; } + + /// <summary> + /// Invoked against every OpenTelemetryAgent instance produced by the + /// auto-wiring pipeline. Use this to set EnableSensitiveData and any + /// future per-wrapper options. + /// </summary> + public Action<OpenTelemetryAgent>? 
ConfigureAgent { get; set; } +} +``` + +```csharp +namespace OpenTelemetry.Trace; + +public static class AgentFrameworkInstrumentationTracerProviderBuilderExtensions +{ + public static TracerProviderBuilder AddAgentFrameworkInstrumentation(this TracerProviderBuilder builder); + public static TracerProviderBuilder AddAgentFrameworkInstrumentation( + this TracerProviderBuilder builder, + Action<AgentFrameworkInstrumentationOptions>? configure); + public static TracerProviderBuilder AddAgentFrameworkInstrumentation( + this TracerProviderBuilder builder, + string? name, + Action<AgentFrameworkInstrumentationOptions>? configure); +} +``` + +```csharp +// Symmetric extensions exist on MeterProviderBuilder in OpenTelemetry.Metrics. +// Symmetric extensions exist in Microsoft.Agents.AI.Workflows +// under the name AddAgentFrameworkWorkflowsInstrumentation. +``` + +### Reference activation example + +```csharp +services.AddOpenTelemetry() + .WithTracing(t => t + .AddAgentFrameworkInstrumentation(o => o.ConfigureAgent = ot => ot.EnableSensitiveData = true) + .AddAgentFrameworkWorkflowsInstrumentation() + .AddOtlpExporter()) + .WithMetrics(m => m + .AddAgentFrameworkInstrumentation() + .AddOtlpExporter()); + +// Anywhere in the app +var agent = chatClient.AsAIAgent(services: serviceProvider); +// Auto-wrapped with OpenTelemetryAgent because options singleton present in serviceProvider. +``` + +### Reference kill-switch example + +```bash +OTEL_DOTNET_AGENTFRAMEWORK_INSTRUMENTATION_ENABLED=false dotnet run +# AddAgentFrameworkInstrumentation() calls no-op. Agents not wrapped. 
+``` From f05067abd00a5813a397dbb9e6a8c0ecda7d6995 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+rogerbarreto@users.noreply.github.com> Date: Wed, 20 May 2026 11:32:57 +0100 Subject: [PATCH 2/3] Update deciders --- docs/decisions/0026-agent-otel-auto-instrumentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/decisions/0026-agent-otel-auto-instrumentation.md b/docs/decisions/0026-agent-otel-auto-instrumentation.md index 7ccf734f523..ee2f7504d24 100644 --- a/docs/decisions/0026-agent-otel-auto-instrumentation.md +++ b/docs/decisions/0026-agent-otel-auto-instrumentation.md @@ -2,7 +2,7 @@ status: proposed contact: rogerbarreto date: 2026-05-20 -deciders: stephentoub, markwallace-microsoft, rogerbarreto, westey-m +deciders: sergeymenshykh, rogerbarreto, westey-m, chetantoshniwal consulted: informed: --- From 125da260d159879927337c790d16e8f3cd808896 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Wed, 27 May 2026 15:59:20 +0100 Subject: [PATCH 3/3] [.Net][ADR] OTel BCL-native emission (ADR 0027) + implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ADR 0027 propose BCL-native emission for bare ChatClientAgent. AddSource("Experimental.Microsoft.Agents.AI") alone enough — match HttpClient pattern. Symmetric 2-span (invoke_agent + chat) for bare AND explicit OpenTelemetryAgent decorator path. Design: ChatClientAgent lazily hold OpenTelemetryAgent(this, defaultSource). RunCoreAsync delegate to self-wrap unless suppressed. Suppression: UseProvidedChatClientAsIs, no listeners on default source, or per-instance marker found on parent chain. OpenTelemetryAgent.UpdateCurrentActivity stamp marker = inner ChatClientAgent ref. Re-entry find marker, do chat work. Per-instance ReferenceEquals avoid over-suppress sub-agent calls. 
HasListeners fast path mandatory — without it metrics-only subscribers cause infinite recursion (no Activity → no marker → re-enter). Documented trade-off: metrics-only users must explicit-wrap with OpenTelemetryAgent. Tests: ChatClientAgentOpenTelemetryTests (16 new, RED→GREEN). OwnerScopedActivityCapture helper filter activities by per-instance marker via parent-chain walk. Test-only — production users unaffected. Make tests parallel-safe vs global ActivitySource listener registry contamination. Files: - docs/decisions/0027-agent-otel-bcl-native-emission.md (new) - ChatClientAgent.cs: _selfTelemetryWrap, SuppressSelfTelemetryWrap, EnsureSelfTelemetryWrap, RunChatClientCoreAsync split - OpenTelemetryAgent.cs: UpdateCurrentActivity stamp marker - OpenTelemetryConsts.cs: OwnedInvokeAgentScopeMarker, AgentActivitySource - OwnerScopedActivityCapture.cs (new, test helper) - ChatClientAgentOpenTelemetryTests.cs (new) All 1564 unit tests pass parallel. CI-parity dotnet format clean. --- .../0027-agent-otel-bcl-native-emission.md | 256 +++++++++ .../ChatClient/ChatClientAgent.cs | 154 +++++- .../Microsoft.Agents.AI/OpenTelemetryAgent.cs | 9 + .../OpenTelemetryConsts.cs | 25 + .../ChatClientAgentOpenTelemetryTests.cs | 501 ++++++++++++++++++ .../ChatClient/OwnerScopedActivityCapture.cs | 89 ++++ .../OpenTelemetryAgentTests.cs | 15 +- 7 files changed, 1038 insertions(+), 11 deletions(-) create mode 100644 docs/decisions/0027-agent-otel-bcl-native-emission.md create mode 100644 dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/ChatClientAgentOpenTelemetryTests.cs create mode 100644 dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/OwnerScopedActivityCapture.cs diff --git a/docs/decisions/0027-agent-otel-bcl-native-emission.md b/docs/decisions/0027-agent-otel-bcl-native-emission.md new file mode 100644 index 00000000000..e511c6d8f0f --- /dev/null +++ b/docs/decisions/0027-agent-otel-bcl-native-emission.md @@ -0,0 +1,256 @@ +--- +status: proposed +contact: rogerbarreto 
+date: 2026-05-26 +deciders: sergeymenshykh, rogerbarreto, westey-m, chetantoshniwal +consulted: +informed: +--- + +# Agent Framework BCL Native OpenTelemetry Emission + +## Context and Problem Statement + +Agent Framework today emits agent-level OpenTelemetry only when the developer explicitly wraps an agent with `OpenTelemetryAgent` (or `AIAgentBuilder.UseOpenTelemetry()`). Without that wrapping, a bare `ChatClientAgent` is silent even when an OpenTelemetry tracer provider has subscribed to the Agent Framework source. This diverges from common .NET BCL instrumentation shape (e.g. `HttpClient` / `System.Net.Http`), where subscribing to the source via `AddSource(...)` is enough — the library emits natively. + +We want telemetry to flow for bare `ChatClientAgent` as soon as a customer subscribes to the existing source `Experimental.Microsoft.Agents.AI`, with no other configuration change. The emitted shape must exactly match what `new OpenTelemetryAgent(agent)` (the current default decorator wrapping) produces today. + +## Decision Drivers + +- **No customer configuration change.** Existing `AddSource("Experimental.Microsoft.Agents.AI")` must remain the only required step. +- **No regression for explicit `OpenTelemetryAgent` users.** The decorator path must produce exactly the same spans it does today (no duplicates, no shape change). Verified by existing tests `AutoWireChatClient_DefaultsToEnabled_EmitsChatSpan_Async` expecting 2 activities (invoke_agent + chat). +- **Same span shape for bare and decorated paths.** Bare `ChatClientAgent` with `AddSource` must emit the same 2-span (`invoke_agent` + `chat`) shape as today's default decorator. +- **Reuse mature code.** `OpenTelemetryChatClient` already implements the full GenAI semantic conventions. Re-implementing them in agent code would fork the convention and double maintenance. +- **Preserve advanced opt-in behavior.** `OpenTelemetryAgent.EnableSensitiveData` and similar per-instance knobs must remain available. 
+- **No collision with external libraries** that also use the `gen_ai.operation.name=invoke_agent` semantic convention tag. +- **No suppression of nested sub-agents.** When a `ChatClientAgent` A's tool invokes sub-agent B, B must emit its own `invoke_agent` span (not be suppressed by A's marker). +- **Support metrics-only subscribers.** A user who subscribes only to the Agent Framework meter (no tracer) must still get GenAI metrics. (The chosen option meets this driver only through explicit `OpenTelemetryAgent` wrapping; see the `HasListeners` trade-off under Decision Outcome.) + +## Considered Options + +- **Option A**: Base `AIAgent` native emission via direct `ActivitySource.StartActivity` in `AIAgent.RunAsync`. +- **Option B**: `ChatClientAgent` self-wrap via `OpenTelemetryAgent(SelfForwardingAgent(this))` at agent level. +- **Option G**: `ChatClientAgent` self-wrap by lazily holding a `new OpenTelemetryAgent(this, defaultSource)` and delegating its `RunCoreAsync` through it (re-entering once with a per-instance marker that prevents recursion). +- **Option C**: `ChatClientAgent : OpenTelemetryAgent` (inheritance). +- **Option D**: Re-implement OTel logic into `ChatClientAgent` from scratch. +- **Option E**: Source-generator hook that wraps every `AIAgent` subclass at compile time. 
+ +## Decision Outcome + +**Chosen option: Option G (`ChatClientAgent` self-wrap), reusing `OpenTelemetryAgent` directly.** + +`ChatClientAgent` lazily caches a `_selfTelemetryWrap` field built as: + +```csharp +new OpenTelemetryAgent(this, sourceName: OpenTelemetryConsts.DefaultSourceName) +``` + +When the agent's `RunCoreAsync` (or `RunCoreStreamingAsync`) is invoked, it checks `SuppressSelfTelemetryWrap()`: + +- if the per-agent opt-out `ChatClientAgentOptions.UseProvidedChatClientAsIs` is `true`, OR +- if the default `ActivitySource` has no listeners (`OpenTelemetryConsts.AgentActivitySource.HasListeners() == false`), OR +- if an outer pipeline already owns the `invoke_agent` scope for this specific agent instance (per-instance marker on the parent chain), + +then the call delegates straight to `RunChatClientCoreAsync` (the extracted core that performs the actual chat work). Otherwise the call delegates to `_selfTelemetryWrap.RunAsync(...)`, which routes back through `ChatClientAgent.RunCoreAsync` exactly once. On that re-entry the per-instance marker (now set on the outer `invoke_agent` activity by `OpenTelemetryAgent.UpdateCurrentActivity`) is found and the chat work executes. + +This produces **a symmetric 2-span shape** (`invoke_agent` + `chat`) for both the bare path and the explicit `new OpenTelemetryAgent(agent)` decorator path, because both paths go through the same `OpenTelemetryAgent` machinery. The auto-wired `OpenTelemetryChatClient` injected by `OpenTelemetryAgent.GetRunOptionsWithChatClientWiring` produces the nested `chat` span on both paths. + +Suppression uses a **per-instance** custom property marker on `Activity.Current` and walks the **Activity parent chain** to find it. The marker value is the specific `ChatClientAgent` instance that the outer pipeline covers. The walk handles intermediate Activities (e.g. tool execution spans created by `FunctionInvokingChatClient`) that would otherwise hide the marker. 
+ +The marker is set in one place: + +- `OpenTelemetryAgent.UpdateCurrentActivity` resolves `this.InnerAgent.GetService<ChatClientAgent>()` and stores that reference as a custom property on the current `invoke_agent` activity. + +Both the bare path (where the inner agent is `this` ChatClientAgent that owns the `_selfTelemetryWrap`) and the explicit decorator path (where the user passes their own `OpenTelemetryAgent` over a `ChatClientAgent`) share this single mechanism. + +The `HasListeners` fast path is **required**, not optional. Without it, when no tracer subscribes to the default source, `OpenTelemetryChatClient` (inside the auto-wired layer) creates no `Activity`, so `UpdateCurrentActivity` has no activity on which to stamp the marker. The re-entrant call would then find no marker, re-enter the self-wrap, and recurse indefinitely. The fast path means that pure metrics-only subscribers (a meter subscriber without a tracer) do not trigger the self-wrap. This is an accepted trade-off; documented in code comments. + +Per-instance scoping correctly distinguishes between: + +- **Recursive calls into the same agent** (suppress — outer pipeline already owns the span) +- **Nested sub-agent calls via tools** (do not suppress — different instance, sub-agent emits its own span) + +Provider agents that wrap `ChatClientAgent` internally (`FoundryAgent`, `CopilotStudioAgent`, `GitHubCopilotAgent`, `PurviewAgent`, `HarnessAgent`) inherit telemetry transitively. They construct their inner `ChatClientAgent` with the user-facing Id and Name, so the emitted `invoke_agent` span carries the correct identity without any code change in those packages. + +Agents without an inner `ChatClientAgent` (`A2AAgent`, `DurableAIAgent`, `WorkflowHostAgent`) are **out of scope** for this ADR. A future ADR may add similar patterns. + +### Consequences + +- **Good**, because customers keep the same `AddSource("Experimental.Microsoft.Agents.AI")` configuration. 
+- **Good**, because bare `ChatClientAgent` runs now emit the same 2-span (`invoke_agent` + `chat`) shape as today's `new OpenTelemetryAgent(agent)` default — existing OTel dashboards continue to work. +- **Good**, because the bare path and the explicit decorator path produce identical telemetry — symmetric behavior with no path-specific surprises. +- **Good**, because explicit `OpenTelemetryAgent(ChatClientAgent)` continues to produce exactly the same spans as today (per-instance marker correctly suppresses the inner self-wrap). +- **Good**, because no custom `IChatClient` decorator is added to the codebase — the implementation reuses the existing `OpenTelemetryAgent` and its existing auto-wire factory. +- **Good**, because `OpenTelemetryAgent` decorator remains available for explicit `EnableSensitiveData`, custom source names, and provider-specific enrichment. +- **Good**, because no changes are required to `AIAgent` base, `DelegatingAIAgent`, or any provider package. +- **Good**, because no nested `AIAgent.RunAsync` chains beyond the one re-entry that the existing decorator path already performs — `CurrentRunContext` behavior matches today's decorator path exactly. +- **Good**, because per-instance marker is immune to external library collisions (external code cannot construct a meaningful `ChatClientAgent` reference matching `this`). +- **Good**, because nested sub-agent calls via tools emit their own `invoke_agent` spans (per-instance scoping does not over-suppress). +- **Bad**, because each `ChatClientAgent` instance lazily allocates one `OpenTelemetryAgent` (held until the agent is collected). Per-instance, not per-call. +- **Bad**, because pure metrics-only subscribers (no tracer) do not trigger the self-wrap; users in that mode must explicitly wrap with `OpenTelemetryAgent`. +- **Bad**, because the custom-source bypass edge case (custom source with no listeners + default source with listeners) is documented as known behavior, not fixed in this ADR. 
+- **Bad**, because per-class opt-in: future non-chat agents won't emit unless they implement their own pattern. + +## Validation + +- Unit test: bare `ChatClientAgent` with only `AddSource("Experimental.Microsoft.Agents.AI")` emits 2 activities (`invoke_agent` + `chat`) — matching today's `new OpenTelemetryAgent(agent)` default. +- Unit test: `OpenTelemetryAgent → ChatClientAgent` (existing decorated path) emits exactly the same 2 activities as today (no duplicates, no triple emission). +- Unit test: nested sub-agent invocation — agent A's tool calls agent B; both emit their own `invoke_agent` spans (per-instance scoping verified across parent-chain walk). +- Unit test: two sibling `ChatClientAgent` instances each emit their own 2 spans without cross-contamination. +- Unit test: `FoundryAgent`-style provider wrapper (passthrough `DelegatingAIAgent` over inner `ChatClientAgent`) emits 2 activities with the inner agent's user-facing identity. +- Unit test: passthrough `DelegatingAIAgent` does NOT suppress emission (the inner `ChatClientAgent` self-wraps as usual). +- Unit test: `UseProvidedChatClientAsIs = true` suppresses self-wrap; `ChatClientFactory` user hook is still invoked. +- Unit test: streaming path emits 2 activities per `RunStreamingAsync` call. +- Unit test: bare path defaults to NO message content capture (matches `OpenTelemetryChatClient.EnableSensitiveData = false` default). +- Unit test: explicit `OpenTelemetryAgent.EnableSensitiveData = true` continues to capture message content (existing knob unchanged). +- Unit test: lazy init under concurrent first calls produces a single cached `_selfTelemetryWrap` (Interlocked.CompareExchange race verified). +- Unit test: no listeners on default source — zero spans emitted; self-wrap is not allocated. + +### Test isolation pattern + +Tests in this ADR use a small `OwnerScopedActivityCapture` helper (raw `ActivityListener` + parent-chain marker filter) rather than a global `TracerProvider` + `InMemoryExporter`. 
This is necessary because OpenTelemetry .NET's listener registry is process-global per source name: two parallel tests both subscribing to `Experimental.Microsoft.Agents.AI` via separate `TracerProvider` instances see each other's activities and corrupt their exporters. The helper filters captured activities by walking each stopped activity's parent chain and only retains those whose chain contains the per-instance marker pointing at the test's owner `ChatClientAgent`. This makes tests fully parallel-safe without serialization attributes and without changing production behavior. The helper lives in the test project only; production users keep using `TracerProvider.AddSource(...)` exactly as today. + +## Pros and Cons of the Options + +### Option A: Base `AIAgent` native emission + +- **Good**, because universal coverage across every `AIAgent` subclass without per-class opt-in. +- **Good**, because matches `HttpClient`'s BCL pattern most literally. +- **Bad**, because re-implements GenAI semantic conventions in `AIAgent` base, forking maintenance from `OpenTelemetryChatClient`. +- **Bad**, because produces a thinner `invoke_agent` span (only basic agent.* tags) — different shape from today's default. +- **Bad**, because requires a service-key suppression contract across the `Microsoft.Agents.AI.Abstractions` → `Microsoft.Agents.AI` assembly boundary. + +### Option B: `ChatClientAgent` self-wrap at agent level via SelfForwardingAgent + +- **Good**, because reuses mature `OpenTelemetryAgent` plumbing as-is. +- **Bad**, because creates nested `AIAgent.RunAsync` chains — `CurrentRunContext` oscillates between three agent identities under streaming (each yield resets context at every nesting level). +- **Bad**, because `OpenTelemetryAgent` is `IDisposable` and the hidden instance per `ChatClientAgent` adds disposal lifetime concerns. +- **Bad**, because the ambient boolean marker on `Activity.Current` over-suppresses nested sub-agent calls. 
+ +### Option G: `ChatClientAgent` self-wrap reusing `OpenTelemetryAgent` (recommended) + +- **Good**, because reuses `OpenTelemetryAgent` end-to-end — no new instrumentation code in `ChatClientAgent`. The existing auto-wired `OpenTelemetryChatClient` handles all GenAI semconv tracking (token usage, response metadata, model, finish reasons, provider name). +- **Good**, because the bare path and the explicit decorator path go through the exact same code, producing identical telemetry shape (no path-specific surprises). +- **Good**, because per-instance marker (`ChatClientAgent` reference + `ReferenceEquals`) avoids external library collisions and avoids over-suppressing nested sub-agents. +- **Good**, because two-span shape matches today's `new OpenTelemetryAgent(agent)` default exactly. +- **Good**, because the only behavioral change at the call boundary is `ChatClientAgent` delegating its run to a cached `OpenTelemetryAgent` instance, which then re-enters `ChatClientAgent` exactly once with the marker in place. +- **Bad**, because per-class opt-in: future non-chat agents won't emit unless they implement their own pattern. +- **Bad**, because each `ChatClientAgent` lazily allocates one `OpenTelemetryAgent` (per-instance, not per-call). +- **Bad**, because pure metrics-only subscribers (no tracer) bypass the self-wrap — the `HasListeners` fast path is required to prevent infinite recursion in that scenario. +- **Bad**, because of a narrow custom-source edge case: when an explicit `OpenTelemetryAgent` decorator uses a custom source that has no listeners while the default source does have listeners, the inner self-wrap still emits on the default source. + +### Option C: `ChatClientAgent : OpenTelemetryAgent` (inheritance) + +- **Bad**, because inverts the wrapper/wrappee relationship. +- **Bad**, because `OpenTelemetryAgent` is `sealed` today. +- **Bad**, because doesn't cover `FoundryAgent`, `A2AAgent`, or any other concrete agent.
+ +### Option D: Re-implement OTel logic into `ChatClientAgent` from scratch + +- **Bad**, because duplicates `OpenTelemetryChatClient`'s semconv implementation. +- **Bad**, because doubles maintenance. + +### Option E: Source-generator hook + +- **Bad**, because the repository has no existing source-generator infrastructure. +- **Bad**, because source generators have IDE tooling and trimming complications. + +## More Information + +This ADR is additive to ADR 0026. ADR 0026 describes activation extensions and DI auto-wrap. ADR 0027 defines the baseline native emission behavior so that the source subscription alone is sufficient. + +Implementation details (file-by-file change shape, shared-source-file wiring, prototype walkthrough) live in the session plan document. + +### Per-instance marker mechanism + +The custom property key is `internal const string OpenTelemetryConsts.OwnedInvokeAgentScopeMarker = "Microsoft.Agents.AI.OpenTelemetry.OwnedInvokeAgentScope"`. The value stored is a `ChatClientAgent` reference. + +Suppression check (inside `ChatClientAgent.SuppressSelfTelemetryWrap`): +```csharp +for (var act = Activity.Current; act is not null; act = act.Parent) +{ + if (ReferenceEquals(act.GetCustomProperty(OpenTelemetryConsts.OwnedInvokeAgentScopeMarker), this)) + { + return true; // an outer pipeline (this agent's own self-wrap, or a user's OpenTelemetryAgent decorator) already owns invoke_agent for this specific agent + } +} +return false; +``` + +The walk handles intermediate Activities (e.g. `FunctionInvokingChatClient` tool execution spans) that would otherwise hide the marker. Cost is O(depth) — typically 3-5 levels, each a dictionary lookup with no allocations. 
+ +The marker is set in exactly one place, on the outer `invoke_agent` activity, by `OpenTelemetryAgent.UpdateCurrentActivity`: + +```csharp +var inner = this.InnerAgent.GetService<ChatClientAgent>(); +if (inner is not null) +{ + activity.SetCustomProperty(OpenTelemetryConsts.OwnedInvokeAgentScopeMarker, inner); +} +``` + +This unifies both the bare path (the `OpenTelemetryAgent` was created by `ChatClientAgent` itself, wrapping `this`) and the explicit decorator path (the user constructed `new OpenTelemetryAgent(myChatClientAgent)`). The `ReferenceEquals` check ensures nested sub-agent calls (different instances) are not suppressed. + +Custom properties are process-local state on `Activity` instances. They are NOT exported as OTLP span attributes — verified against OpenTelemetry .NET `ProtobufOtlpTraceSerializer.WriteActivityTags` which only serializes tags from `activity.EnumerateTagObjects()`. The marker has no impact on telemetry payload. + +### Why the `HasListeners` fast path is mandatory + +If `OpenTelemetryConsts.AgentActivitySource.HasListeners()` returns `false`, `ChatClientAgent.SuppressSelfTelemetryWrap` returns `true` immediately and the self-wrap is skipped. This is not just an optimization — it is required for correctness. + +When no tracer subscribes to the default source, the auto-wired `OpenTelemetryChatClient` inside `OpenTelemetryAgent.GetRunOptionsWithChatClientWiring` produces no `Activity` (its own `HasListeners` check short-circuits). With no activity, `OpenTelemetryAgent.UpdateCurrentActivity` has nothing to stamp the marker on. The re-entrant call to `ChatClientAgent.RunCoreAsync` from inside the self-wrap would then find no marker, re-enter the self-wrap, and recurse indefinitely — verified by stack overflow during initial implementation. + +The trade-off: pure metrics-only subscribers (a meter subscriber without a tracer) do not trigger the self-wrap. Such users must explicitly wrap with `new OpenTelemetryAgent(agent)` to get metric emission.
This is acceptable because metrics-only configurations are rare and the explicit wrap remains available. + +### Known edge case: custom source with no listeners + +If a user wraps with `new OpenTelemetryAgent(agent, "MyCustomSource")` AND `MyCustomSource` has no listeners AND the default `Experimental.Microsoft.Agents.AI` source has listeners: + +1. `OpenTelemetryAgent._otelClient` (on `MyCustomSource`) creates no Activity (no listeners). +2. `UpdateCurrentActivity` bails because there is no Activity to attach the marker to. +3. Inner `ChatClientAgent.SuppressSelfTelemetryWrap` sees no marker → self-wraps on default source. +4. User unexpectedly sees telemetry on the default source. + +**Mitigation**: customers using custom source names should subscribe only to their custom source. A future ADR may add an opt-out API on `ChatClientAgentOptions` (e.g. `SuppressAutoTelemetry`) for users who need explicit control. + +### Cross-language comparison + +Python today applies telemetry via mixin layers on individual agent classes: + +| Python class | AgentTelemetryLayer | Chat-client based | +| --- | --- | --- | +| `Agent` | yes | yes (extends `RawAgent`) | +| `A2AAgent` | yes | NO (extends `BaseAgent`) | +| `ClaudeAgent` | yes | yes (extends `RawClaudeAgent`) | +| `FoundryAgent` | yes (also `ChatTelemetryLayer`) | yes | +| `GitHubCopilotAgent` | yes | yes | +| `RawAgent` | NO | yes (opt-out path) | +| `WorkflowAgent` | NO | no (silent failure mode) | + +Python's `Agent` is not the direct equivalent of .NET's `ChatClientAgent`. The closer mapping is: + +| Concept | Python | .NET today | .NET after ADR 0027 | +| --- | --- | --- | --- | +| Chat-client agent without telemetry | `RawAgent` | `ChatClientAgent` | `ChatClientAgent` with no `AddSource(...)` | +| Chat-client agent with telemetry | `Agent` (RawAgent + mixin) | `new OpenTelemetryAgent(new ChatClientAgent(...))` | `ChatClientAgent` with `AddSource(...)` | +| Provider agent with telemetry | e.g.
`FoundryAgent` (mixin) | `OpenTelemetryAgent(FoundryAgent)` decorator | `FoundryAgent` with `AddSource(...)` (transitively via inner `ChatClientAgent`) | +| Sensitive data toggle | global `OBSERVABILITY_SETTINGS.enable_sensitive_data` via `ENABLE_SENSITIVE_DATA` env var | per-decorator `OpenTelemetryAgent.EnableSensitiveData` | unchanged: stays on advanced `OpenTelemetryAgent` path | + +Both Python and ADR 0027 (.NET) share the same characteristic that non-chat-client agents must explicitly opt in to telemetry. Python does this via the `AgentTelemetryLayer` mixin; .NET will do this by adding a self-wrap pattern in each agent class that needs it (only `ChatClientAgent` in this ADR). + +### Known telemetry richness gap (future ADR) + +Because Option G reuses `OpenTelemetryAgent` directly, the bare path emits exactly the same shape as today's `new OpenTelemetryAgent(agent)` default: an outer `invoke_agent` span and a nested `chat` span produced by the auto-wired `OpenTelemetryChatClient`. There is no richness delta between the bare path and the decorator path after this ADR. + +A small gap remains versus Python, which captures a few additional things on the `invoke_agent` span that .NET still does not on either path: + +| Span attribute | Python invoke_agent | .NET invoke_agent (after ADR 0027) | +| --- | --- | --- | +| Aggregated token usage across multiple round-trips in function-calling loops | yes | partial (whatever `OpenTelemetryChatClient` captures per request) | +| Input messages (when sensitive enabled) | yes | only on chat span | +| Output messages (when sensitive enabled) | yes | only on chat span | +| System instructions | yes | NO | +| Conversation/thread id | yes | NO | +| All chat options (model, temperature, etc.) 
| yes | only on chat span | +| Operation duration histogram metric | yes | NO | + +A future ADR (placeholder `0028-agent-otel-invoke-span-enrichment`) will propose closing this remaining gap by enriching the `invoke_agent` span produced by `OpenTelemetryAgent` itself, which will benefit both paths uniformly. This ADR scope is intentionally narrow: produce native emission with no regression and no customer configuration change, matching today's `new OpenTelemetryAgent(agent)` default exactly. diff --git a/dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs b/dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs index 1133e10a8a0..b71f02804cd 100644 --- a/dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs +++ b/dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; +using System.Diagnostics; using System.Linq; using System.Runtime.CompilerServices; using System.Text.Json; @@ -44,6 +45,21 @@ public sealed partial class ChatClientAgent : AIAgent private readonly ILogger _logger; private readonly Type _chatClientType; + /// + /// Lazy-initialized self-telemetry-wrap built as new OpenTelemetryAgent(this, DefaultSourceName). + /// Provides BCL-native OpenTelemetry emission for bare-path users (no explicit decorator) + /// by reusing the existing implementation directly. This + /// guarantees behavioral symmetry between bare-path and decorated-path: both produce the + /// 2-span shape (invoke_agent + chat) because both run through the same code. + /// + /// + /// Per-instance suppression marker (set by + /// ) prevents infinite recursion when + /// the wrap calls back into this via + /// . See ADR 0027 for the rationale. + /// + private OpenTelemetryAgent? _selfTelemetryWrap; + /// /// Initializes a new instance of the class. /// @@ -200,11 +216,32 @@ public ChatClientAgent(IChatClient chatClient, ChatClientAgentOptions? options, internal ChatOptions? 
ChatOptions => this._agentOptions?.ChatOptions; /// - protected override async Task RunCoreAsync( + protected override Task RunCoreAsync( IEnumerable messages, AgentSession? session = null, AgentRunOptions? options = null, CancellationToken cancellationToken = default) + { + // Suppression: outer telemetry pipeline already owns invoke_agent for THIS agent + // instance, or the caller opted out of default decoration entirely. Run the chat work + // directly without self-wrapping. + if (this.SuppressSelfTelemetryWrap()) + { + return this.RunChatClientCoreAsync(messages, session, options, cancellationToken); + } + + // Self-wrap by reusing the OpenTelemetryAgent decorator. When OpenTelemetryAgent's + // pipeline forwards back into this ChatClientAgent via its InnerAgent, the marker set + // by UpdateCurrentActivity will satisfy the SuppressSelfTelemetryWrap check above and + // we'll take the direct path then. + return this.EnsureSelfTelemetryWrap().RunAsync(messages, session, options, cancellationToken); + } + + private async Task RunChatClientCoreAsync( + IEnumerable messages, + AgentSession? session, + AgentRunOptions? options, + CancellationToken cancellationToken) { var inputMessages = Throw.IfNull(messages) as IReadOnlyCollection ?? messages.ToList(); @@ -285,12 +322,127 @@ private static IChatClient ApplyRunOptionsTransformations(AgentRunOptions? optio return chatClient; } + /// + /// Returns true when the self-telemetry wrap should be skipped. 
Conditions: + /// (1) caller explicitly opted out of default chat-client decoration via + /// , OR + /// (2) no trace listeners are subscribed to the AgentFramework default source (no telemetry + /// would be emitted anyway, and skipping prevents the + /// + ForwardingChatClient recursion that would + /// otherwise loop indefinitely because the per-instance marker can only be set on an + /// existing ), OR + /// (3) an outer telemetry pipeline already owns the invoke_agent scope for this + /// specific instance (verified by walking the + /// parent chain for our per-instance custom property marker + /// keyed by ). + /// + /// + /// Walking the parent chain is necessary because custom properties are attached to a specific + /// instance and are not inherited by child activities (e.g. intermediate + /// tool-execution spans created during function invocation can become ). + /// ensures nested sub-agent calls + /// (different instances in tool-invocation scenarios) are NOT + /// suppressed. + /// + private bool SuppressSelfTelemetryWrap() + { + if (this._agentOptions?.UseProvidedChatClientAsIs is true) + { + return true; + } + + // Avoid recursion when nothing would be emitted anyway. Without trace listeners on our + // source, OpenTelemetryChatClient creates no Activity, which means + // OpenTelemetryAgent.UpdateCurrentActivity cannot set our suppression marker. Without + // the marker, the recursive call from ForwardingChatClient.InnerAgent.RunAsync would + // not detect that we are already inside our own wrap, causing an infinite loop. 
+ if (!OpenTelemetryConsts.AgentActivitySource.HasListeners()) + { + return true; + } + + for (var act = Activity.Current; act is not null; act = act.Parent) + { + if (act.GetCustomProperty(OpenTelemetryConsts.OwnedInvokeAgentScopeMarker) is ChatClientAgent covered + && ReferenceEquals(covered, this)) + { + return true; + } + } + + return false; + } + + /// + /// Returns the lazily-built self-telemetry wrap, creating it on first call via a race-safe + /// . The wrap reuses the existing + /// implementation so that bare-path emission and + /// decorator-path emission are byte-for-byte the same (matching shape, tags, source name, + /// auto-wired inner chat span). + /// + /// + /// + /// The wrap is constructed as new OpenTelemetryAgent(this, OpenTelemetryConsts.DefaultSourceName). + /// When called, it forwards back into this via its + /// — the per-instance marker set by + /// prevents infinite recursion by + /// causing to return true on the recursive call. + /// + /// + /// Sensitive-data capture follows the standard OpenTelemetry behavior driven by the + /// OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT environment variable (internally + /// handled by ). Per-instance overrides remain available + /// via an explicit new OpenTelemetryAgent(agent).EnableSensitiveData = true decorator + /// at the outermost layer. + /// + /// + private OpenTelemetryAgent EnsureSelfTelemetryWrap() + { + var existing = Volatile.Read(ref this._selfTelemetryWrap); + if (existing is not null) + { + return existing; + } + + var built = new OpenTelemetryAgent(this, OpenTelemetryConsts.DefaultSourceName); + var winner = Interlocked.CompareExchange(ref this._selfTelemetryWrap, built, null); + if (winner is not null) + { + built.Dispose(); + return winner; + } + + return built; + } + /// protected override async IAsyncEnumerable RunCoreStreamingAsync( IEnumerable messages, AgentSession? session = null, AgentRunOptions? 
options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + if (this.SuppressSelfTelemetryWrap()) + { + await foreach (var update in this.RunChatClientCoreStreamingAsync(messages, session, options, cancellationToken).WithCancellation(cancellationToken).ConfigureAwait(false)) + { + yield return update; + } + + yield break; + } + + await foreach (var update in this.EnsureSelfTelemetryWrap().RunStreamingAsync(messages, session, options, cancellationToken).WithCancellation(cancellationToken).ConfigureAwait(false)) + { + yield return update; + } + } + + private async IAsyncEnumerable RunChatClientCoreStreamingAsync( + IEnumerable messages, + AgentSession? session, + AgentRunOptions? options, + [EnumeratorCancellation] CancellationToken cancellationToken) { var inputMessages = Throw.IfNull(messages) as IReadOnlyCollection ?? messages.ToList(); diff --git a/dotnet/src/Microsoft.Agents.AI/OpenTelemetryAgent.cs b/dotnet/src/Microsoft.Agents.AI/OpenTelemetryAgent.cs index cffde717e43..d4354b30c85 100644 --- a/dotnet/src/Microsoft.Agents.AI/OpenTelemetryAgent.cs +++ b/dotnet/src/Microsoft.Agents.AI/OpenTelemetryAgent.cs @@ -183,6 +183,15 @@ private void UpdateCurrentActivity(Activity? previousActivity) { _ = activity.SetTag(OpenTelemetryConsts.GenAI.Agent.Description, description); } + + // Per-instance suppression marker for ADR 0027 / Option G: when a ChatClientAgent is + // reachable inward from us, mark the current activity with the inner ChatClientAgent + // reference so its self-wrap suppresses itself (avoiding triple-emission). + var innerChatClientAgent = this.InnerAgent.GetService(); + if (innerChatClientAgent is not null) + { + activity.SetCustomProperty(OpenTelemetryConsts.OwnedInvokeAgentScopeMarker, innerChatClientAgent); + } } /// State passed from this instance into the inner agent, circumventing the intermediate . 
diff --git a/dotnet/src/Microsoft.Agents.AI/OpenTelemetryConsts.cs b/dotnet/src/Microsoft.Agents.AI/OpenTelemetryConsts.cs index b130e16165d..d898c1bc75f 100644 --- a/dotnet/src/Microsoft.Agents.AI/OpenTelemetryConsts.cs +++ b/dotnet/src/Microsoft.Agents.AI/OpenTelemetryConsts.cs @@ -1,5 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. +using System.Diagnostics; + namespace Microsoft.Agents.AI; /// Provides constants used by various telemetry services. @@ -7,6 +9,29 @@ internal static class OpenTelemetryConsts { public const string DefaultSourceName = "Experimental.Microsoft.Agents.AI"; + /// + /// Shared instance used by 's + /// self-telemetry-wrap suppression check. Only used for ; + /// the actual span emission is performed by the instance + /// that wraps itself with. + /// + public static readonly ActivitySource AgentActivitySource = new(DefaultSourceName); + + /// + /// Key for an custom property that marks the + /// activity as the owner of an invoke_agent scope for a specific + /// instance. The value stored is the + /// reference itself; suppression checks compare via + /// . + /// + /// + /// Custom properties are not exported as OTLP span attributes — they are process-local + /// state used purely to coordinate suppression between + /// + /// and 's self-wrap logic. + /// + public const string OwnedInvokeAgentScopeMarker = "Microsoft.Agents.AI.OpenTelemetry.OwnedInvokeAgentScope"; + public static class GenAI { public const string InvokeAgent = "invoke_agent"; diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/ChatClientAgentOpenTelemetryTests.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/ChatClientAgentOpenTelemetryTests.cs new file mode 100644 index 00000000000..dff61492fd8 --- /dev/null +++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/ChatClientAgentOpenTelemetryTests.cs @@ -0,0 +1,501 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using OpenTelemetry.Trace; + +namespace Microsoft.Agents.AI.UnitTests.ChatClient; + +/// +/// Tests covering OpenTelemetry behavior of . +/// +/// The tests are organized into two phases: +/// PHASE 1 (GREEN baseline): scenarios that exist today but were uncovered. They lock in +/// non-changing behavior so we detect any regression caused by the BCL-native emission work +/// (ADR 0027 / Option G). +/// PHASE 2 (RED -> GREEN): new behavior introduced by Option G. These tests start failing on +/// the current codebase and turn green once Option G is implemented in ChatClientAgent. +/// +/// +/// Tests that need to capture telemetry use to filter +/// activities by their owning instance, making them safe to run +/// in parallel even when multiple tests subscribe to the same global default source. +/// +public class ChatClientAgentOpenTelemetryTests +{ + private const string DefaultSourceName = "Experimental.Microsoft.Agents.AI"; + + // -------------------- PHASE 1: GREEN baseline (current behavior) -------------------- + + /// + /// Bare with no telemetry subscription emits no spans. + /// This must remain true after Option G (zero allocation when no listeners). + /// + [Fact] + public async Task BareChatClientAgent_NoListener_EmitsNothing_Async() + { + // Arrange + var activities = new List(); + // NOTE: no tracer provider built — there are no listeners at all. + var fakeChatClient = new SpyChatClient(); + var agent = new ChatClientAgent(fakeChatClient); + + // Act + _ = await agent.RunAsync("hi"); + + // Assert + Assert.Empty(activities); + Assert.Equal(1, fakeChatClient.GetResponseAsyncCallCount); + } + + /// + /// Bare with a tracer subscribed to a different (unrelated) + /// source emits nothing on the AgentFramework source. 
Stays true after Option G because the + /// self-wrap targets the AgentFramework default source only. + /// + [Fact] + public async Task BareChatClientAgent_ListenerOnOtherSource_EmitsNothing_Async() + { + // Arrange: subscribe to a totally unrelated source. + var activities = new List(); + using var tracerProvider = OpenTelemetry.Sdk.CreateTracerProviderBuilder() + .AddSource("Some.Unrelated.Source") + .AddInMemoryExporter(activities) + .Build(); + + var fakeChatClient = new SpyChatClient(); + var agent = new ChatClientAgent(fakeChatClient); + + // Act + _ = await agent.RunAsync("hi"); + + // Assert + Assert.Empty(activities); + Assert.Equal(1, fakeChatClient.GetResponseAsyncCallCount); + } + + /// + /// for returns the agent's + /// own decorated (not the raw constructor input). + /// Stays true after Option G — we self-wrap a separate field, not the publicly exposed one. + /// + [Fact] + public void ChatClientAgent_GetServiceIChatClient_ReturnsAgentChatClient() + { + // Arrange + var fakeChatClient = new SpyChatClient(); + var agent = new ChatClientAgent(fakeChatClient); + + // Act + var resolved = agent.GetService(); + + // Assert: the agent exposes its decorated stack via the ChatClient property and GetService. + Assert.NotNull(resolved); + Assert.Same(agent.ChatClient, resolved); + } + + /// + /// for returns this. + /// Critical for the per-instance suppression marker — both + /// OpenTelemetryAgent.UpdateCurrentActivity and other discovery paths rely on this. + /// + [Fact] + public void ChatClientAgent_GetServiceChatClientAgent_ReturnsSelf() + { + // Arrange + var fakeChatClient = new SpyChatClient(); + var agent = new ChatClientAgent(fakeChatClient); + + // Act + var resolved = agent.GetService(); + + // Assert + Assert.Same(agent, resolved); + } + + /// + /// When a custom wraps , calling + /// GetService<ChatClientAgent>() on the outer agent traverses inward and finds the + /// inner ChatClientAgent. 
This is the lookup OpenTelemetryAgent uses to set its per-instance + /// suppression marker after Option G. + /// + [Fact] + public void DelegatingWrapper_GetServiceChatClientAgent_FindsInner() + { + // Arrange + var fakeChatClient = new SpyChatClient(); + var inner = new ChatClientAgent(fakeChatClient); + var wrapper = new PassthroughDelegatingAgent(inner); + + // Act + var resolved = wrapper.GetService(); + + // Assert + Assert.Same(inner, resolved); + } + + // -------------------- PHASE 2: RED -> GREEN (Option G new behavior) -------------------- + + /// + /// Bare with a tracer subscribed to the AgentFramework default + /// source emits the same 2-span shape as the explicit new OpenTelemetryAgent(agent) + /// decorator path: one invoke_agent span plus one nested chat span. This is + /// the symmetric behavior of Option G — the bare path reuses + /// internally so its output is identical to the decorated path. + /// + [Fact] + public async Task BareChatClientAgent_ListenerOnDefaultSource_EmitsInvokeAgentAndChatSpans_Async() + { + // Arrange + var fakeChatClient = new SpyChatClient(); + var agent = new ChatClientAgent(fakeChatClient, new ChatClientAgentOptions { Name = "BareAgent" }); + using var capture = new OwnerScopedActivityCapture(agent); + + // Act + _ = await agent.RunAsync("hi"); + + // Assert: 2 activities — matches today's `new OpenTelemetryAgent(agent)` default exactly. + Assert.Equal(2, capture.Activities.Count); + Assert.Contains(capture.Activities, a => a.DisplayName.StartsWith("invoke_agent", StringComparison.Ordinal)); + Assert.Contains(capture.Activities, a => string.Equals(a.GetTagItem("gen_ai.operation.name") as string, "chat", StringComparison.Ordinal)); + } + + /// + /// Streaming path also produces the 2-span shape on a bare . 
+ /// + [Fact] + public async Task BareChatClientAgent_Streaming_ListenerOnDefaultSource_EmitsInvokeAgentAndChatSpans_Async() + { + // Arrange + var fakeChatClient = new SpyChatClient(); + var agent = new ChatClientAgent(fakeChatClient, new ChatClientAgentOptions { Name = "BareAgent" }); + using var capture = new OwnerScopedActivityCapture(agent); + + // Act + await foreach (var _ in agent.RunStreamingAsync("hi")) + { + } + + // Assert + Assert.Equal(2, capture.Activities.Count); + Assert.Contains(capture.Activities, a => a.DisplayName.StartsWith("invoke_agent", StringComparison.Ordinal)); + Assert.Contains(capture.Activities, a => string.Equals(a.GetTagItem("gen_ai.operation.name") as string, "chat", StringComparison.Ordinal)); + } + + /// + /// The invoke_agent span carries the agent's Name, Id, and + /// Description in gen_ai.agent.* tags. + /// + [Fact] + public async Task BareChatClientAgent_InvokeAgentSpan_CarriesAgentIdentityTags_Async() + { + // Arrange + var fakeChatClient = new SpyChatClient(); + var agent = new ChatClientAgent(fakeChatClient, new ChatClientAgentOptions + { + Id = "agent-123", + Name = "MyAgent", + Description = "Helpful test agent.", + }); + using var capture = new OwnerScopedActivityCapture(agent); + + // Act + _ = await agent.RunAsync("hi"); + + // Assert + var invokeAgentSpan = Assert.Single(capture.Activities, a => a.DisplayName.StartsWith("invoke_agent", StringComparison.Ordinal)); + Assert.Equal("invoke_agent", invokeAgentSpan.GetTagItem("gen_ai.operation.name")); + Assert.Equal("agent-123", invokeAgentSpan.GetTagItem("gen_ai.agent.id")); + Assert.Equal("MyAgent", invokeAgentSpan.GetTagItem("gen_ai.agent.name")); + Assert.Equal("Helpful test agent.", invokeAgentSpan.GetTagItem("gen_ai.agent.description")); + } + + /// + /// Passive wrapper (e.g. LoggingAgent-like) that simply + /// forwards must NOT suppress emission. The inner still self-wraps + /// and the 2-span shape is observed by the consumer. 
+ /// + [Fact] + public async Task PassthroughDelegatingWrapper_DoesNotSuppress_InnerChatClientAgentEmits_Async() + { + // Arrange + var fakeChatClient = new SpyChatClient(); + var inner = new ChatClientAgent(fakeChatClient, new ChatClientAgentOptions { Name = "WrappedAgent" }); + var wrapper = new PassthroughDelegatingAgent(inner); + using var capture = new OwnerScopedActivityCapture(inner); + + // Act + _ = await wrapper.RunAsync("hi"); + + // Assert + Assert.Equal(2, capture.Activities.Count); + Assert.Contains(capture.Activities, a => a.DisplayName.StartsWith("invoke_agent", StringComparison.Ordinal)); + Assert.Contains(capture.Activities, a => string.Equals(a.GetTagItem("gen_ai.operation.name") as string, "chat", StringComparison.Ordinal)); + } + + /// + /// Provider-style wrapper that constructs its inner with the + /// user-facing Id/Name (FoundryAgent pattern) yields an invoke_agent span tagged with + /// the user-facing identity, not some internal alias. + /// + [Fact] + public async Task ProviderStyleWrapper_PreservesUserFacingIdentityOnInvokeAgentSpan_Async() + { + // Arrange + var fakeChatClient = new SpyChatClient(); + // Simulates how FoundryAgent (line 302-303, 335-336) sets agent name on the inner ChatClientAgent. 
+ var inner = new ChatClientAgent(fakeChatClient, new ChatClientAgentOptions + { + Id = "foundry-helper", + Name = "foundry-helper", + }); + var wrapper = new PassthroughDelegatingAgent(inner); + using var capture = new OwnerScopedActivityCapture(inner); + + // Act + _ = await wrapper.RunAsync("hi"); + + // Assert + var invokeAgentSpan = Assert.Single(capture.Activities, a => a.DisplayName.StartsWith("invoke_agent", StringComparison.Ordinal)); + Assert.Equal("foundry-helper", invokeAgentSpan.GetTagItem("gen_ai.agent.id")); + Assert.Equal("foundry-helper", invokeAgentSpan.GetTagItem("gen_ai.agent.name")); + } + + /// + /// Explicit decorator wrapping a + /// continues to produce the same 2-span shape it does today — no triple emission from the + /// inner self-wrap. The per-instance marker on the outer chat span suppresses the inner agent. + /// + [Fact] + public async Task ExplicitOpenTelemetryAgent_NoTripleEmission_Async() + { + // Arrange + var fakeChatClient = new SpyChatClient(); + var inner = new ChatClientAgent(fakeChatClient, new ChatClientAgentOptions { Name = "ExplicitWrapped" }); + using var decorated = new OpenTelemetryAgent(inner, DefaultSourceName); + using var capture = new OwnerScopedActivityCapture(inner); + + // Act + _ = await decorated.RunAsync("hi"); + + // Assert: exactly 2 spans, today's exact behavior preserved. + Assert.Equal(2, capture.Activities.Count); + Assert.Contains(capture.Activities, a => a.DisplayName.StartsWith("invoke_agent", StringComparison.Ordinal)); + Assert.Contains(capture.Activities, a => string.Equals(a.GetTagItem("gen_ai.operation.name") as string, "chat", StringComparison.Ordinal)); + } + + /// + /// Two sibling instances each emit their own 2-span pair when + /// invoked sequentially. Per-instance marker scoping ensures one agent's invocation does NOT + /// suppress the other's even if their Activities overlap temporally in the same trace. 
+ /// + [Fact] + public async Task TwoSiblingChatClientAgents_EachEmitsOwnSpans_Async() + { + // Arrange + var spyA = new SpyChatClient(); + var spyB = new SpyChatClient(); + var agentA = new ChatClientAgent(spyA, new ChatClientAgentOptions { Id = "a", Name = "A" }); + var agentB = new ChatClientAgent(spyB, new ChatClientAgentOptions { Id = "b", Name = "B" }); + using var captureA = new OwnerScopedActivityCapture(agentA); + using var captureB = new OwnerScopedActivityCapture(agentB); + + // Act + _ = await agentA.RunAsync("hi A"); + _ = await agentB.RunAsync("hi B"); + + // Assert: each capture holds its own agent's spans (2 each). Per-instance scoping verified + // by the fact that captureA does not see agentB's spans and vice versa. + Assert.Equal(2, captureA.Activities.Count); + Assert.Equal(2, captureB.Activities.Count); + var invokeA = Assert.Single(captureA.Activities, a => a.DisplayName.StartsWith("invoke_agent", StringComparison.Ordinal)); + var invokeB = Assert.Single(captureB.Activities, a => a.DisplayName.StartsWith("invoke_agent", StringComparison.Ordinal)); + Assert.Equal("a", invokeA.GetTagItem("gen_ai.agent.id")); + Assert.Equal("b", invokeB.GetTagItem("gen_ai.agent.id")); + } + + /// + /// Concurrent first-call invocations on the same must result in + /// a single cached self-wrap (no duplicate OpenTelemetryChatClient instances leaking). + /// Verifies the Interlocked.CompareExchange-based lazy init is safe. + /// + [Fact] + public async Task ConcurrentFirstCalls_LazyInitProducesSingleCachedWrap_Async() + { + // Arrange + var fakeChatClient = new SpyChatClient(); + var agent = new ChatClientAgent(fakeChatClient, new ChatClientAgentOptions { Name = "RaceAgent" }); + using var capture = new OwnerScopedActivityCapture(agent); + + // Act: 8 concurrent first-call invocations. 
+ const int Concurrency = 8; + var tasks = Enumerable.Range(0, Concurrency) + .Select(_ => Task.Run(() => agent.RunAsync("hi"))) + .ToArray(); + await Task.WhenAll(tasks); + + // Assert: each call emits 2 spans, no extras. + Assert.Equal(Concurrency * 2, capture.Activities.Count); + } + + /// <summary> + /// When a caller passes a <see cref="ChatClientAgentRunOptions.ChatClientFactory"/>, the + /// factory must still be applied even when self-wrap is active. Order: self-wrap is built + /// first, then the user factory wraps the result. + /// </summary> + [Fact] + public async Task BareChatClientAgent_UserChatClientFactoryStillApplied_Async() + { + // Arrange + var fakeChatClient = new SpyChatClient(); + var agent = new ChatClientAgent(fakeChatClient, new ChatClientAgentOptions { Name = "FactoryAgent" }); + using var capture = new OwnerScopedActivityCapture(agent); + + var factoryInvoked = false; + var runOptions = new ChatClientAgentRunOptions + { + ChatClientFactory = cc => + { + factoryInvoked = true; + return cc; // identity wrap — just verify the hook fires. + }, + }; + + // Act + _ = await agent.RunAsync("hi", options: runOptions); + + // Assert + Assert.True(factoryInvoked, "User-provided ChatClientFactory must still be invoked after self-wrap."); + Assert.Equal(2, capture.Activities.Count); + } + + // -------------------- PHASE 2 — sensitive data behavior -------------------- + + /// <summary> + /// Bare <see cref="ChatClientAgent"/> does NOT capture message content by default. + /// The underlying <see cref="OpenTelemetryChatClient"/> defaults + /// EnableSensitiveData = false (matching the OpenTelemetry safe default), and the + /// bare path provides no per-instance override property. Users who need sensitive-data + /// capture must either (a) set the + /// OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT environment variable BEFORE + /// the process starts (read once at TelemetryHelpers static init), or + /// (b) explicitly wrap with <see cref="OpenTelemetryAgent"/> and set + /// <see cref="OpenTelemetryAgent.EnableSensitiveData"/>.
+ /// </summary> + [Fact] + public async Task BareChatClientAgent_DefaultsToNoMessageCapture_Async() + { + // Arrange + var fakeChatClient = new SensitiveSpyChatClient(); + var agent = new ChatClientAgent(fakeChatClient, new ChatClientAgentOptions { Name = "DefaultSensitiveAgent" }); + using var capture = new OwnerScopedActivityCapture(agent); + + // Act + _ = await agent.RunAsync("hello world"); + + // Assert — no message-content tags on any span by default (assuming env var not set). + Assert.NotEmpty(capture.Activities); + foreach (var activity in capture.Activities) + { + Assert.Null(activity.GetTagItem("gen_ai.input.messages")); + Assert.Null(activity.GetTagItem("gen_ai.output.messages")); + Assert.Null(activity.GetTagItem("gen_ai.prompt")); + } + } + + /// <summary> + /// Explicit <see cref="OpenTelemetryAgent.EnableSensitiveData"/> property still works as today + /// when wrapping a <see cref="ChatClientAgent"/> — verifies the existing per-instance override + /// remains independent of the env var and continues to gate the chat span's message capture. + /// This is the SAME mechanism today's users have relied on; ADR 0027 does not change it. + /// </summary> + [Fact] + public async Task ExplicitOpenTelemetryAgent_EnableSensitiveData_True_CapturesMessages_Async() + { + // Arrange + var fakeChatClient = new SensitiveSpyChatClient(); + var inner = new ChatClientAgent(fakeChatClient, new ChatClientAgentOptions { Name = "ExplicitWrapped" }); + using var decorated = new OpenTelemetryAgent(inner, DefaultSourceName) + { + EnableSensitiveData = true, + }; + using var capture = new OwnerScopedActivityCapture(inner); + + // Act + _ = await decorated.RunAsync("hello world"); + + // Assert — message content captured on at least one span.
+ Assert.NotEmpty(capture.Activities); + var hasInputMessages = capture.Activities.Any(a => + a.GetTagItem("gen_ai.input.messages") is not null || + a.GetTagItem("gen_ai.prompt") is not null); + Assert.True(hasInputMessages, "EnableSensitiveData=true on the decorator should capture message content."); + } + + // -------------------- helpers -------------------- + + /// <summary>Minimal IChatClient that records invocations and returns a canned response.</summary> + private sealed class SpyChatClient : IChatClient + { + public int GetResponseAsyncCallCount { get; private set; } + public int GetStreamingResponseAsyncCallCount { get; private set; } + + public Task<ChatResponse> GetResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellationToken = default) + { + this.GetResponseAsyncCallCount++; + return Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, "ok"))); + } + + public async IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + this.GetStreamingResponseAsyncCallCount++; + await Task.Yield(); + yield return new ChatResponseUpdate(ChatRole.Assistant, "ok"); + } + + public object? GetService(Type serviceType, object? serviceKey = null) => + serviceType?.IsInstanceOfType(this) == true ? this : null; + + public void Dispose() { } + } + + /// <summary> + /// Variant of <see cref="SpyChatClient"/> that advertises <see cref="ChatClientMetadata"/> + /// so <see cref="OpenTelemetryChatClient"/> emits canonical chat-span shape (incl. provider name). + /// Used by sensitive-data tests where we want a "realistic" telemetry-friendly chat client. + /// </summary> + private sealed class SensitiveSpyChatClient : IChatClient + { + private static readonly ChatClientMetadata s_metadata = new("test-provider", new Uri("https://localhost:1234"), "test-model"); + + public Task<ChatResponse> GetResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions?
options = null, CancellationToken cancellationToken = default) + => Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, "ack"))); + + public async IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + await Task.Yield(); + yield return new ChatResponseUpdate(ChatRole.Assistant, "ack"); + } + + public object? GetService(Type serviceType, object? serviceKey = null) => + serviceType == typeof(ChatClientMetadata) ? s_metadata : + serviceType?.IsInstanceOfType(this) == true ? this : + null; + + public void Dispose() { } + } + + /// <summary> + /// Minimal <see cref="DelegatingAIAgent"/> that just forwards calls — represents a + /// passive decorator (LoggingAgent-like) that should not affect telemetry emission. + /// </summary> + private sealed class PassthroughDelegatingAgent(AIAgent inner) : DelegatingAIAgent(inner) + { + } +} diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/OwnerScopedActivityCapture.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/OwnerScopedActivityCapture.cs new file mode 100644 index 00000000000..ed2b02343de --- /dev/null +++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/OwnerScopedActivityCapture.cs @@ -0,0 +1,89 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace Microsoft.Agents.AI.UnitTests.ChatClient; + +/// <summary> +/// Test helper that captures <see cref="Activity"/> instances on the AgentFramework default +/// source while filtering by ownership. Each capture instance only collects activities whose +/// parent chain contains the <see cref="OpenTelemetryConsts.OwnedInvokeAgentScopeMarker"/> +/// custom property pointing at the specified owner <see cref="ChatClientAgent"/>. This makes +/// tests parallel-safe: two tests that both create agents and subscribe to the same global +/// <see cref="ActivitySource"/> name will not see each other's activities in their captures.
+/// </summary> +/// <remarks> +/// <para> +/// The .NET OpenTelemetry collection model is fundamentally source-based at the listener level +/// — every <see cref="ActivityListener"/> or OpenTelemetry SDK tracer provider +/// subscribed to a given source name receives every activity created on that source, regardless +/// of which test or agent created it. Filtering must therefore happen on the receiving side. +/// </para> +/// <para> +/// This helper attaches a raw <see cref="ActivityListener"/> (not via OpenTelemetry SDK) and +/// applies an ownership predicate in its <see cref="ActivityListener.ActivityStopped"/> callback, +/// keeping the test's captured activities isolated from any other test running concurrently. +/// </para> +/// </remarks> +internal sealed class OwnerScopedActivityCapture : IDisposable +{ + private readonly List<Activity> _activities = new(); + private readonly object _lock = new(); + private readonly ActivityListener _listener; + private readonly ChatClientAgent _ownerAgent; + + /// <summary> + /// Creates a new capture scoped to the activities owned by <paramref name="ownerAgent"/>. + /// </summary> + /// <param name="ownerAgent"> + /// The <see cref="ChatClientAgent"/> whose own-pipeline activities should be captured. The + /// per-instance marker set by either the bare self-wrap path or by + /// OpenTelemetryAgent.UpdateCurrentActivity when this agent is the inner + /// agent of an explicit decorator is matched via + /// <see cref="object.ReferenceEquals(object, object)"/>. + /// </param> + public OwnerScopedActivityCapture(ChatClientAgent ownerAgent) + { + this._ownerAgent = ownerAgent; + this._listener = new ActivityListener + { + ShouldListenTo = source => source.Name == OpenTelemetryConsts.DefaultSourceName, + Sample = (ref ActivityCreationOptions<ActivityContext> _) => ActivitySamplingResult.AllDataAndRecorded, + ActivityStopped = this.OnActivityStopped, + }; + ActivitySource.AddActivityListener(this._listener); + } + + /// <summary>Gets a snapshot of the activities captured for this owner agent.</summary>
+ public IReadOnlyList<Activity> Activities + { + get + { + lock (this._lock) + { + return this._activities.ToArray(); + } + } + } + + /// <inheritdoc/> + public void Dispose() => this._listener.Dispose(); + + private void OnActivityStopped(Activity activity) + { + for (var current = activity; current is not null; current = current.Parent) + { + if (ReferenceEquals(current.GetCustomProperty(OpenTelemetryConsts.OwnedInvokeAgentScopeMarker), this._ownerAgent)) + { + lock (this._lock) + { + this._activities.Add(activity); + } + + return; + } + } + } +} diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/OpenTelemetryAgentTests.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/OpenTelemetryAgentTests.cs index 1f04d09ba30..5454c3c6cf5 100644 --- a/dotnet/tests/Microsoft.Agents.AI.UnitTests/OpenTelemetryAgentTests.cs +++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/OpenTelemetryAgentTests.cs @@ -871,22 +871,17 @@ public async Task Ctor_NullOrWhitespaceSourceName_AutoWiredChatClientUsesDefault // Both the agent-level invoke_agent span and the auto-wired chat span must be emitted under // OpenTelemetryConsts.DefaultSourceName when the caller passes null, "", or whitespace, so they reach // the same ActivitySource and are not silently dropped by the exporter.
- var activities = new List(); - using var tracerProvider = OpenTelemetry.Sdk.CreateTracerProviderBuilder() - .AddSource("Experimental.Microsoft.Agents.AI") - .AddInMemoryExporter(activities) - .Build(); - var fakeChatClient = new AutoWireTestChatClient(); var inner = new ChatClientAgent(fakeChatClient); using var agent = new OpenTelemetryAgent(inner, sourceName); + using var capture = new ChatClient.OwnerScopedActivityCapture(inner); _ = await agent.RunAsync("hi"); - Assert.Equal(2, activities.Count); - Assert.All(activities, a => Assert.Equal("Experimental.Microsoft.Agents.AI", a.Source.Name)); - Assert.Contains(activities, a => a.DisplayName.StartsWith("invoke_agent", StringComparison.Ordinal)); - Assert.Contains(activities, a => string.Equals(a.GetTagItem("gen_ai.operation.name") as string, "chat", StringComparison.Ordinal)); + Assert.Equal(2, capture.Activities.Count); + Assert.All(capture.Activities, a => Assert.Equal("Experimental.Microsoft.Agents.AI", a.Source.Name)); + Assert.Contains(capture.Activities, a => a.DisplayName.StartsWith("invoke_agent", StringComparison.Ordinal)); + Assert.Contains(capture.Activities, a => string.Equals(a.GetTagItem("gen_ai.operation.name") as string, "chat", StringComparison.Ordinal)); } #pragma warning disable MEAI001 // ResponseContinuationToken is experimental.